Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merge trilinos/develop into e3sm-project/develop #102

Merged
merged 21 commits into from
Feb 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
78b845e
MueLu: Fix bug in InterfaceAggFactory
csteimer Oct 18, 2023
0fb78e7
MueLu: Rework SegregatedAFactory for map-pair filtering
csteimer Oct 18, 2023
42dbade
MueLu: Adapt blockmap unittest for new input mechanism of SegAFact
csteimer Oct 18, 2023
4b7d0ac
MueLu: Add contact framework test and segAFactory matrix filtering
csteimer Oct 18, 2023
fd52a9f
MueLu: reactivate interfaceAggFactory test for node-based aggregation
csteimer Nov 28, 2023
5eced1e
MueLu: Fix failing interfaceAgg Tests
csteimer Nov 28, 2023
bc030d8
MueLu: Apply clang tidy
csteimer Jan 16, 2024
a3f623d
MueLu: Add importOffRankDroppingInfo
csteimer Jan 22, 2024
bd215dc
MueLu:: SegregatedAFactory remove unused variable
csteimer Feb 20, 2024
8ecd723
MueLu: Run clang-format on utilities
csteimer Feb 21, 2024
cb4c1d5
Merge Pull Request #12415 from csteimer/Trilinos/rework_segregatedAFa…
trilinos-autotester Feb 21, 2024
2dec895
Do not process krino tests if testing is disabled
sebrowne Feb 21, 2024
a4f4ddc
Use new options for enabling UVM
sebrowne Feb 21, 2024
2fa360c
Merge Pull Request #12762 from sebrowne/Trilinos/fix_krino_test_enable
trilinos-autotester Feb 21, 2024
589d1f9
Add config to facilitate fixing UVM
sebrowne Feb 21, 2024
fe84214
Merge Pull Request #12763 from sebrowne/Trilinos/uvm_updates
trilinos-autotester Feb 21, 2024
161c058
Tpetra: Fix fillComplete in Add
cgcgcg Feb 21, 2024
f429c52
MueLu: Fix EpetraVsTpetra test
cgcgcg Feb 21, 2024
2a55cbe
Merge pull request #12765 from cgcgcg/tpetraAddFix
cgcgcg Feb 21, 2024
95c84fc
Merge Pull Request #12766 from cgcgcg/Trilinos/MueLuPLIfix
trilinos-autotester Feb 22, 2024
77aca26
Merge remote-tracking branch 'upstream/develop' into develop
bartgol Feb 22, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 11 additions & 2 deletions packages/framework/ini-files/config-specs.ini
Original file line number Diff line number Diff line change
Expand Up @@ -791,10 +791,12 @@ opt-set-cmake-var Kokkos_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE BOOL : OFF
#

[USE-UVM|YES]
opt-set-cmake-var Kokkos_ENABLE_CUDA_UVM BOOL : ON
opt-set-cmake-var Tpetra_ALLOCATE_IN_SHARED_SPACE BOOL : ON
opt-set-cmake-var KokkosKernels_INST_MEMSPACE_CUDAUVMSPACE BOOL : ON

[USE-UVM|NO]
opt-set-cmake-var Kokkos_ENABLE_CUDA_UVM BOOL : OFF
opt-set-cmake-var Tpetra_ALLOCATE_IN_SHARED_SPACE BOOL : OFF
opt-set-cmake-var KokkosKernels_INST_MEMSPACE_CUDAUVMSPACE BOOL : OFF



Expand Down Expand Up @@ -2406,6 +2408,13 @@ use CUDA11-RUN-SERIAL-TESTS
# MPI issue (TRILFRAME-552)
opt-set-cmake-var ROL_example_PinT_parabolic-control_AugmentedSystem_test_MPI_2_DISABLE BOOL FORCE : ON


[rhel7_sems-cuda-11.4.2-sems-gnu-10.1.0-sems-openmpi-4.0.5_release_static_Volta70_no-asan_complex_no-fpic_mpi_pt_no-rdc_uvm_deprecated-on_all]
use rhel7_sems-cuda-11.4.2-sems-gnu-10.1.0-sems-openmpi-4.0.5_release_static_Volta70_no-asan_complex_no-fpic_mpi_pt_no-rdc_uvm_deprecated-on_no-package-enables
use PACKAGE-ENABLES|ALL
opt-set-cmake-var Trilinos_ENABLE_TESTS BOOL FORCE : OFF


[rhel7_sems-cuda-11.4.2-sems-gnu-10.1.0-sems-openmpi-4.0.5_release_static_Volta70_no-asan_complex_no-fpic_mpi_pt_rdc_uvm_deprecated-on_all]
# uses sems-v2 modules
use rhel7_sems-cuda-11.4.2-sems-gnu-10.1.0-sems-openmpi-4.0.5_release_static_Volta70_no-asan_complex_no-fpic_mpi_pt_no-rdc_uvm_deprecated-on_no-package-enables
Expand Down
2 changes: 1 addition & 1 deletion packages/krino/krino/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ add_subdirectory(krino_lib)
add_subdirectory(region)
add_subdirectory(rebalance_utils)
add_subdirectory(parser)
add_subdirectory(unit_tests)
TRIBITS_ADD_TEST_DIRECTORIES(unit_tests)

SET(SOURCES_MAIN Apps_krino.cpp)

Expand Down
35 changes: 16 additions & 19 deletions packages/muelu/src/Misc/MueLu_InterfaceAggregationFactory_def.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,6 @@ void InterfaceAggregationFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Bui
primalInterfaceDofRowMap = Get<RCP<const Map>>(currentLevel, "Primal interface DOF map");
}
TEUCHOS_ASSERT(!primalInterfaceDofRowMap.is_null());

if (A01->IsView("stridedMaps") && rcp_dynamic_cast<const StridedMap>(A01->getRowMap("stridedMaps")) != Teuchos::null) {
auto stridedRowMap = rcp_dynamic_cast<const StridedMap>(A01->getRowMap("stridedMaps"));
auto stridedColMap = rcp_dynamic_cast<const StridedMap>(A01->getColMap("stridedMaps"));
Expand Down Expand Up @@ -286,9 +285,8 @@ void InterfaceAggregationFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Bui
* - is 2 or 3 (for 2d or 3d problems) on coarser levels (same as on finest level, whereas there
* are 3 or 6 displacement dofs per node)
*/
GlobalOrdinal dualDofOffset = A01->getColMap()->getMinAllGlobalIndex();
GlobalOrdinal dualDofOffset = A01->getRowMap()->getMaxAllGlobalIndex() + 1;
LocalOrdinal dualBlockDim = numDofsPerDualNode;

// Generate global replicated mapping "lagrNodeId -> dispNodeId"
RCP<const Map> dualDofMap = A01->getDomainMap();
GlobalOrdinal gMaxDualNodeId = AmalgamationFactory::DOFGid2NodeId(
Expand Down Expand Up @@ -326,22 +324,22 @@ void InterfaceAggregationFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Bui
const GlobalOrdinal gPrimalNodeId = AmalgamationFactory::DOFGid2NodeId(gPrimalRowId, primalBlockDim, primalDofOffset, primalInterfaceDofRowMap->getIndexBase());
const LocalOrdinal lPrimalNodeId = lPrimalRowId / numDofsPerPrimalNode;
const LocalOrdinal primalAggId = primalVertex2AggId[lPrimalNodeId];

const GlobalOrdinal gDualDofId = A01->getColMap()->getGlobalElement(r);

const GlobalOrdinal gDualNodeId = AmalgamationFactory::DOFGid2NodeId(gDualDofId, dualBlockDim, dualDofOffset, 0);

if (local_dualNodeId2primalNodeId[gDualNodeId - gMinDualNodeId] == -GO_ONE) {
local_dualNodeId2primalNodeId[gDualNodeId - gMinDualNodeId] = gPrimalNodeId;
local_dualNodeId2primalAggId[gDualNodeId - gMinDualNodeId] = primalAggId;
} else {
GetOStream(Warnings) << "PROC: " << myRank << " gDualNodeId " << gDualNodeId << " is already connected to primal nodeId "
<< local_dualNodeId2primalNodeId[gDualNodeId - gMinDualNodeId]
<< ". Ignore new dispNodeId: " << gPrimalNodeId << std::endl;
}
const GlobalOrdinal gDualDofId = A01->getDomainMap()->getGlobalElement(r);
const GlobalOrdinal gDualNodeId = AmalgamationFactory::DOFGid2NodeId(gDualDofId, dualBlockDim, dualDofOffset, 0);

TEUCHOS_TEST_FOR_EXCEPTION(local_dualNodeId2primalNodeId[gDualNodeId - gMinDualNodeId] != -GO_ONE,
MueLu::Exceptions::RuntimeError,
"PROC: " << myRank << " gDualNodeId " << gDualNodeId
<< " is already connected to primal nodeId "
<< local_dualNodeId2primalNodeId[gDualNodeId - gMinDualNodeId]
<< ". This shouldn't be. A possible reason might be: "
"Check if parallel distribution of primalInterfaceDofRowMap corresponds "
"to the parallel distribution of subblock matrix A01.");

local_dualNodeId2primalNodeId[gDualNodeId - gMinDualNodeId] = gPrimalNodeId;
local_dualNodeId2primalAggId[gDualNodeId - gMinDualNodeId] = primalAggId;
}
}

const int dualNodeId2primalNodeIdSize = Teuchos::as<int>(local_dualNodeId2primalNodeId.size());
Teuchos::reduceAll(*comm, Teuchos::REDUCE_MAX, dualNodeId2primalNodeIdSize,
&local_dualNodeId2primalNodeId[0], &dualNodeId2primalNodeId[0]);
Expand Down Expand Up @@ -389,7 +387,6 @@ void InterfaceAggregationFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Bui
}

const LocalOrdinal fullblocksize = numDofsPerDualNode;
const GlobalOrdinal offset = A01->getColMap()->getMinAllGlobalIndex();
const LocalOrdinal blockid = -1;
const LocalOrdinal nStridedOffset = 0;
const LocalOrdinal stridedblocksize = fullblocksize;
Expand All @@ -408,7 +405,7 @@ void InterfaceAggregationFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Bui

RCP<AmalgamationInfo> dualAmalgamationInfo = rcp(new AmalgamationInfo(rowTranslation, colTranslation,
A01->getDomainMap(), A01->getDomainMap(), A01->getDomainMap(),
fullblocksize, offset, blockid, nStridedOffset, stridedblocksize));
fullblocksize, dualDofOffset, blockid, nStridedOffset, stridedblocksize));

dualAggregates->SetNumAggregates(nLocalAggregates);
dualAggregates->AggregatesCrossProcessors(primalAggregates->AggregatesCrossProcessors());
Expand Down
83 changes: 57 additions & 26 deletions packages/muelu/src/Misc/MueLu_SegregatedAFactory_decl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,26 +47,62 @@
#ifndef MUELU_SEGREGATEDAFACTORY_DECL_HPP
#define MUELU_SEGREGATEDAFACTORY_DECL_HPP

#include "MueLu_ConfigDefs.hpp"
#include "MueLu_SegregatedAFactory_fwd.hpp"

#include "MueLu_Level_fwd.hpp"
#include "MueLu_SingleLevelFactoryBase.hpp"

namespace MueLu {

/*!
@class SegregatedAFactory class.
@brief Factory for building a new "segregated" A operator. Here, "segregated" means that the user
provides a map (containing a subset of the row gids of the input matrix A) and the factory
drops the off-diagonal entries (a,b) and (b,a) in A where "a" denotes a GID entry in the provided map
and "b" denotes a GID that is not contained in the provided map.
@class SegregatedAFactory
@brief Factory for building a new "segregated" A operator. Here, "segregated" means that the user
provides some map(s) (containing a subset of GIDs of the input matrix A) and the factory
drops entries depending on the dropping scheme.

## Idea ##

The idea is to use the output matrix A as input for the aggregation factory to have control over
the aggregates and make sure that aggregates do not cross certain areas.

## Remarks ##

This factory supports multiple dropping schemes based on different inputs. They are:

- blockmap: Based on the user provided "blockmap", the off-diagonal entries (a,b) and (b,a) in A are dropped.
"a" denotes a GID entry in the provided map and "b" denotes a GID that is not contained in the provided map.
In this use case the Factory expects a dropMap1 (==blockmap).
The blockmap scheme also doesn't support the "Call ReduceAll on dropMap1/2" options.

- map-pair: Based on a "map-pair", the user provides two maps "dropMap1" and "dropMap2",
which specify global row/column pairs in the operator A to be dropped.
The Factory drops any possible combination of the dropMaps 1 and 2. To ensure that entry A(a,b) is
dropped, as well as entry A(b,a), there is an option to create redundant dropMaps on all Procs.
This ensures that entries aren't overlooked due to the local rowmaps of the operator A.

Note: we have to drop the entries (i.e. not just set them to zero) as the CoalesceDropFactory
does not distinguish between matrix entries which are zero and nonzero.

## Input/output of this factory ##

The idea is to use the output matrix A as input for the aggregation factory to have control over
the aggregates and make sure that aggregates do not cross certain areas.
### User parameters of SegregatedAFactory ###
Parameter | type | default | master.xml | validated | requested | description
----------|------|---------|:----------:|:---------:|:---------:|------------
A | Factory | null | | * | * | Generating factory of the matrix A
droppingScheme| string | vague | | * | * | Strategy to drop entries from matrix A based on the input of some map(s) [blockmap, map-pair]
dropMap1 | Factory | null | | * | * | Generating factory for dropMap1
dropMap2 | Factory | null | | * | * | Generating factory for dropMap2
Call ReduceAll on dropMap1 | bool | | * | | | Boolean for calling reduceAll on dropMap1
Call ReduceAll on dropMap2 | bool | | * | | | Boolean for calling reduceAll on dropMap2

Note: we have to drop the entries (i.e. not just set them to zero) as the CoalesceDropFactory
does not distinguish between matrix entries which are zero and nonzero.
The * in the @c master.xml column denotes that the parameter is defined in the @c master.xml file.<br>
The * in the @c validated column means that the parameter is declared in the list of valid input parameters (see @c GetValidParameters() ).<br>
The * in the @c requested column states that the data is requested as input with all dependencies (see @c DeclareInput() ).

### Variables provided by this factory ###

After SegregatedAFactory::Build the following data is available (if requested)

Parameter | generated by | description
----------|--------------|------------
A | SegregatedAFactory | Provides a filtered Matrix, where all possible combinations of the entries of the dropMap(s) have been removed from the input matrix A
*/

template <class Scalar = DefaultScalar,
Expand All @@ -78,33 +114,28 @@ class SegregatedAFactory : public SingleLevelFactoryBase {
#include "MueLu_UseShortNames.hpp"

public:
//! Constructor.
SegregatedAFactory() = default;

//! Input
//@{

void DeclareInput(Level& currentLevel) const;

RCP<const ParameterList> GetValidParameterList() const;
RCP<const ParameterList> GetValidParameterList() const override;

//@}
void DeclareInput(Level& currentLevel) const override;

//! @name Build methods.
//@{

/*!
@brief Build method.

Builds filtered matrix and returns it in <tt>currentLevel</tt>.
*/
void Build(Level& currentLevel) const;

//@}
void Build(Level& currentLevel) const override;

private:
//! Generating factory of input variable
mutable RCP<const FactoryBase> mapFact_;
void BuildBasedOnBlockmap(Level& currentLevel) const;

void BuildBasedOnMapPair(Level& currentLevel) const;

// RCP<const Map> CreateRedundantMaps(Teuchos::RCP<const Map> localDropMap, Teuchos::RCP<const Matrix> Ain) const;

}; // class SegregatedAFactory

Expand Down
Loading
Loading