Skip to content

Commit

Permalink
Merge pull request #102 from E3SM-Project/update-develop-20240222
Browse files Browse the repository at this point in the history
Merge trilinos/develop into e3sm-project/develop
  • Loading branch information
github-actions[bot] authored Feb 22, 2024
2 parents f528a4c + 77aca26 commit 7ef74d7
Show file tree
Hide file tree
Showing 30 changed files with 4,259 additions and 313 deletions.
13 changes: 11 additions & 2 deletions packages/framework/ini-files/config-specs.ini
Original file line number Diff line number Diff line change
Expand Up @@ -791,10 +791,12 @@ opt-set-cmake-var Kokkos_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE BOOL : OFF
#

[USE-UVM|YES]
opt-set-cmake-var Kokkos_ENABLE_CUDA_UVM BOOL : ON
opt-set-cmake-var Tpetra_ALLOCATE_IN_SHARED_SPACE BOOL : ON
opt-set-cmake-var KokkosKernels_INST_MEMSPACE_CUDAUVMSPACE BOOL : ON

[USE-UVM|NO]
opt-set-cmake-var Kokkos_ENABLE_CUDA_UVM BOOL : OFF
opt-set-cmake-var Tpetra_ALLOCATE_IN_SHARED_SPACE BOOL : OFF
opt-set-cmake-var KokkosKernels_INST_MEMSPACE_CUDAUVMSPACE BOOL : OFF



Expand Down Expand Up @@ -2406,6 +2408,13 @@ use CUDA11-RUN-SERIAL-TESTS
# MPI issue (TRILFRAME-552)
opt-set-cmake-var ROL_example_PinT_parabolic-control_AugmentedSystem_test_MPI_2_DISABLE BOOL FORCE : ON


[rhel7_sems-cuda-11.4.2-sems-gnu-10.1.0-sems-openmpi-4.0.5_release_static_Volta70_no-asan_complex_no-fpic_mpi_pt_no-rdc_uvm_deprecated-on_all]
use rhel7_sems-cuda-11.4.2-sems-gnu-10.1.0-sems-openmpi-4.0.5_release_static_Volta70_no-asan_complex_no-fpic_mpi_pt_no-rdc_uvm_deprecated-on_no-package-enables
use PACKAGE-ENABLES|ALL
opt-set-cmake-var Trilinos_ENABLE_TESTS BOOL FORCE : OFF


[rhel7_sems-cuda-11.4.2-sems-gnu-10.1.0-sems-openmpi-4.0.5_release_static_Volta70_no-asan_complex_no-fpic_mpi_pt_rdc_uvm_deprecated-on_all]
# uses sems-v2 modules
use rhel7_sems-cuda-11.4.2-sems-gnu-10.1.0-sems-openmpi-4.0.5_release_static_Volta70_no-asan_complex_no-fpic_mpi_pt_no-rdc_uvm_deprecated-on_no-package-enables
Expand Down
2 changes: 1 addition & 1 deletion packages/krino/krino/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ add_subdirectory(krino_lib)
add_subdirectory(region)
add_subdirectory(rebalance_utils)
add_subdirectory(parser)
add_subdirectory(unit_tests)
TRIBITS_ADD_TEST_DIRECTORIES(unit_tests)

SET(SOURCES_MAIN Apps_krino.cpp)

Expand Down
35 changes: 16 additions & 19 deletions packages/muelu/src/Misc/MueLu_InterfaceAggregationFactory_def.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,6 @@ void InterfaceAggregationFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Bui
primalInterfaceDofRowMap = Get<RCP<const Map>>(currentLevel, "Primal interface DOF map");
}
TEUCHOS_ASSERT(!primalInterfaceDofRowMap.is_null());

if (A01->IsView("stridedMaps") && rcp_dynamic_cast<const StridedMap>(A01->getRowMap("stridedMaps")) != Teuchos::null) {
auto stridedRowMap = rcp_dynamic_cast<const StridedMap>(A01->getRowMap("stridedMaps"));
auto stridedColMap = rcp_dynamic_cast<const StridedMap>(A01->getColMap("stridedMaps"));
Expand Down Expand Up @@ -286,9 +285,8 @@ void InterfaceAggregationFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Bui
* - is 2 or 3 (for 2d or 3d problems) on coarser levels (same as on finest level, whereas there
* are 3 or 6 displacement dofs per node)
*/
GlobalOrdinal dualDofOffset = A01->getColMap()->getMinAllGlobalIndex();
GlobalOrdinal dualDofOffset = A01->getRowMap()->getMaxAllGlobalIndex() + 1;
LocalOrdinal dualBlockDim = numDofsPerDualNode;

// Generate global replicated mapping "lagrNodeId -> dispNodeId"
RCP<const Map> dualDofMap = A01->getDomainMap();
GlobalOrdinal gMaxDualNodeId = AmalgamationFactory::DOFGid2NodeId(
Expand Down Expand Up @@ -326,22 +324,22 @@ void InterfaceAggregationFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Bui
const GlobalOrdinal gPrimalNodeId = AmalgamationFactory::DOFGid2NodeId(gPrimalRowId, primalBlockDim, primalDofOffset, primalInterfaceDofRowMap->getIndexBase());
const LocalOrdinal lPrimalNodeId = lPrimalRowId / numDofsPerPrimalNode;
const LocalOrdinal primalAggId = primalVertex2AggId[lPrimalNodeId];

const GlobalOrdinal gDualDofId = A01->getColMap()->getGlobalElement(r);

const GlobalOrdinal gDualNodeId = AmalgamationFactory::DOFGid2NodeId(gDualDofId, dualBlockDim, dualDofOffset, 0);

if (local_dualNodeId2primalNodeId[gDualNodeId - gMinDualNodeId] == -GO_ONE) {
local_dualNodeId2primalNodeId[gDualNodeId - gMinDualNodeId] = gPrimalNodeId;
local_dualNodeId2primalAggId[gDualNodeId - gMinDualNodeId] = primalAggId;
} else {
GetOStream(Warnings) << "PROC: " << myRank << " gDualNodeId " << gDualNodeId << " is already connected to primal nodeId "
<< local_dualNodeId2primalNodeId[gDualNodeId - gMinDualNodeId]
<< ". Ignore new dispNodeId: " << gPrimalNodeId << std::endl;
}
const GlobalOrdinal gDualDofId = A01->getDomainMap()->getGlobalElement(r);
const GlobalOrdinal gDualNodeId = AmalgamationFactory::DOFGid2NodeId(gDualDofId, dualBlockDim, dualDofOffset, 0);

TEUCHOS_TEST_FOR_EXCEPTION(local_dualNodeId2primalNodeId[gDualNodeId - gMinDualNodeId] != -GO_ONE,
MueLu::Exceptions::RuntimeError,
"PROC: " << myRank << " gDualNodeId " << gDualNodeId
<< " is already connected to primal nodeId "
<< local_dualNodeId2primalNodeId[gDualNodeId - gMinDualNodeId]
<< ". This shouldn't be. A possible reason might be: "
"Check if parallel distribution of primalInterfaceDofRowMap corresponds "
"to the parallel distribution of subblock matrix A01.");

local_dualNodeId2primalNodeId[gDualNodeId - gMinDualNodeId] = gPrimalNodeId;
local_dualNodeId2primalAggId[gDualNodeId - gMinDualNodeId] = primalAggId;
}
}

const int dualNodeId2primalNodeIdSize = Teuchos::as<int>(local_dualNodeId2primalNodeId.size());
Teuchos::reduceAll(*comm, Teuchos::REDUCE_MAX, dualNodeId2primalNodeIdSize,
&local_dualNodeId2primalNodeId[0], &dualNodeId2primalNodeId[0]);
Expand Down Expand Up @@ -389,7 +387,6 @@ void InterfaceAggregationFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Bui
}

const LocalOrdinal fullblocksize = numDofsPerDualNode;
const GlobalOrdinal offset = A01->getColMap()->getMinAllGlobalIndex();
const LocalOrdinal blockid = -1;
const LocalOrdinal nStridedOffset = 0;
const LocalOrdinal stridedblocksize = fullblocksize;
Expand All @@ -408,7 +405,7 @@ void InterfaceAggregationFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Bui

RCP<AmalgamationInfo> dualAmalgamationInfo = rcp(new AmalgamationInfo(rowTranslation, colTranslation,
A01->getDomainMap(), A01->getDomainMap(), A01->getDomainMap(),
fullblocksize, offset, blockid, nStridedOffset, stridedblocksize));
fullblocksize, dualDofOffset, blockid, nStridedOffset, stridedblocksize));

dualAggregates->SetNumAggregates(nLocalAggregates);
dualAggregates->AggregatesCrossProcessors(primalAggregates->AggregatesCrossProcessors());
Expand Down
83 changes: 57 additions & 26 deletions packages/muelu/src/Misc/MueLu_SegregatedAFactory_decl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,26 +47,62 @@
#ifndef MUELU_SEGREGATEDAFACTORY_DECL_HPP
#define MUELU_SEGREGATEDAFACTORY_DECL_HPP

#include "MueLu_ConfigDefs.hpp"
#include "MueLu_SegregatedAFactory_fwd.hpp"

#include "MueLu_Level_fwd.hpp"
#include "MueLu_SingleLevelFactoryBase.hpp"

namespace MueLu {

/*!
@class SegregatedAFactory class.
@brief Factory for building a new "segregated" A operator. Here, "segregated" means that the user
provides a map (containing a subset of the row gids of the input matrix A) and the factory
drops the off-diagonal entries (a,b) and (b,a) in A where "a" denotes a GID entry in the provided map
and "b" denotes a GID that is not contained in the provided map.
@class SegregatedAFactory class.
@brief Factory for building a new "segregated" A operator. Here, "segregated" means that the user
provides some map(s) (containing a subset of GIDs of the input matrix A) and the factory
drops entries depending on the dropping scheme.
## Idea ##
The idea is to use the output matrix A as input for the aggregation factory to have control over
the aggregates and make sure that aggregates do not cross certain areas.
## Remarks ##
This factory supports multiple dropping schemes based on different inputs. They are:
- blockmap: Based on the user provided "blockmap", the off-diagonal entries (a,b) and (b,a) in A are dropped.
"a" denotes a GID entry in the provided map and "b" denotes a GID that is not contained in the provided map.
In this use case the Factory expects a dropMap1 (==blockmap).
The blockmap scheme also doesn't support the "Call ReduceAll on dropMap1/2" options.
- map-pair: Based on a "map-pair", the user provides two maps "dropMap1" and "dropMap2",
which specify global row/column pairs in the operator A to be dropped.
The Factory drops any possible combination of the dropMaps 1 and 2. To ensure that entry A(a,b) is
dropped, as well as entry A(b,a), there is an option to create redundant dropMaps on all Procs.
This ensures that entries aren't overlooked due to the local rowmaps of the operator A.
Note: we have to drop the entries (i.e. not just set them to zero) as the CoalesceDropFactory
does not distinguish between matrix entries which are zero and nonzero.
## Input/output of this factory ##
The idea is to use the output matrix A as input for the aggregation factory to have control over
the aggregates and make sure that aggregates do not cross certain areas.
### User parameters of SegregatedAFactory ###
Parameter | type | default | master.xml | validated | requested | description
----------|------|---------|:----------:|:---------:|:---------:|------------
A | Factory | null | | * | * | Generating factory of the matrix A
droppingScheme| string | vague | | * | * | Strategy to drop entries from matrix A based on the input of some map(s) [blockmap, map-pair]
dropMap1 | Factory | null | | * | * | Generating factory for dropMap1
dropMap2 | Factory | null | | * | * | Generating factory for dropMap2
Call ReduceAll on dropMap1 | bool | | * | | Boolean for calling reduceAll on dropMap1
Call ReduceAll on dropMap2 | bool | | * | | Boolean for calling reduceAll on dropMap2
Note: we have to drop the entries (i.e. not just set them to zero) as the CoalesceDropFactory
does not distinguish between matrix entries which are zero and nonzero.
The * in the @c master.xml column denotes that the parameter is defined in the @c master.xml file.<br>
The * in the @c validated column means that the parameter is declared in the list of valid input parameters (see @c GetValidParameters() ).<br>
The * in the @c requested column states that the data is requested as input with all dependencies (see @c DeclareInput() ).
### Variables provided by this factory ###
After SegregatedAFactory::Build the following data is available (if requested)
Parameter | generated by | description
----------|--------------|------------
A | SegregatedAFactory | Provides a filtered Matrix, where all possible combinations of the entries of the dropMap(s) have been removed from the input matrix A
*/

template <class Scalar = DefaultScalar,
Expand All @@ -78,33 +114,28 @@ class SegregatedAFactory : public SingleLevelFactoryBase {
#include "MueLu_UseShortNames.hpp"

public:
//! Constructor.
SegregatedAFactory() = default;

//! Input
//@{

void DeclareInput(Level& currentLevel) const;

RCP<const ParameterList> GetValidParameterList() const;
RCP<const ParameterList> GetValidParameterList() const override;

//@}
void DeclareInput(Level& currentLevel) const override;

//! @name Build methods.
//@{

/*!
@brief Build method.
Builds filtered matrix and returns it in <tt>currentLevel</tt>.
*/
void Build(Level& currentLevel) const;

//@}
void Build(Level& currentLevel) const override;

private:
//! Generating factory of input variable
mutable RCP<const FactoryBase> mapFact_;
void BuildBasedOnBlockmap(Level& currentLevel) const;

void BuildBasedOnMapPair(Level& currentLevel) const;

// RCP<const Map> CreateRedundantMaps(Teuchos::RCP<const Map> localDropMap, Teuchos::RCP<const Matrix> Ain) const;

}; // class SegregatedAFactory

Expand Down
Loading

0 comments on commit 7ef74d7

Please sign in to comment.