Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Decouple the collection buffer creation from the collection #394

Merged
merged 14 commits into from
May 9, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 0 additions & 9 deletions include/podio/CollectionBase.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,15 +47,6 @@ class CollectionBase {
/// Get the collection buffers for this collection
virtual podio::CollectionWriteBuffers getBuffers() = 0;

/// Create (empty) collection buffers from which a collection can be constructed
virtual podio::CollectionReadBuffers createBuffers() /*const*/ = 0;

/// Create (empty) collection buffers from which a collection can be constructed
/// Versioned to support schema evolution
virtual podio::CollectionReadBuffers createSchemaEvolvableBuffers(int readSchemaVersion,
podio::Backend backend) /*const*/
= 0;

/// check for validity of the container after read
virtual bool isValid() const = 0;

Expand Down
84 changes: 84 additions & 0 deletions include/podio/CollectionBufferFactory.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
#ifndef PODIO_COLLECTIONBUFFERFACTORY_H
#define PODIO_COLLECTIONBUFFERFACTORY_H

#include "podio/CollectionBuffers.h"
#include "podio/SchemaEvolution.h"

#include <functional>
#include <optional>
#include <string>
#include <unordered_map>
#include <vector>

namespace podio {

/**
 * The CollectionBufferFactory allows to create buffers of known datatypes,
 * which can then be populated by e.g. readers. In order to support schema
 * evolution, the buffers have a version and this factory will also require a
 * schema version to create buffers.
 *
 * It is implemented as a singleton, which is populated at the time a shared
 * datamodel library is loaded. It is assumed that this happens early on in the
 * startup of an application, such that only a single thread will access the
 * factory instance for registering datatypes. Since the necessary creation
 * functions are part of the core datamodel library, this should be very easy to
 * achieve by simply linking to that library. Once the factory is populated it
 * can be safely accessed from multiple threads concurrently to obtain buffers.
 */
class CollectionBufferFactory {
  /// Signature of a registered creation function: takes the "is subset
  /// collection" flag and returns the buffers.
  using CreationFuncT = std::function<podio::CollectionReadBuffers(bool)>;
  /// Internal storage is a map to an array of creation functions, where the
  /// version determines the place in that array. This should be a viable
  /// approach because we know the "latest and greatest" schema version
  using VersionMapT = std::vector<CreationFuncT>;
  /// Collection type name -> creation functions for all registered versions
  using MapT = std::unordered_map<std::string, VersionMapT>;

public:
  /// The buffer factory is a singleton so we disable all copy and move
  /// constructors explicitly
  CollectionBufferFactory(CollectionBufferFactory const&) = delete;
  CollectionBufferFactory& operator=(CollectionBufferFactory const&) = delete;
  CollectionBufferFactory(CollectionBufferFactory&&) = delete;
  CollectionBufferFactory& operator=(CollectionBufferFactory&&) = delete;
  ~CollectionBufferFactory() = default;

  /// Mutable instance only used for the initial registration of functions
  /// during library loading
  [[nodiscard]] static CollectionBufferFactory& mutInstance();
  /// Get the factory instance
  [[nodiscard]] static CollectionBufferFactory const& instance();

  /**
   * Create buffers for a given collection type of a given schema version.
   *
   * @param collType The collection type name (e.g. from collection->getTypeName())
   * @param version The schema version the created buffers should have
   * @param subsetColl Should the buffers be for a subset collection or not
   *
   * @return CollectionReadBuffers if a creation function for this collection
   * type has been registered, otherwise an empty optional. The result has to
   * be checked before use, hence it must not be discarded.
   */
  [[nodiscard]] std::optional<podio::CollectionReadBuffers>
  createBuffers(const std::string& collType, SchemaVersionT version, bool subsetColl) const;

  /**
   * Register a creation function for a given collection type and schema version.
   *
   * @param collType The collection type name (i.e. what
   * collection->getTypeName() returns)
   * @param version The schema version for which this creation function is valid
   * @param creationFunc The function that when invoked returns buffers for this
   * collection type and schema version. The signature has to be
   * podio::CollectionReadBuffers(bool) where the boolean parameter steers
   * whether the buffers are for a subset collection or not.
   */
  void registerCreationFunc(const std::string& collType, SchemaVersionT version, const CreationFuncT& creationFunc);

private:
  /// Only reachable through the singleton accessors
  CollectionBufferFactory() = default;

  MapT m_funcMap{}; ///< Map to the creation functions
};

} // namespace podio

#endif // PODIO_COLLECTIONBUFFERFACTORY_H
7 changes: 4 additions & 3 deletions include/podio/ROOTFrameReader.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,10 @@ class TTree;
namespace podio {

namespace detail {
// Information about the data vector as wall as the collection class type
// and the index in the collection branches cache vector
using CollectionInfo = std::tuple<const TClass*, const TClass*, size_t>;
// Information about the collection class type, whether it is a subset, the
// schema version on file and the index in the collection branches cache
// vector
using CollectionInfo = std::tuple<std::string, bool, SchemaVersionT, size_t>;

} // namespace detail

Expand Down
8 changes: 4 additions & 4 deletions include/podio/ROOTLegacyReader.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,10 @@ class TTree;
namespace podio {

namespace detail {
// Information about the data vector as wall as the collection class type
// and the index in the collection branches cache vector
using CollectionInfo = std::tuple<const TClass*, const TClass*, size_t>;

// Information about the collection class type, whether it is a subset, the
// schema version on file and the index in the collection branches cache
// vector
using CollectionInfo = std::tuple<std::string, bool, SchemaVersionT, size_t>;
} // namespace detail

class EventStore;
Expand Down
7 changes: 4 additions & 3 deletions include/podio/SIOBlock.h
Original file line number Diff line number Diff line change
Expand Up @@ -78,16 +78,17 @@ class SIOBlock : public sio::block {
return sio::block::name();
}

// Store the subset-collection flag (m_subsetColl) of this block
void setSubsetCollection(bool subsetColl) {
m_subsetColl = subsetColl;
}

// Take over the subset-collection flag and the buffers of the passed
// collection. col is dereferenced without a check, so it must not be null.
void setCollection(podio::CollectionBase* col) {
m_subsetColl = col->isSubsetCollection();
m_buffers = col->getBuffers();
}

virtual SIOBlock* create(const std::string& name) const = 0;

// create a new collection for this block
virtual void createBuffers(const bool subsetCollection = false) = 0;

protected:
bool m_subsetColl{false};
podio::CollectionReadBuffers m_buffers{};
Expand Down
26 changes: 12 additions & 14 deletions include/podio/SIOBlockUserData.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#ifndef PODIO_SIOBLOCKUSERDATA_H
#define PODIO_SIOBLOCKUSERDATA_H

#include "podio/CollectionBufferFactory.h"
#include "podio/CollectionBuffers.h"
#include "podio/SIOBlock.h"
#include "podio/UserDataCollection.h"
Expand Down Expand Up @@ -29,15 +30,23 @@ namespace podio {
template <typename BasicType, typename = EnableIfSupportedUserType<BasicType>>
class SIOBlockUserData : public podio::SIOBlock {
public:
SIOBlockUserData() : SIOBlock(::sio_name<BasicType>(), sio::version::encode_version(0, 1)) {
SIOBlockUserData() :
SIOBlock(::sio_name<BasicType>(), sio::version::encode_version(UserDataCollection<BasicType>::schemaVersion, 0)) {

podio::SIOBlockFactory::instance().registerBlockForCollection(podio::userDataTypeName<BasicType>(), this);
}

SIOBlockUserData(const std::string& name) : SIOBlock(name, sio::version::encode_version(0, 1)) {
SIOBlockUserData(const std::string& name) :
SIOBlock(name, sio::version::encode_version(UserDataCollection<BasicType>::schemaVersion, 0)) {
}

void read(sio::read_device& device, sio::version_type /*version*/) override {
void read(sio::read_device& device, sio::version_type version) override {
const auto& bufferFactory = podio::CollectionBufferFactory::instance();
m_buffers =
bufferFactory
.createBuffers(podio::userDataCollTypeName<BasicType>(), sio::version::major_version(version), false)
.value();

auto* dataVec = new std::vector<BasicType>();
unsigned size(0);
device.data(size);
Expand All @@ -53,17 +62,6 @@ class SIOBlockUserData : public podio::SIOBlock {
podio::handlePODDataSIO(device, &(*dataVec)[0], size);
}

void createBuffers(bool) override {

m_buffers.references = new podio::CollRefCollection();
m_buffers.vectorMembers = new podio::VectorMembersInfo();

// Nothing to do here since UserDataCollections cannot be subset collections
m_buffers.createCollection = [](podio::CollectionReadBuffers buffers, bool) {
return std::make_unique<UserDataCollection<BasicType>>(std::move(*buffers.dataAsVector<BasicType>()));
};
}

// Create a new SIOBlockUserData with the given name. The block is allocated
// with new and returned as a raw pointer; nothing here releases it, so
// presumably the caller takes ownership (TODO confirm).
SIOBlock* create(const std::string& name) const override {
return new SIOBlockUserData(name);
}
Expand Down
34 changes: 16 additions & 18 deletions include/podio/UserDataCollection.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#include "podio/CollectionBase.h"
#include "podio/CollectionBuffers.h"
#include "podio/DatamodelRegistry.h"
#include "podio/SchemaEvolution.h"
#include "podio/utilities/TypeHelpers.h"

#include <map>
Expand All @@ -16,6 +17,10 @@
template <> \
constexpr const char* userDataTypeName<type>() { \
return #type; \
} \
template <> \
constexpr const char* userDataCollTypeName<type>() { \
return "podio::UserDataCollection<" #type ">"; \
}

namespace podio {
Expand All @@ -37,6 +42,12 @@ using EnableIfSupportedUserType = std::enable_if_t<detail::isInTuple<T, Supporte
template <typename BasicType, typename = EnableIfSupportedUserType<BasicType>>
constexpr const char* userDataTypeName();

/** Helper template to provide the fully qualified name of a UserDataCollection.
* Implementations are populated by the PODIO_ADD_USER_TYPE macro.
*/
template <typename BasicType, typename = EnableIfSupportedUserType<BasicType>>
constexpr const char* userDataCollTypeName();

PODIO_ADD_USER_TYPE(float)
PODIO_ADD_USER_TYPE(double)

Expand Down Expand Up @@ -79,6 +90,9 @@ class UserDataCollection : public CollectionBase {
UserDataCollection& operator=(UserDataCollection&&) = default;
~UserDataCollection() = default;

/// The schema version of UserDataCollections
static constexpr SchemaVersionT schemaVersion = 1;

/// prepare buffers for serialization (a no-op for this collection type: the
/// body is intentionally empty)
void prepareForWrite() const override {
}
Expand Down Expand Up @@ -108,22 +122,6 @@ class UserDataCollection : public CollectionBase {
return {&_vecPtr, &m_refCollections, &m_vecmem_info};
}

podio::CollectionReadBuffers createBuffers() /*const*/ final {
return {nullptr, nullptr, nullptr,
[](podio::CollectionReadBuffers buffers, bool) {
return std::make_unique<UserDataCollection<BasicType>>(std::move(*buffers.dataAsVector<BasicType>()));
},
[](podio::CollectionReadBuffers& buffers) {
buffers.data = podio::CollectionWriteBuffers::asVector<BasicType>(buffers.data);
}};
}

podio::CollectionReadBuffers createSchemaEvolvableBuffers(__attribute__((unused)) int readSchemaVersion,
__attribute__((unused))
podio::Backend backend) /*const*/ final {
return createBuffers();
}

/// check for validity of the container after read
bool isValid() const override {
return true;
Expand All @@ -136,7 +134,7 @@ class UserDataCollection : public CollectionBase {

/// fully qualified type name
std::string getTypeName() const override {
return std::string("podio::UserDataCollection<") + userDataTypeName<BasicType>() + ">";
return userDataCollTypeName<BasicType>();
}

/// fully qualified type name of elements - with namespace
Expand Down Expand Up @@ -165,7 +163,7 @@ class UserDataCollection : public CollectionBase {

/// The schema version is fixed manually
SchemaVersionT getSchemaVersion() const final {
return 1;
return schemaVersion;
}

/// Print this collection to the passed stream
Expand Down
3 changes: 1 addition & 2 deletions python/podio/generator_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,10 +71,9 @@ def _is_fixed_width_type(type_name):
class DataType:
"""Simple class to hold information about a datatype or component that is
defined in the datamodel."""
def __init__(self, klass, schema_version):
def __init__(self, klass):
self.full_type = klass
self.namespace, self.bare_type = _get_namespace_class(self.full_type)
self.schema_version = schema_version

def __str__(self):
if self.namespace:
Expand Down
6 changes: 4 additions & 2 deletions python/podio/podio_config_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -412,10 +412,12 @@ def parse_model(cls, model_dict, package_name, upstream_edm=None):

if "schema_version" in model_dict:
schema_version = model_dict["schema_version"]
if int(schema_version) <= 0:
raise DefinitionError(f"schema_version has to be larger than 0 (is {schema_version})")
else:
warnings.warn("Please provide a schema_version entry. It will become mandatory. Setting it to 0 as default",
warnings.warn("Please provide a schema_version entry. It will become mandatory. Setting it to 1 as default",
FutureWarning, stacklevel=3)
schema_version = 0
schema_version = 1

components = {}
if "components" in model_dict:
Expand Down
12 changes: 6 additions & 6 deletions python/podio_class_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,6 @@ def __init__(self, yamlfile, install_dir, package_name, io_handlers, verbose, dr
self.incfolder = self.datamodel.options['includeSubfolder']
self.expose_pod_members = self.datamodel.options["exposePODMembers"]
self.upstream_edm = upstream_edm
self.schema_version = self.datamodel.schema_version

self.clang_format = []
self.generated_files = []
Expand Down Expand Up @@ -264,7 +263,7 @@ def _process_component(self, name, component):
includes.update(component.get("ExtraCode", {}).get("includes", "").split('\n'))

component['includes'] = self._sort_includes(includes)
component['class'] = DataType(name, self.schema_version)
component['class'] = DataType(name)

self._fill_templates('Component', component)

Expand Down Expand Up @@ -411,7 +410,7 @@ def _preprocess_datatype(self, name, definition):
# Make a copy here and add the preprocessing steps to that such that the
# original definition can be left untouched
data = deepcopy(definition)
data['class'] = DataType(name, self.schema_version)
data['class'] = DataType(name)
data['includes_data'] = self._get_member_includes(definition["Members"])
self._preprocess_for_class(data)
self._preprocess_for_obj(data)
Expand All @@ -426,6 +425,7 @@ def _write_edm_def_file(self):
'package_name': self.package_name,
'edm_definition': model_encoder.encode(self.datamodel),
'incfolder': self.incfolder,
'schema_version': self.datamodel.schema_version,
}

self._write_file('DatamodelDefinition.h',
Expand Down Expand Up @@ -494,9 +494,9 @@ def _needs_include(self, classname) -> IncludeFrom:

def _create_selection_xml(self):
"""Create the selection xml that is necessary for ROOT I/O"""
data = {'components': [DataType(c, self.schema_version) for c in self.datamodel.components],
'datatypes': [DataType(d, self.schema_version) for d in self.datamodel.datatypes],
'old_schema_components': [DataType(d, self.schema_version) for d in
data = {'components': [DataType(c) for c in self.datamodel.components],
'datatypes': [DataType(d) for d in self.datamodel.datatypes],
'old_schema_components': [DataType(d) for d in
self.old_datamodels_datatypes | self.old_datamodels_components]}
self._write_file('selection.xml', self._eval_template('selection.xml.jinja2', data))

Expand Down
Loading