Skip to content

Commit

Permalink
Refactor the RNTupleWriter internals and reduce some code duplication (
Browse files Browse the repository at this point in the history
…#614)

* Refactor root utilities and reduce code duplication

- Lift common type defs into one header to not redefine them
- Keep all root related utilitie classes in one header

* Rename CollectionInfo to CategoryInfo to better convey intention

* Remove unnecessary member variables

* Add functionality to get all values for a type to GenericParameters

* Move some global state into category state

* Avoid unnecessary copy

* Fix include guards to conform to style guide

* Make sure to take lock long enough

* Switch to tuple for consistency

* Remove obsolete reset

* Comment usage of the mutex
  • Loading branch information
tmadlener authored Jun 11, 2024
1 parent c671edd commit ab6b2a4
Show file tree
Hide file tree
Showing 14 changed files with 169 additions and 158 deletions.
23 changes: 0 additions & 23 deletions include/podio/CollectionBranches.h

This file was deleted.

19 changes: 19 additions & 0 deletions include/podio/GenericParameters.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

#include <algorithm>
#include <iostream>
#include <iterator>
#include <map>
#include <memory>
#include <mutex>
Expand Down Expand Up @@ -105,6 +106,10 @@ class GenericParameters {
template <typename T, typename = EnableIfValidGenericDataType<T>>
std::vector<std::string> getKeys() const;

/// Get all the available values for a given type
template <typename T, typename = EnableIfValidGenericDataType<T>>
std::vector<std::vector<T>> getValues() const;

/// erase all elements
void clear() {
_intMap.clear();
Expand Down Expand Up @@ -243,5 +248,19 @@ std::vector<std::string> GenericParameters::getKeys() const {
return keys;
}

template <typename T, typename>
std::vector<std::vector<T>> GenericParameters::getValues() const {
std::vector<std::vector<T>> values;
auto& mtx = getMutex<T>();
const auto& map = getMap<T>();
{
// Lock to avoid concurrent changes to the map while we get the stored
// values
std::lock_guard lock{mtx};
values.reserve(map.size());
std::transform(map.begin(), map.end(), std::back_inserter(values), [](const auto& pair) { return pair.second; });
}
return values;
}
} // namespace podio
#endif
2 changes: 0 additions & 2 deletions include/podio/RNTupleReader.h
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
#ifndef PODIO_RNTUPLEREADER_H
#define PODIO_RNTUPLEREADER_H

#include "podio/CollectionBranches.h"
#include "podio/ICollectionProvider.h"
#include "podio/ROOTFrameData.h"
#include "podio/SchemaEvolution.h"
#include "podio/podioVersion.h"
Expand Down
56 changes: 28 additions & 28 deletions include/podio/RNTupleWriter.h
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
#ifndef PODIO_RNTUPLEWRITER_H
#define PODIO_RNTUPLEWRITER_H

#include "podio/CollectionBase.h"
#include "podio/Frame.h"
#include "podio/GenericParameters.h"
#include "podio/SchemaEvolution.h"
#include "podio/utilities/DatamodelRegistryIOHelpers.h"
#include "podio/utilities/RootHelpers.h"

#include "TFile.h"
#include <ROOT/RNTuple.hxx>
Expand Down Expand Up @@ -102,42 +102,42 @@ class RNTupleWriter {
checkConsistency(const std::vector<std::string>& collsToWrite, const std::string& category) const;

private:
template <typename T>
void fillParams(GenericParameters& params, ROOT::Experimental::REntry* entry);
std::unique_ptr<ROOT::Experimental::RNTupleModel>
createModels(const std::vector<root_utils::StoreCollection>& collections);

/// Helper struct to group all the necessary information for one category.
struct CategoryInfo {
std::unique_ptr<ROOT::Experimental::RNTupleWriter> writer{nullptr}; ///< The RNTupleWriter for this category

// The following are assumed to run in parallel!
std::vector<uint32_t> ids{}; ///< The ids of all collections
std::vector<std::string> names{}; ///< The names of all collections
std::vector<std::string> types{}; ///< The types of all collections
std::vector<short> subsetCollections{}; ///< The flags identifying the subcollections
std::vector<SchemaVersionT> schemaVersions{}; ///< The schema versions of all collections

// Storage for the keys & values of all the parameters of this category
// (resp. at least the current entry)
root_utils::ParamStorage<int> intParams{};
root_utils::ParamStorage<float> floatParams{};
root_utils::ParamStorage<double> doubleParams{};
root_utils::ParamStorage<std::string> stringParams{};
};
CategoryInfo& getCategoryInfo(const std::string& category);

using StoreCollection = std::pair<const std::string&, podio::CollectionBase*>;
std::unique_ptr<ROOT::Experimental::RNTupleModel> createModels(const std::vector<StoreCollection>& collections);
template <typename T>
void fillParams(const GenericParameters& params, CategoryInfo& catInfo, ROOT::Experimental::REntry* entry);

std::unique_ptr<ROOT::Experimental::RNTupleModel> m_metadata{};
std::unique_ptr<ROOT::Experimental::RNTupleWriter> m_metadataWriter{};
template <typename T>
root_utils::ParamStorage<T>& getParamStorage(CategoryInfo& catInfo);

std::unique_ptr<TFile> m_file{};

DatamodelDefinitionCollector m_datamodelCollector{};

struct CollectionInfo {
std::vector<uint32_t> id{};
std::vector<std::string> name{};
std::vector<std::string> type{};
std::vector<short> isSubsetCollection{};
std::vector<SchemaVersionT> schemaVersion{};
std::unique_ptr<ROOT::Experimental::RNTupleWriter> writer{nullptr};
};
CollectionInfo& getCategoryInfo(const std::string& category);

std::unordered_map<std::string, CollectionInfo> m_categories{};
std::unordered_map<std::string, CategoryInfo> m_categories{};

bool m_finished{false};

std::vector<std::string> m_intkeys{}, m_floatkeys{}, m_doublekeys{}, m_stringkeys{};

std::vector<std::vector<int>> m_intvalues{};
std::vector<std::vector<float>> m_floatvalues{};
std::vector<std::vector<double>> m_doublevalues{};
std::vector<std::vector<std::string>> m_stringvalues{};

template <typename T>
std::pair<std::vector<std::string>&, std::vector<std::vector<T>>&> getKeyValueVectors();
};

} // namespace podio
Expand Down
5 changes: 2 additions & 3 deletions include/podio/ROOTLegacyReader.h
Original file line number Diff line number Diff line change
@@ -1,13 +1,12 @@
#ifndef PODIO_ROOTLEGACYREADER_H
#define PODIO_ROOTLEGACYREADER_H

#include "podio/CollectionBranches.h"
#include "podio/ROOTFrameData.h"
#include "podio/podioVersion.h"
#include "podio/utilities/RootHelpers.h"

#include "TChain.h"

#include <iostream>
#include <memory>
#include <string>
#include <tuple>
Expand Down Expand Up @@ -114,7 +113,7 @@ class ROOTLegacyReader {
private:
std::pair<TTree*, unsigned> getLocalTreeAndEntry(const std::string& treename);

void createCollectionBranches(const std::vector<std::tuple<uint32_t, std::string, bool, unsigned int>>& collInfo);
void createCollectionBranches(const std::vector<root_utils::CollectionWriteInfoT>& collInfo);

podio::GenericParameters readEventMetaData();

Expand Down
3 changes: 1 addition & 2 deletions include/podio/ROOTReader.h
Original file line number Diff line number Diff line change
@@ -1,14 +1,13 @@
#ifndef PODIO_ROOTREADER_H
#define PODIO_ROOTREADER_H

#include "podio/CollectionBranches.h"
#include "podio/ROOTFrameData.h"
#include "podio/podioVersion.h"
#include "podio/utilities/DatamodelRegistryIOHelpers.h"
#include "podio/utilities/RootHelpers.h"

#include "TChain.h"

#include <iostream>
#include <memory>
#include <string>
#include <string_view>
Expand Down
22 changes: 8 additions & 14 deletions include/podio/ROOTWriter.h
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
#ifndef PODIO_ROOTWRITER_H
#define PODIO_ROOTWRITER_H

#include "podio/CollectionBranches.h"
#include "podio/CollectionIDTable.h"
#include "podio/utilities/DatamodelRegistryIOHelpers.h"
#include "podio/utilities/RootHelpers.h"

#include "TFile.h"

Expand Down Expand Up @@ -100,31 +100,25 @@ class ROOTWriter {
checkConsistency(const std::vector<std::string>& collsToWrite, const std::string& category) const;

private:
using StoreCollection = std::pair<const std::string&, podio::CollectionBase*>;

// collectionID, collectionType, subsetCollection
// @note same as in rootUtils.h private header!
using CollectionInfoT = std::tuple<uint32_t, std::string, bool, unsigned int>;

/// Helper struct to group together all necessary state to write / process a
/// given category. Created during the first writing of a category
struct CategoryInfo {
TTree* tree{nullptr}; ///< The TTree to which this category is written
std::vector<root_utils::CollectionBranches> branches{}; ///< The branches for this category
std::vector<CollectionInfoT> collInfo{}; ///< Collection info for this category
podio::CollectionIDTable idTable{}; ///< The collection id table for this category
std::vector<std::string> collsToWrite{}; ///< The collections to write for this category
TTree* tree{nullptr}; ///< The TTree to which this category is written
std::vector<root_utils::CollectionBranches> branches{}; ///< The branches for this category
std::vector<root_utils::CollectionWriteInfoT> collInfo{}; ///< Collection info for this category
podio::CollectionIDTable idTable{}; ///< The collection id table for this category
std::vector<std::string> collsToWrite{}; ///< The collections to write for this category
};

/// Initialize the branches for this category
void initBranches(CategoryInfo& catInfo, const std::vector<StoreCollection>& collections,
void initBranches(CategoryInfo& catInfo, const std::vector<root_utils::StoreCollection>& collections,
/*const*/ podio::GenericParameters& parameters);

/// Get the (potentially uninitialized category information for this category)
CategoryInfo& getCategoryInfo(const std::string& category);

static void resetBranches(std::vector<root_utils::CollectionBranches>& branches,
const std::vector<ROOTWriter::StoreCollection>& collections,
const std::vector<root_utils::StoreCollection>& collections,
/*const*/ podio::GenericParameters* parameters);

std::unique_ptr<TFile> m_file{nullptr}; ///< The storage file
Expand Down
44 changes: 44 additions & 0 deletions include/podio/utilities/RootHelpers.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
#ifndef PODIO_UTILITIES_ROOTHELPERS_H
#define PODIO_UTILITIES_ROOTHELPERS_H

#include "TBranch.h"

#include <string>
#include <tuple>
#include <vector>

namespace podio {
class CollectionBase;

namespace root_utils {

// A collection of additional information that describes the collection: the
// collectionID, the collection (data) type, whether it is a subset
// collection, and its schema version
using CollectionWriteInfoT = std::tuple<uint32_t, std::string, bool, unsigned int>;
// for backwards compatibility
using CollectionInfoWithoutSchemaT = std::tuple<int, std::string, bool>;

/// A collection name and a base pointer grouped together for writing
using StoreCollection = std::tuple<const std::string&, podio::CollectionBase*>;

/// Small helper struct to collect all branches that are necessary to read or
/// write a collection. Needed to cache the branch pointers and avoid having to
/// get them from a TTree/TChain for every event.
struct CollectionBranches {
TBranch* data{nullptr};
std::vector<TBranch*> refs{};
std::vector<TBranch*> vecs{};
std::vector<std::string> refNames{}; ///< The names of the relation branches
std::vector<std::string> vecNames{}; ///< The names of the vector member branches
};

/// Pair of keys and values for one type of the ones that can be stored in
/// GenericParameters
template <typename T>
using ParamStorage = std::tuple<std::vector<std::string>, std::vector<std::vector<T>>>;

} // namespace root_utils
} // namespace podio

#endif // PODIO_UTILITIES_ROOTHELPERS_H
1 change: 1 addition & 0 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ SET(root_headers
${PROJECT_SOURCE_DIR}/include/podio/ROOTLegacyReader.h
${PROJECT_SOURCE_DIR}/include/podio/ROOTWriter.h
${PROJECT_SOURCE_DIR}/include/podio/ROOTFrameData.h
${PROJECT_SOURCE_DIR}/include/podio/utilities/RootHelpers.h
)
if(ENABLE_RNTUPLE)
list(APPEND root_headers
Expand Down
Loading

0 comments on commit ab6b2a4

Please # to comment.