Skip to content

Commit 6309593

Browse files
authored
AOD Merger (#5665)
1 parent ce1046d commit 6309593

2 files changed

Lines changed: 202 additions & 0 deletions

File tree

Analysis/Core/CMakeLists.txt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,11 @@ o2_target_root_dictionary(AnalysisCore
2424
include/AnalysisCore/HFConfigurables.h
2525
LINKDEF src/AnalysisCoreLinkDef.h)
2626

27+
# Builds the AOD merger command-line tool from src/AODMerger.cxx.
o2_add_executable(merger
28+
COMPONENT_NAME aod
29+
SOURCES src/AODMerger.cxx
30+
PUBLIC_LINK_LIBRARIES ROOT::Hist ROOT::Core)
31+
2732
if(FastJet_FOUND)
2833
o2_add_library(AnalysisJets
2934
SOURCES src/JetFinder.cxx

Analysis/Core/src/AODMerger.cxx

Lines changed: 197 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,197 @@
1+
// Copyright CERN and copyright holders of ALICE O2. This software is
2+
// distributed under the terms of the GNU General Public License v3 (GPL
3+
// Version 3), copied verbatim in the file "COPYING".
4+
//
5+
// See http://alice-o2.web.cern.ch/license for full licensing information.
6+
//
7+
// In applying this license CERN does not waive the privileges and immunities
8+
// granted to it by virtue of its status as an Intergovernmental Organization
9+
// or submit itself to any jurisdiction.
10+
11+
#include <map>
12+
#include <fstream>
13+
#include <getopt.h>
14+
15+
#include "TFile.h"
16+
#include "TTree.h"
17+
#include "TList.h"
18+
#include "TDirectory.h"
19+
20+
// AOD merger with correct index rewriting
21+
// No need to know the datamodel because the branch names follow a canonical standard (identified by fIndex)
22+
int main(int argc, char* argv[])
23+
{
24+
std::string inputCollection("input.txt");
25+
std::string outputFileName("AO2D.root");
26+
long maxDirSize = 100000000;
27+
28+
int this_option_optind = optind ? optind : 1;
29+
int option_index = 0;
30+
static struct option long_options[] = {
31+
{"input", required_argument, 0, 0},
32+
{"output", required_argument, 0, 1},
33+
{"max-size", required_argument, 0, 2},
34+
{"help", no_argument, 0, 3},
35+
{0, 0, 0, 0}};
36+
37+
while (true) {
38+
int c = getopt_long(argc, argv, "", long_options, &option_index);
39+
if (c == -1) {
40+
break;
41+
} else if (c == 0) {
42+
inputCollection = optarg;
43+
} else if (c == 1) {
44+
outputFileName = optarg;
45+
} else if (c == 2) {
46+
maxDirSize = atol(optarg);
47+
} else if (c == 3) {
48+
printf("AOD merging tool. Options: \n");
49+
printf(" --input <inputfile.txt> Contains path to files to be merged. Default: %s\n", inputCollection.c_str());
50+
printf(" --output <outputfile.root> Target output ROOT file. Default: %s\n", outputFileName.c_str());
51+
printf(" --max-size <size in Bytes> Target directory size: %ld \n", maxDirSize);
52+
return -1;
53+
} else {
54+
return -2;
55+
}
56+
}
57+
58+
printf("AOD merger started with:\n");
59+
printf(" Input file: %s\n", inputCollection.c_str());
60+
printf(" Ouput file name: %s\n", outputFileName.c_str());
61+
printf(" Maximal folder size (uncompressed): %ld\n", maxDirSize);
62+
63+
std::map<std::string, TTree*> trees;
64+
std::map<std::string, int> offsets;
65+
66+
auto outputFile = TFile::Open(outputFileName.c_str(), "RECREATE", "", 501);
67+
TDirectory* outputDir = nullptr;
68+
long currentDirSize = 0;
69+
70+
std::ifstream in;
71+
in.open(inputCollection);
72+
TString line;
73+
while (in.good()) {
74+
in >> line;
75+
76+
if (line.Length() == 0)
77+
continue;
78+
79+
printf("Processing input file: %s\n", line.Data());
80+
81+
auto inputFile = TFile::Open(line);
82+
TList* keyList = inputFile->GetListOfKeys();
83+
keyList->Sort();
84+
85+
for (auto key1 : *keyList) {
86+
if (!((TObjString*)key1)->GetString().BeginsWith("DF_"))
87+
continue;
88+
89+
auto dfName = ((TObjString*)key1)->GetString().Data();
90+
91+
printf(" Processing folder %s\n", dfName);
92+
auto folder = (TDirectoryFile*)inputFile->Get(dfName);
93+
auto treeList = folder->GetListOfKeys();
94+
95+
for (auto key2 : *treeList) {
96+
auto treeName = ((TObjString*)key2)->GetString().Data();
97+
98+
printf(" Processing tree %s\n", treeName);
99+
auto inputTree = (TTree*)inputFile->Get(Form("%s/%s", dfName, treeName));
100+
101+
if (trees.count(treeName) == 0) {
102+
// clone tree
103+
// NOTE Basket size etc. are copied in CloneTree() ?
104+
if (!outputDir) {
105+
outputDir = outputFile->mkdir(dfName);
106+
currentDirSize = 0;
107+
printf("Writing to output folder %s\n", dfName);
108+
}
109+
outputDir->cd();
110+
auto outputTree = inputTree->CloneTree(-1, "fast");
111+
outputTree->SetAutoFlush(0);
112+
trees[treeName] = outputTree;
113+
currentDirSize += inputTree->GetTotBytes();
114+
} else {
115+
// append tree
116+
auto outputTree = trees[treeName];
117+
118+
outputTree->CopyAddresses(inputTree);
119+
120+
// register index columns
121+
std::vector<std::pair<int*, int>> indexList;
122+
TObjArray* branches = inputTree->GetListOfBranches();
123+
for (int i = 0; i < branches->GetEntriesFast(); ++i) {
124+
TBranch* br = (TBranch*)branches->UncheckedAt(i);
125+
TString branchName(br->GetName());
126+
if (branchName.BeginsWith("fIndex")) {
127+
// Syntax: fIndex<Table>[_<Suffix>]
128+
branchName.Remove(0, 6);
129+
if (branchName.First("_") > 0) {
130+
branchName.Remove(branchName.First("_"));
131+
}
132+
branchName.Remove(branchName.Length() - 1); // remove s
133+
branchName.ToLower();
134+
branchName = "O2" + branchName;
135+
136+
indexList.push_back({new int, offsets[branchName.Data()]});
137+
138+
inputTree->SetBranchAddress(br->GetName(), indexList.back().first);
139+
outputTree->SetBranchAddress(br->GetName(), indexList.back().first);
140+
}
141+
}
142+
143+
auto entries = inputTree->GetEntries();
144+
for (int i = 0; i < entries; i++) {
145+
inputTree->GetEntry(i);
146+
// shift index columns by offset
147+
for (const auto& idx : indexList) {
148+
*(idx.first) += idx.second;
149+
}
150+
int nbytes = outputTree->Fill();
151+
if (nbytes > 0) {
152+
currentDirSize += nbytes;
153+
}
154+
}
155+
156+
for (const auto& idx : indexList) {
157+
delete idx.first;
158+
}
159+
160+
delete inputTree;
161+
}
162+
}
163+
164+
// update offsets
165+
for (auto const& tree : trees) {
166+
offsets[tree.first] = tree.second->GetEntries();
167+
}
168+
169+
// check for not found tables
170+
for (auto const& offset : offsets) {
171+
if (trees.count(offset.first) == 0) {
172+
printf("ERROR: Index on %s but no tree found\n", offset.first.c_str());
173+
}
174+
}
175+
176+
if (currentDirSize > maxDirSize) {
177+
printf("Maximum size reached: %ld. Closing folder.\n", currentDirSize);
178+
for (auto const& tree : trees) {
179+
//Printf("Writing %s", tree.first.c_str());
180+
outputDir->cd();
181+
tree.second->Write();
182+
delete tree.second;
183+
}
184+
outputDir = nullptr;
185+
trees.clear();
186+
offsets.clear();
187+
}
188+
}
189+
inputFile->Close();
190+
}
191+
outputFile->Write();
192+
outputFile->Close();
193+
194+
printf("AOD merger finished.\n");
195+
196+
return 0;
197+
}

0 commit comments

Comments
 (0)