Skip to content

Commit

Permalink
feat: support input files on public EOS (#185)
Browse files Browse the repository at this point in the history
* add new config option to support reading input files on EOS via xrootd
  • Loading branch information
alexander-held authored Aug 4, 2023
1 parent 6409b67 commit 24a069b
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 13 deletions.
25 changes: 15 additions & 10 deletions analyses/cms-open-data-ttbar/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,35 +2,40 @@ global:

# ServiceX: ignore cache with repeated queries
SERVICEX_IGNORE_CACHE: false

# analysis facility: set to "coffea_casa" for coffea-casa environments, "EAF" for FNAL, "local" for local setups
AF: coffea_casa

benchmarking:

# chunk size to use
CHUNKSIZE: 500000


# read files from public EOS (thanks to the CMS DPOA team!)
# note that these files are likely only available temporarily
# and are not part of an official CMS Open Data release
INPUT_FROM_EOS: false

# metadata to propagate through to metrics
# "ssl-dev" allows for the switch to local data on /data
AF_NAME: coffea_casa

# currently has no effect
SYSTEMATICS: all

# does not do anything; only used for metric gathering (set to 2 for distributed coffea-casa)
CORES_PER_WORKER: 2

# scaling for local setups with FuturesExecutor
NUM_CORES: 4

# only I/O, all other processing disabled
DISABLE_PROCESSING: false

# read additional branches (only with DISABLE_PROCESSING = True)
# acceptable values are 4.1, 15, 25, 50 (corresponding to % of file read); 4.1% corresponds to the standard branches used in the notebook
IO_FILE_PERCENT: '4.1'

# nanoAOD branches that correspond to different values of IO_FILE_PERCENT
IO_BRANCHES:
'4.1':
Expand Down Expand Up @@ -79,4 +84,4 @@ benchmarking:
- LHEPart_mass
- Jet_qgl
- Jet_muonSubtrFactor
- Jet_puIdDisc
- Jet_puIdDisc
7 changes: 6 additions & 1 deletion analyses/cms-open-data-ttbar/ttbar_analysis_pipeline.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -387,7 +387,12 @@
}
],
"source": [
"fileset = utils.construct_fileset(N_FILES_MAX_PER_SAMPLE, use_xcache=False, af_name=config[\"benchmarking\"][\"AF_NAME\"]) # local files on /data for ssl-dev\n",
"fileset = utils.construct_fileset(\n",
" N_FILES_MAX_PER_SAMPLE,\n",
" use_xcache=False,\n",
" af_name=config[\"benchmarking\"][\"AF_NAME\"],\n",
" input_from_eos=config[\"benchmarking\"][\"INPUT_FROM_EOS\"]\n",
" ) # local files on /data for ssl-dev as af_name\n",
"\n",
"print(f\"processes in fileset: {list(fileset.keys())}\")\n",
"print(f\"\\nexample of information in fileset:\\n{{\\n 'files': [{fileset['ttbar__nominal']['files'][0]}, ...],\")\n",
Expand Down
6 changes: 4 additions & 2 deletions analyses/cms-open-data-ttbar/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def set_style():
plt.rcParams['text.color'] = "222222"


def construct_fileset(n_files_max_per_sample, use_xcache=False, af_name=""):
def construct_fileset(n_files_max_per_sample, use_xcache=False, af_name="", input_from_eos=False):
# using https://atlas-groupdata.web.cern.ch/atlas-groupdata/dev/AnalysisTop/TopDataPreparation/XSection-MC15-13TeV.data
# for reference
# x-secs are in pb
Expand Down Expand Up @@ -80,9 +80,11 @@ def construct_fileset(n_files_max_per_sample, use_xcache=False, af_name=""):
file_paths = [f["path"] for f in file_list]
if use_xcache:
file_paths = [f.replace("https://xrootd-local.unl.edu:1094", "root://red-xcache1.unl.edu") for f in file_paths]
if af_name == "ssl-dev":
elif af_name == "ssl-dev":
# point to local files on /data
file_paths = [f.replace("https://xrootd-local.unl.edu:1094//store/user/", "/data/alheld/") for f in file_paths]
elif input_from_eos:
file_paths = [f.replace("https://xrootd-local.unl.edu:1094//store/user/AGC/nanoAOD", "root://eospublic.cern.ch//eos/opendata/cms/upload/agc/1.0.0/") for f in file_paths]
nevts_total = sum([f["nevts"] for f in file_list])
metadata = {"process": process, "variation": variation, "nevts": nevts_total, "xsec": xsec_info[process]}
fileset.update({f"{process}__{variation}": {"files": file_paths, "metadata": metadata}})
Expand Down

0 comments on commit 24a069b

Please # to comment.