def check_hash(filename, fn_hash, block_size=65536):
    """Check that ``filename`` exists and has the expected SHA-256 digest.

    The file is hashed in chunks of ``block_size`` bytes so that large
    files never have to be held in memory at once.

    Parameters
    ----------
    filename : str
        Path of the file to verify.
    fn_hash : str
        Expected SHA-256 hex digest. Surrounding whitespace and letter
        case are ignored.
    block_size : int, optional
        Number of bytes read per chunk. A falsy value (``0``/``None``)
        reads the whole file in one go.

    Returns
    -------
    bool
        ``True`` if the file exists and its digest matches, ``False`` if
        the file is missing or the digest differs.

    Notes
    -----
    On a mismatch a message is printed and the file is **deleted**, so a
    caller can simply download it again and call this function once more.
    """
    if not os.path.isfile(filename):
        return False

    # read(0) returns b'' immediately, which would hash "nothing" and lead
    # to a perfectly good file being deleted below; read everything instead.
    chunk_size = block_size if block_size else -1

    hasher = hashlib.sha256()
    with open(filename, 'rb') as afile:
        # iter() with a sentinel keeps calling read() until it returns b''.
        for buf in iter(lambda: afile.read(chunk_size), b''):
            hasher.update(buf)

    # hexdigest() is always lower-case, so normalise the expected value to
    # avoid deleting a valid file just because the hash was given upper-case.
    expected = fn_hash.strip().lower() if hasattr(fn_hash, 'lower') else fn_hash
    if hasher.hexdigest() == expected:
        return True

    print(filename, 'did not match expected hash, retrying')
    os.remove(filename)
    return False


try:
    # Ensure this doesn't run twice: if ``new_hist`` already exists in this
    # namespace the patch below has been applied before, and wrapping again
    # would stack a wrapper on top of the wrapper.
    new_hist
except NameError:
    # Keep a reference to the unpatched method so the wrapper can delegate.
    _hist = pandas.Series.hist

    @functools.wraps(_hist)
    def new_hist(self, *args, **kwargs):
        """Call the original ``Series.hist`` with step style and no grid.

        ``histtype`` and ``grid`` are always overwritten, even when the
        caller passes them explicitly.
        """
        kwargs['histtype'] = 'step'
        kwargs['grid'] = False
        return _hist(self, *args, **kwargs)

    pandas.Series.hist = new_hist
# Location of the open-data ROOT files on the public EOS instance.
eos_server = 'root://eospublic.cern.ch/'
data_dir = '/eos/opendata/lhcb/AntimatterMatters2017/data/'

# Maps each required file name to the hash passed to check_hash().
filenames = {
    'B2HHH_MagnetDown.root': 'b98651b24f825979053544c37010cf7ef9ce5c56ee62357c7e4ae2c392068379',
    'B2HHH_MagnetUp.root': 'c42ad9e47931e1404bf94ad82ea22a0acd10bc9cfbb58e77a6b0fff08ead7859',
}

for fn, fn_hash in filenames.items():
    # The URL does not change between retries, so build it once per file.
    fn_url = eos_server + data_dir + fn
    # Keep copying until the local file under Data/ passes the hash check.
    while not check_hash('Data/'+fn, fn_hash):
        print('Downloading', fn_url)
        # Pass the arguments as a list so no shell is involved and the URL
        # is never subject to shell word-splitting or expansion.
        # check_output raises CalledProcessError if xrdcp exits non-zero.
        check_output(['xrdcp', fn_url, './Data/'])
If you have access to an [`everware`](https://github.com/everware/everware) instance you can try out this repository simply by surfing there and then pasting a link to this repository or just by clicking this badge -[![run at everware](https://cdn.rawgit.com/chrisburr/opendata-project/master/Images/badge.svg)](https://everware.rep.school.yandex.net/hub/oauth_login?repourl=https://github.com/lhcb/opendata-project.git) +[![run at everware](https://cdn.rawgit.com/chrisburr/opendata-project/master/Images/badge.svg)](https://everware.ysda.yandex.net/hub/oauth_login?repourl=https://github.com/lhcb/opendata-project.git) If you've come here through everware then just click the "LHCb open data portal project"