diff --git a/postprocess.py b/postprocess.py deleted file mode 100644 index e459235..0000000 --- a/postprocess.py +++ /dev/null @@ -1,252 +0,0 @@ -#!/usr/bin/env python3 - -from pathlib import Path -import warnings - -import numpy as np -from scipy.stats import sem - - -RESULTS_DIRECTORY = Path("data") -FINAL_DIRECTORY = Path("final") - - -def process_file(file_name): - """Reads the file_name using numpy.loadtxt with delimiter ' '. - - Parameters - ---------- - file_name : str - - Returns - ------- - np.ndarray - """ - - return np.loadtxt(file_name, delimiter=' ') - - -def read_files_via_numpy(file_list): - """Reads all files as listed in the file_list using numpy.loadtxt. Can - also use a multiprocessing Pool().map operation here but it runs into an - OS error, indicating too many simultaneous open files. - - Parameters - ---------- - file_list : list - A list of full paths. - - Returns - ------- - list - A list of all the loaded numpy arrays. - """ - - return list(map(process_file, file_list)) - - -def get_all_results_filenames(): - """Gets a list of all of the filenames in the provided path - - Parameters - ---------- - path : posix Path, optional - - Returns - ------- - list - """ - - fnames = list(Path(RESULTS_DIRECTORY).iterdir()) - return [str(f) for f in fnames] - - -def obs1(all_filenames, substring, save_path): - """Processes files that have returned energy information.""" - - files = [f for f in all_filenames if substring in f] - if len(files) == 0: - return - arrays = read_files_via_numpy(files) - # for arr in arrays: - # print(arr.shape) - arr = np.array(arrays) - grid = np.loadtxt("grids/energy.txt") - mu = arr.mean(axis=0) - sd = arr.std(axis=0) - stderr = sem(arr, axis=0) - final = np.array([grid, mu, sd, stderr]).T - np.savetxt(save_path, final, fmt='%.08e') - - -def emax(all_filenames, substring, save_path): - """Processes files that have returned energy information.""" - - files = [f for f in all_filenames if substring in f] - if len(files) == 0: - return - arr = np.array(read_files_via_numpy(files)) - grid = np.loadtxt("grids/pi2.txt") - mu = arr.mean(axis=0) - sd = arr.std(axis=0) - stderr = sem(arr, axis=0) - final = np.array([grid, mu, sd, stderr]).T - np.savetxt(save_path, final, fmt='%.08e') - - -def ridge(all_filenames, substring, save_path): - """Processes files that have returned energy information.""" - - files = [f for f in all_filenames if substring in f] - if len(files) == 0: - return - arr = np.array(read_files_via_numpy(files)) - grid = np.loadtxt("grids/energy.txt") - - # This is for the mean median - weights = arr[:, :, 2] - dat = arr[:, :, 0] - mu = np.ma.average(dat, axis=0, weights=weights) - var = np.ma.average((mu - dat)**2, axis=0, weights=weights) - - # This is for the mean mean - weights = arr[:, :, 2] - dat = arr[:, :, 1] - mu2 = np.ma.average(dat, axis=0, weights=weights) - var2 = np.ma.average((mu2 - dat)**2, axis=0, weights=weights) - - final = np.array([ - grid, - mu, - np.sqrt(var), - np.sqrt(var) / np.sqrt(weights.sum(axis=0)), - mu2, - np.sqrt(var2), - np.sqrt(var2) / np.sqrt(weights.sum(axis=0)) - ]).T - np.savetxt(save_path, final, fmt='%.08e') - - -def cache_size(all_filenames, substring, save_path): - - files = [f for f in all_filenames if substring in f] - if len(files) == 0: - return - arr = np.array(read_files_via_numpy(files)) - n = arr[0, 0] - arr = arr[:, 1:] / n # Percentage of cache filled - grid = np.loadtxt("grids/energy.txt") - mu = arr.mean(axis=0) - sd = arr.std(axis=0) - stderr = sem(arr, axis=0) - final = np.array([grid, mu, sd, stderr]).T - np.savetxt(save_path, final, fmt='%.08e') - - -def psi_config(all_filenames, substring, save_path): - """Process all of the config information.""" - - files = [f for f in all_filenames if substring in f] - if len(files) == 0: - return - files = read_files_via_numpy(files) - arr = np.array([np.array(f) for f in files if len(f) > 0]) - grid = np.array([2**ii for ii in range(arr.shape[1])]) - psi = arr.mean(axis=0) - psi = psi / psi.sum() - sd = arr.std(axis=0) - stderr = sem(arr, axis=0) - final = np.array([grid, psi, sd, stderr]).T - where = np.where(final[:, 1] != 0)[0] - final = final[where, :] - np.savetxt(save_path, final, fmt='%.08e') - - -def psi_basin(all_filenames, substring, save_path): - files = [f for f in all_filenames if substring in f] - if len(files) == 0: - return - arr = np.array(read_files_via_numpy(files)) - arr = arr[:, :, 0].squeeze() # ignore unique configs/basin - grid = np.array([2**ii for ii in range(arr.shape[1])]) - mu = arr.mean(axis=0) - sd = arr.std(axis=0) - stderr = sem(arr, axis=0) - final = np.array([grid, mu, sd, stderr]).T - where = np.where(final[:, 1] != 0)[0] - final = final[where, :] - np.savetxt(save_path, final, fmt='%.08e') - - -def Pi_config(all_filenames, substring, save_path): - """Process all the aging config results.""" - - files = [f for f in all_filenames if substring in f] - if len(files) == 0: - return - arr = np.array(read_files_via_numpy(files)) - eq = arr[:, :, 0] == arr[:, :, 1] - mu = eq.mean(axis=0) - sd = eq.std(axis=0) - stderr = sem(eq, axis=0) - grid = np.loadtxt("grids/pi1.txt") - final = np.array([grid, mu, sd, stderr]).T - np.savetxt(save_path, final, fmt='%.08e') - - -def Pi_basin(all_filenames, substring, save_path): - """""" - - files = [f for f in all_filenames if substring in f] - if len(files) == 0: - return - - with warnings.catch_warnings(): - warnings.filterwarnings('ignore') - arr = np.array(read_files_via_numpy(files)) - - mu = [] - sd = [] - stderr = [] - - for gridpoint in range(arr.shape[1]): - subarr = arr[:, gridpoint, :] - - # Indexes are vec idx 1, in basin 1, vec idx 2, in basin 2 - in_basin_1 = np.where(subarr[:, 1] == 1)[0] - eq = subarr[in_basin_1, 0] == subarr[in_basin_1, 2] - - mu.append(np.mean(eq)) - sd.append(np.std(eq)) - stderr.append(sem(eq)) - - grid = np.loadtxt("grids/pi1.txt") - final = np.array([grid, mu, sd, stderr]).T - np.savetxt(save_path, final, fmt='%.08e') - - -if __name__ == '__main__': - Path(FINAL_DIRECTORY).mkdir(exist_ok=True, parents=False) - fnames = get_all_results_filenames() - - # Handle all standard one-point observables - obs1(fnames, "_energy.txt", Path(FINAL_DIRECTORY) / "energy.txt") - obs1(fnames, "_energy_IS.txt", Path(FINAL_DIRECTORY) / "energy_IS.txt") - emax(fnames, "_emax.txt", Path(FINAL_DIRECTORY) / "emax.txt") - ridge(fnames, "_ridge_E.txt", Path(FINAL_DIRECTORY) / "ridge_E.txt") - ridge(fnames, "_ridge_S.txt", Path(FINAL_DIRECTORY) / "ridge_S.txt") - obs1(fnames, "_acceptance_rate.txt", Path(FINAL_DIRECTORY) / "acceptance_rate.txt") - obs1(fnames, "_inherent_structure_timings.txt", Path(FINAL_DIRECTORY) / "inherent_structure_timings.txt") - obs1(fnames, "_walltime_per_waitingtime.txt", Path(FINAL_DIRECTORY) / "walltime_per_waitingtime.txt") - - # All two point observables - psi_config(fnames, "_psi_config.txt", Path(FINAL_DIRECTORY) / "psi_config.txt") - psi_basin(fnames, "_psi_basin_E.txt", Path(FINAL_DIRECTORY) / "psi_basin_E.txt") - psi_basin(fnames, "_psi_basin_S.txt", Path(FINAL_DIRECTORY) / "psi_basin_S.txt") - - Pi_config(fnames, "_Pi_config.txt", Path(FINAL_DIRECTORY) / "Pi_config.txt") - Pi_basin(fnames, "_Pi_basin_E.txt", Path(FINAL_DIRECTORY) / "Pi_basin_E.txt") - Pi_basin(fnames, "_Pi_basin_S.txt", Path(FINAL_DIRECTORY) / "Pi_basin_S.txt") - - # ... - cache_size(fnames, "_cache_size.txt", Path(FINAL_DIRECTORY) / "cache_size.txt") diff --git a/src/obs2.cpp b/src/obs2.cpp index e36de85..83088d4 100644 --- a/src/obs2.cpp +++ b/src/obs2.cpp @@ -13,7 +13,7 @@ size_t _get_max_counter(const utils::SimulationParameters params) // let N be the log base 10 total timesteps // log2(10^N) = N log2(10) // Give the max counter a lot of space - return params.log10_N_timesteps * log2(10.0) + 10; + return params.log10_N_timesteps * log(10.0) + 10; } diff --git a/src/processing_utils.cpp b/src/processing_utils.cpp index df1f15b..9c948bc 100644 --- a/src/processing_utils.cpp +++ b/src/processing_utils.cpp @@ -228,6 +228,123 @@ json get_ridge_statistics(const std::vector results, const std::string key } +json get_aging_config_statistics(const std::vector results) +{ + + auto t_start = std::chrono::high_resolution_clock::now(); + + json j; + const size_t N = results.size(); + const double sqrt_N = sqrt(N); + const size_t M = results[0]["aging_config_pi1"].size(); + + std::vector> aging_pi1, aging_pi2; + for (auto &result : results) + { + aging_pi1.push_back(result["aging_config_pi1"]); + aging_pi2.push_back(result["aging_config_pi2"]); + } + + std::vector tmp; + std::vector means, medians, standard_deviations, standard_errors; + for (size_t jj=0; jj results) +{ + + auto t_start = std::chrono::high_resolution_clock::now(); + + json j; + const size_t N = results.size(); + const size_t M = results[0]["aging_basin_E_index_1"].size(); + + std::vector> aging_pi1, aging_pi2; + std::vector> in_basin; + for (auto &result : results) + { + aging_pi1.push_back(result["aging_basin_E_index_1"]); + aging_pi2.push_back(result["aging_basin_E_index_2"]); + in_basin.push_back(result["aging_basin_E_prev_state_in_basin_1"]); + } + + std::vector tmp; + std::vector means, medians, standard_deviations, standard_errors; + for (size_t jj=0; jj get_psi_grid(const size_t N) +{ + const double e = 2.71828182845904523536; + std::vector v; + for (size_t ii=0; ii