-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbuild_hdf5_database.py
executable file
·76 lines (62 loc) · 2.21 KB
/
build_hdf5_database.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
#!/usr/bin/env python3
# -*- encoding: utf-8 -*-
from argparse import ArgumentParser
from pathlib import Path
import logging as log
from rich.logging import RichHandler
from striptease import DataStorage
# File name of the database, used when "--database-name" is not given on the
# command line; it is resolved relative to the HDF5 directory passed as "path".
DEFAULT_DATABASE_NAME = "index.db"
def _parse_command_line():
    """Build the command-line parser and return the parsed arguments.

    The returned namespace carries ``database_name``, ``start_from_scratch``,
    ``update_hdf5`` and the positional ``path``.
    """
    database_name_help = """Name of the file that will contain the database.
The default is {default}""".format(
        default=DEFAULT_DATABASE_NAME
    )
    start_from_scratch_help = """If true, any existing database will be removed and a new one will
be created from scratch. CAUTION: this might take a lot of time!"""
    update_hdf5_help = """If specified, MJD timestamps that are not found in HDF5 files will
be saved back in the files. This requires write-access to the HDF5 files."""

    cli = ArgumentParser(prog="build_hdf5_database.py")
    cli.add_argument(
        "--database-name",
        "-d",
        type=str,
        default=DEFAULT_DATABASE_NAME,
        help=database_name_help,
    )
    cli.add_argument(
        "--start-from-scratch",
        action="store_true",
        default=False,
        help=start_from_scratch_help,
    )
    cli.add_argument(
        "--update-hdf5",
        action="store_true",
        default=False,
        help=update_hdf5_help,
    )
    cli.add_argument("path", type=str, help="Path where the HDF5 files are stored")
    return cli.parse_args()


def main():
    """Entry point of the script: (re)build the database for a directory.

    Steps, in order:

    1. Parse the command line and set up logging through ``RichHandler``.
    2. If a database file already exists in the given directory and
       ``--start-from-scratch`` was passed, delete it.
    3. Instantiate ``DataStorage`` on the directory with
       ``update_database=True`` (and ``update_hdf5`` as requested on the
       command line), then log how many files it reports.
    """
    args = _parse_command_line()

    log.basicConfig(level="INFO", format="%(message)s", handlers=[RichHandler()])
    path = Path(args.path)
    log.info(f'looking for a database in "{path}" with name "{args.database_name}"')

    # The database lives inside the directory holding the HDF5 files.
    db_path = path / args.database_name
    database_exists = db_path.is_file()

    if database_exists:
        log.info(f'an existing database has been found in "{path}"')

    # Removal is only attempted when there is actually a file to remove.
    if database_exists and args.start_from_scratch:
        log.info(
            '"--start-from-scratch" was specified, so I am removing the database'
        )
        db_path.unlink()
        log.info(f'database "{db_path}" was removed from disk')

    log.info(f"going to scan {path} for HDF5 files…")
    storage = DataStorage(
        path,
        database_name=args.database_name,
        update_database=True,
        update_hdf5=args.update_hdf5,
    )

    entry_count = len(storage.get_list_of_files())
    log.info(
        "the database has been updated and now contains {} entries".format(
            entry_count
        )
    )
# Run the command-line tool only when this file is executed as a script,
# so that importing the module does not trigger argument parsing.
if __name__ == "__main__":
    main()