From f23669878ea9d87a4929d483217f4a61959fd9ea Mon Sep 17 00:00:00 2001
From: Jesper Friis
Date: Sun, 15 Dec 2024 19:00:44 +0100
Subject: [PATCH 01/25] Updated dataset, including the following changes:

- Allow adding other types of entries to the triplestore that are not
  datasets, e.g. samples, models, instruments, people, projects...
- Renamed list_dataset_iris() to search_iris(). It can now be used to
  search for all types of entries.
- Renamed prepare() to as_jsonld() and made it part of the public API

---
 tests/dataset/test_dataset.py    |  14 ++--
 tests/input/semdata.yaml         |  12 ++--
 tripper/context/0.2/context.json |   4 +-
 tripper/dataset/__init__.py      |   3 +-
 tripper/dataset/dataset.py       | 120 +++++++++++++++++++------------
 5 files changed, 91 insertions(+), 62 deletions(-)

diff --git a/tests/dataset/test_dataset.py b/tests/dataset/test_dataset.py
index 4aa8fbdb..105db426 100644
--- a/tests/dataset/test_dataset.py
+++ b/tests/dataset/test_dataset.py
@@ -116,12 +116,7 @@ def test_datadoc():
     # pylint: disable=too-many-statements

     from tripper import CHAMEO, DCAT, EMMO, OTEIO, Triplestore
-    from tripper.dataset import (
-        list_dataset_iris,
-        load_dict,
-        save_datadoc,
-        save_dict,
-    )
+    from tripper.dataset import load_dict, save_datadoc, save_dict, search_iris

     pytest.importorskip("dlite")
     pytest.importorskip("rdflib")
@@ -188,20 +183,19 @@ def test_datadoc():

     # Test searching the triplestore
     SAMPLE = ts.namespaces["sample"]
-    datasets = list_dataset_iris(ts)
+    datasets = search_iris(ts)
     named_datasets = {
         SEMDATA["SEM_cement_batch2/77600-23-001/77600-23-001_5kV_400x_m001"],
         SEMDATA["SEM_cement_batch2/77600-23-001"],
         SEMDATA["SEM_cement_batch2"],
-        SAMPLE["SEM_cement_batch2/77600-23-001"],
     }
     assert not named_datasets.difference(datasets)
-    assert set(list_dataset_iris(ts, creator="Sigurd Wenner")) == {
+    assert set(search_iris(ts, creator="Sigurd Wenner")) == {
         SEMDATA["SEM_cement_batch2/77600-23-001/77600-23-001_5kV_400x_m001"],
         SEMDATA["SEM_cement_batch2/77600-23-001"],
         SEMDATA["SEM_cement_batch2"],
     }
-    assert set(list_dataset_iris(ts, _type=CHAMEO.Sample)) == {
+    assert set(search_iris(ts, type=CHAMEO.Sample)) == {
         SAMPLE["SEM_cement_batch2/77600-23-001"],
     }

diff --git a/tests/input/semdata.yaml b/tests/input/semdata.yaml
index 0e99919f..2d1da201 100644
--- a/tests/input/semdata.yaml
+++ b/tests/input/semdata.yaml
@@ -1,4 +1,5 @@
 ---
+
 # This extends the list of prefixes that are already defined in the context
 prefixes:
   sem: https://w3id.com/emmo/domain/sem/0.1#
@@ -62,10 +63,6 @@ datasets:
       downloadURL: sftp://nas.aimen.es/P_MATCHMAKER_SHARE_SINTEF/SEM_cement_batch2
       mediaType: inode/directory

-  - "@id": sample:SEM_cement_batch2/77600-23-001
-    "@type": chameo:Sample
-    title: Series for SEM images for sample 77600-23-001.
-
 parsers:
   - "@id": parser:sem_hitachi
@@ -81,3 +78,10 @@ generators:
     generatorType: application/vnd.dlite-generate
     configuration:
       driver: hitachi
+
+
+# Other entities, like samples, instruments, persons, models etc...
+other_entries:
+  - "@id": sample:SEM_cement_batch2/77600-23-001
+    "@type": chameo:Sample
+    title: Series for SEM images for sample 77600-23-001.
diff --git a/tripper/context/0.2/context.json b/tripper/context/0.2/context.json
index d5903ba9..3f658c0d 100644
--- a/tripper/context/0.2/context.json
+++ b/tripper/context/0.2/context.json
@@ -32,8 +32,8 @@
     "hasCurrentVersion": "dcat:hasCurrentVersion",
     "hasVersion": "dcat:hasVersion",
     "inSeries": {
-      "@id" : "dcat:inSeries",
-      "@type" : "@id"
+      "@id": "dcat:inSeries",
+      "@type": "@id"
     },
     "keyword": "dcat:keyword",
     "landingPage": "dcat:landingPage",
diff --git a/tripper/dataset/__init__.py b/tripper/dataset/__init__.py
index 0a3a5088..d6435b8d 100644
--- a/tripper/dataset/__init__.py
+++ b/tripper/dataset/__init__.py
@@ -2,12 +2,13 @@

 from .dataaccess import load, save
 from .dataset import (
+    as_jsonld,
     get_jsonld_context,
     get_partial_pipeline,
     get_prefixes,
-    list_dataset_iris,
     load_dict,
     read_datadoc,
     save_datadoc,
     save_dict,
+    search_iris,
 )
diff --git a/tripper/dataset/dataset.py b/tripper/dataset/dataset.py
index 0387328d..ba4a0fde 100644
--- a/tripper/dataset/dataset.py
+++ b/tripper/dataset/dataset.py
@@ -9,12 +9,13 @@
 - `save_datadoc()`: Save documentation from YAML file to the triplestore.

 Functions for searching the triplestore:
-- `list_dataset_iris()`: Get IRIs of matching datasets.
+- `search_iris()`: Get IRIs of matching entries in the triplestore.

 Functions for working with the dict-representation:
 - `read_datadoc()`: Read documentation from YAML file and return it as dict.
 - `save_dict()`: Save dict documentation to the triplestore.
 - `load_dict()`: Load dict documentation from the triplestore.
+- `as_jsonld()`: Return the dict as JSON-LD (represented as a Python dict).

 Functions for interaction with OTEAPI:
 - `get_partial_pipeline()`: Returns an OTELib partial pipeline.
@@ -28,7 +29,6 @@
 """

-# pylint: enable=line-too-long
 # pylint: disable=invalid-name,redefined-builtin,import-outside-toplevel
 import functools
 import io
@@ -41,7 +41,7 @@
 import requests
 import yaml  # type: ignore

-from tripper import DCAT, EMMO, OTEIO, RDF, Triplestore
+from tripper import DCAT, EMMO, OTEIO, OWL, RDF, Triplestore
 from tripper.utils import AttrDict, as_python

 if TYPE_CHECKING:  # pragma: no cover
@@ -90,6 +90,12 @@
         "datadoc_label": "datasets",
         "@type": [DCAT.Dataset, EMMO.DataSet],
     },
+    "entry": {
+        # General data catalog entry that is not one of the above
+        # Ex: samples, instruments, models, people, projects, ...
+        "datadoc_label": "other_entries",  # XXX better label?
+        "@type": OWL.NamedIndividual,
+    },
 }
@@ -120,14 +126,15 @@ def save_dict(

     Notes:
         The keys in `dct` and `kwargs` may be either properties defined in
-        the [JSON-LD context](https://raw.githubusercontent.com/EMMC-ASBL/oteapi-dlite/refs/heads/rdf-serialisation/oteapi_dlite/context/0.2/context.json)
-        or one of the following special keywords:
+        the [JSON-LD context] or one of the following special keywords:
         - "@id": Dataset IRI. Must always be given.
         - "@type": IRI of the ontology class for this type of data.
           For datasets, it is typically used to refer to a specific
           subclass of `emmo:DataSet` that provides a semantic description
           of this dataset.
+
+    References:
+    [JSON-LD context]: https://raw.githubusercontent.com/EMMC-ASBL/oteapi-dlite/refs/heads/rdf-serialisation/oteapi_dlite/context/0.2/context.json
     """
     if "@id" not in dct:
         raise ValueError("`dct` must have an '@id' key")
@@ -136,7 +143,7 @@ def save_dict(
     if prefixes:
         all_prefixes.update(prefixes)

-    d = prepare(type=type, dct=dct, prefixes=all_prefixes, **kwargs)
+    d = as_jsonld(dct=dct, type=type, prefixes=all_prefixes, **kwargs)

     # Bind prefixes
     for prefix, ns in all_prefixes.items():
@@ -199,8 +206,7 @@ def save_extra_content(ts: Triplestore, dct: dict) -> None:
             except (
                 dlite.DLiteMissingInstanceError  # pylint: disable=no-member
             ):
-                # __FIXME__: check session whether want to warn or re-raise
-                # in this case
+                # __FIXME__: check session whether to warn or re-raise
                 warnings.warn(f"cannot load datamodel: {uri}")
             else:
                 add_dataset(ts, dm)
@@ -476,7 +482,7 @@ def save_datadoc(
     for spec in dicttypes.values():
         label = spec["datadoc_label"]
         for dct in get(d, label):
-            dct = prepare(types[label], dct, prefixes=prefixes)
+            dct = as_jsonld(dct=dct, type=types[label], prefixes=prefixes)
             f = io.StringIO(json.dumps(dct))
             with Triplestore(backend="rdflib") as ts2:
                 ts2.parse(f, format="json-ld")
@@ -505,52 +511,65 @@ def prepare_datadoc(datadoc: dict) -> dict:
     for type, spec in dicttypes.items():
         label = spec["datadoc_label"]
         for i, dct in enumerate(get(d, label)):
-            d[label][i] = prepare(type, dct, prefixes=d.prefixes)
+            d[label][i] = as_jsonld(dct=dct, type=type, prefixes=d.prefixes)

     return d


-def prepare(
-    type: str, dct: dict, prefixes: dict, _recur: bool = False, **kwargs
+def as_jsonld(
+    dct: dict,
+    type: "Optional[str]" = "dataset",
+    prefixes: "Optional[dict]" = None,
+    _entryid: "Optional[str]" = None,
+    **kwargs,
 ) -> dict:
-    """Return an updated copy of dict `dct` with additional key-value
-    pairs needed for serialisation to RDF.
+    """Return an updated copy of dict `dct` as valid JSON-LD.

     Arguments:
-        type: Type of dict to prepare. Should be one of: "dataset",
-            "distribution", "parser" or "generator".
         dct: Dict to return an updated copy of.
+        type: Type of dict to prepare. Should either be one of the
+            pre-defined names: "dataset", "distribution", "accessService",
+            "parser" and "generator" or an IRI to a class in an ontology.
+            Defaults to "dataset".
         prefixes: Dict with prefixes in addition to those included in the
             JSON-LD context. Should map namespace prefixes to IRIs.
-        _recur: Whether this function is called recursively. Intended for
-            internal use.
+        _entryid: Id of base entry that is documented. Intended for
+            internal use only.
         kwargs: Additional keyword arguments to add to the returned dict.
             A leading underscore in a key will be translated to a
-            leading "@"-sign. For example, "@id=..." may be provided
-            as "_id=...".
+            leading "@"-sign. For example, "@id" or "@context" may be
+            provided as "_id" or "_context", respectively.
+
     Returns:
-        An updated copy of `dct`.
+        An updated copy of `dct` as valid JSON-LD.
     """
     # pylint: disable=too-many-branches
-    if type not in dicttypes:
-        raise ValueError(
-            f"`type` must be one of: {', '.join(dicttypes.keys())}. "
" - f"Got: '{type}'" - ) - spec = dicttypes[type] - d = AttrDict() - if not _recur: + if not _entryid: d["@context"] = CONTEXT_URL - add(d, "@type", spec["@type"]) # get type at top - d.update(dct) - add(d, "@type", spec["@type"]) # readd type if overwritten + + if type: + t = dicttypes[type]["@type"] if type in dicttypes else type + add(d, "@type", t) # get type at top + d.update(dct) + add(d, "@type", t) # readd type if overwritten + else: + d.update(dct) for k, v in kwargs.items(): key = f"@{k[1:]}" if re.match("^_([^_]|([^_].*[^_]))$", k) else k add(d, key, v) + if "@id" not in d and not _entryid: + raise ValueError("Missing '@id' in dict to document") + + if not _entryid: + _entryid = d["@id"] + + if "@type" not in d: + warnings.warn(f"Missing '@type' in dict to document: {_entryid}") + all_prefixes = get_prefixes() if prefixes: all_prefixes.update(prefixes) @@ -584,9 +603,11 @@ def prepare( if isinstance(e, str): v[i] = expand_iri(e, all_prefixes) elif isinstance(e, dict) and k in nested: - v[i] = prepare(k, e, prefixes=prefixes) + v[i] = as_jsonld( + e, k, _entryid=_entryid, prefixes=prefixes + ) elif isinstance(v, dict) and k in nested: - d[k] = prepare(k, v, prefixes=prefixes) + d[k] = as_jsonld(v, k, _entryid=_entryid, prefixes=prefixes) return d @@ -711,31 +732,42 @@ def get_partial_pipeline( return pipeline -def list_dataset_iris(ts: Triplestore, **kwargs): - """Return a list of IRIs for all datasets matching a set of criterias - specified by `kwargs`. +def search_iris(ts: Triplestore, type=DCAT.Dataset, **kwargs): + """Return a list of IRIs for all entries of the given type. + Additional matching criterias can be specified by `kwargs`. + Arguments: ts: Triplestore to search. + type: Search for entries that are individuals of the class with + this IRI. The default is `dcat:Dataset`. kwargs: Match criterias. Examples: List all dataset IRIs: - list_dataset_iris(ts) + search_iris(ts) List IRIs of all datasets with John Doe as `contactPoint`: - list_dataset_iris(ts, contactPoint="John Doe") + search_iris(ts, contactPoint="John Doe") + + List IRIs of all samples: - List IRIs of all datasets with John Doe as `contactPoint` AND that are + search_iris(ts, type=CHAMEO.Sample) + + List IRIs of all datasets with John Doe as `contactPoint` AND are measured on a given sample: - list_dataset_iris( + search_iris( ts, contactPoint="John Doe", fromSample=SAMPLE.batch2/sample3 ) """ crit = [] + + if type: + crit.append(f" ?iri rdf:type <{type}> .") + expanded = {v: k for k, v in get_shortnames().items()} for k, v in kwargs.items(): key = f"@{k[1:]}" if k.startswith("_") else k @@ -748,14 +780,12 @@ def list_dataset_iris(ts: Triplestore, **kwargs): ) else: value = v - crit.append(f" ?dataset <{predicate}> {value} .") + crit.append(f" ?iri <{predicate}> {value} .") criterias = "\n".join(crit) query = f""" PREFIX rdf: <{RDF}> - PREFIX dcat: <{DCAT}> - SELECT ?dataset + SELECT ?iri WHERE {{ - ?dataset rdf:type dcat:Dataset . {criterias} }} """ From 94fa59a0788f49a33964aad14d0008b91ea4cf18 Mon Sep 17 00:00:00 2001 From: Jesper Friis Date: Mon, 16 Dec 2024 00:18:00 +0100 Subject: [PATCH 02/25] Added new TableDoc class providing a table interface for data documentation. 
---
 docs/api_reference/dataset/tabledoc.md |  3 +
 pyproject.toml                         |  7 ++-
 tests/dataset/dataset_paths.py         | 12 ++++
 tests/dataset/test_dataaccess.py       | 16 ++----
 tests/dataset/test_dataset.py          | 47 +++++++++------
 tests/dataset/test_tabledoc.py         | 79 ++++++++++++++++++++++++++
 tripper/dataset/__init__.py            |  1 +
 tripper/dataset/dataaccess.py          |  4 +-
 tripper/dataset/dataset.py             | 79 +++++++++++++++++++++++---
 tripper/dataset/tabledoc.py            | 68 ++++++++++++++++++++++
 10 files changed, 275 insertions(+), 41 deletions(-)
 create mode 100644 docs/api_reference/dataset/tabledoc.md
 create mode 100644 tests/dataset/dataset_paths.py
 create mode 100644 tests/dataset/test_tabledoc.py
 create mode 100644 tripper/dataset/tabledoc.py

diff --git a/docs/api_reference/dataset/tabledoc.md b/docs/api_reference/dataset/tabledoc.md
new file mode 100644
index 00000000..f3a73929
--- /dev/null
+++ b/docs/api_reference/dataset/tabledoc.md
@@ -0,0 +1,3 @@
+# tabledoc
+
+::: tripper.dataset.tabledoc
diff --git a/pyproject.toml b/pyproject.toml
index d5f7f94a..21196860 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -104,7 +104,8 @@ max-public-methods = 25
 max-locals = 20
 disable = [
     "fixme",
-    "too-many-positional-arguments",
+    "invalid-name",
+    #"too-many-positional-arguments",
 ]
 good-names = [
     # Default
@@ -115,8 +116,8 @@ good-names = [
     "s", "p", "o",
     # Namespaces
     "EX",
-    # dict, value, file, ...
-    "d", "v", "f",
+    # dict, value, file, keyword...
+    "d", "v", "f", "kw",
 ]

 [tool.pytest.ini_options]
diff --git a/tests/dataset/dataset_paths.py b/tests/dataset/dataset_paths.py
new file mode 100644
index 00000000..e84b2f47
--- /dev/null
+++ b/tests/dataset/dataset_paths.py
@@ -0,0 +1,12 @@
+"""Defines paths for tests.
+
+It defines some directories that can be used by the tests, with or
+without conftest.
+"""
+
+from pathlib import Path
+
+testdir = Path(__file__).resolve().parent.parent
+ontodir = testdir / "ontologies"
+indir = testdir / "input"
+outdir = testdir / "output"
diff --git a/tests/dataset/test_dataaccess.py b/tests/dataset/test_dataaccess.py
index c3a7b75d..bdc0ef45 100644
--- a/tests/dataset/test_dataaccess.py
+++ b/tests/dataset/test_dataaccess.py
@@ -2,18 +2,12 @@

 # pylint: disable=invalid-name,too-many-locals,duplicate-code

-from pathlib import Path
-
 import pytest
+from dataset_paths import outdir

 pytest.importorskip("yaml")
 pytest.importorskip("requests")

-thisdir = Path(__file__).resolve().parent
-testdir = thisdir.parent
-inputdir = testdir / "input"
-outputdir = testdir / "output"
-

 # if True:
 def test_save_and_load():
@@ -38,7 +32,6 @@ def test_save_and_load():
     # Test save dict
     save_dict(
         ts,
-        type="dataset",
         dct={
             "@id": SEMDATA.img1,
             "distribution": {
@@ -49,6 +42,7 @@ def test_save_and_load():
                 "format": "tiff",
             },
         },
+        type="dataset",
     )
     newdistr = load_dict(ts, SEMDATA.img1)
     assert newdistr["@type"] == [DCAT.Dataset, EMMO.DataSet]
@@ -57,12 +51,12 @@ def test_save_and_load():

     save_dict(
         ts,
-        type="generator",
         dct={
             "@id": GEN.sem_hitachi,
             "generatorType": "application/vnd.dlite-generate",
             "configuration": {"driver": "hitachi"},
         },
+        type="generator",
     )

     # Test load dataset (this downloads an actual image from github)
@@ -70,7 +64,7 @@ def test_save_and_load():
     assert len(data) == 53502

     # Test save dataset with anonymous distribution
-    newfile = outputdir / "newimage.tiff"
+    newfile = outdir / "newimage.tiff"
     newfile.unlink(missing_ok=True)
     buf = b"some bytes..."
     save(
@@ -94,7 +88,7 @@ def test_save_and_load():
     assert newimage.distribution.downloadURL == f"file:{newfile}"

     # Test save dataset with named distribution
-    newfile2 = outputdir / "newimage.png"
+    newfile2 = outdir / "newimage.png"
     newfile2.unlink(missing_ok=True)
     save(
         ts,
diff --git a/tests/dataset/test_dataset.py b/tests/dataset/test_dataset.py
index 105db426..4e43cd10 100644
--- a/tests/dataset/test_dataset.py
+++ b/tests/dataset/test_dataset.py
@@ -2,18 +2,12 @@

 # pylint: disable=invalid-name,too-many-locals,duplicate-code

-from pathlib import Path
-
 import pytest
+from dataset_paths import indir

 pytest.importorskip("yaml")
 pytest.importorskip("requests")

-thisdir = Path(__file__).resolve().parent
-testdir = thisdir.parent
-inputdir = testdir / "input"
-outputdir = testdir / "output"
-

 def test_get_jsonld_context():
     """Test get_jsonld_context()."""
@@ -73,12 +67,31 @@ def test_add():
     from tripper.dataset.dataset import add

     d = {}
-    add(d, "a", 1)
-    add(d, "b", 1)
-    add(d, "b", 1)
-    add(d, "a", 2)
-    add(d, "a", 1)
-    assert d == {"a": [1, 2], "b": 1}
+    add(d, "a", "1")
+    add(d, "b", "1")
+    add(d, "b", "1")
+    add(d, "a", "2")
+    add(d, "a", "1")
+    add(d, "a", {"c": "3"})
+    assert d == {"a": ["1", "2", {"c": "3"}], "b": "1"}
+
+
+def test_addnested():
+    """Test help-function addnested()."""
+    from tripper.dataset.dataset import addnested
+    from tripper.utils import AttrDict
+
+    d = AttrDict()
+    addnested(d, "a.b", "1")
+    assert d == {"a": {"b": "1"}}
+
+    addnested(d, "a", "2")
+    assert d == {"a": ["2", {"b": "1"}]}
+
+    addnested(d, "a.b.c", {"d": "3"})
+    assert d.a[0] == "2"
+    assert d.a[1].b[1].c == {"d": "3"}
+    assert d == {"a": ["2", {"b": ["1", {"c": {"d": "3"}}]}]}


 def test_get():
@@ -124,7 +137,7 @@ def test_datadoc():
     ts = Triplestore("rdflib")

     # Load data documentation into triplestore
-    datadoc = save_datadoc(ts, inputdir / "semdata.yaml")
+    datadoc = save_datadoc(ts, indir / "semdata.yaml")
     assert isinstance(datadoc, dict)
     assert "@context" in datadoc
@@ -167,8 +180,8 @@ def test_datadoc():
     # Test save dict
     save_dict(
         ts,
-        "distribution",
-        {"@id": SEMDATA.newdistr, "format": "txt"},
+        dct={"@id": SEMDATA.newdistr, "format": "txt"},
+        type="distribution",
         prefixes={"echem": "https://w3id.org/emmo/domain/electrochemistry"},
     )
     newdistr = load_dict(ts, SEMDATA.newdistr)
@@ -210,7 +223,7 @@ def test_pipeline():

     # Prepare triplestore
     ts = Triplestore("rdflib")
-    save_datadoc(ts, inputdir / "semdata.yaml")
+    save_datadoc(ts, indir / "semdata.yaml")

     SEMDATA = ts.namespaces["semdata"]

diff --git a/tests/dataset/test_tabledoc.py b/tests/dataset/test_tabledoc.py
new file mode 100644
index 00000000..77e81dcc
--- /dev/null
+++ b/tests/dataset/test_tabledoc.py
@@ -0,0 +1,79 @@
+"""Test the dataset module."""
+
+from tripper import Triplestore
+from tripper.dataset import TableDoc
+
+
+# if True:
+def test_as_dicts():
+    """Test the as_dicts() method."""
+
+    from tripper import DCAT, EMMO, Namespace
+
+    ONTO = Namespace("http:/example.com/onto#")
+    DS = Namespace("http:/example.com/datasets#")
+
+    td = TableDoc(
+        header=[
+            "@id",
+            "@type",
+            "@type",
+            "inSeries",
+            "distribution.downloadURL",
+        ],
+        data=[
+            ("ds:s1", "onto:T1", "onto:T2", None, "file:///data/"),
+            ("ds:d1", "onto:T1", None, "ds:s1", "file:///data/d1.txt"),
+            ("ds:d2", "onto:T2", None, "ds:s1", "file:///data/d2.txt"),
+        ],
+        prefixes={
+            "onto": "http:/example.com/onto#",
+            "ds": "http:/example.com/datasets#",
+        },
+        # context={
+        #    "ds": "http:/example.com/datasets#",
+        # },
+    )
+
+    s1, d1, d2 = td.asdicts()  # pylint: disable=unbalanced-tuple-unpacking
+
+    assert s1["@id"] == DS.s1
+    assert set(s1["@type"]) == {
+        DCAT.Dataset,
+        EMMO.DataSet,
+        ONTO.T1,
+        ONTO.T2,
+    }
+    assert "inSeries" not in s1
+    assert s1.distribution == {
+        "@type": DCAT.Distribution,
+        "downloadURL": "file:///data/",
+    }
+
+    assert d1["@id"] == DS.d1
+    assert set(d1["@type"]) == {
+        DCAT.Dataset,
+        EMMO.DataSet,
+        ONTO.T1,
+    }
+    assert d1.inSeries == DS.s1
+    assert d1.distribution == {
+        "@type": DCAT.Distribution,
+        "downloadURL": "file:///data/d1.txt",
+    }
+
+    assert d2["@id"] == DS.d2
+    assert set(d2["@type"]) == {
+        DCAT.Dataset,
+        EMMO.DataSet,
+        ONTO.T2,
+    }
+    assert d2.inSeries == DS.s1
+    assert d2.distribution == {
+        "@type": DCAT.Distribution,
+        "downloadURL": "file:///data/d2.txt",
+    }
+
+    ts = Triplestore(backend="rdflib")
+    td.save(ts)
+    print(ts.serialize())
diff --git a/tripper/dataset/__init__.py b/tripper/dataset/__init__.py
index d6435b8d..e0b53d58 100644
--- a/tripper/dataset/__init__.py
+++ b/tripper/dataset/__init__.py
@@ -12,3 +12,4 @@
     save_dict,
     search_iris,
 )
+from .tabledoc import TableDoc
diff --git a/tripper/dataset/dataaccess.py b/tripper/dataset/dataaccess.py
index 672b2a59..3e248e36 100644
--- a/tripper/dataset/dataaccess.py
+++ b/tripper/dataset/dataaccess.py
@@ -175,9 +175,9 @@ def save(
     # Update triplestore
     ts.add_triples(triples)
     if save_dataset:
-        save_dict(ts, "dataset", dataset, prefixes=prefixes)
+        save_dict(ts, dataset, "dataset", prefixes=prefixes)
     elif save_distribution:
-        save_dict(ts, "distribution", distribution, prefixes=prefixes)
+        save_dict(ts, distribution, "distribution", prefixes=prefixes)

     return dataset["@id"]

diff --git a/tripper/dataset/dataset.py b/tripper/dataset/dataset.py
index ba4a0fde..2bb5e6a1 100644
--- a/tripper/dataset/dataset.py
+++ b/tripper/dataset/dataset.py
@@ -101,8 +101,8 @@

 def save_dict(
     ts: Triplestore,
-    type: str,
     dct: dict,
+    type: str = "dataset",
     prefixes: "Optional[dict]" = None,
     **kwargs,
 ) -> dict:
@@ -111,9 +111,11 @@ def save_dict(

     Arguments:
         ts: Triplestore to save to.
-        type: Type of dict to save. Should be one of: "dataset",
-            "distribution", "parser" or "generator".
         dct: Dict with data to save.
+        type: Type of data to save. Should either be one of the
+            pre-defined names: "dataset", "distribution", "accessService",
+            "parser" and "generator" or an IRI to a class in an ontology.
+            Defaults to "dataset".
         prefixes: Dict with prefixes in addition to those included in the
             JSON-LD context. Should map namespace prefixes to IRIs.
         kwargs: Additional keyword arguments to add to the returned dict.
@@ -333,6 +335,9 @@ def get_values(
     return values


+# TODO: update this function to take an initial argument `context`,
+# which can be a URL (string), a dict with the raw context, or a list
+# of strings or dicts.
 @cache  # type: ignore
 def get_jsonld_context(timeout: float = 5, fromfile: bool = True) -> dict:
     """Returns the JSON-LD context as a dict.
@@ -355,6 +360,8 @@ def get_jsonld_context(timeout: float = 5, fromfile: bool = True) -> dict:
     return context


+# TODO: update this to take an initial argument `context`.
+# See get_jsonld_context()
 def get_prefixes(timeout: float = 5) -> dict:
     """Loads the JSON-LD context and returns a dict mapping prefixes to
     their namespace URL."""
@@ -367,6 +374,8 @@ def get_prefixes(timeout: float = 5) -> dict:
     return prefixes


+# TODO: update this to take an initial argument `context`.
+# See get_jsonld_context()
 def get_shortnames(timeout: float = 5) -> dict:
     """Loads the JSON-LD context and returns a dict mapping IRIs to their
     short names defined in the context."""
@@ -407,9 +416,61 @@ def add(d: dict, key: str, value: "Any") -> None:
         d[key] = value
     else:
         klst = d[key] if isinstance(d[key], list) else [d[key]]
-        vlst = value if isinstance(value, list) else [value]
-        v = list(set(klst).union(vlst))
-        d[key] = v[0] if len(v) == 1 else sorted(v)
+        if isinstance(value, dict):
+            v = klst if value in klst else klst + [value]
+        else:
+            vlst = value if isinstance(value, list) else [value]
+            try:
+                v = list(set(klst).union(vlst))
+            except TypeError:  # klst contains unhashable dicts
+                v = klst + [x for x in vlst if x not in klst]
+        d[key] = (
+            v[0]
+            if len(v) == 1
+            else sorted(
+                # Sort dicts at end, by representing them with a huge
+                # unicode character
+                v,
+                key=lambda x: "\uffff" if isinstance(x, dict) else x,
+            )
+        )
+
+
+def addnested(d: "Union[dict, list]", key: str, value: "Any"):
+    """Like add(), but allows `key` to be a dot-separated list of sub-keys.
+
+    Each sub-key will be added to `d` as a corresponding sub-dict.
+
+    Example:
+
+        >>> d = {}
+        >>> addnested(d, "a.b.c", "val")
+        {'a': {'b': {'c': 'val'}}}
+
+    """
+    if "." in key:
+        first, rest = key.split(".", 1)
+        if isinstance(d, list):
+            for ele in d:
+                if isinstance(ele, dict):
+                    addnested(ele, key, value)
+                    break
+            else:
+                d.append(addnested({}, key, value))
+        elif first in d and isinstance(d[first], (dict, list)):
+            addnested(d[first], rest, value)
+        else:
+            addnested(d, first, addnested(AttrDict(), rest, value))
+    elif isinstance(d, list):
+        for ele in d:
+            if isinstance(ele, dict):
+                add(ele, key, value)
+                break
+        else:
+            d.append({key: value})
+    else:
+        add(d, key, value)
+    return d


 def get(
@@ -516,6 +577,8 @@ def prepare_datadoc(datadoc: dict) -> dict:
     return d


+# TODO: update this function to correctly handle multiple contexts
+# provided with the `_context` keyword argument.
 def as_jsonld(
     dct: dict,
     type: "Optional[str]" = "dataset",
@@ -526,8 +589,8 @@ def as_jsonld(
     """Return an updated copy of dict `dct` as valid JSON-LD.

     Arguments:
-        dct: Dict to return an updated copy of.
-        type: Type of dict to prepare. Should either be one of the
+        dct: Dict with data documentation represented as JSON-LD.
+        type: Type of data to document. Should either be one of the
             pre-defined names: "dataset", "distribution", "accessService",
             "parser" and "generator" or an IRI to a class in an ontology.
             Defaults to "dataset".
diff --git a/tripper/dataset/tabledoc.py b/tripper/dataset/tabledoc.py
new file mode 100644
index 00000000..b1a8ef51
--- /dev/null
+++ b/tripper/dataset/tabledoc.py
@@ -0,0 +1,68 @@
+"""Basic interface for tabular documentation of datasets."""
+
+from typing import TYPE_CHECKING
+
+from tripper import Triplestore
+from tripper.dataset.dataset import addnested, as_jsonld, save_dict
+from tripper.utils import AttrDict
+
+if TYPE_CHECKING:  # pragma: no cover
+    from typing import List, Optional, Sequence, Union
+
+
+class TableDoc:
+    """Representation of tabular documentation of datasets.
+
+    Arguments:
+        header: Sequence of column header labels. Nested data can
+            be represented by dot-separated label strings (e.g.
+            "distribution.downloadURL")
+        data: Sequence of rows of data. Each row documents an entry.
+        type: Type of data to save (applies to all rows). Should
+            either be one of the pre-defined names: "dataset",
+            "distribution", "accessService", "parser" and "generator"
+            or an IRI to a class in an ontology. Defaults to
+            "dataset".
+        prefixes: Dict with prefixes in addition to those included in the
+            JSON-LD context. Should map namespace prefixes to IRIs.
+        context: Dict with user-defined JSON-LD context.
+
+    """
+
+    # pylint: disable=redefined-builtin,too-few-public-methods
+
+    def __init__(
+        self,
+        header: "Sequence[str]",
+        data: "Sequence[Sequence[str]]",
+        type: "Optional[str]" = "dataset",
+        prefixes: "Optional[dict]" = None,
+        context: "Optional[Union[dict, list]]" = None,
+    ):
+        self.header = header
+        self.data = data
+        self.type = type
+        self.prefixes = prefixes
+        self.context = context
+
+    def asdicts(self) -> "List[dict]":
+        """Return the table as a list of dicts."""
+        kw = {"_context": self.context} if self.context else {}
+
+        results = []
+        for row in self.data:
+            d = AttrDict()
+            for i, colname in enumerate(self.header):
+                cell = row[i]
+                if cell:
+                    addnested(d, colname, cell)
+            jsonld = as_jsonld(
+                d, type=self.type, prefixes=self.prefixes, **kw  # type: ignore
+            )
+            results.append(jsonld)
+        return results
+
+    def save(self, ts: Triplestore) -> None:
+        """Save tabular data documentation to triplestore."""
+        for d in self.asdicts():
+            save_dict(ts, d)

From 028054fad4ae6a7e0055ad746b90f200c807e965 Mon Sep 17 00:00:00 2001
From: Jesper Friis
Date: Mon, 16 Dec 2024 00:39:21 +0100
Subject: [PATCH 03/25] Import indir/outdir inside test functions

---
 tests/dataset/test_dataaccess.py | 3 ++-
 tests/dataset/test_dataset.py    | 5 ++++-
 tripper/dataset/dataset.py       | 3 ++-
 3 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/tests/dataset/test_dataaccess.py b/tests/dataset/test_dataaccess.py
index bdc0ef45..0cbc7727 100644
--- a/tests/dataset/test_dataaccess.py
+++ b/tests/dataset/test_dataaccess.py
@@ -3,7 +3,6 @@
 # pylint: disable=invalid-name,too-many-locals,duplicate-code

 import pytest
-from dataset_paths import outdir

 pytest.importorskip("yaml")
 pytest.importorskip("requests")
@@ -14,6 +13,8 @@ def test_save_and_load():
     """Test save() and load()."""
     # pylint: disable=too-many-statements

+    from dataset_paths import outdir
+
     from tripper import DCAT, DCTERMS, EMMO, Triplestore
     from tripper.dataset import load, load_dict, save, save_dict

diff --git a/tests/dataset/test_dataset.py b/tests/dataset/test_dataset.py
index 4e43cd10..1a0cffbd 100644
--- a/tests/dataset/test_dataset.py
+++ b/tests/dataset/test_dataset.py
@@ -3,7 +3,6 @@
 # pylint: disable=invalid-name,too-many-locals,duplicate-code

 import pytest
-from dataset_paths import indir

 pytest.importorskip("yaml")
 pytest.importorskip("requests")
@@ -128,6 +127,8 @@ def test_datadoc():
     """Test save_datadoc() and load_dict()/save_dict()."""
     # pylint: disable=too-many-statements

+    from dataset_paths import indir
+
     from tripper import CHAMEO, DCAT, EMMO, OTEIO, Triplestore
     from tripper.dataset import load_dict, save_datadoc, save_dict, search_iris

@@ -219,6 +220,8 @@ def test_pipeline():
     from tripper import Triplestore

     otelib = pytest.importorskip("otelib")
+    from dataset_paths import indir
+
     from tripper.dataset import get_partial_pipeline, save_datadoc

     # Prepare triplestore
diff --git a/tripper/dataset/dataset.py b/tripper/dataset/dataset.py
index 2bb5e6a1..13fdb935 100644
--- a/tripper/dataset/dataset.py
+++ b/tripper/dataset/dataset.py
@@ -445,7 +445,8 @@ def addnested(d: "Union[dict, list]", key: str, value: "Any"):

         >>> d = {}
         >>> addnested(d, "a.b.c", "val")
-        {'a': {'b': {'c': 'val'}}}
+        >>> d == {'a': {'b': {'c': 'val'}}}
+        True

     """
     if "." in key:

From ef5239ad7e96f586c5ce970cec834cf7881864ff Mon Sep 17 00:00:00 2001
From: Jesper Friis
Date: Mon, 16 Dec 2024 00:56:02 +0100
Subject: [PATCH 04/25] Fixed doctest issue

---
 tripper/dataset/dataset.py | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/tripper/dataset/dataset.py b/tripper/dataset/dataset.py
index 13fdb935..8db5b497 100644
--- a/tripper/dataset/dataset.py
+++ b/tripper/dataset/dataset.py
@@ -38,9 +38,6 @@
 from pathlib import Path
 from typing import TYPE_CHECKING

-import requests
-import yaml  # type: ignore
-
 from tripper import DCAT, EMMO, OTEIO, OWL, RDF, Triplestore
 from tripper.utils import AttrDict, as_python

@@ -171,6 +168,8 @@ def save_extra_content(ts: Triplestore, dct: dict) -> None:
     - data models (require that DLite is installed)

     """
+    import requests
+
     # Save statements and mappings
     statements = get_values(dct, "statements")
     statements.extend(get_values(dct, "mappings"))
@@ -351,6 +350,8 @@ def get_jsonld_context(timeout: float = 5, fromfile: bool = True) -> dict:
         fromfile: Whether to load the context from local file.

     """
+    import requests
+
     if fromfile:
         with open(CONTEXT_PATH[7:], "r", encoding="utf-8") as f:
             context = json.load(f)["@context"]
@@ -436,16 +437,18 @@ def add(d: dict, key: str, value: "Any") -> None:
         )


-def addnested(d: "Union[dict, list]", key: str, value: "Any"):
+def addnested(
+    d: "Union[dict, list]", key: str, value: "Any"
+) -> "Union[dict, list]":
     """Like add(), but allows `key` to be a dot-separated list of sub-keys.
+    Returns the updated `d`.

     Each sub-key will be added to `d` as a corresponding sub-dict.

     Example:

         >>> d = {}
-        >>> addnested(d, "a.b.c", "val")
-        >>> d == {'a': {'b': {'c': 'val'}}}
+        >>> addnested(d, "a.b.c", "val") == {'a': {'b': {'c': 'val'}}}
         True

@@ -508,6 +511,8 @@ def expand_iri(iri: str, prefixes: dict) -> str:

 def read_datadoc(filename: "Union[str, Path]") -> dict:
     """Read YAML data documentation and return it as a dict."""
+    import yaml  # type: ignore
+
     with open(filename, "r", encoding="utf-8") as f:
         d = yaml.safe_load(f)
     return prepare_datadoc(d)

From 331878a1756ed225d48fc3a2e0acdd567ccc4774 Mon Sep 17 00:00:00 2001
From: Jesper Friis
Date: Mon, 16 Dec 2024 01:01:22 +0100
Subject: [PATCH 05/25] Skip test_tabledoc if rdflib isn't available

---
 tests/dataset/test_tabledoc.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tests/dataset/test_tabledoc.py b/tests/dataset/test_tabledoc.py
index 77e81dcc..49902b69 100644
--- a/tests/dataset/test_tabledoc.py
+++ b/tests/dataset/test_tabledoc.py
@@ -1,5 +1,7 @@
 """Test the dataset module."""

+import pytest
+
 from tripper import Triplestore
 from tripper.dataset import TableDoc

@@ -10,6 +12,8 @@ def test_as_dicts():

     from tripper import DCAT, EMMO, Namespace

+    pytest.importorskip("rdflib")
+
     ONTO = Namespace("http:/example.com/onto#")
     DS = Namespace("http:/example.com/datasets#")

From 5fe9cf7c387a9fb3ccd02856c0878ef8dceb8eba Mon Sep 17 00:00:00 2001
From: Jesper Friis
Date: Mon, 16 Dec 2024 01:04:02 +0100
Subject: [PATCH 06/25] More pylint fixes...
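
The import-error warnings come from importing the local dataset_paths
helper, which is not an installed package. A sketch of the intended
pattern (simplified from the actual tests; the assert is only an
illustration):

    import pytest

    pytest.importorskip("yaml")
    pytest.importorskip("requests")


    def test_save_and_load():
        """Test save() and load()."""
        # Local test helper, not an installed package
        from dataset_paths import outdir  # pylint: disable=import-error

        assert outdir.name == "output"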
---
 tests/dataset/test_dataaccess.py | 2 +-
 tests/dataset/test_dataset.py    | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/dataset/test_dataaccess.py b/tests/dataset/test_dataaccess.py
index 0cbc7727..ecf98dba 100644
--- a/tests/dataset/test_dataaccess.py
+++ b/tests/dataset/test_dataaccess.py
@@ -13,7 +13,7 @@ def test_save_and_load():
     """Test save() and load()."""
     # pylint: disable=too-many-statements

-    from dataset_paths import outdir
+    from dataset_paths import outdir  # pytest: disable=import-error

     from tripper import DCAT, DCTERMS, EMMO, Triplestore
     from tripper.dataset import load, load_dict, save, save_dict
diff --git a/tests/dataset/test_dataset.py b/tests/dataset/test_dataset.py
index 1a0cffbd..4fb5ec09 100644
--- a/tests/dataset/test_dataset.py
+++ b/tests/dataset/test_dataset.py
@@ -127,7 +127,7 @@ def test_datadoc():
     """Test save_datadoc() and load_dict()/save_dict()."""
     # pylint: disable=too-many-statements

-    from dataset_paths import indir
+    from dataset_paths import indir  # pytest: disable=import-error

     from tripper import CHAMEO, DCAT, EMMO, OTEIO, Triplestore
     from tripper.dataset import load_dict, save_datadoc, save_dict, search_iris
@@ -220,7 +220,7 @@ def test_pipeline():
     from tripper import Triplestore

     otelib = pytest.importorskip("otelib")
-    from dataset_paths import indir
+    from dataset_paths import indir  # pytest: disable=import-error

     from tripper.dataset import get_partial_pipeline, save_datadoc

From 4aaeed8551b56ece4c7c9af151ce3152a2779077 Mon Sep 17 00:00:00 2001
From: Jesper Friis
Date: Mon, 16 Dec 2024 01:08:20 +0100
Subject: [PATCH 07/25] Placed importorskip before importing EMMO

---
 tests/dataset/test_tabledoc.py | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/tests/dataset/test_tabledoc.py b/tests/dataset/test_tabledoc.py
index 49902b69..4a1c0613 100644
--- a/tests/dataset/test_tabledoc.py
+++ b/tests/dataset/test_tabledoc.py
@@ -2,18 +2,16 @@

 import pytest

-from tripper import Triplestore
-from tripper.dataset import TableDoc
-

 # if True:
 def test_as_dicts():
     """Test the as_dicts() method."""

-    from tripper import DCAT, EMMO, Namespace
-
     pytest.importorskip("rdflib")

+    from tripper import DCAT, EMMO, Namespace, Triplestore
+    from tripper.dataset import TableDoc
+
     ONTO = Namespace("http:/example.com/onto#")
     DS = Namespace("http:/example.com/datasets#")

From 0f21fbbde6f6b6eaaf6583145f403fb7f841c0a7 Mon Sep 17 00:00:00 2001
From: Jesper Friis
Date: Mon, 16 Dec 2024 01:12:12 +0100
Subject: [PATCH 08/25] typo

---
 tests/dataset/test_dataaccess.py | 2 +-
 tests/dataset/test_dataset.py    | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/dataset/test_dataaccess.py b/tests/dataset/test_dataaccess.py
index ecf98dba..af058440 100644
--- a/tests/dataset/test_dataaccess.py
+++ b/tests/dataset/test_dataaccess.py
@@ -13,7 +13,7 @@ def test_save_and_load():
     """Test save() and load()."""
     # pylint: disable=too-many-statements

-    from dataset_paths import outdir  # pytest: disable=import-error
+    from dataset_paths import outdir  # pylint: disable=import-error

     from tripper import DCAT, DCTERMS, EMMO, Triplestore
     from tripper.dataset import load, load_dict, save, save_dict
diff --git a/tests/dataset/test_dataset.py b/tests/dataset/test_dataset.py
index 4fb5ec09..9bdec2c6 100644
--- a/tests/dataset/test_dataset.py
+++ b/tests/dataset/test_dataset.py
@@ -127,7 +127,7 @@ def test_datadoc():
     """Test save_datadoc() and load_dict()/save_dict()."""
     # pylint: disable=too-many-statements

-    from dataset_paths import indir  # pytest: disable=import-error
+    from dataset_paths import indir  # pylint: disable=import-error

     from tripper import CHAMEO, DCAT, EMMO, OTEIO, Triplestore
     from tripper.dataset import load_dict, save_datadoc, save_dict, search_iris
@@ -220,7 +220,7 @@ def test_pipeline():
     from tripper import Triplestore

     otelib = pytest.importorskip("otelib")
-    from dataset_paths import indir  # pytest: disable=import-error
+    from dataset_paths import indir  # pylint: disable=import-error

     from tripper.dataset import get_partial_pipeline, save_datadoc

From 4cc88cb0fb119697f35184177e9dbfb697141ac4 Mon Sep 17 00:00:00 2001
From: Jesper Friis
Date: Mon, 16 Dec 2024 15:53:34 +0100
Subject: [PATCH 09/25] Fixed pylint errors

---
 tripper/dataset/dataset.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/tripper/dataset/dataset.py b/tripper/dataset/dataset.py
index b98eaeaa..bbb4a178 100644
--- a/tripper/dataset/dataset.py
+++ b/tripper/dataset/dataset.py
@@ -42,9 +42,6 @@
 from pathlib import Path
 from typing import TYPE_CHECKING

-import requests
-import yaml  # type: ignore
-
 from tripper import DCAT, EMMO, OTEIO, OWL, RDF, Triplestore
 from tripper.utils import AttrDict, as_python

From 92b213d7b2a292b04ddbaf0921d4e046a27e95db Mon Sep 17 00:00:00 2001
From: Jesper Friis
Date: Thu, 19 Dec 2024 10:10:35 +0100
Subject: [PATCH 10/25] added csv file

---
 tests/input/semdata.csv | 5 +++++
 1 file changed, 5 insertions(+)
 create mode 100644 tests/input/semdata.csv

diff --git a/tests/input/semdata.csv b/tests/input/semdata.csv
new file mode 100644
index 00000000..631d9e69
--- /dev/null
+++ b/tests/input/semdata.csv
@@ -0,0 +1,5 @@
+@id;@type;title;description;creator;contactPoint;inSeries;datamodel;datamodelStorage;distribution.downloadURL;distribution.mediaType;distribution.parser;fromSample;isDescriptionOf
+semdata:SEM_cement_batch2/77600-23-001/77600-23-001_5kV_400x_m001;sem:SEMImage;SEM image of cement;Back-scattered SEM image of cement sample 77600 from Heidelberg, polished with 1 µm diamond compound.;Sigurd Wenner;Sigurd Wenner ;semdata:SEM_cement_batch2/77600-23-001;http://onto-ns.com/meta/matchmaker/0.2/SEMImage;https://github.com/HEU-MatCHMaker/DataDocumentation/blob/master/SEM/datamodels/SEMImage.yaml;https://github.com/EMMC-ASBL/tripper/raw/refs/heads/dataset/tests/input/77600-23-001_5kV_400x_m001.tif;image/tiff;parser:sem_hitachi;sample:SEM_cement_batch2/77600-23-001;mat:concrete1
+semdata:SEM_cement_batch2/77600-23-001;sem:SEMImageSeries;Series of SEM image of cement sample 77600;Back-scattered SEM image of cement sample 77600, polished with 1 µm diamond compound.;Sigurd Wenner;Sigurd Wenner ;semdata:SEM_cement_batch2; ;;sftp://nas.aimen.es/P_MATCHMAKER_SHARE_SINTEF/SEM_cement_batch2/77600-23-001;inode/directory;;;
+semdata:SEM_cement_batch2;sem:SEMImageSeries;Nested series of SEM images of cement batch2;…;Sigurd Wenner;Sigurd Wenner ; ;;;sftp://nas.aimen.es/P_MATCHMAKER_SHARE_SINTEF/SEM_cement_batch2;inode/directory;;;
+mple:SEM_cement_batch2/77600-23-001;chameo:Sample;Series for SEM images for sample 77600-23-001.; ;;;;;;;;;;

From ae20a0a3dc36926c90511f522b47dbbce5b04259 Mon Sep 17 00:00:00 2001
From: Jesper Friis
Date: Thu, 19 Dec 2024 10:54:48 +0100
Subject: [PATCH 11/25] Added csv parser

---
 tests/dataset/test_tabledoc.py | 28 ++++++++++++++++++++++++++++
 tests/input/semdata.csv        |  8 ++++----
 tripper/dataset/tabledoc.py    | 25 +++++++++++++++++++++++++
 3 files changed, 57 insertions(+), 4 deletions(-)

diff --git a/tests/dataset/test_tabledoc.py b/tests/dataset/test_tabledoc.py
index 4a1c0613..52ea9236 100644
--- a/tests/dataset/test_tabledoc.py
+++ b/tests/dataset/test_tabledoc.py
@@ -79,3 +79,31 @@ def test_as_dicts():
     ts = Triplestore(backend="rdflib")
     td.save(ts)
     print(ts.serialize())
+
+
+if True:
+    # def test_parse_csv():
+    """Test parsing a csv file."""
+    from dataset_paths import indir  # pylint: disable=import-error
+
+    from tripper.dataset import TableDoc
+
+    td = TableDoc.parse_csv(
+        indir / "semdata.csv",
+        delimiter=";",
+        prefixes={
+            "sem": "https://w3id.com/emmo/domain/sem/0.1#",
+            "semdata": "https://he-matchmaker.eu/data/sem/",
+            "sample": "https://he-matchmaker.eu/sample/",
+            "mat": "https://he-matchmaker.eu/material/",
+            "dm": "http://onto-ns.com/meta/characterisation/0.1/SEMImage#",
+            "parser": "http://sintef.no/dlite/parser#",
+            "gen": "http://sintef.no/dlite/generator#",
+        },
+    )
+
+    img, series, batch, sample = td.asdicts()
+    assert img["@id"] == (
+        "https://he-matchmaker.eu/data/sem/SEM_cement_batch2/"
+        "77600-23-001/77600-23-001_5kV_400x_m001"
+    )
diff --git a/tests/input/semdata.csv b/tests/input/semdata.csv
index 631d9e69..4df732ef 100644
--- a/tests/input/semdata.csv
+++ b/tests/input/semdata.csv
@@ -1,5 +1,5 @@
 @id;@type;title;description;creator;contactPoint;inSeries;datamodel;datamodelStorage;distribution.downloadURL;distribution.mediaType;distribution.parser;fromSample;isDescriptionOf
-semdata:SEM_cement_batch2/77600-23-001/77600-23-001_5kV_400x_m001;sem:SEMImage;SEM image of cement;Back-scattered SEM image of cement sample 77600 from Heidelberg, polished with 1 µm diamond compound.;Sigurd Wenner;Sigurd Wenner ;semdata:SEM_cement_batch2/77600-23-001;http://onto-ns.com/meta/matchmaker/0.2/SEMImage;https://github.com/HEU-MatCHMaker/DataDocumentation/blob/master/SEM/datamodels/SEMImage.yaml;https://github.com/EMMC-ASBL/tripper/raw/refs/heads/dataset/tests/input/77600-23-001_5kV_400x_m001.tif;image/tiff;parser:sem_hitachi;sample:SEM_cement_batch2/77600-23-001;mat:concrete1
-semdata:SEM_cement_batch2/77600-23-001;sem:SEMImageSeries;Series of SEM image of cement sample 77600;Back-scattered SEM image of cement sample 77600, polished with 1 µm diamond compound.;Sigurd Wenner;Sigurd Wenner ;semdata:SEM_cement_batch2; ;;sftp://nas.aimen.es/P_MATCHMAKER_SHARE_SINTEF/SEM_cement_batch2/77600-23-001;inode/directory;;;
-semdata:SEM_cement_batch2;sem:SEMImageSeries;Nested series of SEM images of cement batch2;…;Sigurd Wenner;Sigurd Wenner ; ;;;sftp://nas.aimen.es/P_MATCHMAKER_SHARE_SINTEF/SEM_cement_batch2;inode/directory;;;
-mple:SEM_cement_batch2/77600-23-001;chameo:Sample;Series for SEM images for sample 77600-23-001.; ;;;;;;;;;;
+semdata:SEM_cement_batch2/77600-23-001/77600-23-001_5kV_400x_m001;sem:SEMImage;SEM image of cement;Back-scattered SEM image of cement sample 77600 from Heidelberg, polished with 1 um diamond compound.;Sigurd Wenner;Sigurd Wenner ;semdata:SEM_cement_batch2/77600-23-001;http://onto-ns.com/meta/matchmaker/0.2/SEMImage;https://github.com/HEU-MatCHMaker/DataDocumentation/blob/master/SEM/datamodels/SEMImage.yaml;https://github.com/EMMC-ASBL/tripper/raw/refs/heads/dataset/tests/input/77600-23-001_5kV_400x_m001.tif;image/tiff;parser:sem_hitachi;sample:SEM_cement_batch2/77600-23-001;mat:concrete1
+semdata:SEM_cement_batch2/77600-23-001;sem:SEMImageSeries;Series of SEM image of cement sample 77600;Back-scattered SEM image of cement sample 77600, polished with 1 um diamond compound.;Sigurd Wenner;Sigurd Wenner ;semdata:SEM_cement_batch2; ;;sftp://nas.aimen.es/P_MATCHMAKER_SHARE_SINTEF/SEM_cement_batch2/77600-23-001;inode/directory;;;
+semdata:SEM_cement_batch2;sem:SEMImageSeries;Nested series of SEM images of cement batch2;...;Sigurd Wenner;Sigurd Wenner ; ;;;sftp://nas.aimen.es/P_MATCHMAKER_SHARE_SINTEF/SEM_cement_batch2;inode/directory;;;
+sample:SEM_cement_batch2/77600-23-001;chameo:Sample;Series for SEM images for sample 77600-23-001.; ;;;;;;;;;;
diff --git a/tripper/dataset/tabledoc.py b/tripper/dataset/tabledoc.py
index b1a8ef51..50fe86b7 100644
--- a/tripper/dataset/tabledoc.py
+++ b/tripper/dataset/tabledoc.py
@@ -1,5 +1,7 @@
 """Basic interface for tabular documentation of datasets."""

+import csv
+from pathlib import Path
 from typing import TYPE_CHECKING

 from tripper import Triplestore
@@ -66,3 +68,26 @@ def save(self, ts: Triplestore) -> None:
         """Save tabular data documentation to triplestore."""
         for d in self.asdicts():
             save_dict(ts, d)
+
+    @classmethod
+    def parse_csv(
+        self,
+        csvfile: "Union[Path, str]",
+        type: "Optional[str]" = "dataset",
+        prefixes: "Optional[dict]" = None,
+        context: "Optional[Union[dict, list]]" = None,
+        dialect="excel",
+        **kwargs,
+    ) -> None:
+        """Parse a csv file."""
+        with open(csvfile, newline="") as f:
+            reader = csv.reader(f, dialect=dialect, **kwargs)
+            header = next(reader)[0].split(reader.dialect.delimiter)
+            data = [row for row in reader]
+        return TableDoc(
+            header=header,
+            data=data,
+            type=type,
+            prefixes=prefixes,
+            context=context,
+        )

From 543e99e743876f68eaa8913c02d9df33ffc3ca07 Mon Sep 17 00:00:00 2001
From: Jesper Friis
Date: Thu, 19 Dec 2024 10:59:01 +0100
Subject: [PATCH 12/25] Updated the test

---
 tests/dataset/test_tabledoc.py | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/tests/dataset/test_tabledoc.py b/tests/dataset/test_tabledoc.py
index 52ea9236..ae7ca348 100644
--- a/tests/dataset/test_tabledoc.py
+++ b/tests/dataset/test_tabledoc.py
@@ -81,13 +81,16 @@ def test_as_dicts():
     print(ts.serialize())


-if True:
-    # def test_parse_csv():
+#if True:
+def test_parse_csv():
     """Test parsing a csv file."""
     from dataset_paths import indir  # pylint: disable=import-error

+    from tripper import Triplestore
     from tripper.dataset import TableDoc

+    pytest.importorskip("rdflib")
+
     td = TableDoc.parse_csv(
         indir / "semdata.csv",
         delimiter=";",
@@ -107,3 +110,7 @@ def test_parse_csv():
         "https://he-matchmaker.eu/data/sem/SEM_cement_batch2/"
         "77600-23-001/77600-23-001_5kV_400x_m001"
     )
+
+    ts = Triplestore(backend="rdflib")
+    td.save(ts)
+    print(ts.serialize())

From b3e3d0723f879547099aabcc45f9148e4a959700 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Thu, 19 Dec 2024 09:59:28 +0000
Subject: [PATCH 13/25] [pre-commit.ci] auto fixes from pre-commit hooks

For more information, see https://pre-commit.ci

---
 tests/dataset/test_tabledoc.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/dataset/test_tabledoc.py b/tests/dataset/test_tabledoc.py
index ae7ca348..00179810 100644
--- a/tests/dataset/test_tabledoc.py
+++ b/tests/dataset/test_tabledoc.py
@@ -81,7 +81,7 @@ def test_as_dicts():
     print(ts.serialize())


-#if True:
+# if True:
 def test_parse_csv():
     """Test parsing a csv file."""
     from dataset_paths import indir  # pylint: disable=import-error

From 700c514282a8374f32dd5d858be87e466be1eb4a Mon Sep 17 00:00:00 2001
From: Jesper Friis
Date: Thu, 19 Dec 2024 13:17:46 +0100
Subject: [PATCH 14/25] Fixed failing tests

---
 tests/dataset/test_tabledoc.py |  8 +++++---
 tripper/dataset/tabledoc.py    | 35 +++++++++++++++++++++++++++-------
 2 files changed, 33 insertions(+), 10 deletions(-)
diff --git a/tests/dataset/test_tabledoc.py b/tests/dataset/test_tabledoc.py
index ae7ca348..f1640dbc 100644
--- a/tests/dataset/test_tabledoc.py
+++ b/tests/dataset/test_tabledoc.py
@@ -81,16 +81,16 @@ def test_as_dicts():
     print(ts.serialize())


-#if True:
+# if True:
 def test_parse_csv():
     """Test parsing a csv file."""
     from dataset_paths import indir  # pylint: disable=import-error

+    pytest.importorskip("rdflib")
+
     from tripper import Triplestore
     from tripper.dataset import TableDoc

-    pytest.importorskip("rdflib")
-
     td = TableDoc.parse_csv(
         indir / "semdata.csv",
         delimiter=";",
@@ -105,7 +105,9 @@ def test_parse_csv():
         },
     )

+    # pylint: disable=unused-variable,unbalanced-tuple-unpacking
     img, series, batch, sample = td.asdicts()
+
     assert img["@id"] == (
         "https://he-matchmaker.eu/data/sem/SEM_cement_batch2/"
         "77600-23-001/77600-23-001_5kV_400x_m001"
     )
diff --git a/tripper/dataset/tabledoc.py b/tripper/dataset/tabledoc.py
index 50fe86b7..1dc37098 100644
--- a/tripper/dataset/tabledoc.py
+++ b/tripper/dataset/tabledoc.py
@@ -69,21 +69,42 @@ def save(self, ts: Triplestore) -> None:
         for d in self.asdicts():
             save_dict(ts, d)

-    @classmethod
+    @staticmethod
     def parse_csv(
-        self,
         csvfile: "Union[Path, str]",
         type: "Optional[str]" = "dataset",
         prefixes: "Optional[dict]" = None,
         context: "Optional[Union[dict, list]]" = None,
-        dialect="excel",
+        dialect: "Union[csv.Dialect, str]" = "excel",
         **kwargs,
-    ) -> None:
-        """Parse a csv file."""
-        with open(csvfile, newline="") as f:
+    ) -> "TableDoc":
+        # pylint: disable=line-too-long
+        """Parse a csv file using the standard library csv module.
+
+        Arguments:
+            csvfile: CSV file to parse.
+            type: Type of data to save (applies to all rows). Should
+                either be one of the pre-defined names: "dataset",
+                "distribution", "accessService", "parser" and "generator"
+                or an IRI to a class in an ontology. Defaults to
+                "dataset".
+            prefixes: Dict with prefixes in addition to those included in the
+                JSON-LD context. Should map namespace prefixes to IRIs.
+            context: Dict with user-defined JSON-LD context.
+            dialect: A subclass of csv.Dialect, or the name of the dialect,
+                specifying how the `csvfile` is formatted. For more details,
+                see [Dialects and Formatting Parameters].
+            kwargs: Additional keyword arguments overriding individual
+                formatting parameters. For more details, see
+                [Dialects and Formatting Parameters].
+
+        References:
+        [Dialects and Formatting Parameters]: https://docs.python.org/3/library/csv.html#dialects-and-formatting-parameters
+        """
+        with open(csvfile, encoding="utf-8") as f:
             reader = csv.reader(f, dialect=dialect, **kwargs)
             header = next(reader)[0].split(reader.dialect.delimiter)
-            data = [row for row in reader]
+            data = list(reader)

From 4d7d77adc9233b39ad05dfd6fb4222feef814b94 Mon Sep 17 00:00:00 2001
From: Jesper Friis
Date: Thu, 19 Dec 2024 13:33:10 +0100
Subject: [PATCH 15/25] Added encoding to keyword arguments

---
 tests/input/semdata.csv     | 4 ++--
 tripper/dataset/tabledoc.py | 5 ++++-
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/tests/input/semdata.csv b/tests/input/semdata.csv
index 4df732ef..c3cf536c 100644
--- a/tests/input/semdata.csv
+++ b/tests/input/semdata.csv
@@ -1,5 +1,5 @@
 @id;@type;title;description;creator;contactPoint;inSeries;datamodel;datamodelStorage;distribution.downloadURL;distribution.mediaType;distribution.parser;fromSample;isDescriptionOf
-semdata:SEM_cement_batch2/77600-23-001/77600-23-001_5kV_400x_m001;sem:SEMImage;SEM image of cement;Back-scattered SEM image of cement sample 77600 from Heidelberg, polished with 1 um diamond compound.;Sigurd Wenner;Sigurd Wenner ;semdata:SEM_cement_batch2/77600-23-001;http://onto-ns.com/meta/matchmaker/0.2/SEMImage;https://github.com/HEU-MatCHMaker/DataDocumentation/blob/master/SEM/datamodels/SEMImage.yaml;https://github.com/EMMC-ASBL/tripper/raw/refs/heads/dataset/tests/input/77600-23-001_5kV_400x_m001.tif;image/tiff;parser:sem_hitachi;sample:SEM_cement_batch2/77600-23-001;mat:concrete1
-semdata:SEM_cement_batch2/77600-23-001;sem:SEMImageSeries;Series of SEM image of cement sample 77600;Back-scattered SEM image of cement sample 77600, polished with 1 um diamond compound.;Sigurd Wenner;Sigurd Wenner ;semdata:SEM_cement_batch2; ;;sftp://nas.aimen.es/P_MATCHMAKER_SHARE_SINTEF/SEM_cement_batch2/77600-23-001;inode/directory;;;
+semdata:SEM_cement_batch2/77600-23-001/77600-23-001_5kV_400x_m001;sem:SEMImage;SEM image of cement;Back-scattered SEM image of cement sample 77600 from Heidelberg, polished with 1 μm diamond compound.;Sigurd Wenner;Sigurd Wenner ;semdata:SEM_cement_batch2/77600-23-001;http://onto-ns.com/meta/matchmaker/0.2/SEMImage;https://github.com/HEU-MatCHMaker/DataDocumentation/blob/master/SEM/datamodels/SEMImage.yaml;https://github.com/EMMC-ASBL/tripper/raw/refs/heads/dataset/tests/input/77600-23-001_5kV_400x_m001.tif;image/tiff;parser:sem_hitachi;sample:SEM_cement_batch2/77600-23-001;mat:concrete1
+semdata:SEM_cement_batch2/77600-23-001;sem:SEMImageSeries;Series of SEM image of cement sample 77600;Back-scattered SEM image of cement sample 77600, polished with 1 μm diamond compound.;Sigurd Wenner;Sigurd Wenner ;semdata:SEM_cement_batch2; ;;sftp://nas.aimen.es/P_MATCHMAKER_SHARE_SINTEF/SEM_cement_batch2/77600-23-001;inode/directory;;;
 semdata:SEM_cement_batch2;sem:SEMImageSeries;Nested series of SEM images of cement batch2;...;Sigurd Wenner;Sigurd Wenner ; ;;;sftp://nas.aimen.es/P_MATCHMAKER_SHARE_SINTEF/SEM_cement_batch2;inode/directory;;;
 sample:SEM_cement_batch2/77600-23-001;chameo:Sample;Series for SEM images for sample 77600-23-001.; ;;;;;;;;;;
diff --git a/tripper/dataset/tabledoc.py b/tripper/dataset/tabledoc.py
index 1dc37098..46ad4313 100644
--- a/tripper/dataset/tabledoc.py
+++ b/tripper/dataset/tabledoc.py
@@ -75,6 +75,7 @@ def parse_csv(
         type: "Optional[str]" = "dataset",
         prefixes: "Optional[dict]" = None,
         context: "Optional[Union[dict, list]]" = None,
list]]" = None, + encoding: str = "utf-8", dialect: "Union[csv.Dialect, str]" = "excel", **kwargs, ) -> "TableDoc": @@ -91,6 +92,8 @@ def parse_csv( prefixes: Dict with prefixes in addition to those included in the JSON-LD context. Should map namespace prefixes to IRIs. context: Dict with user-defined JSON-LD context. + encoding: The encoding of the csv file. Note that Excel may + encode as "ISO-8859" (commonly used in 1990th). dialect: A subclass of csv.Dialect, or the name of the dialect, specifying how the `csvfile` is formatted. For more details, see [Dialects and Formatting Parameters]. @@ -101,7 +104,7 @@ def parse_csv( References: [Dialects and Formatting Parameters]: https://docs.python.org/3/library/csv.html#dialects-and-formatting-parameters """ - with open(csvfile, encoding="utf-8") as f: + with open(csvfile, encoding=encoding) as f: reader = csv.reader(f, dialect=dialect, **kwargs) header = next(reader)[0].split(reader.dialect.delimiter) data = list(reader) From 80048677f0c4612154dcca687565c90423a56eb0 Mon Sep 17 00:00:00 2001 From: Jesper Friis Date: Fri, 20 Dec 2024 15:30:39 +0100 Subject: [PATCH 16/25] Strip off blanks when parsing a table. --- tripper/dataset/tabledoc.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/tripper/dataset/tabledoc.py b/tripper/dataset/tabledoc.py index 46ad4313..75edb181 100644 --- a/tripper/dataset/tabledoc.py +++ b/tripper/dataset/tabledoc.py @@ -28,6 +28,7 @@ class TableDoc: prefixes: Dict with prefixes in addition to those included in the JSON-LD context. Should map namespace prefixes to IRIs. context: Dict with user-defined JSON-LD context. + strip: Whether to strip leading and trailing whitespaces from cells. """ @@ -40,12 +41,14 @@ def __init__( type: "Optional[str]" = "dataset", prefixes: "Optional[dict]" = None, context: "Optional[Union[dict, list]]" = None, + strip: bool = True, ): self.header = header self.data = data self.type = type self.prefixes = prefixes self.context = context + self.strip = strip def asdicts(self) -> "List[dict]": """Return the table as a list of dicts.""" @@ -55,9 +58,11 @@ def asdicts(self) -> "List[dict]": for row in self.data: d = AttrDict() for i, colname in enumerate(self.header): - cell = row[i] + cell = row[i].strip() if self.strip else row[i] if cell: - addnested(d, colname, cell) + addnested( + d, colname.strip() if self.strip else colname, cell + ) jsonld = as_jsonld( d, type=self.type, prefixes=self.prefixes, **kw # type: ignore ) @@ -106,8 +111,9 @@ def parse_csv( """ with open(csvfile, encoding=encoding) as f: reader = csv.reader(f, dialect=dialect, **kwargs) - header = next(reader)[0].split(reader.dialect.delimiter) + header = next(reader) data = list(reader) + return TableDoc( header=header, data=data, From 731253cd16ba58525b739166892251a0d88ca8af Mon Sep 17 00:00:00 2001 From: Jesper Friis Date: Fri, 20 Dec 2024 15:39:14 +0100 Subject: [PATCH 17/25] Added extra test to ensure that all properties are parsed correctly --- tests/dataset/test_tabledoc.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/dataset/test_tabledoc.py b/tests/dataset/test_tabledoc.py index f1640dbc..e9fff0c1 100644 --- a/tests/dataset/test_tabledoc.py +++ b/tests/dataset/test_tabledoc.py @@ -112,6 +112,10 @@ def test_parse_csv(): "https://he-matchmaker.eu/data/sem/SEM_cement_batch2/" "77600-23-001/77600-23-001_5kV_400x_m001" ) + assert img.distribution.downloadURL == ( + "https://github.com/EMMC-ASBL/tripper/raw/refs/heads/dataset/" + "tests/input/77600-23-001_5kV_400x_m001.tif" + ) 
     ts = Triplestore(backend="rdflib")
     td.save(ts)
     print(ts.serialize())

From 60b0c6d2657242d36c5cac1f979ba7172783fba6 Mon Sep 17 00:00:00 2001
From: Jesper Friis
Date: Fri, 20 Dec 2024 15:57:11 +0100
Subject: [PATCH 18/25] Added write_csv() method to TableDoc

---
 tests/dataset/test_tabledoc.py |  9 +++++++--
 tripper/dataset/tabledoc.py    | 31 ++++++++++++++++++++++++++++++-
 2 files changed, 37 insertions(+), 3 deletions(-)

diff --git a/tests/dataset/test_tabledoc.py b/tests/dataset/test_tabledoc.py
index e9fff0c1..5020989c 100644
--- a/tests/dataset/test_tabledoc.py
+++ b/tests/dataset/test_tabledoc.py
@@ -82,15 +82,16 @@ def test_as_dicts():

 # if True:
-def test_parse_csv():
+def test_csv():
     """Test parsing a csv file."""
-    from dataset_paths import indir  # pylint: disable=import-error
+    from dataset_paths import indir, outdir  # pylint: disable=import-error

     pytest.importorskip("rdflib")

     from tripper import Triplestore
     from tripper.dataset import TableDoc

+    # Read csv file
     td = TableDoc.parse_csv(
         indir / "semdata.csv",
         delimiter=";",
@@ -117,6 +118,10 @@ def test_csv():
         "tests/input/77600-23-001_5kV_400x_m001.tif"
     )

+    # Write the table to a new csv file
+    td.write_csv(outdir / "semdata.csv")
+
+    # Print serialised KB
     ts = Triplestore(backend="rdflib")
     td.save(ts)
     print(ts.serialize())
diff --git a/tripper/dataset/tabledoc.py b/tripper/dataset/tabledoc.py
index 75edb181..65337d83 100644
--- a/tripper/dataset/tabledoc.py
+++ b/tripper/dataset/tabledoc.py
@@ -109,7 +109,7 @@ def parse_csv(
         References:
         [Dialects and Formatting Parameters]: https://docs.python.org/3/library/csv.html#dialects-and-formatting-parameters
         """
-        with open(csvfile, encoding=encoding) as f:
+        with open(csvfile, mode="rt", encoding=encoding) as f:
             reader = csv.reader(f, dialect=dialect, **kwargs)
             header = next(reader)
             data = list(reader)
@@ -121,3 +121,32 @@ def parse_csv(
             prefixes=prefixes,
             context=context,
         )
+
+    def write_csv(
+        self,
+        csvfile: "Union[Path, str]",
+        encoding: str = "utf-8",
+        dialect: "Union[csv.Dialect, str]" = "excel",
+        **kwargs,
+    ) -> None:
+        # pylint: disable=line-too-long
+        """Write the table to a csv file using the standard library csv module.
+
+        Arguments:
+            csvfile: CSV file to write to.
+            encoding: The encoding of the csv file.
+            dialect: A subclass of csv.Dialect, or the name of the dialect,
+                specifying how the `csvfile` is formatted. For more details,
+                see [Dialects and Formatting Parameters].
+            kwargs: Additional keyword arguments overriding individual
+                formatting parameters. For more details, see
+                [Dialects and Formatting Parameters].
+
+        References:
+        [Dialects and Formatting Parameters]: https://docs.python.org/3/library/csv.html#dialects-and-formatting-parameters
+        """
+        with open(csvfile, mode="wt", encoding=encoding) as f:
+            writer = csv.writer(f, dialect=dialect, **kwargs)
+            writer.writerow(self.header)
+            for row in self.data:
+                writer.writerow(row)

From d26d92fdabfc27090558a66ed5c9362de026bb11 Mon Sep 17 00:00:00 2001
From: Jesper Friis
Date: Mon, 30 Dec 2024 12:14:49 +0100
Subject: [PATCH 19/25] Save serialised documentation to turtle file.
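
For context, writing the serialised documentation to a turtle file and
getting it back as a string look like this (a sketch; the `ex` names
and the file path are only examples):

    from tripper import Triplestore
    from tripper.dataset import TableDoc

    td = TableDoc(
        header=["@id", "@type"],
        data=[("ex:ds1", "ex:Type1")],
        prefixes={"ex": "http://example.com/ex#"},
    )
    ts = Triplestore(backend="rdflib")
    td.save(ts)
    ts.serialize("semdata.ttl")  # serialise to a turtle file
    print(ts.serialize())        # no destination: return as a string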
From d26d92fdabfc27090558a66ed5c9362de026bb11 Mon Sep 17 00:00:00 2001
From: Jesper Friis
Date: Mon, 30 Dec 2024 12:14:49 +0100
Subject: [PATCH 19/25] Save serialised documentation to turtle file.

---
 tests/dataset/test_tabledoc.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/dataset/test_tabledoc.py b/tests/dataset/test_tabledoc.py
index 5020989c..2d3c8779 100644
--- a/tests/dataset/test_tabledoc.py
+++ b/tests/dataset/test_tabledoc.py
@@ -124,4 +124,5 @@ def test_csv():
     # Print serialised KB
     ts = Triplestore(backend="rdflib")
     td.save(ts)
+    ts.serialize(outdir / "semdata.ttl")
     print(ts.serialize())

From 66b9dd75d0291359f9507033b9a8159cf0a8d320 Mon Sep 17 00:00:00 2001
From: Jesper Friis
Date: Mon, 30 Dec 2024 12:23:13 +0100
Subject: [PATCH 20/25] Apply suggestions from code review

Co-authored-by: Tor S. Haugland
---
 pyproject.toml                 | 1 -
 tests/dataset/test_tabledoc.py | 2 +-
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 21196860..0398f0a0 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -105,7 +105,6 @@ max-locals = 20
 disable = [
     "fixme",
     "invalid-name",
-    #"too-many-positional-arguments",
 ]
 good-names = [
     # Default

diff --git a/tests/dataset/test_tabledoc.py b/tests/dataset/test_tabledoc.py
index 4a1c0613..da74203c 100644
--- a/tests/dataset/test_tabledoc.py
+++ b/tests/dataset/test_tabledoc.py
@@ -1,4 +1,4 @@
-"""Test the dataset module."""
+"""Test the TableDoc class."""

 import pytest

From 575f09d2b13deb60ce8ae9addeb734650223ca31 Mon Sep 17 00:00:00 2001
From: Jesper Friis
Date: Mon, 30 Dec 2024 12:30:22 +0100
Subject: [PATCH 21/25] Apply suggestions from code review

Co-authored-by: Tor S. Haugland
---
 tripper/dataset/tabledoc.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tripper/dataset/tabledoc.py b/tripper/dataset/tabledoc.py
index b1a8ef51..9fd5d988 100644
--- a/tripper/dataset/tabledoc.py
+++ b/tripper/dataset/tabledoc.py
@@ -47,7 +47,7 @@ def __init__(

     def asdicts(self) -> "List[dict]":
         """Return the table as a list of dicts."""
-        kw = {"_context": self.context} if self.context else {}
+        kw = {"@context": self.context} if self.context else {}

         for row in self.data:

From f45376db770b151d6ea7eb4de901fb646ef3dc43 Mon Sep 17 00:00:00 2001
From: Jesper Friis
Date: Mon, 30 Dec 2024 12:49:06 +0100
Subject: [PATCH 22/25] Added a clarifying comment as a response to review
 comment by @torhaugl.

---
 tests/dataset/test_tabledoc.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/dataset/test_tabledoc.py b/tests/dataset/test_tabledoc.py
index da74203c..278e7881 100644
--- a/tests/dataset/test_tabledoc.py
+++ b/tests/dataset/test_tabledoc.py
@@ -32,6 +32,8 @@ def test_as_dicts():
             "onto": "http:/example.com/onto#",
             "ds": "http:/example.com/datasets#",
         },
+        # Replace the "ds" prefix above with this, once the "context" keyword
+        # argument is fully implemented.
        # context={
        #     "ds": "http:/example.com/datasets#",
        # },
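The "_context" to "@context" rename in PATCH 21 means a user-defined JSON-LD
context is forwarded to as_jsonld() under its standard key.  A sketch of the
usage this is building towards, assuming the context keyword eventually
becomes fully implemented (the namespace IRI is hypothetical):

    from tripper.dataset import TableDoc

    td = TableDoc(
        header=["@id", "title"],
        data=[["ds:mydata", "My dataset"]],
        # Intended to eventually replace an explicit "ds" entry in prefixes:
        context={"ds": "http://example.com/datasets#"},
    )

    # Each returned JSON-LD dict should then carry the user context in
    # addition to the default one.
    dicts = td.asdicts()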
From 1752db0016a521ad6f0b3e9a2bd4ab5997482181 Mon Sep 17 00:00:00 2001
From: Jesper Friis
Date: Mon, 30 Dec 2024 15:43:00 +0100
Subject: [PATCH 23/25] Fix test failure

---
 tripper/dataset/tabledoc.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tripper/dataset/tabledoc.py b/tripper/dataset/tabledoc.py
index 0711565b..6dbf8b32 100644
--- a/tripper/dataset/tabledoc.py
+++ b/tripper/dataset/tabledoc.py
@@ -58,7 +58,7 @@ def asdicts(self) -> "List[dict]":
         for row in self.data:
             d = AttrDict()
             for i, colname in enumerate(self.header):
-                cell = row[i].strip() if self.strip else row[i]
+                cell = row[i].strip() if row[i] and self.strip else row[i]
                 if cell:
                     addnested(
                         d, colname.strip() if self.strip else colname, cell

From 2988a324bc612691a7462a912f9a94ef2806c1f0 Mon Sep 17 00:00:00 2001
From: Jesper Friis
Date: Mon, 30 Dec 2024 18:52:44 +0100
Subject: [PATCH 24/25] Updated .gitignore files

---
 .gitignore              | 1 +
 tests/output/.gitignore | 1 +
 2 files changed, 2 insertions(+)

diff --git a/.gitignore b/.gitignore
index c872f80e..9a0e7df5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -18,3 +18,4 @@ dist/

 # Test output
 route.svg
+coverage.xml

diff --git a/tests/output/.gitignore b/tests/output/.gitignore
index c26b5163..613dbf65 100644
--- a/tests/output/.gitignore
+++ b/tests/output/.gitignore
@@ -3,3 +3,4 @@
 *.ttl
 *.png
 *.tiff
+*.csv

From 85a51ae24c7531b370a94946b62644fcae94d71c Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Fri, 3 Jan 2025 22:29:16 +0000
Subject: [PATCH 25/25] [pre-commit.ci] auto fixes from pre-commit hooks

For more information, see https://pre-commit.ci
---
 tripper/dataset/tabledoc.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tripper/dataset/tabledoc.py b/tripper/dataset/tabledoc.py
index c4f14567..6dbf8b32 100644
--- a/tripper/dataset/tabledoc.py
+++ b/tripper/dataset/tabledoc.py
@@ -150,4 +150,3 @@ def write_csv(
         writer.writerow(self.header)
         for row in self.data:
             writer.writerow(row)
-
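Finally, the guard added in PATCH 23 (row[i] and self.strip) is relevant when
a TableDoc is constructed directly from ragged data, where a missing cell may
be None rather than an empty string.  A small sketch of the failure mode the
fix addresses, with hypothetical values:

    from tripper.dataset import TableDoc

    td = TableDoc(
        header=["@id", "title", "description"],
        data=[
            ["ds:a", "First dataset", "Has a description"],
            ["ds:b", "Second dataset", None],  # missing cell: None, not ""
        ],
        prefixes={"ds": "http://example.com/datasets#"},
    )

    # The None cell is now skipped; previously asdicts() would raise
    # AttributeError from None.strip().
    dicts = td.asdicts()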