From f23669878ea9d87a4929d483217f4a61959fd9ea Mon Sep 17 00:00:00 2001
From: Jesper Friis
Date: Sun, 15 Dec 2024 19:00:44 +0100
Subject: [PATCH 01/25] Updated dataset, including the following changes:

- Allow adding other types of entries to the triplestore that are not
  datasets, e.g. samples, models, instruments, people, projects...
- Renamed list_dataset_iris() to search_iris(). It can now be used to
  search for all types of entries.
- Renamed prepare() to as_jsonld() and made it part of the public API

---
 tests/dataset/test_dataset.py    |  14 ++--
 tests/input/semdata.yaml         |  12 ++--
 tripper/context/0.2/context.json |   4 +-
 tripper/dataset/__init__.py      |   3 +-
 tripper/dataset/dataset.py       | 120 +++++++++++++++++++------------
 5 files changed, 91 insertions(+), 62 deletions(-)

diff --git a/tests/dataset/test_dataset.py b/tests/dataset/test_dataset.py
index 4aa8fbdb..105db426 100644
--- a/tests/dataset/test_dataset.py
+++ b/tests/dataset/test_dataset.py
@@ -116,12 +116,7 @@ def test_datadoc():
     # pylint: disable=too-many-statements

     from tripper import CHAMEO, DCAT, EMMO, OTEIO, Triplestore
-    from tripper.dataset import (
-        list_dataset_iris,
-        load_dict,
-        save_datadoc,
-        save_dict,
-    )
+    from tripper.dataset import load_dict, save_datadoc, save_dict, search_iris

     pytest.importorskip("dlite")
     pytest.importorskip("rdflib")
@@ -188,20 +183,19 @@ def test_datadoc():

     # Test searching the triplestore
     SAMPLE = ts.namespaces["sample"]
-    datasets = list_dataset_iris(ts)
+    datasets = search_iris(ts)
     named_datasets = {
         SEMDATA["SEM_cement_batch2/77600-23-001/77600-23-001_5kV_400x_m001"],
         SEMDATA["SEM_cement_batch2/77600-23-001"],
         SEMDATA["SEM_cement_batch2"],
-        SAMPLE["SEM_cement_batch2/77600-23-001"],
     }
     assert not named_datasets.difference(datasets)
-    assert set(list_dataset_iris(ts, creator="Sigurd Wenner")) == {
+    assert set(search_iris(ts, creator="Sigurd Wenner")) == {
         SEMDATA["SEM_cement_batch2/77600-23-001/77600-23-001_5kV_400x_m001"],
         SEMDATA["SEM_cement_batch2/77600-23-001"],
         SEMDATA["SEM_cement_batch2"],
     }
-    assert set(list_dataset_iris(ts, _type=CHAMEO.Sample)) == {
+    assert set(search_iris(ts, type=CHAMEO.Sample)) == {
         SAMPLE["SEM_cement_batch2/77600-23-001"],
     }

diff --git a/tests/input/semdata.yaml b/tests/input/semdata.yaml
index 0e99919f..2d1da201 100644
--- a/tests/input/semdata.yaml
+++ b/tests/input/semdata.yaml
@@ -1,4 +1,5 @@
 ---
+
 # This extends the list of prefixes that are already defined in the context
 prefixes:
   sem: https://w3id.com/emmo/domain/sem/0.1#
@@ -62,10 +63,6 @@ datasets:
       downloadURL: sftp://nas.aimen.es/P_MATCHMAKER_SHARE_SINTEF/SEM_cement_batch2
       mediaType: inode/directory

-  - "@id": sample:SEM_cement_batch2/77600-23-001
-    "@type": chameo:Sample
-    title: Series for SEM images for sample 77600-23-001.
-
 parsers:
   - "@id": parser:sem_hitachi
@@ -81,3 +78,10 @@ generators:
     generatorType: application/vnd.dlite-generate
     configuration:
       driver: hitachi
+
+
+# Other entities, like samples, instruments, persons, models etc...
+other_entries:
+  - "@id": sample:SEM_cement_batch2/77600-23-001
+    "@type": chameo:Sample
+    title: Series for SEM images for sample 77600-23-001.
diff --git a/tripper/context/0.2/context.json b/tripper/context/0.2/context.json
index d5903ba9..3f658c0d 100644
--- a/tripper/context/0.2/context.json
+++ b/tripper/context/0.2/context.json
@@ -32,8 +32,8 @@
     "hasCurrentVersion": "dcat:hasCurrentVersion",
     "hasVersion": "dcat:hasVersion",
     "inSeries": {
-      "@id" : "dcat:inSeries",
-      "@type" : "@id"
+      "@id": "dcat:inSeries",
+      "@type": "@id"
     },
     "keyword": "dcat:keyword",
     "landingPage": "dcat:landingPage",
diff --git a/tripper/dataset/__init__.py b/tripper/dataset/__init__.py
index 0a3a5088..d6435b8d 100644
--- a/tripper/dataset/__init__.py
+++ b/tripper/dataset/__init__.py
@@ -2,12 +2,13 @@

 from .dataaccess import load, save
 from .dataset import (
+    as_jsonld,
     get_jsonld_context,
     get_partial_pipeline,
     get_prefixes,
-    list_dataset_iris,
     load_dict,
     read_datadoc,
     save_datadoc,
     save_dict,
+    search_iris,
 )
diff --git a/tripper/dataset/dataset.py b/tripper/dataset/dataset.py
index 0387328d..ba4a0fde 100644
--- a/tripper/dataset/dataset.py
+++ b/tripper/dataset/dataset.py
@@ -9,12 +9,13 @@
 - `save_datadoc()`: Save documentation from YAML file to the triplestore.

 Functions for searching the triplestore:
-- `list_dataset_iris()`: Get IRIs of matching datasets.
+- `search_iris()`: Get IRIs of matching entries in the triplestore.

 Functions for working with the dict-representation:
 - `read_datadoc()`: Read documentation from YAML file and return it as dict.
 - `save_dict()`: Save dict documentation to the triplestore.
 - `load_dict()`: Load dict documentation from the triplestore.
+- `as_jsonld()`: Return the dict as JSON-LD (represented as a Python dict).

 Functions for interaction with OTEAPI:
 - `get_partial_pipeline()`: Returns an OTELib partial pipeline.
@@ -28,7 +29,6 @@
 """

-# pylint: enable=line-too-long
 # pylint: disable=invalid-name,redefined-builtin,import-outside-toplevel
 import functools
 import io
@@ -41,7 +41,7 @@
 import requests
 import yaml  # type: ignore

-from tripper import DCAT, EMMO, OTEIO, RDF, Triplestore
+from tripper import DCAT, EMMO, OTEIO, OWL, RDF, Triplestore
 from tripper.utils import AttrDict, as_python

 if TYPE_CHECKING:  # pragma: no cover
@@ -90,6 +90,12 @@
         "datadoc_label": "datasets",
         "@type": [DCAT.Dataset, EMMO.DataSet],
     },
+    "entry": {
+        # General data catalog entry that is not one of the above
+        # Ex: samples, instruments, models, people, projects, ...
+        "datadoc_label": "other_entries",  # XXX better label?
+        "@type": OWL.NamedIndividual,
+    },
 }
@@ -120,14 +126,15 @@ def save_dict(

     Notes:
         The keys in `dct` and `kwargs` may be either properties defined in
-        the [JSON-LD context](https://raw.githubusercontent.com/EMMC-ASBL/oteapi-dlite/refs/heads/rdf-serialisation/oteapi_dlite/context/0.2/context.json)
-        or one of the following special keywords:
+        the [JSON-LD context] or one of the following special keywords:
         - "@id": Dataset IRI. Must always be given.
         - "@type": IRI of the ontology class for this type of data.
           For datasets, it is typically used to refer to a specific
           subclass of `emmo:DataSet` that provides a semantic description
           of this dataset.
+
+    References:
+    [JSON-LD context]: https://raw.githubusercontent.com/EMMC-ASBL/oteapi-dlite/refs/heads/rdf-serialisation/oteapi_dlite/context/0.2/context.json
     """
     if "@id" not in dct:
         raise ValueError("`dct` must have an '@id' key")
@@ -136,7 +143,7 @@ def save_dict(
     if prefixes:
         all_prefixes.update(prefixes)

-    d = prepare(type=type, dct=dct, prefixes=all_prefixes, **kwargs)
+    d = as_jsonld(dct=dct, type=type, prefixes=all_prefixes, **kwargs)

     # Bind prefixes
     for prefix, ns in all_prefixes.items():
@@ -199,8 +206,7 @@ def save_extra_content(ts: Triplestore, dct: dict) -> None:
             except (
                 dlite.DLiteMissingInstanceError  # pylint: disable=no-member
             ):
-                # __FIXME__: check session whether want to warn or re-raise
-                # in this case
+                # __FIXME__: check session whether to warn or re-raise
                 warnings.warn(f"cannot load datamodel: {uri}")
             else:
                 add_dataset(ts, dm)
@@ -476,7 +482,7 @@ def save_datadoc(
     for spec in dicttypes.values():
         label = spec["datadoc_label"]
         for dct in get(d, label):
-            dct = prepare(types[label], dct, prefixes=prefixes)
+            dct = as_jsonld(dct=dct, type=types[label], prefixes=prefixes)
             f = io.StringIO(json.dumps(dct))
             with Triplestore(backend="rdflib") as ts2:
                 ts2.parse(f, format="json-ld")
@@ -505,52 +511,65 @@ def prepare_datadoc(datadoc: dict) -> dict:
     for type, spec in dicttypes.items():
         label = spec["datadoc_label"]
         for i, dct in enumerate(get(d, label)):
-            d[label][i] = prepare(type, dct, prefixes=d.prefixes)
+            d[label][i] = as_jsonld(dct=dct, type=type, prefixes=d.prefixes)

     return d


-def prepare(
-    type: str, dct: dict, prefixes: dict, _recur: bool = False, **kwargs
+def as_jsonld(
+    dct: dict,
+    type: "Optional[str]" = "dataset",
+    prefixes: "Optional[dict]" = None,
+    _entryid: "Optional[str]" = None,
+    **kwargs,
 ) -> dict:
-    """Return an updated copy of dict `dct` with additional key-value
-    pairs needed for serialisation to RDF.
+    """Return an updated copy of dict `dct` as valid JSON-LD.

     Arguments:
-        type: Type of dict to prepare. Should be one of: "dataset",
-            "distribution", "parser" or "generator".
         dct: Dict to return an updated copy of.
+        type: Type of dict to prepare. Should either be one of the
+            pre-defined names: "dataset", "distribution", "accessService",
+            "parser" and "generator" or an IRI to a class in an ontology.
+            Defaults to "dataset".
         prefixes: Dict with prefixes in addition to those included in the
             JSON-LD context. Should map namespace prefixes to IRIs.
-        _recur: Whether this function is called recursively. Intended for
-            internal use.
+        _entryid: Id of base entry that is documented. Intended for
+            internal use only.
         kwargs: Additional keyword arguments to add to the returned dict.
             A leading underscore in a key will be translated to a
-            leading "@"-sign. For example, "@id=..." may be provided
-            as "_id=...".
+            leading "@"-sign. For example, "@id" or "@context" may be
+            provided as "_id" or "_context", respectively.
+
     Returns:
-        An updated copy of `dct`.
+        An updated copy of `dct` as valid JSON-LD.
     """
     # pylint: disable=too-many-branches
-    if type not in dicttypes:
-        raise ValueError(
-            f"`type` must be one of: {', '.join(dicttypes.keys())}. "
" - f"Got: '{type}'" - ) - spec = dicttypes[type] - d = AttrDict() - if not _recur: + if not _entryid: d["@context"] = CONTEXT_URL - add(d, "@type", spec["@type"]) # get type at top - d.update(dct) - add(d, "@type", spec["@type"]) # readd type if overwritten + + if type: + t = dicttypes[type]["@type"] if type in dicttypes else type + add(d, "@type", t) # get type at top + d.update(dct) + add(d, "@type", t) # readd type if overwritten + else: + d.update(dct) for k, v in kwargs.items(): key = f"@{k[1:]}" if re.match("^_([^_]|([^_].*[^_]))$", k) else k add(d, key, v) + if "@id" not in d and not _entryid: + raise ValueError("Missing '@id' in dict to document") + + if not _entryid: + _entryid = d["@id"] + + if "@type" not in d: + warnings.warn(f"Missing '@type' in dict to document: {_entryid}") + all_prefixes = get_prefixes() if prefixes: all_prefixes.update(prefixes) @@ -584,9 +603,11 @@ def prepare( if isinstance(e, str): v[i] = expand_iri(e, all_prefixes) elif isinstance(e, dict) and k in nested: - v[i] = prepare(k, e, prefixes=prefixes) + v[i] = as_jsonld( + e, k, _entryid=_entryid, prefixes=prefixes + ) elif isinstance(v, dict) and k in nested: - d[k] = prepare(k, v, prefixes=prefixes) + d[k] = as_jsonld(v, k, _entryid=_entryid, prefixes=prefixes) return d @@ -711,31 +732,42 @@ def get_partial_pipeline( return pipeline -def list_dataset_iris(ts: Triplestore, **kwargs): - """Return a list of IRIs for all datasets matching a set of criterias - specified by `kwargs`. +def search_iris(ts: Triplestore, type=DCAT.Dataset, **kwargs): + """Return a list of IRIs for all entries of the given type. + Additional matching criterias can be specified by `kwargs`. + Arguments: ts: Triplestore to search. + type: Search for entries that are individuals of the class with + this IRI. The default is `dcat:Dataset`. kwargs: Match criterias. Examples: List all dataset IRIs: - list_dataset_iris(ts) + search_iris(ts) List IRIs of all datasets with John Doe as `contactPoint`: - list_dataset_iris(ts, contactPoint="John Doe") + search_iris(ts, contactPoint="John Doe") + + List IRIs of all samples: - List IRIs of all datasets with John Doe as `contactPoint` AND that are + search_iris(ts, type=CHAMEO.Sample) + + List IRIs of all datasets with John Doe as `contactPoint` AND are measured on a given sample: - list_dataset_iris( + search_iris( ts, contactPoint="John Doe", fromSample=SAMPLE.batch2/sample3 ) """ crit = [] + + if type: + crit.append(f" ?iri rdf:type <{type}> .") + expanded = {v: k for k, v in get_shortnames().items()} for k, v in kwargs.items(): key = f"@{k[1:]}" if k.startswith("_") else k @@ -748,14 +780,12 @@ def list_dataset_iris(ts: Triplestore, **kwargs): ) else: value = v - crit.append(f" ?dataset <{predicate}> {value} .") + crit.append(f" ?iri <{predicate}> {value} .") criterias = "\n".join(crit) query = f""" PREFIX rdf: <{RDF}> - PREFIX dcat: <{DCAT}> - SELECT ?dataset + SELECT ?iri WHERE {{ - ?dataset rdf:type dcat:Dataset . {criterias} }} """ From 94fa59a0788f49a33964aad14d0008b91ea4cf18 Mon Sep 17 00:00:00 2001 From: Jesper Friis Date: Mon, 16 Dec 2024 00:18:00 +0100 Subject: [PATCH 02/25] Added new TableDoc class providing a table interface for data documentation. 
---
 docs/api_reference/dataset/tabledoc.md |  3 +
 pyproject.toml                         |  7 ++-
 tests/dataset/dataset_paths.py         | 12 ++++
 tests/dataset/test_dataaccess.py       | 16 ++----
 tests/dataset/test_dataset.py          | 47 +++++++++------
 tests/dataset/test_tabledoc.py         | 79 ++++++++++++++++++++++++++
 tripper/dataset/__init__.py            |  1 +
 tripper/dataset/dataaccess.py          |  4 +-
 tripper/dataset/dataset.py             | 79 +++++++++++++++++++++++---
 tripper/dataset/tabledoc.py            | 68 ++++++++++++++++++++++
 10 files changed, 275 insertions(+), 41 deletions(-)
 create mode 100644 docs/api_reference/dataset/tabledoc.md
 create mode 100644 tests/dataset/dataset_paths.py
 create mode 100644 tests/dataset/test_tabledoc.py
 create mode 100644 tripper/dataset/tabledoc.py

diff --git a/docs/api_reference/dataset/tabledoc.md b/docs/api_reference/dataset/tabledoc.md
new file mode 100644
index 00000000..f3a73929
--- /dev/null
+++ b/docs/api_reference/dataset/tabledoc.md
@@ -0,0 +1,3 @@
+# tabledoc
+
+::: tripper.dataset.tabledoc
diff --git a/pyproject.toml b/pyproject.toml
index d5f7f94a..21196860 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -104,7 +104,8 @@ max-public-methods = 25
 max-locals = 20
 disable = [
     "fixme",
-    "too-many-positional-arguments",
+    "invalid-name",
+    #"too-many-positional-arguments",
 ]
 good-names = [
     # Default
@@ -115,8 +116,8 @@ good-names = [
     "s", "p", "o",
     # Namespaces
     "EX",
-    # dict, value, file, ...
-    "d", "v", "f",
+    # dict, value, file, keyword...
+    "d", "v", "f", "kw",
 ]

 [tool.pytest.ini_options]
diff --git a/tests/dataset/dataset_paths.py b/tests/dataset/dataset_paths.py
new file mode 100644
index 00000000..e84b2f47
--- /dev/null
+++ b/tests/dataset/dataset_paths.py
@@ -0,0 +1,12 @@
+"""Defines paths for tests.
+
+It defines some directories that can be used by the tests, with or
+without conftest.
+"""
+
+from pathlib import Path
+
+testdir = Path(__file__).resolve().parent.parent
+ontodir = testdir / "ontologies"
+indir = testdir / "input"
+outdir = testdir / "output"
diff --git a/tests/dataset/test_dataaccess.py b/tests/dataset/test_dataaccess.py
index c3a7b75d..bdc0ef45 100644
--- a/tests/dataset/test_dataaccess.py
+++ b/tests/dataset/test_dataaccess.py
@@ -2,18 +2,12 @@

 # pylint: disable=invalid-name,too-many-locals,duplicate-code

-from pathlib import Path
-
 import pytest
+from dataset_paths import outdir

 pytest.importorskip("yaml")
 pytest.importorskip("requests")

-thisdir = Path(__file__).resolve().parent
-testdir = thisdir.parent
-inputdir = testdir / "input"
-outputdir = testdir / "output"
-

 # if True:
 def test_save_and_load():
@@ -38,7 +32,6 @@ def test_save_and_load():
     # Test save dict
     save_dict(
         ts,
-        type="dataset",
         dct={
             "@id": SEMDATA.img1,
             "distribution": {
@@ -49,6 +42,7 @@ def test_save_and_load():
                 "format": "tiff",
             },
         },
+        type="dataset",
     )
     newdistr = load_dict(ts, SEMDATA.img1)
     assert newdistr["@type"] == [DCAT.Dataset, EMMO.DataSet]
@@ -57,12 +51,12 @@ def test_save_and_load():

     save_dict(
         ts,
-        type="generator",
         dct={
             "@id": GEN.sem_hitachi,
             "generatorType": "application/vnd.dlite-generate",
             "configuration": {"driver": "hitachi"},
         },
+        type="generator",
     )

     # Test load dataset (this downloads an actual image from github)
@@ -70,7 +64,7 @@ def test_save_and_load():
     assert len(data) == 53502

     # Test save dataset with anonymous distribution
-    newfile = outputdir / "newimage.tiff"
+    newfile = outdir / "newimage.tiff"
     newfile.unlink(missing_ok=True)
     buf = b"some bytes..."
     save(
@@ -94,7 +88,7 @@ def test_save_and_load():
     assert newimage.distribution.downloadURL == f"file:{newfile}"

     # Test save dataset with named distribution
-    newfile2 = outputdir / "newimage.png"
+    newfile2 = outdir / "newimage.png"
     newfile2.unlink(missing_ok=True)
     save(
         ts,
diff --git a/tests/dataset/test_dataset.py b/tests/dataset/test_dataset.py
index 105db426..4e43cd10 100644
--- a/tests/dataset/test_dataset.py
+++ b/tests/dataset/test_dataset.py
@@ -2,18 +2,12 @@

 # pylint: disable=invalid-name,too-many-locals,duplicate-code

-from pathlib import Path
-
 import pytest
+from dataset_paths import indir

 pytest.importorskip("yaml")
 pytest.importorskip("requests")

-thisdir = Path(__file__).resolve().parent
-testdir = thisdir.parent
-inputdir = testdir / "input"
-outputdir = testdir / "output"
-

 def test_get_jsonld_context():
     """Test get_jsonld_context()."""
@@ -73,12 +67,31 @@ def test_add():
     from tripper.dataset.dataset import add

     d = {}
-    add(d, "a", 1)
-    add(d, "b", 1)
-    add(d, "b", 1)
-    add(d, "a", 2)
-    add(d, "a", 1)
-    assert d == {"a": [1, 2], "b": 1}
+    add(d, "a", "1")
+    add(d, "b", "1")
+    add(d, "b", "1")
+    add(d, "a", "2")
+    add(d, "a", "1")
+    add(d, "a", {"c": "3"})
+    assert d == {"a": ["1", "2", {"c": "3"}], "b": "1"}
+
+
+def test_addnested():
+    """Test help-function addnested()."""
+    from tripper.dataset.dataset import addnested
+    from tripper.utils import AttrDict
+
+    d = AttrDict()
+    addnested(d, "a.b", "1")
+    assert d == {"a": {"b": "1"}}
+
+    addnested(d, "a", "2")
+    assert d == {"a": ["2", {"b": "1"}]}
+
+    addnested(d, "a.b.c", {"d": "3"})
+    assert d.a[0] == "2"
+    assert d.a[1].b[1].c == {"d": "3"}
+    assert d == {"a": ["2", {"b": ["1", {"c": {"d": "3"}}]}]}


 def test_get():
@@ -124,7 +137,7 @@ def test_datadoc():
     ts = Triplestore("rdflib")

     # Load data documentation into triplestore
-    datadoc = save_datadoc(ts, inputdir / "semdata.yaml")
+    datadoc = save_datadoc(ts, indir / "semdata.yaml")
     assert isinstance(datadoc, dict)
     assert "@context" in datadoc
@@ -167,8 +180,8 @@ def test_datadoc():
     # Test save dict
     save_dict(
         ts,
-        "distribution",
-        {"@id": SEMDATA.newdistr, "format": "txt"},
+        dct={"@id": SEMDATA.newdistr, "format": "txt"},
+        type="distribution",
         prefixes={"echem": "https://w3id.org/emmo/domain/electrochemistry"},
     )
     newdistr = load_dict(ts, SEMDATA.newdistr)
@@ -210,7 +223,7 @@ def test_pipeline():

     # Prepare triplestore
     ts = Triplestore("rdflib")
-    save_datadoc(ts, inputdir / "semdata.yaml")
+    save_datadoc(ts, indir / "semdata.yaml")

     SEMDATA = ts.namespaces["semdata"]

diff --git a/tests/dataset/test_tabledoc.py b/tests/dataset/test_tabledoc.py
new file mode 100644
index 00000000..77e81dcc
--- /dev/null
+++ b/tests/dataset/test_tabledoc.py
@@ -0,0 +1,79 @@
+"""Test the dataset module."""
+
+from tripper import Triplestore
+from tripper.dataset import TableDoc
+
+
+# if True:
+def test_as_dicts():
+    """Test the as_dicts() method."""
+
+    from tripper import DCAT, EMMO, Namespace
+
+    ONTO = Namespace("http:/example.com/onto#")
+    DS = Namespace("http:/example.com/datasets#")
+
+    td = TableDoc(
+        header=[
+            "@id",
+            "@type",
+            "@type",
+            "inSeries",
+            "distribution.downloadURL",
+        ],
+        data=[
+            ("ds:s1", "onto:T1", "onto:T2", None, "file:///data/"),
+            ("ds:d1", "onto:T1", None, "ds:s1", "file:///data/d1.txt"),
+            ("ds:d2", "onto:T2", None, "ds:s1", "file:///data/d2.txt"),
+        ],
+        prefixes={
+            "onto": "http:/example.com/onto#",
+            "ds": "http:/example.com/datasets#",
+        },
+        # context={
+        #    "ds": "http:/example.com/datasets#",
+        # },
+    )
+
+    s1, d1, d2 = td.asdicts()  # pylint: disable=unbalanced-tuple-unpacking
+
+    assert s1["@id"] == DS.s1
+    assert set(s1["@type"]) == {
+        DCAT.Dataset,
+        EMMO.DataSet,
+        ONTO.T1,
+        ONTO.T2,
+    }
+    assert "inSeries" not in s1
+    assert s1.distribution == {
+        "@type": DCAT.Distribution,
+        "downloadURL": "file:///data/",
+    }
+
+    assert d1["@id"] == DS.d1
+    assert set(d1["@type"]) == {
+        DCAT.Dataset,
+        EMMO.DataSet,
+        ONTO.T1,
+    }
+    assert d1.inSeries == DS.s1
+    assert d1.distribution == {
+        "@type": DCAT.Distribution,
+        "downloadURL": "file:///data/d1.txt",
+    }
+
+    assert d2["@id"] == DS.d2
+    assert set(d2["@type"]) == {
+        DCAT.Dataset,
+        EMMO.DataSet,
+        ONTO.T2,
+    }
+    assert d2.inSeries == DS.s1
+    assert d2.distribution == {
+        "@type": DCAT.Distribution,
+        "downloadURL": "file:///data/d2.txt",
+    }
+
+    ts = Triplestore(backend="rdflib")
+    td.save(ts)
+    print(ts.serialize())
diff --git a/tripper/dataset/__init__.py b/tripper/dataset/__init__.py
index d6435b8d..e0b53d58 100644
--- a/tripper/dataset/__init__.py
+++ b/tripper/dataset/__init__.py
@@ -12,3 +12,4 @@
     save_dict,
     search_iris,
 )
+from .tabledoc import TableDoc
diff --git a/tripper/dataset/dataaccess.py b/tripper/dataset/dataaccess.py
index 672b2a59..3e248e36 100644
--- a/tripper/dataset/dataaccess.py
+++ b/tripper/dataset/dataaccess.py
@@ -175,9 +175,9 @@ def save(
     # Update triplestore
     ts.add_triples(triples)
     if save_dataset:
-        save_dict(ts, "dataset", dataset, prefixes=prefixes)
+        save_dict(ts, dataset, "dataset", prefixes=prefixes)
     elif save_distribution:
-        save_dict(ts, "distribution", distribution, prefixes=prefixes)
+        save_dict(ts, distribution, "distribution", prefixes=prefixes)

     return dataset["@id"]

diff --git a/tripper/dataset/dataset.py b/tripper/dataset/dataset.py
index ba4a0fde..2bb5e6a1 100644
--- a/tripper/dataset/dataset.py
+++ b/tripper/dataset/dataset.py
@@ -101,8 +101,8 @@

 def save_dict(
     ts: Triplestore,
-    type: str,
     dct: dict,
+    type: str = "dataset",
     prefixes: "Optional[dict]" = None,
     **kwargs,
 ) -> dict:
@@ -111,9 +111,11 @@ def save_dict(

     Arguments:
         ts: Triplestore to save to.
-        type: Type of dict to save. Should be one of: "dataset",
-            "distribution", "parser" or "generator".
         dct: Dict with data to save.
+        type: Type of data to save. Should either be one of the
+            pre-defined names: "dataset", "distribution", "accessService",
+            "parser" and "generator" or an IRI to a class in an ontology.
+            Defaults to "dataset".
         prefixes: Dict with prefixes in addition to those included in the
             JSON-LD context. Should map namespace prefixes to IRIs.
         kwargs: Additional keyword arguments to add to the returned dict.
@@ -333,6 +335,9 @@ def get_values(
     return values


+# TODO: update this function to take an initial argument `context`,
+# which can be a URL (string), a dict with the raw context, or a list
+# of strings or dicts.
 @cache  # type: ignore
 def get_jsonld_context(timeout: float = 5, fromfile: bool = True) -> dict:
     """Returns the JSON-LD context as a dict.
@@ -355,6 +360,8 @@ def get_jsonld_context(timeout: float = 5, fromfile: bool = True) -> dict:
     return context


+# TODO: update this to take an initial argument `context`.
+# See get_jsonld_context()
 def get_prefixes(timeout: float = 5) -> dict:
     """Loads the JSON-LD context and returns a dict mapping prefixes to
     their namespace URL."""
@@ -367,6 +374,8 @@ def get_prefixes(timeout: float = 5) -> dict:
     return prefixes


+# TODO: update this to take an initial argument `context`.
+# See get_jsonld_context()
 def get_shortnames(timeout: float = 5) -> dict:
     """Loads the JSON-LD context and returns a dict mapping IRIs to their
     short names defined in the context."""
@@ -407,9 +416,61 @@ def add(d: dict, key: str, value: "Any") -> None:
         d[key] = value
     else:
         klst = d[key] if isinstance(d[key], list) else [d[key]]
-        vlst = value if isinstance(value, list) else [value]
-        v = list(set(klst).union(vlst))
-        d[key] = v[0] if len(v) == 1 else sorted(v)
+        if isinstance(value, dict):
+            v = klst if value in klst else klst + [value]
+        else:
+            vlst = value if isinstance(value, list) else [value]
+            try:
+                v = list(set(klst).union(vlst))
+            except TypeError:  # klst contains unhashable dicts
+                v = klst + [x for x in vlst if x not in klst]
+        d[key] = (
+            v[0]
+            if len(v) == 1
+            else sorted(
+                # Sort dicts at end, by representing them with a huge
+                # unicode character
+                v,
+                key=lambda x: "\uffff" if isinstance(x, dict) else x,
+            )
+        )
+
+
+def addnested(d: "Union[dict, list]", key: str, value: "Any"):
+    """Like add(), but allows `key` to be a dot-separated list of sub-keys.
+
+    Each sub-key will be added to `d` as a corresponding sub-dict.
+
+    Example:
+
+        >>> d = {}
+        >>> addnested(d, "a.b.c", "val")
+        {'a': {'b': {'c': 'val'}}}
+
+    """
+    if "." in key:
+        first, rest = key.split(".", 1)
+        if isinstance(d, list):
+            for ele in d:
+                if isinstance(ele, dict):
+                    addnested(ele, key, value)
+                    break
+            else:
+                d.append(addnested({}, key, value))
+        elif first in d and isinstance(d[first], (dict, list)):
+            addnested(d[first], rest, value)
+        else:
+            addnested(d, first, addnested(AttrDict(), rest, value))
+    elif isinstance(d, list):
+        for ele in d:
+            if isinstance(ele, dict):
+                add(ele, key, value)
+                break
+        else:
+            d.append({key: value})
+    else:
+        add(d, key, value)
+    return d


 def get(
@@ -516,6 +577,8 @@ def prepare_datadoc(datadoc: dict) -> dict:
     return d


+# TODO: update this function to correctly handle multiple contexts
+# provided with the `_context` keyword argument.
 def as_jsonld(
     dct: dict,
     type: "Optional[str]" = "dataset",
@@ -526,8 +589,8 @@ def as_jsonld(
     """Return an updated copy of dict `dct` as valid JSON-LD.

     Arguments:
-        dct: Dict to return an updated copy of.
-        type: Type of dict to prepare. Should either be one of the
+        dct: Dict with data documentation represented as JSON-LD.
+        type: Type of data to document. Should either be one of the
             pre-defined names: "dataset", "distribution", "accessService",
             "parser" and "generator" or an IRI to a class in an ontology.
             Defaults to "dataset".
diff --git a/tripper/dataset/tabledoc.py b/tripper/dataset/tabledoc.py
new file mode 100644
index 00000000..b1a8ef51
--- /dev/null
+++ b/tripper/dataset/tabledoc.py
@@ -0,0 +1,68 @@
+"""Basic interface for tabular documentation of datasets."""
+
+from typing import TYPE_CHECKING
+
+from tripper import Triplestore
+from tripper.dataset.dataset import addnested, as_jsonld, save_dict
+from tripper.utils import AttrDict
+
+if TYPE_CHECKING:  # pragma: no cover
+    from typing import List, Optional, Sequence, Union
+
+
+class TableDoc:
+    """Representation of tabular documentation of datasets.
+
+    Arguments:
+        header: Sequence of column header labels. Nested data can
+            be represented by dot-separated label strings (e.g.
+            "distribution.downloadURL")
+        data: Sequence of rows of data. Each row documents an entry.
+        type: Type of data to save (applies to all rows). Should
+            either be one of the pre-defined names: "dataset",
+            "distribution", "accessService", "parser" and "generator"
+            or an IRI to a class in an ontology. Defaults to
+            "dataset".
+        prefixes: Dict with prefixes in addition to those included in the
+            JSON-LD context. Should map namespace prefixes to IRIs.
+        context: Dict with user-defined JSON-LD context.
+
+    """
+
+    # pylint: disable=redefined-builtin,too-few-public-methods
+
+    def __init__(
+        self,
+        header: "Sequence[str]",
+        data: "Sequence[Sequence[str]]",
+        type: "Optional[str]" = "dataset",
+        prefixes: "Optional[dict]" = None,
+        context: "Optional[Union[dict, list]]" = None,
+    ):
+        self.header = header
+        self.data = data
+        self.type = type
+        self.prefixes = prefixes
+        self.context = context
+
+    def asdicts(self) -> "List[dict]":
+        """Return the table as a list of dicts."""
+        kw = {"_context": self.context} if self.context else {}
+
+        results = []
+        for row in self.data:
+            d = AttrDict()
+            for i, colname in enumerate(self.header):
+                cell = row[i]
+                if cell:
+                    addnested(d, colname, cell)
+            jsonld = as_jsonld(
+                d, type=self.type, prefixes=self.prefixes, **kw  # type: ignore
+            )
+            results.append(jsonld)
+        return results
+
+    def save(self, ts: Triplestore) -> None:
+        """Save tabular data documentation to triplestore."""
+        for d in self.asdicts():
+            save_dict(ts, d)

From 028054fad4ae6a7e0055ad746b90f200c807e965 Mon Sep 17 00:00:00 2001
From: Jesper Friis
Date: Mon, 16 Dec 2024 00:39:21 +0100
Subject: [PATCH 03/25] Import indir/outdir inside test functions

---
 tests/dataset/test_dataaccess.py | 3 ++-
 tests/dataset/test_dataset.py    | 5 ++++-
 tripper/dataset/dataset.py       | 3 ++-
 3 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/tests/dataset/test_dataaccess.py b/tests/dataset/test_dataaccess.py
index bdc0ef45..0cbc7727 100644
--- a/tests/dataset/test_dataaccess.py
+++ b/tests/dataset/test_dataaccess.py
@@ -3,7 +3,6 @@
 # pylint: disable=invalid-name,too-many-locals,duplicate-code

 import pytest
-from dataset_paths import outdir

 pytest.importorskip("yaml")
 pytest.importorskip("requests")
@@ -14,6 +13,8 @@ def test_save_and_load():
     """Test save() and load()."""
     # pylint: disable=too-many-statements

+    from dataset_paths import outdir
+
     from tripper import DCAT, DCTERMS, EMMO, Triplestore
     from tripper.dataset import load, load_dict, save, save_dict

diff --git a/tests/dataset/test_dataset.py b/tests/dataset/test_dataset.py
index 4e43cd10..1a0cffbd 100644
--- a/tests/dataset/test_dataset.py
+++ b/tests/dataset/test_dataset.py
@@ -3,7 +3,6 @@
 # pylint: disable=invalid-name,too-many-locals,duplicate-code

 import pytest
-from dataset_paths import indir

 pytest.importorskip("yaml")
 pytest.importorskip("requests")
@@ -128,6 +127,8 @@ def test_datadoc():
     """Test save_datadoc() and load_dict()/save_dict()."""
     # pylint: disable=too-many-statements

+    from dataset_paths import indir
+
     from tripper import CHAMEO, DCAT, EMMO, OTEIO, Triplestore
     from tripper.dataset import load_dict, save_datadoc, save_dict, search_iris

@@ -219,6 +220,8 @@ def test_pipeline():
     from tripper import Triplestore

     otelib = pytest.importorskip("otelib")
+    from dataset_paths import indir
+
     from tripper.dataset import get_partial_pipeline, save_datadoc

     # Prepare triplestore
diff --git a/tripper/dataset/dataset.py b/tripper/dataset/dataset.py
index 2bb5e6a1..13fdb935 100644
--- a/tripper/dataset/dataset.py
+++ b/tripper/dataset/dataset.py
@@ -445,7 +445,8 @@ def addnested(d: "Union[dict, list]", key: str, value: "Any"):

         >>> d = {}
         >>> addnested(d, "a.b.c", "val")
-        {'a': {'b': {'c': 'val'}}}
+        >>> d == {'a': {'b': {'c': 'val'}}}
+        True

     """
     if "." in key:

From ef5239ad7e96f586c5ce970cec834cf7881864ff Mon Sep 17 00:00:00 2001
From: Jesper Friis
Date: Mon, 16 Dec 2024 00:56:02 +0100
Subject: [PATCH 04/25] Fixed doctest issue

---
 tripper/dataset/dataset.py | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/tripper/dataset/dataset.py b/tripper/dataset/dataset.py
index 13fdb935..8db5b497 100644
--- a/tripper/dataset/dataset.py
+++ b/tripper/dataset/dataset.py
@@ -38,9 +38,6 @@
 from pathlib import Path
 from typing import TYPE_CHECKING

-import requests
-import yaml  # type: ignore
-
 from tripper import DCAT, EMMO, OTEIO, OWL, RDF, Triplestore
 from tripper.utils import AttrDict, as_python

@@ -171,6 +168,8 @@ def save_extra_content(ts: Triplestore, dct: dict) -> None:
     - data models (require that DLite is installed)

     """
+    import requests
+
     # Save statements and mappings
     statements = get_values(dct, "statements")
     statements.extend(get_values(dct, "mappings"))
@@ -351,6 +350,8 @@ def get_jsonld_context(timeout: float = 5, fromfile: bool = True) -> dict:
         fromfile: Whether to load the context from local file.

     """
+    import requests
+
     if fromfile:
         with open(CONTEXT_PATH[7:], "r", encoding="utf-8") as f:
             context = json.load(f)["@context"]
@@ -436,16 +437,18 @@ def add(d: dict, key: str, value: "Any") -> None:
         )


-def addnested(d: "Union[dict, list]", key: str, value: "Any"):
+def addnested(
+    d: "Union[dict, list]", key: str, value: "Any"
+) -> "Union[dict, list]":
     """Like add(), but allows `key` to be a dot-separated list of sub-keys.
+    Returns the updated `d`.

     Each sub-key will be added to `d` as a corresponding sub-dict.

     Example:

         >>> d = {}
-        >>> addnested(d, "a.b.c", "val")
-        >>> d == {'a': {'b': {'c': 'val'}}}
+        >>> addnested(d, "a.b.c", "val") == {'a': {'b': {'c': 'val'}}}
         True

@@ -508,6 +511,8 @@ def expand_iri(iri: str, prefixes: dict) -> str:

 def read_datadoc(filename: "Union[str, Path]") -> dict:
     """Read YAML data documentation and return it as a dict."""
+    import yaml  # type: ignore
+
     with open(filename, "r", encoding="utf-8") as f:
         d = yaml.safe_load(f)
     return prepare_datadoc(d)

From 331878a1756ed225d48fc3a2e0acdd567ccc4774 Mon Sep 17 00:00:00 2001
From: Jesper Friis
Date: Mon, 16 Dec 2024 01:01:22 +0100
Subject: [PATCH 05/25] Skip test_tabledoc if rdflib isn't available

---
 tests/dataset/test_tabledoc.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tests/dataset/test_tabledoc.py b/tests/dataset/test_tabledoc.py
index 77e81dcc..49902b69 100644
--- a/tests/dataset/test_tabledoc.py
+++ b/tests/dataset/test_tabledoc.py
@@ -1,5 +1,7 @@
 """Test the dataset module."""

+import pytest
+
 from tripper import Triplestore
 from tripper.dataset import TableDoc

@@ -10,6 +12,8 @@ def test_as_dicts():

     from tripper import DCAT, EMMO, Namespace

+    pytest.importorskip("rdflib")
+
     ONTO = Namespace("http:/example.com/onto#")
     DS = Namespace("http:/example.com/datasets#")

From 5fe9cf7c387a9fb3ccd02856c0878ef8dceb8eba Mon Sep 17 00:00:00 2001
From: Jesper Friis
Date: Mon, 16 Dec 2024 01:04:02 +0100
Subject: [PATCH 06/25] More pylint fixes...
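
The import-error warnings come from importing the local dataset_paths
helper, which is not an installed package. A sketch of the intended
pattern (simplified from the actual tests; the assert is only an
illustration):

    import pytest

    pytest.importorskip("yaml")
    pytest.importorskip("requests")


    def test_save_and_load():
        """Test save() and load()."""
        # Local test helper, not an installed package
        from dataset_paths import outdir  # pylint: disable=import-error

        assert outdir.name == "output"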
---
 tests/dataset/test_dataaccess.py | 2 +-
 tests/dataset/test_dataset.py    | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/dataset/test_dataaccess.py b/tests/dataset/test_dataaccess.py
index 0cbc7727..ecf98dba 100644
--- a/tests/dataset/test_dataaccess.py
+++ b/tests/dataset/test_dataaccess.py
@@ -13,7 +13,7 @@ def test_save_and_load():
     """Test save() and load()."""
     # pylint: disable=too-many-statements

-    from dataset_paths import outdir
+    from dataset_paths import outdir  # pytest: disable=import-error

     from tripper import DCAT, DCTERMS, EMMO, Triplestore
     from tripper.dataset import load, load_dict, save, save_dict
diff --git a/tests/dataset/test_dataset.py b/tests/dataset/test_dataset.py
index 1a0cffbd..4fb5ec09 100644
--- a/tests/dataset/test_dataset.py
+++ b/tests/dataset/test_dataset.py
@@ -127,7 +127,7 @@ def test_datadoc():
     """Test save_datadoc() and load_dict()/save_dict()."""
     # pylint: disable=too-many-statements

-    from dataset_paths import indir
+    from dataset_paths import indir  # pytest: disable=import-error

     from tripper import CHAMEO, DCAT, EMMO, OTEIO, Triplestore
     from tripper.dataset import load_dict, save_datadoc, save_dict, search_iris
@@ -220,7 +220,7 @@ def test_pipeline():
     from tripper import Triplestore

     otelib = pytest.importorskip("otelib")
-    from dataset_paths import indir
+    from dataset_paths import indir  # pytest: disable=import-error

     from tripper.dataset import get_partial_pipeline, save_datadoc

From 4aaeed8551b56ece4c7c9af151ce3152a2779077 Mon Sep 17 00:00:00 2001
From: Jesper Friis
Date: Mon, 16 Dec 2024 01:08:20 +0100
Subject: [PATCH 07/25] Placed importorskip before importing EMMO

---
 tests/dataset/test_tabledoc.py | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/tests/dataset/test_tabledoc.py b/tests/dataset/test_tabledoc.py
index 49902b69..4a1c0613 100644
--- a/tests/dataset/test_tabledoc.py
+++ b/tests/dataset/test_tabledoc.py
@@ -2,18 +2,16 @@

 import pytest

-from tripper import Triplestore
-from tripper.dataset import TableDoc
-

 # if True:
 def test_as_dicts():
     """Test the as_dicts() method."""

-    from tripper import DCAT, EMMO, Namespace
-
     pytest.importorskip("rdflib")

+    from tripper import DCAT, EMMO, Namespace, Triplestore
+    from tripper.dataset import TableDoc
+
     ONTO = Namespace("http:/example.com/onto#")
     DS = Namespace("http:/example.com/datasets#")

From 0f21fbbde6f6b6eaaf6583145f403fb7f841c0a7 Mon Sep 17 00:00:00 2001
From: Jesper Friis
Date: Mon, 16 Dec 2024 01:12:12 +0100
Subject: [PATCH 08/25] typo

---
 tests/dataset/test_dataaccess.py | 2 +-
 tests/dataset/test_dataset.py    | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/dataset/test_dataaccess.py b/tests/dataset/test_dataaccess.py
index ecf98dba..af058440 100644
--- a/tests/dataset/test_dataaccess.py
+++ b/tests/dataset/test_dataaccess.py
@@ -13,7 +13,7 @@ def test_save_and_load():
     """Test save() and load()."""
     # pylint: disable=too-many-statements

-    from dataset_paths import outdir  # pytest: disable=import-error
+    from dataset_paths import outdir  # pylint: disable=import-error

     from tripper import DCAT, DCTERMS, EMMO, Triplestore
     from tripper.dataset import load, load_dict, save, save_dict
diff --git a/tests/dataset/test_dataset.py b/tests/dataset/test_dataset.py
index 4fb5ec09..9bdec2c6 100644
--- a/tests/dataset/test_dataset.py
+++ b/tests/dataset/test_dataset.py
@@ -127,7 +127,7 @@ def test_datadoc():
     """Test save_datadoc() and load_dict()/save_dict()."""
     # pylint: disable=too-many-statements

-    from dataset_paths import indir  # pytest: disable=import-error
+    from dataset_paths import indir  # pylint: disable=import-error

     from tripper import CHAMEO, DCAT, EMMO, OTEIO, Triplestore
     from tripper.dataset import load_dict, save_datadoc, save_dict, search_iris
@@ -220,7 +220,7 @@ def test_pipeline():
     from tripper import Triplestore

     otelib = pytest.importorskip("otelib")
-    from dataset_paths import indir  # pytest: disable=import-error
+    from dataset_paths import indir  # pylint: disable=import-error

     from tripper.dataset import get_partial_pipeline, save_datadoc

From 4cc88cb0fb119697f35184177e9dbfb697141ac4 Mon Sep 17 00:00:00 2001
From: Jesper Friis
Date: Mon, 16 Dec 2024 15:53:34 +0100
Subject: [PATCH 09/25] Fixed pylint errors

---
 tripper/dataset/dataset.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/tripper/dataset/dataset.py b/tripper/dataset/dataset.py
index b98eaeaa..bbb4a178 100644
--- a/tripper/dataset/dataset.py
+++ b/tripper/dataset/dataset.py
@@ -42,9 +42,6 @@
 from pathlib import Path
 from typing import TYPE_CHECKING

-import requests
-import yaml  # type: ignore
-
 from tripper import DCAT, EMMO, OTEIO, OWL, RDF, Triplestore
 from tripper.utils import AttrDict, as_python

From 92b213d7b2a292b04ddbaf0921d4e046a27e95db Mon Sep 17 00:00:00 2001
From: Jesper Friis
Date: Thu, 19 Dec 2024 10:10:35 +0100
Subject: [PATCH 10/25] added csv file

---
 tests/input/semdata.csv | 5 +++++
 1 file changed, 5 insertions(+)
 create mode 100644 tests/input/semdata.csv

diff --git a/tests/input/semdata.csv b/tests/input/semdata.csv
new file mode 100644
index 00000000..631d9e69
--- /dev/null
+++ b/tests/input/semdata.csv
@@ -0,0 +1,5 @@
+@id;@type;title;description;creator;contactPoint;inSeries;datamodel;datamodelStorage;distribution.downloadURL;distribution.mediaType;distribution.parser;fromSample;isDescriptionOf
+semdata:SEM_cement_batch2/77600-23-001/77600-23-001_5kV_400x_m001;sem:SEMImage;SEM image of cement;Back-scattered SEM image of cement sample 77600 from Heidelberg, polished with 1 µm diamond compound.;Sigurd Wenner;Sigurd Wenner ;semdata:SEM_cement_batch2/77600-23-001;http://onto-ns.com/meta/matchmaker/0.2/SEMImage;https://github.com/HEU-MatCHMaker/DataDocumentation/blob/master/SEM/datamodels/SEMImage.yaml;https://github.com/EMMC-ASBL/tripper/raw/refs/heads/dataset/tests/input/77600-23-001_5kV_400x_m001.tif;image/tiff;parser:sem_hitachi;sample:SEM_cement_batch2/77600-23-001;mat:concrete1
+semdata:SEM_cement_batch2/77600-23-001;sem:SEMImageSeries;Series of SEM image of cement sample 77600;Back-scattered SEM image of cement sample 77600, polished with 1 µm diamond compound.;Sigurd Wenner;Sigurd Wenner ;semdata:SEM_cement_batch2; ;;sftp://nas.aimen.es/P_MATCHMAKER_SHARE_SINTEF/SEM_cement_batch2/77600-23-001;inode/directory;;;
+semdata:SEM_cement_batch2;sem:SEMImageSeries;Nested series of SEM images of cement batch2;…;Sigurd Wenner;Sigurd Wenner ; ;;;sftp://nas.aimen.es/P_MATCHMAKER_SHARE_SINTEF/SEM_cement_batch2;inode/directory;;;
+mple:SEM_cement_batch2/77600-23-001;chameo:Sample;Series for SEM images for sample 77600-23-001.; ;;;;;;;;;;

From ae20a0a3dc36926c90511f522b47dbbce5b04259 Mon Sep 17 00:00:00 2001
From: Jesper Friis
Date: Thu, 19 Dec 2024 10:54:48 +0100
Subject: [PATCH 11/25] Added csv parser

---
 tests/dataset/test_tabledoc.py | 28 ++++++++++++++++++++++++++++
 tests/input/semdata.csv        |  8 ++++----
 tripper/dataset/tabledoc.py    | 25 +++++++++++++++++++++++++
 3 files changed, 57 insertions(+), 4 deletions(-)

diff --git a/tests/dataset/test_tabledoc.py b/tests/dataset/test_tabledoc.py
index 4a1c0613..52ea9236 100644
--- a/tests/dataset/test_tabledoc.py
+++ b/tests/dataset/test_tabledoc.py
@@ -79,3 +79,31 @@ def test_as_dicts():
     ts = Triplestore(backend="rdflib")
     td.save(ts)
     print(ts.serialize())
+
+
+if True:
+    # def test_parse_csv():
+    """Test parsing a csv file."""
+    from dataset_paths import indir  # pylint: disable=import-error
+
+    from tripper.dataset import TableDoc
+
+    td = TableDoc.parse_csv(
+        indir / "semdata.csv",
+        delimiter=";",
+        prefixes={
+            "sem": "https://w3id.com/emmo/domain/sem/0.1#",
+            "semdata": "https://he-matchmaker.eu/data/sem/",
+            "sample": "https://he-matchmaker.eu/sample/",
+            "mat": "https://he-matchmaker.eu/material/",
+            "dm": "http://onto-ns.com/meta/characterisation/0.1/SEMImage#",
+            "parser": "http://sintef.no/dlite/parser#",
+            "gen": "http://sintef.no/dlite/generator#",
+        },
+    )
+
+    img, series, batch, sample = td.asdicts()
+    assert img["@id"] == (
+        "https://he-matchmaker.eu/data/sem/SEM_cement_batch2/"
+        "77600-23-001/77600-23-001_5kV_400x_m001"
+    )
diff --git a/tests/input/semdata.csv b/tests/input/semdata.csv
index 631d9e69..4df732ef 100644
--- a/tests/input/semdata.csv
+++ b/tests/input/semdata.csv
@@ -1,5 +1,5 @@
 @id;@type;title;description;creator;contactPoint;inSeries;datamodel;datamodelStorage;distribution.downloadURL;distribution.mediaType;distribution.parser;fromSample;isDescriptionOf
-semdata:SEM_cement_batch2/77600-23-001/77600-23-001_5kV_400x_m001;sem:SEMImage;SEM image of cement;Back-scattered SEM image of cement sample 77600 from Heidelberg, polished with 1 µm diamond compound.;Sigurd Wenner;Sigurd Wenner ;semdata:SEM_cement_batch2/77600-23-001;http://onto-ns.com/meta/matchmaker/0.2/SEMImage;https://github.com/HEU-MatCHMaker/DataDocumentation/blob/master/SEM/datamodels/SEMImage.yaml;https://github.com/EMMC-ASBL/tripper/raw/refs/heads/dataset/tests/input/77600-23-001_5kV_400x_m001.tif;image/tiff;parser:sem_hitachi;sample:SEM_cement_batch2/77600-23-001;mat:concrete1
-semdata:SEM_cement_batch2/77600-23-001;sem:SEMImageSeries;Series of SEM image of cement sample 77600;Back-scattered SEM image of cement sample 77600, polished with 1 µm diamond compound.;Sigurd Wenner;Sigurd Wenner ;semdata:SEM_cement_batch2; ;;sftp://nas.aimen.es/P_MATCHMAKER_SHARE_SINTEF/SEM_cement_batch2/77600-23-001;inode/directory;;;
-semdata:SEM_cement_batch2;sem:SEMImageSeries;Nested series of SEM images of cement batch2;…;Sigurd Wenner;Sigurd Wenner ; ;;;sftp://nas.aimen.es/P_MATCHMAKER_SHARE_SINTEF/SEM_cement_batch2;inode/directory;;;
-mple:SEM_cement_batch2/77600-23-001;chameo:Sample;Series for SEM images for sample 77600-23-001.; ;;;;;;;;;;
+semdata:SEM_cement_batch2/77600-23-001/77600-23-001_5kV_400x_m001;sem:SEMImage;SEM image of cement;Back-scattered SEM image of cement sample 77600 from Heidelberg, polished with 1 um diamond compound.;Sigurd Wenner;Sigurd Wenner ;semdata:SEM_cement_batch2/77600-23-001;http://onto-ns.com/meta/matchmaker/0.2/SEMImage;https://github.com/HEU-MatCHMaker/DataDocumentation/blob/master/SEM/datamodels/SEMImage.yaml;https://github.com/EMMC-ASBL/tripper/raw/refs/heads/dataset/tests/input/77600-23-001_5kV_400x_m001.tif;image/tiff;parser:sem_hitachi;sample:SEM_cement_batch2/77600-23-001;mat:concrete1
+semdata:SEM_cement_batch2/77600-23-001;sem:SEMImageSeries;Series of SEM image of cement sample 77600;Back-scattered SEM image of cement sample 77600, polished with 1 um diamond compound.;Sigurd Wenner;Sigurd Wenner ;semdata:SEM_cement_batch2; ;;sftp://nas.aimen.es/P_MATCHMAKER_SHARE_SINTEF/SEM_cement_batch2/77600-23-001;inode/directory;;;
+semdata:SEM_cement_batch2;sem:SEMImageSeries;Nested series of SEM images of cement batch2;...;Sigurd Wenner;Sigurd Wenner ; ;;;sftp://nas.aimen.es/P_MATCHMAKER_SHARE_SINTEF/SEM_cement_batch2;inode/directory;;;
+sample:SEM_cement_batch2/77600-23-001;chameo:Sample;Series for SEM images for sample 77600-23-001.; ;;;;;;;;;;
diff --git a/tripper/dataset/tabledoc.py b/tripper/dataset/tabledoc.py
index b1a8ef51..50fe86b7 100644
--- a/tripper/dataset/tabledoc.py
+++ b/tripper/dataset/tabledoc.py
@@ -1,5 +1,7 @@
 """Basic interface for tabular documentation of datasets."""

+import csv
+from pathlib import Path
 from typing import TYPE_CHECKING

 from tripper import Triplestore
@@ -66,3 +68,26 @@ def save(self, ts: Triplestore) -> None:
         """Save tabular data documentation to triplestore."""
         for d in self.asdicts():
             save_dict(ts, d)
+
+    @classmethod
+    def parse_csv(
+        self,
+        csvfile: "Union[Path, str]",
+        type: "Optional[str]" = "dataset",
+        prefixes: "Optional[dict]" = None,
+        context: "Optional[Union[dict, list]]" = None,
+        dialect="excel",
+        **kwargs,
+    ) -> None:
+        """Parse a csv file."""
+        with open(csvfile, newline="") as f:
+            reader = csv.reader(f, dialect=dialect, **kwargs)
+            header = next(reader)[0].split(reader.dialect.delimiter)
+            data = [row for row in reader]
+        return TableDoc(
+            header=header,
+            data=data,
+            type=type,
+            prefixes=prefixes,
+            context=context,
+        )

From 543e99e743876f68eaa8913c02d9df33ffc3ca07 Mon Sep 17 00:00:00 2001
From: Jesper Friis
Date: Thu, 19 Dec 2024 10:59:01 +0100
Subject: [PATCH 12/25] Updated the test

---
 tests/dataset/test_tabledoc.py | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/tests/dataset/test_tabledoc.py b/tests/dataset/test_tabledoc.py
index 52ea9236..ae7ca348 100644
--- a/tests/dataset/test_tabledoc.py
+++ b/tests/dataset/test_tabledoc.py
@@ -81,13 +81,16 @@ def test_as_dicts():
     print(ts.serialize())


-if True:
-    # def test_parse_csv():
+#if True:
+def test_parse_csv():
     """Test parsing a csv file."""
     from dataset_paths import indir  # pylint: disable=import-error

+    from tripper import Triplestore
     from tripper.dataset import TableDoc

+    pytest.importorskip("rdflib")
+
     td = TableDoc.parse_csv(
         indir / "semdata.csv",
         delimiter=";",
@@ -107,3 +110,7 @@ def test_parse_csv():
         "https://he-matchmaker.eu/data/sem/SEM_cement_batch2/"
         "77600-23-001/77600-23-001_5kV_400x_m001"
     )
+
+    ts = Triplestore(backend="rdflib")
+    td.save(ts)
+    print(ts.serialize())

From b3e3d0723f879547099aabcc45f9148e4a959700 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Thu, 19 Dec 2024 09:59:28 +0000
Subject: [PATCH 13/25] [pre-commit.ci] auto fixes from pre-commit hooks

For more information, see https://pre-commit.ci

---
 tests/dataset/test_tabledoc.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/dataset/test_tabledoc.py b/tests/dataset/test_tabledoc.py
index ae7ca348..00179810 100644
--- a/tests/dataset/test_tabledoc.py
+++ b/tests/dataset/test_tabledoc.py
@@ -81,7 +81,7 @@ def test_as_dicts():
     print(ts.serialize())


-#if True:
+# if True:
 def test_parse_csv():
     """Test parsing a csv file."""
     from dataset_paths import indir  # pylint: disable=import-error

From 700c514282a8374f32dd5d858be87e466be1eb4a Mon Sep 17 00:00:00 2001
From: Jesper Friis
Date: Thu, 19 Dec 2024 13:17:46 +0100
Subject: [PATCH 14/25] Fixed failing tests

---
 tests/dataset/test_tabledoc.py |  8 +++++---
 tripper/dataset/tabledoc.py    | 35 +++++++++++++++++++++++++++-------
 2 files changed, 33 insertions(+), 10 deletions(-)
diff --git a/tests/dataset/test_tabledoc.py b/tests/dataset/test_tabledoc.py
index ae7ca348..f1640dbc 100644
--- a/tests/dataset/test_tabledoc.py
+++ b/tests/dataset/test_tabledoc.py
@@ -81,16 +81,16 @@ def test_as_dicts():
     print(ts.serialize())


-#if True:
+# if True:
 def test_parse_csv():
     """Test parsing a csv file."""
     from dataset_paths import indir  # pylint: disable=import-error

+    pytest.importorskip("rdflib")
+
     from tripper import Triplestore
     from tripper.dataset import TableDoc

-    pytest.importorskip("rdflib")
-
     td = TableDoc.parse_csv(
         indir / "semdata.csv",
         delimiter=";",
@@ -105,7 +105,9 @@ def test_parse_csv():
         },
     )

+    # pylint: disable=unused-variable,unbalanced-tuple-unpacking
     img, series, batch, sample = td.asdicts()
+
     assert img["@id"] == (
         "https://he-matchmaker.eu/data/sem/SEM_cement_batch2/"
         "77600-23-001/77600-23-001_5kV_400x_m001"
     )
diff --git a/tripper/dataset/tabledoc.py b/tripper/dataset/tabledoc.py
index 50fe86b7..1dc37098 100644
--- a/tripper/dataset/tabledoc.py
+++ b/tripper/dataset/tabledoc.py
@@ -69,21 +69,42 @@ def save(self, ts: Triplestore) -> None:
         for d in self.asdicts():
             save_dict(ts, d)

-    @classmethod
+    @staticmethod
     def parse_csv(
-        self,
         csvfile: "Union[Path, str]",
         type: "Optional[str]" = "dataset",
         prefixes: "Optional[dict]" = None,
         context: "Optional[Union[dict, list]]" = None,
-        dialect="excel",
+        dialect: "Union[csv.Dialect, str]" = "excel",
         **kwargs,
-    ) -> None:
-        """Parse a csv file."""
-        with open(csvfile, newline="") as f:
+    ) -> "TableDoc":
+        # pylint: disable=line-too-long
+        """Parse a csv file using the standard library csv module.
+
+        Arguments:
+            csvfile: CSV file to parse.
+            type: Type of data to save (applies to all rows). Should
+                either be one of the pre-defined names: "dataset",
+                "distribution", "accessService", "parser" and "generator"
+                or an IRI to a class in an ontology. Defaults to
+                "dataset".
+            prefixes: Dict with prefixes in addition to those included in the
+                JSON-LD context. Should map namespace prefixes to IRIs.
+            context: Dict with user-defined JSON-LD context.
+            dialect: A subclass of csv.Dialect, or the name of the dialect,
+                specifying how the `csvfile` is formatted. For more details,
+                see [Dialects and Formatting Parameters].
+            kwargs: Additional keyword arguments overriding individual
+                formatting parameters. For more details, see
+                [Dialects and Formatting Parameters].
+
+        References:
+        [Dialects and Formatting Parameters]: https://docs.python.org/3/library/csv.html#dialects-and-formatting-parameters
+        """
+        with open(csvfile, encoding="utf-8") as f:
             reader = csv.reader(f, dialect=dialect, **kwargs)
             header = next(reader)[0].split(reader.dialect.delimiter)
-            data = [row for row in reader]
+            data = list(reader)

From 4d7d77adc9233b39ad05dfd6fb4222feef814b94 Mon Sep 17 00:00:00 2001
From: Jesper Friis
Date: Thu, 19 Dec 2024 13:33:10 +0100
Subject: [PATCH 15/25] Added encoding to keyword arguments

---
 tests/input/semdata.csv     | 4 ++--
 tripper/dataset/tabledoc.py | 5 ++++-
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/tests/input/semdata.csv b/tests/input/semdata.csv
index 4df732ef..c3cf536c 100644
--- a/tests/input/semdata.csv
+++ b/tests/input/semdata.csv
@@ -1,5 +1,5 @@
 @id;@type;title;description;creator;contactPoint;inSeries;datamodel;datamodelStorage;distribution.downloadURL;distribution.mediaType;distribution.parser;fromSample;isDescriptionOf
-semdata:SEM_cement_batch2/77600-23-001/77600-23-001_5kV_400x_m001;sem:SEMImage;SEM image of cement;Back-scattered SEM image of cement sample 77600 from Heidelberg, polished with 1 um diamond compound.;Sigurd Wenner;Sigurd Wenner ;semdata:SEM_cement_batch2/77600-23-001;http://onto-ns.com/meta/matchmaker/0.2/SEMImage;https://github.com/HEU-MatCHMaker/DataDocumentation/blob/master/SEM/datamodels/SEMImage.yaml;https://github.com/EMMC-ASBL/tripper/raw/refs/heads/dataset/tests/input/77600-23-001_5kV_400x_m001.tif;image/tiff;parser:sem_hitachi;sample:SEM_cement_batch2/77600-23-001;mat:concrete1
-semdata:SEM_cement_batch2/77600-23-001;sem:SEMImageSeries;Series of SEM image of cement sample 77600;Back-scattered SEM image of cement sample 77600, polished with 1 um diamond compound.;Sigurd Wenner;Sigurd Wenner ;semdata:SEM_cement_batch2; ;;sftp://nas.aimen.es/P_MATCHMAKER_SHARE_SINTEF/SEM_cement_batch2/77600-23-001;inode/directory;;;
+semdata:SEM_cement_batch2/77600-23-001/77600-23-001_5kV_400x_m001;sem:SEMImage;SEM image of cement;Back-scattered SEM image of cement sample 77600 from Heidelberg, polished with 1 μm diamond compound.;Sigurd Wenner;Sigurd Wenner ;semdata:SEM_cement_batch2/77600-23-001;http://onto-ns.com/meta/matchmaker/0.2/SEMImage;https://github.com/HEU-MatCHMaker/DataDocumentation/blob/master/SEM/datamodels/SEMImage.yaml;https://github.com/EMMC-ASBL/tripper/raw/refs/heads/dataset/tests/input/77600-23-001_5kV_400x_m001.tif;image/tiff;parser:sem_hitachi;sample:SEM_cement_batch2/77600-23-001;mat:concrete1
+semdata:SEM_cement_batch2/77600-23-001;sem:SEMImageSeries;Series of SEM image of cement sample 77600;Back-scattered SEM image of cement sample 77600, polished with 1 μm diamond compound.;Sigurd Wenner;Sigurd Wenner ;semdata:SEM_cement_batch2; ;;sftp://nas.aimen.es/P_MATCHMAKER_SHARE_SINTEF/SEM_cement_batch2/77600-23-001;inode/directory;;;
 semdata:SEM_cement_batch2;sem:SEMImageSeries;Nested series of SEM images of cement batch2;...;Sigurd Wenner;Sigurd Wenner ; ;;;sftp://nas.aimen.es/P_MATCHMAKER_SHARE_SINTEF/SEM_cement_batch2;inode/directory;;;
 sample:SEM_cement_batch2/77600-23-001;chameo:Sample;Series for SEM images for sample 77600-23-001.; ;;;;;;;;;;
diff --git a/tripper/dataset/tabledoc.py b/tripper/dataset/tabledoc.py
index 1dc37098..46ad4313 100644
--- a/tripper/dataset/tabledoc.py
+++ b/tripper/dataset/tabledoc.py
@@ -75,6 +75,7 @@ def parse_csv(
         type: "Optional[str]" = "dataset",
         prefixes: "Optional[dict]" = None,
         context: "Optional[Union[dict, list]]" = None,
list]]" = None, + encoding: str = "utf-8", dialect: "Union[csv.Dialect, str]" = "excel", **kwargs, ) -> "TableDoc": @@ -91,6 +92,8 @@ def parse_csv( prefixes: Dict with prefixes in addition to those included in the JSON-LD context. Should map namespace prefixes to IRIs. context: Dict with user-defined JSON-LD context. + encoding: The encoding of the csv file. Note that Excel may + encode as "ISO-8859" (commonly used in 1990th). dialect: A subclass of csv.Dialect, or the name of the dialect, specifying how the `csvfile` is formatted. For more details, see [Dialects and Formatting Parameters]. @@ -101,7 +104,7 @@ def parse_csv( References: [Dialects and Formatting Parameters]: https://docs.python.org/3/library/csv.html#dialects-and-formatting-parameters """ - with open(csvfile, encoding="utf-8") as f: + with open(csvfile, encoding=encoding) as f: reader = csv.reader(f, dialect=dialect, **kwargs) header = next(reader)[0].split(reader.dialect.delimiter) data = list(reader) From 80048677f0c4612154dcca687565c90423a56eb0 Mon Sep 17 00:00:00 2001 From: Jesper Friis Date: Fri, 20 Dec 2024 15:30:39 +0100 Subject: [PATCH 16/25] Strip off blanks when parsing a table. --- tripper/dataset/tabledoc.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/tripper/dataset/tabledoc.py b/tripper/dataset/tabledoc.py index 46ad4313..75edb181 100644 --- a/tripper/dataset/tabledoc.py +++ b/tripper/dataset/tabledoc.py @@ -28,6 +28,7 @@ class TableDoc: prefixes: Dict with prefixes in addition to those included in the JSON-LD context. Should map namespace prefixes to IRIs. context: Dict with user-defined JSON-LD context. + strip: Whether to strip leading and trailing whitespaces from cells. """ @@ -40,12 +41,14 @@ def __init__( type: "Optional[str]" = "dataset", prefixes: "Optional[dict]" = None, context: "Optional[Union[dict, list]]" = None, + strip: bool = True, ): self.header = header self.data = data self.type = type self.prefixes = prefixes self.context = context + self.strip = strip def asdicts(self) -> "List[dict]": """Return the table as a list of dicts.""" @@ -55,9 +58,11 @@ def asdicts(self) -> "List[dict]": for row in self.data: d = AttrDict() for i, colname in enumerate(self.header): - cell = row[i] + cell = row[i].strip() if self.strip else row[i] if cell: - addnested(d, colname, cell) + addnested( + d, colname.strip() if self.strip else colname, cell + ) jsonld = as_jsonld( d, type=self.type, prefixes=self.prefixes, **kw # type: ignore ) @@ -106,8 +111,9 @@ def parse_csv( """ with open(csvfile, encoding=encoding) as f: reader = csv.reader(f, dialect=dialect, **kwargs) - header = next(reader)[0].split(reader.dialect.delimiter) + header = next(reader) data = list(reader) + return TableDoc( header=header, data=data, From 731253cd16ba58525b739166892251a0d88ca8af Mon Sep 17 00:00:00 2001 From: Jesper Friis Date: Fri, 20 Dec 2024 15:39:14 +0100 Subject: [PATCH 17/25] Added extra test to ensure that all properties are parsed correctly --- tests/dataset/test_tabledoc.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/dataset/test_tabledoc.py b/tests/dataset/test_tabledoc.py index f1640dbc..e9fff0c1 100644 --- a/tests/dataset/test_tabledoc.py +++ b/tests/dataset/test_tabledoc.py @@ -112,6 +112,10 @@ def test_parse_csv(): "https://he-matchmaker.eu/data/sem/SEM_cement_batch2/" "77600-23-001/77600-23-001_5kV_400x_m001" ) + assert img.distribution.downloadURL == ( + "https://github.com/EMMC-ASBL/tripper/raw/refs/heads/dataset/" + "tests/input/77600-23-001_5kV_400x_m001.tif" + ) 
     ts = Triplestore(backend="rdflib")
     td.save(ts)
     print(ts.serialize())

From 60b0c6d2657242d36c5cac1f979ba7172783fba6 Mon Sep 17 00:00:00 2001
From: Jesper Friis
Date: Fri, 20 Dec 2024 15:57:11 +0100
Subject: [PATCH 18/25] Added write_csv() method to TableDoc

---
 tests/dataset/test_tabledoc.py |  9 +++++++--
 tripper/dataset/tabledoc.py    | 31 ++++++++++++++++++++++++++++++-
 2 files changed, 37 insertions(+), 3 deletions(-)

diff --git a/tests/dataset/test_tabledoc.py b/tests/dataset/test_tabledoc.py
index e9fff0c1..5020989c 100644
--- a/tests/dataset/test_tabledoc.py
+++ b/tests/dataset/test_tabledoc.py
@@ -82,15 +82,16 @@ def test_as_dicts():

 # if True:
-def test_parse_csv():
+def test_csv():
     """Test parsing a csv file."""
-    from dataset_paths import indir  # pylint: disable=import-error
+    from dataset_paths import indir, outdir  # pylint: disable=import-error

     pytest.importorskip("rdflib")

     from tripper import Triplestore
     from tripper.dataset import TableDoc

+    # Read csv file
     td = TableDoc.parse_csv(
         indir / "semdata.csv",
         delimiter=";",
@@ -117,6 +118,10 @@ def test_csv():
         "tests/input/77600-23-001_5kV_400x_m001.tif"
     )

+    # Write the table to a new csv file
+    td.write_csv(outdir / "semdata.csv")
+
+    # Print serialised KB
     ts = Triplestore(backend="rdflib")
     td.save(ts)
     print(ts.serialize())
diff --git a/tripper/dataset/tabledoc.py b/tripper/dataset/tabledoc.py
index 75edb181..65337d83 100644
--- a/tripper/dataset/tabledoc.py
+++ b/tripper/dataset/tabledoc.py
@@ -109,7 +109,7 @@ def parse_csv(
         References:
         [Dialects and Formatting Parameters]: https://docs.python.org/3/library/csv.html#dialects-and-formatting-parameters
         """
-        with open(csvfile, encoding=encoding) as f:
+        with open(csvfile, mode="rt", encoding=encoding) as f:
             reader = csv.reader(f, dialect=dialect, **kwargs)
             header = next(reader)
             data = list(reader)
@@ -121,3 +121,32 @@ def parse_csv(
             prefixes=prefixes,
             context=context,
         )
+
+    def write_csv(
+        self,
+        csvfile: "Union[Path, str]",
+        encoding: str = "utf-8",
+        dialect: "Union[csv.Dialect, str]" = "excel",
+        **kwargs,
+    ) -> None:
+        # pylint: disable=line-too-long
+        """Write the table to a csv file using the standard library csv module.
+
+        Arguments:
+            csvfile: CSV file to write to.
+            encoding: The encoding of the csv file.
+            dialect: A subclass of csv.Dialect, or the name of the dialect,
+                specifying how the `csvfile` is formatted. For more details,
+                see [Dialects and Formatting Parameters].
+            kwargs: Additional keyword arguments overriding individual
+                formatting parameters. For more details, see
+                [Dialects and Formatting Parameters].
+
+        References:
+        [Dialects and Formatting Parameters]: https://docs.python.org/3/library/csv.html#dialects-and-formatting-parameters
+        """
+        with open(csvfile, mode="wt", encoding=encoding) as f:
+            writer = csv.writer(f, dialect=dialect, **kwargs)
+            writer.writerow(self.header)
+            for row in self.data:
+                writer.writerow(row)

From d26d92fdabfc27090558a66ed5c9362de026bb11 Mon Sep 17 00:00:00 2001
From: Jesper Friis
Date: Mon, 30 Dec 2024 12:14:49 +0100
Subject: [PATCH 19/25] Save serialised documentation to turtle file.
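
For context, writing the serialised documentation to a turtle file and
getting it back as a string look like this (a sketch; the `ex` names
and the file path are only examples):

    from tripper import Triplestore
    from tripper.dataset import TableDoc

    td = TableDoc(
        header=["@id", "@type"],
        data=[("ex:ds1", "ex:Type1")],
        prefixes={"ex": "http://example.com/ex#"},
    )
    ts = Triplestore(backend="rdflib")
    td.save(ts)
    ts.serialize("semdata.ttl")  # serialise to a turtle file
    print(ts.serialize())        # no destination: return as a string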
From d26d92fdabfc27090558a66ed5c9362de026bb11 Mon Sep 17 00:00:00 2001
From: Jesper Friis
Date: Mon, 30 Dec 2024 12:14:49 +0100
Subject: [PATCH 19/25] Save serialised documentation to turtle file.

---
 tests/dataset/test_tabledoc.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/dataset/test_tabledoc.py b/tests/dataset/test_tabledoc.py
index 5020989c..2d3c8779 100644
--- a/tests/dataset/test_tabledoc.py
+++ b/tests/dataset/test_tabledoc.py
@@ -124,4 +124,5 @@ def test_csv():
     # Print serialised KB
     ts = Triplestore(backend="rdflib")
     td.save(ts)
+    ts.serialize(outdir / "semdata.ttl")
     print(ts.serialize())

From 66b9dd75d0291359f9507033b9a8159cf0a8d320 Mon Sep 17 00:00:00 2001
From: Jesper Friis
Date: Mon, 30 Dec 2024 12:23:13 +0100
Subject: [PATCH 20/25] Apply suggestions from code review

Co-authored-by: Tor S. Haugland
---
 pyproject.toml                 | 1 -
 tests/dataset/test_tabledoc.py | 2 +-
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 21196860..0398f0a0 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -105,7 +105,6 @@ max-locals = 20
 disable = [
     "fixme",
     "invalid-name",
-    #"too-many-positional-arguments",
 ]
 good-names = [
     # Default

diff --git a/tests/dataset/test_tabledoc.py b/tests/dataset/test_tabledoc.py
index 4a1c0613..da74203c 100644
--- a/tests/dataset/test_tabledoc.py
+++ b/tests/dataset/test_tabledoc.py
@@ -1,4 +1,4 @@
-"""Test the dataset module."""
+"""Test the TableDoc class."""

 import pytest

From 575f09d2b13deb60ce8ae9addeb734650223ca31 Mon Sep 17 00:00:00 2001
From: Jesper Friis
Date: Mon, 30 Dec 2024 12:30:22 +0100
Subject: [PATCH 21/25] Apply suggestions from code review

Co-authored-by: Tor S. Haugland
---
 tripper/dataset/tabledoc.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tripper/dataset/tabledoc.py b/tripper/dataset/tabledoc.py
index b1a8ef51..9fd5d988 100644
--- a/tripper/dataset/tabledoc.py
+++ b/tripper/dataset/tabledoc.py
@@ -47,7 +47,7 @@ def __init__(

     def asdicts(self) -> "List[dict]":
         """Return the table as a list of dicts."""
-        kw = {"_context": self.context} if self.context else {}
+        kw = {"@context": self.context} if self.context else {}

         for row in self.data:

From f45376db770b151d6ea7eb4de901fb646ef3dc43 Mon Sep 17 00:00:00 2001
From: Jesper Friis
Date: Mon, 30 Dec 2024 12:49:06 +0100
Subject: [PATCH 22/25] Added a clarifying comment as a response to review
 comment by @torhaugl.

---
 tests/dataset/test_tabledoc.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/dataset/test_tabledoc.py b/tests/dataset/test_tabledoc.py
index da74203c..278e7881 100644
--- a/tests/dataset/test_tabledoc.py
+++ b/tests/dataset/test_tabledoc.py
@@ -32,6 +32,8 @@ def test_as_dicts():
             "onto": "http:/example.com/onto#",
             "ds": "http:/example.com/datasets#",
         },
+        # Replace the "ds" prefix above with this, once the "context" keyword
+        # argument is fully implemented.
        # context={
        #     "ds": "http:/example.com/datasets#",
        # },
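The "_context" to "@context" rename in PATCH 21 means a user-defined JSON-LD
context is forwarded to as_jsonld() under its standard key.  A sketch of the
usage this is building towards, assuming the context keyword eventually
becomes fully implemented (the namespace IRI is hypothetical):

    from tripper.dataset import TableDoc

    td = TableDoc(
        header=["@id", "title"],
        data=[["ds:mydata", "My dataset"]],
        # Intended to eventually replace an explicit "ds" entry in prefixes:
        context={"ds": "http://example.com/datasets#"},
    )

    # Each returned JSON-LD dict should then carry the user context in
    # addition to the default one.
    dicts = td.asdicts()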
From 1752db0016a521ad6f0b3e9a2bd4ab5997482181 Mon Sep 17 00:00:00 2001
From: Jesper Friis
Date: Mon, 30 Dec 2024 15:43:00 +0100
Subject: [PATCH 23/25] Fix test failure

---
 tripper/dataset/tabledoc.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tripper/dataset/tabledoc.py b/tripper/dataset/tabledoc.py
index 0711565b..6dbf8b32 100644
--- a/tripper/dataset/tabledoc.py
+++ b/tripper/dataset/tabledoc.py
@@ -58,7 +58,7 @@ def asdicts(self) -> "List[dict]":
         for row in self.data:
             d = AttrDict()
             for i, colname in enumerate(self.header):
-                cell = row[i].strip() if self.strip else row[i]
+                cell = row[i].strip() if row[i] and self.strip else row[i]
                 if cell:
                     addnested(
                         d, colname.strip() if self.strip else colname, cell

From 2988a324bc612691a7462a912f9a94ef2806c1f0 Mon Sep 17 00:00:00 2001
From: Jesper Friis
Date: Mon, 30 Dec 2024 18:52:44 +0100
Subject: [PATCH 24/25] Updated .gitignore files

---
 .gitignore              | 1 +
 tests/output/.gitignore | 1 +
 2 files changed, 2 insertions(+)

diff --git a/.gitignore b/.gitignore
index c872f80e..9a0e7df5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -18,3 +18,4 @@ dist/

 # Test output
 route.svg
+coverage.xml

diff --git a/tests/output/.gitignore b/tests/output/.gitignore
index c26b5163..613dbf65 100644
--- a/tests/output/.gitignore
+++ b/tests/output/.gitignore
@@ -3,3 +3,4 @@
 *.ttl
 *.png
 *.tiff
+*.csv

From 85a51ae24c7531b370a94946b62644fcae94d71c Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Fri, 3 Jan 2025 22:29:16 +0000
Subject: [PATCH 25/25] [pre-commit.ci] auto fixes from pre-commit hooks

For more information, see https://pre-commit.ci
---
 tripper/dataset/tabledoc.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tripper/dataset/tabledoc.py b/tripper/dataset/tabledoc.py
index c4f14567..6dbf8b32 100644
--- a/tripper/dataset/tabledoc.py
+++ b/tripper/dataset/tabledoc.py
@@ -150,4 +150,3 @@ def write_csv(
         writer.writerow(self.header)
         for row in self.data:
             writer.writerow(row)
-
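Finally, the guard added in PATCH 23 (row[i] and self.strip) is relevant when
a TableDoc is constructed directly from ragged data, where a missing cell may
be None rather than an empty string.  A small sketch of the failure mode the
fix addresses, with hypothetical values:

    from tripper.dataset import TableDoc

    td = TableDoc(
        header=["@id", "title", "description"],
        data=[
            ["ds:a", "First dataset", "Has a description"],
            ["ds:b", "Second dataset", None],  # missing cell: None, not ""
        ],
        prefixes={"ds": "http://example.com/datasets#"},
    )

    # The None cell is now skipped; previously asdicts() would raise
    # AttributeError from None.strip().
    dicts = td.asdicts()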