diff --git a/datalad_catalog/extractors/catalog_core.py b/datalad_catalog/extractors/catalog_core.py
new file mode 100644
index 00000000..c84d7f1f
--- /dev/null
+++ b/datalad_catalog/extractors/catalog_core.py
@@ -0,0 +1,305 @@
+# emacs: -*- mode: python; py-indent-offset: 4; tab-width: 4; indent-tabs-mode: nil -*-
+# ex: set sts=4 ts=4 sw=4 noet:
+# ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
+#
+#   See COPYING file distributed along with the datalad package for the
+#   copyright and license terms.
+#
+# ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
+"""Extractor for DataLad dataset metadata - ported from datalad-metalad"""
+from argparse import ArgumentParser
+import hashlib
+import json
+from pathlib import Path
+import os.path as op
+import time
+
+import datalad.support.network as dsn
+from datalad_next.constraints import EnsureBool
+from datalad_next.constraints.dataset import EnsureDataset
+
+
+def get_dataset_metadata(ds, refcommit, status):
+    meta = []
+    # Get dataset commit info
+    commitinfo = get_commit_info(ds, refcommit)
+    # Get contributors from commits
+    contributor_ids = []
+    for contributor in commitinfo.pop('contributors', []):
+        contributor_id = get_agent_id(*contributor[:2])
+        meta.append({
+            '@id': contributor_id,
+            # we cannot distinguish real people from machine-committers
+            '@type': 'agent',
+            'name': contributor[0],
+            'email': contributor[1],
+        })
+        contributor_ids.append(contributor_id)
+    # Set up dataset metadata dict
+    dsmeta = {
+        # the uniquest ID for this metadata record is the refcommit SHA
+        '@id': refcommit,
+        # the dataset UUID is the main identifier
+        'identifier': ds.id,
+        '@type': 'Dataset',
+    }
+    dsmeta.update(commitinfo)
+    # Add contributors
+    if contributor_ids:
+        c = [{'@id': i} for i in contributor_ids]
+        dsmeta['hasContributor'] = c[0] if len(c) == 1 else c
+    # Add subdatasets
+    parts = []
+    for subds in [s for s in status if s['type'] == 'dataset']:
+        subdsinfo = {
+            # reference by subdataset commit
+            '@id': 'datalad:{}'.format(subds['gitshasum']),
+            '@type': 'Dataset',
+            'name': Path(subds['path']).relative_to(ds.pathobj).as_posix(),
+        }
+        subdsid = ds.subdatasets(
+            contains=subds['path'],
+            return_type='item-or-list',
+            result_renderer="disabled").get('gitmodule_datalad-id', None)
+        if subdsid:
+            subdsinfo['identifier'] = 'datalad:{}'.format(subdsid)
+        parts.append(subdsinfo)
+    if parts:
+        dsmeta['hasPart'] = parts
+    # Add distributions
+    if ds.config.obtain(
+            'datalad.metadata.datalad-core.report-remotes',
+            True, valtype=EnsureBool()):
+        remote_names = ds.repo.get_remotes()
+        distributions = []
+        known_uuids = {}
+        # start with configured Git remotes
+        for r in remote_names:
+            info = {
+                'name': r,
+                # not very informative
+                #'description': 'DataLad dataset sibling',
+            }
+            url = ds.config.get('remote.{}.url'.format(r), None)
+            # best effort to recode whatever is configured into a URL
+            if url is not None:
+                url = ri2url(dsn.RI(url))
+            if url:
+                info['url'] = url
+            # do we have information on the annex ID?
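+            # if so, it serves as the sibling's '@id' and is remembered in
+            # known_uuids, so the git-annex repo_info scan further below does
+            # not record the same remote a second time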
+            annex_uuid = ds.config.get(
+                'remote.{}.annex-uuid'.format(r), None)
+            if annex_uuid is not None:
+                info['@id'] = 'datalad:{}'.format(annex_uuid)
+                known_uuids[annex_uuid] = info
+            if 'url' in info or '@id' in info:
+                # only record if we have any identifying information
+                # otherwise it is pointless cruft
+                distributions.append(info)
+        # now look for annex info
+        if hasattr(ds.repo, 'repo_info'):
+            info = ds.repo.repo_info(fast=True)
+            for cat in ('trusted repositories',
+                        'semitrusted repositories',
+                        'untrusted repositories'):
+                for r in info[cat]:
+                    if r['here'] or r['uuid'] in (
+                            '00000000-0000-0000-0000-000000000001',
+                            '00000000-0000-0000-0000-000000000002'):
+                        # ignore local and universally available
+                        # remotes
+                        continue
+                    # avoid duplicates, but record all sources, even
+                    # if no URLs are around
+                    if r['uuid'] not in known_uuids:
+                        distributions.append({'@id': r['uuid']})
+        if distributions:
+            dsmeta['distribution'] = sorted(
+                distributions,
+                key=lambda x: x.get('@id', x.get('url', None))
+            )
+    meta.append(dsmeta)
+    return meta
+
+
+def get_commit_info(ds, refcommit):
+    """Get info about all commits, up to (and incl. the refcommit)"""
+    # - get all the commit info with git log --pretty='%aN%x00%aE%x00%aI%x00%H'
+    #   - use all first-level paths other than .datalad and .git for the query
+    # - from this we can determine all modification timestamps, described by the refcommit
+    # - do a subsequent git log query for the determined refcommit to determine
+    #   a version by counting all commits since inception up to the refcommit
+    #   - we cannot use the first query, because it will be constrained by the
+    #     present paths that may not have existed previously at all
+
+    # grab the history until the refcommit
+    commits = [
+        line.split('\0')
+        for line in ds.repo.call_git_items_(
+            # name, email, timestamp, shasum
+            ['log', '--pretty=format:%aN%x00%aE%x00%aI%x00%H', refcommit]
+        )
+    ]
+    # version, always anchored on the first commit (tags could move and
+    # make the integer commit count ambiguous, and substantially complicate
+    # version comparisons)
+    version = '0-{}-g{}'.format(
+        len(commits),
+        # abbreviated shasum (like git-describe)
+        ds.repo.get_hexsha(commits[0][3], short=True),
+    )
+    meta = {
+        'version': version,
+    }
+    if ds.config.obtain(
+            'datalad.metadata.datalad-core.report-contributors',
+            True, valtype=EnsureBool()):
+        meta.update(
+            contributors=sorted(set(tuple(c[:2]) for c in commits)))
+    if ds.config.obtain(
+            'datalad.metadata.datalad-core.report-modification-dates',
+            True, valtype=EnsureBool()):
+        meta.update(
+            dateCreated=commits[-1][2],
+            dateModified=commits[0][2],
+        )
+    return meta
+
+
+def get_agent_id(name, email):
+    """Return a suitable '@id' for committers/authors
+
+    In most cases we will not have a URL for people/software agents.
+    Let's create a string ID that is based on the combination of both
+    name and email. Return an MD5 hash instead of a plain-text string
+    to discourage direct interpretation by humans.
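+
+    For example (purely illustrative values): name 'Jane Doe' and email
+    'jane@example.com' are combined into 'Jane_Doe<jane@example.com>'
+    before hashing.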
+    """
+    return hashlib.md5(u'{}<{}>'.format(
+        name.replace(' ', '_'),
+        email
+    ).encode('utf-8')).hexdigest()
+
+
+# TODO RF to be merged with datalad.support.network
+def ri2url(ri):
+    f = ri.fields
+    if isinstance(ri, dsn.URL):
+        return ri.as_str()
+    elif isinstance(ri, dsn.SSHRI):
+        # construct a URL that Git would understand
+        return 'ssh://{}{}{}{}{}{}'.format(
+            f['username'],
+            '@' if f['username'] else '',
+            f['hostname'],
+            ':' if f['port'] else '',
+            f['port'],
+            f['path'] if op.isabs(f['path'])
+            else '/{}'.format(f['path']) if f['path'].startswith('~')
+            else '/~/{}'.format(f['path'])
+        )
+    elif isinstance(ri, dsn.PathRI):
+        # this has no chance of being resolved outside this machine
+        # not worth reporting
+        return None
+
+
+def get_dataset_url(graph):
+    dataset = [x for x in graph if x["@type"] == "Dataset"]
+    dataset = dataset[0]
+    dist = dataset.get("distribution", [])
+    return [d["url"] for d in dist if "url" in d]
+
+
+def get_authors(graph):
+    return [{"name": x["name"], "email": x["email"]} for x in graph if x["@type"] == "agent"]
+
+
+def get_subdatasets(graph):
+    dataset = [x for x in graph if x["@type"] == "Dataset"]
+    dataset = dataset[0]
+    haspart = dataset.get("hasPart", [])
+    subs = [
+        {
+            "dataset_id": h["identifier"].replace("datalad:", ""),
+            "dataset_version": h["@id"].replace("datalad:", ""),
+            "dataset_path": h["name"],
+            "dirs_from_path": [],
+        } for h in haspart
+    ]
+    return subs if len(subs) > 0 else None
+
+
+def get_metadata_source(metadata_record):
+    s = {
+        "key_source_map": {},
+        "sources": [
+            {
+                "source_name": metadata_record["extractor_name"],
+                "source_version": metadata_record["extractor_version"],
+                "source_parameter": metadata_record["extraction_parameter"],
+                "source_time": metadata_record["extraction_time"],
+                "agent_email": metadata_record["agent_email"],
+                "agent_name": metadata_record["agent_name"]
+            }
+        ]
+    }
+    return s if len(s) > 0 else None
+
+
+def translate(metadata_record, graph):
+    translated_record = {
+        "type": metadata_record["type"],
+        "dataset_id": metadata_record["dataset_id"],
+        "dataset_version": metadata_record["dataset_version"],
+        "metadata_sources": get_metadata_source(metadata_record),
+        "name": "",
+        "url": get_dataset_url(graph),
+        "authors": get_authors(graph),
+        "subdatasets": get_subdatasets(graph),
+    }
+    return {k: v for k, v in translated_record.items() if v is not None}
+
+
+
+# SCRIPT EXECUTION STARTS HERE
+
+parser = ArgumentParser()
+parser.add_argument("dataset_path", type=Path, help="Path to the datalad dataset")
+args = parser.parse_args()
+source_dataset = EnsureDataset(
+    installed=True,
+    purpose='extract core metadata',
+    require_id=True)(args.dataset_path).ds
+source_dataset_id = source_dataset.id
+source_dataset_version = source_dataset.repo.get_hexsha()
+agent_name = source_dataset.config.get("user.name")
+agent_email = source_dataset.config.get("user.email")
+status = source_dataset.subdatasets(result_renderer="disabled")
+metadata = get_dataset_metadata(source_dataset, source_dataset_version, status)
+default_context = {
+    # schema.org definitions by default
+    "@vocab": "http://schema.org/",
+    # DataLad ID prefix, pointing to our own resolver
+    "datalad": "http://dx.datalad.org/",
+}
+# meta_out structures the metadata in exactly the same
+# way as datalad meta-extract outputs it
+meta_out = dict(
+    dataset_id = source_dataset.id,
+    dataset_version = source_dataset_version,
+    extractor_name = "catalog_core",
+    extractor_version = "1",
+    extraction_parameter = {},
+    extraction_time = time.time(),
+    agent_name = agent_name,
+    agent_email = agent_email,
+    extracted_metadata = {
+        "@context": default_context,
+        "@graph": metadata,
+    },
+    type="dataset",
+)
+# translate() does more or less the same as
+# datalad_catalog.translators.metalad_core_translator
+meta_translated = translate(meta_out, metadata)
+print(json.dumps(meta_translated))
\ No newline at end of file
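A minimal usage sketch for context, assuming the script is run directly against an installed DataLad dataset (the dataset path below is hypothetical): it prints a single translated metadata record as JSON to stdout, which a caller could capture along these lines:

    import json
    import subprocess

    # run the extractor script added above on an installed DataLad dataset
    result = subprocess.run(
        ["python", "datalad_catalog/extractors/catalog_core.py",
         "/path/to/dataset"],
        capture_output=True, text=True, check=True,
    )
    record = json.loads(result.stdout)
    # translate() emits: type, dataset_id, dataset_version, metadata_sources,
    # name, url, authors, and subdatasets (the last only when any are registered)
    print(record["dataset_id"], record["dataset_version"])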