From 7a893227b334936677165329aa5324e3b1e62a1b Mon Sep 17 00:00:00 2001
From: Francois Daoust
Date: Thu, 1 Aug 2024 22:12:54 +0200
Subject: [PATCH] Major rewrite to make things more consistent and extensible

**IMPORTANT: Work in progress, not yet functional!**

The code in Strudy did the same thing in multiple ways:
- The main CLI was linked to the first type of analysis reports, which
  still get published to `w3c/webref-analysis`.
- A few CLIs were there, used mainly for exploration purposes.
- The study modules could sometimes be used as CLIs, for no apparent
  reason.
- The study modules used different conventions for inputs/outputs.
- The reporting code was somewhat separated from the rest, whereas it
  could be useful to extend it.
- Preparing flexible reports and amending the "file an issue" workflow
  was a bit tedious.

This rewrite attempts to solve the above by:
- Rewriting the study modules so that they all expose a similar study
  function
- Adding a generic study module that knows about all anomaly types,
  dispatches the study to the different study modules depending on the
  request, and can structure and format the result as needed in markdown
  to file one or more issues.
- Adding a few tests to be more confident that the analyses are doing
  the right thing!
- **(Not done yet)** Updating the entry points to use the new study
  module
- **(Not done yet)** Getting rid of all side CLIs
---
 src/lib/study-algorithms.js |  27 +-
 src/lib/study-backrefs.js   | 124 ++++-----
 src/lib/study-dfns.js       | 478 ++++++++++++++++++++++++++++++++++
 src/lib/study-refs.js       | 129 ++++++++--
 src/lib/study-webidl.js     |  58 ++---
 src/lib/study.js            | 498 ++++++++++++++++++++++++++++++++++++
 test/study-algorithms.js    |  37 +++
 test/study-backrefs.js      |  25 +-
 test/study-dfns.js          |  39 +++
 test/study-refs.js          |  55 +++-
 test/study-webidl.js        |  11 +-
 test/study.js               | 150 +++++++++++
 12 files changed, 1470 insertions(+), 161 deletions(-)
 create mode 100644 src/lib/study-dfns.js
 create mode 100644 src/lib/study.js
 create mode 100644 test/study-algorithms.js
 create mode 100644 test/study-dfns.js
 create mode 100644 test/study.js

diff --git a/src/lib/study-algorithms.js b/src/lib/study-algorithms.js
index 76f76fd9..2466a282 100644
--- a/src/lib/study-algorithms.js
+++ b/src/lib/study-algorithms.js
@@ -1,11 +1,4 @@
 import { JSDOM } from 'jsdom';
-import { recordCategorizedAnomaly } from './util.js';
-
-const possibleAnomalies = [
-  'missingTaskForPromise',
-  'missingTaskForEvent'
-];
-
 
 /**
  * Normalize whitespaces in string to make analysis easier
@@ -57,9 +50,8 @@ function nestParallelSteps(algo) {
 /**
  * Main function, study all algorithms
  */
-function studyAlgorithms(edResults) {
+function studyAlgorithms(specs) {
   const report = [];
-  const recordAnomaly = recordCategorizedAnomaly(report, 'algorithms', possibleAnomalies);
 
   // Return human-friendly markdown that identifies the given algorithm
   function getAlgoName(algo) {
@@ -95,11 +87,19 @@
         // https://w3c.github.io/clipboard-apis/#dom-clipboard-read
         !html.includes('systemClipboardRepresentation')
       ) {
-        recordAnomaly(spec, 'missingTaskForPromise', `${getAlgoName(algo)} has a parallel step that resolves/rejects a promise directly`);
+        report.push({
+          name: 'missingTaskForPromise',
+          message: `${getAlgoName(algo)} has a parallel step that resolves/rejects a promise directly`,
+          spec
+        });
         return true;
       }
      else if (html.match(/fire an?( \w+)? 
event/i)) { - recordAnomaly(spec, 'missingTaskForEvent', `${getAlgoName(algo)} has a parallel step that fires an event directly`); + report.push({ + name: 'missingTaskForEvent', + message: `${getAlgoName(algo)} has a parallel step that fires an event directly`, + spec + }); return true; } } @@ -133,13 +133,10 @@ function studyAlgorithms(edResults) { return anomalyFound; } - // We're only interested in specs that define algorithms - const specs = edResults.filter(spec => !!spec.algorithms); - // Study algorithms in turn. // Note: the root level of each algorithm is its first step. It may say // something like "run these steps in parallel" in particular. - for (const spec of specs) { + for (const spec of specs.filter(spec => !!spec.algorithms)) { for (const algo of spec.algorithms) { nestParallelSteps(algo); studyAlgorithmStep(spec, algo, algo); diff --git a/src/lib/study-backrefs.js b/src/lib/study-backrefs.js index 2be544fa..2d046d82 100644 --- a/src/lib/study-backrefs.js +++ b/src/lib/study-backrefs.js @@ -1,18 +1,3 @@ -import { loadCrawlResults, recordCategorizedAnomaly } from './util.js'; -import { fileURLToPath } from 'node:url'; - -const possibleAnomalies = [ - 'brokenLinks', - 'datedUrls', - 'evolvingLinks', - 'frailLinks', - 'nonCanonicalRefs', - 'notDfn', - 'notExported', - 'outdatedSpecs', - 'unknownSpecs' -]; - /** * The backrefs analyzer only checks links to other specs. This function returns * true when a link does target a spec, and false if it targets something else @@ -39,57 +24,91 @@ const matchSpecUrl = url => TODO: DRY Copied from browser-specs/src/compute-shortname.js */ -function computeShortname (url) { - function parseUrl (url) { +function computeShortname(url) { + function parseUrl(url) { // Handle /TR/ URLs - const w3cTr = url.match(/^https?:\/\/(?:www\.)?w3\.org\/TR\/([^/]+)\/$/); + const w3cTr = url.match(/^https?:\/\/(?:www\.)?w3\.org\/TR\/([^\/]+)\/$/); if (w3cTr) { return w3cTr[1]; } // Handle WHATWG specs - const whatwg = url.match(/\/\/(.+)\.spec\.whatwg\.org\/?/); + const whatwg = url.match(/\/\/(.+)\.spec\.whatwg\.org\//); if (whatwg) { - return whatwg[1]; + return whatwg[1]; } // Handle TC39 Proposals - const tc39 = url.match(/\/\/tc39\.es\/proposal-([^/]+)\/$/); + const tc39 = url.match(/\/\/tc39\.es\/proposal-([^\/]+)\/$/); if (tc39) { - return 'tc39-' + tc39[1]; + return "tc39-" + tc39[1]; } + // Handle Khronos extensions - const khronos = url.match(/https:\/\/registry\.khronos\.org\/webgl\/extensions\/([^/]+)\/$/); + const khronos = url.match(/https:\/\/registry\.khronos\.org\/webgl\/extensions\/([^\/]+)\/$/); if (khronos) { - return khronos[1]; + return khronos[1]; } // Handle extension specs defined in the same repo as the main spec // (e.g. 
generate a "gamepad-extensions" name for // https://w3c.github.io/gamepad/extensions.html") - const ext = url.match(/\/.*\.github\.io\/([^/]+)\/(extensions?)\.html$/); + const ext = url.match(/\/.*\.github\.io\/([^\/]+)\/(extensions?)\.html$/); if (ext) { return ext[1] + '-' + ext[2]; } // Handle draft specs on GitHub, excluding the "webappsec-" prefix for // specifications developed by the Web Application Security Working Group - const github = url.match(/\/.*\.github\.io\/(?:webappsec-)?([^/]+)\//); + const github = url.match(/\/.*\.github\.io\/(?:webappsec-)?([^\/]+)\//); if (github) { - return github[1]; + return github[1]; } // Handle CSS WG specs - const css = url.match(/\/drafts\.(?:csswg|fxtf|css-houdini)\.org\/([^/]+)\//); + const css = url.match(/\/drafts\.(?:csswg|fxtf|css-houdini)\.org\/([^\/]+)\//); if (css) { return css[1]; } // Handle SVG drafts - const svg = url.match(/\/svgwg\.org\/specs\/(?:svg-)?([^/]+)\//); + const svg = url.match(/\/svgwg\.org\/specs\/(?:svg-)?([^\/]+)\//); if (svg) { - return 'svg-' + svg[1]; + return "svg-" + svg[1]; + } + + // Handle IETF RFCs + const rfcs = url.match(/\/www.rfc-editor\.org\/rfc\/(rfc[0-9]+)/); + if (rfcs) { + return rfcs[1]; + } + + // Handle IETF group drafts + const ietfDraft = url.match(/\/datatracker\.ietf\.org\/doc\/html\/draft-ietf-[^\-]+-([^\/]+)/); + if (ietfDraft) { + return ietfDraft[1]; + } + + // Handle IETF individual drafts, stripping group name + // TODO: retrieve the list of IETF groups to make sure that the group name + // is an actual group name and not the beginning of the shortname: + // https://datatracker.ietf.org/api/v1/group/group/ + // (multiple requests needed due to pagination, "?limit=1000" is the max) + const ietfIndDraft = url.match(/\/datatracker\.ietf\.org\/doc\/html\/draft-[^\-]+-([^\/]+)/); + if (ietfIndDraft) { + if (ietfIndDraft[1].indexOf('-') !== -1) { + return ietfIndDraft[1].slice(ietfIndDraft[1].indexOf('-') + 1); + } + else { + return ietfIndDraft[1]; + } + } + + // Handle TAG findings + const tag = url.match(/^https?:\/\/(?:www\.)?w3\.org\/2001\/tag\/doc\/([^\/]+)\/?$/); + if (tag) { + return tag[1]; } // Return name when one was given @@ -97,7 +116,7 @@ function computeShortname (url) { return url; } - throw new Error(`Cannot extract meaningful name from ${url}`); + throw `Cannot extract meaningful name from ${url}`; } // Parse the URL to extract the name @@ -107,8 +126,8 @@ function computeShortname (url) { // Latin characters (a-z letters, digits, underscore and "-"), and that it // only contains a dot for fractional levels at the end of the name // (e.g. "blah-1.2" is good but "blah.blah" and "blah-3.1-blah" are not) - if (!name.match(/^[\w-]+((?<=-\d+)\.\d+)?$/)) { - throw new Error(`Specification name contains unexpected characters: ${name} (extracted from ${url})`); + if (!name.match(/^[\w\-]+((?<=\-v?\d+)\.\d+)?$/)) { + throw `Specification name contains unexpected characters: ${name} (extracted from ${url})`; } return name; @@ -234,18 +253,21 @@ const matchAnchor = (url, anchor) => link => { return link === (url + '#' + anchor) || link === (url + '#' + encodeURIComponent(anchor)); }; -function studyBackrefs (edResults, trResults = [], htmlFragments = {}, shortnameFilter) { - trResults = trResults || []; +async function studyBackrefs(specs, { crawlResults = null, trResults = [], htmlFragments = null } = {}) { + crawlResults = crawlResults ?? 
specs;
   const report = [];
-  edResults.forEach(spec => {
-    if (shortnameFilter && spec.shortname !== shortnameFilter) return;
-    studyLinks(spec, spec.links?.rawlinks, report, edResults, trResults, htmlFragments);
+  // Download the automatic map of multipage anchors in the HTML spec
+  const fragmentsUrl = 'https://html.spec.whatwg.org/multipage/fragment-links.json';
+  htmlFragments = htmlFragments ?? await fetch(fragmentsUrl).then(r => r.json());
+
+  specs.forEach(spec => {
+    studyLinks(spec, spec.links?.rawlinks, report, crawlResults, trResults, htmlFragments);
     // given the current limitation of classification of links for bikeshed
     // https://github.com/w3c/reffy/issues/1584
     // we also check autolinks for bikeshed specs
     if (spec.generator === "bikeshed") {
-      studyLinks(spec, spec.links?.autolinks, report, edResults, trResults, htmlFragments);
+      studyLinks(spec, spec.links?.autolinks, report, crawlResults, trResults, htmlFragments);
     }
   });
   return report;
@@ -254,7 +276,9 @@
 function studyLinks(spec, links, report, edResults, trResults, htmlFragments) {
   if (!links) return;
 
-  const recordAnomaly = recordCategorizedAnomaly(report, 'links', possibleAnomalies);
+  function recordAnomaly(spec, name, message) {
+    report.push({ name, message, spec });
+  }
 
   Object.keys(links)
     .filter(matchSpecUrl)
@@ -421,22 +445,4 @@ function studyLinks(spec, links, report, edResults, trResults, htmlFragments) {
     });
 }
 
-/**************************************************
-Export methods for use as module
-**************************************************/
-export default studyBackrefs;
-
-if (process.argv[1] === fileURLToPath(import.meta.url)) {
-  const crawl = await loadCrawlResults(process.argv[2], process.argv[3]);
-  let htmlFragments = {};
-  try {
-    console.info('Downloading HTML spec fragments data…');
-    htmlFragments = await fetch('https://html.spec.whatwg.org/multipage/fragment-links.json').then(r => r.json());
-    console.info('- done');
-  } catch (err) {
-    console.error('- failed: could not fetch HTML fragments data, may report false positive broken links on HTML spec');
-  }
-
-  const results = studyBackrefs(crawl.ed, crawl.tr, htmlFragments, process.argv[4] ?? undefined);
-  console.log(results);
-}
+export default studyBackrefs;
\ No newline at end of file
diff --git a/src/lib/study-dfns.js b/src/lib/study-dfns.js
new file mode 100644
index 00000000..b779b720
--- /dev/null
+++ b/src/lib/study-dfns.js
@@ -0,0 +1,478 @@
+/**
+ * The definitions checker compares CSS, dfns, and IDL extracts created by Reffy
+ * to detect CSS/IDL terms that do not have a corresponding dfn in the
+ * specification.
+ *
+ * Note: CSS extraction already relies on dfns and reports missing dfns in a
+ * "warnings" property. This checker simply looks at that list. 
+ * + * @module checker + */ + +import path from 'node:path'; +import { fileURLToPath } from 'node:url'; +import loadJSON from '../lib/load-json.js'; + + +/** + * List of spec shortnames that, so far, don't follow the dfns data model + */ +const specsWithObsoleteDfnsModel = [ + 'svg-animations', 'svg-markers', 'svg-strokes', 'SVG2', + 'webgl1', 'webgl2', + 'webrtc-identity' +]; + + +/** + * Return true when provided arrays are "equal", meaning that they contain the + * same items + * + * @function + * @private + * @param {Array} a First array to compare + * @param {Array} b Second array to compare + * @return {boolean} True when arrays are equal + */ +function arraysEqual(a, b) { + return Array.isArray(a) && + Array.isArray(b) && + a.length === b.length && + a.every((val, index) => val === b[index]); +} + + +/** + * Return the list of expected definitions from the CSS extract + * + * @function + * @private + * @param {Object} css The root of the object that describes CSS terms in the + * CSS extract + * @return {Array} An array of expected definitions + */ +function getExpectedDfnsFromCSS(css) { + const expected = (css.warnings ?? []) + .filter(warning => warning.msg === 'Missing definition') + .map(warning => { + return { + linkingText: [warning.name], + type: warning.type, + 'for': warning.for + }; + }); + + return expected; +} + + +/** + * Return true when the given CSS definition matches the expected definition + * + * @function + * @private + * @param {Object} expected Expected definition + * @param {Object} actual Actual definition to check + * @return {Boolean} true when actual definition matches the expected one + */ +function matchCSSDfn(expected, actual) { + return arraysEqual(expected.linkingText, actual.linkingText) && + (!expected.for || arraysEqual(expected.for, actual.for)) && + (!expected.type || (expected.type === actual.type)); +} + + +/** + * Return the list of expected definitions from the IDL extract + * + * @function + * @private + * @param {Object} css The root of the object that describes IDL terms in the + * `idlparsed` extract. + * @return {Array} An array of expected definitions + */ +function getExpectedDfnsFromIdl(idl = {}) { + // Parse IDL names that the spec defines + const idlNames = Object.values(idl.idlNames || {}); + let expected = idlNames.map(name => getExpectedDfnsFromIdlDesc(name)).flat(); + + // Parse members of IDL names that the spec extends + const idlExtendedNames = Object.values(idl.idlExtendedNames || {}); + expected = expected.concat(idlExtendedNames.map(extended => + extended.map(name => getExpectedDfnsFromIdlDesc(name, { excludeRoot: true }))) + .flat(2)); + return expected; +} + + +/** + * Return true if the given parsed IDL object describes a default toJSON + * operation that references: + * https://heycam.github.io/webidl/#default-tojson-steps + * + * @function + * @private + * @param {Object} desc Parsed IDL object to check + * @return {Boolean} true when object describes a default toJSON operation. + */ +function isDefaultToJSONOperation(desc) { + return (desc.type === 'operation') && + (desc.name === 'toJSON') && + (desc.extAttrs && desc.extAttrs.find(attr => attr.name === "Default")); +} + + +/** + * Return the expected definition for the given parsed IDL structure + * + * @function + * @public + * @param {Object} desc The object that describes the IDL term in the + * `idlparsed` extract. 
+ * @param {Object} parentDesc (optional) The object that describes the parent + * IDL term of the term to parse (used to compute the `for` property). + * @return {Object} The expected definition, or null if no expected definition + * is defined. + */ +function getExpectedDfnFromIdlDesc(idl, parentIdl) { + function serializeArgs(args = []) { + return args + .map(arg => arg.variadic ? `...${arg.name}` : arg.name) + .join(', '); + } + + let expected = { + linkingText: [idl.name], + type: idl.type, + 'for': parentIdl && (parentIdl !== idl) ? [parentIdl.name] : [] + }; + + switch (idl.type) { + case 'attribute': + case 'const': + break; + + case 'constructor': + // Ignore constructors for HTML elements, the spec has a dedicated + // section for them: + // https://html.spec.whatwg.org/multipage/dom.html#html-element-constructors + if (!parentIdl.name.startsWith('HTML')) { + expected.linkingText = [`constructor(${serializeArgs(idl.arguments)})`]; + } + else { + expected = null; + } + break; + + case 'enum': + break; + + case 'enum-value': + // The enumeration could include the empty string as a value. There + // cannot be a matching definition in that case. + // Note: look for the quoted value and the unquoted value + const value = idl.value.replace(/^"(.*)"$/, '$1'); + expected.linkingText = (value !== '') ? [`"${value}"`, value] : [`"${value}"`]; + break; + + case 'field': + expected.type = 'dict-member'; + break; + + case 'callback': + case 'callback interface': + case 'dictionary': + case 'interface': + case 'interface mixin': + case 'namespace': + expected.type = + (idl.type === 'callback interface') ? 'callback' : + (idl.type === 'interface mixin') ? 'interface' : + idl.type; + // Ignore partial definition + if (idl.partial) { + expected = null; + } + break; + + case 'includes': + expected = null; + break; + + case 'iterable': + case 'maplike': + case 'setlike': + // No definition expected for iterable, maplike and setlike members + expected = null; + break; + + case 'operation': + // Stringification behavior is typically defined with a + // "stringification behavior" definition scoped to the interface + if (idl.special === 'stringifier') { + expected.linkingText = ['stringification behavior', 'stringificationbehavior']; + expected.type = 'dfn'; + } + // Ignore special "getter", "setter", "deleter" operations when they don't + // have an identifier. They should link to a definition in the prose, but + // the labels seem arbitrary for now. + // Also ignore default toJSON operations. Steps are defined in WebIDL. + else if ((idl.name || + ((idl.special !== 'getter') && + (idl.special !== 'setter') && + (idl.special !== 'deleter'))) && + !isDefaultToJSONOperation(idl)) { + expected.linkingText = [`${idl.name}(${serializeArgs(idl.arguments)})`]; + expected.type = 'method'; + } + else { + expected = null; + } + break; + + case 'typedef': + break; + + case 'argument': + expected = null; + break; + + default: + console.warn('Unsupported IDL type', idl.type, idl); + expected = null; + break; + } + + return expected; +} + + +/** + * Return the list of expected definitions from a parsed IDL extract entry. + * + * The function is recursive. + * + * @function + * @private + * @param {Object} idl The object that describes the IDL term in the + * `idlparsed` extract. + * @return {Array} An array of expected definitions + */ +function getExpectedDfnsFromIdlDesc(idl, {excludeRoot} = {excludeRoot: false}) { + const res = []; + const parentIdl = idl; + const idlToProcess = excludeRoot ? 
[] : [idl]; + + switch (idl.type) { + case 'enum': + if (idl.values) { + idlToProcess.push(...idl.values); + } + break; + + case 'callback': + case 'callback interface': + case 'dictionary': + case 'interface': + case 'interface mixin': + case 'namespace': + if (idl.members) { + idlToProcess.push(...idl.members); + } + break; + } + + idlToProcess.forEach(idl => { + const expected = getExpectedDfnFromIdlDesc(idl, parentIdl); + if (expected) { + expected.access = 'public'; + expected.informative = false; + res.push(expected); + } + }); + + return res; +} + + +/** + * Return true when the given IDL definition matches the expected definition. + * + * The function handles overloaded methods, though not properly. That is, it + * will only find the "right" definition for an overloaded method if the number + * and/or the name of the arguments differ between the overloaded definitions. + * Otherwise it will just match the first definition that looks good. + * + * The function works around Respec's issue #3200 for methods and constructors + * that take only optional parameters: + * https://github.com/w3c/respec/issues/3200 + * + * @function + * @private + * @param {Object} expected Expected definition + * @param {Object} actual Actual definition to check + * @param {Object} options Comparison options + * @return {Boolean} true when actual definition matches the expected one + */ +function matchIdlDfn(expected, actual, + {skipArgs, skipFor, skipType} = {skipArgs: false, skipFor: false, skipType: false}) { + const fixedLt = actual.linkingText + .map(lt => lt.replace(/!overload-\d/, '')) + .map(lt => lt.replace(/\(, /, '(')); + let found = expected.linkingText.some(val => fixedLt.includes(val)); + if (!found && skipArgs) { + const names = fixedLt.map(lt => lt.replace(/\(.*\)/, '')); + found = expected.linkingText.some(val => { + const valname = val.replace(/\(.*\)/, ''); + return names.find(name => name === valname); + }); + } + return found && + (expected.for.every(val => actual.for.includes(val)) || skipFor) && + (expected.type === actual.type || skipType); +} + + +/** + * Checks the CSS and IDL extracts against the dfns extract for the given spec + * + * @function + * @public + * @param {Object} spec Crawl result for the spec to parse + * @return {Object} An object with a css and idl property, each of them holding + * an array of missing CSS or IDL definitions. The function returns null when + * there are no missing definitions. + */ +function checkSpecDefinitions(spec) { + if (specsWithObsoleteDfnsModel.includes(spec.shortname)) { + return { obsoleteDfnsModel: true }; + } + + const dfns = spec.dfns ?? []; + const css = spec.css ?? {}; + const idl = spec.idlparsed ?? {}; + + // Make sure that all expected CSS definitions exist in the dfns extract + const expectedCSSDfns = getExpectedDfnsFromCSS(css); + const missingCSSDfns = expectedCSSDfns.map(expected => { + let actual = dfns.find(dfn => matchCSSDfn(expected, dfn)); + if (!actual && !expected.type) { + // Right definition is missing. For valuespaces that define functions, + // look for a function definition without the enclosing "<>" instead + const altText = [expected.linkingText[0].replace(/^<(.*)\(\)>$/, '$1()')]; + actual = dfns.find(dfn => arraysEqual(altText, dfn.linkingText)); + } + if (!actual && expected.value) { + // Still missing? 
For valuespaces that define functions, this may be + // because there is no definition without parameters, try to find the + // actual value instead + actual = dfns.find(dfn => arraysEqual([expected.value], dfn.linkingText)); + } + if (actual) { + // Right definition found + return null; + } + else { + // Right definition is missing, there may be a definition that looks + // like the one we're looking for + const found = dfns.find(dfn => + arraysEqual(dfn.linkingText, expected.linkingText)); + return { expected, found }; + } + }).filter(missing => !!missing); + + // Make sure that all expected IDL definitions exist in the dfns extract + const expectedIdlDfns = getExpectedDfnsFromIdl(idl); + const missingIdlDfns = expectedIdlDfns.map(expected => { + let actual = dfns.find(dfn => matchIdlDfn(expected, dfn)); + if (actual) { + // Right definition found + return null; + } + else { + // Right definition is missing, include the interface's definitions to + // be able to link to it in the report + let parent = null; + if (expected.for && expected.for[0]) { + parent = dfns.find(dfn => + (dfn.linkingText[0] === expected.for[0]) && + ['callback', 'dictionary', 'enum', 'interface', 'namespace'].includes(dfn.type)); + } + + // Look for a definition that seems as close as possible to the one + // we're looking for, in the following order: + // 1. For operations, find a definition without taking arguments into + // account and report possible match with a "warning" flag. + // 2. For terms linked to a parent interface-like object, find a match + // scoped to the same parent without taking the type into account. + // 3. Look for a definition with the same name, neither taking the type + // nor the parent into account. + let found = dfns.find(dfn => matchIdlDfn(expected, dfn, { skipArgs: true })); + if (found) { + return { expected, found, for: parent, warning: true }; + } + found = dfns.find(dfn => matchIdlDfn(expected, dfn, + { skipArgs: true, skipType: true })); + if (found) { + return { expected, found, for: parent }; + } + found = dfns.find(dfn => matchIdlDfn(expected, dfn, + { skipArgs: true, skipType: true, skipFor: true })); + return { expected, found, for: parent }; + } + }).filter(missing => !!missing); + + // Report results + return { + css: missingCSSDfns, + idl: missingIdlDfns + }; +} + + +/** + * Format the anomaly message to report as Markdown + * + * @function + * @private + * @param {Object} missing Object that describes missing dfn + */ +function formatAnomalyMessage(missing) { + const exp = missing.expected; + const found = missing.found; + const foundFor = (found && found.for && found.for.length > 0) ? + ' for ' + found.for.map(f => `\`${f}\``).join(',') : + ''; + return '`' + exp.linkingText[0] + '` ' + + (exp.type ? `with type \`${exp.type}\`` : '') + + (missing.for ? ` for [\`${missing.for.linkingText[0]}\`](${missing.for.href})` : '') + + (found ? `, but found [\`${found.linkingText[0]}\`](${found.href}) with type \`${found.type}\`${foundFor}` : ''); +} + + +/** + * Checks the CSS and IDL extracts against the dfns extract for all specs in + * the report, and return a list of missing definitions. 
+ * + * @function + * @public + */ +export default function studyDefinitions(specs) { + return specs + .map(spec => { + const missing = checkSpecDefinitions(spec); + const res = []; + for (const type of ['css', 'idl']) { + const anomalies = missing[type]; + for (const anomaly of anomalies) { + res.push({ + name: 'missingDfns', + message: formatAnomalyMessage(anomaly), + spec + }); + } + } + return res; + }) + .flat(); +} diff --git a/src/lib/study-refs.js b/src/lib/study-refs.js index f7542486..19bc99c6 100644 --- a/src/lib/study-refs.js +++ b/src/lib/study-refs.js @@ -1,31 +1,116 @@ -import { loadCrawlResults, recordCategorizedAnomaly } from './util.js'; -import { fileURLToPath } from 'node:url'; +import { canonicalizeUrl, canonicalizesTo } from './canonicalize-url.js'; -const possibleAnomalies = [ - 'discontinuedReferences' -]; +/** + * Helper function that returns true when the given URL seems to target a real + * "spec" (as opposed to, say, a Wiki page, or something else) + */ +const matchSpecUrl = url => + url.match(/spec.whatwg.org/) || + url.match(/www.w3.org\/TR\/[a-z0-9]/) || + (url.match(/w3c.github.io/) && ! url.match(/w3c.github.io\/test-results\//)); -function studyReferences (edResults) { - const report = []; - const recordAnomaly = recordCategorizedAnomaly(report, 'refs', possibleAnomalies); - edResults.forEach(spec => { - (spec.refs?.normative || []).forEach(ref => { - const referencedSpec = edResults.find(s => s.url === ref.url || s?.nightly?.url === ref.url || s?.nightly?.alternateUrls?.includes(ref.url)); +function studyReferences (specs, { crawlResults = null } = {}) { + crawlResults = crawlResults ?? specs; - if (referencedSpec && referencedSpec.standing === "discontinued") { + // Construct spec equivalence from the crawl report + const specEquivalents = {}; + for (const spec of crawlResults) { + for (const v of (spec.versions ?? [])) { + if (specEquivalents[v]) { + if (Array.isArray(specEquivalents[v])) { + specEquivalents[v].push(spec.url); + } + else { + specEquivalents[v] = [specEquivalents[v], spec.url]; + } + } + else { + specEquivalents[v] = spec.url; + } + } + } + + // Strong canonicalization options to find references + const useEquivalents = { + datedToLatest: true, + equivalents: specEquivalents + }; - const newSpecsLinks = edResults.filter(s => referencedSpec.obsoletedBy?.includes(s.shortname)).map(s => `[${s.shortname}](${s?.nightly?.url || s.url})`); - recordAnomaly(spec, 'discontinuedReferences', `[${ref.name}](${ref.url}) ${newSpecsLinks.length ? `has been obsoleted by ${newSpecsLinks}` : `is discontinued, no known replacement reference`}`); + const report = []; + for (const spec of specs) { + for (const ref of spec.refs?.normative ?? []) { + const referencedSpec = crawlResults.find(s => + s.url === ref.url || + s?.nightly?.url === ref.url || + s?.nightly?.alternateUrls?.includes(ref.url)); + if (referencedSpec && referencedSpec.standing === "discontinued") { + const newSpecsLinks = crawlResults + .filter(s => referencedSpec.obsoletedBy?.includes(s.shortname)) + .map(s => `[${s.shortname}](${s?.nightly?.url || s.url})`); + report.push({ + name: 'discontinuedReferences', + message: `[${ref.name}](${ref.url}) ${newSpecsLinks.length ? 
`has been obsoleted by ${newSpecsLinks}` : `is discontinued, no known replacement reference`}`, + spec + }); } - }); - }); + } + + // Detect links to external specifications within the body of the spec + // that do not have a corresponding entry in the list of references + // (all links to external specs should have a companion ref) + Object.keys(spec.links?.rawlinks ?? {}) + .filter(matchSpecUrl) + .filter(l => { + // Filter out "good" and "inconsistent" references + const canon = canonicalizeUrl(l, useEquivalents); + const refs = (spec.refs?.normative ?? []).concat(spec.refs?.informative ?? []); + return !refs.find(r => canonicalizesTo(r.url, canon, useEquivalents)); + }) + .filter(l => + // Ignore links to other versions of "self". There may + // be cases where it would be worth reporting them but + // most of the time they appear in "changelog" sections. + !canonicalizesTo(l, spec.url, useEquivalents) && + !canonicalizesTo(l, spec.versions, useEquivalents) + ) + .forEach(l => { + report.push({ + name: 'missingReferences', + message: l, + spec + }); + }); + + // Detect links to external specifications within the body of the spec + // that have a corresponding entry in the references, but for which the + // reference uses a different URL, e.g., because the link targets the + // Editor's Draft, whereas the reference targets the latest published + // version + Object.keys(spec.links?.rawlinks ?? {}) + .filter(matchSpecUrl) + .map(l => { + const canonSimple = canonicalizeUrl(l); + const canon = canonicalizeUrl(l, useEquivalents); + const refs = (spec.refs?.normative ?? []) + .concat(spec.refs?.informative ?? []); + + // Filter out "good" references + if (refs.find(r => canonicalizesTo(r.url, canonSimple))) { + return null; + } + const ref = refs.find(r => canonicalizesTo(r.url, canon, useEquivalents)); + return (ref ? { link: l, ref } : null); + }) + .filter(anomaly => !!anomaly) + .forEach(anomaly => { + report.push({ + name: 'inconsistentReferences', + message: `${anomaly.link}, related reference "${anomaly.ref.name}" uses URL ${anomaly.ref.url}`, + spec + }); + }); + } return report; } export default studyReferences; - -if (process.argv[1] === fileURLToPath(import.meta.url)) { - const crawl = await loadCrawlResults(process.argv[2]); - const results = studyReferences(crawl.ed); - console.log(results); -} diff --git a/src/lib/study-webidl.js b/src/lib/study-webidl.js index fc09365f..30e9b5ab 100644 --- a/src/lib/study-webidl.js +++ b/src/lib/study-webidl.js @@ -5,52 +5,26 @@ * object structure: * * { - * "category": "webidl", * "name": "type of anomaly", * "message": "Description of the anomaly", - * "specs": [ - * { spec that contains or triggers the anomaly }, - * { another spec that contains or triggers the anomaly }, - * ... - * ] + * "spec": { spec that contains or triggers the anomaly } * } + * + * Some anomalies may be associated with more than one spec, when the code + * cannot tell which spec needs fixing (e.g., when checking duplicates while + * merging partials). In such cases, the `spec` property is replaced by a + * `specs` property that contains an array of specs. * - * All anomalies will be associated with at least one spec (so specs.length > 0) - * but some of them may be associated with more than one, when the code cannot - * tell which of them needs to be fixed (e.g. when checking duplicates while - * merging partials). - * - * The spec object returned in the "specs" array is the spec object provided in - * the crawl results parameter. 
+ * The spec object returned in the `spec` and `specs` properties is the spec + * object provided in the crawl results parameter. */ -import { recordCategorizedAnomaly } from './util.js'; import * as WebIDL2 from 'webidl2'; const getSpecs = list => [...new Set(list.map(({ spec }) => spec))]; const specName = spec => spec.shortname ?? spec.url; const dfnName = dfn => `${dfn.idl.partial ? 'partial ' : ''}${dfn.idl.type} "${dfn.idl.name}"`; -const possibleAnomalies = [ - 'incompatiblePartialIdlExposure', - 'invalid', - 'noExposure', - 'noOriginalDefinition', - 'overloaded', - 'redefined', - 'redefinedIncludes', - 'redefinedMember', - 'redefinedWithDifferentTypes', - 'singleEnumValue', - 'unexpectedEventHandler', - 'unknownExposure', - 'unknownExtAttr', - 'unknownType', - 'wrongCaseEnumValue', - 'wrongKind', - 'wrongType' -]; - const basicTypes = new Set([ // Types defined by Web IDL itself: 'any', // https://webidl.spec.whatwg.org/#idl-any @@ -192,7 +166,7 @@ function describeMember (member) { return desc; } -function studyWebIdl (edResults, curatedResults) { +function studyWebIdl (specs, { curatedResults = [] } = {}) { const report = []; // List of anomalies to report const dfns = {}; // Index of IDL definitions (save includes) const includesStatements = {}; // Index of "includes" statements @@ -201,7 +175,14 @@ function studyWebIdl (edResults, curatedResults) { const usedExtAttrs = {}; // Index of extended attributes // Record an anomaly for the given spec(s). - const recordAnomaly = recordCategorizedAnomaly(report, 'webidl', possibleAnomalies); + function recordAnomaly (spec, name, message) { + if (Array.isArray(spec)) { + report.push({ name, message, specs: spec }); + } + else { + report.push({ name, message, spec }); + } + } function inheritsFrom (iface, ancestor) { if (!iface.inheritance) return false; @@ -397,7 +378,7 @@ function studyWebIdl (edResults, curatedResults) { } } - edResults + specs // We're only interested in specs that define Web IDL content .filter(spec => !!spec.idl) @@ -666,7 +647,4 @@ function studyWebIdl (edResults, curatedResults) { return report; } -/************************************************** -Export methods for use as module -**************************************************/ export default studyWebIdl; diff --git a/src/lib/study.js b/src/lib/study.js new file mode 100644 index 00000000..20cb96f6 --- /dev/null +++ b/src/lib/study.js @@ -0,0 +1,498 @@ +import studyDfns from './study-dfns.js'; +import studyAlgorithms from './study-algorithms.js'; +import studyBackrefs from './study-backrefs.js'; +import studyRefs from './study-refs.js'; +import studyWebIdl from './study-webidl.js'; +import isInMultiSpecRepository from './is-in-multi-spec-repo.js'; +import { recordCategorizedAnomaly } from './util.js'; + +/** + * List of anomalies, grouped per study function + */ +const anomalyGroups = [ + { + name: 'generic', + title: 'Generic', + description: 'The following errors prevented the spec from being analyzed', + types: [ + { + name: 'error', + title: 'Crawl error', + description: 'The following crawl errors occurred' + } + ], + study: (specs) => specs + .filter(spec => !!spec.error) + .map(spec => Object.assign( + { name: 'error', message: spec.error, spec } + )) + }, + + { + name: 'dfns', + title: 'Problems with definitions', + description: 'The following problems were identified in term definitions', + types: [ + { + name: 'missingDfns', + title: 'Missing definitions', + description: 'The following constructs were found without a definition' + } + ], + study: 
studyDfns
+  },
+
+  {
+    name: 'backrefs',
+    title: 'Problems with links to other specs',
+    description: 'The following problems were identified when analyzing links to other specifications',
+    types: [
+      {
+        name: 'brokenLinks',
+        title: 'Broken links',
+        description: 'The following links to other specifications were detected as pointing to non-existing anchors'
+      },
+      {
+        name: 'datedUrls',
+        title: 'Links to dated TR URLs',
+        description: 'The following links target a dated version of a specification'
+      },
+      {
+        name: 'evolvingLinks',
+        title: 'Links to now-gone anchors',
+        description: 'The following links in the specification link to anchors that no longer exist in the Editor\'s Draft of the targeted specification'
+      },
+      { name: 'frailLinks', title: 'Unstable link anchors' },
+      {
+        name: 'nonCanonicalRefs',
+        title: 'Non-canonical links',
+        description: 'The following links were detected as pointing to outdated URLs'
+      },
+      {
+        name: 'notDfn',
+        title: 'Links to unofficial anchors',
+        description: 'The following links were detected as pointing to anchors that are neither definitions nor headings in the targeted specification'
+      },
+      {
+        name: 'notExported',
+        title: 'Links to non-exported definitions',
+        description: 'The following links were detected as pointing to a private definition in the targeted specification'
+      },
+      {
+        name: 'outdatedSpecs',
+        title: 'Outdated references',
+        description: 'The following links were detected as pointing to outdated specifications'
+      },
+      {
+        name: 'unknownSpecs',
+        title: 'Links to unknown specs',
+        description: 'The following links were detected as pointing to documents that are not recognized as specifications'
+      }
+    ],
+    study: studyBackrefs,
+    studyParams: ['tr']
+  },
+
+  {
+    name: 'algorithms',
+    title: 'Problems with algorithms',
+    description: 'The following problems were identified when analyzing algorithms',
+    types: [
+      {
+        name: 'missingTaskForPromise',
+        title: 'Missing tasks in parallel steps to handle a promise',
+        description: 'The following algorithms resolve or reject a Promise within a step that runs [in parallel](https://html.spec.whatwg.org/multipage/infrastructure.html#in-parallel) without first queuing a task'
+      },
+      {
+        name: 'missingTaskForEvent',
+        title: 'Missing tasks in parallel steps to fire an event',
+        description: 'The following algorithms fire an event within a step that runs [in parallel](https://html.spec.whatwg.org/multipage/infrastructure.html#in-parallel) without first queuing a task'
+      }
+    ],
+    study: studyAlgorithms
+  },

+  {
+    name: 'refs',
+    title: 'Problems with references',
+    description: 'The following problems were identified when analyzing the list of references',
+    types: [
+      {
+        name: 'discontinuedReferences',
+        title: 'Normative references to discontinued specs',
+        description: 'The following normative references were detected as pointing to discontinued specifications'
+      },
+      {
+        name: 'missingReferences',
+        title: 'Missing references',
+        description: 'The following links target specifications that are not mentioned in the list of references'
+      },
+      {
+        name: 'inconsistentReferences',
+        title: 'Inconsistent reference links',
+        description: 'The following links use a different URL for the targeted specification from the URL defined in the references'
+      }
+    ],
+    study: studyRefs
+  },
+
+  {
+    name: 'webidl',
+    title: 'Web IDL problems',
+    description: 'The following Web IDL problems were identified',
+    types: [
+      { name: 'incompatiblePartialIdlExposure', title: 'Incompatible `[Exposed]` attribute in partial definitions' },
+      { name: 'invalid', title: 'Invalid Web IDL' },
+      { name: 'noExposure', title: 'Missing `[Exposed]` attributes' },
+      { name: 'noOriginalDefinition', title: 'Missing base interfaces' },
+      { name: 'overloaded', title: 'Invalid overloaded operations' },
+      { name: 'redefined', title: 'Duplicated IDL names' },
+      { name: 'redefinedIncludes', title: 'Duplicated `includes` statements' },
+      { name: 'redefinedMember', title: 'Duplicated members' },
+      { name: 'redefinedWithDifferentTypes', title: 'Duplicated IDL names with different types' },
+      { name: 'singleEnumValue', title: 'Enums with a single value' },
+      { name: 'unexpectedEventHandler', title: 'Missing `EventTarget` inheritances' },
+      { name: 'unknownExposure', title: 'Unknown globals in `[Exposed]` attribute' },
+      { name: 'unknownExtAttr', title: 'Unknown extended attributes' },
+      { name: 'unknownType', title: 'Unknown Web IDL type' },
+      { name: 'wrongCaseEnumValue', title: 'Enums with wrong casing' },
+      { name: 'wrongKind', title: 'Invalid inheritance chains' },
+      { name: 'wrongType', title: 'Web IDL names incorrectly used as types' }
+    ],
+    study: studyWebIdl,
+    studyParams: ['curated']
+  }
+];
+
+
+/**
+ * Possible report structures
+ */
+const reportStructures = [
+  'flat',
+  'type+spec',
+  'group+spec>type',
+  'spec>type',
+  'spec>group>type',
+  'type>spec',
+  'group>type>spec',
+  'group>spec>type'
+];
+
+
+// Compute mapping between an anomaly type and its parent group
+const anomalyToGroup = {};
+for (const group of anomalyGroups) {
+  for (const type of group.types) {
+    anomalyToGroup[type.name] = group;
+  }
+}
+
+/**
+ * Return an object that describes the requested anomaly type
+ */
+function getAnomalyType(name) {
+  for (const group of anomalyGroups) {
+    const type = group.types.find(t => t.name === name);
+    if (type) {
+      return Object.assign({}, type);
+    }
+  }
+  return null;
+}
+
+/**
+ * Return an object that describes the requested anomaly group
+ */
+function getAnomalyGroup(name) {
+  for (const group of anomalyGroups) {
+    if (group.name === name) {
+      return {
+        name: group.name,
+        title: group.title
+      };
+    }
+  }
+  return null;
+}
+
+/**
+ * Return an object that describes the requested anomaly group
+ * from the given anomaly type
+ */
+function getAnomalyGroupFromType(type) {
+  const group = anomalyToGroup[type];
+  return group ? getAnomalyGroup(group.name) : null;
+}
+
+
+/**
+ * Structure a flat list of anomalies to the requested structure
+ */
+function structureResults(structure, anomalies, crawlResults) {
+  const levels = structure.split('>')
+    .map(level => level.replace(/\s+/g, ''));
+  const report = [];
+
+  switch (levels[0]) {
+    case 'flat':
+      for (const anomaly of anomalies) {
+        report.push(anomaly);
+      }
+      break;
+
+    case 'type+spec':
+      for (const anomaly of anomalies) {
+        const type = getAnomalyType(anomaly.name);
+        for (const spec of anomaly.specs) {
+          let entry = report.find(entry =>
+            entry.type.name === anomaly.name &&
+            entry.spec.shortname === spec.shortname);
+          if (!entry) {
+            const titlePrefix = isInMultiSpecRepository(spec, crawlResults) ? 
+ `[${spec.shortname}] ` : ''; + entry = { + title: `${titlePrefix}${type.title} in ${spec.title}`, + type, spec, anomalies: [] + }; + report.push(entry); + } + entry.anomalies.push(anomaly); + } + } + break; + + case 'group+spec': + for (const anomaly of anomalies) { + const group = anomalyToGroup[anomaly.name]; + for (const spec of anomaly.specs) { + let entry = report.find(entry => + entry.group.name === group.name && + entry.spec.shortname === spec.shortname); + if (!entry) { + const titlePrefix = isInMultiSpecRepository(spec, crawlResults) ? + `[${spec.shortname}] ` : ''; + entry = { + title: `${titlePrefix}${group.title} in ${spec.title}`, + group, spec, anomalies: [] + }; + report.push(entry); + } + entry.anomalies.push(anomaly); + } + } + break; + + case 'spec': + for (const anomaly of anomalies) { + for (const spec of anomaly.specs) { + let entry = report.find(entry => + entry.spec.shortname === spec.shortname); + if (!entry) { + entry = { + title: spec.title, + spec, anomalies: [] + }; + report.push(entry); + } + entry.anomalies.push(anomaly); + } + } + break; + + case 'type': + for (const anomaly of anomalies) { + const type = getAnomalyType(anomaly.name); + let entry = report.find(entry => entry.type.name === anomaly.name); + if (!entry) { + entry = { + title: type.title, + type, anomalies: [] + }; + report.push(entry); + } + entry.anomalies.push(anomaly); + } + break; + + case 'group': + for (const anomaly of anomalies) { + const group = anomalyToGroup[anomaly.name]; + let entry = report.find(entry => entry.group.name === group.name); + if (!entry) { + entry = { + title: group.title, + group, anomalies: [] + }; + report.push(entry); + } + entry.anomalies.push(anomaly); + } + break; + } + + if (levels.length > 1) { + const itemsStructure = levels.slice(1).join('>'); + for (const entry of report) { + entry.items = structureResults(itemsStructure, entry.anomalies, crawlResults); + delete entry.anomalies; + } + } + return report; +} + + +function makeLowerCase(description) { + return description.charAt(0).toLowerCase() + description.slice(1); +} + +function pad(str, depth) { + while (depth > 1) { + str = ' ' + str; + depth -= 1; + } + return str; +} + +function serializeEntry(entry, depth = 0) { + let res = ''; + if (entry.spec && entry.group) { + res = `While crawling [${entry.spec.title}](${entry.spec.crawled}), ${makeLowerCase(entry.group.description ?? entry.group.title)}:`; + } + else if (entry.spec && entry.type) { + res = `While crawling [${entry.spec.title}](${entry.spec.crawled}), ${makeLowerCase(entry.type.description ?? entry.type.title)}:`; + } + else if (entry.group) { + if (depth === 0) { + res = (entry.group.description ?? entry.group.title) + ':'; + } + else { + res = pad(`* ${entry.group.title}`, depth); + } + } + else if (entry.type) { + if (depth === 0) { + res = (entry.type.description ?? entry.type.title) + ':'; + } + else { + res = pad(`* ${entry.type.title}`, depth); + } + } + else if (entry.spec) { + if (depth === 0) { + res = `While crawling [${entry.spec.title}](${entry.spec.crawled}), the following anomalies were identified:`; + } + else { + res = pad(`* [${entry.spec.title}](${entry.spec.crawled})`, depth); + } + } + else if (entry.message) { + res = pad(`* ${entry.message}`, depth); + } + + for (const item of entry.items ?? []) { + res += '\n' + serializeEntry(item, depth + 1); + } + for (const anomaly of entry.anomalies ?? 
[]) {
+    res += '\n' + serializeEntry(anomaly, depth + 1);
+  }
+
+  return res;
+}
+
+
+/**
+ * Format the structured report as JSON or markdown, or a combination of both
+ */
+function formatReport(format, report) {
+  if (format === 'json') {
+    return report;
+  }
+  else if (format === 'issue') {
+    return report.map(entry => Object.assign({
+      title: entry.title,
+      content: serializeEntry(entry)
+    }));
+  }
+  else if (format === 'full') {
+    return [
+      {
+        title: 'Study report',
+        content: report.map(entry => serializeEntry(entry))
+      }
+    ];
+  }
+}
+
+
+/**
+ * Main function that studies a crawl result and returns a structured
+ * report.
+ */
+export default async function study(specs, options) {
+  options = Object.assign({}, options ?? {});
+  const what = options.what ?? ['all'];
+  const structure = options.structure ?? 'type + spec';
+  const format = options.format ?? 'issue';
+
+  if (!what.includes('all')) {
+    const validWhat = what.every(name =>
+      anomalyGroups.find(g => g.name === name || g.types.find(t => t.name === name)));
+    if (!validWhat) {
+      throw new Error('Invalid `what` option');
+    }
+  }
+  if (!reportStructures.find(s => structure.replace(/\s+/g, '') === s)) {
+    throw new Error('Invalid `structure` option');
+  }
+
+  // Only keep specs that caller wants to study
+  // (but note study functions that analyze references need the whole list!)
+  options.crawlResults = specs;
+  if (options.specs) {
+    specs = options.crawlResults.filter(spec => options.specs.find(s => s.shortname === spec.shortname));
+  }
+
+  // Anomalies are studied in groups of related anomalies, let's compute the
+  // studies that we need to run to answer the request
+  const groups = anomalyGroups.filter(group =>
+    what.includes('all') ||
+    what.includes(group.name) ||
+    group.types.find(type => what.includes(type.name)));
+
+  // Run studies and fill the anomaly report accordingly
+  let anomalies = [];
+  for (const group of groups) {
+    const studyResult = await group.study(specs, options);
+    const recordAnomaly = recordCategorizedAnomaly(
+      anomalies, group.name, group.types.map(t => t.name));
+    studyResult.forEach(an => recordAnomaly(an.spec ?? 
an.specs, an.name, an.message)); + } + + // Only keep anomalies whose types we're interested in + anomalies = anomalies.filter(anomaly => + what.includes('all') || + what.includes(anomaly.name) || + what.includes(anomalyToGroup[anomaly.name].name)); + + // Now that we have a flat report of anomalies, + // let's structure and serialize it as requested + const report = structureResults(structure, anomalies, options.crawlResults); + + // And serialize it using the right format + const result = { + type: 'study', + date: (new Date()).toJSON(), + structure, + what, + stats: { + crawled: options.crawlResults.length, + studied: specs.length, + anomalies: anomalies.length + }, + results: formatReport(format, report) + }; + + // Return the structured report + return result; +} \ No newline at end of file diff --git a/test/study-algorithms.js b/test/study-algorithms.js new file mode 100644 index 00000000..c5b19888 --- /dev/null +++ b/test/study-algorithms.js @@ -0,0 +1,37 @@ +import study from '../src/lib/study-algorithms.js'; +import { assertNbAnomalies, assertAnomaly } from './util.js'; + +describe('The algorithms analyser', () => { + const specUrl = 'https://www.w3.org/TR/spec'; + const specUrl2 = 'https://www.w3.org/TR/spec2'; + + function toCrawlResult(algorithms) { + return [{ url: specUrl, algorithms }]; + } + + it('reports no anomaly if there are no algorithms', () => { + const crawlResult = toCrawlResult([]); + const report = study(crawlResult); + assertNbAnomalies(report, 0); + }); + + it('reports an error when a step resolves a promise in parallel', () => { + const crawlResult = toCrawlResult([ + { + html: 'The encodingInfo() method MUST run the following steps:', + rationale: 'if', + steps: [ + { html: 'Let p be a new promise.' }, + { html: 'In parallel, run the Create a MediaCapabilitiesEncodingInfo algorithm with configuration and resolve p with its result.' }, + { html: 'Return p.' 
} + ] + } + ]); + const report = study(crawlResult); + assertAnomaly(report, 0, { + name: 'missingTaskForPromise', + message: 'The algorithm that starts with "The encodingInfo() method MUST run the following steps:" has a parallel step that resolves/rejects a promise directly', + spec: { url: 'https://www.w3.org/TR/spec' } + }); + }); +}); \ No newline at end of file diff --git a/test/study-backrefs.js b/test/study-backrefs.js index ea755137..8e6e4eac 100644 --- a/test/study-backrefs.js +++ b/test/study-backrefs.js @@ -3,7 +3,7 @@ */ /* global describe, it */ -import studyBackrefs from '../src/lib/study-backrefs.js'; +import study from '../src/lib/study-backrefs.js'; import { assertNbAnomalies, assertAnomaly } from './util.js'; const specEdUrl = 'https://w3c.github.io/spec/'; @@ -48,28 +48,33 @@ const populateSpec = (url, ids, links, dfns) => { function toCrawlResults (ids, links, trIds = ids) { return { - ed: [populateSpec(specEdUrl, toFullIds(specEdUrl, ids), []), - populateSpec(specEdUrl2, [], toLinks(specEdUrl, links))], - tr: [populateSpec(specEdUrl, toFullIds(specEdUrl, trIds), [])] + ed: [ + populateSpec(specEdUrl, toFullIds(specEdUrl, ids), []), + populateSpec(specEdUrl2, [], toLinks(specEdUrl, links)) + ], + tr: [ + populateSpec(specEdUrl, toFullIds(specEdUrl, trIds), []) + ] }; } describe('The links analyser', () => { - it('reports no anomaly if links are valid', () => { + it('reports no anomaly if links are valid', async () => { const ids = ['validid']; const crawlResult = toCrawlResults(ids, ids); - const report = studyBackrefs(crawlResult.ed, crawlResult.tr); + const report = await study(crawlResult.ed, { htmlFragments: {} }); assertNbAnomalies(report, 0); }); - it('reports a broken link', () => { + it('reports a broken link', async () => { const ids = ['validid']; const crawlResult = toCrawlResults([], ids); - const report = studyBackrefs(crawlResult.ed, crawlResult.tr); + const report = await study(crawlResult.ed, { htmlFragments: {} }); assertNbAnomalies(report, 1); assertAnomaly(report, 0, { - category: 'links', - message: specEdUrl + '#' + ids[0] + name: 'brokenLinks', + message: specEdUrl + '#' + ids[0], + spec: { url: 'https://www.w3.org/TR/spec2/' } }); }); diff --git a/test/study-dfns.js b/test/study-dfns.js new file mode 100644 index 00000000..b656c52c --- /dev/null +++ b/test/study-dfns.js @@ -0,0 +1,39 @@ +import studyDefinitions from '../src/lib/study-dfns.js'; +import { assertNbAnomalies, assertAnomaly } from './util.js'; + +describe('The definitions analyser', () => { + const specUrl = 'https://www.w3.org/TR/spec'; + const specUrl2 = 'https://www.w3.org/TR/spec2'; + + function toCrawlResult({ css = {}, dfns = [], idlparsed = {} }) { + const crawlResult = [{ + url: specUrl, + css, dfns, idlparsed + }]; + return crawlResult; + } + + it('reports no anomaly if there are no definitions', () => { + const crawlResult = toCrawlResult({}); + const report = studyDefinitions(crawlResult); + assertNbAnomalies(report, 0); + }); + + it('reports missing definition anomalies from CSS extracts', () => { + const crawlResult = toCrawlResult({ + css: { + warnings: [{ + msg: 'Missing definition', + name: 'no-def', + type: 'value' + }] + } + }); + const report = studyDefinitions(crawlResult); + assertAnomaly(report, 0, { + name: 'missingDfns', + message: '`no-def` with type `value`', + spec: { url: 'https://www.w3.org/TR/spec' } + }); + }); +}); \ No newline at end of file diff --git a/test/study-refs.js b/test/study-refs.js index 11a9db18..3250e183 100644 --- a/test/study-refs.js 
+++ b/test/study-refs.js @@ -3,7 +3,7 @@ */ /* global describe, it */ -import studyReferences from '../src/lib/study-refs.js'; +import study from '../src/lib/study-refs.js'; import { assertNbAnomalies, assertAnomaly } from './util.js'; const specEdUrl = 'https://w3c.github.io/spec/'; @@ -14,6 +14,9 @@ function toRefs (name, url) { return [ {name, url} ]; } +const toTr = url => url.replace( + 'https://w3c.github.io', + 'https://www.w3.org/TR'); const populateSpec = (url, refs = [], standing = "good", obsoletedBy) => { const shortname = url.slice(0, -1).split('/').pop(); @@ -25,6 +28,9 @@ const populateSpec = (url, refs = [], standing = "good", obsoletedBy) => { nightly: { url }, + release: { + url: toTr(url) + }, shortname, standing, obsoletedBy @@ -39,31 +45,62 @@ function toEdCrawlResults (standing = "good", replacements) { ]; } -describe('The reference analyser', () => { +describe('The references analyser', () => { it('reports no anomaly if references are not discontinued', () => { const crawlResult = toEdCrawlResults(); - const report = studyReferences(crawlResult); + const report = study(crawlResult); assertNbAnomalies(report, 0); }); it('reports a discontinued reference with a replacement', () => { const crawlResult = toEdCrawlResults("discontinued", ["spec3"]); - const report = studyReferences(crawlResult); + const report = study(crawlResult); assertNbAnomalies(report, 1); assertAnomaly(report, 0, { - category: 'refs', - message: /spec3/ + name: 'discontinuedReferences', + message: /spec3/, + spec: { url: specEdUrl } }); }); it('reports a discontinued reference without a replacement', () => { const crawlResult = toEdCrawlResults("discontinued"); - const report = studyReferences(crawlResult); + const report = study(crawlResult); + assertNbAnomalies(report, 1); + assertAnomaly(report, 0, { + name: 'discontinuedReferences', + message: /no known replacement/, + spec: { url: specEdUrl } + }); + }); + + it('reports a missing reference', () => { + const spec = populateSpec(specEdUrl); + spec.links = { rawlinks: {} }; + spec.links.rawlinks[specEdUrl2] = {}; + const crawlResult = [spec]; + const report = study(crawlResult); assertNbAnomalies(report, 1); assertAnomaly(report, 0, { - category: 'refs', - message: /no known replacement/ + name: 'missingReferences', + message: specEdUrl2, + spec: { url: specEdUrl } }); }); + it('reports an inconsistent reference', () => { + const spec = populateSpec(specEdUrl, toRefs('spec2', toTr(specEdUrl2))); + spec.links = { rawlinks: {} }; + spec.links.rawlinks[specEdUrl2] = {}; + const spec2 = populateSpec(specEdUrl2); + spec2.versions = [toTr(specEdUrl2)]; + const crawlResult = [spec, spec2]; + const report = study(crawlResult); + assertNbAnomalies(report, 1); + assertAnomaly(report, 0, { + name: 'inconsistentReferences', + message: `${specEdUrl2}, related reference "spec2" uses URL ${toTr(specEdUrl2)}`, + spec: { url: specEdUrl } + }); + }); }); diff --git a/test/study-webidl.js b/test/study-webidl.js index 4660b0e0..52e13a3e 100644 --- a/test/study-webidl.js +++ b/test/study-webidl.js @@ -4,7 +4,7 @@ */ /* global describe, it */ -import studyWebIdl from '../src/lib/study-webidl.js'; +import study from '../src/lib/study-webidl.js'; import { assertNbAnomalies, assertAnomaly } from './util.js'; describe('The Web IDL analyser', () => { @@ -21,7 +21,7 @@ describe('The Web IDL analyser', () => { function analyzeIdl (idl, idlSpec2) { const crawlResult = toCrawlResult(idl, idlSpec2); - return studyWebIdl(crawlResult); + return study(crawlResult); } it('reports no 
anomaly if IDL is valid', () => { @@ -86,12 +86,11 @@ interface Invalid; `); assertNbAnomalies(report, 1); assertAnomaly(report, 0, { - category: 'webidl', name: 'invalid', message: `Syntax error at line 3, since \`interface Invalid\`: interface Invalid; ^ Bodyless interface`, - specs: [{ url: specUrl }] + spec: { url: specUrl } }); }); @@ -105,11 +104,11 @@ interface Invalid; [Global=Window,Exposed=*] interface Valid: Invalid {}; `); - const curatedResult = toCrawlResult(` + const curatedResults = toCrawlResult(` [Global=Window,Exposed=*] interface Invalid{}; `); - const report = studyWebIdl(crawlResult, curatedResult); + const report = study(crawlResult, { curatedResults }); assertNbAnomalies(report, 1); assertAnomaly(report, 0, { name: 'invalid' }); }); diff --git a/test/study.js b/test/study.js new file mode 100644 index 00000000..986813a6 --- /dev/null +++ b/test/study.js @@ -0,0 +1,150 @@ +import study from '../src/lib/study.js'; +import { assertNbAnomalies, assertAnomaly } from './util.js'; + +const specUrl = 'https://w3c.github.io/world/'; +const specUrl2 = 'https://w3c.github.io/universe/'; + +function toTr(url) { + return url.replace('https://w3c.github.io', 'https://www.w3.org/TR'); +} + +function populateSpec(url, crawl) { + const shortname = url.slice(0, -1).split('/').pop(); + const spec = Object.assign({ + shortname, + title: `Hello ${shortname} API`, + url: toTr(url), + nightly: { url }, + release: { url: toTr(url) }, + crawled: url + }, crawl); + return spec; +} + +describe('The main study function', function () { + this.slow(5000); + this.timeout(10000); + + it('reports no anomaly when spec is empty', async function() { + const crawlResult = [{ url: specUrl }]; + const report = await study(crawlResult, { htmlFragments: {} }); + assertNbAnomalies(report.results, 0); + }); + + it('reports anomalies per type and spec by default', async function() { + const crawlResult = [ + populateSpec(specUrl, { error: 'Boo' }), + populateSpec(specUrl2, { error: 'Borked' }) + ]; + const report = await study(crawlResult, { htmlFragments: {} }); + assertNbAnomalies(report.results, 2); + assertAnomaly(report.results, 0, { + title: 'Crawl error in Hello world API', + content: `While crawling [Hello world API](${specUrl}), the following crawl errors occurred:\n* Boo` + }); + assertAnomaly(report.results, 1, { + title: 'Crawl error in Hello universe API', + content: `While crawling [Hello universe API](${specUrl2}), the following crawl errors occurred:\n* Borked` + }); + }); + + it('reports anomalies per type when asked', async function() { + const crawlResult = [ + populateSpec(specUrl, { error: 'Boo' }), + populateSpec(specUrl2, { error: 'Borked' }) + ]; + const report = await study(crawlResult, { structure: 'type>spec', htmlFragments: {} }); + assertNbAnomalies(report.results, 1); + assertAnomaly(report.results, 0, { + title: 'Crawl error', + content: `The following crawl errors occurred: +* [Hello world API](https://w3c.github.io/world/) + * Boo +* [Hello universe API](https://w3c.github.io/universe/) + * Borked` + }); + }); + + it('reports anomalies per spec when asked', async function() { + const crawlResult = [ + populateSpec(specUrl, { error: 'Boo' }), + populateSpec(specUrl2, { error: 'Borked' }) + ]; + const report = await study(crawlResult, { structure: 'spec>type', htmlFragments: {} }); + assertNbAnomalies(report.results, 2); + assertAnomaly(report.results, 0, { + title: 'Hello world API', + content: `While crawling [Hello world API](https://w3c.github.io/world/), the following 
anomalies were identified: +* Crawl error + * Boo` + }); + }); + + it('reports anomalies per spec and groups anomalies when asked', async function() { + const crawlResult = [ + populateSpec(specUrl, { error: 'Boo' }), + populateSpec(specUrl2, { error: 'Borked' }) + ]; + const report = await study(crawlResult, { structure: 'spec>group>type', htmlFragments: {} }); + assertNbAnomalies(report.results, 2); + assertAnomaly(report.results, 0, { + title: 'Hello world API', + content: `While crawling [Hello world API](https://w3c.github.io/world/), the following anomalies were identified: +* Generic + * Crawl error + * Boo` + }); + }); + + it('reports anomalies per group and spec when asked', async function() { + const crawlResult = [ + populateSpec(specUrl, { error: 'Boo' }), + populateSpec(specUrl2, { error: 'Borked' }) + ]; + const report = await study(crawlResult, { structure: 'group+spec>type', htmlFragments: {} }); + assertNbAnomalies(report.results, 2); + assertAnomaly(report.results, 0, { + title: 'Generic in Hello world API', + content: `While crawling [Hello world API](https://w3c.github.io/world/), the following errors prevented the spec from being analyzed: +* Crawl error + * Boo` + }); + }); + + it('reports anomalies per group, with anomaly type as intermediary level, when asked', async function() { + const crawlResult = [ + populateSpec(specUrl, { error: 'Boo' }), + populateSpec(specUrl2, { error: 'Borked' }) + ]; + const report = await study(crawlResult, { structure: 'group>type>spec', htmlFragments: {} }); + assertNbAnomalies(report.results, 1); + assertAnomaly(report.results, 0, { + title: 'Generic', + content: `The following errors prevented the spec from being analyzed: +* Crawl error + * [Hello world API](https://w3c.github.io/world/) + * Boo + * [Hello universe API](https://w3c.github.io/universe/) + * Borked` + }); + }); + + it('reports anomalies per group, with spec as intermediary level, when asked', async function() { + const crawlResult = [ + populateSpec(specUrl, { error: 'Boo' }), + populateSpec(specUrl2, { error: 'Borked' }) + ]; + const report = await study(crawlResult, { structure: 'group>spec>type', htmlFragments: {} }); + assertNbAnomalies(report.results, 1); + assertAnomaly(report.results, 0, { + title: 'Generic', + content: `The following errors prevented the spec from being analyzed: +* [Hello world API](https://w3c.github.io/world/) + * Crawl error + * Boo +* [Hello universe API](https://w3c.github.io/universe/) + * Crawl error + * Borked` + }); + }); +}); \ No newline at end of file