From ef74536c651e1aabd46d5462a1b3cc3b4dcb346a Mon Sep 17 00:00:00 2001 From: mongodben <90647379+mongodben@users.noreply.github.com> Date: Tue, 27 Aug 2024 10:15:29 -0400 Subject: [PATCH] move main ingest logic to core lib + mdb specific stuff to our implementation --- packages/ingest-mongodb-public/package.json | 1 + .../ingest-mongodb-public/src/meta.config.ts | 2 +- .../src}/sources/DevCenterDataSource.test.ts | 0 .../src}/sources/DevCenterDataSource.ts | 12 +++---- .../sources/RstOnGitHubDataSource.test.ts | 0 .../src}/sources/RstOnGitHubDataSource.ts | 4 +-- .../src/sources/index.ts | 18 ++++++----- .../MongoDbUniversityDataApiClient.test.ts | 0 .../MongoDbUniversityDataApiClient.ts | 0 .../MongoDbUniversityDataSource.test.ts | 0 .../MongoDbUniversityDataSource.ts | 3 +- .../src}/sources/mongodb-university/index.ts | 0 .../makeUniversityPages.test.ts | 0 .../mongodb-university/makeUniversityPages.ts | 2 +- .../sources/snooty/SnootyDataSource.test.ts | 0 .../src}/sources/snooty/SnootyDataSource.ts | 32 +++++++++---------- .../sources/snooty/SnootyProjectsInfo.test.ts | 0 .../src}/sources/snooty/SnootyProjectsInfo.ts | 3 +- .../src}/sources/snooty/index.ts | 0 .../sources/snooty/renderSnootyTable.test.ts | 0 .../src}/sources/snooty/renderSnootyTable.ts | 0 .../sources/snooty/rstToSnootyAst.test.ts | 0 .../src}/sources/snooty/rstToSnootyAst.ts | 0 .../src}/sources/snooty/snootyAstToMd.test.ts | 0 .../src}/sources/snooty/snootyAstToMd.ts | 0 .../snooty/snootyAstToOpenApiSpec.test.ts | 0 .../sources/snooty/snootyAstToOpenApiSpec.ts | 0 packages/mongodb-rag-core/package.json | 4 +-- .../src/ingest/embed/chunkOpenApiSpecYaml.ts | 2 +- .../src/ingest/embed/updateEmbeddedContent.ts | 2 +- .../src/ingest/pages/getChangedPages.ts | 2 +- .../src/ingest/pages/updatePages.ts | 2 +- .../AcquitRequireMdOnGithubDataSource.ts | 2 +- .../src/ingest/sources/GitDataSource.ts | 2 +- .../src/ingest/sources/handleHtmlDocument.ts | 2 +- .../src/ingest/sources/index.ts | 5 +-- 36 files changed, 49 insertions(+), 51 deletions(-) rename packages/{mongodb-rag-core/src/ingest => ingest-mongodb-public/src}/sources/DevCenterDataSource.test.ts (100%) rename packages/{mongodb-rag-core/src/ingest => ingest-mongodb-public/src}/sources/DevCenterDataSource.ts (91%) rename packages/{mongodb-rag-core/src/ingest => ingest-mongodb-public/src}/sources/RstOnGitHubDataSource.test.ts (100%) rename packages/{mongodb-rag-core/src/ingest => ingest-mongodb-public/src}/sources/RstOnGitHubDataSource.ts (94%) rename packages/{mongodb-rag-core/src/ingest => ingest-mongodb-public/src}/sources/mongodb-university/MongoDbUniversityDataApiClient.test.ts (100%) rename packages/{mongodb-rag-core/src/ingest => ingest-mongodb-public/src}/sources/mongodb-university/MongoDbUniversityDataApiClient.ts (100%) rename packages/{mongodb-rag-core/src/ingest => ingest-mongodb-public/src}/sources/mongodb-university/MongoDbUniversityDataSource.test.ts (100%) rename packages/{mongodb-rag-core/src/ingest => ingest-mongodb-public/src}/sources/mongodb-university/MongoDbUniversityDataSource.ts (96%) rename packages/{mongodb-rag-core/src/ingest => ingest-mongodb-public/src}/sources/mongodb-university/index.ts (100%) rename packages/{mongodb-rag-core/src/ingest => ingest-mongodb-public/src}/sources/mongodb-university/makeUniversityPages.test.ts (100%) rename packages/{mongodb-rag-core/src/ingest => ingest-mongodb-public/src}/sources/mongodb-university/makeUniversityPages.ts (98%) rename packages/{mongodb-rag-core/src/ingest => ingest-mongodb-public/src}/sources/snooty/SnootyDataSource.test.ts (100%) rename packages/{mongodb-rag-core/src/ingest => ingest-mongodb-public/src}/sources/snooty/SnootyDataSource.ts (93%) rename packages/{mongodb-rag-core/src/ingest => ingest-mongodb-public/src}/sources/snooty/SnootyProjectsInfo.test.ts (100%) rename packages/{mongodb-rag-core/src/ingest => ingest-mongodb-public/src}/sources/snooty/SnootyProjectsInfo.ts (97%) rename packages/{mongodb-rag-core/src/ingest => ingest-mongodb-public/src}/sources/snooty/index.ts (100%) rename packages/{mongodb-rag-core/src/ingest => ingest-mongodb-public/src}/sources/snooty/renderSnootyTable.test.ts (100%) rename packages/{mongodb-rag-core/src/ingest => ingest-mongodb-public/src}/sources/snooty/renderSnootyTable.ts (100%) rename packages/{mongodb-rag-core/src/ingest => ingest-mongodb-public/src}/sources/snooty/rstToSnootyAst.test.ts (100%) rename packages/{mongodb-rag-core/src/ingest => ingest-mongodb-public/src}/sources/snooty/rstToSnootyAst.ts (100%) rename packages/{mongodb-rag-core/src/ingest => ingest-mongodb-public/src}/sources/snooty/snootyAstToMd.test.ts (100%) rename packages/{mongodb-rag-core/src/ingest => ingest-mongodb-public/src}/sources/snooty/snootyAstToMd.ts (100%) rename packages/{mongodb-rag-core/src/ingest => ingest-mongodb-public/src}/sources/snooty/snootyAstToOpenApiSpec.test.ts (100%) rename packages/{mongodb-rag-core/src/ingest => ingest-mongodb-public/src}/sources/snooty/snootyAstToOpenApiSpec.ts (100%) diff --git a/packages/ingest-mongodb-public/package.json b/packages/ingest-mongodb-public/package.json index cd7bbf4f0..c59b58ce8 100644 --- a/packages/ingest-mongodb-public/package.json +++ b/packages/ingest-mongodb-public/package.json @@ -47,6 +47,7 @@ "dependencies": { "@release-it/bumper": "^5.1.0", "dotenv": "^16", + "langchain": "^0.2.17", "mongodb-rag-core": "*", "mongodb-rag-ingest": "*", "striptags": "^3.2.0" diff --git a/packages/ingest-mongodb-public/src/meta.config.ts b/packages/ingest-mongodb-public/src/meta.config.ts index aae6bd0b5..b007af37c 100644 --- a/packages/ingest-mongodb-public/src/meta.config.ts +++ b/packages/ingest-mongodb-public/src/meta.config.ts @@ -13,7 +13,7 @@ import { AzureKeyCredential, } from "mongodb-rag-core"; import { snootyDataApiBaseUrl } from "./sources/snooty"; -import { makeSnootyDataSource } from "mongodb-rag-ingest/sources/snooty"; +import { makeSnootyDataSource } from "./sources/snooty/SnootyDataSource"; const { OPENAI_ENDPOINT, diff --git a/packages/mongodb-rag-core/src/ingest/sources/DevCenterDataSource.test.ts b/packages/ingest-mongodb-public/src/sources/DevCenterDataSource.test.ts similarity index 100% rename from packages/mongodb-rag-core/src/ingest/sources/DevCenterDataSource.test.ts rename to packages/ingest-mongodb-public/src/sources/DevCenterDataSource.test.ts diff --git a/packages/mongodb-rag-core/src/ingest/sources/DevCenterDataSource.ts b/packages/ingest-mongodb-public/src/sources/DevCenterDataSource.ts similarity index 91% rename from packages/mongodb-rag-core/src/ingest/sources/DevCenterDataSource.ts rename to packages/ingest-mongodb-public/src/sources/DevCenterDataSource.ts index d422662c4..4f5b7f0c7 100644 --- a/packages/mongodb-rag-core/src/ingest/sources/DevCenterDataSource.ts +++ b/packages/ingest-mongodb-public/src/sources/DevCenterDataSource.ts @@ -1,12 +1,12 @@ import { strict as assert } from "assert"; import { convert } from "html-to-text"; -import { removeMarkdownImagesAndLinks } from "./removeMarkdownImagesAndLinks"; -import { DataSource } from "./DataSource"; -import { ProjectBase } from "./ProjectBase"; +import { removeMarkdownImagesAndLinks } from "mongodb-rag-core/ingest"; +import { DataSource } from "mongodb-rag-core/ingest"; +import { ProjectBase } from "mongodb-rag-core/ingest"; import { MongoClient } from "mongodb"; -import { assertEnvVars } from "../../assertEnvVars"; -import { Page } from "../../Page"; -import { logger } from "../../services/logger"; +import { assertEnvVars } from "mongodb-rag-core"; +import { Page } from "mongodb-rag-core"; +import { logger } from "mongodb-rag-core"; export type DevCenterProjectConfig = ProjectBase & { type: "devcenter"; diff --git a/packages/mongodb-rag-core/src/ingest/sources/RstOnGitHubDataSource.test.ts b/packages/ingest-mongodb-public/src/sources/RstOnGitHubDataSource.test.ts similarity index 100% rename from packages/mongodb-rag-core/src/ingest/sources/RstOnGitHubDataSource.test.ts rename to packages/ingest-mongodb-public/src/sources/RstOnGitHubDataSource.test.ts diff --git a/packages/mongodb-rag-core/src/ingest/sources/RstOnGitHubDataSource.ts b/packages/ingest-mongodb-public/src/sources/RstOnGitHubDataSource.ts similarity index 94% rename from packages/mongodb-rag-core/src/ingest/sources/RstOnGitHubDataSource.ts rename to packages/ingest-mongodb-public/src/sources/RstOnGitHubDataSource.ts index 266ec6aad..62a7e1490 100644 --- a/packages/mongodb-rag-core/src/ingest/sources/RstOnGitHubDataSource.ts +++ b/packages/ingest-mongodb-public/src/sources/RstOnGitHubDataSource.ts @@ -4,8 +4,8 @@ import { snootyAstToMd } from "./snooty/snootyAstToMd"; import { MakeGitHubDataSourceArgs, makeGitHubDataSource, -} from "./GitHubDataSource"; -import { extractMarkdownH1 } from "./extractMarkdownH1"; +} from "mongodb-rag-core/ingest"; +import { extractMarkdownH1 } from "mongodb-rag-core/ingest"; /** Loads an rST docs site from a GitHub repo. diff --git a/packages/ingest-mongodb-public/src/sources/index.ts b/packages/ingest-mongodb-public/src/sources/index.ts index 66070ae8f..65c7c8513 100644 --- a/packages/ingest-mongodb-public/src/sources/index.ts +++ b/packages/ingest-mongodb-public/src/sources/index.ts @@ -1,20 +1,22 @@ import { strict as assert } from "assert"; -import { Page, extractFrontMatter } from "mongodb-rag-core"; import { DataSource, - makeDevCenterDataSource, - DevCenterProjectConfig, - makeGitDataSource, - HandleHtmlPageFuncOptions, - handleHtmlDocument, MakeMdOnGithubDataSourceParams, + Page, + extractFrontMatter, + makeGitDataSource, makeMdOnGithubDataSource, removeMarkdownImagesAndLinks, +} from "mongodb-rag-core"; +import { + makeDevCenterDataSource, + DevCenterProjectConfig, +} from "./DevCenterDataSource"; +import { MakeMongoDbUniversityDataSourceParams, makeMongoDbUniversityDataSource, filterOnlyPublicActiveTiCatalogItems, -} from "mongodb-rag-ingest/sources"; -import { prepareSnootySources } from "mongodb-rag-ingest/sources/snooty"; +} from "./mongodb-university"; import { prismaSourceConstructor } from "./prisma"; import { wiredTigerSourceConstructor } from "./wiredTiger"; import { mongooseSourceConstructor } from "./mongoose"; diff --git a/packages/mongodb-rag-core/src/ingest/sources/mongodb-university/MongoDbUniversityDataApiClient.test.ts b/packages/ingest-mongodb-public/src/sources/mongodb-university/MongoDbUniversityDataApiClient.test.ts similarity index 100% rename from packages/mongodb-rag-core/src/ingest/sources/mongodb-university/MongoDbUniversityDataApiClient.test.ts rename to packages/ingest-mongodb-public/src/sources/mongodb-university/MongoDbUniversityDataApiClient.test.ts diff --git a/packages/mongodb-rag-core/src/ingest/sources/mongodb-university/MongoDbUniversityDataApiClient.ts b/packages/ingest-mongodb-public/src/sources/mongodb-university/MongoDbUniversityDataApiClient.ts similarity index 100% rename from packages/mongodb-rag-core/src/ingest/sources/mongodb-university/MongoDbUniversityDataApiClient.ts rename to packages/ingest-mongodb-public/src/sources/mongodb-university/MongoDbUniversityDataApiClient.ts diff --git a/packages/mongodb-rag-core/src/ingest/sources/mongodb-university/MongoDbUniversityDataSource.test.ts b/packages/ingest-mongodb-public/src/sources/mongodb-university/MongoDbUniversityDataSource.test.ts similarity index 100% rename from packages/mongodb-rag-core/src/ingest/sources/mongodb-university/MongoDbUniversityDataSource.test.ts rename to packages/ingest-mongodb-public/src/sources/mongodb-university/MongoDbUniversityDataSource.test.ts diff --git a/packages/mongodb-rag-core/src/ingest/sources/mongodb-university/MongoDbUniversityDataSource.ts b/packages/ingest-mongodb-public/src/sources/mongodb-university/MongoDbUniversityDataSource.ts similarity index 96% rename from packages/mongodb-rag-core/src/ingest/sources/mongodb-university/MongoDbUniversityDataSource.ts rename to packages/ingest-mongodb-public/src/sources/mongodb-university/MongoDbUniversityDataSource.ts index 9b4b8def7..2d25d3a5d 100644 --- a/packages/mongodb-rag-core/src/ingest/sources/mongodb-university/MongoDbUniversityDataSource.ts +++ b/packages/ingest-mongodb-public/src/sources/mongodb-university/MongoDbUniversityDataSource.ts @@ -1,5 +1,4 @@ -import { PageMetadata } from "../../../Page"; -import { DataSource } from "../DataSource"; +import { PageMetadata, DataSource } from "mongodb-rag-core"; import { makeUniversityPages } from "./makeUniversityPages"; import { TiCatalogItem, diff --git a/packages/mongodb-rag-core/src/ingest/sources/mongodb-university/index.ts b/packages/ingest-mongodb-public/src/sources/mongodb-university/index.ts similarity index 100% rename from packages/mongodb-rag-core/src/ingest/sources/mongodb-university/index.ts rename to packages/ingest-mongodb-public/src/sources/mongodb-university/index.ts diff --git a/packages/mongodb-rag-core/src/ingest/sources/mongodb-university/makeUniversityPages.test.ts b/packages/ingest-mongodb-public/src/sources/mongodb-university/makeUniversityPages.test.ts similarity index 100% rename from packages/mongodb-rag-core/src/ingest/sources/mongodb-university/makeUniversityPages.test.ts rename to packages/ingest-mongodb-public/src/sources/mongodb-university/makeUniversityPages.test.ts diff --git a/packages/mongodb-rag-core/src/ingest/sources/mongodb-university/makeUniversityPages.ts b/packages/ingest-mongodb-public/src/sources/mongodb-university/makeUniversityPages.ts similarity index 98% rename from packages/mongodb-rag-core/src/ingest/sources/mongodb-university/makeUniversityPages.ts rename to packages/ingest-mongodb-public/src/sources/mongodb-university/makeUniversityPages.ts index da915c333..be4d55fe1 100644 --- a/packages/mongodb-rag-core/src/ingest/sources/mongodb-university/makeUniversityPages.ts +++ b/packages/ingest-mongodb-public/src/sources/mongodb-university/makeUniversityPages.ts @@ -1,4 +1,4 @@ -import { PageMetadata, Page } from "../../../Page"; +import { PageMetadata, Page } from "mongodb-rag-core"; import { TiCatalogItem, UniversityVideo, diff --git a/packages/mongodb-rag-core/src/ingest/sources/snooty/SnootyDataSource.test.ts b/packages/ingest-mongodb-public/src/sources/snooty/SnootyDataSource.test.ts similarity index 100% rename from packages/mongodb-rag-core/src/ingest/sources/snooty/SnootyDataSource.test.ts rename to packages/ingest-mongodb-public/src/sources/snooty/SnootyDataSource.test.ts diff --git a/packages/mongodb-rag-core/src/ingest/sources/snooty/SnootyDataSource.ts b/packages/ingest-mongodb-public/src/sources/snooty/SnootyDataSource.ts similarity index 93% rename from packages/mongodb-rag-core/src/ingest/sources/snooty/SnootyDataSource.ts rename to packages/ingest-mongodb-public/src/sources/snooty/SnootyDataSource.ts index 2e854a4cf..e7c419cdf 100644 --- a/packages/mongodb-rag-core/src/ingest/sources/snooty/SnootyDataSource.ts +++ b/packages/ingest-mongodb-public/src/sources/snooty/SnootyDataSource.ts @@ -1,15 +1,15 @@ import { createInterface } from "readline"; import fetch from "node-fetch"; -import { DataSource } from "../DataSource"; +import { DataSource } from "mongodb-rag-core/ingest"; import { snootyAstToMd, getTitleFromSnootyAst } from "./snootyAstToMd"; -import { ProjectBase } from "../ProjectBase"; +import { ProjectBase } from "mongodb-rag-core/ingest"; import { getTitleFromSnootyOpenApiSpecAst, snootyAstToOpenApiSpec, } from "./snootyAstToOpenApiSpec"; -import { Page } from "../../../Page"; -import { PageFormat } from "../../../PageFormat"; -import { logger } from "../../../services/logger"; +import { Page } from "mongodb-rag-core"; +import { PageFormat } from "mongodb-rag-core"; +import { logger } from "mongodb-rag-core"; // These types are what's in the snooty manifest jsonl file. export type SnootyManifestEntry = { @@ -28,10 +28,10 @@ export type SnootyPageEntry = SnootyManifestEntry & { /** Represents metadata in a Snooty manifest file. */ - export type SnootyMetadataEntry = SnootyManifestEntry & { - type: "metadata"; - data: {title?: string}; - }; +export type SnootyMetadataEntry = SnootyManifestEntry & { + type: "metadata"; + data: { title?: string }; +}; /** A node in the Snooty AST. @@ -65,9 +65,9 @@ export type SnootyPageData = { /** A Snooty Data API metadata object. This contains project-level information, such as the site name. */ - export type SnootyMetadata = { - title?: string; - }; +export type SnootyMetadata = { + title?: string; +}; export type SnootyProjectConfig = ProjectBase & { type: "snooty"; @@ -168,7 +168,7 @@ export const makeSnootyDataSource = ({ const stream = createInterface(body); const linePromises: Promise[] = []; const pages: Page[] = []; - let siteTitle: string | undefined = undefined + let siteTitle: string | undefined = undefined; await new Promise((resolve, reject) => { stream.on("line", async (line) => { const entry = JSON.parse(line) as SnootyManifestEntry; @@ -231,11 +231,11 @@ export const makeSnootyDataSource = ({ }); await Promise.allSettled(linePromises); // add metadata to all the pages - for(const page of pages){ + for (const page of pages) { if (!page.metadata) { - page.metadata = {} + page.metadata = {}; } - page.metadata.siteTitle = siteTitle + page.metadata.siteTitle = siteTitle; } return pages; }, diff --git a/packages/mongodb-rag-core/src/ingest/sources/snooty/SnootyProjectsInfo.test.ts b/packages/ingest-mongodb-public/src/sources/snooty/SnootyProjectsInfo.test.ts similarity index 100% rename from packages/mongodb-rag-core/src/ingest/sources/snooty/SnootyProjectsInfo.test.ts rename to packages/ingest-mongodb-public/src/sources/snooty/SnootyProjectsInfo.test.ts diff --git a/packages/mongodb-rag-core/src/ingest/sources/snooty/SnootyProjectsInfo.ts b/packages/ingest-mongodb-public/src/sources/snooty/SnootyProjectsInfo.ts similarity index 97% rename from packages/mongodb-rag-core/src/ingest/sources/snooty/SnootyProjectsInfo.ts rename to packages/ingest-mongodb-public/src/sources/snooty/SnootyProjectsInfo.ts index fc61a7f61..de0476fdd 100644 --- a/packages/mongodb-rag-core/src/ingest/sources/snooty/SnootyProjectsInfo.ts +++ b/packages/ingest-mongodb-public/src/sources/snooty/SnootyProjectsInfo.ts @@ -5,8 +5,7 @@ import { Branch, LocallySpecifiedSnootyProjectConfig, } from "./SnootyDataSource"; -import { filterFulfilled } from "../../../arrayFilters"; -import { logger } from "../../../services/logger"; +import { filterFulfilled, logger } from "mongodb-rag-core"; /** Schema for API response from https://snooty-data-api.mongodb.com/prod/projects */ export type GetSnootyProjectsResponse = { diff --git a/packages/mongodb-rag-core/src/ingest/sources/snooty/index.ts b/packages/ingest-mongodb-public/src/sources/snooty/index.ts similarity index 100% rename from packages/mongodb-rag-core/src/ingest/sources/snooty/index.ts rename to packages/ingest-mongodb-public/src/sources/snooty/index.ts diff --git a/packages/mongodb-rag-core/src/ingest/sources/snooty/renderSnootyTable.test.ts b/packages/ingest-mongodb-public/src/sources/snooty/renderSnootyTable.test.ts similarity index 100% rename from packages/mongodb-rag-core/src/ingest/sources/snooty/renderSnootyTable.test.ts rename to packages/ingest-mongodb-public/src/sources/snooty/renderSnootyTable.test.ts diff --git a/packages/mongodb-rag-core/src/ingest/sources/snooty/renderSnootyTable.ts b/packages/ingest-mongodb-public/src/sources/snooty/renderSnootyTable.ts similarity index 100% rename from packages/mongodb-rag-core/src/ingest/sources/snooty/renderSnootyTable.ts rename to packages/ingest-mongodb-public/src/sources/snooty/renderSnootyTable.ts diff --git a/packages/mongodb-rag-core/src/ingest/sources/snooty/rstToSnootyAst.test.ts b/packages/ingest-mongodb-public/src/sources/snooty/rstToSnootyAst.test.ts similarity index 100% rename from packages/mongodb-rag-core/src/ingest/sources/snooty/rstToSnootyAst.test.ts rename to packages/ingest-mongodb-public/src/sources/snooty/rstToSnootyAst.test.ts diff --git a/packages/mongodb-rag-core/src/ingest/sources/snooty/rstToSnootyAst.ts b/packages/ingest-mongodb-public/src/sources/snooty/rstToSnootyAst.ts similarity index 100% rename from packages/mongodb-rag-core/src/ingest/sources/snooty/rstToSnootyAst.ts rename to packages/ingest-mongodb-public/src/sources/snooty/rstToSnootyAst.ts diff --git a/packages/mongodb-rag-core/src/ingest/sources/snooty/snootyAstToMd.test.ts b/packages/ingest-mongodb-public/src/sources/snooty/snootyAstToMd.test.ts similarity index 100% rename from packages/mongodb-rag-core/src/ingest/sources/snooty/snootyAstToMd.test.ts rename to packages/ingest-mongodb-public/src/sources/snooty/snootyAstToMd.test.ts diff --git a/packages/mongodb-rag-core/src/ingest/sources/snooty/snootyAstToMd.ts b/packages/ingest-mongodb-public/src/sources/snooty/snootyAstToMd.ts similarity index 100% rename from packages/mongodb-rag-core/src/ingest/sources/snooty/snootyAstToMd.ts rename to packages/ingest-mongodb-public/src/sources/snooty/snootyAstToMd.ts diff --git a/packages/mongodb-rag-core/src/ingest/sources/snooty/snootyAstToOpenApiSpec.test.ts b/packages/ingest-mongodb-public/src/sources/snooty/snootyAstToOpenApiSpec.test.ts similarity index 100% rename from packages/mongodb-rag-core/src/ingest/sources/snooty/snootyAstToOpenApiSpec.test.ts rename to packages/ingest-mongodb-public/src/sources/snooty/snootyAstToOpenApiSpec.test.ts diff --git a/packages/mongodb-rag-core/src/ingest/sources/snooty/snootyAstToOpenApiSpec.ts b/packages/ingest-mongodb-public/src/sources/snooty/snootyAstToOpenApiSpec.ts similarity index 100% rename from packages/mongodb-rag-core/src/ingest/sources/snooty/snootyAstToOpenApiSpec.ts rename to packages/ingest-mongodb-public/src/sources/snooty/snootyAstToOpenApiSpec.ts diff --git a/packages/mongodb-rag-core/package.json b/packages/mongodb-rag-core/package.json index eee5d1693..88d079a17 100644 --- a/packages/mongodb-rag-core/package.json +++ b/packages/mongodb-rag-core/package.json @@ -24,13 +24,13 @@ "build", "README.md" ], + "main": "./build/index.js", "exports": { ".": "./build/index.js", "./ingest": "./build/ingest/index.js", "./ingest/embed": "./build/embed/index.js", "./ingest/pages": "./build/pages/index.js", - "./ingest/sources": "./build/sources/index.js", - "./ingest/sources/snooty": "./build/sources/snooty/index.js" + "./ingest/sources": "./build/sources/index.js" }, "scripts": { "clean": "rm -rf build", diff --git a/packages/mongodb-rag-core/src/ingest/embed/chunkOpenApiSpecYaml.ts b/packages/mongodb-rag-core/src/ingest/embed/chunkOpenApiSpecYaml.ts index 591030de4..65da97898 100644 --- a/packages/mongodb-rag-core/src/ingest/embed/chunkOpenApiSpecYaml.ts +++ b/packages/mongodb-rag-core/src/ingest/embed/chunkOpenApiSpecYaml.ts @@ -9,8 +9,8 @@ import { SomeTokenizer, } from "./chunkPage"; import { Page } from "../../Page"; -import { logger } from "../../services/logger"; import { updateFrontMatter } from "../../updateFrontMatter"; +import { logger } from "../../logger"; export const defaultOpenApiSpecYamlChunkOptions: ChunkOptions = { maxChunkSize: 1250, diff --git a/packages/mongodb-rag-core/src/ingest/embed/updateEmbeddedContent.ts b/packages/mongodb-rag-core/src/ingest/embed/updateEmbeddedContent.ts index 512e0e6bc..0f43d0364 100644 --- a/packages/mongodb-rag-core/src/ingest/embed/updateEmbeddedContent.ts +++ b/packages/mongodb-rag-core/src/ingest/embed/updateEmbeddedContent.ts @@ -3,7 +3,7 @@ import { chunkPage, ChunkFunc, ChunkOptions } from "./chunkPage"; import { EmbeddedContentStore, EmbeddedContent } from "../../EmbeddedContent"; import { Embedder } from "../../Embedder"; import { PageStore, PersistedPage } from "../../Page"; -import { logger } from "../../services/logger"; +import { logger } from "../../logger"; /** (Re-)embeddedContent the pages in the page store that have changed since the given date diff --git a/packages/mongodb-rag-core/src/ingest/pages/getChangedPages.ts b/packages/mongodb-rag-core/src/ingest/pages/getChangedPages.ts index 9defde510..eaac2a17d 100644 --- a/packages/mongodb-rag-core/src/ingest/pages/getChangedPages.ts +++ b/packages/mongodb-rag-core/src/ingest/pages/getChangedPages.ts @@ -1,6 +1,6 @@ import deepEqual from "deep-equal"; import { PersistedPage, Page } from "../../Page"; -import { logger } from "../../services/logger"; +import { logger } from "../../logger"; /** Given sets of old and new pages, returns the pages that need to be created, diff --git a/packages/mongodb-rag-core/src/ingest/pages/updatePages.ts b/packages/mongodb-rag-core/src/ingest/pages/updatePages.ts index 884a406cc..8fc8334ca 100644 --- a/packages/mongodb-rag-core/src/ingest/pages/updatePages.ts +++ b/packages/mongodb-rag-core/src/ingest/pages/updatePages.ts @@ -1,7 +1,7 @@ import { getChangedPages } from "./getChangedPages"; import { DataSource } from "../sources/DataSource"; import { PageStore, Page } from "../../Page"; -import { logger } from "../../services/logger"; +import { logger } from "../../logger"; /** Fetches pages from data sources and stores those that have changed in the data diff --git a/packages/mongodb-rag-core/src/ingest/sources/AcquitRequireMdOnGithubDataSource.ts b/packages/mongodb-rag-core/src/ingest/sources/AcquitRequireMdOnGithubDataSource.ts index 7a3c209c1..243723443 100644 --- a/packages/mongodb-rag-core/src/ingest/sources/AcquitRequireMdOnGithubDataSource.ts +++ b/packages/mongodb-rag-core/src/ingest/sources/AcquitRequireMdOnGithubDataSource.ts @@ -8,7 +8,7 @@ import acquit from "acquit"; import { removeMarkdownImagesAndLinks } from "./removeMarkdownImagesAndLinks"; import { extractMarkdownH1 } from "./extractMarkdownH1"; import { PageMetadata, Page } from "../../Page"; -import { logger } from "../../services/logger"; +import { logger } from "../../logger"; /** Loads an MD/Acquit docs site from a GitHub repo. diff --git a/packages/mongodb-rag-core/src/ingest/sources/GitDataSource.ts b/packages/mongodb-rag-core/src/ingest/sources/GitDataSource.ts index f285f732c..c5918959c 100644 --- a/packages/mongodb-rag-core/src/ingest/sources/GitDataSource.ts +++ b/packages/mongodb-rag-core/src/ingest/sources/GitDataSource.ts @@ -6,7 +6,7 @@ import { rimrafSync } from "rimraf"; import { DataSource } from "./DataSource"; import { filterDefined, filterFulfilled } from "../../arrayFilters"; import { Page, PageMetadata } from "../../Page"; -import { logger } from "../../services/logger"; +import { logger } from "../../logger"; /** Function to convert a file in the repo into a `Page` or `Page[]`. diff --git a/packages/mongodb-rag-core/src/ingest/sources/handleHtmlDocument.ts b/packages/mongodb-rag-core/src/ingest/sources/handleHtmlDocument.ts index f87f3c281..754aa8774 100644 --- a/packages/mongodb-rag-core/src/ingest/sources/handleHtmlDocument.ts +++ b/packages/mongodb-rag-core/src/ingest/sources/handleHtmlDocument.ts @@ -1,7 +1,7 @@ import TurndownService from "turndown"; import * as turndownPluginGfm from "turndown-plugin-gfm"; import { JSDOM } from "jsdom"; -import { logger } from "../../services/logger"; +import { logger } from "../../logger"; import { PageMetadata, Page } from "../../Page"; export type HandleHtmlPageFuncOptions = { /** Returns an array of DOM elements to be removed from the parsed document. */ diff --git a/packages/mongodb-rag-core/src/ingest/sources/index.ts b/packages/mongodb-rag-core/src/ingest/sources/index.ts index 7c43d036c..651b0f4d8 100644 --- a/packages/mongodb-rag-core/src/ingest/sources/index.ts +++ b/packages/mongodb-rag-core/src/ingest/sources/index.ts @@ -1,14 +1,11 @@ export * from "./AcquitRequireMdOnGithubDataSource"; export * from "./DataSource"; -export * from "./DevCenterDataSource"; +export * from "./extractMarkdownH1"; export * from "./GitDataSource"; export * from "./GitHubDataSource"; export * from "./MdOnGithubDataSource"; export * from "./ProjectBase"; -export * from "./RstOnGitHubDataSource"; export * from "./handleHtmlDocument"; export * from "./removeMarkdownImagesAndLinks"; -export * from "./snooty"; -export * from "./mongodb-university"; export * from "./LangchainDocumentLoaderDataSource"; export * from "./CodeOnGithubTextDataSource";