From 54f80987cb4b1124deb894d8cd11f29e9ba3193b Mon Sep 17 00:00:00 2001 From: "a.e." <49438478+I-Info@users.noreply.github.com> Date: Tue, 1 Apr 2025 13:19:11 +0800 Subject: [PATCH 1/8] perf: introduce BullMQ for website sync (#4403) * perf: introduce BullMQ for website sync * feat: new redis module * fix: remove graceful shutdown * perf: improve UI in dataset detail - Updated the "change" icon SVG file. - Modified i18n strings. - Added new i18n string "immediate_sync". - Improved UI in dataset detail page, including button icons and background colors. * refactor: Add chunkSettings to DatasetSchema --- deploy/docker/docker-compose-milvus.yml | 14 + deploy/docker/docker-compose-pgvector.yml | 15 + deploy/docker/docker-compose-zilliz.yml | 15 + packages/global/core/dataset/api.d.ts | 3 +- packages/global/core/dataset/constants.ts | 6 +- packages/global/core/dataset/type.d.ts | 19 +- packages/service/common/bullmq/index.ts | 86 +++ packages/service/common/bullmq/type.d.ts | 7 + packages/service/common/redis/index.ts | 13 + .../core/dataset/collection/controller.ts | 6 +- .../service/core/dataset/collection/schema.ts | 31 +- packages/service/core/dataset/controller.ts | 15 +- packages/service/core/dataset/schema.ts | 37 +- packages/service/core/dataset/websiteSync.ts | 68 ++ packages/service/package.json | 2 + .../components/common/Icon/icons/change.svg | 6 +- packages/web/i18n/en/common.json | 3 +- packages/web/i18n/en/dataset.json | 5 +- packages/web/i18n/zh-CN/common.json | 5 +- packages/web/i18n/zh-CN/dataset.json | 5 +- packages/web/i18n/zh-Hant/common.json | 3 +- packages/web/i18n/zh-Hant/dataset.json | 5 +- pnpm-lock.yaml | 169 ++++- projects/app/.env.template | 5 +- projects/app/src/instrumentation.ts | 2 +- .../dataset/detail/CollectionCard/Context.tsx | 44 +- .../CollectionCard/EmptyCollectionTip.tsx | 3 + .../dataset/detail/CollectionCard/Header.tsx | 71 +- .../detail/CollectionCard/WebsiteConfig.tsx | 609 ++++++++++++++++-- 
.../dataset/detail/CollectionCard/index.tsx | 4 +- .../app/src/pages/api/core/dataset/detail.ts | 17 + .../app/src/pages/api/core/dataset/update.ts | 96 ++- .../app/src/web/core/dataset/constants.ts | 1 - .../pages/api/core/dataset/paths.test.ts | 1 + 34 files changed, 1197 insertions(+), 194 deletions(-) create mode 100644 packages/service/common/bullmq/index.ts create mode 100644 packages/service/common/bullmq/type.d.ts create mode 100644 packages/service/common/redis/index.ts create mode 100644 packages/service/core/dataset/websiteSync.ts diff --git a/deploy/docker/docker-compose-milvus.yml b/deploy/docker/docker-compose-milvus.yml index f875d61ec05c..3b5fb4b57c02 100644 --- a/deploy/docker/docker-compose-milvus.yml +++ b/deploy/docker/docker-compose-milvus.yml @@ -110,6 +110,18 @@ services: # 等待docker-entrypoint.sh脚本执行的MongoDB服务进程 wait $$! + redis: + image: redis:7.2-alpine + container_name: redis + # ports: + # - 6379:6379 + networks: + - fastgpt + restart: always + command: | + redis-server --requirepass mypassword --loglevel warning --maxclients 10000 --appendonly yes --save 60 10 --maxmemory 1gb --maxmemory-policy noeviction + volumes: + - ./redis/data:/data # fastgpt sandbox: @@ -157,6 +169,8 @@ services: # zilliz 连接参数 - MILVUS_ADDRESS=http://milvusStandalone:19530 - MILVUS_TOKEN=none + # Redis 地址 + - REDIS_URL=redis://default:mypassword@redis:6379 # sandbox 地址 - SANDBOX_URL=http://sandbox:3000 # 日志等级: debug, info, warn, error diff --git a/deploy/docker/docker-compose-pgvector.yml b/deploy/docker/docker-compose-pgvector.yml index 0a21d9c29f79..ee5bd9b227de 100644 --- a/deploy/docker/docker-compose-pgvector.yml +++ b/deploy/docker/docker-compose-pgvector.yml @@ -69,6 +69,19 @@ services: # 等待docker-entrypoint.sh脚本执行的MongoDB服务进程 wait $$!
+ redis: + image: redis:7.2-alpine + container_name: redis + # ports: + # - 6379:6379 + networks: + - fastgpt + restart: always + command: | + redis-server --requirepass mypassword --loglevel warning --maxclients 10000 --appendonly yes --save 60 10 --maxmemory 1gb --maxmemory-policy noeviction + volumes: + - ./redis/data:/data + # fastgpt sandbox: container_name: sandbox @@ -114,6 +127,8 @@ services: - MONGODB_URI=mongodb://myusername:mypassword@mongo:27017/fastgpt?authSource=admin # pg 连接参数 - PG_URL=postgresql://username:password@pg:5432/postgres + # Redis 连接参数 + - REDIS_URL=redis://default:mypassword@redis:6379 # sandbox 地址 - SANDBOX_URL=http://sandbox:3000 # 日志等级: debug, info, warn, error diff --git a/deploy/docker/docker-compose-zilliz.yml b/deploy/docker/docker-compose-zilliz.yml index 8e2c0bb7a48e..3c0d48d34d63 100644 --- a/deploy/docker/docker-compose-zilliz.yml +++ b/deploy/docker/docker-compose-zilliz.yml @@ -51,6 +51,19 @@ services: # 等待docker-entrypoint.sh脚本执行的MongoDB服务进程 wait $$! + redis: + image: redis:7.2-alpine + container_name: redis + # ports: + # - 6379:6379 + networks: + - fastgpt + restart: always + command: | + redis-server --requirepass mypassword --loglevel warning --maxclients 10000 --appendonly yes --save 60 10 --maxmemory 1gb --maxmemory-policy noeviction + volumes: + - ./redis/data:/data + sandbox: container_name: sandbox image: ghcr.io/labring/fastgpt-sandbox:v4.9.3 # git @@ -92,6 +105,8 @@ services: - FILE_TOKEN_KEY=filetoken # MongoDB 连接参数.
用户名myusername,密码mypassword。 - MONGODB_URI=mongodb://myusername:mypassword@mongo:27017/fastgpt?authSource=admin + # Redis 连接参数 + - REDIS_URL=redis://default:mypassword@redis:6379 # zilliz 连接参数 - MILVUS_ADDRESS=zilliz_cloud_address - MILVUS_TOKEN=zilliz_cloud_token diff --git a/packages/global/core/dataset/api.d.ts b/packages/global/core/dataset/api.d.ts index 40e696b2dd65..abb5db927807 100644 --- a/packages/global/core/dataset/api.d.ts +++ b/packages/global/core/dataset/api.d.ts @@ -15,7 +15,6 @@ export type DatasetUpdateBody = { name?: string; avatar?: string; intro?: string; - status?: DatasetSchemaType['status']; agentModel?: string; vlmModel?: string; @@ -26,6 +25,7 @@ export type DatasetUpdateBody = { apiServer?: DatasetSchemaType['apiServer']; yuqueServer?: DatasetSchemaType['yuqueServer']; feishuServer?: DatasetSchemaType['feishuServer']; + chunkSettings?: DatasetSchemaType['chunkSettings']; // sync schedule autoSync?: boolean; @@ -141,7 +141,6 @@ export type PushDatasetDataChunkProps = { export type PostWebsiteSyncParams = { datasetId: string; - billId: string; }; export type PushDatasetDataProps = { diff --git a/packages/global/core/dataset/constants.ts b/packages/global/core/dataset/constants.ts index 627129835f26..8631b65b8926 100644 --- a/packages/global/core/dataset/constants.ts +++ b/packages/global/core/dataset/constants.ts @@ -50,7 +50,8 @@ export const DatasetTypeMap = { export enum DatasetStatusEnum { active = 'active', - syncing = 'syncing' + syncing = 'syncing', + waiting = 'waiting' } export const DatasetStatusMap = { [DatasetStatusEnum.active]: { @@ -58,6 +59,9 @@ export const DatasetStatusMap = { }, [DatasetStatusEnum.syncing]: { label: i18nT('common:core.dataset.status.syncing') + }, + [DatasetStatusEnum.waiting]: { + label: i18nT('common:core.dataset.status.waiting') } }; diff --git a/packages/global/core/dataset/type.d.ts b/packages/global/core/dataset/type.d.ts index 67bde78fbd39..735c449527b4 100644 --- a/packages/global/core/dataset/type.d.ts
+++ b/packages/global/core/dataset/type.d.ts @@ -17,6 +17,20 @@ import { SourceMemberType } from 'support/user/type'; import { DatasetDataIndexTypeEnum } from './data/constants'; import { ChunkSettingModeEnum } from './constants'; +export type ChunkSettingsType = { + trainingType: DatasetCollectionDataProcessModeEnum; + autoIndexes?: boolean; + imageIndex?: boolean; + + chunkSettingMode?: ChunkSettingModeEnum; + chunkSplitMode?: DataChunkSplitModeEnum; + + chunkSize?: number; + indexSize?: number; + chunkSplitter?: string; + qaPrompt?: string; +}; + export type DatasetSchemaType = { _id: string; parentId?: string; @@ -29,7 +43,6 @@ export type DatasetSchemaType = { name: string; intro: string; type: `${DatasetTypeEnum}`; - status: `${DatasetStatusEnum}`; vectorModel: string; agentModel: string; @@ -39,6 +52,9 @@ export type DatasetSchemaType = { url: string; selector: string; }; + + chunkSettings?: ChunkSettingsType; + inheritPermission: boolean; apiServer?: APIFileServer; feishuServer?: FeishuServer; @@ -193,6 +209,7 @@ export type DatasetListItemType = { }; export type DatasetItemType = Omit & { + status: `${DatasetStatusEnum}`; vectorModel: EmbeddingModelItemType; agentModel: LLMModelItemType; vlmModel?: LLMModelItemType; diff --git a/packages/service/common/bullmq/index.ts b/packages/service/common/bullmq/index.ts new file mode 100644 index 000000000000..c459434702d1 --- /dev/null +++ b/packages/service/common/bullmq/index.ts @@ -0,0 +1,86 @@ +import { ConnectionOptions, Processor, Queue, QueueOptions, Worker, WorkerOptions } from 'bullmq'; +import { addLog } from '../system/log'; +import { newQueueRedisConnection, newWorkerRedisConnection } from '../redis'; + +const defaultWorkerOpts: Omit = { + removeOnComplete: { + count: 0 // Delete jobs immediately on completion + }, + removeOnFail: { + count: 0 // Delete jobs immediately on failure + } +}; + +export const FinishedStates = ['completed', 'failed'] as const; + +export enum QueueNames { + websiteSync = 
'websiteSync' +} + +export const queues = (() => { + if (!global.queues) { + global.queues = new Map(); + } + return global.queues; +})(); +export const workers = (() => { + if (!global.workers) { + global.workers = new Map(); + } + return global.workers; +})(); + +export function getQueue( + name: QueueNames, + opts?: Omit +): Queue { + // check if global.queues has the queue + const queue = queues.get(name); + if (queue) { + return queue as Queue; + } + const newQueue = new Queue(name.toString(), { + connection: newQueueRedisConnection(), + ...opts + }); + // default error handler, to avoid unhandled exceptions + newQueue.on('error', (error) => { + addLog.error(`MQ Queue [${name}]: ${error.message}`, error); + }); + queues.set(name, newQueue); + return newQueue; +} + +export function getWorker( + name: QueueNames, + processor: Processor, + opts?: Omit +): Worker { + const worker = workers.get(name); + if (worker) { + return worker as Worker; + } + const newWorker = new Worker(name.toString(), processor, { + connection: newWorkerRedisConnection(), + ...defaultWorkerOpts, + ...opts + }); + // default error handler, to avoid unhandled exceptions + newWorker.on('error', (error) => { + addLog.error(`MQ Worker [${name}]: ${error.message}`, error); + }); + workers.set(name, newWorker); + return newWorker; +} + +export function getAllQueues() { + return [...queues.values()]; +} + +export function getAllWorkers() { + return [...workers.values()]; +} + +export async function closeAllWorkers() { + return Promise.all([...workers.values()].map((worker) => worker.close())); +} diff --git a/packages/service/common/bullmq/type.d.ts b/packages/service/common/bullmq/type.d.ts new file mode 100644 index 000000000000..723675b27b28 --- /dev/null +++ b/packages/service/common/bullmq/type.d.ts @@ -0,0 +1,7 @@ +import { Queue, Worker } from 'bullmq'; +import { QueueNames } from './index'; + +declare global { + var queues: Map | undefined; + var workers: Map | undefined; +} diff --git
a/packages/service/common/redis/index.ts b/packages/service/common/redis/index.ts new file mode 100644 index 000000000000..51560901c167 --- /dev/null +++ b/packages/service/common/redis/index.ts @@ -0,0 +1,13 @@ +import Redis from 'ioredis'; + +const REDIS_URL = process.env.REDIS_URL ?? 'redis://localhost:6379'; + +export function newQueueRedisConnection() { + return new Redis(REDIS_URL); +} + +export function newWorkerRedisConnection() { + return new Redis(REDIS_URL, { + maxRetriesPerRequest: null + }); +} diff --git a/packages/service/core/dataset/collection/controller.ts b/packages/service/core/dataset/collection/controller.ts index 44e5d07dac84..14c1c0bcdc2c 100644 --- a/packages/service/core/dataset/collection/controller.ts +++ b/packages/service/core/dataset/collection/controller.ts @@ -1,6 +1,7 @@ import { DatasetCollectionTypeEnum, - DatasetCollectionDataProcessModeEnum + DatasetCollectionDataProcessModeEnum, + DatasetTypeEnum } from '@fastgpt/global/core/dataset/constants'; import type { CreateDatasetCollectionParams } from '@fastgpt/global/core/dataset/api.d'; import { MongoDatasetCollection } from './schema'; @@ -104,7 +105,8 @@ export const createCollectionAndInsertData = async ({ hashRawText: hashStr(rawText), rawTextLength: rawText.length, nextSyncTime: (() => { - if (!dataset.autoSync) return undefined; + // ignore auto collections sync for website datasets + if (!dataset.autoSync && dataset.type === DatasetTypeEnum.websiteDataset) return undefined; if ( [DatasetCollectionTypeEnum.link, DatasetCollectionTypeEnum.apiFile].includes( createCollectionParams.type diff --git a/packages/service/core/dataset/collection/schema.ts b/packages/service/core/dataset/collection/schema.ts index 9522c69f2507..1b1ceb9132ca 100644 --- a/packages/service/core/dataset/collection/schema.ts +++ b/packages/service/core/dataset/collection/schema.ts @@ -1,13 +1,8 @@ import { connectionMongo, getMongoModel } from '../../../common/mongo'; -const { Schema, model, models } = 
connectionMongo; +const { Schema } = connectionMongo; import { DatasetCollectionSchemaType } from '@fastgpt/global/core/dataset/type.d'; -import { - DatasetCollectionTypeMap, - DatasetCollectionDataProcessModeEnum, - ChunkSettingModeEnum, - DataChunkSplitModeEnum -} from '@fastgpt/global/core/dataset/constants'; -import { DatasetCollectionName } from '../schema'; +import { DatasetCollectionTypeMap } from '@fastgpt/global/core/dataset/constants'; +import { ChunkSettings, DatasetCollectionName } from '../schema'; import { TeamCollectionName, TeamMemberCollectionName @@ -90,25 +85,7 @@ const DatasetCollectionSchema = new Schema({ customPdfParse: Boolean, // Chunk settings - imageIndex: Boolean, - autoIndexes: Boolean, - trainingType: { - type: String, - enum: Object.values(DatasetCollectionDataProcessModeEnum) - }, - chunkSettingMode: { - type: String, - enum: Object.values(ChunkSettingModeEnum) - }, - chunkSplitMode: { - type: String, - enum: Object.values(DataChunkSplitModeEnum) - }, - chunkSize: Number, - chunkSplitter: String, - - indexSize: Number, - qaPrompt: String + ...ChunkSettings }); DatasetCollectionSchema.virtual('dataset', { diff --git a/packages/service/core/dataset/controller.ts b/packages/service/core/dataset/controller.ts index 06be050a9b37..e03bec334820 100644 --- a/packages/service/core/dataset/controller.ts +++ b/packages/service/core/dataset/controller.ts @@ -9,6 +9,8 @@ import { deleteDatasetDataVector } from '../../common/vectorStore/controller'; import { MongoDatasetDataText } from './data/dataTextSchema'; import { DatasetErrEnum } from '@fastgpt/global/common/error/code/dataset'; import { retryFn } from '@fastgpt/global/common/system/utils'; +import { removeWebsiteSyncJobScheduler } from './websiteSync'; +import { DatasetTypeEnum } from '@fastgpt/global/core/dataset/constants'; /* ============= dataset ========== */ /* find all datasetId by top datasetId */ @@ -88,6 +90,15 @@ export async function delDatasetRelevantData({ '_id teamId 
datasetId fileId metadata' ).lean(); + const removeJobScheduler = async () => { + await Promise.all( + datasets.map((dataset) => { + if (dataset.type === DatasetTypeEnum.websiteDataset) + return removeWebsiteSyncJobScheduler(String(dataset._id)); + }) + ); + }; + await retryFn(async () => { await Promise.all([ // delete training data @@ -105,7 +116,9 @@ export async function delDatasetRelevantData({ // Delete Image and file delCollectionRelatedSource({ collections }), // Delete vector data - deleteDatasetDataVector({ teamId, datasetIds }) + deleteDatasetDataVector({ teamId, datasetIds }), + // Remove job scheduler + removeJobScheduler() ]); }); diff --git a/packages/service/core/dataset/schema.ts b/packages/service/core/dataset/schema.ts index 22f79fd2553a..b6234b810ead 100644 --- a/packages/service/core/dataset/schema.ts +++ b/packages/service/core/dataset/schema.ts @@ -1,7 +1,8 @@ import { getMongoModel, Schema } from '../../common/mongo'; import { - DatasetStatusEnum, - DatasetStatusMap, + ChunkSettingModeEnum, + DataChunkSplitModeEnum, + DatasetCollectionDataProcessModeEnum, DatasetTypeEnum, DatasetTypeMap } from '@fastgpt/global/core/dataset/constants'; @@ -13,6 +14,28 @@ import type { DatasetSchemaType } from '@fastgpt/global/core/dataset/type.d'; export const DatasetCollectionName = 'datasets'; +export const ChunkSettings = { + imageIndex: Boolean, + autoIndexes: Boolean, + trainingType: { + type: String, + enum: Object.values(DatasetCollectionDataProcessModeEnum) + }, + chunkSettingMode: { + type: String, + enum: Object.values(ChunkSettingModeEnum) + }, + chunkSplitMode: { + type: String, + enum: Object.values(DataChunkSplitModeEnum) + }, + chunkSize: Number, + chunkSplitter: String, + + indexSize: Number, + qaPrompt: String +}; + const DatasetSchema = new Schema({ parentId: { type: Schema.Types.ObjectId, @@ -40,11 +63,6 @@ const DatasetSchema = new Schema({ required: true, default: DatasetTypeEnum.dataset }, - status: { - type: String, - enum: 
Object.keys(DatasetStatusMap), - default: DatasetStatusEnum.active - }, avatar: { type: String, default: '/icon/logo.svg' @@ -84,6 +102,9 @@ const DatasetSchema = new Schema({ } } }, + chunkSettings: { + type: ChunkSettings + }, inheritPermission: { type: Boolean, default: true @@ -98,8 +119,6 @@ const DatasetSchema = new Schema({ type: Object }, - autoSync: Boolean, - // abandoned externalReadUrl: { type: String diff --git a/packages/service/core/dataset/websiteSync.ts b/packages/service/core/dataset/websiteSync.ts new file mode 100644 index 000000000000..8013a05835da --- /dev/null +++ b/packages/service/core/dataset/websiteSync.ts @@ -0,0 +1,68 @@ +import { Processor } from 'bullmq'; +import { getQueue, getWorker, QueueNames } from '../../common/bullmq'; + +export type WebsiteSyncJobData = { + datasetId: string; +}; +export type WebsiteSyncJobReturn = void; + +export const websiteSyncQueue = getQueue( + QueueNames.websiteSync, + { + defaultJobOptions: { + attempts: 3, // up to 3 attempts in total (first run included) + backoff: { + type: 'exponential', + delay: 1000 // base delay of 1 second, grows exponentially per retry + } + } + } +); + +export function getWebsiteSyncWorker( + processor: Processor +) { + return getWorker(QueueNames.websiteSync, processor, { + removeOnFail: { + age: 15 * 24 * 3600, // Keep failed jobs for up to 15 days (age is in seconds) + count: 1000 // Keep up to 1000 failed jobs + } + }); +} + +export async function addWebsiteSyncJob(data: WebsiteSyncJobData) { + const datasetId = String(data.datasetId); + return websiteSyncQueue.add(datasetId, data, { deduplication: { id: datasetId } }); +} + +export async function getCurrentWebsiteSyncJob(datasetId: string) { + const jobId = await websiteSyncQueue.getDeduplicationJobId(datasetId); + if (!jobId) { + return undefined; + } + return websiteSyncQueue.getJob(jobId); +} + +const repeatDuration = 86400000; // every day +export async function upsertWebsiteSyncJobScheduler(data: WebsiteSyncJobData) { + const datasetId = String(data.datasetId); + return websiteSyncQueue.upsertJobScheduler( +
datasetId, + { + every: repeatDuration, + startDate: new Date().getTime() + repeatDuration // start tomorrow + }, + { + name: datasetId, + data + } + ); +} + +export async function getWebsiteSyncJobScheduler(datasetId: string) { + return websiteSyncQueue.getJobScheduler(datasetId); +} + +export async function removeWebsiteSyncJobScheduler(datasetId: string) { + return websiteSyncQueue.removeJobScheduler(datasetId); +} diff --git a/packages/service/package.json b/packages/service/package.json index 8bf1784edcc1..622a8a9cdc92 100644 --- a/packages/service/package.json +++ b/packages/service/package.json @@ -7,6 +7,7 @@ "@xmldom/xmldom": "^0.8.10", "@zilliz/milvus2-sdk-node": "2.4.2", "axios": "^1.8.2", + "bullmq": "^5.44.0", "chalk": "^5.3.0", "cheerio": "1.0.0-rc.12", "cookie": "^0.7.1", @@ -18,6 +19,7 @@ "file-type": "^19.0.0", "form-data": "^4.0.0", "iconv-lite": "^0.6.3", + "ioredis": "^5.6.0", "joplin-turndown-plugin-gfm": "^1.0.12", "json5": "^2.2.3", "jsonpath-plus": "^10.3.0", diff --git a/packages/web/components/common/Icon/icons/change.svg b/packages/web/components/common/Icon/icons/change.svg index 8ab5546eb862..546b49d8998a 100644 --- a/packages/web/components/common/Icon/icons/change.svg +++ b/packages/web/components/common/Icon/icons/change.svg @@ -1,9 +1,9 @@ - - - + + + diff --git a/packages/web/i18n/en/common.json b/packages/web/i18n/en/common.json index f729cca3e981..31673c6ef1c4 100644 --- a/packages/web/i18n/en/common.json +++ b/packages/web/i18n/en/common.json @@ -512,7 +512,7 @@ "core.dataset.Query extension intro": "Enabling the question optimization function can improve the accuracy of Dataset searches during continuous conversations. 
After enabling this function, when performing Dataset searches, the AI will complete the missing information of the question based on the conversation history.", "core.dataset.Quote Length": "Quote Content Length", "core.dataset.Read Dataset": "View Dataset Details", - "core.dataset.Set Website Config": "Start Configuring Website Information", + "core.dataset.Set Website Config": "Start Configuring", "core.dataset.Start export": "Export Started", "core.dataset.Table collection": "Table Dataset", "core.dataset.Text collection": "Text Dataset", @@ -630,6 +630,7 @@ "core.dataset.search.search mode": "Search Method", "core.dataset.status.active": "Ready", "core.dataset.status.syncing": "Syncing", + "core.dataset.status.waiting": "Waiting", "core.dataset.test.Batch test": "Batch Test", "core.dataset.test.Batch test Placeholder": "Select a CSV File", "core.dataset.test.Search Test": "Search Test", diff --git a/packages/web/i18n/en/dataset.json b/packages/web/i18n/en/dataset.json index 3eae9589a368..6d6c67e6ce4b 100644 --- a/packages/web/i18n/en/dataset.json +++ b/packages/web/i18n/en/dataset.json @@ -146,5 +146,8 @@ "website_dataset_desc": "Website sync allows you to build a Dataset directly using a web link.", "yuque_dataset": "Yuque Dataset", "yuque_dataset_config": "Yuque Dataset Config", - "yuque_dataset_desc": "Can build a dataset using Yuque documents by configuring permissions, without secondary storage" + "yuque_dataset_desc": "Can build a dataset using Yuque documents by configuring permissions, without secondary storage", + "website_info": "Website Information", + "params_config": "Parameters Configuration", + "immediate_sync": "Immediate Synchronization" } diff --git a/packages/web/i18n/zh-CN/common.json b/packages/web/i18n/zh-CN/common.json index 245f06b7c9e3..d3601898aa47 100644 --- a/packages/web/i18n/zh-CN/common.json +++ b/packages/web/i18n/zh-CN/common.json @@ -515,7 +515,7 @@ "core.dataset.Query extension intro": 
"开启问题优化功能,可以提高提高连续对话时,知识库搜索的精度。开启该功能后,在进行知识库搜索时,会根据对话记录,利用 AI 补全问题缺失的信息。", "core.dataset.Quote Length": "引用内容长度", "core.dataset.Read Dataset": "查看知识库详情", - "core.dataset.Set Website Config": "开始配置网站信息", + "core.dataset.Set Website Config": "开始配置", "core.dataset.Start export": "已开始导出", "core.dataset.Table collection": "表格数据集", "core.dataset.Text collection": "文本数据集", @@ -633,6 +633,7 @@ "core.dataset.search.search mode": "搜索方式", "core.dataset.status.active": "已就绪", "core.dataset.status.syncing": "同步中", + "core.dataset.status.waiting": "排队中", "core.dataset.test.Batch test": "批量测试", "core.dataset.test.Batch test Placeholder": "选择一个 CSV 文件", "core.dataset.test.Search Test": "搜索测试", @@ -1291,4 +1292,4 @@ "yes": "是", "yesterday": "昨天", "yesterday_detail_time": "昨天 {{time}}" -} \ No newline at end of file +} diff --git a/packages/web/i18n/zh-CN/dataset.json b/packages/web/i18n/zh-CN/dataset.json index 53a07dfdaf51..87926598a6a1 100644 --- a/packages/web/i18n/zh-CN/dataset.json +++ b/packages/web/i18n/zh-CN/dataset.json @@ -147,5 +147,8 @@ "website_dataset_desc": "Web 站点同步允许你直接使用一个网页链接构建知识库", "yuque_dataset": "语雀知识库", "yuque_dataset_config": "配置语雀知识库", - "yuque_dataset_desc": "可通过配置语雀文档权限,使用语雀文档构建知识库,文档不会进行二次存储" + "yuque_dataset_desc": "可通过配置语雀文档权限,使用语雀文档构建知识库,文档不会进行二次存储", + "website_info": "网站信息", + "params_config": "参数配置", + "immediate_sync": "立即同步" } diff --git a/packages/web/i18n/zh-Hant/common.json b/packages/web/i18n/zh-Hant/common.json index 955858e7d9bc..50679a111fd3 100644 --- a/packages/web/i18n/zh-Hant/common.json +++ b/packages/web/i18n/zh-Hant/common.json @@ -511,7 +511,7 @@ "core.dataset.Query extension intro": "開啟問題最佳化功能,可以提高連續對話時知識庫搜尋的準確度。開啟此功能後,在進行知識庫搜尋時,系統會根據對話記錄,利用 AI 補充問題中缺少的資訊。", "core.dataset.Quote Length": "引用內容長度", "core.dataset.Read Dataset": "檢視知識庫詳細資料", - "core.dataset.Set Website Config": "開始設定網站資訊", + "core.dataset.Set Website Config": "開始設定", "core.dataset.Start export": "已開始匯出", "core.dataset.Table collection": "表格資料集", "core.dataset.Text 
collection": "文字資料集", @@ -629,6 +629,7 @@ "core.dataset.search.search mode": "搜索方式", "core.dataset.status.active": "已就緒", "core.dataset.status.syncing": "同步中", + "core.dataset.status.waiting": "排隊中", "core.dataset.test.Batch test": "批次測試", "core.dataset.test.Batch test Placeholder": "選擇一個 CSV 檔案", "core.dataset.test.Search Test": "搜尋測試", diff --git a/packages/web/i18n/zh-Hant/dataset.json b/packages/web/i18n/zh-Hant/dataset.json index 7bc6c16c2581..3bf79ac466f9 100644 --- a/packages/web/i18n/zh-Hant/dataset.json +++ b/packages/web/i18n/zh-Hant/dataset.json @@ -146,5 +146,8 @@ "website_dataset_desc": "網站同步功能讓您可以直接使用網頁連結建立資料集", "yuque_dataset": "語雀知識庫", "yuque_dataset_config": "配置語雀知識庫", - "yuque_dataset_desc": "可通過配置語雀文檔權限,使用語雀文檔構建知識庫,文檔不會進行二次存儲" + "yuque_dataset_desc": "可通過配置語雀文檔權限,使用語雀文檔構建知識庫,文檔不會進行二次存儲", + "website_info": "網站資訊", + "params_config": "參數配置", + "immediate_sync": "立即同步" } diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 9ba6886bbca0..0df4b04abfa0 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -169,6 +169,9 @@ importers: axios: specifier: ^1.8.2 version: 1.8.3 + bullmq: + specifier: ^5.44.0 + version: 5.44.0 chalk: specifier: ^5.3.0 version: 5.4.1 @@ -202,6 +205,9 @@ importers: iconv-lite: specifier: ^0.6.3 version: 0.6.3 + ioredis: + specifier: ^5.6.0 + version: 5.6.0 joplin-turndown-plugin-gfm: specifier: ^1.0.12 version: 1.0.12 @@ -2044,6 +2050,9 @@ packages: resolution: {integrity: sha512-93zYdMES/c1D69yZiKDBj0V24vqNzB/koF26KPaagAfd3P/4gUlh3Dys5ogAK+Exi9QyzlD8x/08Zt7wIKcDcA==} deprecated: Use @eslint/object-schema instead + '@ioredis/commands@1.2.0': + resolution: {integrity: sha512-Sx1pU8EM64o2BrqNpEO1CNLtKQwyhuXuqyfH7oGKCk+1a33d2r5saW8zNwm3j6BTExtjrv2BxTgzzkMwts6vGg==} + '@isaacs/cliui@8.0.2': resolution: {integrity: sha512-O8jcjabXaleOG9DQ0+ARXWZBTfnP4WNAqzuiJK7ll44AmxGKv/J2M4TPjxjY3znBCfvBXFzucm1twdyFybFqEA==} engines: {node: '>=12'} @@ -2314,6 +2323,36 @@ packages: '@mongodb-js/saslprep@1.2.0': resolution: {integrity:
sha512-+ywrb0AqkfaYuhHs6LxKWgqbh3I72EpEgESCw37o+9qPx9WTCkgDm2B+eMrwehGtHBWHFU4GXvnSCNiFhhausg==} + '@msgpackr-extract/msgpackr-extract-darwin-arm64@3.0.3': + resolution: {integrity: sha512-QZHtlVgbAdy2zAqNA9Gu1UpIuI8Xvsd1v8ic6B2pZmeFnFcMWiPLfWXh7TVw4eGEZ/C9TH281KwhVoeQUKbyjw==} + cpu: [arm64] + os: [darwin] + + '@msgpackr-extract/msgpackr-extract-darwin-x64@3.0.3': + resolution: {integrity: sha512-mdzd3AVzYKuUmiWOQ8GNhl64/IoFGol569zNRdkLReh6LRLHOXxU4U8eq0JwaD8iFHdVGqSy4IjFL4reoWCDFw==} + cpu: [x64] + os: [darwin] + + '@msgpackr-extract/msgpackr-extract-linux-arm64@3.0.3': + resolution: {integrity: sha512-YxQL+ax0XqBJDZiKimS2XQaf+2wDGVa1enVRGzEvLLVFeqa5kx2bWbtcSXgsxjQB7nRqqIGFIcLteF/sHeVtQg==} + cpu: [arm64] + os: [linux] + + '@msgpackr-extract/msgpackr-extract-linux-arm@3.0.3': + resolution: {integrity: sha512-fg0uy/dG/nZEXfYilKoRe7yALaNmHoYeIoJuJ7KJ+YyU2bvY8vPv27f7UKhGRpY6euFYqEVhxCFZgAUNQBM3nw==} + cpu: [arm] + os: [linux] + + '@msgpackr-extract/msgpackr-extract-linux-x64@3.0.3': + resolution: {integrity: sha512-cvwNfbP07pKUfq1uH+S6KJ7dT9K8WOE4ZiAcsrSes+UY55E/0jLYc+vq+DO7jlmqRb5zAggExKm0H7O/CBaesg==} + cpu: [x64] + os: [linux] + + '@msgpackr-extract/msgpackr-extract-win32-x64@3.0.3': + resolution: {integrity: sha512-x0fWaQtYp4E6sktbsdAqnehxDgEc/VwM7uLsRCYWaiGu0ykYdZPiS8zCWdnjHwyiumousxfBm4SO31eXqwEZhQ==} + cpu: [x64] + os: [win32] + '@napi-rs/wasm-runtime@0.2.7': resolution: {integrity: sha512-5yximcFK5FNompXfJFoWanu5l8v1hNGqNHh9du1xETp9HWk/B/PzvchX55WYOPaIeNglG8++68AAiauBAtbnzw==} @@ -4014,6 +4053,9 @@ packages: buffer@6.0.3: resolution: {integrity: sha512-FTiCpNxtwiZZHEZbcbTIcZjERVICn9yq/pDFkTl95/AxzD1naBctN7YO68riM/gLSDY7sdrMby8hofADYuuqOA==} + bullmq@5.44.0: + resolution: {integrity: sha512-OnEtkuXyrUx2Jm5BpH92+ttrobblBdCbkhOe3OoR0hxZuAilI3mPWlwELslhfImRpDv8rK+C/0/VK7I8f3xIig==} + bundle-n-require@1.1.2: resolution: {integrity: sha512-bEk2jakVK1ytnZ9R2AAiZEeK/GxPUM8jvcRxHZXifZDMcjkI4EG/GlsJ2YGSVYT9y/p/gA9/0yDY8rCGsSU6Tg==} @@ -4248,6 +4290,10 @@ packages: 
resolution: {integrity: sha512-eYm0QWBtUrBWZWG0d386OGAw16Z995PiOVo2B7bjWSbHedGl5e0ZWaq65kOGgUSNesEIDkB9ISbTg/JK9dhCZA==} engines: {node: '>=6'} + cluster-key-slot@1.1.2: + resolution: {integrity: sha512-RMr0FhtfXemyinomL4hrWcYJxmX6deFdCxpJzhDttxgO1+bcCnkk+9drydLVDmAMG7NE6aN/fl4F7ucU/90gAA==} + engines: {node: '>=0.10.0'} + co@4.6.0: resolution: {integrity: sha512-QVb0dM5HvG+uaxitm8wONl7jltx8dqhfU33DcqtOZcLSVIKSDDLDi7+0LbAKiyI8hD9u42m2YxXSkMGWThaecQ==} engines: {iojs: '>= 1.0.0', node: '>= 0.12.0'} @@ -5860,6 +5906,10 @@ packages: intersection-observer@0.12.2: resolution: {integrity: sha512-7m1vEcPCxXYI8HqnL8CKI6siDyD+eIWSwgB3DZA+ZTogxk9I4CDnj4wilt9x/+/QbHI4YG5YZNmC6458/e9Ktg==} + ioredis@5.6.0: + resolution: {integrity: sha512-tBZlIIWbndeWBWCXWZiqtOF/yxf6yZX3tAlTJ7nfo5jhd6dctNxF7QnYlZLZ1a0o0pDoen7CgZqO+zjNaFbJAg==} + engines: {node: '>=12.22.0'} + ip-address@9.0.5: resolution: {integrity: sha512-zHtQzGojZXTwZTHQqra+ETKd4Sn3vgi7uBmlPoXVWZqYvuKmtI0l/VZTjqGmJY9x88GGOaZ9+G9ES8hC4T4X8g==} engines: {node: '>= 12'} @@ -6554,9 +6604,15 @@ packages: lodash.debounce@4.0.8: resolution: {integrity: sha512-FT1yDzDYEoYWhnSGnpE/4Kj1fLZkDFyqRb7fNt6FdYOSxlUWAtp42Eh6Wb0rGIv/m9Bgo7x4GhQbm5Ys4SG5ow==} + lodash.defaults@4.2.0: + resolution: {integrity: sha512-qjxPLHd3r5DnsdGacqOMU6pb/avJzdh9tFX2ymgoZE27BmjXrNy/y4LoaiTeAb+O3gL8AfpJGtqfX/ae2leYYQ==} + lodash.includes@4.3.0: resolution: {integrity: sha512-W3Bx6mdkRTGtlJISOvVD/lbqjTlPPUDTMnlXZFnVwi9NKJ6tiAk6LVdlhZMm17VZisqhKcgzpO5Wz91PCt5b0w==} + lodash.isarguments@3.1.0: + resolution: {integrity: sha512-chi4NHZlZqZD18a0imDHnZPrDeBbTtVN7GXMwuGdRH9qotxAjYs3aVLKc7zNOG9eddR5Ksd8rvFEBc9SsggPpg==} + lodash.isboolean@3.0.3: resolution: {integrity: sha512-Bz5mupy2SVbPHURB98VAcw+aHh4vRV5IPNhILUCsOzRmsTmSQ17jIuqopAentWoehktxGd9e/hbIXq980/1QJg==} @@ -7128,6 +7184,13 @@ packages: ms@2.1.3: resolution: {integrity: sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==} + msgpackr-extract@3.0.3: + resolution: 
{integrity: sha512-P0efT1C9jIdVRefqjzOQ9Xml57zpOXnIuS+csaB4MdZbTdmGDLo8XhzBG1N7aO11gKDDkJvBLULeFTo46wwreA==} + hasBin: true + + msgpackr@1.11.2: + resolution: {integrity: sha512-F9UngXRlPyWCDEASDpTf6c9uNhGPTqnTeLVt7bN+bU1eajoR/8V9ys2BRaV5C/e5ihE6sJ9uPIKaYt6bFuO32g==} + mssql@11.0.1: resolution: {integrity: sha512-KlGNsugoT90enKlR8/G36H0kTxPthDhmtNUCwEHvgRza5Cjpjoj+P2X6eMpFUDN7pFrJZsKadL4x990G8RBE1w==} engines: {node: '>=18'} @@ -7260,6 +7323,10 @@ packages: encoding: optional: true + node-gyp-build-optional-packages@5.2.2: + resolution: {integrity: sha512-s+w+rBWnpTMwSFbaE0UXsRlg7hU4FjekKU4eyAih5T8nJuNZT1nNsskXpxmeqSK9UzkBl6UgRlnKc8hz8IEqOw==} + hasBin: true + node-gyp@10.3.1: resolution: {integrity: sha512-Pp3nFHBThHzVtNY7U6JfPjvT/DTE8+o/4xKsLQtBoU+j2HLsGlhcfzflAoUreaJbNmYnX+LlLi0qjV8kpyO6xQ==} engines: {node: ^16.14.0 || >=18.0.0} @@ -8041,6 +8108,14 @@ packages: react: ^16.0.0 || ^17.0.0 || ^18.0.0 || ^19.0.0 react-dom: ^16.0.0 || ^17.0.0 || ^18.0.0 || ^19.0.0 + redis-errors@1.2.0: + resolution: {integrity: sha512-1qny3OExCf0UvUV/5wpYKf2YwPcOqXzkwKKSmKHiE6ZMQs5heeE/c8eXK+PNllPvmjgAbfnsbpkGZWy8cBpn9w==} + engines: {node: '>=4'} + + redis-parser@3.0.0: + resolution: {integrity: sha512-DJnGAeenTdpMEH6uAJRK/uiyEIH9WVsUmoLwzudwGJUwZPp80PDBWPHXSAGNPwNvIXAbe7MSUB1zQFugFml66A==} + engines: {node: '>=4'} + redux@4.2.1: resolution: {integrity: sha512-LAUYz4lc+Do8/g7aeRa8JkyDErK6ekstQaqWQrNRW//MY1TvCEpMtpTWvlQ+FPbWCx+Xixu/6SHt5N0HR+SB4w==} @@ -8490,6 +8565,9 @@ packages: stackback@0.0.2: resolution: {integrity: sha512-1XMJE5fQo1jGH6Y/7ebnwPOBEkIEnT4QF32d5R1+VXdXveM0IBMJt8zfaxX1P3QhVwrYe+576+jkANtSS2mBbw==} + standard-as-callback@2.1.0: + resolution: {integrity: sha512-qoRRSyROncaz1z0mvYqIE4lCd9p2R90i6GxW3uZv5ucSu8tU7B5HXUP1gG8pVZsYNVaXjk8ClXHPttLyxAL48A==} + state-local@1.0.7: resolution: {integrity: sha512-HTEHMNieakEnoe33shBYcZ7NX83ACUjCu8c40iOGEZsngj9zRnkqS9j1pqQPXwobB0ZcVTk27REb7COQ0UR59w==} @@ -11160,6 +11238,8 @@ snapshots: '@humanwhocodes/object-schema@2.0.3': {} + 
'@ioredis/commands@1.2.0': {} + '@isaacs/cliui@8.0.2': dependencies: string-width: 5.1.2 @@ -11565,6 +11645,24 @@ snapshots: dependencies: sparse-bitfield: 3.0.3 + '@msgpackr-extract/msgpackr-extract-darwin-arm64@3.0.3': + optional: true + + '@msgpackr-extract/msgpackr-extract-darwin-x64@3.0.3': + optional: true + + '@msgpackr-extract/msgpackr-extract-linux-arm64@3.0.3': + optional: true + + '@msgpackr-extract/msgpackr-extract-linux-arm@3.0.3': + optional: true + + '@msgpackr-extract/msgpackr-extract-linux-x64@3.0.3': + optional: true + + '@msgpackr-extract/msgpackr-extract-win32-x64@3.0.3': + optional: true + '@napi-rs/wasm-runtime@0.2.7': dependencies: '@emnapi/core': 1.3.1 @@ -13456,6 +13554,18 @@ snapshots: base64-js: 1.5.1 ieee754: 1.2.1 + bullmq@5.44.0: + dependencies: + cron-parser: 4.9.0 + ioredis: 5.6.0 + msgpackr: 1.11.2 + node-abort-controller: 3.1.1 + semver: 7.7.1 + tslib: 2.8.1 + uuid: 9.0.1 + transitivePeerDependencies: + - supports-color + bundle-n-require@1.1.2: dependencies: esbuild: 0.25.1 @@ -13713,6 +13823,8 @@ snapshots: clsx@2.1.1: {} + cluster-key-slot@1.1.2: {} + co@4.6.0: {} collapse-white-space@1.0.6: {} @@ -14626,7 +14738,7 @@ snapshots: '@typescript-eslint/parser': 6.21.0(eslint@8.56.0)(typescript@5.8.2) eslint: 8.56.0 eslint-import-resolver-node: 0.3.9 - eslint-import-resolver-typescript: 3.9.0(eslint-plugin-import@2.31.0(@typescript-eslint/parser@6.21.0(eslint@8.56.0)(typescript@5.8.2))(eslint@8.56.0))(eslint@8.56.0) + eslint-import-resolver-typescript: 3.9.0(eslint-plugin-import@2.31.0)(eslint@8.56.0) eslint-plugin-import: 2.31.0(@typescript-eslint/parser@6.21.0(eslint@8.56.0)(typescript@5.8.2))(eslint-import-resolver-typescript@3.9.0)(eslint@8.56.0) eslint-plugin-jsx-a11y: 6.10.2(eslint@8.56.0) eslint-plugin-react: 7.37.4(eslint@8.56.0) @@ -14646,7 +14758,7 @@ snapshots: transitivePeerDependencies: - supports-color - 
eslint-import-resolver-typescript@3.9.0(eslint-plugin-import@2.31.0(@typescript-eslint/parser@6.21.0(eslint@8.56.0)(typescript@5.8.2))(eslint@8.56.0))(eslint@8.56.0): + eslint-import-resolver-typescript@3.9.0(eslint-plugin-import@2.31.0)(eslint@8.56.0): dependencies: '@nolyfill/is-core-module': 1.0.39 debug: 4.4.0 @@ -14661,14 +14773,14 @@ snapshots: transitivePeerDependencies: - supports-color - eslint-module-utils@2.12.0(@typescript-eslint/parser@6.21.0(eslint@8.56.0)(typescript@5.8.2))(eslint-import-resolver-node@0.3.9)(eslint-import-resolver-typescript@3.9.0(eslint-plugin-import@2.31.0(@typescript-eslint/parser@6.21.0(eslint@8.56.0)(typescript@5.8.2))(eslint@8.56.0))(eslint@8.56.0))(eslint@8.56.0): + eslint-module-utils@2.12.0(@typescript-eslint/parser@6.21.0(eslint@8.56.0)(typescript@5.8.2))(eslint-import-resolver-node@0.3.9)(eslint-import-resolver-typescript@3.9.0)(eslint@8.56.0): dependencies: debug: 3.2.7 optionalDependencies: '@typescript-eslint/parser': 6.21.0(eslint@8.56.0)(typescript@5.8.2) eslint: 8.56.0 eslint-import-resolver-node: 0.3.9 - eslint-import-resolver-typescript: 3.9.0(eslint-plugin-import@2.31.0(@typescript-eslint/parser@6.21.0(eslint@8.56.0)(typescript@5.8.2))(eslint@8.56.0))(eslint@8.56.0) + eslint-import-resolver-typescript: 3.9.0(eslint-plugin-import@2.31.0)(eslint@8.56.0) transitivePeerDependencies: - supports-color @@ -14683,7 +14795,7 @@ snapshots: doctrine: 2.1.0 eslint: 8.56.0 eslint-import-resolver-node: 0.3.9 - eslint-module-utils: 2.12.0(@typescript-eslint/parser@6.21.0(eslint@8.56.0)(typescript@5.8.2))(eslint-import-resolver-node@0.3.9)(eslint-import-resolver-typescript@3.9.0(eslint-plugin-import@2.31.0(@typescript-eslint/parser@6.21.0(eslint@8.56.0)(typescript@5.8.2))(eslint@8.56.0))(eslint@8.56.0))(eslint@8.56.0) + eslint-module-utils: 2.12.0(@typescript-eslint/parser@6.21.0(eslint@8.56.0)(typescript@5.8.2))(eslint-import-resolver-node@0.3.9)(eslint-import-resolver-typescript@3.9.0)(eslint@8.56.0) hasown: 2.0.2 
is-core-module: 2.16.1 is-glob: 4.0.3 @@ -15692,6 +15804,20 @@ snapshots: intersection-observer@0.12.2: {} + ioredis@5.6.0: + dependencies: + '@ioredis/commands': 1.2.0 + cluster-key-slot: 1.1.2 + debug: 4.4.0 + denque: 2.1.0 + lodash.defaults: 4.2.0 + lodash.isarguments: 3.1.0 + redis-errors: 1.2.0 + redis-parser: 3.0.0 + standard-as-callback: 2.1.0 + transitivePeerDependencies: + - supports-color + ip-address@9.0.5: dependencies: jsbn: 1.1.0 @@ -16558,8 +16684,12 @@ snapshots: lodash.debounce@4.0.8: {} + lodash.defaults@4.2.0: {} + lodash.includes@4.3.0: {} + lodash.isarguments@3.1.0: {} + lodash.isboolean@3.0.3: {} lodash.isinteger@4.0.4: {} @@ -17481,6 +17611,22 @@ snapshots: ms@2.1.3: {} + msgpackr-extract@3.0.3: + dependencies: + node-gyp-build-optional-packages: 5.2.2 + optionalDependencies: + '@msgpackr-extract/msgpackr-extract-darwin-arm64': 3.0.3 + '@msgpackr-extract/msgpackr-extract-darwin-x64': 3.0.3 + '@msgpackr-extract/msgpackr-extract-linux-arm': 3.0.3 + '@msgpackr-extract/msgpackr-extract-linux-arm64': 3.0.3 + '@msgpackr-extract/msgpackr-extract-linux-x64': 3.0.3 + '@msgpackr-extract/msgpackr-extract-win32-x64': 3.0.3 + optional: true + + msgpackr@1.11.2: + optionalDependencies: + msgpackr-extract: 3.0.3 + mssql@11.0.1: dependencies: '@tediousjs/connection-string': 0.5.0 @@ -17624,6 +17770,11 @@ snapshots: optionalDependencies: encoding: 0.1.13 + node-gyp-build-optional-packages@5.2.2: + dependencies: + detect-libc: 2.0.3 + optional: true + node-gyp@10.3.1: dependencies: env-paths: 2.2.1 @@ -18499,6 +18650,12 @@ snapshots: tiny-invariant: 1.3.3 victory-vendor: 36.9.2 + redis-errors@1.2.0: {} + + redis-parser@3.0.0: + dependencies: + redis-errors: 1.2.0 + redux@4.2.1: dependencies: '@babel/runtime': 7.26.10 @@ -19048,6 +19205,8 @@ snapshots: stackback@0.0.2: {} + standard-as-callback@2.1.0: {} + state-local@1.0.7: {} state-toggle@1.0.3: {} diff --git a/projects/app/.env.template b/projects/app/.env.template index 297b19b31d89..4ce0c90adf7a 100644 --- 
a/projects/app/.env.template +++ b/projects/app/.env.template @@ -32,6 +32,9 @@ OCEANBASE_URL= MILVUS_ADDRESS= MILVUS_TOKEN= +# Redis URL +REDIS_URL=redis://username:password@127.0.0.1:6379 + # code sandbox url SANDBOX_URL=http://localhost:3001 # 商业版地址 @@ -65,4 +68,4 @@ CHECK_INTERNAL_IP=false # # 日志来源ID前缀 # CHAT_LOG_SOURCE_ID_PREFIX=fastgpt- # 自定义跨域,不配置时,默认都允许跨域(逗号分割) -ALLOWED_ORIGINS= \ No newline at end of file +ALLOWED_ORIGINS= diff --git a/projects/app/src/instrumentation.ts b/projects/app/src/instrumentation.ts index 3abc0e04a78d..1681d5ba476e 100644 --- a/projects/app/src/instrumentation.ts +++ b/projects/app/src/instrumentation.ts @@ -1,6 +1,6 @@ import { exit } from 'process'; -/* +/* Init system */ export async function register() { diff --git a/projects/app/src/pageComponents/dataset/detail/CollectionCard/Context.tsx b/projects/app/src/pageComponents/dataset/detail/CollectionCard/Context.tsx index f43bf12bb497..72e2a5b1ea71 100644 --- a/projects/app/src/pageComponents/dataset/detail/CollectionCard/Context.tsx +++ b/projects/app/src/pageComponents/dataset/detail/CollectionCard/Context.tsx @@ -2,7 +2,7 @@ import { useConfirm } from '@fastgpt/web/hooks/useConfirm'; import { Dispatch, ReactNode, SetStateAction, useEffect, useState } from 'react'; import { useTranslation } from 'next-i18next'; import { createContext, useContextSelector } from 'use-context-selector'; -import { DatasetStatusEnum, DatasetTypeEnum } from '@fastgpt/global/core/dataset/constants'; +import { DatasetTypeEnum } from '@fastgpt/global/core/dataset/constants'; import { useRequest } from '@fastgpt/web/hooks/useRequest'; import { DatasetSchemaType } from '@fastgpt/global/core/dataset/type'; import { useDisclosure } from '@chakra-ui/react'; @@ -14,6 +14,7 @@ import { usePagination } from '@fastgpt/web/hooks/usePagination'; import { DatasetCollectionsListItemType } from '@/global/core/dataset/type'; import { useRouter } from 'next/router'; import { DatasetPageContext } from 
'@/web/core/dataset/context/datasetPageContext'; +import { WebsiteConfigFormType } from './WebsiteConfig'; const WebSiteConfigModal = dynamic(() => import('./WebsiteConfig')); @@ -66,7 +67,7 @@ const CollectionPageContextProvider = ({ children }: { children: ReactNode }) => const router = useRouter(); const { parentId = '' } = router.query as { parentId: string }; - const { datasetDetail, datasetId, updateDataset } = useContextSelector( + const { datasetDetail, datasetId, updateDataset, loadDatasetDetail } = useContextSelector( DatasetPageContext, (v) => v ); @@ -75,26 +76,41 @@ const CollectionPageContextProvider = ({ children }: { children: ReactNode }) => const { openConfirm: openWebSyncConfirm, ConfirmModal: ConfirmWebSyncModal } = useConfirm({ content: t('dataset:start_sync_website_tip') }); + + const syncWebsite = async () => { + await checkTeamWebSyncLimit(); + await postWebsiteSync({ datasetId: datasetId }); + await loadDatasetDetail(datasetId); + }; + const { isOpen: isOpenWebsiteModal, onOpen: onOpenWebsiteModal, onClose: onCloseWebsiteModal } = useDisclosure(); const { mutate: onUpdateDatasetWebsiteConfig } = useRequest({ - mutationFn: async (websiteConfig: DatasetSchemaType['websiteConfig']) => { + mutationFn: async (websiteConfig: WebsiteConfigFormType) => { onCloseWebsiteModal(); - await checkTeamWebSyncLimit(); await updateDataset({ id: datasetId, - websiteConfig, - status: DatasetStatusEnum.syncing - }); - const billId = await postCreateTrainingUsage({ - name: t('common:core.dataset.training.Website Sync'), - datasetId: datasetId + websiteConfig: { + url: websiteConfig.url, + selector: websiteConfig.selector || 'body' + }, + chunkSettings: { + autoIndexes: websiteConfig.autoIndexes, + imageIndex: websiteConfig.imageIndex, + trainingType: websiteConfig.trainingType, + chunkSettingMode: websiteConfig.chunkSettingMode, + chunkSplitMode: websiteConfig.chunkSplitMode, + chunkSize: websiteConfig.chunkSize, + indexSize: websiteConfig.indexSize, + 
chunkSplitter: websiteConfig.chunkSplitter, + qaPrompt: websiteConfig.qaPrompt + } }); - await postWebsiteSync({ datasetId: datasetId, billId }); + await syncWebsite(); return; }, errorToast: t('common:common.Update Failed') @@ -124,7 +140,7 @@ const CollectionPageContextProvider = ({ children }: { children: ReactNode }) => }); const contextValue: CollectionPageContextType = { - openWebSyncConfirm: openWebSyncConfirm(onUpdateDatasetWebsiteConfig), + openWebSyncConfirm: openWebSyncConfirm(syncWebsite), onOpenWebsiteModal, searchText, @@ -149,10 +165,6 @@ const CollectionPageContextProvider = ({ children }: { children: ReactNode }) => )} diff --git a/projects/app/src/pageComponents/dataset/detail/CollectionCard/EmptyCollectionTip.tsx b/projects/app/src/pageComponents/dataset/detail/CollectionCard/EmptyCollectionTip.tsx index d73c3a6a1fbb..30c098e71671 100644 --- a/projects/app/src/pageComponents/dataset/detail/CollectionCard/EmptyCollectionTip.tsx +++ b/projects/app/src/pageComponents/dataset/detail/CollectionCard/EmptyCollectionTip.tsx @@ -25,6 +25,9 @@ const EmptyCollectionTip = () => { {datasetDetail.status === DatasetStatusEnum.syncing && ( <>{t('common:core.dataset.status.syncing')} )} + {datasetDetail.status === DatasetStatusEnum.waiting && ( + <>{t('common:core.dataset.status.waiting')} + )} {datasetDetail.status === DatasetStatusEnum.active && ( <> {!datasetDetail?.websiteConfig?.url ? 
( diff --git a/projects/app/src/pageComponents/dataset/detail/CollectionCard/Header.tsx b/projects/app/src/pageComponents/dataset/detail/CollectionCard/Header.tsx index 7fc193f407ca..9e4e67d2de79 100644 --- a/projects/app/src/pageComponents/dataset/detail/CollectionCard/Header.tsx +++ b/projects/app/src/pageComponents/dataset/detail/CollectionCard/Header.tsx @@ -25,11 +25,9 @@ import MyMenu from '@fastgpt/web/components/common/MyMenu'; import { useEditTitle } from '@/web/common/hooks/useEditTitle'; import { DatasetCollectionTypeEnum, - TrainingModeEnum, DatasetTypeEnum, DatasetTypeMap, - DatasetStatusEnum, - DatasetCollectionDataProcessModeEnum + DatasetStatusEnum } from '@fastgpt/global/core/dataset/constants'; import EditFolderModal, { useEditFolder } from '../../EditFolderModal'; import { TabEnum } from '../../../../pages/dataset/detail/index'; @@ -43,6 +41,7 @@ import { DatasetPageContext } from '@/web/core/dataset/context/datasetPageContex import { useSystem } from '@fastgpt/web/hooks/useSystem'; import HeaderTagPopOver from './HeaderTagPopOver'; import MyBox from '@fastgpt/web/components/common/MyBox'; +import Icon from '@fastgpt/web/components/common/Icon'; const FileSourceSelector = dynamic(() => import('../Import/components/FileSourceSelector')); @@ -57,8 +56,15 @@ const Header = ({}: {}) => { const { parentId = '' } = router.query as { parentId: string }; const { isPc } = useSystem(); - const { searchText, setSearchText, total, getData, pageNum, onOpenWebsiteModal } = - useContextSelector(CollectionPageContext, (v) => v); + const { + searchText, + setSearchText, + total, + getData, + pageNum, + onOpenWebsiteModal, + openWebSyncConfirm + } = useContextSelector(CollectionPageContext, (v) => v); const { data: paths = [] } = useQuery(['getDatasetCollectionPathById', parentId], () => getDatasetCollectionPathById(parentId) @@ -171,7 +177,9 @@ const Header = ({}: {}) => { )} {/* Tag */} - {datasetDetail.permission.hasWritePer && feConfigs?.isPlus && } + 
{datasetDetail.type !== DatasetTypeEnum.websiteDataset && + datasetDetail.permission.hasWritePer && + feConfigs?.isPlus && } {/* diff collection button */} @@ -274,33 +282,72 @@ const Header = ({}: {}) => { {datasetDetail?.websiteConfig?.url ? ( {datasetDetail.status === DatasetStatusEnum.active && ( - + + + + )} {datasetDetail.status === DatasetStatusEnum.syncing && ( - + {t('common:core.dataset.status.syncing')} )} + {datasetDetail.status === DatasetStatusEnum.waiting && ( + + + + {t('common:core.dataset.status.waiting')} + + + )} ) : ( - )} diff --git a/projects/app/src/pageComponents/dataset/detail/CollectionCard/WebsiteConfig.tsx b/projects/app/src/pageComponents/dataset/detail/CollectionCard/WebsiteConfig.tsx index e66d4c6588f4..c5e05b78f608 100644 --- a/projects/app/src/pageComponents/dataset/detail/CollectionCard/WebsiteConfig.tsx +++ b/projects/app/src/pageComponents/dataset/detail/CollectionCard/WebsiteConfig.tsx @@ -1,38 +1,103 @@ -import React from 'react'; import MyModal from '@fastgpt/web/components/common/MyModal'; import { useTranslation } from 'next-i18next'; -import { Box, Button, Input, Link, ModalBody, ModalFooter } from '@chakra-ui/react'; import { strIsLink } from '@fastgpt/global/common/string/tools'; import { useToast } from '@fastgpt/web/hooks/useToast'; import { useForm } from 'react-hook-form'; import { useConfirm } from '@fastgpt/web/hooks/useConfirm'; import { getDocPath } from '@/web/common/system/doc'; import { useSystemStore } from '@/web/common/system/useSystemStore'; +import { useMyStep } from '@fastgpt/web/hooks/useStep'; +import MyDivider from '@fastgpt/web/components/common/MyDivider'; +import React, { useCallback, useEffect, useMemo, useRef, useState } from 'react'; +import { + Box, + Link, + Flex, + Input, + Button, + ModalBody, + ModalFooter, + Textarea, + useDisclosure, + Checkbox, + HStack, + Stack +} from '@chakra-ui/react'; +import MyIcon from '@fastgpt/web/components/common/Icon'; +import LeftRadio from 
'@fastgpt/web/components/common/Radio/LeftRadio'; +import { + DataChunkSplitModeEnum, + DatasetCollectionDataProcessModeEnum, + DatasetCollectionDataProcessModeMap +} from '@fastgpt/global/core/dataset/constants'; +import { ChunkSettingModeEnum } from '@fastgpt/global/core/dataset/constants'; +import MyTooltip from '@fastgpt/web/components/common/MyTooltip'; +import { Prompt_AgentQA } from '@fastgpt/global/core/ai/prompt/agent'; +import { useContextSelector } from 'use-context-selector'; +import FormLabel from '@fastgpt/web/components/common/MyBox/FormLabel'; +import MyNumberInput from '@fastgpt/web/components/common/Input/NumberInput'; +import QuestionTip from '@fastgpt/web/components/common/MyTooltip/QuestionTip'; +import { DatasetPageContext } from '@/web/core/dataset/context/datasetPageContext'; +import MySelect from '@fastgpt/web/components/common/MySelect'; +import { + getIndexSizeSelectList, + getLLMMaxChunkSize, + getMaxIndexSize +} from '@fastgpt/global/core/dataset/training/utils'; +import RadioGroup from '@fastgpt/web/components/common/Radio/RadioGroup'; -type FormType = { - url?: string | undefined; +export type WebsiteConfigFormType = { + url: string; selector?: string | undefined; + autoIndexes?: boolean; + imageIndex?: boolean; + trainingType: DatasetCollectionDataProcessModeEnum; + chunkSettingMode?: ChunkSettingModeEnum; + chunkSplitMode?: DataChunkSplitModeEnum; + chunkSize?: number; + indexSize?: number; + chunkSplitter?: string; + qaPrompt?: string; }; const WebsiteConfigModal = ({ onClose, - onSuccess, - defaultValue = { - url: '', - selector: '' - } + onSuccess }: { onClose: () => void; - onSuccess: (data: FormType) => void; - defaultValue?: FormType; + onSuccess: (data: WebsiteConfigFormType) => void; }) => { const { t } = useTranslation(); const { feConfigs } = useSystemStore(); const { toast } = useToast(); - const { register, handleSubmit } = useForm({ - defaultValues: defaultValue + const steps = [ + { + title: t('dataset:website_info') + 
}, + { + title: t('dataset:params_config') + } + ]; + + const datasetDetail = useContextSelector(DatasetPageContext, (v) => v.datasetDetail); + const websiteConfig = datasetDetail.websiteConfig; + const chunkSettings = datasetDetail.chunkSettings; + + const { + register: registerForm1, + handleSubmit: handleSubmitForm1, + getValues: getValuesForm1 + } = useForm({ + defaultValues: { + url: websiteConfig?.url || '', + selector: websiteConfig?.selector || '' + } }); - const isEdit = !!defaultValue.url; + + const vectorModel = datasetDetail.vectorModel; + const agentModel = datasetDetail.agentModel; + + const isEdit = !!websiteConfig?.url; const confirmTip = isEdit ? t('common:core.dataset.website.Confirm Update Tips') : t('common:core.dataset.website.Confirm Create Tips'); @@ -41,70 +106,439 @@ const WebsiteConfigModal = ({ type: 'common' }); + const { activeStep, goToPrevious, goToNext, MyStep } = useMyStep({ + defaultStep: 0, + steps + }); + + const minChunkSize = 1000; + const maxChunkSize = getLLMMaxChunkSize(agentModel); + const maxIndexSize = getMaxIndexSize(vectorModel); + + const { setValue, register, watch, getValues, handleSubmit } = useForm({ + defaultValues: { + imageIndex: chunkSettings?.imageIndex || false, + autoIndexes: chunkSettings?.autoIndexes || false, + trainingType: chunkSettings?.trainingType || DatasetCollectionDataProcessModeEnum.chunk, + chunkSettingMode: chunkSettings?.chunkSettingMode || ChunkSettingModeEnum.auto, + chunkSplitMode: chunkSettings?.chunkSplitMode || DataChunkSplitModeEnum.size, + chunkSize: chunkSettings?.chunkSize || 2000, + indexSize: chunkSettings?.indexSize || vectorModel?.defaultToken || 512, + chunkSplitter: chunkSettings?.chunkSplitter || '', + qaPrompt: chunkSettings?.qaPrompt || Prompt_AgentQA.description + } + }); + + const trainingType = watch('trainingType'); + const indexSize = watch('indexSize'); + + const trainingModeList = useMemo(() => { + const list = Object.entries(DatasetCollectionDataProcessModeMap); + 
return list + .filter(([key]) => key !== DatasetCollectionDataProcessModeEnum.auto) + .map(([key, value]) => ({ + title: t(value.label as any), + value: key as DatasetCollectionDataProcessModeEnum, + tooltip: t(value.tooltip as any) + })); + }, [t]); + + const chunkSettingMode = watch('chunkSettingMode'); + const chunkSplitMode = watch('chunkSplitMode'); + + const customSplitList = [ + { label: t('dataset:split_sign_null'), value: '' }, + { label: t('dataset:split_sign_break'), value: '\\n' }, + { label: t('dataset:split_sign_break2'), value: '\\n\\n' }, + { label: t('dataset:split_sign_period'), value: '.|。' }, + { label: t('dataset:split_sign_exclamatiob'), value: '!|!' }, + { label: t('dataset:split_sign_question'), value: '?|?' }, + { label: t('dataset:split_sign_semicolon'), value: ';|;' }, + { label: '=====', value: '=====' }, + { label: t('dataset:split_sign_custom'), value: 'Other' } + ]; + + const [customListSelectValue, setCustomListSelectValue] = useState(getValues('chunkSplitter')); + useEffect(() => { + if (customListSelectValue === 'Other') { + setValue('chunkSplitter', ''); + } else { + setValue('chunkSplitter', customListSelectValue); + } + }, [customListSelectValue, setValue]); + + // Index size + const indexSizeSeletorList = useMemo(() => getIndexSizeSelectList(maxIndexSize), [maxIndexSize]); + + // QA + const qaPrompt = watch('qaPrompt'); + const { + isOpen: isOpenCustomPrompt, + onOpen: onOpenCustomPrompt, + onClose: onCloseCustomPrompt + } = useDisclosure(); + + // Adapt auto training + useEffect(() => { + if (trainingType === DatasetCollectionDataProcessModeEnum.auto) { + setValue('autoIndexes', true); + setValue('trainingType', DatasetCollectionDataProcessModeEnum.chunk); + } + }, [trainingType, setValue]); + + const showQAPromptInput = trainingType === DatasetCollectionDataProcessModeEnum.qa; + return ( - - - {t('common:core.dataset.website.Config Description')} - {feConfigs?.docUrl && ( - + + + + + {activeStep == 0 && ( + <> + - 
{t('common:common.course.Read Course')} - - )} - - - {t('common:core.dataset.website.Base Url')} - - - - - {t('common:core.dataset.website.Selector')}({t('common:common.choosable')}) - - - + {t('common:core.dataset.website.Config Description')} + {feConfigs?.docUrl && ( + + {t('common:common.course.Read Course')} + + )} + + + {t('common:core.dataset.website.Base Url')} + + + + + {t('common:core.dataset.website.Selector')}({t('common:common.choosable')}) + + + + + )} + {activeStep == 1 && ( + <> + + + {t('dataset:training_mode')} + + + list={trainingModeList} + px={3} + py={2.5} + value={trainingType} + onChange={(e) => { + setValue('trainingType', e); + }} + defaultBg="white" + activeBg="white" + gridTemplateColumns={'repeat(2, 1fr)'} + /> + + {trainingType === DatasetCollectionDataProcessModeEnum.chunk && ( + + + {t('dataset:enhanced_indexes')} + + + + + + {t('dataset:auto_indexes')} + + + + + + + + {t('dataset:image_auto_parse')} + + + + + + + )} + + + {t('dataset:params_setting')} + + + list={[ + { + title: t('dataset:default_params'), + desc: t('dataset:default_params_desc'), + value: ChunkSettingModeEnum.auto + }, + { + title: t('dataset:custom_data_process_params'), + desc: t('dataset:custom_data_process_params_desc'), + value: ChunkSettingModeEnum.custom, + children: chunkSettingMode === ChunkSettingModeEnum.custom && ( + + + + list={[ + { + title: t('dataset:split_chunk_size'), + value: DataChunkSplitModeEnum.size + }, + { + title: t('dataset:split_chunk_char'), + value: DataChunkSplitModeEnum.char, + tooltip: t('dataset:custom_split_sign_tip') + } + ]} + value={chunkSplitMode} + onChange={(e) => { + setValue('chunkSplitMode', e); + }} + /> + + {chunkSplitMode === DataChunkSplitModeEnum.size && ( + span': { + display: 'block' + } + }} + > + + + + + )} + + {chunkSplitMode === DataChunkSplitModeEnum.char && ( + + + + list={customSplitList} + size={'sm'} + bg={'myGray.50'} + value={customListSelectValue} + h={'32px'} + onChange={(val) => { + 
setCustomListSelectValue(val); + }} + /> + + {customListSelectValue === 'Other' && ( + + )} + + )} + + + {trainingType === DatasetCollectionDataProcessModeEnum.chunk && ( + + + {t('dataset:index_size')} + + + + + bg={'myGray.50'} + list={indexSizeSeletorList} + value={indexSize} + onChange={(val) => { + setValue('indexSize', val); + }} + /> + + + )} + + {showQAPromptInput && ( + + {t('common:core.dataset.collection.QA Prompt')} + + {qaPrompt} + + + + + + + )} + + ) + } + ]} + gridGap={3} + px={3} + py={3} + defaultBg="white" + activeBg="white" + value={chunkSettingMode} + w={'100%'} + onChange={(e) => { + setValue('chunkSettingMode', e); + }} + /> + + {isOpenCustomPrompt && ( + { + setValue('qaPrompt', e); + }} + onClose={onCloseCustomPrompt} + /> + )} + + )} - - + {activeStep == 0 && ( + <> + + + + )} + {activeStep == 1 && ( + <> + + + + )} @@ -112,3 +546,42 @@ const WebsiteConfigModal = ({ }; export default WebsiteConfigModal; + +const PromptTextarea = ({ + defaultValue, + onChange, + onClose +}: { + defaultValue: string; + onChange: (e: string) => void; + onClose: () => void; +}) => { + const ref = useRef(null); + const { t } = useTranslation(); + + return ( + + +