fix: dataset training state #4417

Merged · 6 commits · Apr 1, 2025
Changes from all commits
4 changes: 2 additions & 2 deletions packages/web/components/common/Icon/icons/common/check.svg
8 changes: 7 additions & 1 deletion packages/web/hooks/useScrollPagination.tsx
@@ -308,7 +308,13 @@ export function useScrollPagination<
);

return (
<MyBox ref={ref} h={'100%'} overflow={'auto'} isLoading={isLoading} {...props}>
<MyBox
ref={ref}
h={'100%'}
overflow={'auto'}
isLoading={isLoading || isLoadingProp}
{...props}
>
{scrollLoadType === 'top' && total > 0 && isLoading && (
<Box mt={2} fontSize={'xs'} color={'blackAlpha.500'} textAlign={'center'}>
{t('common:common.is_requesting')}
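The hook change above merges a caller-supplied `isLoading` prop into the spinner state that was previously driven only by the hook's own fetches. A minimal sketch of the pattern with stand-in components (only `isLoading`/`isLoadingProp` come from the diff; the component names and props around them are illustrative, not the PR's actual signatures):

```tsx
import React from 'react';

// Stand-in for MyBox: a container that can show a busy indicator.
function LoadingBox({
  isLoading,
  children
}: {
  isLoading: boolean;
  children: React.ReactNode;
}) {
  return <div aria-busy={isLoading}>{children}</div>;
}

// Stand-in for the hook's ScrollData wrapper. The internal fetch state
// used to drive the spinner alone; ORing in the caller's flag lets a
// mutation elsewhere on the page reuse the same overlay.
function ScrollData({
  isLoading: isLoadingProp = false,
  internalLoading,
  children
}: {
  isLoading?: boolean;
  internalLoading: boolean;
  children: React.ReactNode;
}) {
  return (
    <LoadingBox isLoading={internalLoading || isLoadingProp}>{children}</LoadingBox>
  );
}
```

One spinner then covers both pagination fetches and row-level mutations, which is exactly how ErrorView uses it further down in this PR.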
3 changes: 3 additions & 0 deletions packages/web/i18n/en/dataset.json
@@ -34,9 +34,12 @@
"data_process_setting": "Processing config",
"dataset.Chunk_Number": "Block number",
"dataset.Completed": "Finish",
"dataset.Delete_Chunk": "delete",
"dataset.Edit_Chunk": "edit",
"dataset.Error_Message": "Report an error message",
"dataset.No_Error": "No exception information yet",
"dataset.Operation": "operate",
"dataset.ReTrain": "Retrain",
"dataset.Training Process": "Training status",
"dataset.Training_Count": "{{count}} Group training",
"dataset.Training_Errors": "Errors",
6 changes: 3 additions & 3 deletions packages/web/i18n/zh-CN/dataset.json
@@ -34,12 +34,12 @@
"data_process_setting": "数据处理配置",
"dataset.Chunk_Number": "分块号",
"dataset.Completed": "完成",
"dataset.Delete_Chunk": "删除分块",
"dataset.Edit_Chunk": "编辑分块",
"dataset.Delete_Chunk": "删除",
"dataset.Edit_Chunk": "编辑",
"dataset.Error_Message": "报错信息",
"dataset.No_Error": "暂无异常信息",
"dataset.Operation": "操作",
"dataset.ReTrain": "重新训练",
"dataset.ReTrain": "重试",
"dataset.Training Process": "训练状态",
"dataset.Training_Count": "{{count}} 组训练中",
"dataset.Training_Errors": "异常 ({{count}})",
3 changes: 3 additions & 0 deletions packages/web/i18n/zh-Hant/dataset.json
@@ -34,9 +34,12 @@
"data_process_setting": "資料處理設定",
"dataset.Chunk_Number": "分塊號",
"dataset.Completed": "完成",
"dataset.Delete_Chunk": "刪除",
"dataset.Edit_Chunk": "編輯",
"dataset.Error_Message": "報錯信息",
"dataset.No_Error": "暫無異常信息",
"dataset.Operation": "操作",
"dataset.ReTrain": "重試",
"dataset.Training Process": "訓練狀態",
"dataset.Training_Count": "{{count}} 組訓練中",
"dataset.Training_Errors": "異常",
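These catalogs use literal dotted keys with i18next interpolation (`{{count}}`). A minimal sketch of how such a key resolves, assuming a standard i18next setup with the key separator disabled (FastGPT's actual init options may differ):

```ts
import i18next from 'i18next';

// Minimal init for the one key under discussion; keySeparator is
// disabled because these catalogs use literal dotted keys.
await i18next.init({
  lng: 'en',
  keySeparator: false,
  defaultNS: 'dataset',
  resources: {
    en: {
      dataset: {
        'dataset.Training_Count': '{{count}} Group training'
      }
    }
  }
});

// {{count}} is interpolated into the message at lookup time.
console.log(i18next.t('dataset.Training_Count', { count: 3 }));
// -> "3 Group training"
```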
@@ -36,6 +36,7 @@ import { useForm } from 'react-hook-form';
import { getTrainingDetailResult } from '@/pages/api/core/dataset/collection/trainingDetail';
import { TFunction } from 'i18next';
import { useScrollPagination } from '@fastgpt/web/hooks/useScrollPagination';
import EmptyTip from '@fastgpt/web/components/common/EmptyTip';

const getTrainingStatus = ({
trainingCount,
@@ -156,11 +157,14 @@ const ProgressView = ({ trainingDetail }: { trainingDetail: getTrainingDetailRes
{statesArray.map((item, index) => (
<Flex alignItems={'center'} pl={4} key={item.label}>
<Box
w={'12px'}
h={'12px'}
w={'14px'}
h={'14px'}
borderWidth={'2px'}
borderRadius={'50%'}
position={'relative'}
display={'flex'}
alignItems={'center'}
justifyContent={'center'}
{...(item.status === TrainingStatus.InProgress || item.status === TrainingStatus.Error
? {
bg: 'primary.600',
@@ -178,16 +182,18 @@
{...(index !== statesArray.length - 1 && {
_after: {
content: '""',
height: '66px',
height: '59px',
width: '2px',
bgColor: 'myGray.250',
position: 'absolute',
top: '10px',
left: '3px'
top: '14px',
left: '4px'
}
})}
>
{item.status === TrainingStatus.Normal && <MyIcon name="check" w={2} color={'white'} />}
{item.status === TrainingStatus.Normal && (
<MyIcon name="common/check" w={3} color={'white'} />
)}
</Box>
<Flex
alignItems={'center'}
@@ -265,7 +271,8 @@ const ErrorView = ({ datasetId, collectionId }: { datasetId: string; collectionI
pageSize: 15,
params: {
collectionId
}
},
EmptyTip: <EmptyTip />
});

const { runAsync: getData, loading: getDataLoading } = useRequest2(
@@ -322,8 +329,8 @@ const ErrorView = ({ datasetId, collectionId }: { datasetId: string; collectionI

return (
<ScrollData
isLoading={isLoading || updateLoading || getDataLoading || deleteLoading}
h={'400px'}
isLoading={isLoading || updateLoading || getDataLoading || deleteLoading}
>
<TableContainer overflowY={'auto'} fontSize={'12px'}>
<Table variant={'simple'}>
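ErrorView now hands the hook an `EmptyTip` node and ORs four loading flags into `ScrollData`. A self-contained sketch of that contract (the `EmptyTip` option name matches the diff; the hook internals here are simplified stand-ins, not the real implementation):

```tsx
import React, { useState } from 'react';

type UseScrollOptions<P> = {
  pageSize: number;
  params: P;
  EmptyTip?: React.ReactNode;
};

// Simplified stand-in for useScrollPagination: one fetch, plus a
// ScrollData wrapper that renders EmptyTip when the list is empty.
function useMiniScrollPagination<T, P>(
  fetcher: (params: P) => Promise<T[]>,
  options: UseScrollOptions<P>
) {
  const [list, setList] = useState<T[]>([]);
  const [isLoading, setIsLoading] = useState(false);

  const load = async () => {
    setIsLoading(true);
    try {
      setList(await fetcher(options.params));
    } finally {
      setIsLoading(false);
    }
  };

  const ScrollData = ({
    isLoading: extraLoading = false,
    children
  }: {
    isLoading?: boolean;
    children: React.ReactNode;
  }) => (
    <div aria-busy={isLoading || extraLoading}>
      {list.length === 0 && !isLoading ? options.EmptyTip : children}
    </div>
  );

  return { list, load, isLoading, ScrollData };
}
```

Centralizing the empty state in the hook means every list paginated this way gets the same placeholder for free, instead of each caller branching on `total === 0`.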
@@ -51,13 +51,6 @@ async function handler(
datasetId: collection.datasetId,
collectionId: collection._id
};
const group = {
_id: null,
qa: { $sum: { $cond: [{ $eq: ['$mode', TrainingModeEnum.qa] }, 1, 0] } },
chunk: { $sum: { $cond: [{ $eq: ['$mode', TrainingModeEnum.chunk] }, 1, 0] } },
image: { $sum: { $cond: [{ $eq: ['$mode', TrainingModeEnum.image] }, 1, 0] } },
auto: { $sum: { $cond: [{ $eq: ['$mode', TrainingModeEnum.auto] }, 1, 0] } }
};

// Computed global queue
const minId = await MongoDatasetTraining.findOne(
@@ -70,41 +63,58 @@
readFromSecondary
).lean();

const [trainingCountsResult, trainedCount, waitingCountsResult] = await Promise.all([
// Get total training count & error count
const [result, trainedCount] = await Promise.all([
MongoDatasetTraining.aggregate(
[
{ $match: match },
{
$facet: {
trainingCounts: [{ $count: 'total' }],
errorCounts: [{ $match: { errorMsg: { $exists: true } } }, { $count: 'total' }]
trainingCounts: [{ $group: { _id: '$mode', count: { $sum: 1 } } }],
errorCounts: [
{ $match: { errorMsg: { $exists: true } } },
{ $group: { _id: '$mode', count: { $sum: 1 } } }
],
waitingCounts: [
{
$match: {
_id: { $lt: minId?._id },
retryCount: { $gt: 0 },
lockTime: { $lt: new Date('2050/1/1') }
}
},
{ $group: { _id: '$mode', count: { $sum: 1 } } }
]
}
}
],
readFromSecondary
),
// Get completed-training count
MongoDatasetData.countDocuments(match, readFromSecondary),
// Get waiting-for-training count
MongoDatasetTraining.aggregate(
[
{
$match: {
_id: { $lt: minId?._id },
retryCount: { $gt: 0 },
lockTime: { $lt: new Date('2050/1/1') }
}
},
{ $group: group }
],
readFromSecondary
)
MongoDatasetData.countDocuments(match, readFromSecondary)
]);

const trainingCounts = trainingCountsResult?.[0]?.trainingCounts?.[0]?.total || defaultCounts;
const errorCounts = trainingCountsResult?.[0]?.errorCounts?.[0]?.total || defaultCounts;
const waitingCounts = waitingCountsResult[0] || defaultCounts;
const trainingCounts = result[0].trainingCounts.reduce(
(acc: Record<TrainingModeEnum, number>, item: { _id: TrainingModeEnum; count: number }) => {
acc[item._id] = item.count;
return acc;
},
defaultCounts
);

const errorCounts = result[0].errorCounts.reduce(
(acc: Record<TrainingModeEnum, number>, item: { _id: TrainingModeEnum; count: number }) => {
acc[item._id] = item.count;
return acc;
},
defaultCounts
);

const waitingCounts = result[0].waitingCounts.reduce(
(acc: Record<TrainingModeEnum, number>, item: { _id: TrainingModeEnum; count: number }) => {
acc[item._id] = item.count;
return acc;
},
defaultCounts
);

return {
trainingType: collection.trainingType,
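The handler now issues one `$facet` aggregation whose three sub-pipelines each `$group` by `$mode`, so a single round trip returns per-mode training, error, and waiting counts; the old version needed a second aggregate just for the waiting queue. Folding the `{ _id, count }` arrays that `$group` emits is repeated three times above; a small generic helper would deduplicate it (a sketch, not code from the PR — the enum values are assumed from the removed `group` stage):

```ts
// Assumed from the removed $group stage above; the real enum lives in
// @fastgpt/global/core/dataset/constants.
type TrainingModeEnum = 'chunk' | 'qa' | 'image' | 'auto';

type ModeCount = { _id: TrainingModeEnum; count: number };

const defaultCounts: Record<TrainingModeEnum, number> = {
  chunk: 0,
  qa: 0,
  image: 0,
  auto: 0
};

// Fold [{ _id: mode, count }] into a Record keyed by mode. Seeding with
// a fresh copy of defaultCounts matters: the reducer mutates its
// accumulator, so passing one shared object to all three reduces would
// leak counts between the training/error/waiting results.
function countsByMode(items: ModeCount[]): Record<TrainingModeEnum, number> {
  return items.reduce(
    (acc, item) => {
      acc[item._id] = item.count;
      return acc;
    },
    { ...defaultCounts }
  );
}

// Usage against the facet result shape above:
// const trainingCounts = countsByMode(result[0].trainingCounts);
// const errorCounts   = countsByMode(result[0].errorCounts);
// const waitingCounts = countsByMode(result[0].waitingCounts);
```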
58 changes: 58 additions & 0 deletions test/cases/api/core/dataset/training/deleteTrainingData.test.ts
@@ -0,0 +1,58 @@
import handler, {
type deleteTrainingDataBody,
type deleteTrainingDataResponse
} from '@/pages/api/core/dataset/training/deleteTrainingData';
import {
DatasetCollectionTypeEnum,
TrainingModeEnum
} from '@fastgpt/global/core/dataset/constants';
import { MongoDatasetCollection } from '@fastgpt/service/core/dataset/collection/schema';
import { MongoDataset } from '@fastgpt/service/core/dataset/schema';
import { MongoDatasetTraining } from '@fastgpt/service/core/dataset/training/schema';
import { getRootUser } from '@test/datas/users';
import { Call } from '@test/utils/request';
import { describe, expect, it } from 'vitest';

describe('delete training data test', () => {
it('should delete training data', async () => {
const root = await getRootUser();
const dataset = await MongoDataset.create({
name: 'test',
teamId: root.teamId,
tmbId: root.tmbId
});
const collection = await MongoDatasetCollection.create({
name: 'test',
type: DatasetCollectionTypeEnum.file,
teamId: root.teamId,
tmbId: root.tmbId,
datasetId: dataset._id
});
const trainingData = await MongoDatasetTraining.create({
teamId: root.teamId,
tmbId: root.tmbId,
datasetId: dataset._id,
collectionId: collection._id,
mode: TrainingModeEnum.chunk,
model: 'test'
});

const res = await Call<deleteTrainingDataBody, {}, deleteTrainingDataResponse>(handler, {
auth: root,
body: {
datasetId: dataset._id,
collectionId: collection._id,
dataId: trainingData._id
}
});

const deletedTrainingData = await MongoDatasetTraining.findOne({
teamId: root.teamId,
datasetId: dataset._id,
_id: trainingData._id
});

expect(res.code).toBe(200);
expect(deletedTrainingData).toBeNull();
});
});
59 changes: 59 additions & 0 deletions test/cases/api/core/dataset/training/getTrainingDataDetail.test.ts
@@ -0,0 +1,59 @@
import handler, {
type getTrainingDataDetailBody,
type getTrainingDataDetailResponse
} from '@/pages/api/core/dataset/training/getTrainingDataDetail';
import {
DatasetCollectionTypeEnum,
TrainingModeEnum
} from '@fastgpt/global/core/dataset/constants';
import { MongoDatasetCollection } from '@fastgpt/service/core/dataset/collection/schema';
import { MongoDataset } from '@fastgpt/service/core/dataset/schema';
import { MongoDatasetTraining } from '@fastgpt/service/core/dataset/training/schema';
import { getRootUser } from '@test/datas/users';
import { Call } from '@test/utils/request';
import { describe, expect, it } from 'vitest';

describe('get training data detail test', () => {
it('should return training data detail', async () => {
const root = await getRootUser();
const dataset = await MongoDataset.create({
name: 'test',
teamId: root.teamId,
tmbId: root.tmbId
});
const collection = await MongoDatasetCollection.create({
name: 'test',
type: DatasetCollectionTypeEnum.file,
teamId: root.teamId,
tmbId: root.tmbId,
datasetId: dataset._id
});
const trainingData = await MongoDatasetTraining.create({
teamId: root.teamId,
tmbId: root.tmbId,
datasetId: dataset._id,
collectionId: collection._id,
model: 'test',
mode: TrainingModeEnum.chunk,
q: 'test',
a: 'test'
});

const res = await Call<getTrainingDataDetailBody, {}, getTrainingDataDetailResponse>(handler, {
auth: root,
body: {
datasetId: dataset._id,
collectionId: collection._id,
dataId: trainingData._id
}
});

expect(res.code).toBe(200);
expect(res.data).toBeDefined();
expect(res.data?._id).toStrictEqual(trainingData._id);
expect(res.data?.datasetId).toStrictEqual(dataset._id);
expect(res.data?.mode).toBe(TrainingModeEnum.chunk);
expect(res.data?.q).toBe('test');
expect(res.data?.a).toBe('test');
});
});