feat: 重构知识库文档导入任务化流程

- 新增上传建单、异步解析、分块处理与异步向量化闭环

- 收口分享页权限、完成态检索过滤与 SSE 局部状态刷新
This commit is contained in:
2026-04-15 19:27:22 +08:00
parent a41b50959e
commit 2689adfa40
56 changed files with 6376 additions and 1060 deletions

View File

@@ -1,13 +1,22 @@
package tech.easyflow.starter;
import org.dromara.x.file.storage.spring.EnableFileStorage;
import tech.easyflow.common.spring.BaseApp;
import org.springframework.boot.actuate.autoconfigure.elasticsearch.ElasticsearchRestHealthContributorAutoConfiguration;
import org.springframework.boot.autoconfigure.SpringBootApplication;
import tech.easyflow.common.spring.BaseApp;
@SpringBootApplication
/**
* EasyFlow 启动入口。
*/
@SpringBootApplication(exclude = ElasticsearchRestHealthContributorAutoConfiguration.class)
@EnableFileStorage
public class MainApplication extends BaseApp {
/**
 * Application entry point: starts the EasyFlow application.
 *
 * Hands this class and the command-line arguments to {@code run}.
 * NOTE(review): {@code run} is not defined in this view — presumably
 * inherited from {@code BaseApp}; confirm.
 *
 * @param args command-line startup arguments, passed through unchanged
 */
public static void main(String[] args) {
run(MainApplication.class, args);
}

View File

@@ -0,0 +1,48 @@
-- Add import-task progress tracking columns to `tb_document`.
-- The columns are chained with AFTER so they land right behind `options`
-- in the order listed. All of them are nullable: `process_status`,
-- `last_task_error` and `task_modified_at` default to NULL, the four
-- counters default to 0.
ALTER TABLE `tb_document`
    ADD COLUMN `process_status`   VARCHAR(32)   DEFAULT NULL COMMENT '处理状态'         AFTER `options`,
    ADD COLUMN `total_chunks`     INT           DEFAULT 0    COMMENT '总分块数'         AFTER `process_status`,
    ADD COLUMN `completed_chunks` INT           DEFAULT 0    COMMENT '已完成分块数'     AFTER `total_chunks`,
    ADD COLUMN `failed_chunks`    INT           DEFAULT 0    COMMENT '失败分块数'       AFTER `completed_chunks`,
    ADD COLUMN `progress_percent` INT           DEFAULT 0    COMMENT '处理进度百分比'   AFTER `failed_chunks`,
    ADD COLUMN `last_task_error`  VARCHAR(1024) DEFAULT NULL COMMENT '最近任务错误摘要' AFTER `progress_percent`,
    ADD COLUMN `task_modified_at` DATETIME      DEFAULT NULL COMMENT '任务状态更新时间' AFTER `last_task_error`;
-- Import-task table for knowledge-base documents: one row per task, keyed by
-- `id`, carrying the owning document and knowledge base, the task phase and
-- status, an optional provider-side task id, payload and error summary,
-- start/finish timestamps, and created/modified audit columns.
-- String columns declare no charset of their own; they inherit the table
-- default (utf8mb4 / utf8mb4_0900_ai_ci) set in the table options below.
-- Secondary indexes cover lookup by document, by knowledge base, and by
-- (phase, status).
CREATE TABLE `tb_document_import_task` (
    `id`               BIGINT UNSIGNED NOT NULL     COMMENT '主键',
    `document_id`      BIGINT UNSIGNED NOT NULL     COMMENT '文档ID',
    `knowledge_id`     BIGINT UNSIGNED NOT NULL     COMMENT '知识库ID',
    `phase`            VARCHAR(16)     NOT NULL     COMMENT '任务阶段',
    `status`           VARCHAR(16)     NOT NULL     COMMENT '任务状态',
    `provider_task_id` VARCHAR(255)    DEFAULT NULL COMMENT '底层任务ID',
    `payload_json`     TEXT            NULL         COMMENT '任务载荷',
    `error_summary`    VARCHAR(1024)   DEFAULT NULL COMMENT '错误摘要',
    `started_at`       DATETIME        DEFAULT NULL COMMENT '开始时间',
    `finished_at`      DATETIME        DEFAULT NULL COMMENT '结束时间',
    `created`          DATETIME        DEFAULT NULL COMMENT '创建时间',
    `created_by`       BIGINT UNSIGNED DEFAULT NULL COMMENT '创建人',
    `modified`         DATETIME        DEFAULT NULL COMMENT '修改时间',
    `modified_by`      BIGINT UNSIGNED DEFAULT NULL COMMENT '修改人',
    PRIMARY KEY (`id`) USING BTREE,
    KEY `idx_document_import_task_document` (`document_id`) USING BTREE,
    KEY `idx_document_import_task_knowledge` (`knowledge_id`) USING BTREE,
    KEY `idx_document_import_task_phase_status` (`phase`, `status`) USING BTREE
)
    ENGINE = InnoDB
    DEFAULT CHARSET = utf8mb4
    COLLATE = utf8mb4_0900_ai_ci
    COMMENT = '知识库文档导入任务'
    ROW_FORMAT = DYNAMIC;
-- Backfill the progress columns for documents that have no process status yet.
-- Only rows whose `process_status` IS NULL are touched, so rows that already
-- carry a status are left alone if the statement is run again.
-- Each such document is marked COMPLETED at 100% with no failures; its total
-- and completed chunk counts are both set to the number of rows it currently
-- has in `tb_document_chunk`.
-- The per-document count is aggregated once in a derived table and LEFT
-- JOINed, instead of running the same correlated COUNT subquery twice for
-- every document. Documents without any chunk have no match in the derived
-- table and get 0 through COALESCE.
UPDATE `tb_document` AS d
LEFT JOIN (
    SELECT
        c.`document_id`,
        COUNT(*) AS `chunk_count`
    FROM `tb_document_chunk` AS c
    GROUP BY c.`document_id`
) AS cc
    ON cc.`document_id` = d.`id`
SET d.`process_status`   = 'COMPLETED',
    d.`total_chunks`     = COALESCE(cc.`chunk_count`, 0),
    d.`completed_chunks` = COALESCE(cc.`chunk_count`, 0),
    d.`failed_chunks`    = 0,
    d.`progress_percent` = 100,
    d.`last_task_error`  = NULL,
    -- Prefer the document's own timestamps; fall back to the migration time.
    d.`task_modified_at` = COALESCE(d.`modified`, d.`created`, NOW())
WHERE d.`process_status` IS NULL;