diff --git a/easyflow-api/easyflow-api-admin/src/main/java/tech/easyflow/admin/controller/ai/DocumentCollectionController.java b/easyflow-api/easyflow-api-admin/src/main/java/tech/easyflow/admin/controller/ai/DocumentCollectionController.java index 70e6ef9..6a7fefa 100644 --- a/easyflow-api/easyflow-api-admin/src/main/java/tech/easyflow/admin/controller/ai/DocumentCollectionController.java +++ b/easyflow-api/easyflow-api-admin/src/main/java/tech/easyflow/admin/controller/ai/DocumentCollectionController.java @@ -146,10 +146,14 @@ public class DocumentCollectionController extends BaseCurdController> search(@RequestParam BigInteger knowledgeId, @RequestParam String keyword, - @RequestParam(required = false) String retrievalMode) { + @RequestParam(required = false) String retrievalMode, + @RequestParam(required = false) Integer docRecallMaxNum, + @RequestParam(required = false) Double simThreshold) { KnowledgeRetrievalRequest request = new KnowledgeRetrievalRequest(); request.setKnowledgeId(knowledgeId); request.setQuery(keyword); + request.setLimit(docRecallMaxNum); + request.setMinSimilarity(simThreshold); request.setRetrievalMode(KnowledgeRetrievalModes.parse(retrievalMode)); request.setCallerType("API"); request.setCallerId(String.valueOf(knowledgeId)); diff --git a/easyflow-api/easyflow-api-admin/src/main/java/tech/easyflow/admin/controller/ai/DocumentController.java b/easyflow-api/easyflow-api-admin/src/main/java/tech/easyflow/admin/controller/ai/DocumentController.java index 537e26e..694ca72 100644 --- a/easyflow-api/easyflow-api-admin/src/main/java/tech/easyflow/admin/controller/ai/DocumentController.java +++ b/easyflow-api/easyflow-api-admin/src/main/java/tech/easyflow/admin/controller/ai/DocumentController.java @@ -8,9 +8,12 @@ import jakarta.servlet.http.HttpServletResponse; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Value; import org.springframework.core.io.ClassPathResource; +import 
org.springframework.http.MediaType; import org.springframework.transaction.annotation.Transactional; import org.springframework.web.bind.annotation.*; +import org.springframework.web.servlet.mvc.method.annotation.SseEmitter; import tech.easyflow.ai.documentimport.DocumentImportDtos; +import tech.easyflow.ai.documentimport.task.DocumentImportTaskStatusStreamService; import tech.easyflow.ai.entity.Document; import tech.easyflow.ai.entity.DocumentCollection; import tech.easyflow.ai.entity.DocumentCollectionSplitParams; @@ -77,6 +80,9 @@ public class DocumentController extends BaseCurdController createImportTask(@JsonBody DocumentImportDtos.TaskCreateRequest request) { + if (request.getKnowledgeId() == null) { + throw new BusinessException("知识库id不能为空"); + } + getDocumentCollection(request.getKnowledgeId().toString(), ResourceAction.MANAGE, "无权限管理知识库"); + return documentService.createImportTask(request); + } + + @GetMapping("import/task/detail") + @SaCheckPermission("/api/v1/documentCollection/query") + public Result getImportTaskDetail(@RequestParam BigInteger taskId) { + Result result = documentService.getImportTaskDetail(taskId); + if (result.getData() != null && result.getData().getKnowledgeId() != null) { + getDocumentCollection(result.getData().getKnowledgeId().toString(), ResourceAction.READ, "无权限访问知识库"); + } + return result; + } + + /** + * 订阅知识库文档任务状态流。 + * + * @param knowledgeId 知识库 ID + * @return SSE 推送连接 + */ + @PostMapping(value = "import/task/stream", produces = MediaType.TEXT_EVENT_STREAM_VALUE) + @SaCheckPermission("/api/v1/documentCollection/query") + public SseEmitter streamImportTask(@JsonBody(value = "knowledgeId", required = true) BigInteger knowledgeId) { + getDocumentCollection(knowledgeId.toString(), ResourceAction.READ, "无权限访问知识库"); + return documentImportTaskStatusStreamService.subscribe(knowledgeId); + } + + @PostMapping("import/task/preview") + @SaCheckPermission("/api/v1/documentCollection/save") + public Result previewImportTask(@JsonBody 
DocumentImportDtos.PreviewRequest request) { + if (request.getKnowledgeId() == null) { + throw new BusinessException("知识库id不能为空"); + } + getDocumentCollection(request.getKnowledgeId().toString(), ResourceAction.MANAGE, "无权限管理知识库"); + return documentService.previewImportTask(request); + } + + @PostMapping("import/task/startIndex") + @SaCheckPermission("/api/v1/documentCollection/save") + public Result startIndexTask(@JsonBody DocumentImportDtos.TaskStartIndexRequest request) { + if (request.getKnowledgeId() == null) { + throw new BusinessException("知识库id不能为空"); + } + getDocumentCollection(request.getKnowledgeId().toString(), ResourceAction.MANAGE, "无权限管理知识库"); + return documentService.startIndexTask(request); + } + + @PostMapping("import/task/retryParse") + @SaCheckPermission("/api/v1/documentCollection/save") + public Result retryParseTask(@JsonBody DocumentImportDtos.TaskRetryRequest request) { + if (request.getKnowledgeId() == null) { + throw new BusinessException("知识库id不能为空"); + } + getDocumentCollection(request.getKnowledgeId().toString(), ResourceAction.MANAGE, "无权限管理知识库"); + return documentService.retryParseTask(request); + } + + @PostMapping("import/task/retryIndex") + @SaCheckPermission("/api/v1/documentCollection/save") + public Result retryIndexTask(@JsonBody DocumentImportDtos.TaskRetryRequest request) { + if (request.getKnowledgeId() == null) { + throw new BusinessException("知识库id不能为空"); + } + getDocumentCollection(request.getKnowledgeId().toString(), ResourceAction.MANAGE, "无权限管理知识库"); + return documentService.retryIndexTask(request); + } + /** * 更新 entity * diff --git a/easyflow-api/easyflow-api-admin/src/main/java/tech/easyflow/admin/controller/ai/ShareKnowledgeController.java b/easyflow-api/easyflow-api-admin/src/main/java/tech/easyflow/admin/controller/ai/ShareKnowledgeController.java index e8d786d..100f2dd 100644 --- a/easyflow-api/easyflow-api-admin/src/main/java/tech/easyflow/admin/controller/ai/ShareKnowledgeController.java +++ 
b/easyflow-api/easyflow-api-admin/src/main/java/tech/easyflow/admin/controller/ai/ShareKnowledgeController.java @@ -17,8 +17,10 @@ import org.springframework.web.bind.annotation.PostMapping; import org.springframework.web.bind.annotation.RequestMapping; import org.springframework.web.bind.annotation.RequestParam; import org.springframework.web.bind.annotation.RestController; +import org.springframework.web.servlet.mvc.method.annotation.SseEmitter; import org.springframework.web.multipart.MultipartFile; import tech.easyflow.ai.documentimport.DocumentImportDtos; +import tech.easyflow.ai.documentimport.task.DocumentImportTaskStatusStreamService; import tech.easyflow.ai.dto.KnowledgeShareLimitedConfigRequest; import tech.easyflow.ai.dto.KnowledgeSearchResultItem; import tech.easyflow.ai.entity.Document; @@ -42,6 +44,7 @@ import tech.easyflow.ai.service.KnowledgeShareService; import tech.easyflow.ai.service.ModelService; import tech.easyflow.ai.vo.FaqImportResultVo; import tech.easyflow.ai.vo.KnowledgeShareAuthContext; +import tech.easyflow.ai.vo.KnowledgeShareViewDetail; import tech.easyflow.common.domain.Result; import tech.easyflow.common.filestorage.FileStorageService; import tech.easyflow.common.vo.UploadResVo; @@ -99,6 +102,8 @@ public class ShareKnowledgeController { private KnowledgeEmbeddingService knowledgeEmbeddingService; @Resource(name = "default") private FileStorageService fileStorageService; + @Resource + private DocumentImportTaskStatusStreamService documentImportTaskStatusStreamService; /** * 获取知识库详情。 @@ -107,14 +112,17 @@ public class ShareKnowledgeController { * @return 知识库详情 */ @GetMapping("/documentCollection/detail") - public Result detail(@RequestParam String shareKey) { + public Result detail(@RequestParam String shareKey) { KnowledgeShareAuthContext context = knowledgeShareService.assertUrlShareAccess( shareKey, null, KnowledgeShareActionScope.VIEW.name() ); audit(context, "访问知识库分享页", "KNOWLEDGE_SHARE_URL_ACCESS", false, 
auditDetail("knowledgeId", context.getKnowledge().getId())); - return Result.ok(context.getKnowledge()); + KnowledgeShareViewDetail detail = new KnowledgeShareViewDetail(); + detail.setKnowledge(context.getKnowledge()); + detail.setPermissionScopes(new java.util.ArrayList(context.getShare().getPermissionScopeSet())); + return Result.ok(detail); } /** @@ -234,6 +242,26 @@ public class ShareKnowledgeController { return Result.ok(documentService.getDocumentList(context.getKnowledge().getId().toString(), pageSize, pageNumber, title)); } + /** + * 订阅分享知识库的文档任务状态流。 + * + * @param shareKey 分享访问密钥 + * @param knowledgeId 知识库 ID + * @return SSE 推送连接 + */ + @PostMapping(value = "/document/import/task/stream", produces = MediaType.TEXT_EVENT_STREAM_VALUE) + public SseEmitter streamDocumentTask( + @RequestParam String shareKey, + @JsonBody(value = "knowledgeId", required = true) BigInteger knowledgeId + ) { + KnowledgeShareAuthContext context = knowledgeShareService.assertUrlShareAccess( + shareKey, + knowledgeId, + KnowledgeShareActionScope.VIEW.name() + ); + return documentImportTaskStatusStreamService.subscribe(context.getKnowledge().getId()); + } + /** * 下载文档。 */ @@ -344,6 +372,104 @@ public class ShareKnowledgeController { return documentService.commitImport(request); } + @PostMapping("/document/import/task/create") + public Result createImportTask( + @RequestParam String shareKey, + @JsonBody DocumentImportDtos.TaskCreateRequest request + ) { + KnowledgeShareAuthContext context = knowledgeShareService.assertUrlShareAccess( + shareKey, + request == null ? null : request.getKnowledgeId(), + KnowledgeShareActionScope.CONTENT_CREATE.name() + ); + BigInteger knowledgeId = resolveKnowledgeId(context, request == null ? 
null : request.getKnowledgeId()); + request.setKnowledgeId(knowledgeId); + audit(context, "创建分享文档导入任务", "KNOWLEDGE_SHARE_URL_WRITE", true, auditDetail("knowledgeId", knowledgeId)); + return documentService.createImportTask(request); + } + + @GetMapping("/document/import/task/detail") + public Result getImportTaskDetail( + @RequestParam String shareKey, + @RequestParam BigInteger taskId + ) { + KnowledgeShareAuthContext context = knowledgeShareService.assertUrlShareAccess( + shareKey, + null, + KnowledgeShareActionScope.VIEW.name() + ); + Result result = documentService.getImportTaskDetail(taskId); + BigInteger knowledgeId = result.getData() == null ? null : result.getData().getKnowledgeId(); + if (knowledgeId == null || knowledgeId.compareTo(context.getKnowledge().getId()) != 0) { + throw new BusinessException("任务不存在"); + } + return result; + } + + @PostMapping("/document/import/task/preview") + public Result previewImportTask( + @RequestParam String shareKey, + @JsonBody DocumentImportDtos.PreviewRequest request + ) { + KnowledgeShareAuthContext context = knowledgeShareService.assertUrlShareAccess( + shareKey, + request == null ? null : request.getKnowledgeId(), + KnowledgeShareActionScope.CONTENT_CREATE.name() + ); + BigInteger knowledgeId = resolveKnowledgeId(context, request == null ? null : request.getKnowledgeId()); + request.setKnowledgeId(knowledgeId); + audit(context, "预览分享文档分块", "KNOWLEDGE_SHARE_URL_WRITE", true, auditDetail("knowledgeId", knowledgeId)); + return documentService.previewImportTask(request); + } + + @PostMapping("/document/import/task/startIndex") + public Result startIndexTask( + @RequestParam String shareKey, + @JsonBody DocumentImportDtos.TaskStartIndexRequest request + ) { + KnowledgeShareAuthContext context = knowledgeShareService.assertUrlShareAccess( + shareKey, + request == null ? 
null : request.getKnowledgeId(), + KnowledgeShareActionScope.CONTENT_CREATE.name() + ); + BigInteger knowledgeId = resolveKnowledgeId(context, request == null ? null : request.getKnowledgeId()); + request.setKnowledgeId(knowledgeId); + audit(context, "启动分享文档向量化", "KNOWLEDGE_SHARE_URL_WRITE", true, auditDetail("knowledgeId", knowledgeId)); + return documentService.startIndexTask(request); + } + + @PostMapping("/document/import/task/retryParse") + public Result retryParseTask( + @RequestParam String shareKey, + @JsonBody DocumentImportDtos.TaskRetryRequest request + ) { + KnowledgeShareAuthContext context = knowledgeShareService.assertUrlShareAccess( + shareKey, + request == null ? null : request.getKnowledgeId(), + KnowledgeShareActionScope.CONTENT_CREATE.name() + ); + BigInteger knowledgeId = resolveKnowledgeId(context, request == null ? null : request.getKnowledgeId()); + request.setKnowledgeId(knowledgeId); + audit(context, "重试分享文档解析", "KNOWLEDGE_SHARE_URL_WRITE", true, auditDetail("knowledgeId", knowledgeId)); + return documentService.retryParseTask(request); + } + + @PostMapping("/document/import/task/retryIndex") + public Result retryIndexTask( + @RequestParam String shareKey, + @JsonBody DocumentImportDtos.TaskRetryRequest request + ) { + KnowledgeShareAuthContext context = knowledgeShareService.assertUrlShareAccess( + shareKey, + request == null ? null : request.getKnowledgeId(), + KnowledgeShareActionScope.CONTENT_CREATE.name() + ); + BigInteger knowledgeId = resolveKnowledgeId(context, request == null ? 
null : request.getKnowledgeId()); + request.setKnowledgeId(knowledgeId); + audit(context, "重试分享文档向量化", "KNOWLEDGE_SHARE_URL_WRITE", true, auditDetail("knowledgeId", knowledgeId)); + return documentService.retryIndexTask(request); + } + /** * Chunk 分页。 */ diff --git a/easyflow-api/easyflow-api-public/src/main/java/tech/easyflow/publicapi/controller/PublicKnowledgeShareController.java b/easyflow-api/easyflow-api-public/src/main/java/tech/easyflow/publicapi/controller/PublicKnowledgeShareController.java index 84c4349..e344831 100644 --- a/easyflow-api/easyflow-api-public/src/main/java/tech/easyflow/publicapi/controller/PublicKnowledgeShareController.java +++ b/easyflow-api/easyflow-api-public/src/main/java/tech/easyflow/publicapi/controller/PublicKnowledgeShareController.java @@ -224,6 +224,83 @@ public class PublicKnowledgeShareController { return documentService.commitImport(request); } + @PostMapping("/document/import/task/create") + public Result createImportTask( + @RequestHeader("ApiKey") String apiKey, + @JsonBody DocumentImportDtos.TaskCreateRequest request, + HttpServletRequest servletRequest + ) { + assertApiShare(apiKey, servletRequest.getRequestURI(), request.getKnowledgeId(), KnowledgeShareActionScope.CONTENT_CREATE.name()); + requireDocumentKnowledge(request.getKnowledgeId()); + audit(apiKey, "API创建文档导入任务", "KNOWLEDGE_API_SHARE_WRITE", servletRequest.getRequestURI(), Map.of("knowledgeId", request.getKnowledgeId())); + return documentService.createImportTask(request); + } + + @GetMapping("/document/import/task/detail") + public Result getImportTaskDetail( + @RequestHeader("ApiKey") String apiKey, + @RequestParam BigInteger knowledgeId, + @RequestParam BigInteger taskId, + HttpServletRequest request + ) { + assertApiShare(apiKey, request.getRequestURI(), knowledgeId, KnowledgeShareActionScope.VIEW.name()); + requireDocumentKnowledge(knowledgeId); + Result result = documentService.getImportTaskDetail(taskId); + if (result.getData() == null || 
result.getData().getKnowledgeId() == null + || result.getData().getKnowledgeId().compareTo(knowledgeId) != 0) { + throw new BusinessException("任务不存在"); + } + return result; + } + + @PostMapping("/document/import/task/preview") + public Result previewImportTask( + @RequestHeader("ApiKey") String apiKey, + @JsonBody DocumentImportDtos.PreviewRequest request, + HttpServletRequest servletRequest + ) { + assertApiShare(apiKey, servletRequest.getRequestURI(), request.getKnowledgeId(), KnowledgeShareActionScope.CONTENT_CREATE.name()); + requireDocumentKnowledge(request.getKnowledgeId()); + audit(apiKey, "API预览文档分块", "KNOWLEDGE_API_SHARE_WRITE", servletRequest.getRequestURI(), Map.of("knowledgeId", request.getKnowledgeId())); + return documentService.previewImportTask(request); + } + + @PostMapping("/document/import/task/startIndex") + public Result startIndexTask( + @RequestHeader("ApiKey") String apiKey, + @JsonBody DocumentImportDtos.TaskStartIndexRequest request, + HttpServletRequest servletRequest + ) { + assertApiShare(apiKey, servletRequest.getRequestURI(), request.getKnowledgeId(), KnowledgeShareActionScope.CONTENT_CREATE.name()); + requireDocumentKnowledge(request.getKnowledgeId()); + audit(apiKey, "API启动文档向量化", "KNOWLEDGE_API_SHARE_WRITE", servletRequest.getRequestURI(), Map.of("knowledgeId", request.getKnowledgeId())); + return documentService.startIndexTask(request); + } + + @PostMapping("/document/import/task/retryParse") + public Result retryParseTask( + @RequestHeader("ApiKey") String apiKey, + @JsonBody DocumentImportDtos.TaskRetryRequest request, + HttpServletRequest servletRequest + ) { + assertApiShare(apiKey, servletRequest.getRequestURI(), request.getKnowledgeId(), KnowledgeShareActionScope.CONTENT_CREATE.name()); + requireDocumentKnowledge(request.getKnowledgeId()); + audit(apiKey, "API重试文档解析", "KNOWLEDGE_API_SHARE_WRITE", servletRequest.getRequestURI(), Map.of("knowledgeId", request.getKnowledgeId())); + return 
documentService.retryParseTask(request); + } + + @PostMapping("/document/import/task/retryIndex") + public Result retryIndexTask( + @RequestHeader("ApiKey") String apiKey, + @JsonBody DocumentImportDtos.TaskRetryRequest request, + HttpServletRequest servletRequest + ) { + assertApiShare(apiKey, servletRequest.getRequestURI(), request.getKnowledgeId(), KnowledgeShareActionScope.CONTENT_CREATE.name()); + requireDocumentKnowledge(request.getKnowledgeId()); + audit(apiKey, "API重试文档向量化", "KNOWLEDGE_API_SHARE_WRITE", servletRequest.getRequestURI(), Map.of("knowledgeId", request.getKnowledgeId())); + return documentService.retryIndexTask(request); + } + /** * Chunk 分页。 */ diff --git a/easyflow-commons/easyflow-common-mq/src/main/java/tech/easyflow/common/mq/redis/RedisMQConsumerContainer.java b/easyflow-commons/easyflow-common-mq/src/main/java/tech/easyflow/common/mq/redis/RedisMQConsumerContainer.java index aad22d4..2e28ed2 100644 --- a/easyflow-commons/easyflow-common-mq/src/main/java/tech/easyflow/common/mq/redis/RedisMQConsumerContainer.java +++ b/easyflow-commons/easyflow-common-mq/src/main/java/tech/easyflow/common/mq/redis/RedisMQConsumerContainer.java @@ -1,6 +1,8 @@ package tech.easyflow.common.mq.redis; import jakarta.annotation.PreDestroy; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.springframework.context.SmartLifecycle; import org.springframework.data.domain.Range; import org.springframework.data.redis.connection.RedisConnection; @@ -34,6 +36,8 @@ import java.util.concurrent.TimeUnit; public class RedisMQConsumerContainer implements MQConsumerContainer, SmartLifecycle { + private static final Logger LOG = LoggerFactory.getLogger(RedisMQConsumerContainer.class); + private final RedisConnectionFactory redisConnectionFactory; private final StringRedisTemplate stringRedisTemplate; private final MQProperties properties; @@ -71,6 +75,8 @@ public class RedisMQConsumerContainer implements MQConsumerContainer, SmartLifec MQSubscription 
subscription = handler.subscription(); for (int shard = 0; shard < Math.max(subscription.getShardCount(), 1); shard++) { int currentShard = shard; + LOG.info("启动 MQ 消费线程: topic={}, group={}, shard={}, handler={}", + subscription.getTopic(), subscription.getConsumerGroup(), currentShard, handler.getClass().getSimpleName()); executorService.submit(() -> consumeLoop(handler, subscription, currentShard)); } } @@ -106,6 +112,8 @@ public class RedisMQConsumerContainer implements MQConsumerContainer, SmartLifec String streamKey = keySupport.streamKey(subscription.getTopic(), shard); String consumerName = subscription.getConsumerGroup() + "-" + shard; ensureConsumerGroup(streamKey, subscription.getConsumerGroup()); + LOG.info("MQ 消费循环已启动: topic={}, group={}, shard={}, consumer={}, streamKey={}, handler={}", + subscription.getTopic(), subscription.getConsumerGroup(), shard, consumerName, streamKey, handler.getClass().getSimpleName()); while (running) { try { reclaimPending(streamKey, subscription.getConsumerGroup(), consumerName); @@ -123,8 +131,18 @@ public class RedisMQConsumerContainer implements MQConsumerContainer, SmartLifec if (messages.isEmpty()) { continue; } + LOG.info("MQ 收到消息批次: topic={}, group={}, shard={}, consumer={}, streamKey={}, count={}", + subscription.getTopic(), subscription.getConsumerGroup(), shard, consumerName, streamKey, messages.size()); handleMessages(handler, streamKey, subscription.getConsumerGroup(), messages); - } catch (Exception ignored) { + } catch (Exception exception) { + LOG.error("MQ 消费循环异常: topic={}, group={}, shard={}, consumer={}, streamKey={}, handler={}", + subscription.getTopic(), + subscription.getConsumerGroup(), + shard, + consumerName, + streamKey, + handler.getClass().getSimpleName(), + exception); sleepSilently(1000L); } } @@ -192,8 +210,12 @@ public class RedisMQConsumerContainer implements MQConsumerContainer, SmartLifec message.setRetryCount(retryCount); message.getHeaders().put("lastError", reason == null ? 
"" : reason); if (retryCount > properties.getRedis().getMaxRetry()) { + LOG.error("MQ 消息超过最大重试次数,进入死信队列: topic={}, messageId={}, streamKey={}, retryCount={}, reason={}", + message.getTopic(), message.getMessageId(), message.getStreamKey(), retryCount, reason); deadLetterService.deadLetter(message, reason); } else { + LOG.warn("MQ 消息消费失败,准备重试: topic={}, messageId={}, streamKey={}, retryCount={}, reason={}", + message.getTopic(), message.getMessageId(), message.getStreamKey(), retryCount, reason); stringRedisTemplate.opsForStream().add( org.springframework.data.redis.connection.stream.StreamRecords.string( Map.of("payload", messageConverter.serialize(message)) @@ -205,10 +227,16 @@ public class RedisMQConsumerContainer implements MQConsumerContainer, SmartLifec private void handleMessages(MQConsumerHandler handler, String streamKey, String group, List messages) throws Exception { try { + LOG.info("MQ 开始批量处理消息: group={}, streamKey={}, count={}, handler={}", + group, streamKey, messages.size(), handler.getClass().getSimpleName()); handler.handle(messages); acknowledge(streamKey, group, messages); + LOG.info("MQ 批量处理消息完成: group={}, streamKey={}, count={}, handler={}", + group, streamKey, messages.size(), handler.getClass().getSimpleName()); return; } catch (Exception batchEx) { + LOG.error("MQ 批量处理消息失败,准备降级单条处理: group={}, streamKey={}, count={}, handler={}", + group, streamKey, messages.size(), handler.getClass().getSimpleName(), batchEx); if (messages.size() == 1) { retryOrDeadLetter(messages, resolveReason(batchEx)); acknowledge(streamKey, group, messages); @@ -218,7 +246,11 @@ public class RedisMQConsumerContainer implements MQConsumerContainer, SmartLifec for (MQMessage message : messages) { try { + LOG.info("MQ 开始单条处理消息: group={}, streamKey={}, messageId={}, handler={}", + group, streamKey, message.getMessageId(), handler.getClass().getSimpleName()); handler.handle(List.of(message)); + LOG.info("MQ 单条处理消息完成: group={}, streamKey={}, messageId={}, handler={}", + 
group, streamKey, message.getMessageId(), handler.getClass().getSimpleName()); } catch (Exception singleEx) { retryOrDeadLetter(List.of(message), resolveReason(singleEx)); } finally { @@ -240,6 +272,7 @@ public class RedisMQConsumerContainer implements MQConsumerContainer, SmartLifec } MQAcknowledger acknowledger = records -> stringRedisTemplate.opsForStream().acknowledge(streamKey, group, ids); acknowledger.acknowledge(messages); + LOG.info("MQ 消息确认完成: group={}, streamKey={}, count={}", group, streamKey, ids.length); } private String resolveReason(Exception exception) { diff --git a/easyflow-commons/easyflow-common-mq/src/main/java/tech/easyflow/common/mq/redis/RedisMQProducer.java b/easyflow-commons/easyflow-common-mq/src/main/java/tech/easyflow/common/mq/redis/RedisMQProducer.java index f87c512..0732fc8 100644 --- a/easyflow-commons/easyflow-common-mq/src/main/java/tech/easyflow/common/mq/redis/RedisMQProducer.java +++ b/easyflow-commons/easyflow-common-mq/src/main/java/tech/easyflow/common/mq/redis/RedisMQProducer.java @@ -1,5 +1,7 @@ package tech.easyflow.common.mq.redis; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.springframework.data.redis.connection.stream.RecordId; import org.springframework.data.redis.connection.stream.StreamRecords; import org.springframework.data.redis.core.StringRedisTemplate; @@ -15,6 +17,8 @@ import java.util.UUID; public class RedisMQProducer implements MQProducer { + private static final Logger LOG = LoggerFactory.getLogger(RedisMQProducer.class); + private final StringRedisTemplate stringRedisTemplate; private final MQProperties properties; private final MQMessageConverter messageConverter; @@ -47,12 +51,16 @@ public class RedisMQProducer implements MQProducer { int shardCount = Math.max(properties.getRedis().getChatPersistShardCount(), 1); int shard = keySupport.resolveShard(message.getKey(), shardCount); String streamKey = keySupport.streamKey(message.getTopic(), shard); + LOG.info("MQ 开始投递消息: topic={}, 
messageId={}, key={}, shard={}, streamKey={}", + message.getTopic(), message.getMessageId(), message.getKey(), shard, streamKey); RecordId recordId = stringRedisTemplate.opsForStream().add( StreamRecords.string(Map.of("payload", messageConverter.serialize(message))).withStreamKey(streamKey) ); if (recordId == null) { throw new MQException("MQ 消息投递失败"); } + LOG.info("MQ 消息投递完成: topic={}, messageId={}, key={}, shard={}, streamKey={}, recordId={}", + message.getTopic(), message.getMessageId(), message.getKey(), shard, streamKey, recordId.getValue()); return recordId.getValue(); } } diff --git a/easyflow-modules/easyflow-module-ai/pom.xml b/easyflow-modules/easyflow-module-ai/pom.xml index c164ae9..1303781 100644 --- a/easyflow-modules/easyflow-module-ai/pom.xml +++ b/easyflow-modules/easyflow-module-ai/pom.xml @@ -103,6 +103,10 @@ tech.easyflow easyflow-common-chat-protocol + + tech.easyflow + easyflow-common-mq + com.easyagents diff --git a/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/config/ThreadPoolConfig.java b/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/config/ThreadPoolConfig.java index 71ab735..5e12bc2 100644 --- a/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/config/ThreadPoolConfig.java +++ b/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/config/ThreadPoolConfig.java @@ -12,8 +12,9 @@ public class ThreadPoolConfig { private static final Logger log = LoggerFactory.getLogger(ThreadPoolConfig.class); /** - * SSE消息发送专用线程池 - * 核心原则:IO密集型任务(网络推送),线程数 = CPU核心数 * 2 + 1 + * 创建 SSE 消息发送线程池。 + * + * @return SSE 推送线程池 */ @Bean(name = "sseThreadPool") public ThreadPoolTaskExecutor sseThreadPool() { @@ -37,4 +38,29 @@ public class ThreadPoolConfig { executor.initialize(); return executor; } + + /** + * 创建知识库文档导入任务线程池。 + * + * @return 文档导入任务线程池 + */ + @Bean(name = "documentImportTaskExecutor") + public ThreadPoolTaskExecutor documentImportTaskExecutor() { + ThreadPoolTaskExecutor executor 
= new ThreadPoolTaskExecutor(); + int cpuCoreNum = Runtime.getRuntime().availableProcessors(); + executor.setCorePoolSize(Math.max(2, cpuCoreNum)); + executor.setMaxPoolSize(Math.max(4, cpuCoreNum * 2)); + executor.setQueueCapacity(200); + executor.setKeepAliveSeconds(60); + executor.setThreadNamePrefix("document-import-"); + executor.setRejectedExecutionHandler((runnable, executorService) -> { + log.error("文档导入线程池过载!核心线程数:{},最大线程数:{},队列任务数:{}", + executorService.getCorePoolSize(), + executorService.getMaximumPoolSize(), + executorService.getQueue().size()); + throw new BusinessException("文档导入任务繁忙,请稍后重试"); + }); + executor.initialize(); + return executor; + } } diff --git a/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/document/service/impl/DocumentParseBridgeServiceImpl.java b/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/document/service/impl/DocumentParseBridgeServiceImpl.java index c9503ac..e0fda09 100644 --- a/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/document/service/impl/DocumentParseBridgeServiceImpl.java +++ b/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/document/service/impl/DocumentParseBridgeServiceImpl.java @@ -5,6 +5,8 @@ import com.easyagents.document.core.model.ParseResponse; import com.easyagents.document.core.model.ParseResult; import com.easyagents.document.core.model.ParseTaskInfo; import com.easyagents.document.core.model.ParseTaskStatus; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.springframework.lang.Nullable; import org.springframework.stereotype.Service; import org.springframework.util.StringUtils; @@ -30,6 +32,8 @@ import tech.easyflow.ai.utils.DocUtil; @Service public class DocumentParseBridgeServiceImpl implements DocumentParseBridgeService { + private static final Logger LOG = LoggerFactory.getLogger(DocumentParseBridgeServiceImpl.class); + @Nullable private final DocumentParseService documentParseService; private final 
DocumentSourceLoader documentSourceLoader; @@ -52,12 +56,21 @@ public class DocumentParseBridgeServiceImpl implements DocumentParseBridgeServic @Override public DocumentParsedResult parse(DocumentSourceRef source, DocumentParseScenario scenario) { try { - LoadedDocumentSource loadedSource = preparePdfSource(source); + LoadedDocumentSource loadedSource = prepareSupportedSource(source); + LOG.info("桥接服务开始同步解析文档: fileName={}, contentType={}, scenario={}", + loadedSource.getFileName(), loadedSource.getContentType(), scenario); ParseResponse response = requireService().parse(parseRequestFactory.build(loadedSource, scenario)); - return parseResultMapper.map(extractSingleResult(response, false)); + DocumentParsedResult result = parseResultMapper.map(extractSingleResult(response, false)); + LOG.info("桥接服务同步解析完成: fileName={}, scenario={}, preferredTextLength={}", + loadedSource.getFileName(), scenario, resolveTextLength(result)); + return result; } catch (DocumentParseBridgeException e) { + LOG.error("桥接服务同步解析失败: fileName={}, scenario={}", + source == null ? null : source.getFileName(), scenario, e); throw e; } catch (Exception e) { + LOG.error("桥接服务同步解析异常: fileName={}, scenario={}", + source == null ? 
null : source.getFileName(), scenario, e); throw DocumentParseBridgeException.parseFailed("同步文档解析失败", e); } } @@ -68,12 +81,21 @@ public class DocumentParseBridgeServiceImpl implements DocumentParseBridgeServic @Override public DocumentParseTaskStatus submit(DocumentSourceRef source, DocumentParseScenario scenario) { try { - LoadedDocumentSource loadedSource = preparePdfSource(source); + LoadedDocumentSource loadedSource = prepareSupportedSource(source); + LOG.info("桥接服务开始提交异步解析任务: fileName={}, contentType={}, scenario={}", + loadedSource.getFileName(), loadedSource.getContentType(), scenario); ParseTaskStatus taskStatus = requireService().submit(parseRequestFactory.build(loadedSource, scenario)); - return parseResultMapper.map(taskStatus); + DocumentParseTaskStatus mappedStatus = parseResultMapper.map(taskStatus); + LOG.info("桥接服务异步解析任务提交完成: fileName={}, scenario={}, providerTaskId={}, status={}", + loadedSource.getFileName(), scenario, mappedStatus.getTaskId(), mappedStatus.getStatus()); + return mappedStatus; } catch (DocumentParseBridgeException e) { + LOG.error("桥接服务提交异步解析任务失败: fileName={}, scenario={}", + source == null ? null : source.getFileName(), scenario, e); throw e; } catch (Exception e) { + LOG.error("桥接服务提交异步解析任务异常: fileName={}, scenario={}", + source == null ? 
null : source.getFileName(), scenario, e); throw DocumentParseBridgeException.taskFailed("提交异步文档解析任务失败", e); } } @@ -104,11 +126,17 @@ public class DocumentParseBridgeServiceImpl implements DocumentParseBridgeServic throw DocumentParseBridgeException.resultFetchFailed("taskId 不能为空"); } try { + LOG.info("桥接服务开始获取异步解析结果: providerTaskId={}", taskId); ParseResponse response = requireService().queryResult(taskId); - return parseResultMapper.map(extractSingleResult(response, true)); + DocumentParsedResult result = parseResultMapper.map(extractSingleResult(response, true)); + LOG.info("桥接服务获取异步解析结果完成: providerTaskId={}, preferredTextLength={}", + taskId, resolveTextLength(result)); + return result; } catch (DocumentParseBridgeException e) { + LOG.error("桥接服务获取异步解析结果失败: providerTaskId={}", taskId, e); throw e; } catch (Exception e) { + LOG.error("桥接服务获取异步解析结果异常: providerTaskId={}", taskId, e); throw DocumentParseBridgeException.resultFetchFailed("获取异步文档解析结果失败", e); } } @@ -123,14 +151,32 @@ public class DocumentParseBridgeServiceImpl implements DocumentParseBridgeServic } try { ParseTaskInfo taskInfo = requireService().queryTaskInfo(taskId); - return parseResultMapper.map(taskInfo); + DocumentParseTaskInfo mappedTaskInfo = parseResultMapper.map(taskInfo); + LOG.info("桥接服务查询异步解析任务状态: providerTaskId={}, status={}, hasResult={}", + taskId, + mappedTaskInfo == null ? null : mappedTaskInfo.getStatus(), + mappedTaskInfo != null && mappedTaskInfo.getResult() != null); + return mappedTaskInfo; } catch (DocumentParseBridgeException e) { + LOG.error("桥接服务查询异步解析任务状态失败: providerTaskId={}", taskId, e); throw e; } catch (Exception e) { + LOG.error("桥接服务查询异步解析任务状态异常: providerTaskId={}", taskId, e); throw DocumentParseBridgeException.taskFailed("聚合查询异步文档解析任务信息失败", e); } } + private int resolveTextLength(DocumentParsedResult result) { + String text = result == null ? 
null : result.getPreferredText(); + if (!StringUtils.hasText(text) && result != null) { + text = result.getMarkdown(); + } + if (!StringUtils.hasText(text) && result != null) { + text = result.getPlainText(); + } + return text == null ? 0 : text.length(); + } + private DocumentParseService requireService() { if (documentParseService == null) { throw DocumentParseBridgeException.serviceNotEnabled(); @@ -138,24 +184,32 @@ public class DocumentParseBridgeServiceImpl implements DocumentParseBridgeServic return documentParseService; } - private LoadedDocumentSource preparePdfSource(DocumentSourceRef source) { + private LoadedDocumentSource prepareSupportedSource(DocumentSourceRef source) { LoadedDocumentSource loadedSource = documentSourceLoader.load(source); - if (!isPdf(loadedSource)) { - throw DocumentParseBridgeException.unsupportedSource("统一文档解析桥接首版仅支持 PDF 文件"); + if (!isSupportedByBridge(loadedSource)) { + throw DocumentParseBridgeException.unsupportedSource("统一文档解析桥接当前仅支持 PDF、DOCX 文件"); } return loadedSource; } - private boolean isPdf(LoadedDocumentSource loadedSource) { + private boolean isSupportedByBridge(LoadedDocumentSource loadedSource) { String contentType = loadedSource.getContentType(); - if (StringUtils.hasText(contentType) && contentType.toLowerCase().contains("pdf")) { - return true; + if (StringUtils.hasText(contentType)) { + String normalizedContentType = contentType.toLowerCase(); + if (normalizedContentType.contains("pdf") + || normalizedContentType.contains("wordprocessingml.document")) { + return true; + } } String fileName = loadedSource.getFileName(); if (!StringUtils.hasText(fileName) || !fileName.contains(".")) { return false; } - return "pdf".equals(DocUtil.normalizeSuffix(DocUtil.getSuffix(fileName))); + String suffix = DocUtil.normalizeSuffix(DocUtil.getSuffix(fileName)); + if ("pdf".equals(suffix) || "docx".equals(suffix)) { + return true; + } + return false; } private ParseResult extractSingleResult(ParseResponse response, boolean 
resultFetchPhase) { diff --git a/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/documentimport/DocumentImportDtos.java b/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/documentimport/DocumentImportDtos.java index 55225f8..d80ffa3 100644 --- a/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/documentimport/DocumentImportDtos.java +++ b/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/documentimport/DocumentImportDtos.java @@ -93,6 +93,7 @@ public final class DocumentImportDtos { public static class PreviewRequest implements Serializable { private BigInteger knowledgeId; + private BigInteger documentId; private List files = new ArrayList(); public BigInteger getKnowledgeId() { @@ -103,6 +104,14 @@ public final class DocumentImportDtos { this.knowledgeId = knowledgeId; } + public BigInteger getDocumentId() { + return documentId; + } + + public void setDocumentId(BigInteger documentId) { + this.documentId = documentId; + } + public List getFiles() { return files; } @@ -114,6 +123,7 @@ public final class DocumentImportDtos { public static class CommitRequest implements Serializable { private BigInteger knowledgeId; + private BigInteger documentId; private List previewSessionIds = new ArrayList(); public BigInteger getKnowledgeId() { @@ -124,6 +134,14 @@ public final class DocumentImportDtos { this.knowledgeId = knowledgeId; } + public BigInteger getDocumentId() { + return documentId; + } + + public void setDocumentId(BigInteger documentId) { + this.documentId = documentId; + } + public List getPreviewSessionIds() { return previewSessionIds; } @@ -241,16 +259,158 @@ public final class DocumentImportDtos { } } + public static class PreviewSourceRange implements Serializable { + private Integer start; + private Integer end; + + public Integer getStart() { + return start; + } + + public void setStart(Integer start) { + this.start = start; + } + + public Integer getEnd() { + return end; + } + + 
public void setEnd(Integer end) { + this.end = end; + } + } + + public static class PreviewChunkResult implements Serializable { + private String answer; + private Integer charCount; + private String chunkId; + private String chunkType; + private String content; + private List headingPath = new ArrayList(); + private Integer partNo; + private Integer partTotal; + private String question; + private String sourceLabel; + private Integer tokenEstimate; + private List warnings = new ArrayList(); + private List sourceRanges = new ArrayList(); + + public String getAnswer() { + return answer; + } + + public void setAnswer(String answer) { + this.answer = answer; + } + + public Integer getCharCount() { + return charCount; + } + + public void setCharCount(Integer charCount) { + this.charCount = charCount; + } + + public String getChunkId() { + return chunkId; + } + + public void setChunkId(String chunkId) { + this.chunkId = chunkId; + } + + public String getChunkType() { + return chunkType; + } + + public void setChunkType(String chunkType) { + this.chunkType = chunkType; + } + + public String getContent() { + return content; + } + + public void setContent(String content) { + this.content = content; + } + + public List getHeadingPath() { + return headingPath; + } + + public void setHeadingPath(List headingPath) { + this.headingPath = headingPath; + } + + public Integer getPartNo() { + return partNo; + } + + public void setPartNo(Integer partNo) { + this.partNo = partNo; + } + + public Integer getPartTotal() { + return partTotal; + } + + public void setPartTotal(Integer partTotal) { + this.partTotal = partTotal; + } + + public String getQuestion() { + return question; + } + + public void setQuestion(String question) { + this.question = question; + } + + public String getSourceLabel() { + return sourceLabel; + } + + public void setSourceLabel(String sourceLabel) { + this.sourceLabel = sourceLabel; + } + + public Integer getTokenEstimate() { + return tokenEstimate; + } + + 
public void setTokenEstimate(Integer tokenEstimate) { + this.tokenEstimate = tokenEstimate; + } + + public List getWarnings() { + return warnings; + } + + public void setWarnings(List warnings) { + this.warnings = warnings; + } + + public List getSourceRanges() { + return sourceRanges; + } + + public void setSourceRanges(List sourceRanges) { + this.sourceRanges = sourceRanges; + } + } + public static class PreviewFileResult implements Serializable { private String previewSessionId; private String filePath; private String fileName; + private String normalizedContent; private String strategyCode; private String strategyLabel; private AnalysisResult analysis; private Integer totalChunks; private Integer totalWarnings; - private List chunks = new ArrayList(); + private List chunks = new ArrayList(); public String getPreviewSessionId() { return previewSessionId; @@ -276,6 +436,14 @@ public final class DocumentImportDtos { this.fileName = fileName; } + public String getNormalizedContent() { + return normalizedContent; + } + + public void setNormalizedContent(String normalizedContent) { + this.normalizedContent = normalizedContent; + } + public String getStrategyCode() { return strategyCode; } @@ -316,11 +484,11 @@ public final class DocumentImportDtos { this.totalWarnings = totalWarnings; } - public List getChunks() { + public List getChunks() { return chunks; } - public void setChunks(List chunks) { + public void setChunks(List chunks) { this.chunks = chunks; } } @@ -454,6 +622,7 @@ public final class DocumentImportDtos { public static class PreviewSession implements Serializable { private String sessionId; private BigInteger knowledgeId; + private BigInteger documentId; private String filePath; private String fileName; private String sourceFormat; @@ -480,6 +649,14 @@ public final class DocumentImportDtos { this.knowledgeId = knowledgeId; } + public BigInteger getDocumentId() { + return documentId; + } + + public void setDocumentId(BigInteger documentId) { + 
this.documentId = documentId; + } + public String getFilePath() { return filePath; } @@ -552,4 +729,265 @@ public final class DocumentImportDtos { this.createdAt = createdAt; } } + + public static class TaskCreateRequest implements Serializable { + private BigInteger knowledgeId; + private String filePath; + private String fileName; + + public BigInteger getKnowledgeId() { + return knowledgeId; + } + + public void setKnowledgeId(BigInteger knowledgeId) { + this.knowledgeId = knowledgeId; + } + + public String getFilePath() { + return filePath; + } + + public void setFilePath(String filePath) { + this.filePath = filePath; + } + + public String getFileName() { + return fileName; + } + + public void setFileName(String fileName) { + this.fileName = fileName; + } + } + + public static class TaskCreateResponse implements Serializable { + private BigInteger documentId; + private BigInteger taskId; + private String processStatus; + + public BigInteger getDocumentId() { + return documentId; + } + + public void setDocumentId(BigInteger documentId) { + this.documentId = documentId; + } + + public BigInteger getTaskId() { + return taskId; + } + + public void setTaskId(BigInteger taskId) { + this.taskId = taskId; + } + + public String getProcessStatus() { + return processStatus; + } + + public void setProcessStatus(String processStatus) { + this.processStatus = processStatus; + } + } + + public static class TaskDetailResponse implements Serializable { + private BigInteger taskId; + private BigInteger documentId; + private BigInteger knowledgeId; + private String phase; + private String status; + private String processStatus; + private Integer progressPercent; + private Integer totalChunks; + private Integer completedChunks; + private Integer failedChunks; + private String providerTaskId; + private String errorSummary; + private Date startedAt; + private Date finishedAt; + + public BigInteger getTaskId() { + return taskId; + } + + public void setTaskId(BigInteger taskId) { + 
this.taskId = taskId; + } + + public BigInteger getDocumentId() { + return documentId; + } + + public void setDocumentId(BigInteger documentId) { + this.documentId = documentId; + } + + public BigInteger getKnowledgeId() { + return knowledgeId; + } + + public void setKnowledgeId(BigInteger knowledgeId) { + this.knowledgeId = knowledgeId; + } + + public String getPhase() { + return phase; + } + + public void setPhase(String phase) { + this.phase = phase; + } + + public String getStatus() { + return status; + } + + public void setStatus(String status) { + this.status = status; + } + + public String getProcessStatus() { + return processStatus; + } + + public void setProcessStatus(String processStatus) { + this.processStatus = processStatus; + } + + public Integer getProgressPercent() { + return progressPercent; + } + + public void setProgressPercent(Integer progressPercent) { + this.progressPercent = progressPercent; + } + + public Integer getTotalChunks() { + return totalChunks; + } + + public void setTotalChunks(Integer totalChunks) { + this.totalChunks = totalChunks; + } + + public Integer getCompletedChunks() { + return completedChunks; + } + + public void setCompletedChunks(Integer completedChunks) { + this.completedChunks = completedChunks; + } + + public Integer getFailedChunks() { + return failedChunks; + } + + public void setFailedChunks(Integer failedChunks) { + this.failedChunks = failedChunks; + } + + public String getProviderTaskId() { + return providerTaskId; + } + + public void setProviderTaskId(String providerTaskId) { + this.providerTaskId = providerTaskId; + } + + public String getErrorSummary() { + return errorSummary; + } + + public void setErrorSummary(String errorSummary) { + this.errorSummary = errorSummary; + } + + public Date getStartedAt() { + return startedAt; + } + + public void setStartedAt(Date startedAt) { + this.startedAt = startedAt; + } + + public Date getFinishedAt() { + return finishedAt; + } + + public void setFinishedAt(Date 
finishedAt) { + this.finishedAt = finishedAt; + } + } + + public static class TaskStartIndexRequest implements Serializable { + private BigInteger knowledgeId; + private BigInteger documentId; + private String previewSessionId; + + public BigInteger getKnowledgeId() { + return knowledgeId; + } + + public void setKnowledgeId(BigInteger knowledgeId) { + this.knowledgeId = knowledgeId; + } + + public BigInteger getDocumentId() { + return documentId; + } + + public void setDocumentId(BigInteger documentId) { + this.documentId = documentId; + } + + public String getPreviewSessionId() { + return previewSessionId; + } + + public void setPreviewSessionId(String previewSessionId) { + this.previewSessionId = previewSessionId; + } + } + + public static class TaskStartIndexResponse implements Serializable { + private BigInteger taskId; + private String processStatus; + + public BigInteger getTaskId() { + return taskId; + } + + public void setTaskId(BigInteger taskId) { + this.taskId = taskId; + } + + public String getProcessStatus() { + return processStatus; + } + + public void setProcessStatus(String processStatus) { + this.processStatus = processStatus; + } + } + + public static class TaskRetryRequest implements Serializable { + private BigInteger knowledgeId; + private BigInteger documentId; + + public BigInteger getKnowledgeId() { + return knowledgeId; + } + + public void setKnowledgeId(BigInteger knowledgeId) { + this.knowledgeId = knowledgeId; + } + + public BigInteger getDocumentId() { + return documentId; + } + + public void setDocumentId(BigInteger documentId) { + this.documentId = documentId; + } + } } diff --git a/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/documentimport/DocumentImportKeys.java b/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/documentimport/DocumentImportKeys.java index 850fdb7..3025b49 100644 --- a/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/documentimport/DocumentImportKeys.java 
+++ b/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/documentimport/DocumentImportKeys.java @@ -18,4 +18,8 @@ public final class DocumentImportKeys { public static final String KEY_DOCUMENT_ANALYSIS_SUMMARY = "splitter.analysisSummary"; public static final String KEY_DOCUMENT_SOURCE_FILE_EXT = "splitter.sourceFileExt"; public static final String KEY_DOCUMENT_PREVIEW_VERSION = "splitter.previewVersion"; + public static final String KEY_DOCUMENT_PARSE_BACKEND = "parse.backend"; + public static final String KEY_DOCUMENT_PARSE_METADATA = "parse.metadata"; + public static final String KEY_DOCUMENT_PARSE_WARNINGS = "parse.warnings"; + public static final String KEY_DOCUMENT_PROVIDER_TASK_ID = "parse.providerTaskId"; } diff --git a/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/documentimport/task/DocumentImportIndexTaskConsumer.java b/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/documentimport/task/DocumentImportIndexTaskConsumer.java new file mode 100644 index 0000000..c276905 --- /dev/null +++ b/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/documentimport/task/DocumentImportIndexTaskConsumer.java @@ -0,0 +1,76 @@ +package tech.easyflow.ai.documentimport.task; + +import com.alibaba.fastjson2.JSON; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.stereotype.Component; +import tech.easyflow.common.mq.config.MQProperties; +import tech.easyflow.common.mq.core.MQConsumerHandler; +import tech.easyflow.common.mq.core.MQMessage; +import tech.easyflow.common.mq.core.MQSubscription; + +import java.util.List; + +/** + * 文档向量化任务消费者。 + * + * @author Codex + * @since 2026-04-14 + */ +@Component +public class DocumentImportIndexTaskConsumer implements MQConsumerHandler { + + private static final Logger LOG = LoggerFactory.getLogger(DocumentImportIndexTaskConsumer.class); + + private final KnowledgeDocumentImportTaskAppService appService; + private final MQProperties 
mqProperties; + + public DocumentImportIndexTaskConsumer(KnowledgeDocumentImportTaskAppService appService, + MQProperties mqProperties) { + this.appService = appService; + this.mqProperties = mqProperties; + } + + @Override + public MQSubscription subscription() { + MQSubscription subscription = new MQSubscription(); + subscription.setTopic(DocumentImportTaskMqConstants.INDEX_TOPIC); + subscription.setConsumerGroup(DocumentImportTaskMqConstants.INDEX_GROUP); + subscription.setShardCount(resolveShardCount()); + return subscription; + } + + @Override + public void handle(List messages) { + LOG.info("文档向量化消费者收到消息批次: count={}", messages == null ? 0 : messages.size()); + for (MQMessage message : messages) { + DocumentImportTaskMessage event = JSON.parseObject(message.getBody(), DocumentImportTaskMessage.class); + if (event == null || event.getTaskId() == null) { + LOG.warn("文档向量化消费者跳过非法消息: streamMessageId={}, messageId={}", + message == null ? null : message.getStreamMessageId(), + message == null ? 
null : message.getMessageId()); + continue; + } + LOG.info("文档向量化消费者开始处理消息: taskId={}, messageId={}, streamMessageId={}", + event.getTaskId(), message.getMessageId(), message.getStreamMessageId()); + try { + appService.handleIndexTask(event.getTaskId()); + LOG.info("文档向量化消费者处理完成: taskId={}, messageId={}, streamMessageId={}", + event.getTaskId(), message.getMessageId(), message.getStreamMessageId()); + } catch (Exception exception) { + LOG.error("文档向量化消费者处理失败: taskId={}, messageId={}, streamMessageId={}", + event.getTaskId(), message.getMessageId(), message.getStreamMessageId(), exception); + throw exception; + } + } + } + + /** + * 向量化消费者需覆盖生产端的所有分片,避免消息落入未订阅分片。 + * + * @return 当前 Redis Stream 分片数 + */ + private int resolveShardCount() { + return Math.max(mqProperties.getRedis().getChatPersistShardCount(), 1); + } +} diff --git a/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/documentimport/task/DocumentImportIndexTaskProducer.java b/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/documentimport/task/DocumentImportIndexTaskProducer.java new file mode 100644 index 0000000..8a12896 --- /dev/null +++ b/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/documentimport/task/DocumentImportIndexTaskProducer.java @@ -0,0 +1,52 @@ +package tech.easyflow.ai.documentimport.task; + +import com.alibaba.fastjson2.JSON; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.stereotype.Service; +import tech.easyflow.common.mq.core.MQMessage; +import tech.easyflow.common.mq.core.MQProducer; + +import java.math.BigInteger; +import java.util.Date; + +/** + * 文档向量化任务消息生产者。 + * + * @author Codex + * @since 2026-04-14 + */ +@Service +public class DocumentImportIndexTaskProducer { + + private static final Logger LOG = LoggerFactory.getLogger(DocumentImportIndexTaskProducer.class); + + private final MQProducer mqProducer; + + public DocumentImportIndexTaskProducer(MQProducer mqProducer) { + 
this.mqProducer = mqProducer; + } + + /** + * 发送向量化任务消息。 + * + * @param taskId 任务 ID + */ + public void send(BigInteger taskId) { + DocumentImportTaskMessage event = new DocumentImportTaskMessage(); + event.setTaskId(taskId); + event.setOccurredAt(new Date()); + + MQMessage message = new MQMessage(); + message.setMessageId("index-" + taskId); + message.setTopic(DocumentImportTaskMqConstants.INDEX_TOPIC); + message.setKey(String.valueOf(taskId)); + message.setCreatedAt(event.getOccurredAt()); + message.setBody(JSON.toJSONString(event)); + LOG.info("准备投递文档向量化 MQ 消息: topic={}, taskId={}, messageId={}", + message.getTopic(), taskId, message.getMessageId()); + String recordId = mqProducer.send(message); + LOG.info("文档向量化 MQ 消息投递完成: topic={}, taskId={}, messageId={}, recordId={}", + message.getTopic(), taskId, message.getMessageId(), recordId); + } +} diff --git a/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/documentimport/task/DocumentImportParseMonitor.java b/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/documentimport/task/DocumentImportParseMonitor.java new file mode 100644 index 0000000..956443a --- /dev/null +++ b/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/documentimport/task/DocumentImportParseMonitor.java @@ -0,0 +1,33 @@ +package tech.easyflow.ai.documentimport.task; + +import org.springframework.scheduling.annotation.Scheduled; +import org.springframework.stereotype.Component; + +/** + * 知识库文档解析任务收敛器。 + * + *

该调度器只负责轮询运行中的桥接解析任务,不承担提交任务职责。

+ * + * @author Codex + * @since 2026-04-15 + */ +@Component +public class DocumentImportParseMonitor { + + private final KnowledgeDocumentImportTaskAppService appService; + + public DocumentImportParseMonitor(KnowledgeDocumentImportTaskAppService appService) { + this.appService = appService; + } + + /** + * 定时收敛运行中的桥接解析任务状态。 + */ + @Scheduled( + fixedDelayString = "${easyflow.ai.document-import.parse-monitor.fixed-delay:3000}", + initialDelayString = "${easyflow.ai.document-import.parse-monitor.initial-delay:5000}" + ) + public void reconcileRunningParseTasks() { + appService.monitorRunningParseTasks(); + } +} diff --git a/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/documentimport/task/DocumentImportParseTaskConsumer.java b/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/documentimport/task/DocumentImportParseTaskConsumer.java new file mode 100644 index 0000000..a4981c4 --- /dev/null +++ b/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/documentimport/task/DocumentImportParseTaskConsumer.java @@ -0,0 +1,76 @@ +package tech.easyflow.ai.documentimport.task; + +import com.alibaba.fastjson2.JSON; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.stereotype.Component; +import tech.easyflow.common.mq.config.MQProperties; +import tech.easyflow.common.mq.core.MQConsumerHandler; +import tech.easyflow.common.mq.core.MQMessage; +import tech.easyflow.common.mq.core.MQSubscription; + +import java.util.List; + +/** + * 文档解析任务消费者。 + * + * @author Codex + * @since 2026-04-14 + */ +@Component +public class DocumentImportParseTaskConsumer implements MQConsumerHandler { + + private static final Logger LOG = LoggerFactory.getLogger(DocumentImportParseTaskConsumer.class); + + private final KnowledgeDocumentImportTaskAppService appService; + private final MQProperties mqProperties; + + public DocumentImportParseTaskConsumer(KnowledgeDocumentImportTaskAppService appService, + MQProperties 
mqProperties) { + this.appService = appService; + this.mqProperties = mqProperties; + } + + @Override + public MQSubscription subscription() { + MQSubscription subscription = new MQSubscription(); + subscription.setTopic(DocumentImportTaskMqConstants.PARSE_TOPIC); + subscription.setConsumerGroup(DocumentImportTaskMqConstants.PARSE_GROUP); + subscription.setShardCount(resolveShardCount()); + return subscription; + } + + @Override + public void handle(List messages) { + LOG.info("文档解析消费者收到消息批次: count={}", messages == null ? 0 : messages.size()); + for (MQMessage message : messages) { + DocumentImportTaskMessage event = JSON.parseObject(message.getBody(), DocumentImportTaskMessage.class); + if (event == null || event.getTaskId() == null) { + LOG.warn("文档解析消费者跳过非法消息: streamMessageId={}, messageId={}", + message == null ? null : message.getStreamMessageId(), + message == null ? null : message.getMessageId()); + continue; + } + LOG.info("文档解析消费者开始处理消息: taskId={}, messageId={}, streamMessageId={}", + event.getTaskId(), message.getMessageId(), message.getStreamMessageId()); + try { + appService.handleParseTask(event.getTaskId()); + LOG.info("文档解析消费者处理完成: taskId={}, messageId={}, streamMessageId={}", + event.getTaskId(), message.getMessageId(), message.getStreamMessageId()); + } catch (Exception exception) { + LOG.error("文档解析消费者处理失败: taskId={}, messageId={}, streamMessageId={}", + event.getTaskId(), message.getMessageId(), message.getStreamMessageId(), exception); + throw exception; + } + } + } + + /** + * 解析消费者需覆盖生产端的所有分片,避免消息落入未订阅分片。 + * + * @return 当前 Redis Stream 分片数 + */ + private int resolveShardCount() { + return Math.max(mqProperties.getRedis().getChatPersistShardCount(), 1); + } +} diff --git a/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/documentimport/task/DocumentImportParseTaskProducer.java b/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/documentimport/task/DocumentImportParseTaskProducer.java new file mode 100644 
index 0000000..26e7ea2 --- /dev/null +++ b/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/documentimport/task/DocumentImportParseTaskProducer.java @@ -0,0 +1,52 @@ +package tech.easyflow.ai.documentimport.task; + +import com.alibaba.fastjson2.JSON; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.stereotype.Service; +import tech.easyflow.common.mq.core.MQMessage; +import tech.easyflow.common.mq.core.MQProducer; + +import java.math.BigInteger; +import java.util.Date; + +/** + * 文档解析任务消息生产者。 + * + * @author Codex + * @since 2026-04-14 + */ +@Service +public class DocumentImportParseTaskProducer { + + private static final Logger LOG = LoggerFactory.getLogger(DocumentImportParseTaskProducer.class); + + private final MQProducer mqProducer; + + public DocumentImportParseTaskProducer(MQProducer mqProducer) { + this.mqProducer = mqProducer; + } + + /** + * 发送解析任务消息。 + * + * @param taskId 任务 ID + */ + public void send(BigInteger taskId) { + DocumentImportTaskMessage event = new DocumentImportTaskMessage(); + event.setTaskId(taskId); + event.setOccurredAt(new Date()); + + MQMessage message = new MQMessage(); + message.setMessageId("parse-" + taskId); + message.setTopic(DocumentImportTaskMqConstants.PARSE_TOPIC); + message.setKey(String.valueOf(taskId)); + message.setCreatedAt(event.getOccurredAt()); + message.setBody(JSON.toJSONString(event)); + LOG.info("准备投递文档解析 MQ 消息: topic={}, taskId={}, messageId={}", + message.getTopic(), taskId, message.getMessageId()); + String recordId = mqProducer.send(message); + LOG.info("文档解析 MQ 消息投递完成: topic={}, taskId={}, messageId={}, recordId={}", + message.getTopic(), taskId, message.getMessageId(), recordId); + } +} diff --git a/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/documentimport/task/DocumentImportTaskMessage.java b/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/documentimport/task/DocumentImportTaskMessage.java new file mode 100644 
index 0000000..5015aad --- /dev/null +++ b/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/documentimport/task/DocumentImportTaskMessage.java @@ -0,0 +1,33 @@ +package tech.easyflow.ai.documentimport.task; + +import java.io.Serializable; +import java.math.BigInteger; +import java.util.Date; + +/** + * 文档导入任务消息。 + * + * @author Codex + * @since 2026-04-14 + */ +public class DocumentImportTaskMessage implements Serializable { + + private BigInteger taskId; + private Date occurredAt; + + public BigInteger getTaskId() { + return taskId; + } + + public void setTaskId(BigInteger taskId) { + this.taskId = taskId; + } + + public Date getOccurredAt() { + return occurredAt; + } + + public void setOccurredAt(Date occurredAt) { + this.occurredAt = occurredAt; + } +} diff --git a/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/documentimport/task/DocumentImportTaskMqConstants.java b/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/documentimport/task/DocumentImportTaskMqConstants.java new file mode 100644 index 0000000..0ae8864 --- /dev/null +++ b/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/documentimport/task/DocumentImportTaskMqConstants.java @@ -0,0 +1,18 @@ +package tech.easyflow.ai.documentimport.task; + +/** + * 文档导入任务 MQ 常量。 + * + * @author Codex + * @since 2026-04-14 + */ +public final class DocumentImportTaskMqConstants { + + private DocumentImportTaskMqConstants() { + } + + public static final String PARSE_TOPIC = "knowledge-document-parse"; + public static final String PARSE_GROUP = "knowledge-document-parse-group"; + public static final String INDEX_TOPIC = "knowledge-document-index"; + public static final String INDEX_GROUP = "knowledge-document-index-group"; +} diff --git a/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/documentimport/task/DocumentImportTaskStatusStreamService.java 
b/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/documentimport/task/DocumentImportTaskStatusStreamService.java new file mode 100644 index 0000000..e79d849 --- /dev/null +++ b/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/documentimport/task/DocumentImportTaskStatusStreamService.java @@ -0,0 +1,156 @@ +package tech.easyflow.ai.documentimport.task; + +import org.springframework.http.MediaType; +import org.springframework.scheduling.concurrent.ThreadPoolTaskExecutor; +import org.springframework.stereotype.Service; +import org.springframework.transaction.support.TransactionSynchronization; +import org.springframework.transaction.support.TransactionSynchronizationManager; +import org.springframework.web.servlet.mvc.method.annotation.SseEmitter; +import tech.easyflow.ai.entity.Document; +import tech.easyflow.ai.mapper.DocumentMapper; +import tech.easyflow.common.web.exceptions.BusinessException; + +import javax.annotation.Resource; +import java.math.BigInteger; +import java.time.Duration; +import java.util.LinkedHashMap; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; + +/** + * 知识库文档任务状态 SSE 推送服务。 + * + * @author Codex + * @since 2026-04-15 + */ +@Service +public class DocumentImportTaskStatusStreamService { + + private static final long SSE_TIMEOUT_MS = Duration.ofMinutes(30).toMillis(); + + private final Map> knowledgeEmitters = new ConcurrentHashMap>(); + + @Resource + private DocumentMapper documentMapper; + + @Resource(name = "sseThreadPool") + private ThreadPoolTaskExecutor sseThreadPool; + + /** + * 订阅知识库文档任务状态流。 + * + * @param knowledgeId 知识库 ID + * @return SSE 连接 + */ + public SseEmitter subscribe(BigInteger knowledgeId) { + if (knowledgeId == null) { + throw new BusinessException("知识库id不能为空"); + } + String topicKey = toTopicKey(knowledgeId); + SseEmitter emitter = new SseEmitter(SSE_TIMEOUT_MS); + knowledgeEmitters.computeIfAbsent(topicKey, key -> 
ConcurrentHashMap.newKeySet()).add(emitter); + emitter.onCompletion(() -> removeEmitter(topicKey, emitter)); + emitter.onTimeout(() -> { + removeEmitter(topicKey, emitter); + emitter.complete(); + }); + emitter.onError(error -> removeEmitter(topicKey, emitter)); + sendAsync(topicKey, emitter, "connected", buildConnectedPayload(knowledgeId)); + return emitter; + } + + /** + * 在事务提交后推送文档任务状态变更。 + * + * @param documentId 文档 ID + */ + public void publishAfterCommit(BigInteger documentId) { + if (documentId == null) { + return; + } + Runnable publishAction = () -> publishNow(documentId); + if (TransactionSynchronizationManager.isSynchronizationActive() + && TransactionSynchronizationManager.isActualTransactionActive()) { + TransactionSynchronizationManager.registerSynchronization(new TransactionSynchronization() { + @Override + public void afterCommit() { + publishAction.run(); + } + }); + return; + } + publishAction.run(); + } + + private void publishNow(BigInteger documentId) { + Document document = documentMapper.selectOneById(documentId); + if (document == null || document.getCollectionId() == null) { + return; + } + String topicKey = toTopicKey(document.getCollectionId()); + Set emitters = knowledgeEmitters.get(topicKey); + if (emitters == null || emitters.isEmpty()) { + return; + } + Map payload = buildDocumentPayload(document); + for (SseEmitter emitter : emitters) { + sendAsync(topicKey, emitter, "document-status", payload); + } + } + + private Map buildConnectedPayload(BigInteger knowledgeId) { + Map payload = new LinkedHashMap(); + payload.put("knowledgeId", knowledgeId.toString()); + payload.put("type", "connected"); + return payload; + } + + private Map buildDocumentPayload(Document document) { + Map payload = new LinkedHashMap(); + payload.put("type", "document-status"); + payload.put("knowledgeId", document.getCollectionId() == null ? null : document.getCollectionId().toString()); + payload.put("documentId", document.getId() == null ? 
null : document.getId().toString()); + payload.put("processStatus", document.getProcessStatus()); + payload.put("progressPercent", document.getProgressPercent()); + payload.put("totalChunks", document.getTotalChunks()); + payload.put("completedChunks", document.getCompletedChunks()); + payload.put("failedChunks", document.getFailedChunks()); + payload.put("lastTaskError", document.getLastTaskError()); + payload.put("taskModifiedAt", document.getTaskModifiedAt()); + return payload; + } + + private void sendAsync(String topicKey, SseEmitter emitter, String eventName, Map payload) { + sseThreadPool.execute(() -> { + try { + emitter.send( + SseEmitter.event() + .name(eventName) + .data(payload, MediaType.APPLICATION_JSON) + ); + } catch (Exception e) { + removeEmitter(topicKey, emitter); + try { + emitter.completeWithError(e); + } catch (Exception ignored) { + } + } + }); + } + + private void removeEmitter(String topicKey, SseEmitter emitter) { + Set emitters = knowledgeEmitters.get(topicKey); + if (emitters == null) { + return; + } + emitters.remove(emitter); + if (emitters.isEmpty()) { + knowledgeEmitters.remove(topicKey); + } + } + + private String toTopicKey(BigInteger knowledgeId) { + return String.valueOf(knowledgeId); + } +} diff --git a/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/documentimport/task/KnowledgeDocumentImportTaskAppService.java b/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/documentimport/task/KnowledgeDocumentImportTaskAppService.java new file mode 100644 index 0000000..f24b62d --- /dev/null +++ b/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/documentimport/task/KnowledgeDocumentImportTaskAppService.java @@ -0,0 +1,1591 @@ +package tech.easyflow.ai.documentimport.task; + +import cn.dev33.satoken.stp.StpUtil; +import com.easyagents.core.document.Document; +import com.easyagents.core.model.embedding.EmbeddingModel; +import com.easyagents.core.model.embedding.EmbeddingOptions; +import 
com.easyagents.core.store.DocumentStore; +import com.easyagents.core.store.StoreOptions; +import com.easyagents.core.store.StoreResult; +import com.easyagents.rag.core.RagChunk; +import com.easyagents.rag.core.RagDefaults; +import com.easyagents.rag.core.RagStrategyCodes; +import com.easyagents.rag.ingestion.RagIngestionService; +import com.easyagents.rag.ingestion.model.AnalysisResult; +import com.easyagents.rag.ingestion.model.StrategyConfig; +import com.easyagents.search.engine.service.DocumentSearcher; +import com.easyagents.search.engine.service.KeywordSearchMetadataKeys; +import com.mybatisflex.core.keygen.impl.FlexIDKeyGenerator; +import com.mybatisflex.core.query.QueryWrapper; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.context.annotation.Lazy; +import org.springframework.scheduling.concurrent.ThreadPoolTaskExecutor; +import org.springframework.stereotype.Service; +import org.springframework.transaction.support.TransactionSynchronization; +import org.springframework.transaction.support.TransactionSynchronizationManager; +import org.springframework.transaction.annotation.Transactional; +import tech.easyflow.ai.config.SearcherFactory; +import tech.easyflow.ai.document.model.DocumentParseScenario; +import tech.easyflow.ai.document.model.DocumentParseTaskInfo; +import tech.easyflow.ai.document.model.DocumentParsedResult; +import tech.easyflow.ai.document.model.DocumentSourceRef; +import tech.easyflow.ai.document.service.DocumentParseBridgeService; +import tech.easyflow.ai.documentimport.DocumentImportDtos; +import tech.easyflow.ai.documentimport.DocumentImportKeys; +import tech.easyflow.ai.documentimport.DocumentImportPreviewService; +import tech.easyflow.ai.entity.DocumentChunk; +import tech.easyflow.ai.entity.DocumentCollection; +import tech.easyflow.ai.entity.DocumentImportTask; +import tech.easyflow.ai.enums.DocumentImportTaskPhase; +import 
tech.easyflow.ai.enums.DocumentImportTaskStatus; +import tech.easyflow.ai.enums.DocumentProcessStatus; +import tech.easyflow.ai.entity.Model; +import tech.easyflow.ai.mapper.DocumentChunkMapper; +import tech.easyflow.ai.mapper.DocumentImportTaskMapper; +import tech.easyflow.ai.mapper.DocumentMapper; +import tech.easyflow.ai.service.DocumentChunkService; +import tech.easyflow.ai.service.DocumentCollectionService; +import tech.easyflow.ai.service.DocumentImportTaskService; +import tech.easyflow.ai.service.ModelService; +import tech.easyflow.common.domain.Result; +import tech.easyflow.common.filestorage.FileStorageService; +import tech.easyflow.common.util.FileUtil; +import tech.easyflow.common.util.StringUtil; +import tech.easyflow.common.web.exceptions.BusinessException; + +import javax.annotation.Resource; +import java.io.IOException; +import java.io.InputStream; +import java.math.BigInteger; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.Date; +import java.util.HashMap; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Locale; +import java.util.Map; +import java.util.Objects; + +/** + * 知识库文档任务化导入应用服务。 + * + * @author Codex + * @since 2026-04-14 + */ +@Service +public class KnowledgeDocumentImportTaskAppService { + + private static final Logger LOG = LoggerFactory.getLogger(KnowledgeDocumentImportTaskAppService.class); + private static final int PARSE_MONITOR_BATCH_SIZE = 20; + private static final int INDEX_BATCH_SIZE = 20; + private static final String SOURCE_RANGES_KEY = "sourceRanges"; + + @Resource + private DocumentMapper documentMapper; + + @Resource + private DocumentChunkMapper documentChunkMapper; + + @Resource + private DocumentImportTaskMapper documentImportTaskMapper; + + @Resource + private DocumentCollectionService knowledgeService; + + @Resource + private ModelService modelService; + + @Resource + private DocumentChunkService documentChunkService; + + @Resource + 
private DocumentImportTaskService documentImportTaskService; + + @Resource + private DocumentImportPreviewService documentImportPreviewService; + + @Resource + private RagIngestionService ragIngestionService; + + @Resource + private DocumentParseBridgeService documentParseBridgeService; + + @Resource + private DocumentImportParseTaskProducer parseTaskProducer; + + @Resource + private DocumentImportIndexTaskProducer indexTaskProducer; + + @Resource(name = "default") + private FileStorageService storageService; + + @Resource + private SearcherFactory searcherFactory; + + @Resource(name = "documentImportTaskExecutor") + private ThreadPoolTaskExecutor documentImportTaskExecutor; + + @Resource + private DocumentImportTaskStatusStreamService documentImportTaskStatusStreamService; + + @Autowired + @Lazy + private KnowledgeDocumentImportTaskAppService selfProxy; + + /** + * 上传后创建解析任务。 + * + * @param request 创建请求 + * @return 任务结果 + */ + @Transactional + public Result createImportTask(DocumentImportDtos.TaskCreateRequest request) { + DocumentCollection knowledge = assertDocumentCollection(request.getKnowledgeId()); + if (!StringUtil.hasText(request.getFilePath()) || !StringUtil.hasText(request.getFileName())) { + throw new BusinessException("文件信息不完整"); + } + String fileExt = normalizeFileExtension(request.getFileName(), request.getFilePath()); + assertSupportedImportFile(fileExt); + + Date now = new Date(); + tech.easyflow.ai.entity.Document document = new tech.easyflow.ai.entity.Document(); + document.setId(generateId(document)); + document.setCollectionId(knowledge.getId()); + document.setDocumentPath(request.getFilePath()); + document.setTitle(request.getFileName()); + document.setDocumentType(fileExt); + document.setCreated(now); + document.setModified(now); + document.setCreatedBy(resolveOperatorId()); + document.setModifiedBy(resolveOperatorId()); + document.setProcessStatus(DocumentProcessStatus.PARSING.name()); + document.setTotalChunks(0); + 
document.setCompletedChunks(0); + document.setFailedChunks(0); + document.setProgressPercent(0); + document.setTaskModifiedAt(now); + document.setOptions(buildInitialOptions(fileExt)); + documentMapper.insert(document); + + DocumentImportTask task = createTask(document, DocumentImportTaskPhase.PARSE, buildFilePayload(request)); + LOG.info("文档导入任务已创建: knowledgeId={}, documentId={}, taskId={}, fileName={}, fileExt={}, parseMode={}", + knowledge.getId(), + document.getId(), + task.getId(), + document.getTitle(), + fileExt, + shouldUseDocumentParseBridge(fileExt) ? "async-bridge" : "sync-default"); + documentImportTaskStatusStreamService.publishAfterCommit(document.getId()); + if (shouldUseDocumentParseBridge(fileExt)) { + LOG.info("文档解析任务准备通过 MQ 异步投递: knowledgeId={}, documentId={}, taskId={}, fileExt={}", + knowledge.getId(), document.getId(), task.getId(), fileExt); + dispatchParseTaskAfterCommit(task.getId()); + scheduleParseTaskFallback(task.getId()); + } else { + LOG.info("文档解析任务准备同步执行: knowledgeId={}, documentId={}, taskId={}, fileExt={}", + knowledge.getId(), document.getId(), task.getId(), fileExt); + selfProxy.handleParseTask(task.getId()); + document = requireDocument(document.getId()); + } + + DocumentImportDtos.TaskCreateResponse response = new DocumentImportDtos.TaskCreateResponse(); + response.setDocumentId(document.getId()); + response.setTaskId(task.getId()); + response.setProcessStatus(document.getProcessStatus()); + return Result.ok(response); + } + + /** + * 查询任务详情。 + * + * @param taskId 任务 ID + * @return 任务详情 + */ + public Result getImportTaskDetail(BigInteger taskId) { + DocumentImportTask task = requireTask(taskId); + tech.easyflow.ai.entity.Document document = requireDocument(task.getDocumentId()); + DocumentImportDtos.TaskDetailResponse response = new DocumentImportDtos.TaskDetailResponse(); + response.setTaskId(task.getId()); + response.setDocumentId(task.getDocumentId()); + response.setKnowledgeId(task.getKnowledgeId()); + 
response.setPhase(task.getPhase()); + response.setStatus(task.getStatus()); + response.setProcessStatus(document.getProcessStatus()); + response.setProgressPercent(defaultInt(document.getProgressPercent())); + response.setTotalChunks(defaultInt(document.getTotalChunks())); + response.setCompletedChunks(defaultInt(document.getCompletedChunks())); + response.setFailedChunks(defaultInt(document.getFailedChunks())); + response.setProviderTaskId(task.getProviderTaskId()); + response.setErrorSummary(StringUtil.hasText(document.getLastTaskError()) ? document.getLastTaskError() : task.getErrorSummary()); + response.setStartedAt(task.getStartedAt()); + response.setFinishedAt(task.getFinishedAt()); + return Result.ok(response); + } + + /** + * 生成单文档分块预览。 + * + * @param request 预览请求 + * @return 预览结果 + */ + @Transactional + public Result previewImportTask(DocumentImportDtos.PreviewRequest request) { + if (request.getDocumentId() == null) { + throw new BusinessException("文档ID不能为空"); + } + DocumentCollection knowledge = assertDocumentCollection(request.getKnowledgeId()); + tech.easyflow.ai.entity.Document document = requireDocumentForKnowledge(request.getDocumentId(), knowledge.getId()); + ensurePreviewable(document); + + StrategyConfig requestedStrategy = null; + if (request.getFiles() != null && !request.getFiles().isEmpty()) { + requestedStrategy = request.getFiles().get(0).getStrategyConfig(); + } + + DocumentImportDtos.PreviewSession session = buildPreviewSessionForDocument(knowledge, document, requestedStrategy); + String sessionId = documentImportPreviewService.put(session); + + DocumentImportDtos.PreviewFileResult item = buildPreviewFileResult(document, session, sessionId); + + DocumentImportDtos.PreviewResponse response = new DocumentImportDtos.PreviewResponse(); + response.setItems(List.of(item)); + response.setTotalFiles(1); + response.setTotalChunks(session.getDocumentChunks().size()); + return Result.ok(response); + } + + /** + * 启动向量化任务。 + * + * @param request 启动请求 
+ * @return 启动结果 + */ + @Transactional + public Result startIndexTask(DocumentImportDtos.TaskStartIndexRequest request) { + if (request.getDocumentId() == null) { + throw new BusinessException("文档ID不能为空"); + } + DocumentCollection knowledge = assertDocumentCollection(request.getKnowledgeId()); + tech.easyflow.ai.entity.Document document = requireDocumentForKnowledge(request.getDocumentId(), knowledge.getId()); + if (!allowIndexStart(document)) { + throw new BusinessException("当前文档状态不允许开始向量化"); + } + + DocumentImportDtos.PreviewSession session = resolveIndexPreviewSession(knowledge, document, request.getPreviewSessionId()); + int totalChunks = session.getDocumentChunks().size(); + if (totalChunks <= 0) { + throw new BusinessException("未生成有效分块,无法开始向量化"); + } + + mergeDocumentPreviewOptions(document, session); + updateDocumentIndexing(document, totalChunks); + DocumentImportTask task = createTask(document, DocumentImportTaskPhase.INDEX, Map.of( + "previewSessionId", session.getSessionId(), + "totalChunks", totalChunks + )); + LOG.info("文档向量化任务已创建: knowledgeId={}, documentId={}, taskId={}, previewSessionId={}, totalChunks={}", + knowledge.getId(), document.getId(), task.getId(), session.getSessionId(), totalChunks); + dispatchIndexTaskAfterCommit(task.getId()); + scheduleIndexTaskFallback(task.getId()); + + DocumentImportDtos.TaskStartIndexResponse response = new DocumentImportDtos.TaskStartIndexResponse(); + response.setTaskId(task.getId()); + response.setProcessStatus(DocumentProcessStatus.INDEXING.name()); + return Result.ok(response); + } + + /** + * 重试解析任务。 + * + * @param request 重试请求 + * @return 任务结果 + */ + @Transactional + public Result retryParseTask(DocumentImportDtos.TaskRetryRequest request) { + DocumentCollection knowledge = assertDocumentCollection(request.getKnowledgeId()); + tech.easyflow.ai.entity.Document document = requireDocumentForKnowledge(request.getDocumentId(), knowledge.getId()); + if 
(!DocumentProcessStatus.PARSE_FAILED.name().equals(document.getProcessStatus())) { + throw new BusinessException("当前文档不支持重试解析"); + } + String fileExt = normalizeFileExtension(document.getTitle(), document.getDocumentPath()); + resetDocumentForParseRetry(document); + DocumentImportTask task = createTask(document, DocumentImportTaskPhase.PARSE, buildDocumentPayload(document)); + if (shouldUseDocumentParseBridge(fileExt)) { + dispatchParseTaskAfterCommit(task.getId()); + scheduleParseTaskFallback(task.getId()); + return Result.ok(buildTaskStartResponse(task, DocumentProcessStatus.PARSING)); + } + selfProxy.handleParseTask(task.getId()); + tech.easyflow.ai.entity.Document current = requireDocument(document.getId()); + return Result.ok(buildTaskStartResponse(task, DocumentProcessStatus.valueOf(current.getProcessStatus()))); + } + + /** + * 重试向量化任务。 + * + * @param request 重试请求 + * @return 任务结果 + */ + @Transactional + public Result retryIndexTask(DocumentImportDtos.TaskRetryRequest request) { + DocumentImportDtos.TaskStartIndexRequest startRequest = new DocumentImportDtos.TaskStartIndexRequest(); + startRequest.setKnowledgeId(request.getKnowledgeId()); + startRequest.setDocumentId(request.getDocumentId()); + return startIndexTask(startRequest); + } + + /** + * 处理解析任务消息。 + * + * @param taskId 任务 ID + */ + @Transactional + public void handleParseTask(BigInteger taskId) { + DocumentImportTask task = requireTask(taskId); + if (!DocumentImportTaskPhase.PARSE.name().equals(task.getPhase())) { + LOG.warn("忽略非解析阶段任务: taskId={}, phase={}", taskId, task.getPhase()); + return; + } + if (DocumentImportTaskStatus.COMPLETED.name().equals(task.getStatus()) + || DocumentImportTaskStatus.FAILED.name().equals(task.getStatus())) { + LOG.info("解析任务已结束,跳过重复处理: taskId={}, status={}", taskId, task.getStatus()); + return; + } + if (!tryMarkTaskRunning(taskId)) { + LOG.info("解析任务未抢占成功,跳过本次执行: taskId={}", taskId); + return; + } + task = requireTask(taskId); + + tech.easyflow.ai.entity.Document 
document = requireDocument(task.getDocumentId()); + LOG.info("开始执行文档解析任务: taskId={}, documentId={}, knowledgeId={}, currentStatus={}", + taskId, document.getId(), task.getKnowledgeId(), document.getProcessStatus()); + + try { + String fileExt = normalizeFileExtension(document.getTitle(), document.getDocumentPath()); + if (shouldUseDocumentParseBridge(fileExt)) { + handleBridgeParse(task, document, fileExt); + } else { + handleDefaultParse(task, document, fileExt); + } + } catch (Exception e) { + LOG.error("文档解析任务失败: taskId={}, documentId={}", taskId, document.getId(), e); + markParseFailed(task, document, truncateError(e.getMessage())); + } + } + + /** + * 收敛运行中的桥接解析任务。 + */ + public void monitorRunningParseTasks() { + QueryWrapper queryWrapper = QueryWrapper.create() + .eq(DocumentImportTask::getPhase, DocumentImportTaskPhase.PARSE.name()) + .eq(DocumentImportTask::getStatus, DocumentImportTaskStatus.RUNNING.name()) + .orderBy(DocumentImportTask::getModified, true) + .limit(PARSE_MONITOR_BATCH_SIZE); + List runningTasks = documentImportTaskService.list(queryWrapper); + if (runningTasks == null || runningTasks.isEmpty()) { + return; + } + for (DocumentImportTask task : runningTasks) { + if (task == null || task.getId() == null || !StringUtil.hasText(task.getProviderTaskId())) { + continue; + } + try { + selfProxy.handleRunningParseTask(task.getId()); + } catch (Exception e) { + LOG.error("收敛运行中的文档解析任务失败: taskId={}", task.getId(), e); + } + } + } + + /** + * 单次收敛指定解析任务的桥接状态。 + * + * @param taskId 任务 ID + */ + @Transactional + public void handleRunningParseTask(BigInteger taskId) { + DocumentImportTask task = requireTask(taskId); + if (!DocumentImportTaskPhase.PARSE.name().equals(task.getPhase())) { + return; + } + if (!DocumentImportTaskStatus.RUNNING.name().equals(task.getStatus()) + || !StringUtil.hasText(task.getProviderTaskId())) { + return; + } + tech.easyflow.ai.entity.Document document = requireDocument(task.getDocumentId()); + if 
(!DocumentProcessStatus.PARSING.name().equals(document.getProcessStatus())) { + return; + } + String fileExt = normalizeFileExtension(document.getTitle(), document.getDocumentPath()); + if (!shouldUseDocumentParseBridge(fileExt)) { + return; + } + try { + syncBridgeParseTask(task, document, fileExt); + } catch (Exception e) { + LOG.error("文档解析任务收敛失败: taskId={}, documentId={}", taskId, document.getId(), e); + markParseFailed(task, document, truncateError(e.getMessage())); + } + } + + /** + * 处理向量化任务消息。 + * + * @param taskId 任务 ID + */ + @Transactional + public void handleIndexTask(BigInteger taskId) { + DocumentImportTask task = requireTask(taskId); + if (!DocumentImportTaskPhase.INDEX.name().equals(task.getPhase())) { + LOG.warn("忽略非向量化阶段任务: taskId={}, phase={}", taskId, task.getPhase()); + return; + } + if (DocumentImportTaskStatus.COMPLETED.name().equals(task.getStatus()) + || DocumentImportTaskStatus.FAILED.name().equals(task.getStatus())) { + LOG.info("向量化任务已结束,跳过重复处理: taskId={}, status={}", taskId, task.getStatus()); + return; + } + if (!tryMarkTaskRunning(taskId)) { + LOG.info("向量化任务未抢占成功,跳过本次执行: taskId={}", taskId); + return; + } + task = requireTask(taskId); + + tech.easyflow.ai.entity.Document document = requireDocument(task.getDocumentId()); + LOG.info("开始执行文档向量化任务: taskId={}, documentId={}, knowledgeId={}, currentStatus={}", + taskId, document.getId(), task.getKnowledgeId(), document.getProcessStatus()); + StoreExecutionContext storeContext = null; + List storedChunks = new ArrayList(); + try { + DocumentCollection knowledge = assertDocumentCollection(task.getKnowledgeId()); + DocumentImportDtos.PreviewSession session = resolveIndexPreviewSession( + knowledge, + document, + asString(task.getPayloadJson().get("previewSessionId")) + ); + List chunks = session.getDocumentChunks(); + if (chunks == null || chunks.isEmpty()) { + throw new BusinessException("预览会话无有效分块"); + } + + clearPersistedChunks(document.getId()); + storeContext = 
prepareStoreContext(document); + int totalChunks = chunks.size(); + int completedChunks = 0; + for (int start = 0; start < chunks.size(); start += INDEX_BATCH_SIZE) { + int end = Math.min(start + INDEX_BATCH_SIZE, chunks.size()); + List batch = new ArrayList(chunks.subList(start, end)); + LOG.info("文档向量化任务开始处理批次: taskId={}, documentId={}, batchStart={}, batchEnd={}, batchSize={}, totalChunks={}", + taskId, document.getId(), start, end, batch.size(), totalChunks); + storeDocumentChunks(storeContext, batch); + storedChunks.addAll(batch); + persistChunkBatch(document, batch); + completedChunks += batch.size(); + updateDocumentIndexProgress(document.getId(), totalChunks, completedChunks); + } + updateKnowledgeAfterStore(storeContext); + markIndexCompleted(task, document, totalChunks); + documentImportPreviewService.remove(session.getSessionId()); + } catch (Exception e) { + LOG.error("文档向量化任务失败: taskId={}, documentId={}", taskId, document.getId(), e); + clearPersistedChunks(document.getId()); + if (storeContext != null && !storedChunks.isEmpty()) { + rollbackStoredChunks(taskId, document.getId(), storeContext, storedChunks); + } + markIndexFailed(task, document, truncateError(e.getMessage())); + } + } + + /** + * 通过统一文档解析桥接处理异步解析任务。 + * + * @param task 任务实体 + * @param document 文档实体 + * @param fileExt 文件后缀 + */ + private void handleBridgeParse(DocumentImportTask task, + tech.easyflow.ai.entity.Document document, + String fileExt) { + if (!StringUtil.hasText(task.getProviderTaskId())) { + submitBridgeParseTask(task, document, fileExt); + return; + } + syncBridgeParseTask(task, document, fileExt); + } + + private void handleDefaultParse(DocumentImportTask task, + tech.easyflow.ai.entity.Document document, + String fileExt) { + LOG.info("开始同步解析文档: taskId={}, documentId={}, fileName={}, fileExt={}", + task.getId(), document.getId(), document.getTitle(), fileExt); + String normalizedContent = readFileContent(document.getDocumentPath(), document.getTitle()); + 
DocumentParsedResult parsedResult = new DocumentParsedResult(); + parsedResult.setFileName(document.getTitle()); + parsedResult.setPreferredText(normalizedContent); + parsedResult.setPlainText(normalizedContent); + parsedResult.setMetadata(Map.of("sourceFormat", fileExt)); + LOG.info("同步解析文档完成: taskId={}, documentId={}, fileName={}, fileExt={}, contentLength={}", + task.getId(), document.getId(), document.getTitle(), fileExt, normalizedContent == null ? 0 : normalizedContent.length()); + markParseSuccess(task, document, parsedResult, fileExt, null); + } + + private void markParseSuccess(DocumentImportTask task, + tech.easyflow.ai.entity.Document document, + DocumentParsedResult parsedResult, + String sourceFormat, + String providerTaskId) { + String preferredText = resolvePreferredText(parsedResult); + if (!StringUtil.hasText(preferredText)) { + throw new BusinessException("文档解析结果为空"); + } + + Map options = copyOptions(document.getOptions()); + options.put(DocumentImportKeys.KEY_DOCUMENT_SOURCE_FILE_EXT, sourceFormat); + if (StringUtil.hasText(providerTaskId)) { + options.put(DocumentImportKeys.KEY_DOCUMENT_PROVIDER_TASK_ID, providerTaskId); + } + if (parsedResult != null && parsedResult.getMetadata() != null && !parsedResult.getMetadata().isEmpty()) { + options.put(DocumentImportKeys.KEY_DOCUMENT_PARSE_METADATA, new LinkedHashMap(parsedResult.getMetadata())); + } + if (parsedResult != null && parsedResult.getWarnings() != null && !parsedResult.getWarnings().isEmpty()) { + options.put(DocumentImportKeys.KEY_DOCUMENT_PARSE_WARNINGS, new ArrayList(parsedResult.getWarnings())); + } + + Date now = new Date(); + document.setContent(preferredText); + document.setDocumentType(sourceFormat); + document.setOptions(options); + document.setProcessStatus(DocumentProcessStatus.READY_FOR_SEGMENT.name()); + document.setProgressPercent(0); + document.setTotalChunks(0); + document.setCompletedChunks(0); + document.setFailedChunks(0); + document.setLastTaskError(null); + 
persistDocumentTaskState(document, now); + LOG.info("文档解析任务完成: taskId={}, documentId={}, processStatus={}, providerTaskId={}, contentLength={}", + task.getId(), document.getId(), DocumentProcessStatus.READY_FOR_SEGMENT.name(), providerTaskId, preferredText.length()); + + finishTask(task, now, DocumentImportTaskStatus.COMPLETED, null); + } + + private void markParseFailed(DocumentImportTask task, + tech.easyflow.ai.entity.Document document, + String errorMessage) { + Date now = new Date(); + document.setProcessStatus(DocumentProcessStatus.PARSE_FAILED.name()); + document.setLastTaskError(errorMessage); + persistDocumentTaskState(document, now); + LOG.warn("文档解析任务失败: taskId={}, documentId={}, processStatus={}, error={}", + task.getId(), document.getId(), DocumentProcessStatus.PARSE_FAILED.name(), errorMessage); + + finishTask(task, now, DocumentImportTaskStatus.FAILED, errorMessage); + } + + private void markIndexCompleted(DocumentImportTask task, + tech.easyflow.ai.entity.Document document, + int totalChunks) { + Date now = new Date(); + document.setProcessStatus(DocumentProcessStatus.COMPLETED.name()); + document.setTotalChunks(totalChunks); + document.setCompletedChunks(totalChunks); + document.setFailedChunks(0); + document.setProgressPercent(100); + document.setLastTaskError(null); + persistDocumentTaskState(document, now); + LOG.info("文档向量化任务完成: taskId={}, documentId={}, processStatus={}, totalChunks={}", + task.getId(), document.getId(), DocumentProcessStatus.COMPLETED.name(), totalChunks); + + finishTask(task, now, DocumentImportTaskStatus.COMPLETED, null); + } + + private void markIndexFailed(DocumentImportTask task, + tech.easyflow.ai.entity.Document document, + String errorMessage) { + tech.easyflow.ai.entity.Document current = requireDocument(document.getId()); + Date now = new Date(); + current.setProcessStatus(DocumentProcessStatus.INDEX_FAILED.name()); + current.setTotalChunks(defaultInt(current.getTotalChunks())); + current.setCompletedChunks(0); + 
current.setFailedChunks(defaultInt(current.getTotalChunks())); + current.setProgressPercent(0); + current.setLastTaskError(errorMessage); + persistDocumentTaskState(current, now); + LOG.warn("文档向量化任务失败: taskId={}, documentId={}, processStatus={}, completedChunks={}, totalChunks={}, error={}", + task.getId(), + document.getId(), + DocumentProcessStatus.INDEX_FAILED.name(), + defaultInt(current.getCompletedChunks()), + defaultInt(current.getTotalChunks()), + errorMessage); + + finishTask(task, now, DocumentImportTaskStatus.FAILED, errorMessage); + } + + /** + * 尝试将待处理任务抢占为运行中。 + * + * @param taskId 任务 ID + * @return 是否抢占成功 + */ + private boolean tryMarkTaskRunning(BigInteger taskId) { + Date now = new Date(); + DocumentImportTask update = new DocumentImportTask(); + update.setStatus(DocumentImportTaskStatus.RUNNING.name()); + update.setStartedAt(now); + update.setModified(now); + update.setModifiedBy(resolveOperatorId()); + + QueryWrapper queryWrapper = QueryWrapper.create() + .eq(DocumentImportTask::getId, taskId) + .eq(DocumentImportTask::getStatus, DocumentImportTaskStatus.PENDING.name()); + return documentImportTaskMapper.updateByQuery(update, queryWrapper) > 0; + } + + private void updateTaskProvider(DocumentImportTask task, String providerTaskId) { + task.setProviderTaskId(providerTaskId); + task.setModified(new Date()); + Map payload = new LinkedHashMap(task.getPayloadJson()); + payload.put("providerTaskId", providerTaskId); + task.setPayloadJson(payload); + documentImportTaskService.updateById(task); + } + + private void updateDocumentIndexing(tech.easyflow.ai.entity.Document document, int totalChunks) { + Date now = new Date(); + document.setProcessStatus(DocumentProcessStatus.INDEXING.name()); + document.setTotalChunks(totalChunks); + document.setCompletedChunks(0); + document.setFailedChunks(0); + document.setProgressPercent(0); + document.setLastTaskError(null); + persistDocumentTaskState(document, now); + } + + private void 
updateDocumentIndexProgress(BigInteger documentId, int totalChunks, int completedChunks) { + int progressPercent = Math.min(100, totalChunks <= 0 ? 0 : (completedChunks * 100) / totalChunks); + tech.easyflow.ai.entity.Document document = requireDocument(documentId); + Date now = new Date(); + document.setCompletedChunks(completedChunks); + document.setFailedChunks(0); + document.setProgressPercent(progressPercent); + persistDocumentTaskState(document, now); + LOG.info("文档向量化进度更新: documentId={}, completedChunks={}, totalChunks={}, progressPercent={}", + documentId, completedChunks, totalChunks, progressPercent); + } + + private void resetDocumentForParseRetry(tech.easyflow.ai.entity.Document document) { + Date now = new Date(); + document.setProcessStatus(DocumentProcessStatus.PARSING.name()); + document.setLastTaskError(null); + document.setProgressPercent(0); + persistDocumentTaskState(document, now); + } + + /** + * 持久化文档任务状态并推送局部状态刷新事件。 + * + * @param document 文档实体 + * @param now 当前时间 + */ + private void persistDocumentTaskState(tech.easyflow.ai.entity.Document document, Date now) { + document.setTaskModifiedAt(now); + document.setModified(now); + document.setModifiedBy(resolveOperatorId()); + documentMapper.update(document, false); + documentImportTaskStatusStreamService.publishAfterCommit(document.getId()); + } + + /** + * 收口任务终态更新,避免不同阶段遗漏错误信息或完成时间。 + * + * @param task 任务实体 + * @param now 当前时间 + * @param status 终态 + * @param errorSummary 错误摘要 + */ + private void finishTask(DocumentImportTask task, + Date now, + DocumentImportTaskStatus status, + String errorSummary) { + task.setStatus(status.name()); + task.setErrorSummary(errorSummary); + task.setFinishedAt(now); + task.setModified(now); + documentImportTaskService.updateById(task, false); + } + + private DocumentImportDtos.PreviewSession buildPreviewSessionForDocument(DocumentCollection knowledge, + tech.easyflow.ai.entity.Document document, + StrategyConfig requestedStrategy) { + if 
(!StringUtil.hasText(document.getContent())) { + throw new BusinessException("文档尚未完成解析"); + } + AnalysisResult analysis = ragIngestionService.analyze(document.getContent(), normalizeSourceFormat(document)); + StrategyConfig strategyConfig = resolveStrategyConfig(knowledge, requestedStrategy, analysis); + List previewChunks = ragIngestionService.split(analysis, strategyConfig); + if (previewChunks == null || previewChunks.isEmpty()) { + throw new BusinessException("未生成有效分块,请调整策略后重试"); + } + validatePreviewChunkRanges(previewChunks, analysis.getNormalizedContent()); + List documentChunks = buildDocumentChunks(document, previewChunks); + DocumentImportDtos.PreviewSession session = new DocumentImportDtos.PreviewSession(); + session.setKnowledgeId(knowledge.getId()); + session.setDocumentId(document.getId()); + session.setFilePath(document.getDocumentPath()); + session.setFileName(document.getTitle()); + session.setSourceFormat(normalizeSourceFormat(document)); + session.setStrategyConfig(strategyConfig); + session.setAnalysis(analysis); + session.setDocument(document); + session.setDocumentChunks(documentChunks); + session.setPreviewChunks(previewChunks); + session.setCreatedAt(new Date()); + return session; + } + + /** + * 组装处理页预览响应。 + * + * @param document 文档实体 + * @param session 预览会话 + * @param sessionId 预览会话 ID + * @return 预览文件结果 + */ + private DocumentImportDtos.PreviewFileResult buildPreviewFileResult(tech.easyflow.ai.entity.Document document, + DocumentImportDtos.PreviewSession session, + String sessionId) { + DocumentImportDtos.PreviewFileResult item = new DocumentImportDtos.PreviewFileResult(); + item.setPreviewSessionId(sessionId); + item.setFilePath(document.getDocumentPath()); + item.setFileName(document.getTitle()); + item.setNormalizedContent(session.getAnalysis() == null ? 
null : session.getAnalysis().getNormalizedContent()); + item.setStrategyCode(session.getStrategyConfig().getStrategyCode()); + item.setStrategyLabel(ragIngestionService.toStrategyLabel(session.getStrategyConfig().getStrategyCode())); + item.setAnalysis(session.getAnalysis()); + item.setTotalChunks(session.getDocumentChunks().size()); + item.setTotalWarnings(countWarnings(session.getPreviewChunks())); + item.setChunks(toPreviewChunkResults(session.getPreviewChunks())); + return item; + } + + private void mergeDocumentPreviewOptions(tech.easyflow.ai.entity.Document document, + DocumentImportDtos.PreviewSession session) { + Map options = copyOptions(document.getOptions()); + options.put(DocumentImportKeys.KEY_DOCUMENT_STRATEGY_CODE, session.getStrategyConfig().getStrategyCode()); + options.put(DocumentImportKeys.KEY_DOCUMENT_STRATEGY_LABEL, ragIngestionService.toStrategyLabel(session.getStrategyConfig().getStrategyCode())); + options.put(DocumentImportKeys.KEY_DOCUMENT_STRATEGY_SNAPSHOT, strategyConfigToMap(session.getStrategyConfig())); + options.put(DocumentImportKeys.KEY_DOCUMENT_ANALYSIS_SUMMARY, session.getAnalysis().getFeatures()); + options.put(DocumentImportKeys.KEY_DOCUMENT_SOURCE_FILE_EXT, session.getSourceFormat()); + options.put(DocumentImportKeys.KEY_DOCUMENT_PREVIEW_VERSION, "v2"); + + document.setOptions(options); + } + + private DocumentImportDtos.PreviewSession resolveIndexPreviewSession(DocumentCollection knowledge, + tech.easyflow.ai.entity.Document document, + String previewSessionId) { + if (StringUtil.hasText(previewSessionId)) { + DocumentImportDtos.PreviewSession session = documentImportPreviewService.getRequired(previewSessionId); + if (!knowledge.getId().equals(session.getKnowledgeId()) || !document.getId().equals(session.getDocumentId())) { + throw new BusinessException("预览会话与当前文档不匹配"); + } + return session; + } + StrategyConfig storedStrategy = readStoredStrategy(document); + DocumentImportDtos.PreviewSession rebuilt = 
buildPreviewSessionForDocument(knowledge, document, storedStrategy); + String sessionId = documentImportPreviewService.put(rebuilt); + rebuilt.setSessionId(sessionId); + return rebuilt; + } + + @SuppressWarnings("unchecked") + private StrategyConfig readStoredStrategy(tech.easyflow.ai.entity.Document document) { + Object snapshot = document.getOptions() == null ? null : document.getOptions().get(DocumentImportKeys.KEY_DOCUMENT_STRATEGY_SNAPSHOT); + if (!(snapshot instanceof Map)) { + throw new BusinessException("当前文档缺少分块策略,请重新生成预览"); + } + Map rawSnapshot = (Map) snapshot; + StrategyConfig config = StrategyConfig.defaults(); + config.setStrategyCode(asString(rawSnapshot.get("strategyCode"))); + config.setChunkSize(asInteger(rawSnapshot.get("chunkSize"), RagDefaults.CHUNK_SIZE)); + config.setOverlapSize(asInteger(rawSnapshot.get("overlapSize"), RagDefaults.OVERLAP_SIZE)); + config.setRegex(asString(rawSnapshot.get("regex"))); + config.setRowsPerChunk(asInteger(rawSnapshot.get("rowsPerChunk"), config.getRowsPerChunk())); + config.setMdSplitterLevel(asInteger(rawSnapshot.get("mdSplitterLevel"), RagDefaults.MD_SPLITTER_LEVEL)); + return config; + } + + private List buildDocumentChunks(tech.easyflow.ai.entity.Document document, + List previewChunks) { + FlexIDKeyGenerator flexIDKeyGenerator = new FlexIDKeyGenerator(); + List chunks = new ArrayList(); + for (int i = 0; i < previewChunks.size(); i++) { + RagChunk previewChunk = previewChunks.get(i); + DocumentChunk chunk = new DocumentChunk(); + chunk.setId(new BigInteger(String.valueOf(flexIDKeyGenerator.generate(chunk, null)))); + chunk.setDocumentId(document.getId()); + chunk.setDocumentCollectionId(document.getCollectionId()); + chunk.setContent(previewChunk.getContent()); + chunk.setSorting(i + 1); + + Map options = new LinkedHashMap(); + if (previewChunk.getOptions() != null) { + options.putAll(previewChunk.getOptions()); + } + options.put("chunkType", previewChunk.getChunkType()); + options.put("sourceLabel", 
previewChunk.getSourceLabel()); + options.put("headingPath", previewChunk.getHeadingPath()); + options.put("charCount", previewChunk.getCharCount()); + options.put("tokenEstimate", previewChunk.getTokenEstimate()); + options.put("qaQuestion", previewChunk.getQuestion()); + options.put("qaAnswer", previewChunk.getAnswer()); + options.put("partNo", previewChunk.getPartNo()); + options.put("partTotal", previewChunk.getPartTotal()); + options.put("warnings", previewChunk.getWarnings()); + options.put("sourceRanges", copySourceRanges(previewChunk)); + chunk.setOptions(options); + chunks.add(chunk); + } + return chunks; + } + + private DocumentImportTask createTask(tech.easyflow.ai.entity.Document document, + DocumentImportTaskPhase phase, + Map payload) { + Date now = new Date(); + DocumentImportTask task = new DocumentImportTask(); + task.setDocumentId(document.getId()); + task.setKnowledgeId(document.getCollectionId()); + task.setPhase(phase.name()); + task.setStatus(DocumentImportTaskStatus.PENDING.name()); + task.setPayloadJson(payload); + task.setCreated(now); + task.setModified(now); + task.setCreatedBy(resolveOperatorId()); + task.setModifiedBy(resolveOperatorId()); + documentImportTaskService.save(task); + return task; + } + + private Map buildFilePayload(DocumentImportDtos.TaskCreateRequest request) { + Map payload = new LinkedHashMap(); + payload.put("filePath", request.getFilePath()); + payload.put("fileName", request.getFileName()); + return payload; + } + + private Map buildDocumentPayload(tech.easyflow.ai.entity.Document document) { + Map payload = new LinkedHashMap(); + payload.put("filePath", document.getDocumentPath()); + payload.put("fileName", document.getTitle()); + return payload; + } + + private Map buildInitialOptions(String fileExt) { + Map options = new LinkedHashMap(); + options.put(DocumentImportKeys.KEY_DOCUMENT_SOURCE_FILE_EXT, fileExt); + return options; + } + + private StoreExecutionContext 
prepareStoreContext(tech.easyflow.ai.entity.Document document) { + DocumentCollection knowledge = knowledgeService.getById(document.getCollectionId()); + if (knowledge == null) { + throw new BusinessException("知识库不存在"); + } + DocumentStore documentStore = knowledge.toDocumentStore(); + if (documentStore == null) { + throw new BusinessException("向量数据库配置错误"); + } + Model model = modelService.getModelInstance(knowledge.getVectorEmbedModelId()); + if (model == null) { + throw new BusinessException("该知识库未配置向量模型"); + } + EmbeddingModel embeddingModel = model.toEmbeddingModel(); + documentStore.setEmbeddingModel(embeddingModel); + + StoreOptions options = StoreOptions.ofCollectionName(knowledge.getVectorStoreCollection()); + EmbeddingOptions embeddingOptions = new EmbeddingOptions(); + embeddingOptions.setModel(model.getModelName()); + embeddingOptions.setDimensions(knowledge.getDimensionOfVectorModel()); + options.setEmbeddingOptions(embeddingOptions); + options.setIndexName(options.getCollectionName()); + return new StoreExecutionContext( + knowledge, + embeddingModel, + documentStore, + options, + searcherFactory.getSearcher() + ); + } + + private void storeDocumentChunks(StoreExecutionContext storeContext, List documentChunks) { + List documents = new ArrayList(); + for (DocumentChunk chunk : documentChunks) { + Document storeDocument = new Document(); + storeDocument.setId(chunk.getId()); + storeDocument.setContent(chunk.getContent()); + storeDocument.addMetadata(KeywordSearchMetadataKeys.KNOWLEDGE_ID, + storeContext.knowledge.getId() == null ? 
null : storeContext.knowledge.getId().toString()); + documents.add(storeDocument); + } + StoreResult result = storeContext.documentStore.store(documents, storeContext.options); + if (result == null || !result.isSuccess()) { + throw new BusinessException("向量化写入失败"); + } + if (storeContext.searcher != null) { + for (Document storeDocument : documents) { + storeContext.searcher.addDocument(storeDocument); + } + } + } + + private void persistChunkBatch(tech.easyflow.ai.entity.Document document, List batch) { + for (DocumentChunk chunk : batch) { + chunk.setDocumentId(document.getId()); + chunk.setDocumentCollectionId(document.getCollectionId()); + documentChunkService.save(chunk); + } + } + + private void rollbackStoredChunks(BigInteger taskId, + BigInteger documentId, + StoreExecutionContext storeContext, + List documentChunks) { + try { + List ids = new ArrayList(); + for (DocumentChunk chunk : documentChunks) { + ids.add(chunk.getId()); + } + LOG.warn("开始回滚文档向量化外部索引: taskId={}, documentId={}, knowledgeId={}, chunkCount={}", + taskId, + documentId, + storeContext == null || storeContext.knowledge == null ? null : storeContext.knowledge.getId(), + ids.size()); + storeContext.documentStore.delete(ids, storeContext.options); + if (storeContext.searcher != null) { + for (BigInteger id : ids) { + storeContext.searcher.deleteDocument(id); + } + } + LOG.warn("文档向量化外部索引回滚完成: taskId={}, documentId={}, knowledgeId={}, chunkCount={}", + taskId, + documentId, + storeContext == null || storeContext.knowledge == null ? null : storeContext.knowledge.getId(), + ids.size()); + } catch (Exception e) { + LOG.error("回滚文档向量数据失败", e); + } + } + + private void updateKnowledgeAfterStore(StoreExecutionContext storeContext) { + DocumentCollection update = new DocumentCollection(); + update.setId(storeContext.knowledge.getId()); + Map options = storeContext.knowledge.getOptions() == null + ? 
new HashMap() + : new HashMap(storeContext.knowledge.getOptions()); + options.put(DocumentCollection.KEY_CAN_UPDATE_EMBEDDING_MODEL, false); + update.setOptions(options); + knowledgeService.updateById(update); + if (storeContext.knowledge.getDimensionOfVectorModel() == null) { + DocumentCollection dimensionUpdate = new DocumentCollection(); + dimensionUpdate.setId(storeContext.knowledge.getId()); + dimensionUpdate.setDimensionOfVectorModel(Model.getEmbeddingDimension(storeContext.embeddingModel)); + knowledgeService.updateById(dimensionUpdate); + } + } + + private void clearPersistedChunks(BigInteger documentId) { + if (documentId == null) { + return; + } + documentChunkMapper.deleteByQuery(QueryWrapper.create().eq(DocumentChunk::getDocumentId, documentId)); + } + + private void ensurePreviewable(tech.easyflow.ai.entity.Document document) { + String status = document.getProcessStatus(); + if (DocumentProcessStatus.PARSING.name().equals(status)) { + throw new BusinessException("文档仍在解析中"); + } + if (DocumentProcessStatus.INDEXING.name().equals(status)) { + throw new BusinessException("文档正在向量化中"); + } + if (DocumentProcessStatus.PARSE_FAILED.name().equals(status)) { + throw new BusinessException("文档解析失败,请先重试解析"); + } + } + + private boolean allowIndexStart(tech.easyflow.ai.entity.Document document) { + String status = document.getProcessStatus(); + return DocumentProcessStatus.READY_FOR_SEGMENT.name().equals(status) + || DocumentProcessStatus.READY_FOR_INDEX.name().equals(status) + || DocumentProcessStatus.INDEX_FAILED.name().equals(status); + } + + private DocumentImportDtos.TaskStartIndexResponse buildTaskStartResponse(DocumentImportTask task, + DocumentProcessStatus processStatus) { + DocumentImportDtos.TaskStartIndexResponse response = new DocumentImportDtos.TaskStartIndexResponse(); + response.setTaskId(task.getId()); + response.setProcessStatus(processStatus.name()); + return response; + } + + private tech.easyflow.ai.entity.Document requireDocument(BigInteger 
documentId) { + tech.easyflow.ai.entity.Document document = documentMapper.selectOneById(documentId); + if (document == null) { + throw new BusinessException("文档不存在"); + } + return document; + } + + private tech.easyflow.ai.entity.Document requireDocumentForKnowledge(BigInteger documentId, BigInteger knowledgeId) { + tech.easyflow.ai.entity.Document document = requireDocument(documentId); + if (document.getCollectionId() == null || document.getCollectionId().compareTo(knowledgeId) != 0) { + throw new BusinessException("文档不存在"); + } + return document; + } + + private DocumentImportTask requireTask(BigInteger taskId) { + DocumentImportTask task = documentImportTaskService.getById(taskId); + if (task == null) { + throw new BusinessException("任务不存在"); + } + return task; + } + + private DocumentCollection assertDocumentCollection(BigInteger knowledgeId) { + DocumentCollection knowledge = knowledgeService.getById(knowledgeId); + if (knowledge == null) { + throw new BusinessException("知识库不存在"); + } + if (knowledge.isFaqCollection()) { + throw new BusinessException("FAQ知识库不支持文档上传"); + } + return knowledge; + } + + private String normalizeSourceFormat(tech.easyflow.ai.entity.Document document) { + String type = document.getDocumentType(); + if (StringUtil.hasText(type)) { + return type.toLowerCase(Locale.ROOT); + } + return normalizeFileExtension(document.getTitle(), document.getDocumentPath()); + } + + private String normalizeFileExtension(String fileName, String filePath) { + String target = StringUtil.hasText(fileName) ? fileName : filePath; + String ext = FileUtil.getFileTypeByExtension(target); + return ext == null ? 
"" : ext.toLowerCase(Locale.ROOT); + } + + private void assertSupportedImportFile(String fileExt) { + if (!Arrays.asList("pdf", "docx", "txt", "md").contains(fileExt)) { + throw new BusinessException("当前仅支持 pdf/docx/txt/md 文档导入"); + } + } + + /** + * 判断当前文件是否应走统一文档解析桥接。 + * + * @param fileExt 文件后缀 + * @return 是否走桥接解析 + */ + private boolean shouldUseDocumentParseBridge(String fileExt) { + return "pdf".equals(fileExt) || "docx".equals(fileExt); + } + + /** + * 提交桥接解析任务。 + * + * @param task 任务实体 + * @param document 文档实体 + * @param fileExt 文件后缀 + */ + private void submitBridgeParseTask(DocumentImportTask task, + tech.easyflow.ai.entity.Document document, + String fileExt) { + DocumentSourceRef sourceRef = new DocumentSourceRef(); + sourceRef.setFileName(document.getTitle()); + sourceRef.setFilePath(document.getDocumentPath()); + sourceRef.setContentType(resolveBridgeContentType(fileExt)); + try { + LOG.info("文档解析桥接任务开始提交: taskId={}, documentId={}, fileName={}, fileExt={}", + task.getId(), document.getId(), document.getTitle(), fileExt); + String providerTaskId = documentParseBridgeService.submit(sourceRef, DocumentParseScenario.KNOWLEDGE_IMPORT).getTaskId(); + if (!StringUtil.hasText(providerTaskId)) { + throw new BusinessException("文档解析服务未返回任务ID"); + } + updateTaskProvider(task, providerTaskId); + LOG.info("文档解析桥接任务提交完成: taskId={}, documentId={}, providerTaskId={}", + task.getId(), document.getId(), providerTaskId); + } catch (BusinessException e) { + throw e; + } catch (Exception e) { + throw new BusinessException("文档解析失败:" + e.getMessage()); + } + } + + /** + * 单次查询桥接解析任务状态并收敛结果。 + * + * @param task 任务实体 + * @param document 文档实体 + * @param fileExt 文件后缀 + */ + private void syncBridgeParseTask(DocumentImportTask task, + tech.easyflow.ai.entity.Document document, + String fileExt) { + try { + DocumentParseTaskInfo taskInfo = documentParseBridgeService.queryTaskInfo(task.getProviderTaskId()); + String providerStatus = taskInfo == null ? 
null : taskInfo.getStatus(); + LOG.info("文档解析桥接任务单次收敛: taskId={}, documentId={}, providerTaskId={}, providerStatus={}, hasResult={}, error={}", + task.getId(), + document.getId(), + task.getProviderTaskId(), + providerStatus, + taskInfo != null && taskInfo.getResult() != null, + taskInfo == null ? null : taskInfo.getError()); + if (isTaskSuccess(providerStatus)) { + DocumentParsedResult result = taskInfo.getResult() == null + ? documentParseBridgeService.queryResult(task.getProviderTaskId()) + : taskInfo.getResult(); + markParseSuccess(task, document, result, fileExt, task.getProviderTaskId()); + return; + } + if (isTaskFailed(providerStatus)) { + throw new BusinessException(taskInfo == null ? "文档解析失败" : taskInfo.getError()); + } + touchRunningTask(task); + } catch (BusinessException e) { + throw e; + } catch (Exception e) { + throw new BusinessException("文档解析失败:" + e.getMessage()); + } + } + + /** + * 在事务提交后投递解析任务消息。 + * + * @param taskId 任务 ID + */ + private void dispatchParseTaskAfterCommit(BigInteger taskId) { + runAfterCommit(() -> parseTaskProducer.send(taskId)); + } + + /** + * 在事务提交后投递向量化任务消息。 + * + * @param taskId 任务 ID + */ + private void dispatchIndexTaskAfterCommit(BigInteger taskId) { + runAfterCommit(() -> indexTaskProducer.send(taskId)); + } + + /** + * 解析桥接所需的内容类型。 + * + * @param fileExt 文件后缀 + * @return MIME 类型 + */ + private String resolveBridgeContentType(String fileExt) { + if ("pdf".equals(fileExt)) { + return "application/pdf"; + } + if ("docx".equals(fileExt)) { + return "application/vnd.openxmlformats-officedocument.wordprocessingml.document"; + } + return null; + } + + /** + * 在事务提交后调度本地向量化兜底执行。 + * + * @param taskId 任务 ID + */ + private void scheduleIndexTaskFallback(BigInteger taskId) { + scheduleTaskFallback(taskId, "index", () -> selfProxy.handleIndexTask(taskId)); + } + + /** + * 在事务提交后调度本地解析兜底执行。 + * + * @param taskId 任务 ID + */ + private void scheduleParseTaskFallback(BigInteger taskId) { + scheduleTaskFallback(taskId, "parse", () -> 
selfProxy.handleParseTask(taskId)); + } + + /** + * 在事务提交后异步执行本地兜底任务。 + * + * @param taskId 任务 ID + * @param taskType 任务类型 + * @param action 执行动作 + */ + private void scheduleTaskFallback(BigInteger taskId, String taskType, Runnable action) { + Runnable dispatch = () -> documentImportTaskExecutor.execute(() -> { + try { + LOG.info("开始执行文档导入本地兜底任务: type={}, taskId={}", taskType, taskId); + action.run(); + LOG.info("文档导入本地兜底任务执行完成: type={}, taskId={}", taskType, taskId); + } catch (Exception e) { + LOG.error("执行文档导入本地兜底任务失败: type={}, taskId={}", taskType, taskId, e); + } + }); + runAfterCommit(dispatch); + } + + /** + * 在事务提交后执行指定动作;若当前无事务则立即执行。 + * + * @param action 执行动作 + */ + private void runAfterCommit(Runnable action) { + if (TransactionSynchronizationManager.isSynchronizationActive() + && TransactionSynchronizationManager.isActualTransactionActive()) { + TransactionSynchronizationManager.registerSynchronization(new TransactionSynchronization() { + @Override + public void afterCommit() { + action.run(); + } + }); + return; + } + action.run(); + } + + private String readFileContent(String filePath, String fileName) { + try (InputStream inputStream = storageService.readStream(filePath)) { + return com.easyagents.core.file2text.File2TextUtil.readFromStream(inputStream, fileName, null); + } catch (IOException e) { + throw new BusinessException("文件解析失败:" + e.getMessage()); + } + } + + private String resolvePreferredText(DocumentParsedResult parsedResult) { + if (parsedResult == null) { + return null; + } + if (StringUtil.hasText(parsedResult.getPreferredText())) { + return parsedResult.getPreferredText(); + } + if (StringUtil.hasText(parsedResult.getMarkdown())) { + return parsedResult.getMarkdown(); + } + return parsedResult.getPlainText(); + } + + /** + * 触摸运行中任务的修改时间,避免监控视角下任务长时间无变化。 + * + * @param task 任务实体 + */ + private void touchRunningTask(DocumentImportTask task) { + task.setModified(new Date()); + task.setModifiedBy(resolveOperatorId()); + 
documentImportTaskService.updateById(task, false); + } + + private StrategyConfig resolveStrategyConfig(DocumentCollection knowledge, + StrategyConfig requestConfig, + AnalysisResult analysisResult) { + Map options = knowledge.getOptions() == null + ? Collections.emptyMap() + : knowledge.getOptions(); + String recommended = analysisResult.getRecommendedStrategyCode(); + String defaultStrategyCode = asString(options.get(DocumentImportKeys.KEY_SPLITTER_DEFAULT_STRATEGY)); + String fallbackStrategyCode = asString(options.get(DocumentImportKeys.KEY_SPLITTER_FALLBACK_STRATEGY)); + Boolean autoRecommendEnabled = asBoolean(options.get(DocumentImportKeys.KEY_SPLITTER_AUTO_RECOMMEND_ENABLED), true); + + StrategyConfig config = readProfileConfig(options, defaultStrategyCode); + if (config == null) { + config = StrategyConfig.defaults(); + } + String requestedStrategyCode = requestConfig == null ? null : requestConfig.getStrategyCode(); + String strategyCode = StringUtil.hasText(requestedStrategyCode) ? requestedStrategyCode : config.getStrategyCode(); + if (!StringUtil.hasText(strategyCode) || RagStrategyCodes.AUTO.equals(strategyCode)) { + strategyCode = Boolean.TRUE.equals(autoRecommendEnabled) + ? recommended + : (StringUtil.hasText(defaultStrategyCode) ? defaultStrategyCode : recommended); + } + if (!StringUtil.hasText(strategyCode)) { + strategyCode = StringUtil.hasText(fallbackStrategyCode) + ? 
fallbackStrategyCode + : RagStrategyCodes.PARAGRAPH_LENGTH; + } + StrategyConfig profileConfig = readProfileConfig(options, strategyCode); + if (profileConfig != null) { + mergeStrategyConfig(config, profileConfig); + } + if (requestConfig != null) { + mergeStrategyConfig(config, requestConfig); + } + config.setStrategyCode(strategyCode); + if (config.getChunkSize() == null || config.getChunkSize() <= 0) { + config.setChunkSize(RagDefaults.CHUNK_SIZE); + } + if (config.getOverlapSize() == null || config.getOverlapSize() < 0) { + config.setOverlapSize(RagDefaults.OVERLAP_SIZE); + } + if (config.getMdSplitterLevel() == null || config.getMdSplitterLevel() <= 0) { + config.setMdSplitterLevel(RagDefaults.MD_SPLITTER_LEVEL); + } + return config; + } + + @SuppressWarnings("unchecked") + private StrategyConfig readProfileConfig(Map options, String strategyCode) { + if (!StringUtil.hasText(strategyCode)) { + return null; + } + Object profileObject = options.get(DocumentImportKeys.KEY_SPLITTER_STRATEGY_PROFILES); + if (!(profileObject instanceof Map)) { + return null; + } + Map profileMap = (Map) profileObject; + Object strategyObject = profileMap.get(strategyCode); + if (!(strategyObject instanceof Map)) { + return null; + } + Map rawProfile = (Map) strategyObject; + StrategyConfig config = StrategyConfig.defaults(); + config.setStrategyCode(strategyCode); + config.setChunkSize(asInteger(rawProfile.get("chunkSize"), config.getChunkSize())); + config.setOverlapSize(asInteger(rawProfile.get("overlapSize"), config.getOverlapSize())); + config.setRegex(asString(rawProfile.get("regex"))); + config.setRowsPerChunk(asInteger(rawProfile.get("rowsPerChunk"), config.getRowsPerChunk())); + config.setMdSplitterLevel(asInteger(rawProfile.get("mdSplitterLevel"), config.getMdSplitterLevel())); + return config; + } + + private void mergeStrategyConfig(StrategyConfig target, StrategyConfig source) { + if (source == null) { + return; + } + if (StringUtil.hasText(source.getStrategyCode())) { 
+ target.setStrategyCode(source.getStrategyCode()); + } + if (source.getChunkSize() != null) { + target.setChunkSize(source.getChunkSize()); + } + if (source.getOverlapSize() != null) { + target.setOverlapSize(source.getOverlapSize()); + } + if (StringUtil.hasText(source.getRegex())) { + target.setRegex(source.getRegex()); + } + if (source.getRowsPerChunk() != null) { + target.setRowsPerChunk(source.getRowsPerChunk()); + } + if (source.getMdSplitterLevel() != null) { + target.setMdSplitterLevel(source.getMdSplitterLevel()); + } + } + + private Map strategyConfigToMap(StrategyConfig strategyConfig) { + Map map = new LinkedHashMap(); + map.put("strategyCode", strategyConfig.getStrategyCode()); + map.put("chunkSize", strategyConfig.getChunkSize()); + map.put("overlapSize", strategyConfig.getOverlapSize()); + map.put("regex", strategyConfig.getRegex()); + map.put("rowsPerChunk", strategyConfig.getRowsPerChunk()); + map.put("mdSplitterLevel", strategyConfig.getMdSplitterLevel()); + return map; + } + + private int countWarnings(List chunks) { + int total = 0; + if (chunks == null) { + return 0; + } + for (RagChunk chunk : chunks) { + total += chunk.getWarnings() == null ? 0 : chunk.getWarnings().size(); + } + return total; + } + + /** + * 将分块预览转换为前端工作台使用的显式模型。 + * + * @param chunks 分块列表 + * @return 预览分块结果 + */ + private List toPreviewChunkResults(List chunks) { + List result = new ArrayList(); + if (chunks == null) { + return result; + } + for (RagChunk chunk : chunks) { + DocumentImportDtos.PreviewChunkResult item = new DocumentImportDtos.PreviewChunkResult(); + item.setAnswer(chunk.getAnswer()); + item.setCharCount(chunk.getCharCount()); + item.setChunkId(chunk.getChunkId()); + item.setChunkType(chunk.getChunkType()); + item.setContent(chunk.getContent()); + item.setHeadingPath(chunk.getHeadingPath() == null ? 
new ArrayList() : new ArrayList(chunk.getHeadingPath())); + item.setPartNo(chunk.getPartNo()); + item.setPartTotal(chunk.getPartTotal()); + item.setQuestion(chunk.getQuestion()); + item.setSourceLabel(chunk.getSourceLabel()); + item.setTokenEstimate(chunk.getTokenEstimate()); + item.setWarnings(chunk.getWarnings() == null ? new ArrayList() : new ArrayList(chunk.getWarnings())); + item.setSourceRanges(copySourceRanges(chunk)); + result.add(item); + } + return result; + } + + /** + * 校验切分结果是否具备稳定原文映射能力。 + * + * @param chunks 分块列表 + * @param normalizedContent 标准化原文 + */ + private void validatePreviewChunkRanges(List chunks, String normalizedContent) { + int maxLength = normalizedContent == null ? 0 : normalizedContent.length(); + if (maxLength <= 0) { + throw new BusinessException("标准化原文为空,无法生成分块预览"); + } + if (chunks == null || chunks.isEmpty()) { + throw new BusinessException("未生成有效分块,请调整策略后重试"); + } + for (RagChunk chunk : chunks) { + List ranges = copySourceRanges(chunk); + if (ranges.isEmpty()) { + throw new BusinessException("当前分块策略缺少稳定原文映射,请调整策略后重试"); + } + for (DocumentImportDtos.PreviewSourceRange range : ranges) { + int start = defaultInt(range.getStart()); + int end = defaultInt(range.getEnd()); + if (start < 0 || end <= start || end > maxLength) { + throw new BusinessException("当前分块策略生成了非法原文区间,请调整策略后重试"); + } + } + } + } + + /** + * 从分块元信息中复制原文区间。 + * + * @param chunk 分块实体 + * @return 原文区间列表 + */ + @SuppressWarnings("unchecked") + private List copySourceRanges(RagChunk chunk) { + List result = new ArrayList(); + if (chunk == null || chunk.getOptions() == null) { + return result; + } + Object rawRanges = chunk.getOptions().get(SOURCE_RANGES_KEY); + if (!(rawRanges instanceof List rangeList)) { + return result; + } + for (Object item : rangeList) { + if (!(item instanceof Map rawRange)) { + continue; + } + DocumentImportDtos.PreviewSourceRange range = new DocumentImportDtos.PreviewSourceRange(); + range.setStart(asInteger(rawRange.get("start"), null)); + 
range.setEnd(asInteger(rawRange.get("end"), null)); + if (range.getStart() != null && range.getEnd() != null) { + result.add(range); + } + } + return result; + } + + private BigInteger generateId(Object entity) { + FlexIDKeyGenerator generator = new FlexIDKeyGenerator(); + return new BigInteger(String.valueOf(generator.generate(entity, null))); + } + + private BigInteger resolveOperatorId() { + try { + return BigInteger.valueOf(StpUtil.getLoginIdAsLong()); + } catch (Exception ignore) { + return BigInteger.ZERO; + } + } + + private String truncateError(String message) { + if (!StringUtil.hasText(message)) { + return "任务执行失败"; + } + String trimmed = message.trim(); + return trimmed.length() > 1000 ? trimmed.substring(0, 1000) : trimmed; + } + + private boolean isTaskSuccess(String status) { + String normalized = status == null ? "" : status.toUpperCase(Locale.ROOT); + return normalized.contains("SUCCESS") || normalized.contains("COMPLETED") || normalized.contains("DONE"); + } + + private boolean isTaskFailed(String status) { + String normalized = status == null ? "" : status.toUpperCase(Locale.ROOT); + return normalized.contains("FAIL") || normalized.contains("ERROR") || normalized.contains("CANCEL"); + } + + private Map copyOptions(Map options) { + return options == null ? new LinkedHashMap() : new LinkedHashMap(options); + } + + private String asString(Object value) { + return value == null ? 
null : String.valueOf(value); + } + + private Integer asInteger(Object value, Integer defaultValue) { + if (value == null) { + return defaultValue; + } + if (value instanceof Number number) { + return number.intValue(); + } + if (value instanceof String text && StringUtil.hasText(text)) { + return Integer.parseInt(text); + } + return defaultValue; + } + + private Boolean asBoolean(Object value, boolean defaultValue) { + if (value == null) { + return defaultValue; + } + if (value instanceof Boolean bool) { + return bool; + } + if (value instanceof Number number) { + return number.intValue() != 0; + } + return Boolean.parseBoolean(String.valueOf(value)); + } + + private int defaultInt(Integer value) { + return value == null ? 0 : value; + } + + private static class StoreExecutionContext { + private final DocumentCollection knowledge; + private final EmbeddingModel embeddingModel; + private final DocumentStore documentStore; + private final StoreOptions options; + private final DocumentSearcher searcher; + + private StoreExecutionContext(DocumentCollection knowledge, + EmbeddingModel embeddingModel, + DocumentStore documentStore, + StoreOptions options, + DocumentSearcher searcher) { + this.knowledge = knowledge; + this.embeddingModel = embeddingModel; + this.documentStore = documentStore; + this.options = options; + this.searcher = searcher; + } + } +} diff --git a/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/entity/Document.java b/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/entity/Document.java index 2ab81fe..a1ca125 100644 --- a/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/entity/Document.java +++ b/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/entity/Document.java @@ -55,4 +55,16 @@ public class Document extends DocumentBase { public void setOverlapSize(int overlapSize) { this.overlapSize = overlapSize; } + + /** + * 获取列表展示时的优先分块数。 + * + * @return 分块数 + */ + public long 
getDisplayChunkCount() { + if (getTotalChunks() != null && getTotalChunks() > 0) { + return getTotalChunks(); + } + return chunkCount == null ? 0L : chunkCount.longValue(); + } } diff --git a/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/entity/DocumentImportTask.java b/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/entity/DocumentImportTask.java new file mode 100644 index 0000000..04935e1 --- /dev/null +++ b/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/entity/DocumentImportTask.java @@ -0,0 +1,182 @@ +package tech.easyflow.ai.entity; + +import com.mybatisflex.annotation.Column; +import com.mybatisflex.annotation.Id; +import com.mybatisflex.annotation.KeyType; +import com.mybatisflex.annotation.Table; +import com.mybatisflex.core.handler.FastjsonTypeHandler; +import tech.easyflow.common.entity.DateEntity; + +import java.io.Serializable; +import java.math.BigInteger; +import java.util.Date; +import java.util.LinkedHashMap; +import java.util.Map; + +/** + * 知识库文档导入任务。 + * + * @author Codex + * @since 2026-04-14 + */ +@Table(value = "tb_document_import_task", comment = "知识库文档导入任务") +public class DocumentImportTask extends DateEntity implements Serializable { + + @Id(keyType = KeyType.Generator, value = "snowFlakeId") + private BigInteger id; + + @Column(comment = "文档ID") + private BigInteger documentId; + + @Column(comment = "知识库ID") + private BigInteger knowledgeId; + + @Column(comment = "任务阶段") + private String phase; + + @Column(comment = "任务状态") + private String status; + + @Column(comment = "底层任务ID") + private String providerTaskId; + + @Column(typeHandler = FastjsonTypeHandler.class, comment = "任务载荷") + private Map payloadJson; + + @Column(comment = "错误摘要") + private String errorSummary; + + @Column(comment = "开始时间") + private Date startedAt; + + @Column(comment = "结束时间") + private Date finishedAt; + + @Column(comment = "创建时间") + private Date created; + + @Column(comment = "创建人") + private BigInteger 
createdBy; + + @Column(comment = "修改时间") + private Date modified; + + @Column(comment = "修改人") + private BigInteger modifiedBy; + + public BigInteger getId() { + return id; + } + + public void setId(BigInteger id) { + this.id = id; + } + + public BigInteger getDocumentId() { + return documentId; + } + + public void setDocumentId(BigInteger documentId) { + this.documentId = documentId; + } + + public BigInteger getKnowledgeId() { + return knowledgeId; + } + + public void setKnowledgeId(BigInteger knowledgeId) { + this.knowledgeId = knowledgeId; + } + + public String getPhase() { + return phase; + } + + public void setPhase(String phase) { + this.phase = phase; + } + + public String getStatus() { + return status; + } + + public void setStatus(String status) { + this.status = status; + } + + public String getProviderTaskId() { + return providerTaskId; + } + + public void setProviderTaskId(String providerTaskId) { + this.providerTaskId = providerTaskId; + } + + public Map getPayloadJson() { + return payloadJson == null ? new LinkedHashMap() : payloadJson; + } + + public void setPayloadJson(Map payloadJson) { + this.payloadJson = payloadJson == null ? 
new LinkedHashMap() : payloadJson; + } + + public String getErrorSummary() { + return errorSummary; + } + + public void setErrorSummary(String errorSummary) { + this.errorSummary = errorSummary; + } + + public Date getStartedAt() { + return startedAt; + } + + public void setStartedAt(Date startedAt) { + this.startedAt = startedAt; + } + + public Date getFinishedAt() { + return finishedAt; + } + + public void setFinishedAt(Date finishedAt) { + this.finishedAt = finishedAt; + } + + @Override + public Date getCreated() { + return created; + } + + @Override + public void setCreated(Date created) { + this.created = created; + } + + public BigInteger getCreatedBy() { + return createdBy; + } + + public void setCreatedBy(BigInteger createdBy) { + this.createdBy = createdBy; + } + + @Override + public Date getModified() { + return modified; + } + + @Override + public void setModified(Date modified) { + this.modified = modified; + } + + public BigInteger getModifiedBy() { + return modifiedBy; + } + + public void setModifiedBy(BigInteger modifiedBy) { + this.modifiedBy = modifiedBy; + } +} diff --git a/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/entity/base/DocumentBase.java b/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/entity/base/DocumentBase.java index e96b6ff..8f3f6e8 100644 --- a/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/entity/base/DocumentBase.java +++ b/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/entity/base/DocumentBase.java @@ -72,6 +72,48 @@ public class DocumentBase extends DateEntity implements Serializable { @Column(typeHandler = FastjsonTypeHandler.class, comment = "其他配置项") private Map options; + /** + * Processing status. + */ + @Column(comment = "处理状态") + private String processStatus; + + /** + * Total number of chunks. + */ + @Column(comment = "总分块数") + private Integer totalChunks; + + /** + * Number of completed chunks. + */ + @Column(comment = "已完成分块数") + private Integer completedChunks; + + /** + * Number of failed chunks. + */ +
@Column(comment = "失败分块数") + private Integer failedChunks; + + /** + * Processing progress percentage. + */ + @Column(comment = "处理进度百分比") + private Integer progressPercent; + + /** + * Error summary of the most recent task. + */ + @Column(comment = "最近任务错误摘要") + private String lastTaskError; + + /** + * Time the task status was last updated. + */ + @Column(comment = "任务状态更新时间") + private Date taskModifiedAt; + /** * 创建时间 */ @@ -176,6 +218,62 @@ public class DocumentBase extends DateEntity implements Serializable { this.options = options; } + public String getProcessStatus() { + return processStatus; + } + + public void setProcessStatus(String processStatus) { + this.processStatus = processStatus; + } + + public Integer getTotalChunks() { + return totalChunks; + } + + public void setTotalChunks(Integer totalChunks) { + this.totalChunks = totalChunks; + } + + public Integer getCompletedChunks() { + return completedChunks; + } + + public void setCompletedChunks(Integer completedChunks) { + this.completedChunks = completedChunks; + } + + public Integer getFailedChunks() { + return failedChunks; + } + + public void setFailedChunks(Integer failedChunks) { + this.failedChunks = failedChunks; + } + + public Integer getProgressPercent() { + return progressPercent; + } + + public void setProgressPercent(Integer progressPercent) { + this.progressPercent = progressPercent; + } + + public String getLastTaskError() { + return lastTaskError; + } + + public void setLastTaskError(String lastTaskError) { + this.lastTaskError = lastTaskError; + } + + public Date getTaskModifiedAt() { + return taskModifiedAt; + } + + public void setTaskModifiedAt(Date taskModifiedAt) { + this.taskModifiedAt = taskModifiedAt; + } + public Date getCreated() { return created; } diff --git a/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/enums/DocumentImportTaskPhase.java b/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/enums/DocumentImportTaskPhase.java new file mode 100644 index 0000000..cf2f308 --- /dev/null +++
b/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/enums/DocumentImportTaskPhase.java @@ -0,0 +1,20 @@ +package tech.easyflow.ai.enums; + +/** + * Phase of a document import task. + * + * @author Codex + * @since 2026-04-14 + */ +public enum DocumentImportTaskPhase { + + /** + * Document parsing phase. + */ + PARSE, + + /** + * Vectorization phase. + */ + INDEX +} diff --git a/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/enums/DocumentImportTaskStatus.java b/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/enums/DocumentImportTaskStatus.java new file mode 100644 index 0000000..a7479f5 --- /dev/null +++ b/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/enums/DocumentImportTaskStatus.java @@ -0,0 +1,30 @@ +package tech.easyflow.ai.enums; + +/** + * Status of a document import task. + * + * @author Codex + * @since 2026-04-14 + */ +public enum DocumentImportTaskStatus { + + /** + * Created and waiting to be executed. + */ + PENDING, + + /** + * Currently executing. + */ + RUNNING, + + /** + * Execution failed. + */ + FAILED, + + /** + * Execution completed. + */ + COMPLETED +} diff --git a/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/enums/DocumentProcessStatus.java b/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/enums/DocumentProcessStatus.java new file mode 100644 index 0000000..305210e --- /dev/null +++ b/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/enums/DocumentProcessStatus.java @@ -0,0 +1,59 @@ +package tech.easyflow.ai.enums; + +/** + * Processing status of a document. + * + * @author Codex + * @since 2026-04-14 + */ +public enum DocumentProcessStatus { + + /** + * Uploaded; asynchronous processing has not started yet. + */ + UPLOADED, + + /** + * Parsing in progress. + */ + PARSING, + + /** + * Parsing failed. + */ + PARSE_FAILED, + + /** + * Ready for chunking (segmentation) to be configured. + */ + READY_FOR_SEGMENT, + + /** + * Chunks confirmed; vectorization can start. + */ + READY_FOR_INDEX, + + /** + * Vectorization in progress. + */ + INDEXING, + + /** + * Vectorization failed. + */ + INDEX_FAILED, + + /** + * Entire pipeline completed. + */ + COMPLETED; + + /** + * Returns whether this status is an in-progress state, i.e. {@code PARSING} or {@code INDEXING}. + * + * @return {@code true} if processing is in progress + */ + public boolean isProcessing() { + return this == PARSING
|| this == INDEXING; + } +} diff --git a/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/mapper/DocumentImportTaskMapper.java b/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/mapper/DocumentImportTaskMapper.java new file mode 100644 index 0000000..90e508c --- /dev/null +++ b/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/mapper/DocumentImportTaskMapper.java @@ -0,0 +1,13 @@ +package tech.easyflow.ai.mapper; + +import com.mybatisflex.core.BaseMapper; +import tech.easyflow.ai.entity.DocumentImportTask; + +/** + * Mapper for document import tasks. + * + * @author Codex + * @since 2026-04-14 + */ +public interface DocumentImportTaskMapper extends BaseMapper { +} diff --git a/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/rag/KnowledgeRetrievalRequest.java b/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/rag/KnowledgeRetrievalRequest.java index f0ddc9e..a5a5379 100644 --- a/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/rag/KnowledgeRetrievalRequest.java +++ b/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/rag/KnowledgeRetrievalRequest.java @@ -9,6 +9,7 @@ public class KnowledgeRetrievalRequest { private BigInteger knowledgeId; private String query; private Integer limit; + private Double minSimilarity; private RetrievalMode retrievalMode = RetrievalMode.HYBRID; private String callerType; private String callerId; @@ -37,6 +38,24 @@ public class KnowledgeRetrievalRequest { this.limit = limit; } + /** + * Returns the minimum similarity threshold used for retrieval. + * + * @return the minimum similarity threshold + */ + public Double getMinSimilarity() { + return minSimilarity; + } + + /** + * Sets the minimum similarity threshold used for retrieval. + * + * @param minSimilarity the minimum similarity threshold + */ + public void setMinSimilarity(Double minSimilarity) { + this.minSimilarity = minSimilarity; + } + public RetrievalMode getRetrievalMode() { return retrievalMode; } diff --git
a/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/service/DocumentImportTaskService.java b/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/service/DocumentImportTaskService.java new file mode 100644 index 0000000..6ce655d --- /dev/null +++ b/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/service/DocumentImportTaskService.java @@ -0,0 +1,13 @@ +package tech.easyflow.ai.service; + +import com.mybatisflex.core.service.IService; +import tech.easyflow.ai.entity.DocumentImportTask; + +/** + * Service for document import tasks. + * + * @author Codex + * @since 2026-04-14 + */ +public interface DocumentImportTaskService extends IService { +} diff --git a/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/service/DocumentService.java b/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/service/DocumentService.java index a5efce9..3da7c32 100644 --- a/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/service/DocumentService.java +++ b/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/service/DocumentService.java @@ -32,4 +32,16 @@ public interface DocumentService extends IService { Result previewImport(DocumentImportDtos.PreviewRequest request); Result commitImport(DocumentImportDtos.CommitRequest request); + + Result createImportTask(DocumentImportDtos.TaskCreateRequest request); + + Result getImportTaskDetail(BigInteger taskId); + + Result previewImportTask(DocumentImportDtos.PreviewRequest request); + + Result startIndexTask(DocumentImportDtos.TaskStartIndexRequest request); + + Result retryParseTask(DocumentImportDtos.TaskRetryRequest request); + + Result retryIndexTask(DocumentImportDtos.TaskRetryRequest request); } diff --git a/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/service/impl/DocumentCollectionServiceImpl.java b/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/service/impl/DocumentCollectionServiceImpl.java index
ae16454..df91789 100644 --- a/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/service/impl/DocumentCollectionServiceImpl.java +++ b/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/service/impl/DocumentCollectionServiceImpl.java @@ -30,9 +30,11 @@ import tech.easyflow.ai.entity.DocumentChunk; import tech.easyflow.ai.entity.DocumentCollection; import tech.easyflow.ai.entity.FaqItem; import tech.easyflow.ai.entity.Model; +import tech.easyflow.ai.enums.DocumentProcessStatus; import tech.easyflow.ai.enums.PublishStatus; import tech.easyflow.ai.mapper.DocumentChunkMapper; import tech.easyflow.ai.mapper.DocumentCollectionMapper; +import tech.easyflow.ai.mapper.DocumentMapper; import tech.easyflow.ai.mapper.FaqItemMapper; import tech.easyflow.ai.rag.KnowledgeRetrievalRequest; import tech.easyflow.ai.service.DocumentCollectionService; @@ -71,6 +73,8 @@ public class DocumentCollectionServiceImpl extends ServiceImpl searchDocuments = toDocuments(retrievalResult.getHits()); - fillSearchContent(documentCollection, searchDocuments); + List searchDocuments = prepareSearchDocuments( + documentCollection, + toDocuments(retrievalResult.getHits()) + ); if (searchDocuments.isEmpty()) { return Collections.emptyList(); } @@ -138,7 +148,10 @@ public class DocumentCollectionServiceImpl extends ServiceImpl prepareSearchDocuments(DocumentCollection documentCollection, List searchDocuments) { + if (searchDocuments == null || searchDocuments.isEmpty()) { + return Collections.emptyList(); + } + if (documentCollection == null) { + return searchDocuments; + } + if (documentCollection.isFaqCollection()) { + fillSearchContent(documentCollection, searchDocuments); + return searchDocuments; + } + DocumentHitSnapshot hitSnapshot = loadDocumentHitSnapshot(documentCollection, searchDocuments); + if (hitSnapshot.isEmpty()) { + return Collections.emptyList(); + } + + return searchDocuments.stream() + .filter(Objects::nonNull) + .filter(item -> { + String content = 
hitSnapshot.findChunkContent(item.getId()); + if (!StringUtil.hasText(content)) { + return false; + } + item.setContent(content); + return true; + }) + .collect(Collectors.toList()); + } + @Override public DocumentCollection getDetail(String idOrAlias) { DocumentCollection knowledge = null; @@ -418,18 +509,93 @@ public class DocumentCollectionServiceImpl extends ServiceImpl chunkMap = documentChunkMapper.selectListByQuery(queryWrapper).stream() - .collect(Collectors.toMap(item -> item.getId().toString(), item -> item, (a, b) -> a)); - searchDocuments.removeIf(item -> !chunkMap.containsKey(String.valueOf(item.getId()))); + DocumentHitSnapshot hitSnapshot = loadDocumentHitSnapshot(documentCollection, searchDocuments); searchDocuments.forEach(item -> { - DocumentChunk documentChunk = chunkMap.get(String.valueOf(item.getId())); - if (documentChunk != null && !StringUtil.noText(documentChunk.getContent())) { - item.setContent(documentChunk.getContent()); - } + item.setContent(hitSnapshot.findChunkContent(item.getId())); }); + searchDocuments.removeIf(item -> !StringUtil.hasText(item.getContent())); + } + + /** + * Batch-loads the hit chunks and their parent documents in COMPLETED status, for reuse by filtering and content filling. + * + * @param documentCollection the knowledge base; chunks and documents are restricted to its id + * @param searchDocuments the retrieval hits whose ids are used as chunk ids + * @return the hit snapshot; an empty snapshot when inputs are null/empty, no chunk matches, or no parent document id is present + */ + private DocumentHitSnapshot loadDocumentHitSnapshot(DocumentCollection documentCollection, List searchDocuments) { + if (documentCollection == null || searchDocuments == null || searchDocuments.isEmpty()) { + return DocumentHitSnapshot.empty(); + } + List chunkIds = searchDocuments.stream() + .map(Document::getId) + .filter(Objects::nonNull) + .map(item -> (Serializable) item) + .collect(Collectors.toList()); + if (chunkIds.isEmpty()) { + return DocumentHitSnapshot.empty(); + } + + QueryWrapper chunkWrapper = QueryWrapper.create(); + chunkWrapper.in(DocumentChunk::getId, chunkIds); + chunkWrapper.eq(DocumentChunk::getDocumentCollectionId, documentCollection.getId()); + Map chunkMap =
documentChunkMapper.selectListByQuery(chunkWrapper).stream() + .collect(Collectors.toMap(item -> item.getId().toString(), item -> item, (a, b) -> a)); + if (chunkMap.isEmpty()) { + return DocumentHitSnapshot.empty(); + } + + List documentIds = chunkMap.values().stream() + .map(DocumentChunk::getDocumentId) + .filter(Objects::nonNull) + .distinct() + .map(item -> (Serializable) item) + .collect(Collectors.toList()); + if (documentIds.isEmpty()) { + return DocumentHitSnapshot.empty(); + } + + QueryWrapper documentWrapper = QueryWrapper.create(); + documentWrapper.in(tech.easyflow.ai.entity.Document::getId, documentIds); + documentWrapper.eq(tech.easyflow.ai.entity.Document::getCollectionId, documentCollection.getId()); + documentWrapper.eq(tech.easyflow.ai.entity.Document::getProcessStatus, DocumentProcessStatus.COMPLETED.name()); + Map documentMap = documentMapper.selectListByQuery(documentWrapper).stream() + .collect(Collectors.toMap(item -> item.getId().toString(), item -> item, (a, b) -> a)); + return new DocumentHitSnapshot(chunkMap, documentMap); + } + + /** + * Batch snapshot of document retrieval hits, so the filtering and content-filling stages do not repeat queries. + */ + private static class DocumentHitSnapshot { + + private final Map chunkMap; + private final Map documentMap; + + private DocumentHitSnapshot(Map chunkMap, + Map documentMap) { + this.chunkMap = chunkMap == null ? Collections.emptyMap() : chunkMap; + this.documentMap = documentMap == null ?
Collections.emptyMap() : documentMap; + } + + private static DocumentHitSnapshot empty() { + return new DocumentHitSnapshot(Collections.emptyMap(), Collections.emptyMap()); + } + + private boolean isEmpty() { + return chunkMap.isEmpty() || documentMap.isEmpty(); + } + + private String findChunkContent(Object chunkId) { + DocumentChunk documentChunk = chunkMap.get(String.valueOf(chunkId)); + if (documentChunk == null || documentChunk.getDocumentId() == null) { + return null; + } + if (!documentMap.containsKey(String.valueOf(documentChunk.getDocumentId()))) { + return null; + } + return StringUtil.noText(documentChunk.getContent()) ? null : documentChunk.getContent(); + } } private String buildFaqPromptContent(FaqItem faqItem, List> images) { diff --git a/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/service/impl/DocumentImportTaskServiceImpl.java b/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/service/impl/DocumentImportTaskServiceImpl.java new file mode 100644 index 0000000..994cf5e --- /dev/null +++ b/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/service/impl/DocumentImportTaskServiceImpl.java @@ -0,0 +1,18 @@ +package tech.easyflow.ai.service.impl; + +import com.mybatisflex.spring.service.impl.ServiceImpl; +import org.springframework.stereotype.Service; +import tech.easyflow.ai.entity.DocumentImportTask; +import tech.easyflow.ai.mapper.DocumentImportTaskMapper; +import tech.easyflow.ai.service.DocumentImportTaskService; + +/** + * Implementation of the document import task service. + * + * @author Codex + * @since 2026-04-14 + */ +@Service +public class DocumentImportTaskServiceImpl extends ServiceImpl + implements DocumentImportTaskService { +} diff --git a/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/service/impl/DocumentServiceImpl.java b/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/service/impl/DocumentServiceImpl.java index 1a1508c..a759f7e 100644 ---
a/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/service/impl/DocumentServiceImpl.java +++ b/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/service/impl/DocumentServiceImpl.java @@ -34,7 +34,9 @@ import tech.easyflow.ai.config.SearcherFactory; import tech.easyflow.ai.documentimport.DocumentImportDtos; import tech.easyflow.ai.documentimport.DocumentImportKeys; import tech.easyflow.ai.documentimport.DocumentImportPreviewService; +import tech.easyflow.ai.documentimport.task.KnowledgeDocumentImportTaskAppService; import tech.easyflow.ai.entity.*; +import tech.easyflow.ai.enums.DocumentProcessStatus; import tech.easyflow.ai.mapper.DocumentChunkMapper; import tech.easyflow.ai.mapper.DocumentMapper; import tech.easyflow.ai.service.DocumentChunkService; @@ -69,6 +71,7 @@ import static tech.easyflow.ai.entity.table.DocumentTableDef.DOCUMENT; @Service("AiService") public class DocumentServiceImpl extends ServiceImpl implements DocumentService { protected Logger Log = LoggerFactory.getLogger(DocumentServiceImpl.class); + private static final String SOURCE_RANGES_KEY = "sourceRanges"; @Resource private DocumentMapper documentMapper; @@ -97,6 +100,9 @@ public class DocumentServiceImpl extends ServiceImpl i @Autowired private DocumentImportPreviewService documentImportPreviewService; + @Autowired + private KnowledgeDocumentImportTaskAppService importTaskAppService; + @Override public Page getDocumentList(String knowledgeId, int pageSize, int pageNum, String fileName) { QueryWrapper queryWrapper=QueryWrapper.create() @@ -130,6 +136,13 @@ public class DocumentServiceImpl extends ServiceImpl i // 查询该文档对应哪些分割的字段,先删除 QueryWrapper queryWrapperDocument = QueryWrapper.create().eq(Document::getId, id); Document oneByQuery = documentMapper.selectOneByQuery(queryWrapperDocument); + if (oneByQuery == null) { + return false; + } + if (DocumentProcessStatus.PARSING.name().equals(oneByQuery.getProcessStatus()) + || 
DocumentProcessStatus.INDEXING.name().equals(oneByQuery.getProcessStatus())) { + throw new BusinessException("文档处理中,暂不允许删除"); + } DocumentCollection knowledge = knowledgeService.getById(oneByQuery.getCollectionId()); if (knowledge == null) { return false; @@ -209,12 +222,18 @@ public class DocumentServiceImpl extends ServiceImpl i aiDocument.setDocumentPath(filePath); aiDocument.setCreated(new Date()); aiDocument.setModifiedBy(BigInteger.valueOf(StpUtil.getLoginIdAsLong())); - aiDocument.setModified(new Date()); - aiDocument.setContent(document.getContent()); - aiDocument.setChunkSize(documentCollectionSplitParams.getChunkSize()); - aiDocument.setOverlapSize(documentCollectionSplitParams.getOverlapSize()); - aiDocument.setTitle(fileOriginName); - Map res = new HashMap<>(); + aiDocument.setModified(new Date()); + aiDocument.setContent(document.getContent()); + aiDocument.setChunkSize(documentCollectionSplitParams.getChunkSize()); + aiDocument.setOverlapSize(documentCollectionSplitParams.getOverlapSize()); + aiDocument.setTitle(fileOriginName); + aiDocument.setProcessStatus(DocumentProcessStatus.COMPLETED.name()); + aiDocument.setTotalChunks(previewList.size()); + aiDocument.setCompletedChunks(previewList.size()); + aiDocument.setFailedChunks(0); + aiDocument.setProgressPercent(100); + aiDocument.setTaskModifiedAt(new Date()); + Map res = new HashMap<>(); List documentChunks = null; String operation = documentCollectionSplitParams.getOperation(); @@ -334,10 +353,11 @@ public class DocumentServiceImpl extends ServiceImpl i item.setPreviewSessionId(sessionId); item.setFilePath(file.getFilePath()); item.setFileName(file.getFileName()); + item.setNormalizedContent(session.getAnalysis() == null ? 
null : session.getAnalysis().getNormalizedContent()); item.setStrategyCode(session.getStrategyConfig().getStrategyCode()); item.setStrategyLabel(ragIngestionService.toStrategyLabel(session.getStrategyConfig().getStrategyCode())); item.setAnalysis(session.getAnalysis()); - item.setChunks(session.getPreviewChunks()); + item.setChunks(toPreviewChunkResults(session.getPreviewChunks())); item.setTotalChunks(session.getPreviewChunks().size()); item.setTotalWarnings(countWarnings(session.getPreviewChunks())); items.add(item); @@ -398,6 +418,12 @@ public class DocumentServiceImpl extends ServiceImpl i document.setModified(new Date()); document.setCreatedBy(BigInteger.valueOf(StpUtil.getLoginIdAsLong())); document.setModifiedBy(BigInteger.valueOf(StpUtil.getLoginIdAsLong())); + document.setProcessStatus(DocumentProcessStatus.COMPLETED.name()); + document.setTotalChunks(session.getDocumentChunks().size()); + document.setCompletedChunks(session.getDocumentChunks().size()); + document.setFailedChunks(0); + document.setProgressPercent(100); + document.setTaskModifiedAt(new Date()); for (DocumentChunk chunk : session.getDocumentChunks()) { chunk.setDocumentId(document.getId()); chunk.setDocumentCollectionId(document.getCollectionId()); @@ -430,6 +456,7 @@ public class DocumentServiceImpl extends ServiceImpl i DocumentImportDtos.PreviewSession session = new DocumentImportDtos.PreviewSession(); session.setKnowledgeId(knowledge.getId()); + session.setDocumentId(document.getId()); session.setFilePath(fileRequest.getFilePath()); session.setFileName(fileRequest.getFileName()); session.setSourceFormat(analysis.getSourceFormat()); @@ -656,6 +683,55 @@ public class DocumentServiceImpl extends ServiceImpl i return total; } + private List toPreviewChunkResults(List chunks) { + List result = new ArrayList<>(); + if (chunks == null) { + return result; + } + for (RagChunk chunk : chunks) { + DocumentImportDtos.PreviewChunkResult item = new DocumentImportDtos.PreviewChunkResult(); + 
item.setAnswer(chunk.getAnswer()); + item.setCharCount(chunk.getCharCount()); + item.setChunkId(chunk.getChunkId()); + item.setChunkType(chunk.getChunkType()); + item.setContent(chunk.getContent()); + item.setHeadingPath(chunk.getHeadingPath() == null ? new ArrayList<>() : new ArrayList<>(chunk.getHeadingPath())); + item.setPartNo(chunk.getPartNo()); + item.setPartTotal(chunk.getPartTotal()); + item.setQuestion(chunk.getQuestion()); + item.setSourceLabel(chunk.getSourceLabel()); + item.setTokenEstimate(chunk.getTokenEstimate()); + item.setWarnings(chunk.getWarnings() == null ? new ArrayList<>() : new ArrayList<>(chunk.getWarnings())); + item.setSourceRanges(copySourceRanges(chunk)); + result.add(item); + } + return result; + } + + @SuppressWarnings("unchecked") + private List copySourceRanges(RagChunk chunk) { + List result = new ArrayList<>(); + if (chunk == null || chunk.getOptions() == null) { + return result; + } + Object rawRanges = chunk.getOptions().get(SOURCE_RANGES_KEY); + if (!(rawRanges instanceof List rangeList)) { + return result; + } + for (Object item : rangeList) { + if (!(item instanceof Map rangeMap)) { + continue; + } + DocumentImportDtos.PreviewSourceRange range = new DocumentImportDtos.PreviewSourceRange(); + range.setStart(asInteger(rangeMap.get("start"), null)); + range.setEnd(asInteger(rangeMap.get("end"), null)); + if (range.getStart() != null && range.getEnd() != null) { + result.add(range); + } + } + return result; + } + private StoreExecutionContext prepareStoreContext(Document entity) { DocumentCollection knowledge = knowledgeService.getById(entity.getCollectionId()); if (knowledge == null) { @@ -882,4 +958,34 @@ public class DocumentServiceImpl extends ServiceImpl i } return null; } + + @Override + public Result createImportTask(DocumentImportDtos.TaskCreateRequest request) { + return importTaskAppService.createImportTask(request); + } + + @Override + public Result getImportTaskDetail(BigInteger taskId) { + return 
importTaskAppService.getImportTaskDetail(taskId); + } + + @Override + public Result previewImportTask(DocumentImportDtos.PreviewRequest request) { + return importTaskAppService.previewImportTask(request); + } + + @Override + public Result startIndexTask(DocumentImportDtos.TaskStartIndexRequest request) { + return importTaskAppService.startIndexTask(request); + } + + @Override + public Result retryParseTask(DocumentImportDtos.TaskRetryRequest request) { + return importTaskAppService.retryParseTask(request); + } + + @Override + public Result retryIndexTask(DocumentImportDtos.TaskRetryRequest request) { + return importTaskAppService.retryIndexTask(request); + } } diff --git a/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/service/impl/KnowledgeSharePermissionServiceImpl.java b/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/service/impl/KnowledgeSharePermissionServiceImpl.java index 12cc222..503a78c 100644 --- a/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/service/impl/KnowledgeSharePermissionServiceImpl.java +++ b/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/service/impl/KnowledgeSharePermissionServiceImpl.java @@ -37,6 +37,7 @@ public class KnowledgeSharePermissionServiceImpl implements KnowledgeSharePermis "/public-api/knowledge-share/detail", "/public-api/knowledge-share/document/page", "/public-api/knowledge-share/document/download", + "/public-api/knowledge-share/document/import/task/detail", "/public-api/knowledge-share/documentChunk/page", "/public-api/knowledge-share/faq/page", "/public-api/knowledge-share/faq/detail" @@ -48,6 +49,11 @@ public class KnowledgeSharePermissionServiceImpl implements KnowledgeSharePermis "/public-api/knowledge-share/document/import/analyze", "/public-api/knowledge-share/document/import/preview", "/public-api/knowledge-share/document/import/commit", + "/public-api/knowledge-share/document/import/task/create", +
"/public-api/knowledge-share/document/import/task/preview", + "/public-api/knowledge-share/document/import/task/startIndex", + "/public-api/knowledge-share/document/import/task/retryParse", + "/public-api/knowledge-share/document/import/task/retryIndex", "/public-api/knowledge-share/faq/save" )); URI_SCOPE_MAPPING.put(KnowledgeShareActionScope.CONTENT_UPDATE.name(), List.of( diff --git a/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/vo/KnowledgeShareViewDetail.java b/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/vo/KnowledgeShareViewDetail.java new file mode 100644 index 0000000..26e0748 --- /dev/null +++ b/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/vo/KnowledgeShareViewDetail.java @@ -0,0 +1,44 @@ +package tech.easyflow.ai.vo; + +import tech.easyflow.ai.entity.DocumentCollection; + +import java.io.Serializable; +import java.util.ArrayList; +import java.util.List; + +/** + * Detail view for the knowledge base share page. + * + * @author Codex + * @since 2026-04-15 + */ +public class KnowledgeShareViewDetail implements Serializable { + + /** + * The knowledge base this share refers to. + */ + private DocumentCollection knowledge; + + /** + * Permission scopes granted by this share. + */ + private List permissionScopes = new ArrayList(); + + public DocumentCollection getKnowledge() { + return knowledge; + } + + public void setKnowledge(DocumentCollection knowledge) { + this.knowledge = knowledge; + } + + public List getPermissionScopes() { + return permissionScopes; + } + + public void setPermissionScopes(List permissionScopes) { + this.permissionScopes = permissionScopes == null + ?
new ArrayList() + : new ArrayList(permissionScopes); + } +} diff --git a/easyflow-modules/easyflow-module-ai/src/test/java/tech/easyflow/ai/documentimport/task/KnowledgeDocumentImportTaskAppServiceTest.java b/easyflow-modules/easyflow-module-ai/src/test/java/tech/easyflow/ai/documentimport/task/KnowledgeDocumentImportTaskAppServiceTest.java new file mode 100644 index 0000000..185992e --- /dev/null +++ b/easyflow-modules/easyflow-module-ai/src/test/java/tech/easyflow/ai/documentimport/task/KnowledgeDocumentImportTaskAppServiceTest.java @@ -0,0 +1,148 @@ +package tech.easyflow.ai.documentimport.task; + +import org.junit.Assert; +import org.junit.Test; +import tech.easyflow.ai.entity.DocumentImportTask; +import tech.easyflow.ai.enums.DocumentImportTaskStatus; +import tech.easyflow.ai.enums.DocumentProcessStatus; +import tech.easyflow.ai.mapper.DocumentMapper; +import tech.easyflow.ai.service.DocumentImportTaskService; + +import java.lang.reflect.Field; +import java.lang.reflect.Method; +import java.lang.reflect.Proxy; +import java.math.BigInteger; +import java.util.concurrent.atomic.AtomicReference; + +/** + * Regression tests for {@link KnowledgeDocumentImportTaskAppService}. + * + * @author Codex + * @since 2026-04-15 + */ +public class KnowledgeDocumentImportTaskAppServiceTest { + + /** + * Verifies that an indexing failure resets progress with whole-document failure semantics and refreshes the task error information. + * + * @throws Exception if the reflective invocation fails + */ + @Test + public void markIndexFailedShouldResetProgressAndPersistLatestError() throws Exception { + BigInteger documentId = BigInteger.valueOf(10); + BigInteger knowledgeId = BigInteger.valueOf(20); + + tech.easyflow.ai.entity.Document persistedDocument = new tech.easyflow.ai.entity.Document(); + persistedDocument.setId(documentId); + persistedDocument.setCollectionId(knowledgeId); + persistedDocument.setProcessStatus(DocumentProcessStatus.INDEXING.name()); + persistedDocument.setTotalChunks(8); + persistedDocument.setCompletedChunks(5); + persistedDocument.setFailedChunks(1); + persistedDocument.setProgressPercent(63); +
persistedDocument.setLastTaskError("旧错误"); + + AtomicReference updatedDocumentRef = new AtomicReference(); + AtomicReference updatedTaskRef = new AtomicReference(); + + KnowledgeDocumentImportTaskAppService service = new KnowledgeDocumentImportTaskAppService(); + setField(service, "documentMapper", mockDocumentMapper(persistedDocument, updatedDocumentRef)); + setField(service, "documentImportTaskService", mockDocumentImportTaskService(updatedTaskRef)); + setField(service, "documentImportTaskStatusStreamService", new NoopTaskStatusStreamService()); + + DocumentImportTask task = new DocumentImportTask(); + task.setId(BigInteger.valueOf(30)); + task.setDocumentId(documentId); + task.setKnowledgeId(knowledgeId); + task.setStatus(DocumentImportTaskStatus.RUNNING.name()); + task.setErrorSummary("旧错误"); + + tech.easyflow.ai.entity.Document inputDocument = new tech.easyflow.ai.entity.Document(); + inputDocument.setId(documentId); + inputDocument.setCollectionId(knowledgeId); + + Method method = KnowledgeDocumentImportTaskAppService.class.getDeclaredMethod( + "markIndexFailed", + DocumentImportTask.class, + tech.easyflow.ai.entity.Document.class, + String.class + ); + method.setAccessible(true); + method.invoke(service, task, inputDocument, "新错误"); + + tech.easyflow.ai.entity.Document updatedDocument = updatedDocumentRef.get(); + Assert.assertNotNull(updatedDocument); + Assert.assertEquals(DocumentProcessStatus.INDEX_FAILED.name(), updatedDocument.getProcessStatus()); + Assert.assertEquals(Integer.valueOf(0), updatedDocument.getCompletedChunks()); + Assert.assertEquals(Integer.valueOf(8), updatedDocument.getFailedChunks()); + Assert.assertEquals(Integer.valueOf(0), updatedDocument.getProgressPercent()); + Assert.assertEquals("新错误", updatedDocument.getLastTaskError()); + + DocumentImportTask updatedTask = updatedTaskRef.get(); + Assert.assertNotNull(updatedTask); + Assert.assertEquals(DocumentImportTaskStatus.FAILED.name(), updatedTask.getStatus()); +
Assert.assertEquals("新错误", updatedTask.getErrorSummary()); + } + + private static DocumentMapper mockDocumentMapper(tech.easyflow.ai.entity.Document persistedDocument, + AtomicReference updatedDocumentRef) { + return (DocumentMapper) Proxy.newProxyInstance( + DocumentMapper.class.getClassLoader(), + new Class[]{DocumentMapper.class}, + (proxy, method, args) -> { + if ("selectOneById".equals(method.getName())) { + return persistedDocument; + } + if ("update".equals(method.getName())) { + updatedDocumentRef.set((tech.easyflow.ai.entity.Document) args[0]); + return 1; + } + return defaultValue(method.getReturnType()); + } + ); + } + + private static DocumentImportTaskService mockDocumentImportTaskService(AtomicReference updatedTaskRef) { + return (DocumentImportTaskService) Proxy.newProxyInstance( + DocumentImportTaskService.class.getClassLoader(), + new Class[]{DocumentImportTaskService.class}, + (proxy, method, args) -> { + if ("updateById".equals(method.getName())) { + updatedTaskRef.set((DocumentImportTask) args[0]); + return true; + } + return defaultValue(method.getReturnType()); + } + ); + } + + private static void setField(Object target, String fieldName, Object value) throws Exception { + Field field = KnowledgeDocumentImportTaskAppService.class.getDeclaredField(fieldName); + field.setAccessible(true); + field.set(target, value); + } + + private static Object defaultValue(Class returnType) { + if (returnType == boolean.class) { + return false; + } + if (returnType == int.class) { + return 0; + } + if (returnType == long.class) { + return 0L; + } + return null; + } + + /** + * SSE push stub for tests; avoids depending on a thread pool and real pushes. + */ + private static class NoopTaskStatusStreamService extends DocumentImportTaskStatusStreamService { + + @Override + public void publishAfterCommit(BigInteger documentId) { + // no-op + } + } +} diff --git a/easyflow-modules/easyflow-module-ai/src/test/java/tech/easyflow/ai/service/impl/DocumentCollectionServiceImplTest.java
b/easyflow-modules/easyflow-module-ai/src/test/java/tech/easyflow/ai/service/impl/DocumentCollectionServiceImplTest.java new file mode 100644 index 0000000..2e570ff --- /dev/null +++ b/easyflow-modules/easyflow-module-ai/src/test/java/tech/easyflow/ai/service/impl/DocumentCollectionServiceImplTest.java @@ -0,0 +1,238 @@ +package tech.easyflow.ai.service.impl; + +import com.easyagents.core.document.Document; +import com.easyagents.search.engine.service.DocumentSearcher; +import com.easyagents.search.engine.service.KeywordSearchRequest; +import org.junit.Assert; +import org.junit.Test; +import org.springframework.beans.factory.ObjectProvider; +import tech.easyflow.ai.config.SearcherFactory; +import tech.easyflow.ai.enums.DocumentProcessStatus; +import tech.easyflow.ai.mapper.DocumentChunkMapper; +import tech.easyflow.ai.mapper.DocumentMapper; + +import java.io.Serializable; +import java.lang.reflect.Field; +import java.lang.reflect.Proxy; +import java.math.BigDecimal; +import java.math.BigInteger; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import static tech.easyflow.ai.entity.DocumentCollection.KEY_DOC_RECALL_MAX_NUM; +import static tech.easyflow.ai.entity.DocumentCollection.KEY_SIMILARITY_THRESHOLD; + +/** + * {@link DocumentCollectionServiceImpl} 回归测试。 + * + * @author Codex + * @since 2026-04-15 + */ +public class DocumentCollectionServiceImplTest { + + /** + * 验证检索结果会在重排前过滤掉未完成文档,避免高分进行中文档挤占最终名额。 + * + * @throws Exception 反射注入异常 + */ + @Test + public void searchShouldFilterNonCompletedChunksBeforeFinalTopK() throws Exception { + BigInteger knowledgeId = BigInteger.ONE; + BigInteger completedDocumentId = BigInteger.valueOf(101); + BigInteger indexingDocumentId = BigInteger.valueOf(102); + BigInteger completedChunkId = BigInteger.valueOf(1001); + BigInteger indexingChunkId = BigInteger.valueOf(1002); + + tech.easyflow.ai.entity.DocumentCollection collection = new tech.easyflow.ai.entity.DocumentCollection(); + 
collection.setId(knowledgeId); + collection.setCollectionType(tech.easyflow.ai.entity.DocumentCollection.TYPE_DOCUMENT); + collection.setOptions(new HashMap() {{ + put(KEY_DOC_RECALL_MAX_NUM, 1); + put(KEY_SIMILARITY_THRESHOLD, BigDecimal.ZERO); + }}); + + tech.easyflow.ai.entity.DocumentChunk completedChunk = new tech.easyflow.ai.entity.DocumentChunk(); + completedChunk.setId(completedChunkId); + completedChunk.setDocumentId(completedDocumentId); + completedChunk.setDocumentCollectionId(knowledgeId); + completedChunk.setContent("completed chunk"); + + tech.easyflow.ai.entity.DocumentChunk indexingChunk = new tech.easyflow.ai.entity.DocumentChunk(); + indexingChunk.setId(indexingChunkId); + indexingChunk.setDocumentId(indexingDocumentId); + indexingChunk.setDocumentCollectionId(knowledgeId); + indexingChunk.setContent("indexing chunk"); + + tech.easyflow.ai.entity.Document completedDocument = new tech.easyflow.ai.entity.Document(); + completedDocument.setId(completedDocumentId); + completedDocument.setCollectionId(knowledgeId); + completedDocument.setProcessStatus(DocumentProcessStatus.COMPLETED.name()); + completedDocument.setTitle("completed"); + + TestKeywordSearcher searcher = new TestKeywordSearcher(List.of( + buildHit(indexingChunkId, 0.99D), + buildHit(completedChunkId, 0.75D) + )); + + DocumentCollectionServiceImpl service = new TestDocumentCollectionService(collection); + setField(service, "searcherFactory", new SearcherFactory(new StaticObjectProvider(searcher))); + setField(service, "documentChunkMapper", mockDocumentChunkMapper(completedChunk, indexingChunk)); + setField(service, "documentMapper", mockDocumentMapper(completedDocument)); + + tech.easyflow.ai.rag.KnowledgeRetrievalRequest request = new tech.easyflow.ai.rag.KnowledgeRetrievalRequest(); + request.setKnowledgeId(knowledgeId); + request.setQuery("test-query"); + request.setRetrievalMode(com.easyagents.rag.retrieval.RetrievalMode.KEYWORD); + + List result = service.search(request); + + 
Assert.assertEquals("内部关键词召回应扩容到业务 topK 的 5 倍", 5, searcher.lastRequestCount); + Assert.assertEquals("知识库过滤后只应保留完成态文档", 1, result.size()); + Assert.assertEquals(completedChunkId, result.get(0).getId()); + Assert.assertEquals("completed chunk", result.get(0).getContent()); + Assert.assertEquals(String.valueOf(knowledgeId), searcher.lastKnowledgeId); + } + + private static Document buildHit(BigInteger id, double score) { + Document document = new Document(); + document.setId(id); + document.setScore(score); + document.setContent("raw-hit-" + id); + return document; + } + + private static DocumentChunkMapper mockDocumentChunkMapper(tech.easyflow.ai.entity.DocumentChunk... chunks) { + Map chunkMap = new HashMap(); + for (tech.easyflow.ai.entity.DocumentChunk chunk : chunks) { + chunkMap.put(String.valueOf(chunk.getId()), chunk); + } + return (DocumentChunkMapper) Proxy.newProxyInstance( + DocumentChunkMapper.class.getClassLoader(), + new Class[]{DocumentChunkMapper.class}, + (proxy, method, args) -> { + if ("selectListByQuery".equals(method.getName())) { + return List.copyOf(chunkMap.values()); + } + return defaultValue(method.getReturnType()); + } + ); + } + + private static DocumentMapper mockDocumentMapper(tech.easyflow.ai.entity.Document completedDocument) { + return (DocumentMapper) Proxy.newProxyInstance( + DocumentMapper.class.getClassLoader(), + new Class[]{DocumentMapper.class}, + (proxy, method, args) -> { + if ("selectListByQuery".equals(method.getName())) { + return List.of(completedDocument); + } + return defaultValue(method.getReturnType()); + } + ); + } + + private static void setField(Object target, String fieldName, Object value) throws Exception { + Field field = DocumentCollectionServiceImpl.class.getDeclaredField(fieldName); + field.setAccessible(true); + field.set(target, value); + } + + private static Object defaultValue(Class returnType) { + if (returnType == boolean.class) { + return false; + } + if (returnType == int.class) { + return 0; + } + 
if (returnType == long.class) { + return 0L; + } + return null; + } + + /** + * 固定返回测试知识库实体,避免依赖数据库。 + */ + private static class TestDocumentCollectionService extends DocumentCollectionServiceImpl { + + private final tech.easyflow.ai.entity.DocumentCollection collection; + + private TestDocumentCollectionService(tech.easyflow.ai.entity.DocumentCollection collection) { + this.collection = collection; + } + + @Override + public tech.easyflow.ai.entity.DocumentCollection getById(Serializable id) { + return collection; + } + } + + /** + * 记录关键词检索请求参数的搜索器桩实现。 + */ + private static class TestKeywordSearcher implements DocumentSearcher { + + private final List documents; + private int lastRequestCount; + private String lastKnowledgeId; + + private TestKeywordSearcher(List documents) { + this.documents = documents; + } + + @Override + public boolean addDocument(Document document) { + return true; + } + + @Override + public boolean deleteDocument(Object id) { + return true; + } + + @Override + public boolean updateDocument(Document document) { + return true; + } + + @Override + public List searchDocuments(KeywordSearchRequest request) { + this.lastRequestCount = request.getCount(); + this.lastKnowledgeId = request.getKnowledgeId(); + return documents; + } + } + + /** + * 最小 ObjectProvider 实现,仅服务搜索器工厂测试注入。 + */ + private static class StaticObjectProvider implements ObjectProvider { + + private final T value; + + private StaticObjectProvider(T value) { + this.value = value; + } + + @Override + public T getObject(Object... 
args) { + return value; + } + + @Override + public T getIfAvailable() { + return value; + } + + @Override + public T getIfUnique() { + return value; + } + + @Override + public T getObject() { + return value; + } + } +} diff --git a/easyflow-starter/easyflow-starter-all/src/main/java/tech/easyflow/starter/MainApplication.java b/easyflow-starter/easyflow-starter-all/src/main/java/tech/easyflow/starter/MainApplication.java index 91c6f14..693a341 100644 --- a/easyflow-starter/easyflow-starter-all/src/main/java/tech/easyflow/starter/MainApplication.java +++ b/easyflow-starter/easyflow-starter-all/src/main/java/tech/easyflow/starter/MainApplication.java @@ -1,13 +1,22 @@ package tech.easyflow.starter; import org.dromara.x.file.storage.spring.EnableFileStorage; -import tech.easyflow.common.spring.BaseApp; +import org.springframework.boot.actuate.autoconfigure.elasticsearch.ElasticsearchRestHealthContributorAutoConfiguration; import org.springframework.boot.autoconfigure.SpringBootApplication; +import tech.easyflow.common.spring.BaseApp; -@SpringBootApplication +/** + * EasyFlow 启动入口。 + */ +@SpringBootApplication(exclude = ElasticsearchRestHealthContributorAutoConfiguration.class) @EnableFileStorage public class MainApplication extends BaseApp { + /** + * 启动 EasyFlow 应用。 + * + * @param args 启动参数 + */ public static void main(String[] args) { run(MainApplication.class, args); } diff --git a/easyflow-starter/easyflow-starter-all/src/main/resources/db/migration/mysql/V13__mysql_document_import_task.sql b/easyflow-starter/easyflow-starter-all/src/main/resources/db/migration/mysql/V13__mysql_document_import_task.sql new file mode 100644 index 0000000..2d5e2b8 --- /dev/null +++ b/easyflow-starter/easyflow-starter-all/src/main/resources/db/migration/mysql/V13__mysql_document_import_task.sql @@ -0,0 +1,48 @@ +ALTER TABLE `tb_document` + ADD COLUMN `process_status` varchar(32) NULL DEFAULT NULL COMMENT '处理状态' AFTER `options`, + ADD COLUMN `total_chunks` int NULL DEFAULT 0 COMMENT 
'总分块数' AFTER `process_status`, + ADD COLUMN `completed_chunks` int NULL DEFAULT 0 COMMENT '已完成分块数' AFTER `total_chunks`, + ADD COLUMN `failed_chunks` int NULL DEFAULT 0 COMMENT '失败分块数' AFTER `completed_chunks`, + ADD COLUMN `progress_percent` int NULL DEFAULT 0 COMMENT '处理进度百分比' AFTER `failed_chunks`, + ADD COLUMN `last_task_error` varchar(1024) NULL DEFAULT NULL COMMENT '最近任务错误摘要' AFTER `progress_percent`, + ADD COLUMN `task_modified_at` datetime NULL DEFAULT NULL COMMENT '任务状态更新时间' AFTER `last_task_error`; + +CREATE TABLE `tb_document_import_task` +( + `id` bigint UNSIGNED NOT NULL COMMENT '主键', + `document_id` bigint UNSIGNED NOT NULL COMMENT '文档ID', + `knowledge_id` bigint UNSIGNED NOT NULL COMMENT '知识库ID', + `phase` varchar(16) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL COMMENT '任务阶段', + `status` varchar(16) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL COMMENT '任务状态', + `provider_task_id` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '底层任务ID', + `payload_json` text CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL COMMENT '任务载荷', + `error_summary` varchar(1024) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '错误摘要', + `started_at` datetime NULL DEFAULT NULL COMMENT '开始时间', + `finished_at` datetime NULL DEFAULT NULL COMMENT '结束时间', + `created` datetime NULL DEFAULT NULL COMMENT '创建时间', + `created_by` bigint UNSIGNED NULL DEFAULT NULL COMMENT '创建人', + `modified` datetime NULL DEFAULT NULL COMMENT '修改时间', + `modified_by` bigint UNSIGNED NULL DEFAULT NULL COMMENT '修改人', + PRIMARY KEY (`id`) USING BTREE, + INDEX `idx_document_import_task_document`(`document_id`) USING BTREE, + INDEX `idx_document_import_task_knowledge`(`knowledge_id`) USING BTREE, + INDEX `idx_document_import_task_phase_status`(`phase`, `status`) USING BTREE +) ENGINE = InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci COMMENT = '知识库文档导入任务' ROW_FORMAT = DYNAMIC; + +UPDATE `tb_document` d 
+SET d.`process_status` = 'COMPLETED', + d.`total_chunks` = ( + SELECT COUNT(1) + FROM `tb_document_chunk` c + WHERE c.`document_id` = d.`id` + ), + d.`completed_chunks` = ( + SELECT COUNT(1) + FROM `tb_document_chunk` c + WHERE c.`document_id` = d.`id` + ), + d.`failed_chunks` = 0, + d.`progress_percent` = 100, + d.`last_task_error` = NULL, + d.`task_modified_at` = COALESCE(d.`modified`, d.`created`, NOW()) +WHERE d.`process_status` IS NULL; diff --git a/easyflow-ui-admin/app/src/components/page/PageData.vue b/easyflow-ui-admin/app/src/components/page/PageData.vue index aa07b9f..a1276db 100644 --- a/easyflow-ui-admin/app/src/components/page/PageData.vue +++ b/easyflow-ui-admin/app/src/components/page/PageData.vue @@ -78,6 +78,25 @@ const handleCurrentChange = (newPage: number) => { pageInfo.pageNumber = newPage; }; +const patchRowById = ( + id: number | string, + patch: Record, +): boolean => { + const rowIndex = pageList.value.findIndex( + (item: Record) => String(item?.id ?? '') === String(id ?? 
''), + ); + if (rowIndex === -1) { + return false; + } + const nextPageList = [...pageList.value]; + nextPageList[rowIndex] = { + ...nextPageList[rowIndex], + ...patch, + }; + pageList.value = nextPageList; + return true; +}; + // 暴露给父组件的方法 (替代 useImperativeHandle) const setQuery = (newQueryParams: Record) => { pageInfo.pageNumber = 1; @@ -89,6 +108,7 @@ const setQuery = (newQueryParams: Record) => { // 暴露方法给父组件 defineExpose({ reload: getPageList, + patchRowById, setQuery, }); diff --git a/easyflow-ui-admin/app/src/components/upload/DragFileUpload.vue b/easyflow-ui-admin/app/src/components/upload/DragFileUpload.vue index 2ae7513..168cc3a 100644 --- a/easyflow-ui-admin/app/src/components/upload/DragFileUpload.vue +++ b/easyflow-ui-admin/app/src/components/upload/DragFileUpload.vue @@ -67,8 +67,13 @@ const triggerFileSelect = () => { } }; +const clearFiles = () => { + uploadRef.value?.clearFiles?.(); +}; + // 对外暴露方法(父组件可通过ref调用) defineExpose({ + clearFiles, triggerFileSelect, }); diff --git a/easyflow-ui-admin/app/src/locales/langs/en-US/button.json b/easyflow-ui-admin/app/src/locales/langs/en-US/button.json index 554f69c..fa9ea07 100644 --- a/easyflow-ui-admin/app/src/locales/langs/en-US/button.json +++ b/easyflow-ui-admin/app/src/locales/langs/en-US/button.json @@ -48,5 +48,9 @@ "more": "Mode", "submitDeleteApproval": "Submit Delete Approval", "submitPublishApproval": "Submit Publish Approval", - "viewSegmentation": "ViewSegmentation" + "viewSegmentation": "View Segments", + "continueProcess": "Continue", + "startIndex": "Start Indexing", + "retryParse": "Retry Parse", + "retryIndex": "Retry Index" } diff --git a/easyflow-ui-admin/app/src/locales/langs/en-US/documentCollection.json b/easyflow-ui-admin/app/src/locales/langs/en-US/documentCollection.json index 48ae9f7..49c11f2 100644 --- a/easyflow-ui-admin/app/src/locales/langs/en-US/documentCollection.json +++ b/easyflow-ui-admin/app/src/locales/langs/en-US/documentCollection.json @@ -32,9 +32,10 @@ "rerankLlmId": 
"RerankLlm", "searchEngineEnable": "SearchEngineEnable", "englishName": "EnglishName", - "documentType": "DocumentType", "fileName": "fileName", - "knowledgeCount": "Number of knowledge items", + "chunkCount": "Chunks", + "processStatus": "Status", + "progress": "Progress", "publishStatusDraft": "Draft", "publishStatusPublishPending": "Publish Pending", "publishStatusPublished": "Published", @@ -77,17 +78,32 @@ "fileName": "File Name", "progressUpload": "Progress of file upload", "fileSize": "File size", + "uploadCreateTip": "After upload, the document appears in the list first and is parsed asynchronously. Continue with chunking after parsing finishes.", "analysisTip": "The system analyzes multilingual structure first and recommends a splitting strategy. You can still adjust each file manually.", + "manualStrategyTip": "The preview refreshes automatically when the chunking strategy changes. Start indexing after it looks right.", "confidence": "Confidence", "recommendReason": "Reasons", "candidateStrategies": "Candidates", "strategySelection": "Strategy", "previewTip": "The preview result is the final import basis. Confirm it before committing.", + "previewPaneTitle": "Chunk Preview", "previewEmpty": "No preview data", + "previewReady": "Preview ready", + "previewRefreshing": "Refreshing preview", + "previewRequestFailed": "Failed to refresh preview. 
Please try again.", "warningCount": "Warnings", "chunkCount": "Chunks", + "lockedState": "Locked", + "normalizedDocumentTitle": "Source Text", + "normalizedDocumentTip": "Shows the normalized source text so each chunk can be checked in context.", "resultEmpty": "No import result", - "importFailed": "Import failed" + "importFailed": "Import failed", + "createSuccess": "Documents added to the list", + "partialCreateSuccess": "Only some documents were added successfully", + "indexQueued": "Indexing started", + "previewAction": "Generate Preview", + "workbenchEyebrow": "Processing Workspace", + "workbenchTitle": "Document Processing" }, "splitterDoc": { "fileType": "FileType", @@ -113,6 +129,16 @@ "uploading": "Parsing in progress", "importSuccess": "ImportSuccess" }, + "taskStatus": { + "UPLOADED": "Uploaded", + "PARSING": "Parsing", + "PARSE_FAILED": "Parse Failed", + "READY_FOR_SEGMENT": "Ready for Chunking", + "READY_FOR_INDEX": "Ready for Indexing", + "INDEXING": "Indexing", + "INDEX_FAILED": "Index Failed", + "COMPLETED": "Completed" + }, "documentManagement": "Document management", "actions": { "knowledge": "Knowledge", @@ -188,5 +214,6 @@ "vectorEmbedModelTips": "After successful vector data, it is not allowed to modify the vector model", "dimensionOfVectorModelTips": "After successful vector data, it is not allowed to modify the dimensions of the vector model", "dimensionOfVectorModel": "Dimension of vector model", - "managePermissionHint": "Only the creator or super admin can modify this knowledge base" + "managePermissionHint": "Only the creator or super admin can modify this knowledge base", + "processingDeleteBlocked": "Documents in progress cannot be deleted" } diff --git a/easyflow-ui-admin/app/src/locales/langs/zh-CN/button.json b/easyflow-ui-admin/app/src/locales/langs/zh-CN/button.json index b7403e7..140d2ee 100644 --- a/easyflow-ui-admin/app/src/locales/langs/zh-CN/button.json +++ b/easyflow-ui-admin/app/src/locales/langs/zh-CN/button.json @@ -48,5 
+48,9 @@ "more": "更多", "submitDeleteApproval": "提交删除审批", "submitPublishApproval": "提交发布审批", - "viewSegmentation": "查看分段" + "viewSegmentation": "查看分段", + "continueProcess": "继续处理", + "startIndex": "开始向量化", + "retryParse": "重试解析", + "retryIndex": "重试向量化" } diff --git a/easyflow-ui-admin/app/src/locales/langs/zh-CN/documentCollection.json b/easyflow-ui-admin/app/src/locales/langs/zh-CN/documentCollection.json index fed1f5f..213f25c 100644 --- a/easyflow-ui-admin/app/src/locales/langs/zh-CN/documentCollection.json +++ b/easyflow-ui-admin/app/src/locales/langs/zh-CN/documentCollection.json @@ -32,9 +32,10 @@ "rerankLlmId": "重排模型", "searchEngineEnable": "是否启用搜索引擎", "englishName": "英文名称", - "documentType": "文件类型", "fileName": "文件名", - "knowledgeCount": "知识条数", + "chunkCount": "分块数", + "processStatus": "处理状态", + "progress": "处理进度", "publishStatusDraft": "草稿", "publishStatusPublishPending": "发布审批中", "publishStatusPublished": "已发布", @@ -77,17 +78,32 @@ "fileName": "文件名称", "progressUpload": "文件上传进度", "fileSize": "文件大小", + "uploadCreateTip": "上传完成后,文档会先进入列表并异步解析,解析完成后再继续分块和向量化。", "analysisTip": "系统会先基于文档结构做中英文规则分析,再推荐拆分策略,你也可以逐个文件手动调整。", + "manualStrategyTip": "调整分块策略后会自动刷新预览,确认效果后再启动向量化。", "confidence": "置信度", "recommendReason": "推荐理由", "candidateStrategies": "备选策略", "strategySelection": "拆分策略", "previewTip": "预览结果就是最终入库依据,确认无误后再执行导入。", + "previewPaneTitle": "分块预览", "previewEmpty": "暂无可预览内容", + "previewReady": "预览已更新", + "previewRefreshing": "正在更新预览", + "previewRequestFailed": "预览生成失败,请稍后重试", "warningCount": "警告数", "chunkCount": "分块数", + "lockedState": "已锁定", + "normalizedDocumentTitle": "原文", + "normalizedDocumentTip": "展示解析后的标准化文本,便于核对每个分块落点。", "resultEmpty": "暂无导入结果", - "importFailed": "导入失败" + "importFailed": "导入失败", + "createSuccess": "文档已加入列表", + "partialCreateSuccess": "已加入部分文档,请检查失败项", + "indexQueued": "已开始向量化", + "previewAction": "生成预览", + "workbenchEyebrow": "处理工作台", + "workbenchTitle": "文档处理" }, "splitterDoc": { "fileType": "文件类型", @@ -113,6 +129,16 @@ "uploading": 
"解析中", "importSuccess": "导入成功" }, + "taskStatus": { + "UPLOADED": "已上传", + "PARSING": "解析中", + "PARSE_FAILED": "解析失败", + "READY_FOR_SEGMENT": "待分块", + "READY_FOR_INDEX": "待向量化", + "INDEXING": "向量化中", + "INDEX_FAILED": "向量化失败", + "COMPLETED": "已完成" + }, "documentManagement": "文档管理", "actions": { "knowledge": "知识", @@ -188,5 +214,6 @@ "vectorEmbedModelTips": "成功向量数据之后不允许修改向量模型", "dimensionOfVectorModelTips": "成功向量数据之后不允许修改向量模型维度", "dimensionOfVectorModel": "向量模型维度", - "managePermissionHint": "仅创建者或超级管理员可修改当前知识库" + "managePermissionHint": "仅创建者或超级管理员可修改当前知识库", + "processingDeleteBlocked": "文档处理中,暂不允许删除" } diff --git a/easyflow-ui-admin/app/src/views/ai/documentCollection/Document.vue b/easyflow-ui-admin/app/src/views/ai/documentCollection/Document.vue index e597719..e189391 100644 --- a/easyflow-ui-admin/app/src/views/ai/documentCollection/Document.vue +++ b/easyflow-ui-admin/app/src/views/ai/documentCollection/Document.vue @@ -1,5 +1,5 @@ @@ -306,6 +334,12 @@ const backDoc = () => { border-radius: 8px; } +.doc-table-content--process { + padding: 18px 20px 0; + border-color: rgb(15 23 42 / 6%); + border-radius: 18px; +} + .doc-header { width: 100%; padding-bottom: 21px; @@ -378,10 +412,8 @@ const backDoc = () => { background-color: var(--el-bg-color); } -.doc-imp-container { - box-sizing: border-box; - flex: 1; - width: 100%; +.doc-sub-back { + padding: 0 6px 16px; } .doc-header-container { @@ -428,6 +460,8 @@ const backDoc = () => { display: flex; width: 100%; height: 100%; + min-width: 0; + min-height: 0; } .menu-container { diff --git a/easyflow-ui-admin/app/src/views/ai/documentCollection/DocumentTable.vue b/easyflow-ui-admin/app/src/views/ai/documentCollection/DocumentTable.vue index 548eab6..a04109f 100644 --- a/easyflow-ui-admin/app/src/views/ai/documentCollection/DocumentTable.vue +++ b/easyflow-ui-admin/app/src/views/ai/documentCollection/DocumentTable.vue @@ -1,36 +1,65 @@