From b6213d0933671d817c78953e6e74bdd1f5daa4c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=99=88=E5=AD=90=E9=BB=98?= <925456043@qq.com> Date: Sun, 29 Mar 2026 17:27:12 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20=E5=A2=9E=E5=BC=BA=E7=9F=A5=E8=AF=86?= =?UTF-8?q?=E5=BA=93=E5=88=86=E5=9D=97=E7=AD=96=E7=95=A5=E6=B5=81=E7=A8=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 增加导入分析预览提交与预览态缓存键 - 支持知识库分块策略配置与分块预览 - 重构知识库导入与确认导入前端流程 --- .../ai/documentimport/DocumentImportDtos.java | 555 +++++++++++++++++ .../ai/documentimport/DocumentImportKeys.java | 21 + .../DocumentImportPreviewService.java | 45 ++ .../easyflow/ai/service/DocumentService.java | 7 + .../ai/service/impl/DocumentServiceImpl.java | 577 ++++++++++++++++-- .../V6__document_import_strategy.sql | 4 + .../ComfirmImportDocument.vue | 184 +++--- .../ImportKnowledgeDocFile.vue | 383 ++++++------ .../ImportKnowledgeFileContainer.vue | 2 +- .../ai/documentCollection/SegmenterDoc.vue | 496 ++++++++++----- .../documentCollection/SplitterDocPreview.vue | 404 +++++++----- 11 files changed, 2078 insertions(+), 600 deletions(-) create mode 100644 easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/documentimport/DocumentImportDtos.java create mode 100644 easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/documentimport/DocumentImportKeys.java create mode 100644 easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/documentimport/DocumentImportPreviewService.java create mode 100644 easyflow-starter/easyflow-starter-all/src/main/resources/db/migration/V6__document_import_strategy.sql diff --git a/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/documentimport/DocumentImportDtos.java b/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/documentimport/DocumentImportDtos.java new file mode 100644 index 0000000..55225f8 --- /dev/null +++ 
package tech.easyflow.ai.documentimport;

import com.easyagents.rag.core.RagChunk;
import com.easyagents.rag.ingestion.model.AnalysisResult;
import com.easyagents.rag.ingestion.model.StrategyConfig;
import tech.easyflow.ai.entity.Document;
import tech.easyflow.ai.entity.DocumentChunk;

import java.io.Serializable;
import java.math.BigInteger;
import java.util.ArrayList;
import java.util.Date;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

/**
 * Holder for the request, response and cache payload types used by the
 * document import flow (analyze, preview, commit).
 *
 * <p>All nested types are plain mutable beans implementing {@link Serializable}.
 * Collection-typed properties are initialised to empty collections, but the
 * setters accept {@code null}, so readers must still null-check.
 */
public final class DocumentImportDtos {

    private DocumentImportDtos() {
    }

    /** Storage path and file name of one uploaded file. */
    public static class FileItem implements Serializable {
        private String filePath;
        private String fileName;

        public String getFilePath() {
            return filePath;
        }

        public void setFilePath(String filePath) {
            this.filePath = filePath;
        }

        public String getFileName() {
            return fileName;
        }

        public void setFileName(String fileName) {
            this.fileName = fileName;
        }
    }

    /** Request for analysing a batch of uploaded files against one knowledge base. */
    public static class AnalyzeRequest implements Serializable {
        private BigInteger knowledgeId;
        private List<FileItem> files = new ArrayList<>();

        public BigInteger getKnowledgeId() {
            return knowledgeId;
        }

        public void setKnowledgeId(BigInteger knowledgeId) {
            this.knowledgeId = knowledgeId;
        }

        public List<FileItem> getFiles() {
            return files;
        }

        public void setFiles(List<FileItem> files) {
            this.files = files;
        }
    }

    /**
     * One file to preview together with the strategy configuration requested for it.
     * The configuration defaults to {@code StrategyConfig.defaults()}.
     */
    public static class PreviewFileRequest implements Serializable {
        private String filePath;
        private String fileName;
        private StrategyConfig strategyConfig = StrategyConfig.defaults();

        public String getFilePath() {
            return filePath;
        }

        public void setFilePath(String filePath) {
            this.filePath = filePath;
        }

        public String getFileName() {
            return fileName;
        }

        public void setFileName(String fileName) {
            this.fileName = fileName;
        }

        public StrategyConfig getStrategyConfig() {
            return strategyConfig;
        }

        public void setStrategyConfig(StrategyConfig strategyConfig) {
            this.strategyConfig = strategyConfig;
        }
    }

    /** Request for generating chunk previews for a batch of files. */
    public static class PreviewRequest implements Serializable {
        private BigInteger knowledgeId;
        private List<PreviewFileRequest> files = new ArrayList<>();

        public BigInteger getKnowledgeId() {
            return knowledgeId;
        }

        public void setKnowledgeId(BigInteger knowledgeId) {
            this.knowledgeId = knowledgeId;
        }

        public List<PreviewFileRequest> getFiles() {
            return files;
        }

        public void setFiles(List<PreviewFileRequest> files) {
            this.files = files;
        }
    }

    /** Request for committing previously generated previews, identified by session id. */
    public static class CommitRequest implements Serializable {
        private BigInteger knowledgeId;
        private List<String> previewSessionIds = new ArrayList<>();

        public BigInteger getKnowledgeId() {
            return knowledgeId;
        }

        public void setKnowledgeId(BigInteger knowledgeId) {
            this.knowledgeId = knowledgeId;
        }

        public List<String> getPreviewSessionIds() {
            return previewSessionIds;
        }

        public void setPreviewSessionIds(List<String> previewSessionIds) {
            this.previewSessionIds = previewSessionIds;
        }
    }

    /** Request carrying the splitter settings to save for a knowledge base. */
    public static class SplitterProfileSaveRequest implements Serializable {
        private BigInteger knowledgeId;
        private String defaultStrategyCode;
        private Boolean autoRecommendEnabled;
        private String fallbackStrategyCode;
        // NOTE(review): left raw on purpose; the key/value types of this map are not
        // visible from this file's callers. Presumably keyed by strategy code; confirm
        // against the controller before parameterising.
        @SuppressWarnings("rawtypes")
        private Map strategyProfiles = new LinkedHashMap();

        public BigInteger getKnowledgeId() {
            return knowledgeId;
        }

        public void setKnowledgeId(BigInteger knowledgeId) {
            this.knowledgeId = knowledgeId;
        }

        public String getDefaultStrategyCode() {
            return defaultStrategyCode;
        }

        public void setDefaultStrategyCode(String defaultStrategyCode) {
            this.defaultStrategyCode = defaultStrategyCode;
        }

        public Boolean getAutoRecommendEnabled() {
            return autoRecommendEnabled;
        }

        public void setAutoRecommendEnabled(Boolean autoRecommendEnabled) {
            this.autoRecommendEnabled = autoRecommendEnabled;
        }

        public String getFallbackStrategyCode() {
            return fallbackStrategyCode;
        }

        public void setFallbackStrategyCode(String fallbackStrategyCode) {
            this.fallbackStrategyCode = fallbackStrategyCode;
        }

        @SuppressWarnings("rawtypes")
        public Map getStrategyProfiles() {
            return strategyProfiles;
        }

        @SuppressWarnings("rawtypes")
        public void setStrategyProfiles(Map strategyProfiles) {
            this.strategyProfiles = strategyProfiles;
        }
    }

    /** Analysis outcome for one file plus the strategy configuration resolved for it. */
    public static class AnalyzeItem implements Serializable {
        private String filePath;
        private String fileName;
        private AnalysisResult analysis;
        private StrategyConfig strategyConfig = StrategyConfig.defaults();

        public String getFilePath() {
            return filePath;
        }

        public void setFilePath(String filePath) {
            this.filePath = filePath;
        }

        public String getFileName() {
            return fileName;
        }

        public void setFileName(String fileName) {
            this.fileName = fileName;
        }

        public AnalysisResult getAnalysis() {
            return analysis;
        }

        public void setAnalysis(AnalysisResult analysis) {
            this.analysis = analysis;
        }

        public StrategyConfig getStrategyConfig() {
            return strategyConfig;
        }

        public void setStrategyConfig(StrategyConfig strategyConfig) {
            this.strategyConfig = strategyConfig;
        }
    }

    /** Response of the analyze step: one {@link AnalyzeItem} per file and their count. */
    public static class AnalyzeResponse implements Serializable {
        private Integer total;
        private List<AnalyzeItem> items = new ArrayList<>();

        public Integer getTotal() {
            return total;
        }

        public void setTotal(Integer total) {
            this.total = total;
        }

        public List<AnalyzeItem> getItems() {
            return items;
        }

        public void setItems(List<AnalyzeItem> items) {
            this.items = items;
        }
    }

    /**
     * Preview outcome for one file: the generated chunks, their counts, the strategy
     * that was applied and the id of the cached preview session to commit later.
     */
    public static class PreviewFileResult implements Serializable {
        private String previewSessionId;
        private String filePath;
        private String fileName;
        private String strategyCode;
        private String strategyLabel;
        private AnalysisResult analysis;
        private Integer totalChunks;
        private Integer totalWarnings;
        private List<RagChunk> chunks = new ArrayList<>();

        public String getPreviewSessionId() {
            return previewSessionId;
        }

        public void setPreviewSessionId(String previewSessionId) {
            this.previewSessionId = previewSessionId;
        }

        public String getFilePath() {
            return filePath;
        }

        public void setFilePath(String filePath) {
            this.filePath = filePath;
        }

        public String getFileName() {
            return fileName;
        }

        public void setFileName(String fileName) {
            this.fileName = fileName;
        }

        public String getStrategyCode() {
            return strategyCode;
        }

        public void setStrategyCode(String strategyCode) {
            this.strategyCode = strategyCode;
        }

        public String getStrategyLabel() {
            return strategyLabel;
        }

        public void setStrategyLabel(String strategyLabel) {
            this.strategyLabel = strategyLabel;
        }

        public AnalysisResult getAnalysis() {
            return analysis;
        }

        public void setAnalysis(AnalysisResult analysis) {
            this.analysis = analysis;
        }

        public Integer getTotalChunks() {
            return totalChunks;
        }

        public void setTotalChunks(Integer totalChunks) {
            this.totalChunks = totalChunks;
        }

        public Integer getTotalWarnings() {
            return totalWarnings;
        }

        public void setTotalWarnings(Integer totalWarnings) {
            this.totalWarnings = totalWarnings;
        }

        public List<RagChunk> getChunks() {
            return chunks;
        }

        public void setChunks(List<RagChunk> chunks) {
            this.chunks = chunks;
        }
    }

    /** Response of the preview step: per-file results plus file and chunk totals. */
    public static class PreviewResponse implements Serializable {
        private Integer totalFiles;
        private Integer totalChunks;
        private List<PreviewFileResult> items = new ArrayList<>();

        public Integer getTotalFiles() {
            return totalFiles;
        }

        public void setTotalFiles(Integer totalFiles) {
            this.totalFiles = totalFiles;
        }

        public Integer getTotalChunks() {
            return totalChunks;
        }

        public void setTotalChunks(Integer totalChunks) {
            this.totalChunks = totalChunks;
        }

        public List<PreviewFileResult> getItems() {
            return items;
        }

        public void setItems(List<PreviewFileResult> items) {
            this.items = items;
        }
    }

    /**
     * Commit outcome for one preview session. On failure {@code success} is
     * {@code false} and {@code reason} carries the error message.
     */
    public static class CommitFileResult implements Serializable {
        private String previewSessionId;
        private String fileName;
        private Boolean success;
        private String reason;
        private BigInteger documentId;
        private Integer chunkCount;

        public String getPreviewSessionId() {
            return previewSessionId;
        }

        public void setPreviewSessionId(String previewSessionId) {
            this.previewSessionId = previewSessionId;
        }

        public String getFileName() {
            return fileName;
        }

        public void setFileName(String fileName) {
            this.fileName = fileName;
        }

        public Boolean getSuccess() {
            return success;
        }

        public void setSuccess(Boolean success) {
            this.success = success;
        }

        public String getReason() {
            return reason;
        }

        public void setReason(String reason) {
            this.reason = reason;
        }

        public BigInteger getDocumentId() {
            return documentId;
        }

        public void setDocumentId(BigInteger documentId) {
            this.documentId = documentId;
        }

        public Integer getChunkCount() {
            return chunkCount;
        }

        public void setChunkCount(Integer chunkCount) {
            this.chunkCount = chunkCount;
        }
    }

    /** Response of the commit step: per-session results plus success and error counts. */
    public static class CommitResponse implements Serializable {
        private Integer totalFiles;
        private Integer successCount;
        private Integer errorCount;
        private List<CommitFileResult> results = new ArrayList<>();

        public Integer getTotalFiles() {
            return totalFiles;
        }

        public void setTotalFiles(Integer totalFiles) {
            this.totalFiles = totalFiles;
        }

        public Integer getSuccessCount() {
            return successCount;
        }

        public void setSuccessCount(Integer successCount) {
            this.successCount = successCount;
        }

        public Integer getErrorCount() {
            return errorCount;
        }

        public void setErrorCount(Integer errorCount) {
            this.errorCount = errorCount;
        }

        public List<CommitFileResult> getResults() {
            return results;
        }

        public void setResults(List<CommitFileResult> results) {
            this.results = results;
        }
    }

    /**
     * Cached state of one file preview. It holds the document and chunk entities
     * built at preview time, so that a later commit can persist them without
     * re-reading or re-splitting the file.
     */
    public static class PreviewSession implements Serializable {
        private String sessionId;
        private BigInteger knowledgeId;
        private String filePath;
        private String fileName;
        private String sourceFormat;
        private StrategyConfig strategyConfig;
        private AnalysisResult analysis;
        private Document document;
        private List<DocumentChunk> documentChunks = new ArrayList<>();
        private List<RagChunk> previewChunks = new ArrayList<>();
        private Date createdAt;

        public String getSessionId() {
            return sessionId;
        }

        public void setSessionId(String sessionId) {
            this.sessionId = sessionId;
        }

        public BigInteger getKnowledgeId() {
            return knowledgeId;
        }

        public void setKnowledgeId(BigInteger knowledgeId) {
            this.knowledgeId = knowledgeId;
        }

        public String getFilePath() {
            return filePath;
        }

        public void setFilePath(String filePath) {
            this.filePath = filePath;
        }

        public String getFileName() {
            return fileName;
        }

        public void setFileName(String fileName) {
            this.fileName = fileName;
        }

        public String getSourceFormat() {
            return sourceFormat;
        }

        public void setSourceFormat(String sourceFormat) {
            this.sourceFormat = sourceFormat;
        }

        public StrategyConfig getStrategyConfig() {
            return strategyConfig;
        }

        public void setStrategyConfig(StrategyConfig strategyConfig) {
            this.strategyConfig = strategyConfig;
        }

        public AnalysisResult getAnalysis() {
            return analysis;
        }

        public void setAnalysis(AnalysisResult analysis) {
            this.analysis = analysis;
        }

        public Document getDocument() {
            return document;
        }

        public void setDocument(Document document) {
            this.document = document;
        }

        public List<DocumentChunk> getDocumentChunks() {
            return documentChunks;
        }

        public void setDocumentChunks(List<DocumentChunk> documentChunks) {
            this.documentChunks = documentChunks;
        }

        public List<RagChunk> getPreviewChunks() {
            return previewChunks;
        }

        public void setPreviewChunks(List<RagChunk> previewChunks) {
            this.previewChunks = previewChunks;
        }

        public Date getCreatedAt() {
            return createdAt;
        }

        public void setCreatedAt(Date createdAt) {
            this.createdAt = createdAt;
        }
    }
}
package tech.easyflow.ai.documentimport;

/**
 * String constants shared by the document import flow: the cache key prefix for
 * preview sessions and the option keys used for splitter settings.
 *
 * <p>Not instantiable.
 */
public final class DocumentImportKeys {

    /** Prefix prepended to a preview session id to build its cache key. */
    public static final String CACHE_KEY_PREFIX = "easyflow:document:import:preview:";

    // Option keys looked up in a knowledge base's options when resolving the
    // splitter strategy for an import.
    public static final String KEY_SPLITTER_DEFAULT_STRATEGY = "splitter.defaultStrategyCode";
    public static final String KEY_SPLITTER_AUTO_RECOMMEND_ENABLED = "splitter.autoRecommendEnabled";
    public static final String KEY_SPLITTER_FALLBACK_STRATEGY = "splitter.fallbackStrategyCode";
    public static final String KEY_SPLITTER_STRATEGY_PROFILES = "splitter.strategyProfiles";

    // Option keys written into an imported document's options to record which
    // strategy produced its chunks.
    public static final String KEY_DOCUMENT_STRATEGY_CODE = "splitter.strategyCode";
    public static final String KEY_DOCUMENT_STRATEGY_LABEL = "splitter.strategyLabel";
    public static final String KEY_DOCUMENT_STRATEGY_SNAPSHOT = "splitter.strategySnapshot";
    public static final String KEY_DOCUMENT_ANALYSIS_SUMMARY = "splitter.analysisSummary";
    public static final String KEY_DOCUMENT_SOURCE_FILE_EXT = "splitter.sourceFileExt";
    public static final String KEY_DOCUMENT_PREVIEW_VERSION = "splitter.previewVersion";

    private DocumentImportKeys() {
    }
}
package tech.easyflow.ai.documentimport;

import com.alicp.jetcache.Cache;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.stereotype.Service;
import tech.easyflow.common.web.exceptions.BusinessException;

import java.time.Duration;
import java.util.concurrent.TimeUnit;
import java.util.UUID;

/**
 * Stores import preview sessions in the injected {@code defaultCache} so that a
 * later commit request can pick up the result of an earlier preview by session id.
 */
@Service
public class DocumentImportPreviewService {

    /** Expiry passed to the cache when a session is stored. */
    private static final Duration SESSION_TTL = Duration.ofMinutes(30);

    private final Cache defaultCache;

    public DocumentImportPreviewService(@Qualifier("defaultCache") Cache defaultCache) {
        this.defaultCache = defaultCache;
    }

    /**
     * Assigns a freshly generated id to the session and caches it.
     *
     * @param session the preview session to store; its {@code sessionId} is overwritten
     * @return the generated session id (a UUID with the dashes removed)
     */
    public String put(DocumentImportDtos.PreviewSession session) {
        final String generatedId = UUID.randomUUID().toString().replace("-", "");
        session.setSessionId(generatedId);

        final long ttlMinutes = SESSION_TTL.toMinutes();
        defaultCache.put(buildKey(generatedId), session, ttlMinutes, TimeUnit.MINUTES);
        return generatedId;
    }

    /**
     * Looks up a cached preview session.
     *
     * @param sessionId id previously returned by {@link #put}
     * @return the cached session
     * @throws BusinessException if the cache holds no preview session under that id
     */
    public DocumentImportDtos.PreviewSession getRequired(String sessionId) {
        final Object hit = defaultCache.get(buildKey(sessionId));
        if (hit instanceof DocumentImportDtos.PreviewSession) {
            return (DocumentImportDtos.PreviewSession) hit;
        }
        throw new BusinessException("预览会话已失效,请重新生成预览");
    }

    /** Removes the cache entry for the given session id. */
    public void remove(String sessionId) {
        final String cacheKey = buildKey(sessionId);
        defaultCache.remove(cacheKey);
    }

    /** Builds the cache key for a session id by prepending the shared prefix. */
    private String buildKey(String sessionId) {
        return DocumentImportKeys.CACHE_KEY_PREFIX + sessionId;
    }
}
com.mybatisflex.core.paginate.Page; import com.mybatisflex.core.service.IService; +import tech.easyflow.ai.documentimport.DocumentImportDtos; import tech.easyflow.ai.entity.DocumentChunk; import tech.easyflow.ai.entity.DocumentCollectionSplitParams; import tech.easyflow.common.domain.Result; @@ -25,4 +26,10 @@ public interface DocumentService extends IService { Result textSplit(DocumentCollectionSplitParams documentCollectionSplitParams); Result saveTextResult(List documentChunks, Document document); + + Result analyzeImport(DocumentImportDtos.AnalyzeRequest request); + + Result previewImport(DocumentImportDtos.PreviewRequest request); + + Result commitImport(DocumentImportDtos.CommitRequest request); } diff --git a/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/service/impl/DocumentServiceImpl.java b/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/service/impl/DocumentServiceImpl.java index 5793fa7..9953f26 100644 --- a/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/service/impl/DocumentServiceImpl.java +++ b/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/service/impl/DocumentServiceImpl.java @@ -12,6 +12,12 @@ import com.easyagents.core.model.embedding.EmbeddingOptions; import com.easyagents.core.store.DocumentStore; import com.easyagents.core.store.StoreOptions; import com.easyagents.core.store.StoreResult; +import com.easyagents.rag.core.RagChunk; +import com.easyagents.rag.core.RagDefaults; +import com.easyagents.rag.core.RagStrategyCodes; +import com.easyagents.rag.ingestion.RagIngestionService; +import com.easyagents.rag.ingestion.model.AnalysisResult; +import com.easyagents.rag.ingestion.model.StrategyConfig; import com.easyagents.search.engine.service.DocumentSearcher; import com.mybatisflex.core.keygen.impl.FlexIDKeyGenerator; import com.mybatisflex.core.paginate.Page; @@ -24,6 +30,9 @@ import org.springframework.beans.factory.annotation.Autowired; import 
org.springframework.stereotype.Service; import org.springframework.transaction.annotation.Transactional; import tech.easyflow.ai.config.SearcherFactory; +import tech.easyflow.ai.documentimport.DocumentImportDtos; +import tech.easyflow.ai.documentimport.DocumentImportKeys; +import tech.easyflow.ai.documentimport.DocumentImportPreviewService; import tech.easyflow.ai.entity.*; import tech.easyflow.ai.mapper.DocumentChunkMapper; import tech.easyflow.ai.mapper.DocumentMapper; @@ -42,6 +51,7 @@ import javax.annotation.Resource; import java.io.IOException; import java.io.InputStream; import java.math.BigInteger; +import java.math.BigDecimal; import java.util.*; import java.util.concurrent.atomic.AtomicInteger; @@ -81,6 +91,12 @@ public class DocumentServiceImpl extends ServiceImpl i @Autowired private SearcherFactory searcherFactory; + @Autowired + private RagIngestionService ragIngestionService; + + @Autowired + private DocumentImportPreviewService documentImportPreviewService; + @Override public Page getDocumentList(String knowledgeId, int pageSize, int pageNum, String fileName) { QueryWrapper queryWrapper=QueryWrapper.create() @@ -250,23 +266,397 @@ public class DocumentServiceImpl extends ServiceImpl i return Result.fail(1, "切割结果无有效文本,无法进行向量化"); } - Boolean result = storeDocument(document, validChunks); - if (result) { - this.getMapper().insert(document); - AtomicInteger sort = new AtomicInteger(1); - validChunks.forEach(item -> { - item.setDocumentCollectionId(document.getCollectionId()); - item.setSorting(sort.get()); - item.setDocumentId(document.getId()); - sort.getAndIncrement(); - documentChunkService.save(item); - }); + StoreExecutionContext storeContext = prepareStoreContext(document); + storeDocumentChunks(storeContext, validChunks); + try { + persistDocumentWithChunks(document, validChunks); + updateKnowledgeAfterStore(storeContext); return Result.ok(); + } catch (Exception e) { + cleanupPersistedDocument(document); + rollbackStoredChunks(storeContext, 
validChunks); + Log.error("保存文档失败: documentId={}, title={}", document.getId(), document.getTitle(), e); + throw new BusinessException("保存失败:" + e.getMessage()); } - return Result.fail(1, "保存失败"); } protected Boolean storeDocument(Document entity, List documentChunks) { + StoreExecutionContext storeContext = prepareStoreContext(entity); + storeDocumentChunks(storeContext, documentChunks); + updateKnowledgeAfterStore(storeContext); + return true; + } + + @Override + public Result analyzeImport(DocumentImportDtos.AnalyzeRequest request) { + DocumentCollection knowledge = assertDocumentCollection(request.getKnowledgeId()); + if (request.getFiles() == null || request.getFiles().isEmpty()) { + throw new BusinessException("请先上传文件"); + } + + List items = new ArrayList<>(); + for (DocumentImportDtos.FileItem file : request.getFiles()) { + AnalysisResult analysis = analyzeSingleFile(file.getFilePath(), file.getFileName()); + StrategyConfig strategyConfig = resolveStrategyConfig( + knowledge, + null, + analysis + ); + + DocumentImportDtos.AnalyzeItem item = new DocumentImportDtos.AnalyzeItem(); + item.setFilePath(file.getFilePath()); + item.setFileName(file.getFileName()); + item.setAnalysis(analysis); + item.setStrategyConfig(strategyConfig); + items.add(item); + } + + DocumentImportDtos.AnalyzeResponse response = new DocumentImportDtos.AnalyzeResponse(); + response.setItems(items); + response.setTotal(items.size()); + return Result.ok(response); + } + + @Override + public Result previewImport(DocumentImportDtos.PreviewRequest request) { + DocumentCollection knowledge = assertDocumentCollection(request.getKnowledgeId()); + if (request.getFiles() == null || request.getFiles().isEmpty()) { + throw new BusinessException("请先上传文件"); + } + + List items = new ArrayList<>(); + int totalChunks = 0; + for (DocumentImportDtos.PreviewFileRequest file : request.getFiles()) { + DocumentImportDtos.PreviewSession session = buildPreviewSession(knowledge, file); + String sessionId = 
documentImportPreviewService.put(session); + + DocumentImportDtos.PreviewFileResult item = new DocumentImportDtos.PreviewFileResult(); + item.setPreviewSessionId(sessionId); + item.setFilePath(file.getFilePath()); + item.setFileName(file.getFileName()); + item.setStrategyCode(session.getStrategyConfig().getStrategyCode()); + item.setStrategyLabel(ragIngestionService.toStrategyLabel(session.getStrategyConfig().getStrategyCode())); + item.setAnalysis(session.getAnalysis()); + item.setChunks(session.getPreviewChunks()); + item.setTotalChunks(session.getPreviewChunks().size()); + item.setTotalWarnings(countWarnings(session.getPreviewChunks())); + items.add(item); + totalChunks += session.getPreviewChunks().size(); + } + + DocumentImportDtos.PreviewResponse response = new DocumentImportDtos.PreviewResponse(); + response.setItems(items); + response.setTotalFiles(items.size()); + response.setTotalChunks(totalChunks); + return Result.ok(response); + } + + @Override + public Result commitImport(DocumentImportDtos.CommitRequest request) { + DocumentCollection knowledge = assertDocumentCollection(request.getKnowledgeId()); + if (request.getPreviewSessionIds() == null || request.getPreviewSessionIds().isEmpty()) { + throw new BusinessException("请选择需要提交的预览结果"); + } + + List results = new ArrayList<>(); + int successCount = 0; + int errorCount = 0; + for (String previewSessionId : request.getPreviewSessionIds()) { + DocumentImportDtos.CommitFileResult result = new DocumentImportDtos.CommitFileResult(); + result.setPreviewSessionId(previewSessionId); + try { + DocumentImportDtos.PreviewSession session = documentImportPreviewService.getRequired(previewSessionId); + if (!Objects.equals(session.getKnowledgeId(), knowledge.getId())) { + throw new BusinessException("预览会话与当前知识库不匹配"); + } + commitSingleSession(session); + result.setSuccess(true); + result.setFileName(session.getFileName()); + result.setDocumentId(session.getDocument().getId()); + 
result.setChunkCount(session.getDocumentChunks().size()); + documentImportPreviewService.remove(previewSessionId); + successCount++; + } catch (Exception e) { + result.setSuccess(false); + result.setReason(e.getMessage()); + errorCount++; + } + results.add(result); + } + + DocumentImportDtos.CommitResponse response = new DocumentImportDtos.CommitResponse(); + response.setTotalFiles(results.size()); + response.setSuccessCount(successCount); + response.setErrorCount(errorCount); + response.setResults(results); + return Result.ok(response); + } + + private void commitSingleSession(DocumentImportDtos.PreviewSession session) { + Document document = session.getDocument(); + document.setCreated(new Date()); + document.setModified(new Date()); + document.setCreatedBy(BigInteger.valueOf(StpUtil.getLoginIdAsLong())); + document.setModifiedBy(BigInteger.valueOf(StpUtil.getLoginIdAsLong())); + for (DocumentChunk chunk : session.getDocumentChunks()) { + chunk.setDocumentId(document.getId()); + chunk.setDocumentCollectionId(document.getCollectionId()); + } + + StoreExecutionContext storeContext = prepareStoreContext(document); + storeDocumentChunks(storeContext, session.getDocumentChunks()); + try { + persistDocumentWithChunks(document, session.getDocumentChunks()); + updateKnowledgeAfterStore(storeContext); + } catch (Exception e) { + cleanupPersistedDocument(document); + rollbackStoredChunks(storeContext, session.getDocumentChunks()); + throw new BusinessException("提交导入失败:" + e.getMessage()); + } + } + + private DocumentImportDtos.PreviewSession buildPreviewSession(DocumentCollection knowledge, + DocumentImportDtos.PreviewFileRequest fileRequest) { + AnalysisResult analysis = analyzeSingleFile(fileRequest.getFilePath(), fileRequest.getFileName()); + StrategyConfig strategyConfig = resolveStrategyConfig(knowledge, fileRequest.getStrategyConfig(), analysis); + List previewChunks = ragIngestionService.split(analysis, strategyConfig); + if (previewChunks.isEmpty()) { + throw new 
BusinessException("未生成有效分块,请调整策略后重试"); + } + + FlexIDKeyGenerator flexIDKeyGenerator = new FlexIDKeyGenerator(); + Document document = buildPreviewDocument(flexIDKeyGenerator, knowledge, fileRequest, analysis, strategyConfig); + List documentChunks = buildDocumentChunks(flexIDKeyGenerator, document, previewChunks); + + DocumentImportDtos.PreviewSession session = new DocumentImportDtos.PreviewSession(); + session.setKnowledgeId(knowledge.getId()); + session.setFilePath(fileRequest.getFilePath()); + session.setFileName(fileRequest.getFileName()); + session.setSourceFormat(analysis.getSourceFormat()); + session.setStrategyConfig(strategyConfig); + session.setAnalysis(analysis); + session.setDocument(document); + session.setDocumentChunks(documentChunks); + session.setPreviewChunks(previewChunks); + session.setCreatedAt(new Date()); + return session; + } + + private Document buildPreviewDocument(FlexIDKeyGenerator flexIDKeyGenerator, + DocumentCollection knowledge, + DocumentImportDtos.PreviewFileRequest fileRequest, + AnalysisResult analysis, + StrategyConfig strategyConfig) { + Document document = new Document(); + document.setId(new BigInteger(String.valueOf(flexIDKeyGenerator.generate(document, null)))); + document.setCollectionId(knowledge.getId()); + document.setDocumentType(analysis.getSourceFormat()); + document.setDocumentPath(fileRequest.getFilePath()); + document.setTitle(fileRequest.getFileName()); + document.setContent(analysis.getNormalizedContent()); + document.setCreated(new Date()); + document.setModified(new Date()); + document.setModifiedBy(BigInteger.valueOf(StpUtil.getLoginIdAsLong())); + + Map options = new LinkedHashMap<>(); + options.put(DocumentImportKeys.KEY_DOCUMENT_STRATEGY_CODE, strategyConfig.getStrategyCode()); + options.put(DocumentImportKeys.KEY_DOCUMENT_STRATEGY_LABEL, ragIngestionService.toStrategyLabel(strategyConfig.getStrategyCode())); + options.put(DocumentImportKeys.KEY_DOCUMENT_STRATEGY_SNAPSHOT, 
strategyConfigToMap(strategyConfig)); + options.put(DocumentImportKeys.KEY_DOCUMENT_ANALYSIS_SUMMARY, analysis.getFeatures()); + options.put(DocumentImportKeys.KEY_DOCUMENT_SOURCE_FILE_EXT, analysis.getSourceFormat()); + options.put(DocumentImportKeys.KEY_DOCUMENT_PREVIEW_VERSION, "v1"); + document.setOptions(options); + return document; + } + + private List buildDocumentChunks(FlexIDKeyGenerator flexIDKeyGenerator, + Document document, + List previewChunks) { + List chunks = new ArrayList<>(); + for (int i = 0; i < previewChunks.size(); i++) { + RagChunk previewChunk = previewChunks.get(i); + DocumentChunk chunk = new DocumentChunk(); + chunk.setId(new BigInteger(String.valueOf(flexIDKeyGenerator.generate(chunk, null)))); + chunk.setDocumentId(document.getId()); + chunk.setDocumentCollectionId(document.getCollectionId()); + chunk.setContent(previewChunk.getContent()); + chunk.setSorting(i + 1); + + Map options = new LinkedHashMap<>(previewChunk.getOptions()); + options.put("chunkType", previewChunk.getChunkType()); + options.put("sourceLabel", previewChunk.getSourceLabel()); + options.put("headingPath", previewChunk.getHeadingPath()); + options.put("charCount", previewChunk.getCharCount()); + options.put("tokenEstimate", previewChunk.getTokenEstimate()); + options.put("qaQuestion", previewChunk.getQuestion()); + options.put("qaAnswer", previewChunk.getAnswer()); + options.put("partNo", previewChunk.getPartNo()); + options.put("partTotal", previewChunk.getPartTotal()); + options.put("warnings", previewChunk.getWarnings()); + chunk.setOptions(options); + chunks.add(chunk); + } + return chunks; + } + + private AnalysisResult analyzeSingleFile(String filePath, String fileName) { + String fileExt = normalizeFileExtension(fileName, filePath); + assertSupportedImportFile(fileExt); + String content = readFileContent(filePath, fileName); + return ragIngestionService.analyze(content, fileExt); + } + + private String readFileContent(String filePath, String fileName) { + try 
(InputStream inputStream = storageService.readStream(filePath)) { + return File2TextUtil.readFromStream(inputStream, fileName, null); + } catch (IOException e) { + Log.error("读取导入文件失败: filePath={}, fileName={}", filePath, fileName, e); + throw new BusinessException("文件解析失败:" + e.getMessage()); + } + } + + private void assertSupportedImportFile(String fileExt) { + if (!Arrays.asList("pdf", "docx", "txt", "md").contains(fileExt)) { + throw new BusinessException("当前仅支持 pdf/docx/txt/md 文档导入"); + } + } + + private String normalizeFileExtension(String fileName, String filePath) { + String target = StringUtil.hasText(fileName) ? fileName : filePath; + String ext = FileUtil.getFileTypeByExtension(target); + return ext == null ? "" : ext.toLowerCase(Locale.ROOT); + } + + private DocumentCollection assertDocumentCollection(BigInteger knowledgeId) { + DocumentCollection knowledge = knowledgeService.getById(knowledgeId); + if (knowledge == null) { + throw new BusinessException("知识库不存在"); + } + if (knowledge.isFaqCollection()) { + throw new BusinessException("FAQ知识库不支持文档上传"); + } + return knowledge; + } + + private StrategyConfig resolveStrategyConfig(DocumentCollection knowledge, + StrategyConfig requestConfig, + AnalysisResult analysisResult) { + Map options = knowledge.getOptions() == null + ? Collections.emptyMap() + : knowledge.getOptions(); + String recommended = analysisResult.getRecommendedStrategyCode(); + String defaultStrategyCode = asString(options.get(DocumentImportKeys.KEY_SPLITTER_DEFAULT_STRATEGY)); + String fallbackStrategyCode = asString(options.get(DocumentImportKeys.KEY_SPLITTER_FALLBACK_STRATEGY)); + Boolean autoRecommendEnabled = asBoolean(options.get(DocumentImportKeys.KEY_SPLITTER_AUTO_RECOMMEND_ENABLED), true); + + StrategyConfig config = readProfileConfig(options, defaultStrategyCode); + if (config == null) { + config = StrategyConfig.defaults(); + } + String requestedStrategyCode = requestConfig == null ? 
null : requestConfig.getStrategyCode(); + + String strategyCode = StringUtil.hasText(requestedStrategyCode) + ? requestedStrategyCode + : config.getStrategyCode(); + if (!StringUtil.hasText(strategyCode) || RagStrategyCodes.AUTO.equals(strategyCode)) { + strategyCode = Boolean.TRUE.equals(autoRecommendEnabled) + ? recommended + : (StringUtil.hasText(defaultStrategyCode) ? defaultStrategyCode : recommended); + } + if (!StringUtil.hasText(strategyCode)) { + strategyCode = StringUtil.hasText(fallbackStrategyCode) + ? fallbackStrategyCode + : RagStrategyCodes.PARAGRAPH_LENGTH; + } + + StrategyConfig profileConfig = readProfileConfig(options, strategyCode); + if (profileConfig != null) { + mergeStrategyConfig(config, profileConfig); + } + if (requestConfig != null) { + mergeStrategyConfig(config, requestConfig); + } + config.setStrategyCode(strategyCode); + if (config.getChunkSize() == null || config.getChunkSize() <= 0) { + config.setChunkSize(RagDefaults.CHUNK_SIZE); + } + if (config.getOverlapSize() == null || config.getOverlapSize() < 0) { + config.setOverlapSize(RagDefaults.OVERLAP_SIZE); + } + if (config.getMdSplitterLevel() == null || config.getMdSplitterLevel() <= 0) { + config.setMdSplitterLevel(RagDefaults.MD_SPLITTER_LEVEL); + } + return config; + } + + @SuppressWarnings("unchecked") + private StrategyConfig readProfileConfig(Map options, String strategyCode) { + if (!StringUtil.hasText(strategyCode)) { + return null; + } + Object profileObject = options.get(DocumentImportKeys.KEY_SPLITTER_STRATEGY_PROFILES); + if (!(profileObject instanceof Map)) { + return null; + } + Object strategyObject = ((Map) profileObject).get(strategyCode); + if (!(strategyObject instanceof Map)) { + return null; + } + Map rawProfile = (Map) strategyObject; + StrategyConfig config = StrategyConfig.defaults(); + config.setStrategyCode(strategyCode); + config.setChunkSize(asInteger(rawProfile.get("chunkSize"), config.getChunkSize())); + 
config.setOverlapSize(asInteger(rawProfile.get("overlapSize"), config.getOverlapSize())); + config.setRegex(asString(rawProfile.get("regex"))); + config.setRowsPerChunk(asInteger(rawProfile.get("rowsPerChunk"), config.getRowsPerChunk())); + config.setMdSplitterLevel(asInteger(rawProfile.get("mdSplitterLevel"), config.getMdSplitterLevel())); + return config; + } + + private void mergeStrategyConfig(StrategyConfig target, StrategyConfig source) { + if (source == null) { + return; + } + if (StringUtil.hasText(source.getStrategyCode())) { + target.setStrategyCode(source.getStrategyCode()); + } + if (source.getChunkSize() != null) { + target.setChunkSize(source.getChunkSize()); + } + if (source.getOverlapSize() != null) { + target.setOverlapSize(source.getOverlapSize()); + } + if (StringUtil.hasText(source.getRegex())) { + target.setRegex(source.getRegex()); + } + if (source.getRowsPerChunk() != null) { + target.setRowsPerChunk(source.getRowsPerChunk()); + } + if (source.getMdSplitterLevel() != null) { + target.setMdSplitterLevel(source.getMdSplitterLevel()); + } + } + + private Map strategyConfigToMap(StrategyConfig strategyConfig) { + Map map = new LinkedHashMap<>(); + map.put("strategyCode", strategyConfig.getStrategyCode()); + map.put("chunkSize", strategyConfig.getChunkSize()); + map.put("overlapSize", strategyConfig.getOverlapSize()); + map.put("regex", strategyConfig.getRegex()); + map.put("rowsPerChunk", strategyConfig.getRowsPerChunk()); + map.put("mdSplitterLevel", strategyConfig.getMdSplitterLevel()); + return map; + } + + private int countWarnings(List chunks) { + int total = 0; + for (RagChunk chunk : chunks) { + total += chunk.getWarnings() == null ? 
0 : chunk.getWarnings().size(); + } + return total; + } + + private StoreExecutionContext prepareStoreContext(Document entity) { DocumentCollection knowledge = knowledgeService.getById(entity.getCollectionId()); if (knowledge == null) { throw new BusinessException("知识库不存在"); @@ -274,23 +664,22 @@ public class DocumentServiceImpl extends ServiceImpl i if (knowledge.isFaqCollection()) { throw new BusinessException("FAQ知识库不支持文档上传"); } - DocumentStore documentStore = null; + + DocumentStore documentStore; try { documentStore = knowledge.toDocumentStore(); } catch (Exception e) { - Log.error(e.getMessage()); + Log.error("向量库配置错误: knowledgeId={}", knowledge.getId(), e); throw new BusinessException("向量数据库配置错误"); } - if (documentStore == null) { throw new BusinessException("向量数据库配置错误"); } - // 设置向量模型 + Model model = modelService.getModelInstance(knowledge.getVectorEmbedModelId()); if (model == null) { throw new BusinessException("该知识库未配置大模型"); } - // 设置向量模型 EmbeddingModel embeddingModel = model.toEmbeddingModel(); documentStore.setEmbeddingModel(embeddingModel); @@ -300,46 +689,152 @@ public class DocumentServiceImpl extends ServiceImpl i embeddingOptions.setDimensions(knowledge.getDimensionOfVectorModel()); options.setEmbeddingOptions(embeddingOptions); options.setIndexName(options.getCollectionName()); + + DocumentSearcher searcher = null; + if (knowledge.isSearchEngineEnabled()) { + searcher = searcherFactory.getSearcher((String) knowledge.getOptionsByKey(KEY_SEARCH_ENGINE_TYPE)); + } + return new StoreExecutionContext(knowledge, model, embeddingModel, documentStore, options, searcher); + } + + private void storeDocumentChunks(StoreExecutionContext storeContext, List documentChunks) { List documents = new ArrayList<>(); - documentChunks.forEach(item -> { - com.easyagents.core.document.Document document = new com.easyagents.core.document.Document(); - document.setId(item.getId()); - document.setContent(item.getContent()); - documents.add(document); - } - ); - StoreResult 
result = null; + for (DocumentChunk item : documentChunks) { + com.easyagents.core.document.Document document = new com.easyagents.core.document.Document(); + document.setId(item.getId()); + document.setContent(item.getContent()); + documents.add(document); + } + + StoreResult result; try { - result = documentStore.store(documents, options); + result = storeContext.documentStore.store(documents, storeContext.options); } catch (Exception e) { Log.error("Vector store failed: knowledgeId={}, collection={}, chunkCount={}", - knowledge.getId(), options.getCollectionName(), documents.size(), e); + storeContext.knowledge.getId(), + storeContext.options.getCollectionName(), + documents.size(), + e); throw new BusinessException("向量过程中发生错误,错误信息为:" + e.getMessage()); } - if (result == null || !result.isSuccess()) { - Log.error("DocumentStore.store failed: " + result); + if (result == null || !result.isSuccess()) { + Log.error("DocumentStore.store failed: {}", result); throw new BusinessException("DocumentStore.store failed"); } - if (knowledge.isSearchEngineEnabled()) { - // 获取搜索引擎 - DocumentSearcher searcher = searcherFactory.getSearcher((String) knowledge.getOptionsByKey(KEY_SEARCH_ENGINE_TYPE)); - // 添加到搜索引擎 - documents.forEach(searcher::addDocument); + if (storeContext.searcher != null) { + for (com.easyagents.core.document.Document document : documents) { + storeContext.searcher.addDocument(document); + } } + } + private void rollbackStoredChunks(StoreExecutionContext storeContext, List documentChunks) { + try { + List chunkIds = new ArrayList<>(); + for (DocumentChunk chunk : documentChunks) { + chunkIds.add(chunk.getId()); + } + storeContext.documentStore.delete(chunkIds, storeContext.options); + if (storeContext.searcher != null) { + for (BigInteger chunkId : chunkIds) { + storeContext.searcher.deleteDocument(chunkId); + } + } + } catch (Exception e) { + Log.error("回滚向量文档失败: knowledgeId={}", storeContext.knowledge.getId(), e); + } + } + + private void 
updateKnowledgeAfterStore(StoreExecutionContext storeContext) { DocumentCollection documentCollection = new DocumentCollection(); - documentCollection.setId(entity.getCollectionId()); - Map knowledgeOptions = knowledge.getOptions(); + documentCollection.setId(storeContext.knowledge.getId()); + Map knowledgeOptions = storeContext.knowledge.getOptions() == null + ? new HashMap<>() + : new HashMap<>(storeContext.knowledge.getOptions()); knowledgeOptions.put(KEY_CAN_UPDATE_EMBEDDING_MODEL, false); documentCollection.setOptions(knowledgeOptions); knowledgeService.updateById(documentCollection); - if (knowledge.getDimensionOfVectorModel() == null) { - int dimension = Model.getEmbeddingDimension(embeddingModel); - knowledge.setDimensionOfVectorModel(dimension); - knowledgeService.updateById(knowledge); + + if (storeContext.knowledge.getDimensionOfVectorModel() == null) { + int dimension = Model.getEmbeddingDimension(storeContext.embeddingModel); + DocumentCollection update = new DocumentCollection(); + update.setId(storeContext.knowledge.getId()); + update.setDimensionOfVectorModel(dimension); + knowledgeService.updateById(update); + } + } + + private void persistDocumentWithChunks(Document document, List chunks) { + this.getMapper().insert(document); + AtomicInteger sort = new AtomicInteger(1); + for (DocumentChunk item : chunks) { + item.setDocumentCollectionId(document.getCollectionId()); + item.setDocumentId(document.getId()); + item.setSorting(sort.getAndIncrement()); + documentChunkService.save(item); + } + } + + private void cleanupPersistedDocument(Document document) { + if (document == null || document.getId() == null) { + return; + } + documentChunkMapper.deleteByQuery(QueryWrapper.create().eq(DocumentChunk::getDocumentId, document.getId())); + this.getMapper().deleteById(document.getId()); + } + + private String asString(Object value) { + return value == null ? 
null : String.valueOf(value); + } + + private Integer asInteger(Object value, Integer defaultValue) { + if (value == null) { + return defaultValue; + } + if (value instanceof Number) { + return ((Number) value).intValue(); + } + if (value instanceof String && StringUtil.hasText((String) value)) { + return Integer.parseInt((String) value); + } + return defaultValue; + } + + private Boolean asBoolean(Object value, boolean defaultValue) { + if (value == null) { + return defaultValue; + } + if (value instanceof Boolean) { + return (Boolean) value; + } + if (value instanceof Number) { + return ((Number) value).intValue() != 0; + } + return Boolean.parseBoolean(String.valueOf(value)); + } + + private static class StoreExecutionContext { + private final DocumentCollection knowledge; + private final Model model; + private final EmbeddingModel embeddingModel; + private final DocumentStore documentStore; + private final StoreOptions options; + private final DocumentSearcher searcher; + + private StoreExecutionContext(DocumentCollection knowledge, + Model model, + EmbeddingModel embeddingModel, + DocumentStore documentStore, + StoreOptions options, + DocumentSearcher searcher) { + this.knowledge = knowledge; + this.model = model; + this.embeddingModel = embeddingModel; + this.documentStore = documentStore; + this.options = options; + this.searcher = searcher; } - return true; } public DocumentSplitter getDocumentSplitter(DocumentCollectionSplitParams params) { diff --git a/easyflow-starter/easyflow-starter-all/src/main/resources/db/migration/V6__document_import_strategy.sql b/easyflow-starter/easyflow-starter-all/src/main/resources/db/migration/V6__document_import_strategy.sql new file mode 100644 index 0000000..2dbb273 --- /dev/null +++ b/easyflow-starter/easyflow-starter-all/src/main/resources/db/migration/V6__document_import_strategy.sql @@ -0,0 +1,4 @@ +SET NAMES utf8mb4; + +ALTER TABLE `tb_document_chunk` + ADD COLUMN `options` text CHARACTER SET utf8mb4 COLLATE 
utf8mb4_0900_ai_ci NULL COMMENT '扩展元信息' AFTER `sorting`; diff --git a/easyflow-ui-admin/app/src/views/ai/documentCollection/ComfirmImportDocument.vue b/easyflow-ui-admin/app/src/views/ai/documentCollection/ComfirmImportDocument.vue index a092e0a..9e1f3ce 100644 --- a/easyflow-ui-admin/app/src/views/ai/documentCollection/ComfirmImportDocument.vue +++ b/easyflow-ui-admin/app/src/views/ai/documentCollection/ComfirmImportDocument.vue @@ -1,99 +1,141 @@ diff --git a/easyflow-ui-admin/app/src/views/ai/documentCollection/ImportKnowledgeDocFile.vue b/easyflow-ui-admin/app/src/views/ai/documentCollection/ImportKnowledgeDocFile.vue index c57b41c..10d768a 100644 --- a/easyflow-ui-admin/app/src/views/ai/documentCollection/ImportKnowledgeDocFile.vue +++ b/easyflow-ui-admin/app/src/views/ai/documentCollection/ImportKnowledgeDocFile.vue @@ -1,189 +1,215 @@