feat: 增强知识库分块策略流程
- 增加导入分析预览提交与预览态缓存键 - 支持知识库分块策略配置与分块预览 - 重构知识库导入与确认导入前端流程
This commit is contained in:
@@ -0,0 +1,555 @@
|
||||
package tech.easyflow.ai.documentimport;
|
||||
|
||||
import com.easyagents.rag.core.RagChunk;
|
||||
import com.easyagents.rag.ingestion.model.AnalysisResult;
|
||||
import com.easyagents.rag.ingestion.model.StrategyConfig;
|
||||
import tech.easyflow.ai.entity.Document;
|
||||
import tech.easyflow.ai.entity.DocumentChunk;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.math.BigInteger;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Date;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
public final class DocumentImportDtos {
|
||||
|
||||
/** Not instantiable: this class only serves as a holder for the nested import DTO types. */
private DocumentImportDtos() {
    // intentionally empty
}
|
||||
|
||||
public static class FileItem implements Serializable {
|
||||
private String filePath;
|
||||
private String fileName;
|
||||
|
||||
public String getFilePath() {
|
||||
return filePath;
|
||||
}
|
||||
|
||||
public void setFilePath(String filePath) {
|
||||
this.filePath = filePath;
|
||||
}
|
||||
|
||||
public String getFileName() {
|
||||
return fileName;
|
||||
}
|
||||
|
||||
public void setFileName(String fileName) {
|
||||
this.fileName = fileName;
|
||||
}
|
||||
}
|
||||
|
||||
public static class AnalyzeRequest implements Serializable {
|
||||
private BigInteger knowledgeId;
|
||||
private List<FileItem> files = new ArrayList<FileItem>();
|
||||
|
||||
public BigInteger getKnowledgeId() {
|
||||
return knowledgeId;
|
||||
}
|
||||
|
||||
public void setKnowledgeId(BigInteger knowledgeId) {
|
||||
this.knowledgeId = knowledgeId;
|
||||
}
|
||||
|
||||
public List<FileItem> getFiles() {
|
||||
return files;
|
||||
}
|
||||
|
||||
public void setFiles(List<FileItem> files) {
|
||||
this.files = files;
|
||||
}
|
||||
}
|
||||
|
||||
public static class PreviewFileRequest implements Serializable {
|
||||
private String filePath;
|
||||
private String fileName;
|
||||
private StrategyConfig strategyConfig = StrategyConfig.defaults();
|
||||
|
||||
public String getFilePath() {
|
||||
return filePath;
|
||||
}
|
||||
|
||||
public void setFilePath(String filePath) {
|
||||
this.filePath = filePath;
|
||||
}
|
||||
|
||||
public String getFileName() {
|
||||
return fileName;
|
||||
}
|
||||
|
||||
public void setFileName(String fileName) {
|
||||
this.fileName = fileName;
|
||||
}
|
||||
|
||||
public StrategyConfig getStrategyConfig() {
|
||||
return strategyConfig;
|
||||
}
|
||||
|
||||
public void setStrategyConfig(StrategyConfig strategyConfig) {
|
||||
this.strategyConfig = strategyConfig;
|
||||
}
|
||||
}
|
||||
|
||||
public static class PreviewRequest implements Serializable {
|
||||
private BigInteger knowledgeId;
|
||||
private List<PreviewFileRequest> files = new ArrayList<PreviewFileRequest>();
|
||||
|
||||
public BigInteger getKnowledgeId() {
|
||||
return knowledgeId;
|
||||
}
|
||||
|
||||
public void setKnowledgeId(BigInteger knowledgeId) {
|
||||
this.knowledgeId = knowledgeId;
|
||||
}
|
||||
|
||||
public List<PreviewFileRequest> getFiles() {
|
||||
return files;
|
||||
}
|
||||
|
||||
public void setFiles(List<PreviewFileRequest> files) {
|
||||
this.files = files;
|
||||
}
|
||||
}
|
||||
|
||||
public static class CommitRequest implements Serializable {
|
||||
private BigInteger knowledgeId;
|
||||
private List<String> previewSessionIds = new ArrayList<String>();
|
||||
|
||||
public BigInteger getKnowledgeId() {
|
||||
return knowledgeId;
|
||||
}
|
||||
|
||||
public void setKnowledgeId(BigInteger knowledgeId) {
|
||||
this.knowledgeId = knowledgeId;
|
||||
}
|
||||
|
||||
public List<String> getPreviewSessionIds() {
|
||||
return previewSessionIds;
|
||||
}
|
||||
|
||||
public void setPreviewSessionIds(List<String> previewSessionIds) {
|
||||
this.previewSessionIds = previewSessionIds;
|
||||
}
|
||||
}
|
||||
|
||||
public static class SplitterProfileSaveRequest implements Serializable {
|
||||
private BigInteger knowledgeId;
|
||||
private String defaultStrategyCode;
|
||||
private Boolean autoRecommendEnabled;
|
||||
private String fallbackStrategyCode;
|
||||
private Map<String, Object> strategyProfiles = new LinkedHashMap<String, Object>();
|
||||
|
||||
public BigInteger getKnowledgeId() {
|
||||
return knowledgeId;
|
||||
}
|
||||
|
||||
public void setKnowledgeId(BigInteger knowledgeId) {
|
||||
this.knowledgeId = knowledgeId;
|
||||
}
|
||||
|
||||
public String getDefaultStrategyCode() {
|
||||
return defaultStrategyCode;
|
||||
}
|
||||
|
||||
public void setDefaultStrategyCode(String defaultStrategyCode) {
|
||||
this.defaultStrategyCode = defaultStrategyCode;
|
||||
}
|
||||
|
||||
public Boolean getAutoRecommendEnabled() {
|
||||
return autoRecommendEnabled;
|
||||
}
|
||||
|
||||
public void setAutoRecommendEnabled(Boolean autoRecommendEnabled) {
|
||||
this.autoRecommendEnabled = autoRecommendEnabled;
|
||||
}
|
||||
|
||||
public String getFallbackStrategyCode() {
|
||||
return fallbackStrategyCode;
|
||||
}
|
||||
|
||||
public void setFallbackStrategyCode(String fallbackStrategyCode) {
|
||||
this.fallbackStrategyCode = fallbackStrategyCode;
|
||||
}
|
||||
|
||||
public Map<String, Object> getStrategyProfiles() {
|
||||
return strategyProfiles;
|
||||
}
|
||||
|
||||
public void setStrategyProfiles(Map<String, Object> strategyProfiles) {
|
||||
this.strategyProfiles = strategyProfiles;
|
||||
}
|
||||
}
|
||||
|
||||
public static class AnalyzeItem implements Serializable {
|
||||
private String filePath;
|
||||
private String fileName;
|
||||
private AnalysisResult analysis;
|
||||
private StrategyConfig strategyConfig = StrategyConfig.defaults();
|
||||
|
||||
public String getFilePath() {
|
||||
return filePath;
|
||||
}
|
||||
|
||||
public void setFilePath(String filePath) {
|
||||
this.filePath = filePath;
|
||||
}
|
||||
|
||||
public String getFileName() {
|
||||
return fileName;
|
||||
}
|
||||
|
||||
public void setFileName(String fileName) {
|
||||
this.fileName = fileName;
|
||||
}
|
||||
|
||||
public AnalysisResult getAnalysis() {
|
||||
return analysis;
|
||||
}
|
||||
|
||||
public void setAnalysis(AnalysisResult analysis) {
|
||||
this.analysis = analysis;
|
||||
}
|
||||
|
||||
public StrategyConfig getStrategyConfig() {
|
||||
return strategyConfig;
|
||||
}
|
||||
|
||||
public void setStrategyConfig(StrategyConfig strategyConfig) {
|
||||
this.strategyConfig = strategyConfig;
|
||||
}
|
||||
}
|
||||
|
||||
public static class AnalyzeResponse implements Serializable {
|
||||
private Integer total;
|
||||
private List<AnalyzeItem> items = new ArrayList<AnalyzeItem>();
|
||||
|
||||
public Integer getTotal() {
|
||||
return total;
|
||||
}
|
||||
|
||||
public void setTotal(Integer total) {
|
||||
this.total = total;
|
||||
}
|
||||
|
||||
public List<AnalyzeItem> getItems() {
|
||||
return items;
|
||||
}
|
||||
|
||||
public void setItems(List<AnalyzeItem> items) {
|
||||
this.items = items;
|
||||
}
|
||||
}
|
||||
|
||||
public static class PreviewFileResult implements Serializable {
|
||||
private String previewSessionId;
|
||||
private String filePath;
|
||||
private String fileName;
|
||||
private String strategyCode;
|
||||
private String strategyLabel;
|
||||
private AnalysisResult analysis;
|
||||
private Integer totalChunks;
|
||||
private Integer totalWarnings;
|
||||
private List<RagChunk> chunks = new ArrayList<RagChunk>();
|
||||
|
||||
public String getPreviewSessionId() {
|
||||
return previewSessionId;
|
||||
}
|
||||
|
||||
public void setPreviewSessionId(String previewSessionId) {
|
||||
this.previewSessionId = previewSessionId;
|
||||
}
|
||||
|
||||
public String getFilePath() {
|
||||
return filePath;
|
||||
}
|
||||
|
||||
public void setFilePath(String filePath) {
|
||||
this.filePath = filePath;
|
||||
}
|
||||
|
||||
public String getFileName() {
|
||||
return fileName;
|
||||
}
|
||||
|
||||
public void setFileName(String fileName) {
|
||||
this.fileName = fileName;
|
||||
}
|
||||
|
||||
public String getStrategyCode() {
|
||||
return strategyCode;
|
||||
}
|
||||
|
||||
public void setStrategyCode(String strategyCode) {
|
||||
this.strategyCode = strategyCode;
|
||||
}
|
||||
|
||||
public String getStrategyLabel() {
|
||||
return strategyLabel;
|
||||
}
|
||||
|
||||
public void setStrategyLabel(String strategyLabel) {
|
||||
this.strategyLabel = strategyLabel;
|
||||
}
|
||||
|
||||
public AnalysisResult getAnalysis() {
|
||||
return analysis;
|
||||
}
|
||||
|
||||
public void setAnalysis(AnalysisResult analysis) {
|
||||
this.analysis = analysis;
|
||||
}
|
||||
|
||||
public Integer getTotalChunks() {
|
||||
return totalChunks;
|
||||
}
|
||||
|
||||
public void setTotalChunks(Integer totalChunks) {
|
||||
this.totalChunks = totalChunks;
|
||||
}
|
||||
|
||||
public Integer getTotalWarnings() {
|
||||
return totalWarnings;
|
||||
}
|
||||
|
||||
public void setTotalWarnings(Integer totalWarnings) {
|
||||
this.totalWarnings = totalWarnings;
|
||||
}
|
||||
|
||||
public List<RagChunk> getChunks() {
|
||||
return chunks;
|
||||
}
|
||||
|
||||
public void setChunks(List<RagChunk> chunks) {
|
||||
this.chunks = chunks;
|
||||
}
|
||||
}
|
||||
|
||||
public static class PreviewResponse implements Serializable {
|
||||
private Integer totalFiles;
|
||||
private Integer totalChunks;
|
||||
private List<PreviewFileResult> items = new ArrayList<PreviewFileResult>();
|
||||
|
||||
public Integer getTotalFiles() {
|
||||
return totalFiles;
|
||||
}
|
||||
|
||||
public void setTotalFiles(Integer totalFiles) {
|
||||
this.totalFiles = totalFiles;
|
||||
}
|
||||
|
||||
public Integer getTotalChunks() {
|
||||
return totalChunks;
|
||||
}
|
||||
|
||||
public void setTotalChunks(Integer totalChunks) {
|
||||
this.totalChunks = totalChunks;
|
||||
}
|
||||
|
||||
public List<PreviewFileResult> getItems() {
|
||||
return items;
|
||||
}
|
||||
|
||||
public void setItems(List<PreviewFileResult> items) {
|
||||
this.items = items;
|
||||
}
|
||||
}
|
||||
|
||||
public static class CommitFileResult implements Serializable {
|
||||
private String previewSessionId;
|
||||
private String fileName;
|
||||
private Boolean success;
|
||||
private String reason;
|
||||
private BigInteger documentId;
|
||||
private Integer chunkCount;
|
||||
|
||||
public String getPreviewSessionId() {
|
||||
return previewSessionId;
|
||||
}
|
||||
|
||||
public void setPreviewSessionId(String previewSessionId) {
|
||||
this.previewSessionId = previewSessionId;
|
||||
}
|
||||
|
||||
public String getFileName() {
|
||||
return fileName;
|
||||
}
|
||||
|
||||
public void setFileName(String fileName) {
|
||||
this.fileName = fileName;
|
||||
}
|
||||
|
||||
public Boolean getSuccess() {
|
||||
return success;
|
||||
}
|
||||
|
||||
public void setSuccess(Boolean success) {
|
||||
this.success = success;
|
||||
}
|
||||
|
||||
public String getReason() {
|
||||
return reason;
|
||||
}
|
||||
|
||||
public void setReason(String reason) {
|
||||
this.reason = reason;
|
||||
}
|
||||
|
||||
public BigInteger getDocumentId() {
|
||||
return documentId;
|
||||
}
|
||||
|
||||
public void setDocumentId(BigInteger documentId) {
|
||||
this.documentId = documentId;
|
||||
}
|
||||
|
||||
public Integer getChunkCount() {
|
||||
return chunkCount;
|
||||
}
|
||||
|
||||
public void setChunkCount(Integer chunkCount) {
|
||||
this.chunkCount = chunkCount;
|
||||
}
|
||||
}
|
||||
|
||||
public static class CommitResponse implements Serializable {
|
||||
private Integer totalFiles;
|
||||
private Integer successCount;
|
||||
private Integer errorCount;
|
||||
private List<CommitFileResult> results = new ArrayList<CommitFileResult>();
|
||||
|
||||
public Integer getTotalFiles() {
|
||||
return totalFiles;
|
||||
}
|
||||
|
||||
public void setTotalFiles(Integer totalFiles) {
|
||||
this.totalFiles = totalFiles;
|
||||
}
|
||||
|
||||
public Integer getSuccessCount() {
|
||||
return successCount;
|
||||
}
|
||||
|
||||
public void setSuccessCount(Integer successCount) {
|
||||
this.successCount = successCount;
|
||||
}
|
||||
|
||||
public Integer getErrorCount() {
|
||||
return errorCount;
|
||||
}
|
||||
|
||||
public void setErrorCount(Integer errorCount) {
|
||||
this.errorCount = errorCount;
|
||||
}
|
||||
|
||||
public List<CommitFileResult> getResults() {
|
||||
return results;
|
||||
}
|
||||
|
||||
public void setResults(List<CommitFileResult> results) {
|
||||
this.results = results;
|
||||
}
|
||||
}
|
||||
|
||||
public static class PreviewSession implements Serializable {
|
||||
private String sessionId;
|
||||
private BigInteger knowledgeId;
|
||||
private String filePath;
|
||||
private String fileName;
|
||||
private String sourceFormat;
|
||||
private StrategyConfig strategyConfig;
|
||||
private AnalysisResult analysis;
|
||||
private Document document;
|
||||
private List<DocumentChunk> documentChunks = new ArrayList<DocumentChunk>();
|
||||
private List<RagChunk> previewChunks = new ArrayList<RagChunk>();
|
||||
private Date createdAt;
|
||||
|
||||
public String getSessionId() {
|
||||
return sessionId;
|
||||
}
|
||||
|
||||
public void setSessionId(String sessionId) {
|
||||
this.sessionId = sessionId;
|
||||
}
|
||||
|
||||
public BigInteger getKnowledgeId() {
|
||||
return knowledgeId;
|
||||
}
|
||||
|
||||
public void setKnowledgeId(BigInteger knowledgeId) {
|
||||
this.knowledgeId = knowledgeId;
|
||||
}
|
||||
|
||||
public String getFilePath() {
|
||||
return filePath;
|
||||
}
|
||||
|
||||
public void setFilePath(String filePath) {
|
||||
this.filePath = filePath;
|
||||
}
|
||||
|
||||
public String getFileName() {
|
||||
return fileName;
|
||||
}
|
||||
|
||||
public void setFileName(String fileName) {
|
||||
this.fileName = fileName;
|
||||
}
|
||||
|
||||
public String getSourceFormat() {
|
||||
return sourceFormat;
|
||||
}
|
||||
|
||||
public void setSourceFormat(String sourceFormat) {
|
||||
this.sourceFormat = sourceFormat;
|
||||
}
|
||||
|
||||
public StrategyConfig getStrategyConfig() {
|
||||
return strategyConfig;
|
||||
}
|
||||
|
||||
public void setStrategyConfig(StrategyConfig strategyConfig) {
|
||||
this.strategyConfig = strategyConfig;
|
||||
}
|
||||
|
||||
public AnalysisResult getAnalysis() {
|
||||
return analysis;
|
||||
}
|
||||
|
||||
public void setAnalysis(AnalysisResult analysis) {
|
||||
this.analysis = analysis;
|
||||
}
|
||||
|
||||
public Document getDocument() {
|
||||
return document;
|
||||
}
|
||||
|
||||
public void setDocument(Document document) {
|
||||
this.document = document;
|
||||
}
|
||||
|
||||
public List<DocumentChunk> getDocumentChunks() {
|
||||
return documentChunks;
|
||||
}
|
||||
|
||||
public void setDocumentChunks(List<DocumentChunk> documentChunks) {
|
||||
this.documentChunks = documentChunks;
|
||||
}
|
||||
|
||||
public List<RagChunk> getPreviewChunks() {
|
||||
return previewChunks;
|
||||
}
|
||||
|
||||
public void setPreviewChunks(List<RagChunk> previewChunks) {
|
||||
this.previewChunks = previewChunks;
|
||||
}
|
||||
|
||||
public Date getCreatedAt() {
|
||||
return createdAt;
|
||||
}
|
||||
|
||||
public void setCreatedAt(Date createdAt) {
|
||||
this.createdAt = createdAt;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,21 @@
|
||||
package tech.easyflow.ai.documentimport;
|
||||
|
||||
/**
 * String constants used by the document import flow: the cache key prefix for preview
 * sessions and the option keys describing splitter settings.
 */
public final class DocumentImportKeys {

    /** Prefix prepended to a preview session id to form its cache key. */
    public static final String CACHE_KEY_PREFIX = "easyflow:document:import:preview:";

    // Splitter profile keys (default strategy, auto-recommend switch, fallback, profiles).
    public static final String KEY_SPLITTER_DEFAULT_STRATEGY = "splitter.defaultStrategyCode";
    public static final String KEY_SPLITTER_AUTO_RECOMMEND_ENABLED = "splitter.autoRecommendEnabled";
    public static final String KEY_SPLITTER_FALLBACK_STRATEGY = "splitter.fallbackStrategyCode";
    public static final String KEY_SPLITTER_STRATEGY_PROFILES = "splitter.strategyProfiles";

    // Keys written into a document's options when it is built from a preview.
    public static final String KEY_DOCUMENT_STRATEGY_CODE = "splitter.strategyCode";
    public static final String KEY_DOCUMENT_STRATEGY_LABEL = "splitter.strategyLabel";
    public static final String KEY_DOCUMENT_STRATEGY_SNAPSHOT = "splitter.strategySnapshot";
    public static final String KEY_DOCUMENT_ANALYSIS_SUMMARY = "splitter.analysisSummary";
    public static final String KEY_DOCUMENT_SOURCE_FILE_EXT = "splitter.sourceFileExt";
    public static final String KEY_DOCUMENT_PREVIEW_VERSION = "splitter.previewVersion";

    /** Constants holder; never instantiated. */
    private DocumentImportKeys() {
        // intentionally empty
    }
}
|
||||
@@ -0,0 +1,45 @@
|
||||
package tech.easyflow.ai.documentimport;
|
||||
|
||||
import com.alicp.jetcache.Cache;
|
||||
import org.springframework.beans.factory.annotation.Qualifier;
|
||||
import org.springframework.stereotype.Service;
|
||||
import tech.easyflow.common.web.exceptions.BusinessException;
|
||||
|
||||
import java.time.Duration;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.UUID;
|
||||
|
||||
@Service
|
||||
public class DocumentImportPreviewService {
|
||||
|
||||
private static final Duration SESSION_TTL = Duration.ofMinutes(30);
|
||||
|
||||
private final Cache<String, Object> defaultCache;
|
||||
|
||||
public DocumentImportPreviewService(@Qualifier("defaultCache") Cache<String, Object> defaultCache) {
|
||||
this.defaultCache = defaultCache;
|
||||
}
|
||||
|
||||
public String put(DocumentImportDtos.PreviewSession session) {
|
||||
String sessionId = UUID.randomUUID().toString().replace("-", "");
|
||||
session.setSessionId(sessionId);
|
||||
defaultCache.put(buildKey(sessionId), session, SESSION_TTL.toMinutes(), TimeUnit.MINUTES);
|
||||
return sessionId;
|
||||
}
|
||||
|
||||
public DocumentImportDtos.PreviewSession getRequired(String sessionId) {
|
||||
Object cached = defaultCache.get(buildKey(sessionId));
|
||||
if (!(cached instanceof DocumentImportDtos.PreviewSession)) {
|
||||
throw new BusinessException("预览会话已失效,请重新生成预览");
|
||||
}
|
||||
return (DocumentImportDtos.PreviewSession) cached;
|
||||
}
|
||||
|
||||
public void remove(String sessionId) {
|
||||
defaultCache.remove(buildKey(sessionId));
|
||||
}
|
||||
|
||||
private String buildKey(String sessionId) {
|
||||
return DocumentImportKeys.CACHE_KEY_PREFIX + sessionId;
|
||||
}
|
||||
}
|
||||
@@ -3,6 +3,7 @@ package tech.easyflow.ai.service;
|
||||
import tech.easyflow.ai.entity.Document;
|
||||
import com.mybatisflex.core.paginate.Page;
|
||||
import com.mybatisflex.core.service.IService;
|
||||
import tech.easyflow.ai.documentimport.DocumentImportDtos;
|
||||
import tech.easyflow.ai.entity.DocumentChunk;
|
||||
import tech.easyflow.ai.entity.DocumentCollectionSplitParams;
|
||||
import tech.easyflow.common.domain.Result;
|
||||
@@ -25,4 +26,10 @@ public interface DocumentService extends IService<Document> {
|
||||
Result textSplit(DocumentCollectionSplitParams documentCollectionSplitParams);
|
||||
|
||||
Result saveTextResult(List<DocumentChunk> documentChunks, Document document);
|
||||
|
||||
/**
 * Analyzes the files of an import request.
 *
 * @param request the target knowledge base and the files to analyze
 * @return a result wrapping the per-file analysis items and their count
 */
Result<DocumentImportDtos.AnalyzeResponse> analyzeImport(DocumentImportDtos.AnalyzeRequest request);
|
||||
|
||||
/**
 * Generates a chunking preview for the files of an import request.
 *
 * @param request the target knowledge base and the files (with strategy configuration) to preview
 * @return a result wrapping the per-file preview results, including the preview session ids
 *         that a later {@link #commitImport} call refers to
 */
Result<DocumentImportDtos.PreviewResponse> previewImport(DocumentImportDtos.PreviewRequest request);
|
||||
|
||||
/**
 * Commits previously generated previews into the knowledge base.
 *
 * @param request the target knowledge base and the preview session ids to commit
 * @return a result wrapping the per-session outcomes and success/error counters
 */
Result<DocumentImportDtos.CommitResponse> commitImport(DocumentImportDtos.CommitRequest request);
|
||||
}
|
||||
|
||||
@@ -12,6 +12,12 @@ import com.easyagents.core.model.embedding.EmbeddingOptions;
|
||||
import com.easyagents.core.store.DocumentStore;
|
||||
import com.easyagents.core.store.StoreOptions;
|
||||
import com.easyagents.core.store.StoreResult;
|
||||
import com.easyagents.rag.core.RagChunk;
|
||||
import com.easyagents.rag.core.RagDefaults;
|
||||
import com.easyagents.rag.core.RagStrategyCodes;
|
||||
import com.easyagents.rag.ingestion.RagIngestionService;
|
||||
import com.easyagents.rag.ingestion.model.AnalysisResult;
|
||||
import com.easyagents.rag.ingestion.model.StrategyConfig;
|
||||
import com.easyagents.search.engine.service.DocumentSearcher;
|
||||
import com.mybatisflex.core.keygen.impl.FlexIDKeyGenerator;
|
||||
import com.mybatisflex.core.paginate.Page;
|
||||
@@ -24,6 +30,9 @@ import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.stereotype.Service;
|
||||
import org.springframework.transaction.annotation.Transactional;
|
||||
import tech.easyflow.ai.config.SearcherFactory;
|
||||
import tech.easyflow.ai.documentimport.DocumentImportDtos;
|
||||
import tech.easyflow.ai.documentimport.DocumentImportKeys;
|
||||
import tech.easyflow.ai.documentimport.DocumentImportPreviewService;
|
||||
import tech.easyflow.ai.entity.*;
|
||||
import tech.easyflow.ai.mapper.DocumentChunkMapper;
|
||||
import tech.easyflow.ai.mapper.DocumentMapper;
|
||||
@@ -42,6 +51,7 @@ import javax.annotation.Resource;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.math.BigInteger;
|
||||
import java.math.BigDecimal;
|
||||
import java.util.*;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
|
||||
@@ -81,6 +91,12 @@ public class DocumentServiceImpl extends ServiceImpl<DocumentMapper, Document> i
|
||||
@Autowired
|
||||
private SearcherFactory searcherFactory;
|
||||
|
||||
@Autowired
|
||||
private RagIngestionService ragIngestionService;
|
||||
|
||||
@Autowired
|
||||
private DocumentImportPreviewService documentImportPreviewService;
|
||||
|
||||
@Override
|
||||
public Page<Document> getDocumentList(String knowledgeId, int pageSize, int pageNum, String fileName) {
|
||||
QueryWrapper queryWrapper=QueryWrapper.create()
|
||||
@@ -250,23 +266,397 @@ public class DocumentServiceImpl extends ServiceImpl<DocumentMapper, Document> i
|
||||
return Result.fail(1, "切割结果无有效文本,无法进行向量化");
|
||||
}
|
||||
|
||||
Boolean result = storeDocument(document, validChunks);
|
||||
if (result) {
|
||||
this.getMapper().insert(document);
|
||||
AtomicInteger sort = new AtomicInteger(1);
|
||||
validChunks.forEach(item -> {
|
||||
item.setDocumentCollectionId(document.getCollectionId());
|
||||
item.setSorting(sort.get());
|
||||
item.setDocumentId(document.getId());
|
||||
sort.getAndIncrement();
|
||||
documentChunkService.save(item);
|
||||
});
|
||||
StoreExecutionContext storeContext = prepareStoreContext(document);
|
||||
storeDocumentChunks(storeContext, validChunks);
|
||||
try {
|
||||
persistDocumentWithChunks(document, validChunks);
|
||||
updateKnowledgeAfterStore(storeContext);
|
||||
return Result.ok();
|
||||
} catch (Exception e) {
|
||||
cleanupPersistedDocument(document);
|
||||
rollbackStoredChunks(storeContext, validChunks);
|
||||
Log.error("保存文档失败: documentId={}, title={}", document.getId(), document.getTitle(), e);
|
||||
throw new BusinessException("保存失败:" + e.getMessage());
|
||||
}
|
||||
return Result.fail(1, "保存失败");
|
||||
}
|
||||
|
||||
protected Boolean storeDocument(Document entity, List<DocumentChunk> documentChunks) {
|
||||
StoreExecutionContext storeContext = prepareStoreContext(entity);
|
||||
storeDocumentChunks(storeContext, documentChunks);
|
||||
updateKnowledgeAfterStore(storeContext);
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Result<DocumentImportDtos.AnalyzeResponse> analyzeImport(DocumentImportDtos.AnalyzeRequest request) {
|
||||
DocumentCollection knowledge = assertDocumentCollection(request.getKnowledgeId());
|
||||
if (request.getFiles() == null || request.getFiles().isEmpty()) {
|
||||
throw new BusinessException("请先上传文件");
|
||||
}
|
||||
|
||||
List<DocumentImportDtos.AnalyzeItem> items = new ArrayList<>();
|
||||
for (DocumentImportDtos.FileItem file : request.getFiles()) {
|
||||
AnalysisResult analysis = analyzeSingleFile(file.getFilePath(), file.getFileName());
|
||||
StrategyConfig strategyConfig = resolveStrategyConfig(
|
||||
knowledge,
|
||||
null,
|
||||
analysis
|
||||
);
|
||||
|
||||
DocumentImportDtos.AnalyzeItem item = new DocumentImportDtos.AnalyzeItem();
|
||||
item.setFilePath(file.getFilePath());
|
||||
item.setFileName(file.getFileName());
|
||||
item.setAnalysis(analysis);
|
||||
item.setStrategyConfig(strategyConfig);
|
||||
items.add(item);
|
||||
}
|
||||
|
||||
DocumentImportDtos.AnalyzeResponse response = new DocumentImportDtos.AnalyzeResponse();
|
||||
response.setItems(items);
|
||||
response.setTotal(items.size());
|
||||
return Result.ok(response);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Result<DocumentImportDtos.PreviewResponse> previewImport(DocumentImportDtos.PreviewRequest request) {
|
||||
DocumentCollection knowledge = assertDocumentCollection(request.getKnowledgeId());
|
||||
if (request.getFiles() == null || request.getFiles().isEmpty()) {
|
||||
throw new BusinessException("请先上传文件");
|
||||
}
|
||||
|
||||
List<DocumentImportDtos.PreviewFileResult> items = new ArrayList<>();
|
||||
int totalChunks = 0;
|
||||
for (DocumentImportDtos.PreviewFileRequest file : request.getFiles()) {
|
||||
DocumentImportDtos.PreviewSession session = buildPreviewSession(knowledge, file);
|
||||
String sessionId = documentImportPreviewService.put(session);
|
||||
|
||||
DocumentImportDtos.PreviewFileResult item = new DocumentImportDtos.PreviewFileResult();
|
||||
item.setPreviewSessionId(sessionId);
|
||||
item.setFilePath(file.getFilePath());
|
||||
item.setFileName(file.getFileName());
|
||||
item.setStrategyCode(session.getStrategyConfig().getStrategyCode());
|
||||
item.setStrategyLabel(ragIngestionService.toStrategyLabel(session.getStrategyConfig().getStrategyCode()));
|
||||
item.setAnalysis(session.getAnalysis());
|
||||
item.setChunks(session.getPreviewChunks());
|
||||
item.setTotalChunks(session.getPreviewChunks().size());
|
||||
item.setTotalWarnings(countWarnings(session.getPreviewChunks()));
|
||||
items.add(item);
|
||||
totalChunks += session.getPreviewChunks().size();
|
||||
}
|
||||
|
||||
DocumentImportDtos.PreviewResponse response = new DocumentImportDtos.PreviewResponse();
|
||||
response.setItems(items);
|
||||
response.setTotalFiles(items.size());
|
||||
response.setTotalChunks(totalChunks);
|
||||
return Result.ok(response);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Result<DocumentImportDtos.CommitResponse> commitImport(DocumentImportDtos.CommitRequest request) {
|
||||
DocumentCollection knowledge = assertDocumentCollection(request.getKnowledgeId());
|
||||
if (request.getPreviewSessionIds() == null || request.getPreviewSessionIds().isEmpty()) {
|
||||
throw new BusinessException("请选择需要提交的预览结果");
|
||||
}
|
||||
|
||||
List<DocumentImportDtos.CommitFileResult> results = new ArrayList<>();
|
||||
int successCount = 0;
|
||||
int errorCount = 0;
|
||||
for (String previewSessionId : request.getPreviewSessionIds()) {
|
||||
DocumentImportDtos.CommitFileResult result = new DocumentImportDtos.CommitFileResult();
|
||||
result.setPreviewSessionId(previewSessionId);
|
||||
try {
|
||||
DocumentImportDtos.PreviewSession session = documentImportPreviewService.getRequired(previewSessionId);
|
||||
if (!Objects.equals(session.getKnowledgeId(), knowledge.getId())) {
|
||||
throw new BusinessException("预览会话与当前知识库不匹配");
|
||||
}
|
||||
commitSingleSession(session);
|
||||
result.setSuccess(true);
|
||||
result.setFileName(session.getFileName());
|
||||
result.setDocumentId(session.getDocument().getId());
|
||||
result.setChunkCount(session.getDocumentChunks().size());
|
||||
documentImportPreviewService.remove(previewSessionId);
|
||||
successCount++;
|
||||
} catch (Exception e) {
|
||||
result.setSuccess(false);
|
||||
result.setReason(e.getMessage());
|
||||
errorCount++;
|
||||
}
|
||||
results.add(result);
|
||||
}
|
||||
|
||||
DocumentImportDtos.CommitResponse response = new DocumentImportDtos.CommitResponse();
|
||||
response.setTotalFiles(results.size());
|
||||
response.setSuccessCount(successCount);
|
||||
response.setErrorCount(errorCount);
|
||||
response.setResults(results);
|
||||
return Result.ok(response);
|
||||
}
|
||||
|
||||
private void commitSingleSession(DocumentImportDtos.PreviewSession session) {
|
||||
Document document = session.getDocument();
|
||||
document.setCreated(new Date());
|
||||
document.setModified(new Date());
|
||||
document.setCreatedBy(BigInteger.valueOf(StpUtil.getLoginIdAsLong()));
|
||||
document.setModifiedBy(BigInteger.valueOf(StpUtil.getLoginIdAsLong()));
|
||||
for (DocumentChunk chunk : session.getDocumentChunks()) {
|
||||
chunk.setDocumentId(document.getId());
|
||||
chunk.setDocumentCollectionId(document.getCollectionId());
|
||||
}
|
||||
|
||||
StoreExecutionContext storeContext = prepareStoreContext(document);
|
||||
storeDocumentChunks(storeContext, session.getDocumentChunks());
|
||||
try {
|
||||
persistDocumentWithChunks(document, session.getDocumentChunks());
|
||||
updateKnowledgeAfterStore(storeContext);
|
||||
} catch (Exception e) {
|
||||
cleanupPersistedDocument(document);
|
||||
rollbackStoredChunks(storeContext, session.getDocumentChunks());
|
||||
throw new BusinessException("提交导入失败:" + e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
private DocumentImportDtos.PreviewSession buildPreviewSession(DocumentCollection knowledge,
|
||||
DocumentImportDtos.PreviewFileRequest fileRequest) {
|
||||
AnalysisResult analysis = analyzeSingleFile(fileRequest.getFilePath(), fileRequest.getFileName());
|
||||
StrategyConfig strategyConfig = resolveStrategyConfig(knowledge, fileRequest.getStrategyConfig(), analysis);
|
||||
List<RagChunk> previewChunks = ragIngestionService.split(analysis, strategyConfig);
|
||||
if (previewChunks.isEmpty()) {
|
||||
throw new BusinessException("未生成有效分块,请调整策略后重试");
|
||||
}
|
||||
|
||||
FlexIDKeyGenerator flexIDKeyGenerator = new FlexIDKeyGenerator();
|
||||
Document document = buildPreviewDocument(flexIDKeyGenerator, knowledge, fileRequest, analysis, strategyConfig);
|
||||
List<DocumentChunk> documentChunks = buildDocumentChunks(flexIDKeyGenerator, document, previewChunks);
|
||||
|
||||
DocumentImportDtos.PreviewSession session = new DocumentImportDtos.PreviewSession();
|
||||
session.setKnowledgeId(knowledge.getId());
|
||||
session.setFilePath(fileRequest.getFilePath());
|
||||
session.setFileName(fileRequest.getFileName());
|
||||
session.setSourceFormat(analysis.getSourceFormat());
|
||||
session.setStrategyConfig(strategyConfig);
|
||||
session.setAnalysis(analysis);
|
||||
session.setDocument(document);
|
||||
session.setDocumentChunks(documentChunks);
|
||||
session.setPreviewChunks(previewChunks);
|
||||
session.setCreatedAt(new Date());
|
||||
return session;
|
||||
}
|
||||
|
||||
private Document buildPreviewDocument(FlexIDKeyGenerator flexIDKeyGenerator,
|
||||
DocumentCollection knowledge,
|
||||
DocumentImportDtos.PreviewFileRequest fileRequest,
|
||||
AnalysisResult analysis,
|
||||
StrategyConfig strategyConfig) {
|
||||
Document document = new Document();
|
||||
document.setId(new BigInteger(String.valueOf(flexIDKeyGenerator.generate(document, null))));
|
||||
document.setCollectionId(knowledge.getId());
|
||||
document.setDocumentType(analysis.getSourceFormat());
|
||||
document.setDocumentPath(fileRequest.getFilePath());
|
||||
document.setTitle(fileRequest.getFileName());
|
||||
document.setContent(analysis.getNormalizedContent());
|
||||
document.setCreated(new Date());
|
||||
document.setModified(new Date());
|
||||
document.setModifiedBy(BigInteger.valueOf(StpUtil.getLoginIdAsLong()));
|
||||
|
||||
Map<String, Object> options = new LinkedHashMap<>();
|
||||
options.put(DocumentImportKeys.KEY_DOCUMENT_STRATEGY_CODE, strategyConfig.getStrategyCode());
|
||||
options.put(DocumentImportKeys.KEY_DOCUMENT_STRATEGY_LABEL, ragIngestionService.toStrategyLabel(strategyConfig.getStrategyCode()));
|
||||
options.put(DocumentImportKeys.KEY_DOCUMENT_STRATEGY_SNAPSHOT, strategyConfigToMap(strategyConfig));
|
||||
options.put(DocumentImportKeys.KEY_DOCUMENT_ANALYSIS_SUMMARY, analysis.getFeatures());
|
||||
options.put(DocumentImportKeys.KEY_DOCUMENT_SOURCE_FILE_EXT, analysis.getSourceFormat());
|
||||
options.put(DocumentImportKeys.KEY_DOCUMENT_PREVIEW_VERSION, "v1");
|
||||
document.setOptions(options);
|
||||
return document;
|
||||
}
|
||||
|
||||
private List<DocumentChunk> buildDocumentChunks(FlexIDKeyGenerator flexIDKeyGenerator,
|
||||
Document document,
|
||||
List<RagChunk> previewChunks) {
|
||||
List<DocumentChunk> chunks = new ArrayList<>();
|
||||
for (int i = 0; i < previewChunks.size(); i++) {
|
||||
RagChunk previewChunk = previewChunks.get(i);
|
||||
DocumentChunk chunk = new DocumentChunk();
|
||||
chunk.setId(new BigInteger(String.valueOf(flexIDKeyGenerator.generate(chunk, null))));
|
||||
chunk.setDocumentId(document.getId());
|
||||
chunk.setDocumentCollectionId(document.getCollectionId());
|
||||
chunk.setContent(previewChunk.getContent());
|
||||
chunk.setSorting(i + 1);
|
||||
|
||||
Map<String, Object> options = new LinkedHashMap<>(previewChunk.getOptions());
|
||||
options.put("chunkType", previewChunk.getChunkType());
|
||||
options.put("sourceLabel", previewChunk.getSourceLabel());
|
||||
options.put("headingPath", previewChunk.getHeadingPath());
|
||||
options.put("charCount", previewChunk.getCharCount());
|
||||
options.put("tokenEstimate", previewChunk.getTokenEstimate());
|
||||
options.put("qaQuestion", previewChunk.getQuestion());
|
||||
options.put("qaAnswer", previewChunk.getAnswer());
|
||||
options.put("partNo", previewChunk.getPartNo());
|
||||
options.put("partTotal", previewChunk.getPartTotal());
|
||||
options.put("warnings", previewChunk.getWarnings());
|
||||
chunk.setOptions(options);
|
||||
chunks.add(chunk);
|
||||
}
|
||||
return chunks;
|
||||
}
|
||||
|
||||
private AnalysisResult analyzeSingleFile(String filePath, String fileName) {
|
||||
String fileExt = normalizeFileExtension(fileName, filePath);
|
||||
assertSupportedImportFile(fileExt);
|
||||
String content = readFileContent(filePath, fileName);
|
||||
return ragIngestionService.analyze(content, fileExt);
|
||||
}
|
||||
|
||||
private String readFileContent(String filePath, String fileName) {
|
||||
try (InputStream inputStream = storageService.readStream(filePath)) {
|
||||
return File2TextUtil.readFromStream(inputStream, fileName, null);
|
||||
} catch (IOException e) {
|
||||
Log.error("读取导入文件失败: filePath={}, fileName={}", filePath, fileName, e);
|
||||
throw new BusinessException("文件解析失败:" + e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
private void assertSupportedImportFile(String fileExt) {
|
||||
if (!Arrays.asList("pdf", "docx", "txt", "md").contains(fileExt)) {
|
||||
throw new BusinessException("当前仅支持 pdf/docx/txt/md 文档导入");
|
||||
}
|
||||
}
|
||||
|
||||
private String normalizeFileExtension(String fileName, String filePath) {
|
||||
String target = StringUtil.hasText(fileName) ? fileName : filePath;
|
||||
String ext = FileUtil.getFileTypeByExtension(target);
|
||||
return ext == null ? "" : ext.toLowerCase(Locale.ROOT);
|
||||
}
|
||||
|
||||
private DocumentCollection assertDocumentCollection(BigInteger knowledgeId) {
|
||||
DocumentCollection knowledge = knowledgeService.getById(knowledgeId);
|
||||
if (knowledge == null) {
|
||||
throw new BusinessException("知识库不存在");
|
||||
}
|
||||
if (knowledge.isFaqCollection()) {
|
||||
throw new BusinessException("FAQ知识库不支持文档上传");
|
||||
}
|
||||
return knowledge;
|
||||
}
|
||||
|
||||
private StrategyConfig resolveStrategyConfig(DocumentCollection knowledge,
|
||||
StrategyConfig requestConfig,
|
||||
AnalysisResult analysisResult) {
|
||||
Map<String, Object> options = knowledge.getOptions() == null
|
||||
? Collections.emptyMap()
|
||||
: knowledge.getOptions();
|
||||
String recommended = analysisResult.getRecommendedStrategyCode();
|
||||
String defaultStrategyCode = asString(options.get(DocumentImportKeys.KEY_SPLITTER_DEFAULT_STRATEGY));
|
||||
String fallbackStrategyCode = asString(options.get(DocumentImportKeys.KEY_SPLITTER_FALLBACK_STRATEGY));
|
||||
Boolean autoRecommendEnabled = asBoolean(options.get(DocumentImportKeys.KEY_SPLITTER_AUTO_RECOMMEND_ENABLED), true);
|
||||
|
||||
StrategyConfig config = readProfileConfig(options, defaultStrategyCode);
|
||||
if (config == null) {
|
||||
config = StrategyConfig.defaults();
|
||||
}
|
||||
String requestedStrategyCode = requestConfig == null ? null : requestConfig.getStrategyCode();
|
||||
|
||||
String strategyCode = StringUtil.hasText(requestedStrategyCode)
|
||||
? requestedStrategyCode
|
||||
: config.getStrategyCode();
|
||||
if (!StringUtil.hasText(strategyCode) || RagStrategyCodes.AUTO.equals(strategyCode)) {
|
||||
strategyCode = Boolean.TRUE.equals(autoRecommendEnabled)
|
||||
? recommended
|
||||
: (StringUtil.hasText(defaultStrategyCode) ? defaultStrategyCode : recommended);
|
||||
}
|
||||
if (!StringUtil.hasText(strategyCode)) {
|
||||
strategyCode = StringUtil.hasText(fallbackStrategyCode)
|
||||
? fallbackStrategyCode
|
||||
: RagStrategyCodes.PARAGRAPH_LENGTH;
|
||||
}
|
||||
|
||||
StrategyConfig profileConfig = readProfileConfig(options, strategyCode);
|
||||
if (profileConfig != null) {
|
||||
mergeStrategyConfig(config, profileConfig);
|
||||
}
|
||||
if (requestConfig != null) {
|
||||
mergeStrategyConfig(config, requestConfig);
|
||||
}
|
||||
config.setStrategyCode(strategyCode);
|
||||
if (config.getChunkSize() == null || config.getChunkSize() <= 0) {
|
||||
config.setChunkSize(RagDefaults.CHUNK_SIZE);
|
||||
}
|
||||
if (config.getOverlapSize() == null || config.getOverlapSize() < 0) {
|
||||
config.setOverlapSize(RagDefaults.OVERLAP_SIZE);
|
||||
}
|
||||
if (config.getMdSplitterLevel() == null || config.getMdSplitterLevel() <= 0) {
|
||||
config.setMdSplitterLevel(RagDefaults.MD_SPLITTER_LEVEL);
|
||||
}
|
||||
return config;
|
||||
}
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
private StrategyConfig readProfileConfig(Map<String, Object> options, String strategyCode) {
|
||||
if (!StringUtil.hasText(strategyCode)) {
|
||||
return null;
|
||||
}
|
||||
Object profileObject = options.get(DocumentImportKeys.KEY_SPLITTER_STRATEGY_PROFILES);
|
||||
if (!(profileObject instanceof Map)) {
|
||||
return null;
|
||||
}
|
||||
Object strategyObject = ((Map<String, Object>) profileObject).get(strategyCode);
|
||||
if (!(strategyObject instanceof Map)) {
|
||||
return null;
|
||||
}
|
||||
Map<String, Object> rawProfile = (Map<String, Object>) strategyObject;
|
||||
StrategyConfig config = StrategyConfig.defaults();
|
||||
config.setStrategyCode(strategyCode);
|
||||
config.setChunkSize(asInteger(rawProfile.get("chunkSize"), config.getChunkSize()));
|
||||
config.setOverlapSize(asInteger(rawProfile.get("overlapSize"), config.getOverlapSize()));
|
||||
config.setRegex(asString(rawProfile.get("regex")));
|
||||
config.setRowsPerChunk(asInteger(rawProfile.get("rowsPerChunk"), config.getRowsPerChunk()));
|
||||
config.setMdSplitterLevel(asInteger(rawProfile.get("mdSplitterLevel"), config.getMdSplitterLevel()));
|
||||
return config;
|
||||
}
|
||||
|
||||
private void mergeStrategyConfig(StrategyConfig target, StrategyConfig source) {
|
||||
if (source == null) {
|
||||
return;
|
||||
}
|
||||
if (StringUtil.hasText(source.getStrategyCode())) {
|
||||
target.setStrategyCode(source.getStrategyCode());
|
||||
}
|
||||
if (source.getChunkSize() != null) {
|
||||
target.setChunkSize(source.getChunkSize());
|
||||
}
|
||||
if (source.getOverlapSize() != null) {
|
||||
target.setOverlapSize(source.getOverlapSize());
|
||||
}
|
||||
if (StringUtil.hasText(source.getRegex())) {
|
||||
target.setRegex(source.getRegex());
|
||||
}
|
||||
if (source.getRowsPerChunk() != null) {
|
||||
target.setRowsPerChunk(source.getRowsPerChunk());
|
||||
}
|
||||
if (source.getMdSplitterLevel() != null) {
|
||||
target.setMdSplitterLevel(source.getMdSplitterLevel());
|
||||
}
|
||||
}
|
||||
|
||||
private Map<String, Object> strategyConfigToMap(StrategyConfig strategyConfig) {
|
||||
Map<String, Object> map = new LinkedHashMap<>();
|
||||
map.put("strategyCode", strategyConfig.getStrategyCode());
|
||||
map.put("chunkSize", strategyConfig.getChunkSize());
|
||||
map.put("overlapSize", strategyConfig.getOverlapSize());
|
||||
map.put("regex", strategyConfig.getRegex());
|
||||
map.put("rowsPerChunk", strategyConfig.getRowsPerChunk());
|
||||
map.put("mdSplitterLevel", strategyConfig.getMdSplitterLevel());
|
||||
return map;
|
||||
}
|
||||
|
||||
private int countWarnings(List<RagChunk> chunks) {
|
||||
int total = 0;
|
||||
for (RagChunk chunk : chunks) {
|
||||
total += chunk.getWarnings() == null ? 0 : chunk.getWarnings().size();
|
||||
}
|
||||
return total;
|
||||
}
|
||||
|
||||
private StoreExecutionContext prepareStoreContext(Document entity) {
|
||||
DocumentCollection knowledge = knowledgeService.getById(entity.getCollectionId());
|
||||
if (knowledge == null) {
|
||||
throw new BusinessException("知识库不存在");
|
||||
@@ -274,23 +664,22 @@ public class DocumentServiceImpl extends ServiceImpl<DocumentMapper, Document> i
|
||||
if (knowledge.isFaqCollection()) {
|
||||
throw new BusinessException("FAQ知识库不支持文档上传");
|
||||
}
|
||||
DocumentStore documentStore = null;
|
||||
|
||||
DocumentStore documentStore;
|
||||
try {
|
||||
documentStore = knowledge.toDocumentStore();
|
||||
} catch (Exception e) {
|
||||
Log.error(e.getMessage());
|
||||
Log.error("向量库配置错误: knowledgeId={}", knowledge.getId(), e);
|
||||
throw new BusinessException("向量数据库配置错误");
|
||||
}
|
||||
|
||||
if (documentStore == null) {
|
||||
throw new BusinessException("向量数据库配置错误");
|
||||
}
|
||||
// 设置向量模型
|
||||
|
||||
Model model = modelService.getModelInstance(knowledge.getVectorEmbedModelId());
|
||||
if (model == null) {
|
||||
throw new BusinessException("该知识库未配置大模型");
|
||||
}
|
||||
// 设置向量模型
|
||||
EmbeddingModel embeddingModel = model.toEmbeddingModel();
|
||||
documentStore.setEmbeddingModel(embeddingModel);
|
||||
|
||||
@@ -300,46 +689,152 @@ public class DocumentServiceImpl extends ServiceImpl<DocumentMapper, Document> i
|
||||
embeddingOptions.setDimensions(knowledge.getDimensionOfVectorModel());
|
||||
options.setEmbeddingOptions(embeddingOptions);
|
||||
options.setIndexName(options.getCollectionName());
|
||||
|
||||
DocumentSearcher searcher = null;
|
||||
if (knowledge.isSearchEngineEnabled()) {
|
||||
searcher = searcherFactory.getSearcher((String) knowledge.getOptionsByKey(KEY_SEARCH_ENGINE_TYPE));
|
||||
}
|
||||
return new StoreExecutionContext(knowledge, model, embeddingModel, documentStore, options, searcher);
|
||||
}
|
||||
|
||||
private void storeDocumentChunks(StoreExecutionContext storeContext, List<DocumentChunk> documentChunks) {
|
||||
List<com.easyagents.core.document.Document> documents = new ArrayList<>();
|
||||
documentChunks.forEach(item -> {
|
||||
com.easyagents.core.document.Document document = new com.easyagents.core.document.Document();
|
||||
document.setId(item.getId());
|
||||
document.setContent(item.getContent());
|
||||
documents.add(document);
|
||||
}
|
||||
);
|
||||
StoreResult result = null;
|
||||
for (DocumentChunk item : documentChunks) {
|
||||
com.easyagents.core.document.Document document = new com.easyagents.core.document.Document();
|
||||
document.setId(item.getId());
|
||||
document.setContent(item.getContent());
|
||||
documents.add(document);
|
||||
}
|
||||
|
||||
StoreResult result;
|
||||
try {
|
||||
result = documentStore.store(documents, options);
|
||||
result = storeContext.documentStore.store(documents, storeContext.options);
|
||||
} catch (Exception e) {
|
||||
Log.error("Vector store failed: knowledgeId={}, collection={}, chunkCount={}",
|
||||
knowledge.getId(), options.getCollectionName(), documents.size(), e);
|
||||
storeContext.knowledge.getId(),
|
||||
storeContext.options.getCollectionName(),
|
||||
documents.size(),
|
||||
e);
|
||||
throw new BusinessException("向量过程中发生错误,错误信息为:" + e.getMessage());
|
||||
}
|
||||
if (result == null || !result.isSuccess()) {
|
||||
Log.error("DocumentStore.store failed: " + result);
|
||||
if (result == null || !result.isSuccess()) {
|
||||
Log.error("DocumentStore.store failed: {}", result);
|
||||
throw new BusinessException("DocumentStore.store failed");
|
||||
}
|
||||
|
||||
if (knowledge.isSearchEngineEnabled()) {
|
||||
// 获取搜索引擎
|
||||
DocumentSearcher searcher = searcherFactory.getSearcher((String) knowledge.getOptionsByKey(KEY_SEARCH_ENGINE_TYPE));
|
||||
// 添加到搜索引擎
|
||||
documents.forEach(searcher::addDocument);
|
||||
if (storeContext.searcher != null) {
|
||||
for (com.easyagents.core.document.Document document : documents) {
|
||||
storeContext.searcher.addDocument(document);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void rollbackStoredChunks(StoreExecutionContext storeContext, List<DocumentChunk> documentChunks) {
|
||||
try {
|
||||
List<BigInteger> chunkIds = new ArrayList<>();
|
||||
for (DocumentChunk chunk : documentChunks) {
|
||||
chunkIds.add(chunk.getId());
|
||||
}
|
||||
storeContext.documentStore.delete(chunkIds, storeContext.options);
|
||||
if (storeContext.searcher != null) {
|
||||
for (BigInteger chunkId : chunkIds) {
|
||||
storeContext.searcher.deleteDocument(chunkId);
|
||||
}
|
||||
}
|
||||
} catch (Exception e) {
|
||||
Log.error("回滚向量文档失败: knowledgeId={}", storeContext.knowledge.getId(), e);
|
||||
}
|
||||
}
|
||||
|
||||
private void updateKnowledgeAfterStore(StoreExecutionContext storeContext) {
|
||||
DocumentCollection documentCollection = new DocumentCollection();
|
||||
documentCollection.setId(entity.getCollectionId());
|
||||
Map<String, Object> knowledgeOptions = knowledge.getOptions();
|
||||
documentCollection.setId(storeContext.knowledge.getId());
|
||||
Map<String, Object> knowledgeOptions = storeContext.knowledge.getOptions() == null
|
||||
? new HashMap<>()
|
||||
: new HashMap<>(storeContext.knowledge.getOptions());
|
||||
knowledgeOptions.put(KEY_CAN_UPDATE_EMBEDDING_MODEL, false);
|
||||
documentCollection.setOptions(knowledgeOptions);
|
||||
knowledgeService.updateById(documentCollection);
|
||||
if (knowledge.getDimensionOfVectorModel() == null) {
|
||||
int dimension = Model.getEmbeddingDimension(embeddingModel);
|
||||
knowledge.setDimensionOfVectorModel(dimension);
|
||||
knowledgeService.updateById(knowledge);
|
||||
|
||||
if (storeContext.knowledge.getDimensionOfVectorModel() == null) {
|
||||
int dimension = Model.getEmbeddingDimension(storeContext.embeddingModel);
|
||||
DocumentCollection update = new DocumentCollection();
|
||||
update.setId(storeContext.knowledge.getId());
|
||||
update.setDimensionOfVectorModel(dimension);
|
||||
knowledgeService.updateById(update);
|
||||
}
|
||||
}
|
||||
|
||||
private void persistDocumentWithChunks(Document document, List<DocumentChunk> chunks) {
|
||||
this.getMapper().insert(document);
|
||||
AtomicInteger sort = new AtomicInteger(1);
|
||||
for (DocumentChunk item : chunks) {
|
||||
item.setDocumentCollectionId(document.getCollectionId());
|
||||
item.setDocumentId(document.getId());
|
||||
item.setSorting(sort.getAndIncrement());
|
||||
documentChunkService.save(item);
|
||||
}
|
||||
}
|
||||
|
||||
private void cleanupPersistedDocument(Document document) {
|
||||
if (document == null || document.getId() == null) {
|
||||
return;
|
||||
}
|
||||
documentChunkMapper.deleteByQuery(QueryWrapper.create().eq(DocumentChunk::getDocumentId, document.getId()));
|
||||
this.getMapper().deleteById(document.getId());
|
||||
}
|
||||
|
||||
private String asString(Object value) {
|
||||
return value == null ? null : String.valueOf(value);
|
||||
}
|
||||
|
||||
private Integer asInteger(Object value, Integer defaultValue) {
|
||||
if (value == null) {
|
||||
return defaultValue;
|
||||
}
|
||||
if (value instanceof Number) {
|
||||
return ((Number) value).intValue();
|
||||
}
|
||||
if (value instanceof String && StringUtil.hasText((String) value)) {
|
||||
return Integer.parseInt((String) value);
|
||||
}
|
||||
return defaultValue;
|
||||
}
|
||||
|
||||
private Boolean asBoolean(Object value, boolean defaultValue) {
|
||||
if (value == null) {
|
||||
return defaultValue;
|
||||
}
|
||||
if (value instanceof Boolean) {
|
||||
return (Boolean) value;
|
||||
}
|
||||
if (value instanceof Number) {
|
||||
return ((Number) value).intValue() != 0;
|
||||
}
|
||||
return Boolean.parseBoolean(String.valueOf(value));
|
||||
}
|
||||
|
||||
private static class StoreExecutionContext {
|
||||
private final DocumentCollection knowledge;
|
||||
private final Model model;
|
||||
private final EmbeddingModel embeddingModel;
|
||||
private final DocumentStore documentStore;
|
||||
private final StoreOptions options;
|
||||
private final DocumentSearcher searcher;
|
||||
|
||||
private StoreExecutionContext(DocumentCollection knowledge,
|
||||
Model model,
|
||||
EmbeddingModel embeddingModel,
|
||||
DocumentStore documentStore,
|
||||
StoreOptions options,
|
||||
DocumentSearcher searcher) {
|
||||
this.knowledge = knowledge;
|
||||
this.model = model;
|
||||
this.embeddingModel = embeddingModel;
|
||||
this.documentStore = documentStore;
|
||||
this.options = options;
|
||||
this.searcher = searcher;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
public DocumentSplitter getDocumentSplitter(DocumentCollectionSplitParams params) {
|
||||
|
||||
Reference in New Issue
Block a user