知识库功能增强,支持Milvus,并优化相关逻辑
This commit is contained in:
@@ -6,6 +6,8 @@ import com.easyagents.store.aliyun.AliyunVectorStore;
|
||||
import com.easyagents.store.aliyun.AliyunVectorStoreConfig;
|
||||
import com.easyagents.store.elasticsearch.ElasticSearchVectorStore;
|
||||
import com.easyagents.store.elasticsearch.ElasticSearchVectorStoreConfig;
|
||||
import com.easyagents.store.milvus.MilvusVectorStore;
|
||||
import com.easyagents.store.milvus.MilvusVectorStoreConfig;
|
||||
import com.easyagents.store.opensearch.OpenSearchVectorStore;
|
||||
import com.easyagents.store.opensearch.OpenSearchVectorStoreConfig;
|
||||
import com.easyagents.store.qcloud.QCloudVectorStore;
|
||||
@@ -62,6 +64,11 @@ public class DocumentCollection extends DocumentCollectionBase {
|
||||
*/
|
||||
public static final String KEY_CAN_UPDATE_EMBEDDING_MODEL = "canUpdateEmbeddingModel";
|
||||
|
||||
/**
|
||||
* Option key: whether the rerank model is enabled.
|
||||
*/
|
||||
public static final String KEY_RERANK_ENABLE = "rerankEnable";
|
||||
|
||||
public DocumentStore toDocumentStore() {
|
||||
String storeType = this.getVectorStoreType();
|
||||
if (StringUtil.noText(storeType)) {
|
||||
@@ -73,8 +80,8 @@ public class DocumentCollection extends DocumentCollectionBase {
|
||||
switch (storeType.toLowerCase()) {
|
||||
case "redis":
|
||||
return redisStore();
|
||||
// case "milvus":
|
||||
// return milvusStore();
|
||||
case "milvus":
|
||||
return milvusStore();
|
||||
case "opensearch":
|
||||
return openSearchStore();
|
||||
case "elasticsearch":
|
||||
@@ -101,10 +108,13 @@ public class DocumentCollection extends DocumentCollectionBase {
|
||||
return new RedisVectorStore(redisVectorStoreConfig);
|
||||
}
|
||||
|
||||
// private DocumentStore milvusStore() {
|
||||
// MilvusVectorStoreConfig milvusVectorStoreConfig = getStoreConfig(MilvusVectorStoreConfig.class);
|
||||
// return new MilvusVectorStore(milvusVectorStoreConfig);
|
||||
// }
|
||||
/**
 * Builds a Milvus-backed DocumentStore from this collection's store configuration.
 * If the loaded configuration has no default collection name, this collection's
 * vector-store collection name is written into the configuration before the store is created.
 *
 * @return a new MilvusVectorStore constructed from the loaded configuration
 */
private DocumentStore milvusStore() {
|
||||
MilvusVectorStoreConfig milvusVectorStoreConfig = getStoreConfig(MilvusVectorStoreConfig.class);
|
||||
// Only fill in the collection name when the configuration does not already provide one.
if (milvusVectorStoreConfig != null && StringUtil.noText(milvusVectorStoreConfig.getDefaultCollectionName())) {
|
||||
milvusVectorStoreConfig.setDefaultCollectionName(this.getVectorStoreCollection());
|
||||
}
|
||||
// NOTE(review): the config is null-checked above but is still passed to the constructor
// when it is null; confirm that MilvusVectorStore tolerates a null config.
return new MilvusVectorStore(milvusVectorStoreConfig);
|
||||
}
|
||||
|
||||
private DocumentStore openSearchStore() {
|
||||
OpenSearchVectorStoreConfig openSearchVectorStoreConfig = getStoreConfig(OpenSearchVectorStoreConfig.class);
|
||||
@@ -136,6 +146,23 @@ public class DocumentCollection extends DocumentCollectionBase {
|
||||
|
||||
public Object getOptionsByKey(String key) {
|
||||
Map<String, Object> options = this.getOptions();
|
||||
if (KEY_RERANK_ENABLE.equals(key)) {
|
||||
if (options == null || !options.containsKey(KEY_RERANK_ENABLE)) {
|
||||
return this.getRerankModelId() != null;
|
||||
}
|
||||
Object value = options.get(key);
|
||||
if (value instanceof Boolean) {
|
||||
return value;
|
||||
}
|
||||
if (value instanceof Number) {
|
||||
return ((Number) value).intValue() != 0;
|
||||
}
|
||||
if (value instanceof String) {
|
||||
return Boolean.parseBoolean((String) value);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
if (options == null) {
|
||||
return null;
|
||||
}
|
||||
|
||||
@@ -2,6 +2,7 @@ package tech.easyflow.ai.service.impl;
|
||||
|
||||
|
||||
import com.easyagents.core.document.Document;
|
||||
import com.easyagents.core.model.rerank.RerankException;
|
||||
import com.easyagents.core.model.rerank.RerankModel;
|
||||
import com.easyagents.core.store.DocumentStore;
|
||||
import com.easyagents.core.store.SearchWrapper;
|
||||
@@ -9,6 +10,8 @@ import com.easyagents.core.store.StoreOptions;
|
||||
import com.easyagents.search.engine.service.DocumentSearcher;
|
||||
import com.mybatisflex.core.query.QueryWrapper;
|
||||
import com.mybatisflex.spring.service.impl.ServiceImpl;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.stereotype.Service;
|
||||
import tech.easyflow.ai.config.SearcherFactory;
|
||||
@@ -46,6 +49,8 @@ import static tech.easyflow.ai.entity.DocumentCollection.*;
|
||||
@Service
|
||||
public class DocumentCollectionServiceImpl extends ServiceImpl<DocumentCollectionMapper, DocumentCollection> implements DocumentCollectionService {
|
||||
|
||||
private static final Logger LOG = LoggerFactory.getLogger(DocumentCollectionServiceImpl.class);
|
||||
|
||||
@Resource
|
||||
private ModelService llmService;
|
||||
|
||||
@@ -128,20 +133,33 @@ public class DocumentCollectionServiceImpl extends ServiceImpl<DocumentCollectio
|
||||
if (searchDocuments.isEmpty()) {
|
||||
return Collections.emptyList();
|
||||
}
|
||||
if (documentCollection.getRerankModelId() == null) {
|
||||
boolean rerankEnable = Boolean.TRUE.equals(documentCollection.getOptionsByKey(KEY_RERANK_ENABLE));
|
||||
if (!rerankEnable || documentCollection.getRerankModelId() == null) {
|
||||
return formatDocuments(searchDocuments, minSimilarity, docRecallMaxNum);
|
||||
}
|
||||
|
||||
Model modelRerank = llmService.getModelInstance(documentCollection.getRerankModelId());
|
||||
if (modelRerank == null) {
|
||||
return formatDocuments(searchDocuments, minSimilarity, docRecallMaxNum);
|
||||
}
|
||||
|
||||
RerankModel rerankModel = modelRerank.toRerankModel();
|
||||
if (rerankModel == null) {
|
||||
return formatDocuments(searchDocuments, minSimilarity, docRecallMaxNum);
|
||||
}
|
||||
|
||||
Map<Object, Double> originalScores = new HashMap<>();
|
||||
searchDocuments.forEach(item -> originalScores.put(item.getId(), item.getScore()));
|
||||
searchDocuments.forEach(item -> item.setScore(null));
|
||||
List<Document> rerankDocuments = rerankModel.rerank(keyword, searchDocuments);
|
||||
return formatDocuments(rerankDocuments, minSimilarity, docRecallMaxNum);
|
||||
try {
|
||||
List<Document> rerankDocuments = rerankModel.rerank(keyword, searchDocuments);
|
||||
return formatDocuments(rerankDocuments, minSimilarity, docRecallMaxNum);
|
||||
} catch (RerankException e) {
|
||||
searchDocuments.forEach(item -> item.setScore(originalScores.get(item.getId())));
|
||||
LOG.warn("Rerank failed for collectionId={}, modelId={}, fallback to vector results. message={}",
|
||||
documentCollection.getId(), documentCollection.getRerankModelId(), e.getMessage());
|
||||
return formatDocuments(searchDocuments, minSimilarity, docRecallMaxNum);
|
||||
}
|
||||
} catch (InterruptedException | ExecutionException e) {
|
||||
Thread.currentThread().interrupt();
|
||||
e.printStackTrace();
|
||||
|
||||
@@ -25,16 +25,11 @@ import org.springframework.stereotype.Service;
|
||||
import org.springframework.transaction.annotation.Transactional;
|
||||
import tech.easyflow.ai.config.SearcherFactory;
|
||||
import tech.easyflow.ai.entity.*;
|
||||
|
||||
import static tech.easyflow.ai.entity.DocumentCollection.KEY_CAN_UPDATE_EMBEDDING_MODEL;
|
||||
import static tech.easyflow.ai.entity.DocumentCollection.KEY_SEARCH_ENGINE_TYPE;
|
||||
import static tech.easyflow.ai.entity.table.DocumentChunkTableDef.DOCUMENT_CHUNK;
|
||||
import static tech.easyflow.ai.entity.table.DocumentTableDef.DOCUMENT;
|
||||
import tech.easyflow.ai.mapper.DocumentChunkMapper;
|
||||
import tech.easyflow.ai.mapper.DocumentMapper;
|
||||
import tech.easyflow.ai.service.DocumentChunkService;
|
||||
import tech.easyflow.ai.service.DocumentService;
|
||||
import tech.easyflow.ai.service.DocumentCollectionService;
|
||||
import tech.easyflow.ai.service.DocumentService;
|
||||
import tech.easyflow.ai.service.ModelService;
|
||||
import tech.easyflow.common.ai.rag.ExcelDocumentSplitter;
|
||||
import tech.easyflow.common.domain.Result;
|
||||
@@ -50,6 +45,11 @@ import java.math.BigInteger;
|
||||
import java.util.*;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
|
||||
import static tech.easyflow.ai.entity.DocumentCollection.KEY_CAN_UPDATE_EMBEDDING_MODEL;
|
||||
import static tech.easyflow.ai.entity.DocumentCollection.KEY_SEARCH_ENGINE_TYPE;
|
||||
import static tech.easyflow.ai.entity.table.DocumentChunkTableDef.DOCUMENT_CHUNK;
|
||||
import static tech.easyflow.ai.entity.table.DocumentTableDef.DOCUMENT;
|
||||
|
||||
/**
|
||||
* 服务层实现。
|
||||
*
|
||||
@@ -228,11 +228,26 @@ public class DocumentServiceImpl extends ServiceImpl<DocumentMapper, Document> i
|
||||
|
||||
@Override
|
||||
public Result<?> saveTextResult(List<DocumentChunk> documentChunks, Document document) {
|
||||
Boolean result = storeDocument(document, documentChunks);
|
||||
if (documentChunks == null || documentChunks.isEmpty()) {
|
||||
return Result.fail(1, "切割结果为空,无法保存");
|
||||
}
|
||||
|
||||
List<DocumentChunk> validChunks = new ArrayList<>();
|
||||
for (DocumentChunk chunk : documentChunks) {
|
||||
if (chunk != null && StringUtil.hasText(chunk.getContent())) {
|
||||
validChunks.add(chunk);
|
||||
}
|
||||
}
|
||||
|
||||
if (validChunks.isEmpty()) {
|
||||
return Result.fail(1, "切割结果无有效文本,无法进行向量化");
|
||||
}
|
||||
|
||||
Boolean result = storeDocument(document, validChunks);
|
||||
if (result) {
|
||||
this.getMapper().insert(document);
|
||||
AtomicInteger sort = new AtomicInteger(1);
|
||||
documentChunks.forEach(item -> {
|
||||
validChunks.forEach(item -> {
|
||||
item.setDocumentCollectionId(document.getCollectionId());
|
||||
item.setSorting(sort.get());
|
||||
item.setDocumentId(document.getId());
|
||||
@@ -287,7 +302,8 @@ public class DocumentServiceImpl extends ServiceImpl<DocumentMapper, Document> i
|
||||
try {
|
||||
result = documentStore.store(documents, options);
|
||||
} catch (Exception e) {
|
||||
Log.error(e.getMessage());
|
||||
Log.error("Vector store failed: knowledgeId={}, collection={}, chunkCount={}",
|
||||
knowledge.getId(), options.getCollectionName(), documents.size(), e);
|
||||
throw new BusinessException("向量过程中发生错误,错误信息为:" + e.getMessage());
|
||||
}
|
||||
if (result == null || !result.isSuccess()) {
|
||||
|
||||
Reference in New Issue
Block a user