知识库功能增强,支持Milvus,并优化相关逻辑

This commit is contained in:
2026-02-24 11:19:53 +08:00
parent 148a08a3f1
commit 094b185c49
10 changed files with 196 additions and 59 deletions

View File

@@ -6,6 +6,8 @@ import com.easyagents.store.aliyun.AliyunVectorStore;
import com.easyagents.store.aliyun.AliyunVectorStoreConfig;
import com.easyagents.store.elasticsearch.ElasticSearchVectorStore;
import com.easyagents.store.elasticsearch.ElasticSearchVectorStoreConfig;
import com.easyagents.store.milvus.MilvusVectorStore;
import com.easyagents.store.milvus.MilvusVectorStoreConfig;
import com.easyagents.store.opensearch.OpenSearchVectorStore;
import com.easyagents.store.opensearch.OpenSearchVectorStoreConfig;
import com.easyagents.store.qcloud.QCloudVectorStore;
@@ -62,6 +64,11 @@ public class DocumentCollection extends DocumentCollectionBase {
*/
public static final String KEY_CAN_UPDATE_EMBEDDING_MODEL = "canUpdateEmbeddingModel";
/**
 * Option key indicating whether the rerank model is enabled.
 * Looked up in this collection's options map by {@code getOptionsByKey}.
 */
public static final String KEY_RERANK_ENABLE = "rerankEnable";
public DocumentStore toDocumentStore() {
String storeType = this.getVectorStoreType();
if (StringUtil.noText(storeType)) {
@@ -73,8 +80,8 @@ public class DocumentCollection extends DocumentCollectionBase {
switch (storeType.toLowerCase()) {
case "redis":
return redisStore();
// case "milvus":
// return milvusStore();
case "milvus":
return milvusStore();
case "opensearch":
return openSearchStore();
case "elasticsearch":
@@ -101,10 +108,13 @@ public class DocumentCollection extends DocumentCollectionBase {
return new RedisVectorStore(redisVectorStoreConfig);
}
// private DocumentStore milvusStore() {
// MilvusVectorStoreConfig milvusVectorStoreConfig = getStoreConfig(MilvusVectorStoreConfig.class);
// return new MilvusVectorStore(milvusVectorStoreConfig);
// }
/**
 * Creates the Milvus-backed {@link DocumentStore} for this collection.
 *
 * <p>The store configuration is obtained through {@code getStoreConfig}. When a
 * configuration is present but carries no default collection name, the name is
 * filled in from {@code getVectorStoreCollection()} before the store is built.
 *
 * <p>NOTE(review): a {@code null} configuration is forwarded to the
 * {@code MilvusVectorStore} constructor unchanged - confirm the constructor
 * tolerates that, or that {@code getStoreConfig} never returns {@code null} here.
 *
 * @return a new {@code MilvusVectorStore} built from the resolved configuration
 */
private DocumentStore milvusStore() {
    MilvusVectorStoreConfig config = getStoreConfig(MilvusVectorStoreConfig.class);

    // Only fall back to this collection's own name when the config leaves it blank.
    boolean collectionNameMissing = config != null
            && StringUtil.noText(config.getDefaultCollectionName());
    if (collectionNameMissing) {
        config.setDefaultCollectionName(this.getVectorStoreCollection());
    }

    return new MilvusVectorStore(config);
}
private DocumentStore openSearchStore() {
OpenSearchVectorStoreConfig openSearchVectorStoreConfig = getStoreConfig(OpenSearchVectorStoreConfig.class);
@@ -136,6 +146,23 @@ public class DocumentCollection extends DocumentCollectionBase {
public Object getOptionsByKey(String key) {
Map<String, Object> options = this.getOptions();
if (KEY_RERANK_ENABLE.equals(key)) {
if (options == null || !options.containsKey(KEY_RERANK_ENABLE)) {
return this.getRerankModelId() != null;
}
Object value = options.get(key);
if (value instanceof Boolean) {
return value;
}
if (value instanceof Number) {
return ((Number) value).intValue() != 0;
}
if (value instanceof String) {
return Boolean.parseBoolean((String) value);
}
return false;
}
if (options == null) {
return null;
}

View File

@@ -2,6 +2,7 @@ package tech.easyflow.ai.service.impl;
import com.easyagents.core.document.Document;
import com.easyagents.core.model.rerank.RerankException;
import com.easyagents.core.model.rerank.RerankModel;
import com.easyagents.core.store.DocumentStore;
import com.easyagents.core.store.SearchWrapper;
@@ -9,6 +10,8 @@ import com.easyagents.core.store.StoreOptions;
import com.easyagents.search.engine.service.DocumentSearcher;
import com.mybatisflex.core.query.QueryWrapper;
import com.mybatisflex.spring.service.impl.ServiceImpl;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import tech.easyflow.ai.config.SearcherFactory;
@@ -46,6 +49,8 @@ import static tech.easyflow.ai.entity.DocumentCollection.*;
@Service
public class DocumentCollectionServiceImpl extends ServiceImpl<DocumentCollectionMapper, DocumentCollection> implements DocumentCollectionService {
private static final Logger LOG = LoggerFactory.getLogger(DocumentCollectionServiceImpl.class);
@Resource
private ModelService llmService;
@@ -128,20 +133,33 @@ public class DocumentCollectionServiceImpl extends ServiceImpl<DocumentCollectio
if (searchDocuments.isEmpty()) {
return Collections.emptyList();
}
if (documentCollection.getRerankModelId() == null) {
boolean rerankEnable = Boolean.TRUE.equals(documentCollection.getOptionsByKey(KEY_RERANK_ENABLE));
if (!rerankEnable || documentCollection.getRerankModelId() == null) {
return formatDocuments(searchDocuments, minSimilarity, docRecallMaxNum);
}
Model modelRerank = llmService.getModelInstance(documentCollection.getRerankModelId());
if (modelRerank == null) {
return formatDocuments(searchDocuments, minSimilarity, docRecallMaxNum);
}
RerankModel rerankModel = modelRerank.toRerankModel();
if (rerankModel == null) {
return formatDocuments(searchDocuments, minSimilarity, docRecallMaxNum);
}
Map<Object, Double> originalScores = new HashMap<>();
searchDocuments.forEach(item -> originalScores.put(item.getId(), item.getScore()));
searchDocuments.forEach(item -> item.setScore(null));
List<Document> rerankDocuments = rerankModel.rerank(keyword, searchDocuments);
return formatDocuments(rerankDocuments, minSimilarity, docRecallMaxNum);
try {
List<Document> rerankDocuments = rerankModel.rerank(keyword, searchDocuments);
return formatDocuments(rerankDocuments, minSimilarity, docRecallMaxNum);
} catch (RerankException e) {
searchDocuments.forEach(item -> item.setScore(originalScores.get(item.getId())));
LOG.warn("Rerank failed for collectionId={}, modelId={}, fallback to vector results. message={}",
documentCollection.getId(), documentCollection.getRerankModelId(), e.getMessage());
return formatDocuments(searchDocuments, minSimilarity, docRecallMaxNum);
}
} catch (InterruptedException | ExecutionException e) {
Thread.currentThread().interrupt();
e.printStackTrace();

View File

@@ -25,16 +25,11 @@ import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
import tech.easyflow.ai.config.SearcherFactory;
import tech.easyflow.ai.entity.*;
import static tech.easyflow.ai.entity.DocumentCollection.KEY_CAN_UPDATE_EMBEDDING_MODEL;
import static tech.easyflow.ai.entity.DocumentCollection.KEY_SEARCH_ENGINE_TYPE;
import static tech.easyflow.ai.entity.table.DocumentChunkTableDef.DOCUMENT_CHUNK;
import static tech.easyflow.ai.entity.table.DocumentTableDef.DOCUMENT;
import tech.easyflow.ai.mapper.DocumentChunkMapper;
import tech.easyflow.ai.mapper.DocumentMapper;
import tech.easyflow.ai.service.DocumentChunkService;
import tech.easyflow.ai.service.DocumentService;
import tech.easyflow.ai.service.DocumentCollectionService;
import tech.easyflow.ai.service.DocumentService;
import tech.easyflow.ai.service.ModelService;
import tech.easyflow.common.ai.rag.ExcelDocumentSplitter;
import tech.easyflow.common.domain.Result;
@@ -50,6 +45,11 @@ import java.math.BigInteger;
import java.util.*;
import java.util.concurrent.atomic.AtomicInteger;
import static tech.easyflow.ai.entity.DocumentCollection.KEY_CAN_UPDATE_EMBEDDING_MODEL;
import static tech.easyflow.ai.entity.DocumentCollection.KEY_SEARCH_ENGINE_TYPE;
import static tech.easyflow.ai.entity.table.DocumentChunkTableDef.DOCUMENT_CHUNK;
import static tech.easyflow.ai.entity.table.DocumentTableDef.DOCUMENT;
/**
* 服务层实现。
*
@@ -228,11 +228,26 @@ public class DocumentServiceImpl extends ServiceImpl<DocumentMapper, Document> i
@Override
public Result<?> saveTextResult(List<DocumentChunk> documentChunks, Document document) {
Boolean result = storeDocument(document, documentChunks);
if (documentChunks == null || documentChunks.isEmpty()) {
return Result.fail(1, "切割结果为空,无法保存");
}
List<DocumentChunk> validChunks = new ArrayList<>();
for (DocumentChunk chunk : documentChunks) {
if (chunk != null && StringUtil.hasText(chunk.getContent())) {
validChunks.add(chunk);
}
}
if (validChunks.isEmpty()) {
return Result.fail(1, "切割结果无有效文本,无法进行向量化");
}
Boolean result = storeDocument(document, validChunks);
if (result) {
this.getMapper().insert(document);
AtomicInteger sort = new AtomicInteger(1);
documentChunks.forEach(item -> {
validChunks.forEach(item -> {
item.setDocumentCollectionId(document.getCollectionId());
item.setSorting(sort.get());
item.setDocumentId(document.getId());
@@ -287,7 +302,8 @@ public class DocumentServiceImpl extends ServiceImpl<DocumentMapper, Document> i
try {
result = documentStore.store(documents, options);
} catch (Exception e) {
Log.error(e.getMessage());
Log.error("Vector store failed: knowledgeId={}, collection={}, chunkCount={}",
knowledge.getId(), options.getCollectionName(), documents.size(), e);
throw new BusinessException("向量过程中发生错误,错误信息为:" + e.getMessage());
}
if (result == null || !result.isSuccess()) {