feat: 收敛知识库检索调度与评分语义

- 固定 rag.engine 与 Milvus 配置,补齐启动期检索基础设施校验

- 支持调用方配置 retrievalMode,并统一知识库检索入口与结果来源展示

- 修正关键词检索 knowledgeId 过滤、混合检索评分归一化与本地 ES 默认配置
This commit is contained in:
2026-04-05 20:23:05 +08:00
parent 2592a1f09d
commit b5dd427920
41 changed files with 1260 additions and 600 deletions

View File

@@ -41,6 +41,10 @@
<groupId>com.easyagents</groupId>
<artifactId>easy-agents-support</artifactId>
</dependency>
<dependency>
<groupId>com.easyagents</groupId>
<artifactId>easy-agents-rag-retrieval</artifactId>
</dependency>
<dependency>
<groupId>com.easyagents</groupId>
<artifactId>easy-agents-spring-boot-starter</artifactId>

View File

@@ -1,35 +1,10 @@
package tech.easyflow.ai.config;
import com.easyagents.engine.es.ESConfig;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.boot.context.properties.ConfigurationProperties;
import org.springframework.stereotype.Component;
@Component
@ConfigurationProperties(prefix = "rag.searcher.elastic")
public class AiEsConfig extends ESConfig {
@Value("${rag.searcher.elastic.host}")
@Override
public void setHost(String host) {
super.setHost(host);
}
@Value("${rag.searcher.elastic.userName}")
@Override
public void setUserName(String userName) {
super.setUserName(userName);
}
@Value("${rag.searcher.elastic.password}")
@Override
public void setPassword(String password) {
super.setPassword(password);
}
@Value("${rag.searcher.elastic.indexName}")
@Override
public void setIndexName(String indexName) {
super.setIndexName(indexName);
}
}

View File

@@ -1,17 +1,10 @@
package tech.easyflow.ai.config;
import com.easyagents.search.engine.lucene.LuceneConfig;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.boot.context.properties.ConfigurationProperties;
import org.springframework.stereotype.Component;
@Component
@ConfigurationProperties(prefix = "rag.searcher.lucene")
public class AiLuceneConfig extends LuceneConfig {
@Value("${rag.searcher.lucene.indexDirPath}")
@Override
public void setIndexDirPath(String indexDirPath) {
super.setIndexDirPath(indexDirPath);
}
}

View File

@@ -0,0 +1,22 @@
package tech.easyflow.ai.config;
import com.easyagents.store.milvus.MilvusVectorStoreConfig;
import org.springframework.boot.context.properties.ConfigurationProperties;
import org.springframework.stereotype.Component;
/**
 * Milvus connection settings registered as a Spring bean and bound from the
 * {@code rag.milvus} configuration prefix.
 */
@Component
@ConfigurationProperties(prefix = "rag.milvus")
public class AiMilvusConfig extends MilvusVectorStoreConfig {

    /**
     * Builds a new, independent {@link MilvusVectorStoreConfig} that carries this
     * bean's connection values but targets the given collection.
     *
     * <p>Only the properties copied below (uri, token, database name, username,
     * password, auto-create flag) are transferred; the default collection name of
     * the result is {@code collectionName}.
     *
     * @param collectionName collection the returned configuration should point at
     * @return a freshly created configuration; this bean is not modified
     */
    public MilvusVectorStoreConfig copyForCollection(String collectionName) {
        MilvusVectorStoreConfig perCollection = new MilvusVectorStoreConfig();

        // Connection endpoint and credentials.
        perCollection.setUri(getUri());
        perCollection.setToken(getToken());
        perCollection.setDatabaseName(getDatabaseName());
        perCollection.setUsername(getUsername());
        perCollection.setPassword(getPassword());

        // Collection handling.
        perCollection.setAutoCreateCollection(isAutoCreateCollection());
        perCollection.setDefaultCollectionName(collectionName);

        return perCollection;
    }
}

View File

@@ -0,0 +1,103 @@
package tech.easyflow.ai.config;
import com.easyagents.engine.es.ElasticSearcher;
import com.easyagents.search.engine.service.DocumentSearcher;
import com.easyagents.store.milvus.MilvusVectorStore;
import org.springframework.beans.factory.SmartInitializingSingleton;
import org.springframework.stereotype.Component;
import tech.easyflow.ai.rag.KeywordEngineType;
import tech.easyflow.common.util.SpringContextUtil;
import tech.easyflow.common.util.StringUtil;
import tech.easyflow.common.web.exceptions.BusinessException;
import javax.annotation.Resource;
import java.io.File;
/**
 * Startup check for the retrieval infrastructure.
 *
 * <p>Once all singletons are instantiated this validator probes Milvus and the
 * configured keyword engine (Elasticsearch or Lucene). Any failure is reported
 * as a {@link BusinessException} thrown from {@link #afterSingletonsInstantiated()}.
 */
@Component
public class RagInfrastructureValidator implements SmartInitializingSingleton {

    /** Maximum number of availability probes per backend. */
    private static final int STARTUP_CHECK_RETRY_TIMES = 10;

    /** Pause between two consecutive probes, in milliseconds. */
    private static final long STARTUP_CHECK_RETRY_INTERVAL_MS = 1000L;

    /** Failure message shared by every Elasticsearch validation error. */
    private static final String ELASTIC_UNAVAILABLE_MESSAGE =
            "ES 服务不可用,项目启动失败,请检查 rag.engine 与 rag.searcher.elastic 配置";

    @Resource
    private AiMilvusConfig aiMilvusConfig;

    @Resource
    private AiLuceneConfig aiLuceneConfig;

    @Resource
    private SearcherFactory searcherFactory;

    /**
     * Validates Milvus first, then the keyword search engine.
     *
     * @throws BusinessException if any required backend is unavailable or misconfigured
     */
    @Override
    public void afterSingletonsInstantiated() {
        validateMilvus();
        validateKeywordSearcher();
    }

    /**
     * Probes Milvus up to {@link #STARTUP_CHECK_RETRY_TIMES} times using a throw-away
     * store bound to the probe collection name.
     *
     * @throws BusinessException if no probe succeeds; the message of the most recent
     *                           exception (if any) is appended
     */
    private void validateMilvus() {
        Exception lastException = null;
        for (int attempt = 1; attempt <= STARTUP_CHECK_RETRY_TIMES; attempt++) {
            try {
                MilvusVectorStore vectorStore =
                        new MilvusVectorStore(aiMilvusConfig.copyForCollection("__rag_boot_probe__"));
                if (vectorStore.checkAvailable()) {
                    return;
                }
            } catch (Exception e) {
                lastException = e;
            }
            sleepBeforeRetry(attempt);
        }
        if (lastException != null) {
            throw new BusinessException("Milvus 服务不可用,项目启动失败,请检查 rag.milvus 配置与服务状态: " + lastException.getMessage());
        }
        throw new BusinessException("Milvus 服务不可用,项目启动失败,请检查 rag.milvus 配置与服务状态");
    }

    /**
     * Validates the keyword engine selected by the {@code rag.engine} property
     * ({@code "ES"} is used when the property is absent).
     *
     * @throws BusinessException if the Lucene directory is unusable, the resolved
     *                           searcher is not an {@link ElasticSearcher}, or
     *                           Elasticsearch never reports itself available
     */
    private void validateKeywordSearcher() {
        KeywordEngineType engineType = KeywordEngineType.from(
                SpringContextUtil.getProperty("rag.engine", "ES")
        );
        if (engineType == KeywordEngineType.LUCENE) {
            validateLuceneDirectory();
            return;
        }

        DocumentSearcher searcher = searcherFactory.getSearcher();
        // Also covers a missing searcher bean: null is never an instance of ElasticSearcher.
        if (!(searcher instanceof ElasticSearcher)) {
            throw new BusinessException(ELASTIC_UNAVAILABLE_MESSAGE);
        }
        validateElastic((ElasticSearcher) searcher);
    }

    /**
     * Probes Elasticsearch with the same retry budget as Milvus. Exceptions thrown
     * by a probe count as a failed attempt instead of aborting the retry loop.
     *
     * @param elasticSearcher searcher to probe
     * @throws BusinessException if no probe succeeds; the message of the most recent
     *                           exception (if any) is appended
     */
    private void validateElastic(ElasticSearcher elasticSearcher) {
        RuntimeException lastFailure = null;
        for (int attempt = 1; attempt <= STARTUP_CHECK_RETRY_TIMES; attempt++) {
            try {
                if (elasticSearcher.checkAvailable()) {
                    return;
                }
            } catch (RuntimeException e) {
                lastFailure = e;
            }
            sleepBeforeRetry(attempt);
        }
        if (lastFailure != null) {
            throw new BusinessException(ELASTIC_UNAVAILABLE_MESSAGE + ": " + lastFailure.getMessage());
        }
        throw new BusinessException(ELASTIC_UNAVAILABLE_MESSAGE);
    }

    /**
     * Ensures the configured Lucene index directory exists (creating it if needed)
     * and is a readable, writable directory.
     *
     * @throws BusinessException if the path is not configured, cannot be created,
     *                           or is not a readable and writable directory
     */
    private void validateLuceneDirectory() {
        String indexDirPath = aiLuceneConfig.getIndexDirPath();
        if (StringUtil.noText(indexDirPath)) {
            throw new BusinessException("Lucene 索引目录未配置,请检查 rag.searcher.lucene.indexDirPath");
        }
        File indexDir = new File(indexDirPath);
        if (!indexDir.exists() && !indexDir.mkdirs()) {
            throw new BusinessException("Lucene 索引目录创建失败: " + indexDirPath);
        }
        if (!indexDir.isDirectory() || !indexDir.canRead() || !indexDir.canWrite()) {
            throw new BusinessException("Lucene 索引目录不可读写: " + indexDirPath);
        }
    }

    /**
     * Waits for the retry interval unless {@code attempt} was the last one, so a
     * failing startup is not delayed by a sleep that no probe follows.
     *
     * @param attempt 1-based number of the attempt that just failed
     * @throws BusinessException if the waiting thread is interrupted; the interrupt
     *                           flag is restored before throwing
     */
    private void sleepBeforeRetry(int attempt) {
        if (attempt >= STARTUP_CHECK_RETRY_TIMES) {
            return;
        }
        try {
            Thread.sleep(STARTUP_CHECK_RETRY_INTERVAL_MS);
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            throw new BusinessException("中间件启动校验被中断");
        }
    }
}

View File

@@ -3,40 +3,37 @@ package tech.easyflow.ai.config;
import com.easyagents.engine.es.ElasticSearcher;
import com.easyagents.search.engine.lucene.LuceneSearcher;
import com.easyagents.search.engine.service.DocumentSearcher;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.ObjectProvider;
import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
@Configuration
public class SearcherFactory {
@Autowired
private AiLuceneConfig luceneConfig;
private final ObjectProvider<DocumentSearcher> documentSearcherProvider;
@Autowired
private AiEsConfig aiEsConfig;
public SearcherFactory(ObjectProvider<DocumentSearcher> documentSearcherProvider) {
this.documentSearcherProvider = documentSearcherProvider;
}
@Bean
public LuceneSearcher luceneSearcher() {
@ConditionalOnProperty(prefix = "rag", name = "engine", havingValue = "LUCENE")
public LuceneSearcher luceneSearcher(AiLuceneConfig luceneConfig) {
return new LuceneSearcher(luceneConfig);
}
@Bean
public ElasticSearcher elasticSearcher() {
@ConditionalOnProperty(prefix = "rag", name = "engine", havingValue = "ES", matchIfMissing = true)
public ElasticSearcher elasticSearcher(AiEsConfig aiEsConfig) {
return new ElasticSearcher(aiEsConfig);
}
public DocumentSearcher getSearcher() {
return documentSearcherProvider.getIfAvailable();
}
public DocumentSearcher getSearcher(String defaultSearcherType) {
if (defaultSearcherType == null) {
defaultSearcherType = "lucene";
}
switch (defaultSearcherType) {
case "elasticSearch":
return new ElasticSearcher(aiEsConfig);
case "lucene":
default:
return new LuceneSearcher(luceneConfig);
}
public DocumentSearcher getSearcher(String ignored) {
return getSearcher();
}
}

View File

@@ -0,0 +1,32 @@
package tech.easyflow.ai.dto;
import com.easyagents.rag.retrieval.RetrievalMode;
import tech.easyflow.ai.rag.KnowledgeRetrievalModes;
import java.math.BigInteger;
/**
 * Request payload describing one bot-to-knowledge-base binding: the knowledge
 * base id plus the retrieval mode, carried as raw text.
 */
public class BotKnowledgeBindingRequest {

    /** Identifier of the knowledge base to bind. */
    private BigInteger knowledgeId;

    /** Retrieval mode exactly as supplied by the caller; parsed on demand. */
    private String retrievalMode;

    /** @return the knowledge base id, or {@code null} if not set */
    public BigInteger getKnowledgeId() {
        return this.knowledgeId;
    }

    /** @param knowledgeId the knowledge base id */
    public void setKnowledgeId(BigInteger knowledgeId) {
        this.knowledgeId = knowledgeId;
    }

    /** @return the raw retrieval mode text, or {@code null} if not set */
    public String getRetrievalMode() {
        return this.retrievalMode;
    }

    /** @param retrievalMode the raw retrieval mode text */
    public void setRetrievalMode(String retrievalMode) {
        this.retrievalMode = retrievalMode;
    }

    /**
     * Converts the raw retrieval mode text into a {@link RetrievalMode} by
     * delegating to {@link KnowledgeRetrievalModes#parse(String)}.
     *
     * @return the parsed retrieval mode
     */
    public RetrievalMode resolveRetrievalMode() {
        String rawMode = this.retrievalMode;
        return KnowledgeRetrievalModes.parse(rawMode);
    }
}

View File

@@ -0,0 +1,59 @@
package tech.easyflow.ai.dto;
/**
 * Plain data holder for a single knowledge-base search result row.
 * All properties are nullable and have no defaults.
 */
public class KnowledgeSearchResultItem {

    /** Ordering value of this item. */
    private Integer sorting;

    /** Text content of the hit. */
    private String content;

    /** Overall score of the hit. */
    private Double score;

    /** Label of the source that produced the hit. */
    private String hitSource;

    /** Score attributed to vector retrieval, if any. */
    private Double vectorScore;

    /** Score attributed to keyword retrieval, if any. */
    private Double keywordScore;

    /** @return the ordering value, or {@code null} if not set */
    public Integer getSorting() {
        return this.sorting;
    }

    /** @param sorting the ordering value */
    public void setSorting(Integer sorting) {
        this.sorting = sorting;
    }

    /** @return the hit content, or {@code null} if not set */
    public String getContent() {
        return this.content;
    }

    /** @param content the hit content */
    public void setContent(String content) {
        this.content = content;
    }

    /** @return the overall score, or {@code null} if not set */
    public Double getScore() {
        return this.score;
    }

    /** @param score the overall score */
    public void setScore(Double score) {
        this.score = score;
    }

    /** @return the hit source label, or {@code null} if not set */
    public String getHitSource() {
        return this.hitSource;
    }

    /** @param hitSource the hit source label */
    public void setHitSource(String hitSource) {
        this.hitSource = hitSource;
    }

    /** @return the vector score, or {@code null} if not set */
    public Double getVectorScore() {
        return this.vectorScore;
    }

    /** @param vectorScore the vector score */
    public void setVectorScore(Double vectorScore) {
        this.vectorScore = vectorScore;
    }

    /** @return the keyword score, or {@code null} if not set */
    public Double getKeywordScore() {
        return this.keywordScore;
    }

    /** @param keywordScore the keyword score */
    public void setKeywordScore(Double keywordScore) {
        this.keywordScore = keywordScore;
    }
}

View File

@@ -3,7 +3,9 @@ package tech.easyflow.ai.easyagents.tool;
import com.easyagents.core.document.Document;
import com.easyagents.core.model.chat.tool.BaseTool;
import com.easyagents.core.model.chat.tool.Parameter;
import com.easyagents.rag.retrieval.RetrievalMode;
import tech.easyflow.ai.entity.DocumentCollection;
import tech.easyflow.ai.rag.KnowledgeRetrievalRequest;
import tech.easyflow.ai.service.DocumentCollectionService;
import tech.easyflow.common.util.SpringContextUtil;
@@ -14,12 +16,18 @@ import java.util.Map;
public class DocumentCollectionTool extends BaseTool {
private BigInteger knowledgeId;
private RetrievalMode retrievalMode = RetrievalMode.HYBRID;
public DocumentCollectionTool() {
}
public DocumentCollectionTool(DocumentCollection documentCollection, boolean needEnglishName) {
this(documentCollection, needEnglishName, RetrievalMode.HYBRID);
}
public DocumentCollectionTool(DocumentCollection documentCollection, boolean needEnglishName, RetrievalMode retrievalMode) {
this.knowledgeId = documentCollection.getId();
this.retrievalMode = retrievalMode == null ? RetrievalMode.HYBRID : retrievalMode;
if (needEnglishName) {
this.name = documentCollection.getEnglishName();
} else {
@@ -47,11 +55,25 @@ public class DocumentCollectionTool extends BaseTool {
this.knowledgeId = knowledgeId;
}
/**
 * Returns the retrieval mode held by this tool.
 *
 * @return the current retrieval mode
 */
public RetrievalMode getRetrievalMode() {
    return this.retrievalMode;
}
/**
 * Sets the retrieval mode held by this tool.
 *
 * @param retrievalMode the mode to store; {@code null} is replaced by
 *                      {@link RetrievalMode#HYBRID}
 */
public void setRetrievalMode(RetrievalMode retrievalMode) {
    if (retrievalMode == null) {
        this.retrievalMode = RetrievalMode.HYBRID;
    } else {
        this.retrievalMode = retrievalMode;
    }
}
@Override
public Object invoke(Map<String, Object> argsMap) {
DocumentCollectionService knowledgeService = SpringContextUtil.getBean(DocumentCollectionService.class);
List<Document> documents = knowledgeService.search(this.knowledgeId, (String) argsMap.get("input"));
KnowledgeRetrievalRequest request = new KnowledgeRetrievalRequest();
request.setKnowledgeId(this.knowledgeId);
request.setQuery((String) argsMap.get("input"));
request.setRetrievalMode(this.retrievalMode);
request.setCallerType("BOT_TOOL");
request.setCallerId(this.knowledgeId == null ? null : this.knowledgeId.toString());
List<Document> documents = knowledgeService.search(request);
StringBuilder sb = new StringBuilder();
if (documents != null) {

View File

@@ -7,6 +7,8 @@ import com.easyagents.flow.core.knowledge.Knowledge;
import com.easyagents.flow.core.knowledge.KnowledgeProvider;
import com.easyagents.flow.core.node.KnowledgeNode;
import org.springframework.stereotype.Component;
import tech.easyflow.ai.rag.KnowledgeRetrievalRequest;
import tech.easyflow.ai.rag.KnowledgeRetrievalModes;
import tech.easyflow.ai.service.DocumentCollectionService;
import javax.annotation.Resource;
@@ -30,7 +32,17 @@ public class KnowledgeProviderImpl implements KnowledgeProvider {
return new Knowledge() {
@Override
public List<Map<String, Object>> search(String keyword, int limit, KnowledgeNode knowledgeNode, Chain chain) {
List<Document> documents = documentCollectionService.search(new BigInteger(id.toString()), keyword);
KnowledgeRetrievalRequest request = new KnowledgeRetrievalRequest();
request.setKnowledgeId(new BigInteger(id.toString()));
request.setQuery(keyword);
request.setLimit(limit);
request.setRetrievalMode(KnowledgeRetrievalModes.parse(knowledgeNode.getRetrievalMode()));
request.setCallerType("WORKFLOW");
request.setCallerId(knowledgeNode.getId());
List<Document> documents = documentCollectionService.search(request);
if (limit > 0 && documents.size() > limit) {
documents = new ArrayList<>(documents.subList(0, limit));
}
List<Map<String, Object>> res = new ArrayList<>();
for (Document document : documents) {
res.add(JSONObject.from(document));

View File

@@ -1,9 +1,14 @@
package tech.easyflow.ai.entity;
import com.easyagents.rag.retrieval.RetrievalMode;
import tech.easyflow.ai.entity.base.BotDocumentCollectionBase;
import tech.easyflow.ai.rag.KnowledgeRetrievalModes;
import com.mybatisflex.annotation.RelationOneToOne;
import com.mybatisflex.annotation.Table;
import java.util.HashMap;
import java.util.Map;
/**
* 实体类。
*
@@ -14,6 +19,8 @@ import com.mybatisflex.annotation.Table;
@Table("tb_bot_document_collection")
public class BotDocumentCollection extends BotDocumentCollectionBase {
public static final String OPTION_KEY_RETRIEVAL_MODE = "retrievalMode";
@RelationOneToOne(selfField = "documentCollectionId", targetField = "id")
private DocumentCollection knowledge;
@@ -24,4 +31,21 @@ public class BotDocumentCollection extends BotDocumentCollectionBase {
public void setKnowledge(DocumentCollection knowledge) {
this.knowledge = knowledge;
}
/**
 * Reads the retrieval mode stored in this binding's options map under
 * {@link #OPTION_KEY_RETRIEVAL_MODE}.
 *
 * @return {@link RetrievalMode#HYBRID} when there is no options map; otherwise
 *         whatever {@link KnowledgeRetrievalModes#parse(String)} yields for the
 *         stored value rendered as text ({@code null} when the key is absent)
 */
public RetrievalMode getRetrievalMode() {
    Map<String, Object> bindingOptions = getOptions();
    if (bindingOptions == null) {
        return RetrievalMode.HYBRID;
    }

    Object storedMode = bindingOptions.get(OPTION_KEY_RETRIEVAL_MODE);
    String modeText;
    if (storedMode == null) {
        modeText = null;
    } else {
        modeText = String.valueOf(storedMode);
    }
    return KnowledgeRetrievalModes.parse(modeText);
}
/**
 * Stores the retrieval mode's enum name in the options map under
 * {@link #OPTION_KEY_RETRIEVAL_MODE}. The existing map is copied rather than
 * mutated, and the copy is written back through {@code setOptions}.
 *
 * @param retrievalMode the mode to persist; {@code null} is stored as
 *                      {@link RetrievalMode#HYBRID}
 */
public void setRetrievalMode(RetrievalMode retrievalMode) {
    Map<String, Object> existing = getOptions();

    Map<String, Object> updated;
    if (existing == null) {
        updated = new HashMap<>();
    } else {
        updated = new HashMap<>(existing);
    }

    RetrievalMode effectiveMode = retrievalMode;
    if (effectiveMode == null) {
        effectiveMode = RetrievalMode.HYBRID;
    }
    updated.put(OPTION_KEY_RETRIEVAL_MODE, effectiveMode.name());

    setOptions(updated);
}
}

View File

@@ -2,24 +2,16 @@ package tech.easyflow.ai.entity;
import com.easyagents.core.model.chat.tool.Tool;
import com.easyagents.core.store.DocumentStore;
import com.easyagents.store.aliyun.AliyunVectorStore;
import com.easyagents.store.aliyun.AliyunVectorStoreConfig;
import com.easyagents.store.elasticsearch.ElasticSearchVectorStore;
import com.easyagents.store.elasticsearch.ElasticSearchVectorStoreConfig;
import com.easyagents.rag.retrieval.RetrievalMode;
import com.easyagents.store.milvus.MilvusVectorStore;
import com.easyagents.store.milvus.MilvusVectorStoreConfig;
import com.easyagents.store.opensearch.OpenSearchVectorStore;
import com.easyagents.store.opensearch.OpenSearchVectorStoreConfig;
import com.easyagents.store.qcloud.QCloudVectorStore;
import com.easyagents.store.qcloud.QCloudVectorStoreConfig;
import com.easyagents.store.redis.RedisVectorStore;
import com.easyagents.store.redis.RedisVectorStoreConfig;
import com.mybatisflex.annotation.Table;
import tech.easyflow.ai.config.AiMilvusConfig;
import tech.easyflow.ai.easyagents.tool.DocumentCollectionTool;
import tech.easyflow.ai.entity.base.DocumentCollectionBase;
import tech.easyflow.common.util.PropertiesUtil;
import tech.easyflow.ai.rag.KnowledgeRetrievalModes;
import tech.easyflow.common.util.SpringContextUtil;
import tech.easyflow.common.util.StringUtil;
import tech.easyflow.common.web.exceptions.BusinessException;
import tech.easyflow.system.permission.resource.VisibilityResource;
import java.math.BigDecimal;
@@ -58,11 +50,6 @@ public class DocumentCollection extends DocumentCollectionBase implements Visibi
*/
public static final String KEY_SIMILARITY_THRESHOLD = "simThreshold";
/**
* 搜索引擎类型
*/
public static final String KEY_SEARCH_ENGINE_TYPE = "searchEngineType";
/**
* 是否允许更新向量模型
*/
@@ -78,28 +65,10 @@ public class DocumentCollection extends DocumentCollectionBase implements Visibi
public static final String KEY_SPLITTER_STRATEGY_PROFILES = "splitter.strategyProfiles";
public DocumentStore toDocumentStore() {
String storeType = this.getVectorStoreType();
if (StringUtil.noText(storeType)) {
throw new BusinessException("向量数据库类型未设置");
}
if (storeType == null) {
if (StringUtil.noText(this.getVectorStoreCollection())) {
return null;
}
switch (storeType.toLowerCase()) {
case "redis":
return redisStore();
case "milvus":
return milvusStore();
case "opensearch":
return openSearchStore();
case "elasticsearch":
return elasticSearchStore();
case "aliyun":
return aliyunStore();
case "qcloud":
return qcloudStore();
}
return null;
return milvusStore();
}
public boolean isVectorStoreEnabled() {
@@ -115,53 +84,31 @@ public class DocumentCollection extends DocumentCollectionBase implements Visibi
}
public boolean isSearchEngineEnabled() {
return this.getSearchEngineEnable() != null && this.getSearchEngineEnable();
}
private DocumentStore redisStore() {
RedisVectorStoreConfig redisVectorStoreConfig = getStoreConfig(RedisVectorStoreConfig.class);
return new RedisVectorStore(redisVectorStoreConfig);
return true;
}
private DocumentStore milvusStore() {
MilvusVectorStoreConfig milvusVectorStoreConfig = getStoreConfig(MilvusVectorStoreConfig.class);
if (milvusVectorStoreConfig != null && StringUtil.noText(milvusVectorStoreConfig.getDefaultCollectionName())) {
milvusVectorStoreConfig.setDefaultCollectionName(this.getVectorStoreCollection());
}
AiMilvusConfig aiMilvusConfig = SpringContextUtil.getBean(AiMilvusConfig.class);
MilvusVectorStoreConfig milvusVectorStoreConfig = aiMilvusConfig.copyForCollection(this.getVectorStoreCollection());
return new MilvusVectorStore(milvusVectorStoreConfig);
}
private DocumentStore openSearchStore() {
OpenSearchVectorStoreConfig openSearchVectorStoreConfig = getStoreConfig(OpenSearchVectorStoreConfig.class);
return new OpenSearchVectorStore(openSearchVectorStoreConfig);
}
private DocumentStore elasticSearchStore() {
ElasticSearchVectorStoreConfig elasticSearchVectorStoreConfig = getStoreConfig(ElasticSearchVectorStoreConfig.class);
return new ElasticSearchVectorStore(elasticSearchVectorStoreConfig);
}
private DocumentStore aliyunStore() {
AliyunVectorStoreConfig aliyunVectorStoreConfig = getStoreConfig(AliyunVectorStoreConfig.class);
return new AliyunVectorStore(aliyunVectorStoreConfig);
}
private DocumentStore qcloudStore() {
QCloudVectorStoreConfig qCloudVectorStoreConfig = getStoreConfig(QCloudVectorStoreConfig.class);
return new QCloudVectorStore(qCloudVectorStoreConfig);
}
private <T> T getStoreConfig(Class<T> clazz) {
return PropertiesUtil.propertiesTextToEntity(this.getVectorStoreConfig(), clazz);
}
public Tool toFunction(boolean needEnglishName) {
return new DocumentCollectionTool(this, needEnglishName);
return toFunction(needEnglishName, RetrievalMode.HYBRID.name());
}
public Tool toFunction(boolean needEnglishName, String retrievalMode) {
return new DocumentCollectionTool(this, needEnglishName, KnowledgeRetrievalModes.parse(retrievalMode));
}
public Object getOptionsByKey(String key) {
Map<String, Object> options = this.getOptions();
if (KEY_DOC_RECALL_MAX_NUM.equals(key) && (options == null || !options.containsKey(KEY_DOC_RECALL_MAX_NUM))) {
return 5;
}
if (KEY_SIMILARITY_THRESHOLD.equals(key) && (options == null || !options.containsKey(KEY_SIMILARITY_THRESHOLD))) {
return 0.6f;
}
if (KEY_RERANK_ENABLE.equals(key)) {
if (options == null || !options.containsKey(KEY_RERANK_ENABLE)) {
return this.getRerankModelId() != null;
@@ -182,21 +129,9 @@ public class DocumentCollection extends DocumentCollectionBase implements Visibi
if (options == null) {
return null;
}
if (KEY_DOC_RECALL_MAX_NUM.equals(key) && !options.containsKey(KEY_DOC_RECALL_MAX_NUM)) {
return 5;
}
if (KEY_SIMILARITY_THRESHOLD.equals(key)) {
if (!options.containsKey(KEY_SIMILARITY_THRESHOLD)) {
return 0.6f;
} else {
BigDecimal score = (BigDecimal) options.get(key);
return (float) score.doubleValue();
}
}
if (KEY_SEARCH_ENGINE_TYPE.equals(key)) {
if (!options.containsKey(KEY_SEARCH_ENGINE_TYPE)) {
return "lucene";
}
BigDecimal score = (BigDecimal) options.get(key);
return (float) score.doubleValue();
}
return options.get(key);
}

View File

@@ -0,0 +1,21 @@
package tech.easyflow.ai.rag;
import tech.easyflow.common.util.StringUtil;
import tech.easyflow.common.web.exceptions.BusinessException;
/**
 * Keyword search engines that can be selected by configuration.
 */
public enum KeywordEngineType {
    ES,
    LUCENE;

    /**
     * Resolves a configuration value to an engine type.
     *
     * <p>A value for which {@code StringUtil.noText} is true resolves to {@link #ES}.
     * Otherwise the trimmed value is matched against the constant names, ignoring case.
     *
     * @param value raw configuration text
     * @return the matching engine type
     * @throws BusinessException if the value matches no constant
     */
    public static KeywordEngineType from(String value) {
        if (StringUtil.noText(value)) {
            return ES;
        }

        String candidate = value.trim();
        for (KeywordEngineType engine : values()) {
            boolean sameName = engine.name().equalsIgnoreCase(candidate);
            if (sameName) {
                return engine;
            }
        }

        throw new BusinessException("不支持的关键词检索引擎: " + value);
    }
}

View File

@@ -0,0 +1,18 @@
package tech.easyflow.ai.rag;
import com.easyagents.rag.retrieval.RetrievalMode;
import tech.easyflow.common.web.exceptions.BusinessException;
/**
 * Static helper that turns textual retrieval modes into {@link RetrievalMode}
 * values and reports unsupported input as a {@link BusinessException}.
 */
public final class KnowledgeRetrievalModes {

    /** Not instantiable: static helpers only. */
    private KnowledgeRetrievalModes() {
    }

    /**
     * Parses a retrieval mode by delegating to {@code RetrievalMode.from}.
     *
     * @param value textual retrieval mode, passed through unchanged
     * @return the value produced by {@code RetrievalMode.from(value)}
     * @throws BusinessException if {@code RetrievalMode.from} rejects the value
     *                           with an {@link IllegalArgumentException}
     */
    public static RetrievalMode parse(String value) {
        RetrievalMode parsed;
        try {
            parsed = RetrievalMode.from(value);
        } catch (IllegalArgumentException e) {
            throw new BusinessException("不支持的检索方式: " + value);
        }
        return parsed;
    }
}

View File

@@ -0,0 +1,63 @@
package tech.easyflow.ai.rag;
import com.easyagents.rag.retrieval.RetrievalMode;
import java.math.BigInteger;
/**
 * Parameter object for a knowledge-base retrieval call: which knowledge base to
 * search, the query text, an optional limit, the retrieval mode, and
 * caller-identifying fields.
 */
public class KnowledgeRetrievalRequest {

    /** Identifier of the knowledge base to search. */
    private BigInteger knowledgeId;

    /** Query text. */
    private String query;

    /** Optional result limit; {@code null} when not specified. */
    private Integer limit;

    /** Retrieval mode; starts as HYBRID and is never stored as {@code null} by the setter. */
    private RetrievalMode retrievalMode = RetrievalMode.HYBRID;

    /** Caller type label supplied by the caller. */
    private String callerType;

    /** Caller identifier supplied by the caller. */
    private String callerId;

    /** @return the knowledge base id, or {@code null} if not set */
    public BigInteger getKnowledgeId() {
        return this.knowledgeId;
    }

    /** @param knowledgeId the knowledge base id */
    public void setKnowledgeId(BigInteger knowledgeId) {
        this.knowledgeId = knowledgeId;
    }

    /** @return the query text, or {@code null} if not set */
    public String getQuery() {
        return this.query;
    }

    /** @param query the query text */
    public void setQuery(String query) {
        this.query = query;
    }

    /** @return the result limit, or {@code null} if not set */
    public Integer getLimit() {
        return this.limit;
    }

    /** @param limit the result limit */
    public void setLimit(Integer limit) {
        this.limit = limit;
    }

    /** @return the retrieval mode */
    public RetrievalMode getRetrievalMode() {
        return this.retrievalMode;
    }

    /**
     * @param retrievalMode the retrieval mode; {@code null} is replaced by
     *                      {@link RetrievalMode#HYBRID}
     */
    public void setRetrievalMode(RetrievalMode retrievalMode) {
        if (retrievalMode == null) {
            this.retrievalMode = RetrievalMode.HYBRID;
            return;
        }
        this.retrievalMode = retrievalMode;
    }

    /** @return the caller type label, or {@code null} if not set */
    public String getCallerType() {
        return this.callerType;
    }

    /** @param callerType the caller type label */
    public void setCallerType(String callerType) {
        this.callerType = callerType;
    }

    /** @return the caller identifier, or {@code null} if not set */
    public String getCallerId() {
        return this.callerId;
    }

    /** @param callerId the caller identifier */
    public void setCallerId(String callerId) {
        this.callerId = callerId;
    }
}

View File

@@ -1,5 +1,6 @@
package tech.easyflow.ai.service;
import tech.easyflow.ai.dto.BotKnowledgeBindingRequest;
import tech.easyflow.ai.entity.BotDocumentCollection;
import com.mybatisflex.core.service.IService;
@@ -16,5 +17,5 @@ public interface BotDocumentCollectionService extends IService<BotDocumentCollec
List<BotDocumentCollection> listByBotId(BigInteger botId);
void saveBotAndKnowledge(BigInteger botId, BigInteger[] knowledgeIds);
void saveBotAndKnowledge(BigInteger botId, List<BotKnowledgeBindingRequest> knowledgeBindings);
}

View File

@@ -2,6 +2,7 @@ package tech.easyflow.ai.service;
import com.easyagents.core.document.Document;
import tech.easyflow.ai.entity.DocumentCollection;
import tech.easyflow.ai.rag.KnowledgeRetrievalRequest;
import com.mybatisflex.core.service.IService;
import java.math.BigInteger;
@@ -17,6 +18,8 @@ public interface DocumentCollectionService extends IService<DocumentCollection>
List<Document> search(BigInteger id, String keyword);
List<Document> search(KnowledgeRetrievalRequest request);
DocumentCollection getDetail(String idOrAlias);
DocumentCollection getByAlias(String idOrAlias);

View File

@@ -1,5 +1,7 @@
package tech.easyflow.ai.service.impl;
import com.easyagents.rag.retrieval.RetrievalMode;
import tech.easyflow.ai.dto.BotKnowledgeBindingRequest;
import tech.easyflow.ai.entity.BotDocumentCollection;
import tech.easyflow.ai.mapper.BotDocumentCollectionMapper;
import tech.easyflow.ai.service.BotDocumentCollectionService;
@@ -11,9 +13,10 @@ import tech.easyflow.common.cache.RedisLockExecutor;
import javax.annotation.Resource;
import java.math.BigInteger;
import java.util.ArrayList;
import java.util.LinkedHashSet;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Set;
import java.util.Map;
import java.time.Duration;
import com.mybatisflex.core.query.QueryWrapper;
@@ -44,26 +47,30 @@ public class BotDocumentCollectionServiceImpl extends ServiceImpl<BotDocumentCol
@Override
@Transactional
public void saveBotAndKnowledge(BigInteger botId, BigInteger[] knowledgeIds) {
public void saveBotAndKnowledge(BigInteger botId, List<BotKnowledgeBindingRequest> knowledgeBindings) {
redisLockExecutor.executeWithLock(BOT_BINDING_LOCK_KEY_PREFIX + botId, LOCK_WAIT_TIMEOUT, LOCK_LEASE_TIMEOUT, () -> {
this.remove(QueryWrapper.create().eq(BotDocumentCollection::getBotId, botId));
Set<BigInteger> uniqueKnowledgeIds = new LinkedHashSet<>();
if (knowledgeIds != null) {
for (BigInteger knowledgeId : knowledgeIds) {
if (knowledgeId != null) {
uniqueKnowledgeIds.add(knowledgeId);
Map<BigInteger, RetrievalMode> bindingMap = new LinkedHashMap<>();
if (knowledgeBindings != null) {
for (BotKnowledgeBindingRequest binding : knowledgeBindings) {
if (binding == null || binding.getKnowledgeId() == null) {
continue;
}
bindingMap.put(binding.getKnowledgeId(), binding.resolveRetrievalMode());
}
}
if (uniqueKnowledgeIds.isEmpty()) {
if (bindingMap.isEmpty()) {
return;
}
List<BotDocumentCollection> list = new ArrayList<>(uniqueKnowledgeIds.size());
for (BigInteger knowledgeId : uniqueKnowledgeIds) {
List<BotDocumentCollection> list = new ArrayList<>(bindingMap.size());
for (Map.Entry<BigInteger, RetrievalMode> entry : bindingMap.entrySet()) {
BotDocumentCollection botDocumentCollection = new BotDocumentCollection();
botDocumentCollection.setBotId(botId);
botDocumentCollection.setDocumentCollectionId(knowledgeId);
botDocumentCollection.setDocumentCollectionId(entry.getKey());
Map<String, Object> options = new HashMap<>();
options.put(BotDocumentCollection.OPTION_KEY_RETRIEVAL_MODE, entry.getValue().name());
botDocumentCollection.setOptions(options);
list.add(botDocumentCollection);
}
this.saveBatch(list);

View File

@@ -401,7 +401,8 @@ public class BotServiceImpl extends ServiceImpl<BotMapper, Bot> implements BotSe
.selectListWithRelationsByQuery(queryWrapper);
if (botDocumentCollections != null && !botDocumentCollections.isEmpty()) {
for (BotDocumentCollection botDocumentCollection : botDocumentCollections) {
Tool function = botDocumentCollection.getKnowledge().toFunction(needEnglishName);
Tool function = botDocumentCollection.getKnowledge()
.toFunction(needEnglishName, botDocumentCollection.getRetrievalMode().name());
functionList.add(function);
}
}

View File

@@ -12,8 +12,6 @@ import org.springframework.stereotype.Service;
import java.math.BigInteger;
import static tech.easyflow.ai.entity.DocumentCollection.KEY_SEARCH_ENGINE_TYPE;
/**
* 服务层实现。
*
@@ -28,10 +26,9 @@ public class DocumentChunkServiceImpl extends ServiceImpl<DocumentChunkMapper, D
@Override
public boolean removeChunk(DocumentCollection knowledge, BigInteger chunkId) {
String searchEngineType = (String) knowledge.getOptionsByKey(KEY_SEARCH_ENGINE_TYPE);
DocumentSearcher searcher = searcherFactory.getSearcher(searchEngineType);
DocumentSearcher searcher = searcherFactory.getSearcher();
// 删除搜索引擎中的数据
if (searcherFactory.getSearcher(searchEngineType) == null){
if (searcher == null){
return true;
}
return searcher.deleteDocument(chunkId);

View File

@@ -1,13 +1,23 @@
package tech.easyflow.ai.service.impl;
import com.easyagents.core.document.Document;
import com.easyagents.core.model.rerank.RerankException;
import com.easyagents.core.model.rerank.RerankModel;
import com.easyagents.core.store.DocumentStore;
import com.easyagents.core.store.SearchWrapper;
import com.easyagents.core.store.StoreOptions;
import com.easyagents.rag.retrieval.HitSource;
import com.easyagents.rag.retrieval.KeywordRetriever;
import com.easyagents.rag.retrieval.RagHit;
import com.easyagents.rag.retrieval.RagQuery;
import com.easyagents.rag.retrieval.RagRetrievalExecutor;
import com.easyagents.rag.retrieval.RagScoreNormalizer;
import com.easyagents.rag.retrieval.RagRetrievalResult;
import com.easyagents.rag.retrieval.RetrievalMode;
import com.easyagents.rag.retrieval.RrfFusionStrategy;
import com.easyagents.rag.retrieval.VectorRetriever;
import com.easyagents.search.engine.service.DocumentSearcher;
import com.easyagents.search.engine.service.KeywordSearchRequest;
import com.mybatisflex.core.query.QueryWrapper;
import com.mybatisflex.spring.service.impl.ServiceImpl;
import org.slf4j.Logger;
@@ -22,7 +32,7 @@ import tech.easyflow.ai.entity.Model;
import tech.easyflow.ai.mapper.DocumentChunkMapper;
import tech.easyflow.ai.mapper.DocumentCollectionMapper;
import tech.easyflow.ai.mapper.FaqItemMapper;
import tech.easyflow.ai.service.DocumentChunkService;
import tech.easyflow.ai.rag.KnowledgeRetrievalRequest;
import tech.easyflow.ai.service.DocumentCollectionService;
import tech.easyflow.ai.service.ModelService;
import tech.easyflow.ai.utils.CustomBeanUtils;
@@ -35,12 +45,18 @@ import java.io.Serializable;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.math.RoundingMode;
import java.util.*;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutionException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.stream.Collectors;
import static tech.easyflow.ai.entity.DocumentCollection.*;
import static tech.easyflow.ai.entity.DocumentCollection.KEY_DOC_RECALL_MAX_NUM;
import static tech.easyflow.ai.entity.DocumentCollection.KEY_RERANK_ENABLE;
import static tech.easyflow.ai.entity.DocumentCollection.KEY_SIMILARITY_THRESHOLD;
/**
* 服务层实现。
@@ -57,9 +73,6 @@ public class DocumentCollectionServiceImpl extends ServiceImpl<DocumentCollectio
@Resource
private ModelService llmService;
@Resource
private DocumentChunkService chunkService;
@Autowired
private SearcherFactory searcherFactory;
@@ -69,14 +82,96 @@ public class DocumentCollectionServiceImpl extends ServiceImpl<DocumentCollectio
@Autowired
private FaqItemMapper faqItemMapper;
@Override
public List<Document> search(BigInteger id, String keyword) {
DocumentCollection documentCollection = getById(id);
KnowledgeRetrievalRequest request = new KnowledgeRetrievalRequest();
request.setKnowledgeId(id);
request.setQuery(keyword);
request.setRetrievalMode(RetrievalMode.HYBRID);
return search(request);
}
/**
 * Runs a retrieval against one knowledge base and returns the formatted hits.
 *
 * <p>Steps, in order: validate the request, load the collection, read the recall
 * size and similarity threshold options, execute retrieval through a
 * {@link RagRetrievalExecutor}, optionally rerank, normalize scores, and format.
 *
 * <p>NOTE(review): {@code request.getLimit()}, {@code getCallerType()} and
 * {@code getCallerId()} are not read in this method; the result size is driven by
 * the collection's recall option only. Confirm this is intended.
 *
 * @param request retrieval parameters; the request and its knowledge id must be non-null
 * @return the formatted documents, or an empty list when the query has no text
 *         or retrieval yields nothing
 * @throws BusinessException if the request or knowledge id is null, or the
 *                           knowledge base does not exist
 */
@Override
public List<Document> search(KnowledgeRetrievalRequest request) {
if (request == null || request.getKnowledgeId() == null) {
throw new BusinessException("知识库ID不能为空");
}
String keyword = request.getQuery();
// A query without text is not an error: answer with no results.
if (StringUtil.noText(keyword)) {
return Collections.emptyList();
}
// Fall back to HYBRID if the request carries no retrieval mode.
RetrievalMode retrievalMode = request.getRetrievalMode() == null
? RetrievalMode.HYBRID
: request.getRetrievalMode();
DocumentCollection documentCollection = getById(request.getKnowledgeId());
if (documentCollection == null) {
throw new BusinessException("知识库不存在");
}
// 5 and 0.6F are handed to the option readers as their third argument
// (presumably the fallback when the option is absent; verify in the helpers).
int docRecallMaxNum = readIntegerOption(documentCollection, KEY_DOC_RECALL_MAX_NUM, 5);
float minSimilarity = readFloatOption(documentCollection, KEY_SIMILARITY_THRESHOLD, 0.6F);
RagQuery ragQuery = new RagQuery();
ragQuery.setQuery(keyword);
ragQuery.setRetrievalMode(retrievalMode);
ragQuery.setTopK(docRecallMaxNum);
ragQuery.setMinScore((double) minSimilarity);
// The vector retriever receives the similarity threshold only in pure VECTOR mode;
// for every other mode it is given null.
RagRetrievalExecutor retrievalExecutor = new RagRetrievalExecutor(
buildVectorRetriever(documentCollection, docRecallMaxNum, retrievalMode == RetrievalMode.VECTOR ? minSimilarity : null),
buildKeywordRetriever(documentCollection, docRecallMaxNum),
new RrfFusionStrategy()
);
RagRetrievalResult retrievalResult = retrievalExecutor.retrieve(ragQuery);
List<Document> searchDocuments = toDocuments(retrievalResult.getHits());
fillSearchContent(documentCollection, searchDocuments);
if (searchDocuments.isEmpty()) {
return Collections.emptyList();
}
RerankModel rerankModel = resolveRerankModel(documentCollection);
boolean reranked = false;
if (rerankModel != null) {
try {
RagRetrievalResult rerankResult = retrievalExecutor.rerank(keyword, toRagHits(searchDocuments), rerankModel, docRecallMaxNum);
searchDocuments = toDocuments(rerankResult.getHits());
reranked = true;
} catch (RerankException e) {
// Rerank is best-effort: on failure keep the retrieved documents and leave reranked false.
LOG.warn("Rerank failed for collectionId={}, modelId={}, fallback to retrieved results. message={}",
documentCollection.getId(), documentCollection.getRerankModelId(), e.getMessage());
}
}
// Normalization and the min-similarity decision both depend on the mode and on
// whether reranking actually succeeded.
RagScoreNormalizer.normalize(searchDocuments, retrievalMode, reranked);
return formatDocuments(searchDocuments, shouldApplyMinSimilarityFilter(retrievalMode, reranked), minSimilarity, docRecallMaxNum);
}
/**
 * Creates a {@link VectorRetriever} bound to the given collection.
 *
 * <p>Each {@code retrieve} call runs {@code searchVectorDocuments} with the query text and
 * tags the resulting hits as {@link HitSource#VECTOR}.
 *
 * @param documentCollection collection to search
 * @param docRecallMaxNum    maximum number of documents to recall
 * @param minSimilarity      minimum similarity passed to the vector search, may be {@code null}
 * @return the retriever
 */
private VectorRetriever buildVectorRetriever(DocumentCollection documentCollection,
                                             int docRecallMaxNum,
                                             Float minSimilarity) {
    VectorRetriever retriever = new VectorRetriever() {
        @Override
        public List<RagHit> retrieve(RagQuery query) {
            List<Document> matches =
                    searchVectorDocuments(documentCollection, query.getQuery(), docRecallMaxNum, minSimilarity);
            return adaptDocuments(matches, HitSource.VECTOR);
        }
    };
    return retriever;
}
/**
 * Creates a {@link KeywordRetriever} bound to the given collection.
 *
 * <p>Each {@code retrieve} call runs {@code searchKeywordDocuments} with the query text and
 * tags the resulting hits as {@link HitSource#KEYWORD}.
 *
 * @param documentCollection collection to search
 * @param docRecallMaxNum    maximum number of documents to recall
 * @return the retriever
 */
private KeywordRetriever buildKeywordRetriever(DocumentCollection documentCollection, int docRecallMaxNum) {
    KeywordRetriever retriever = new KeywordRetriever() {
        @Override
        public List<RagHit> retrieve(RagQuery query) {
            String text = query.getQuery();
            List<Document> matches = searchKeywordDocuments(documentCollection, text, docRecallMaxNum);
            return adaptDocuments(matches, HitSource.KEYWORD);
        }
    };
    return retriever;
}
private List<Document> searchVectorDocuments(DocumentCollection documentCollection,
String keyword,
int docRecallMaxNum,
Float minSimilarity) {
DocumentStore documentStore = documentCollection.toDocumentStore();
if (documentStore == null) {
throw new BusinessException("知识库没有配置向量库");
@@ -88,88 +183,92 @@ public class DocumentCollectionServiceImpl extends ServiceImpl<DocumentCollectio
}
documentStore.setEmbeddingModel(model.toEmbeddingModel());
// 最大召回知识条数
Integer docRecallMaxNum = (Integer) documentCollection.getOptionsByKey(KEY_DOC_RECALL_MAX_NUM);
// 最低相似度
float minSimilarity = (float) documentCollection.getOptionsByKey(KEY_SIMILARITY_THRESHOLD);
SearchWrapper wrapper = new SearchWrapper();
wrapper.setMaxResults(docRecallMaxNum);
wrapper.setMinScore((double) minSimilarity);
if (minSimilarity != null) {
wrapper.setMinScore((double) minSimilarity);
}
wrapper.setText(keyword);
StoreOptions options = StoreOptions.ofCollectionName(documentCollection.getVectorStoreCollection());
options.setIndexName(documentCollection.getVectorStoreCollection());
List<Document> documents = documentStore.search(wrapper, options);
return documents == null ? Collections.<Document>emptyList() : documents;
}
// 并行查询:向量库 + 搜索引擎
CompletableFuture<List<Document>> vectorFuture = CompletableFuture.supplyAsync(() ->
documentStore.search(wrapper, options)
);
CompletableFuture<List<Document>> searcherFuture = CompletableFuture.supplyAsync(() -> {
DocumentSearcher searcher = searcherFactory.getSearcher((String) documentCollection.getOptionsByKey(KEY_SEARCH_ENGINE_TYPE));
if (searcher == null || !documentCollection.isSearchEngineEnabled()) {
return Collections.emptyList();
}
List<Document> documents = searcher.searchDocuments(keyword);
return documents == null ? Collections.emptyList() : documents;
});
// 合并两个查询结果
CompletableFuture<Map<String, Document>> combinedFuture = vectorFuture.thenCombine(
searcherFuture,
(vectorDocs, searcherDocs) -> {
Map<String, Document> uniqueDocs = new HashMap<>();
vectorDocs.forEach(doc -> uniqueDocs.putIfAbsent(doc.getId().toString(), doc));
searcherDocs.forEach(doc -> uniqueDocs.putIfAbsent(doc.getId().toString(), doc));
return uniqueDocs;
}
);
try {
Map<String, Document> uniqueDocs = combinedFuture.get(); // 阻塞等待所有查询完成
List<Document> searchDocuments = new ArrayList<>(uniqueDocs.values());
searchDocuments.sort((doc1, doc2) -> Double.compare(doc2.getScore(), doc1.getScore()));
fillSearchContent(documentCollection, searchDocuments);
if (searchDocuments.isEmpty()) {
return Collections.emptyList();
}
boolean rerankEnable = Boolean.TRUE.equals(documentCollection.getOptionsByKey(KEY_RERANK_ENABLE));
if (!rerankEnable || documentCollection.getRerankModelId() == null) {
return formatDocuments(searchDocuments, minSimilarity, docRecallMaxNum);
}
Model modelRerank = llmService.getModelInstance(documentCollection.getRerankModelId());
if (modelRerank == null) {
return formatDocuments(searchDocuments, minSimilarity, docRecallMaxNum);
}
RerankModel rerankModel = modelRerank.toRerankModel();
if (rerankModel == null) {
return formatDocuments(searchDocuments, minSimilarity, docRecallMaxNum);
}
Map<Object, Double> originalScores = new HashMap<>();
searchDocuments.forEach(item -> originalScores.put(item.getId(), item.getScore()));
searchDocuments.forEach(item -> item.setScore(null));
try {
List<Document> rerankDocuments = rerankModel.rerank(keyword, searchDocuments);
return formatDocuments(rerankDocuments, minSimilarity, docRecallMaxNum);
} catch (RerankException e) {
searchDocuments.forEach(item -> item.setScore(originalScores.get(item.getId())));
LOG.warn("Rerank failed for collectionId={}, modelId={}, fallback to vector results. message={}",
documentCollection.getId(), documentCollection.getRerankModelId(), e.getMessage());
return formatDocuments(searchDocuments, minSimilarity, docRecallMaxNum);
}
} catch (InterruptedException | ExecutionException e) {
Thread.currentThread().interrupt();
e.printStackTrace();
throw new RuntimeException(e);
/**
 * Runs a keyword search scoped to the given collection.
 *
 * @param documentCollection collection whose ID (as a string) is set as the request's knowledge ID;
 *                           a null collection or null ID sets it to {@code null}
 * @param keyword            query text
 * @param docRecallMaxNum    maximum number of documents to recall
 * @return the searcher's results, or an empty list when no searcher is available or it returns {@code null}
 */
private List<Document> searchKeywordDocuments(DocumentCollection documentCollection, String keyword, int docRecallMaxNum) {
    final DocumentSearcher keywordSearcher = searcherFactory.getSearcher();
    if (keywordSearcher == null) {
        return Collections.emptyList();
    }

    final KeywordSearchRequest searchRequest = KeywordSearchRequest.of(keyword, docRecallMaxNum);
    String knowledgeId = null;
    if (documentCollection != null && documentCollection.getId() != null) {
        knowledgeId = documentCollection.getId().toString();
    }
    searchRequest.setKnowledgeId(knowledgeId);

    final List<Document> found = keywordSearcher.searchDocuments(searchRequest);
    if (found == null) {
        return Collections.<Document>emptyList();
    }
    return found;
}
/**
 * Converts documents into hits tagged with the given source.
 *
 * @param documents documents to convert, may be {@code null}
 * @param hitSource source recorded on every hit
 * @return a new mutable list of the non-null hits; empty when {@code documents} is {@code null}
 */
private List<RagHit> adaptDocuments(List<Document> documents, HitSource hitSource) {
    if (documents == null) {
        return new ArrayList<>();
    }
    return documents.stream()
            .map(document -> RagHit.fromDocument(document, hitSource))
            // fromDocument may yield null; such entries are dropped.
            .filter(Objects::nonNull)
            .collect(Collectors.toCollection(ArrayList::new));
}
/**
 * Converts hits back into documents, skipping null hits.
 *
 * @param hits hits to convert, may be {@code null}
 * @return a new mutable list holding {@code toDocument()} of every non-null hit
 */
private List<Document> toDocuments(List<RagHit> hits) {
    if (hits == null) {
        return new ArrayList<>();
    }
    return hits.stream()
            .filter(Objects::nonNull)
            .<Document>map(hit -> hit.toDocument())
            .collect(Collectors.toCollection(ArrayList::new));
}
/**
 * Converts documents into hits using the single-argument {@code RagHit.fromDocument}.
 *
 * @param documents documents to convert, may be {@code null}
 * @return a new mutable list of the non-null hits; empty when {@code documents} is {@code null}
 */
private List<RagHit> toRagHits(List<Document> documents) {
    if (documents == null) {
        return new ArrayList<>();
    }
    return documents.stream()
            .map(document -> RagHit.fromDocument(document))
            .filter(Objects::nonNull)
            .collect(Collectors.toCollection(ArrayList::new));
}
/**
 * Resolves the rerank model configured for a collection.
 *
 * @param documentCollection collection whose rerank settings are read
 * @return the rerank model, or {@code null} when the {@code KEY_RERANK_ENABLE} option is not
 *         {@code Boolean.TRUE}, no rerank model ID is set, or no model instance is found
 */
private RerankModel resolveRerankModel(DocumentCollection documentCollection) {
    Object rerankFlag = documentCollection.getOptionsByKey(KEY_RERANK_ENABLE);
    if (!Boolean.TRUE.equals(rerankFlag)) {
        return null;
    }
    if (documentCollection.getRerankModelId() == null) {
        return null;
    }
    Model rerankModelConfig = llmService.getModelInstance(documentCollection.getRerankModelId());
    return rerankModelConfig == null ? null : rerankModelConfig.toRerankModel();
}
/**
 * Decides whether the minimum-similarity filter applies to the final results.
 *
 * @param retrievalMode mode used for retrieval
 * @param reranked      whether the results were reranked
 * @return {@code true} only for non-reranked {@link RetrievalMode#VECTOR} results
 */
private boolean shouldApplyMinSimilarityFilter(RetrievalMode retrievalMode, boolean reranked) {
    if (reranked) {
        return false;
    }
    return RetrievalMode.VECTOR == retrievalMode;
}
@Override
public DocumentCollection getDetail(String idOrAlias) {
DocumentCollection knowledge = null;
if (idOrAlias.matches(RegexUtils.ALL_NUMBER)) {
@@ -188,15 +287,11 @@ public class DocumentCollectionServiceImpl extends ServiceImpl<DocumentCollectio
/**
 * Looks up a single collection whose alias equals the given value.
 *
 * @param idOrAlias value matched against the alias column
 * @return the result of {@code getOne} for that alias condition
 */
@Override
public DocumentCollection getByAlias(String idOrAlias) {
    return getOne(QueryWrapper.create().eq(DocumentCollection::getAlias, idOrAlias));
}
@Override
public boolean updateById(DocumentCollection entity) {
DocumentCollection documentCollection = getById(entity.getId());
@@ -210,38 +305,29 @@ public class DocumentCollectionServiceImpl extends ServiceImpl<DocumentCollectio
documentCollection.setAlias(null);
}
return super.updateById(documentCollection, false);
}
/**
 * Formats a list of retrieved documents.
 *
 * NOTE(review): this span appears to interleave two revisions of the method (a three-argument
 * signature and pipeline followed by a four-argument signature and pipeline) — confirm which
 * lines are live before relying on it.
 *
 * @param documents document list
 * @param applyMinSimilarity when {@code false}, the four-argument pipeline skips the score filter
 * @param minSimilarity minimum similarity
 * @param maxResults maximum number of documents kept
 * @return the formatted document list
 */
public List<Document> formatDocuments(List<Document> documents, float minSimilarity, int maxResults) {
public List<Document> formatDocuments(List<Document> documents,
boolean applyMinSimilarity,
float minSimilarity,
int maxResults) {
return documents.stream()
// Drop documents whose score is null or below the minimum.
.filter(document -> {
Double score = document.getScore();
return score != null && score >= minSimilarity;
})
// Round the score to four decimal places.
.map(document -> {
Double score = document.getScore();
BigDecimal bd = new BigDecimal(score.toString());
bd = bd.setScale(4, RoundingMode.HALF_UP);
Double roundedScore = bd.doubleValue();
document.setScore(roundedScore);
return document;
})
// Sort by score in descending order so the highest score comes first.
.sorted(Comparator.comparing(Document::getScore, Comparator.reverseOrder()))
// Keep only the first maxResults entries.
.limit(maxResults)
.collect(Collectors.toList());
.filter(Objects::nonNull)
.filter(document -> !applyMinSimilarity
|| (document.getScore() != null && document.getScore() >= minSimilarity))
.map(this::roundDocumentScore)
.sorted(Comparator.comparing(Document::getScore, Comparator.nullsLast(Comparator.reverseOrder())))
.limit(maxResults)
.collect(Collectors.toList());
}
/**
 * Rounds a document's score in place via {@code roundDouble}.
 *
 * @param document document to update, may be {@code null}
 * @return the same instance; untouched when it or its score is {@code null}
 */
private Document roundDocumentScore(Document document) {
    if (document != null && document.getScore() != null) {
        document.setScore(roundDouble(document.getScore()));
    }
    return document;
}
private void fillSearchContent(DocumentCollection documentCollection, List<Document> searchDocuments) {
@@ -249,33 +335,42 @@ public class DocumentCollectionServiceImpl extends ServiceImpl<DocumentCollectio
return;
}
List<Serializable> ids = searchDocuments.stream()
.map(item -> (Serializable) item.getId())
.collect(Collectors.toList());
.map(item -> (Serializable) item.getId())
.collect(Collectors.toList());
if (documentCollection.isFaqCollection()) {
Map<String, FaqItem> faqItemMap = faqItemMapper.selectListByIds(ids).stream()
.collect(Collectors.toMap(item -> item.getId().toString(), item -> item, (a, b) -> a));
QueryWrapper queryWrapper = QueryWrapper.create();
queryWrapper.in(FaqItem::getId, ids);
queryWrapper.eq(FaqItem::getCollectionId, documentCollection.getId());
Map<String, FaqItem> faqItemMap = faqItemMapper.selectListByQuery(queryWrapper).stream()
.collect(Collectors.toMap(item -> item.getId().toString(), item -> item, (a, b) -> a));
searchDocuments.removeIf(item -> !faqItemMap.containsKey(String.valueOf(item.getId())));
searchDocuments.forEach(item -> {
FaqItem faqItem = faqItemMap.get(String.valueOf(item.getId()));
if (faqItem != null) {
List<Map<String, String>> faqImages = readFaqImages(faqItem);
item.setContent(buildFaqPromptContent(faqItem, faqImages));
Map<String, Object> metadataMap = item.getMetadataMap() == null
? new HashMap<>()
: new HashMap<>(item.getMetadataMap());
List<String> imageUrls = faqImages.stream()
.map(image -> image.get("url"))
.filter(Objects::nonNull)
.collect(Collectors.toList());
metadataMap.put("imageUrls", imageUrls);
item.setMetadataMap(metadataMap);
if (faqItem == null) {
return;
}
List<Map<String, String>> faqImages = readFaqImages(faqItem);
item.setContent(buildFaqPromptContent(faqItem, faqImages));
Map<String, Object> metadataMap = item.getMetadataMap() == null
? new HashMap<String, Object>()
: new HashMap<String, Object>(item.getMetadataMap());
List<String> imageUrls = faqImages.stream()
.map(image -> image.get("url"))
.filter(Objects::nonNull)
.collect(Collectors.toList());
metadataMap.put("imageUrls", imageUrls);
item.setMetadataMap(metadataMap);
});
return;
}
Map<String, DocumentChunk> chunkMap = documentChunkMapper.selectListByIds(ids).stream()
.collect(Collectors.toMap(item -> item.getId().toString(), item -> item, (a, b) -> a));
QueryWrapper queryWrapper = QueryWrapper.create();
queryWrapper.in(DocumentChunk::getId, ids);
queryWrapper.eq(DocumentChunk::getDocumentCollectionId, documentCollection.getId());
Map<String, DocumentChunk> chunkMap = documentChunkMapper.selectListByQuery(queryWrapper).stream()
.collect(Collectors.toMap(item -> item.getId().toString(), item -> item, (a, b) -> a));
searchDocuments.removeIf(item -> !chunkMap.containsKey(String.valueOf(item.getId())));
searchDocuments.forEach(item -> {
DocumentChunk documentChunk = chunkMap.get(String.valueOf(item.getId()));
if (documentChunk != null && !StringUtil.noText(documentChunk.getContent())) {
@@ -354,4 +449,43 @@ public class DocumentCollectionServiceImpl extends ServiceImpl<DocumentCollectio
String text = String.valueOf(value).trim();
return text.isEmpty() ? null : text;
}
/**
 * Reads an integer option from a collection, falling back to a default.
 *
 * @param documentCollection collection whose options are read
 * @param key                option key
 * @param defaultValue       value returned when the option is absent, blank, of another type,
 *                           or not parseable as an integer
 * @return {@code intValue()} of a {@link Number} option, the parsed value of a non-blank
 *         string option, otherwise {@code defaultValue}
 */
private int readIntegerOption(DocumentCollection documentCollection, String key, int defaultValue) {
    Object value = documentCollection.getOptionsByKey(key);
    if (value instanceof Number) {
        return ((Number) value).intValue();
    }
    if (value instanceof String && StringUtil.hasText((String) value)) {
        try {
            // Integer.parseInt rejects surrounding whitespace (Float.parseFloat does not),
            // so trim first to keep padded values such as " 5 " from silently becoming the default.
            return Integer.parseInt(((String) value).trim());
        } catch (NumberFormatException ignored) {
            return defaultValue;
        }
    }
    return defaultValue;
}
/**
 * Reads a float option from a collection, falling back to a default.
 *
 * @param documentCollection collection whose options are read
 * @param key                option key
 * @param defaultValue       value returned when the option is absent, blank, of another type,
 *                           not parseable, or not a finite number
 * @return {@code floatValue()} of a {@link Number} option or the parsed value of a non-blank
 *         string option when that value is finite, otherwise {@code defaultValue}
 */
private float readFloatOption(DocumentCollection documentCollection, String key, float defaultValue) {
    Object value = documentCollection.getOptionsByKey(key);
    Float parsed = null;
    if (value instanceof Number) {
        parsed = ((Number) value).floatValue();
    } else if (value instanceof String && StringUtil.hasText((String) value)) {
        try {
            parsed = Float.parseFloat((String) value);
        } catch (NumberFormatException ignored) {
            // Unparseable text is treated like a missing option below.
        }
    }
    // Float.parseFloat accepts "NaN" and "Infinity"; a non-finite value is useless as a
    // threshold in comparisons, so it is handled like an invalid option.
    if (parsed == null || parsed.isNaN() || parsed.isInfinite()) {
        return defaultValue;
    }
    return parsed;
}
/**
 * Rounds a value to four decimal places using {@link RoundingMode#HALF_UP}.
 *
 * @param value value to round, may be {@code null}
 * @return the rounded value; {@code null} for {@code null} input; NaN and infinite values are
 *         returned unchanged
 */
private Double roundDouble(Double value) {
    if (value == null) {
        return null;
    }
    // BigDecimal cannot represent NaN or infinity and would throw NumberFormatException.
    if (value.isNaN() || value.isInfinite()) {
        return value;
    }
    return BigDecimal.valueOf(value)
            .setScale(4, RoundingMode.HALF_UP)
            .doubleValue();
}
}

View File

@@ -19,6 +19,7 @@ import com.easyagents.rag.ingestion.RagIngestionService;
import com.easyagents.rag.ingestion.model.AnalysisResult;
import com.easyagents.rag.ingestion.model.StrategyConfig;
import com.easyagents.search.engine.service.DocumentSearcher;
import com.easyagents.search.engine.service.KeywordSearchMetadataKeys;
import com.mybatisflex.core.keygen.impl.FlexIDKeyGenerator;
import com.mybatisflex.core.paginate.Page;
import com.mybatisflex.core.query.QueryMethods;
@@ -56,7 +57,6 @@ import java.util.*;
import java.util.concurrent.atomic.AtomicInteger;
import static tech.easyflow.ai.entity.DocumentCollection.KEY_CAN_UPDATE_EMBEDDING_MODEL;
import static tech.easyflow.ai.entity.DocumentCollection.KEY_SEARCH_ENGINE_TYPE;
import static tech.easyflow.ai.entity.table.DocumentChunkTableDef.DOCUMENT_CHUNK;
import static tech.easyflow.ai.entity.table.DocumentTableDef.DOCUMENT;
@@ -157,8 +157,8 @@ public class DocumentServiceImpl extends ServiceImpl<DocumentMapper, Document> i
List<BigInteger> chunkIds = documentChunkMapper.selectListByQueryAs(queryWrapper, BigInteger.class);
documentStore.delete(chunkIds, options);
// 删除搜索引擎中的数据
if (searcherFactory.getSearcher((String) knowledge.getOptionsByKey(KEY_SEARCH_ENGINE_TYPE)) != null) {
DocumentSearcher searcher = searcherFactory.getSearcher((String) knowledge.getOptionsByKey(KEY_SEARCH_ENGINE_TYPE));
DocumentSearcher searcher = searcherFactory.getSearcher();
if (searcher != null) {
chunkIds.forEach(searcher::deleteDocument);
}
int ck = documentChunkMapper.deleteByQuery(QueryWrapper.create().eq(DocumentChunk::getDocumentId, id));
@@ -691,9 +691,7 @@ public class DocumentServiceImpl extends ServiceImpl<DocumentMapper, Document> i
options.setIndexName(options.getCollectionName());
DocumentSearcher searcher = null;
if (knowledge.isSearchEngineEnabled()) {
searcher = searcherFactory.getSearcher((String) knowledge.getOptionsByKey(KEY_SEARCH_ENGINE_TYPE));
}
searcher = searcherFactory.getSearcher();
return new StoreExecutionContext(knowledge, model, embeddingModel, documentStore, options, searcher);
}
@@ -703,6 +701,8 @@ public class DocumentServiceImpl extends ServiceImpl<DocumentMapper, Document> i
com.easyagents.core.document.Document document = new com.easyagents.core.document.Document();
document.setId(item.getId());
document.setContent(item.getContent());
document.addMetadata(KeywordSearchMetadataKeys.KNOWLEDGE_ID,
storeContext.knowledge.getId() == null ? null : storeContext.knowledge.getId().toString());
documents.add(document);
}

View File

@@ -15,6 +15,7 @@ import com.easyagents.core.store.DocumentStore;
import com.easyagents.core.store.StoreOptions;
import com.easyagents.core.store.StoreResult;
import com.easyagents.search.engine.service.DocumentSearcher;
import com.easyagents.search.engine.service.KeywordSearchMetadataKeys;
import com.mybatisflex.core.query.QueryWrapper;
import com.mybatisflex.spring.service.impl.ServiceImpl;
import org.jsoup.Jsoup;
@@ -52,7 +53,6 @@ import java.util.*;
import java.util.function.Supplier;
import static tech.easyflow.ai.entity.DocumentCollection.KEY_CAN_UPDATE_EMBEDDING_MODEL;
import static tech.easyflow.ai.entity.DocumentCollection.KEY_SEARCH_ENGINE_TYPE;
@Service
public class FaqItemServiceImpl extends ServiceImpl<FaqItemMapper, FaqItem> implements FaqItemService {
@@ -356,14 +356,12 @@ public class FaqItemServiceImpl extends ServiceImpl<FaqItemMapper, FaqItem> impl
throw new BusinessException("FAQ向量化失败");
}
if (collection.isSearchEngineEnabled()) {
DocumentSearcher searcher = searcherFactory.getSearcher((String) collection.getOptionsByKey(KEY_SEARCH_ENGINE_TYPE));
if (searcher != null) {
if (isUpdate) {
searcher.deleteDocument(entity.getId());
}
searcher.addDocument(doc);
DocumentSearcher searcher = searcherFactory.getSearcher();
if (searcher != null) {
if (isUpdate) {
searcher.deleteDocument(entity.getId());
}
searcher.addDocument(doc);
}
markCollectionEmbedded(collection, preparedStore.embeddingModel);
}
@@ -375,14 +373,11 @@ public class FaqItemServiceImpl extends ServiceImpl<FaqItemMapper, FaqItem> impl
throw new BusinessException("FAQ向量删除失败");
}
if (collection.isSearchEngineEnabled()) {
DocumentSearcher searcher = searcherFactory.getSearcher((String) collection.getOptionsByKey(KEY_SEARCH_ENGINE_TYPE));
if (searcher != null) {
boolean removed = searcher.deleteDocument(entity.getId());
if (!removed) {
LOG.warn("Delete faq search index failed. faqId={}, searcherType={}",
entity.getId(), collection.getOptionsByKey(KEY_SEARCH_ENGINE_TYPE));
}
DocumentSearcher searcher = searcherFactory.getSearcher();
if (searcher != null) {
boolean removed = searcher.deleteDocument(entity.getId());
if (!removed) {
LOG.warn("Delete faq search index failed. faqId={}", entity.getId());
}
}
}
@@ -443,6 +438,7 @@ public class FaqItemServiceImpl extends ServiceImpl<FaqItemMapper, FaqItem> impl
metadata.put("answerText", entity.getAnswerText());
metadata.put("categoryId", entity.getCategoryId());
metadata.put("imageUrls", readImageUrls(entity.getOptions()));
metadata.put(KeywordSearchMetadataKeys.KNOWLEDGE_ID, entity.getCollectionId() == null ? null : entity.getCollectionId().toString());
doc.setMetadataMap(metadata);
return doc;
}