feat: 下沉知识库检索编排能力

- 新增 rag retrieval 核心协议、RRF 融合与相关度归一化

- 支持关键词检索按 knowledgeId 过滤并补充 ES/Lucene 单测

- 扩展 KnowledgeNode 检索模式与 Milvus 检索参数透传
This commit is contained in:
2026-04-05 20:22:59 +08:00
parent 941995d1b8
commit f57544daa2
28 changed files with 1309 additions and 34 deletions

View File

@@ -37,6 +37,11 @@
<artifactId>jackson-databind</artifactId>
<version>2.15.2</version> <!-- 或与Elasticsearch客户端兼容的版本 -->
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<scope>test</scope>
</dependency>
</dependencies>
</project>

View File

@@ -4,12 +4,15 @@ import co.elastic.clients.elasticsearch.ElasticsearchClient;
import co.elastic.clients.elasticsearch.core.*;
import co.elastic.clients.elasticsearch.core.bulk.BulkOperation;
import co.elastic.clients.elasticsearch.core.bulk.IndexOperation;
import co.elastic.clients.elasticsearch.core.search.SourceConfig;
import co.elastic.clients.json.JsonData;
import co.elastic.clients.json.jackson.JacksonJsonpMapper;
import co.elastic.clients.transport.ElasticsearchTransport;
import co.elastic.clients.transport.rest_client.RestClientTransport;
import com.easyagents.core.document.Document;
import com.easyagents.search.engine.service.DocumentSearcher;
import com.easyagents.search.engine.service.KeywordSearchMetadataKeys;
import com.easyagents.search.engine.service.KeywordSearchRequest;
import org.apache.http.HttpHost;
import org.apache.http.auth.AuthScope;
import org.apache.http.auth.UsernamePasswordCredentials;
@@ -88,13 +91,7 @@ public class ElasticSearcher implements DocumentSearcher {
transport = new RestClientTransport(restClient, new JacksonJsonpMapper());
ElasticsearchClient client = new ElasticsearchClient(transport);
Map<String, Object> source = new HashMap<>();
source.put("id", document.getId());
source.put("content", document.getContent());
if (document.getTitle() != null) {
source.put("title", document.getTitle());
}
Map<String, Object> source = buildSource(document);
String documentId = document.getId().toString();
IndexOperation<?> indexOp = IndexOperation.of(i -> i
.index(esConfig.getIndexName())
@@ -116,7 +113,7 @@ public class ElasticSearcher implements DocumentSearcher {
}
@Override
public List<Document> searchDocuments(String keyword, int count) {
public List<Document> searchDocuments(KeywordSearchRequest request) {
RestClient restClient = null;
ElasticsearchTransport transport = null;
@@ -125,21 +122,16 @@ public class ElasticSearcher implements DocumentSearcher {
transport = new RestClientTransport(restClient, new JacksonJsonpMapper());
ElasticsearchClient client = new ElasticsearchClient(transport);
SearchRequest request = SearchRequest.of(s -> s
.index(esConfig.getIndexName())
.size(count)
.query(q -> q
.match(m -> m
.field("title")
.field("content")
.query(keyword)
)
)
);
SearchResponse<Document> response = client.search(request, Document.class);
SearchResponse<Map> response = client.search(buildSearchRequest(request), Map.class);
List<Document> results = new ArrayList<>();
response.hits().hits().forEach(hit -> results.add(hit.source()));
response.hits().hits().forEach(hit -> {
Map source = hit.source();
Document document = toDocument(hit.id(), source, hit.score());
if (document == null) {
return;
}
results.add(document);
});
return results;
} catch (Exception e) {
@@ -193,14 +185,17 @@ public class ElasticSearcher implements DocumentSearcher {
transport = new RestClientTransport(restClient, new JacksonJsonpMapper());
ElasticsearchClient client = new ElasticsearchClient(transport);
UpdateRequest<Document, Object> request = UpdateRequest.of(u -> u
UpdateRequest<Map<String, Object>, Map<String, Object>> request = UpdateRequest.of(u -> u
.index(esConfig.getIndexName())
.id(document.getId().toString())
.doc(document)
.doc(buildSource(document))
);
UpdateResponse<Document> response = client.update(request, Object.class);
return response.result() == co.elastic.clients.elasticsearch._types.Result.Updated;
@SuppressWarnings("unchecked")
Class<Map<String, Object>> documentClass = (Class<Map<String, Object>>) (Class<?>) Map.class;
UpdateResponse<Map<String, Object>> response = client.update(request, documentClass);
return response.result() == co.elastic.clients.elasticsearch._types.Result.Updated
|| response.result() == co.elastic.clients.elasticsearch._types.Result.NoOp;
} catch (Exception e) {
LOG.error("Error updating document with id: " + document.getId(), e);
return false;
@@ -220,4 +215,88 @@ public class ElasticSearcher implements DocumentSearcher {
}
}
}
/**
 * Converts the source map of a single search hit into a {@link Document}.
 *
 * @param hitId  id of the hit; used as the document id when the source has no "id" entry
 * @param source source map of the hit, may be {@code null}
 * @param score  score of the hit, copied onto the document unchanged
 * @return the populated document, or {@code null} when {@code source} is null or empty
 */
@SuppressWarnings("unchecked")
private Document toDocument(String hitId, Map source, Double score) {
    boolean nothingToConvert = source == null || source.isEmpty();
    if (nothingToConvert) {
        return null;
    }

    Document result = new Document();

    // Prefer the id stored in the source; fall back to the hit id.
    // Kept as Object so the same setId overload is selected in both cases.
    Object storedId = source.get("id");
    Object resolvedId = storedId != null ? storedId : hitId;
    result.setId(resolvedId);

    Object rawTitle = source.get("title");
    if (rawTitle != null) {
        result.setTitle(String.valueOf(rawTitle));
    }

    Object rawContent = source.get("content");
    if (rawContent != null) {
        result.setContent(String.valueOf(rawContent));
    }

    // Metadata is copied into a fresh map; anything that is not a map is ignored.
    Object rawMetadata = source.get("metadataMap");
    if (rawMetadata instanceof Map) {
        Map<String, Object> typedMetadata = (Map<String, Object>) rawMetadata;
        result.setMetadataMap(new HashMap<>(typedMetadata));
    }

    result.setScore(score);
    return result;
}
/**
 * Builds the field map used as the Elasticsearch body for a document.
 *
 * <p>"id" and "content" are always written, "title" only when it is non-null.
 * When the document has a non-empty metadata map, a copy of it is stored under
 * "metadataMap" and, if present, the knowledge id metadata value is additionally
 * written as a string under {@link KeywordSearchMetadataKeys#KNOWLEDGE_ID}.
 *
 * @param document the document to convert
 * @return a new mutable map holding the fields described above
 */
Map<String, Object> buildSource(Document document) {
    Map<String, Object> fields = new HashMap<String, Object>();
    fields.put("id", document.getId());
    fields.put("content", document.getContent());

    Object title = document.getTitle();
    if (title != null) {
        fields.put("title", title);
    }

    // Without metadata there is nothing more to add.
    if (document.getMetadataMap() == null || document.getMetadataMap().isEmpty()) {
        return fields;
    }

    fields.put("metadataMap", new HashMap<String, Object>(document.getMetadataMap()));

    // Duplicate the knowledge id at the top level of the source, as a string.
    Object knowledgeId = document.getMetadata(KeywordSearchMetadataKeys.KNOWLEDGE_ID);
    if (knowledgeId != null) {
        fields.put(KeywordSearchMetadataKeys.KNOWLEDGE_ID, String.valueOf(knowledgeId));
    }
    return fields;
}
/**
 * Builds the Elasticsearch search request for a keyword search.
 *
 * <p>The query is a bool query with a multi_match on "title" and "content" as its
 * must clause. When the request carries a non-blank knowledge id, a term filter on
 * {@link KeywordSearchMetadataKeys#KNOWLEDGE_ID} with the trimmed value is added.
 * Only "id", "title", "content" and "metadataMap" are requested from the source.
 *
 * @param request the keyword search request; {@code null} is replaced by a
 *                default-constructed {@link KeywordSearchRequest}
 * @return the search request targeting the configured index
 */
SearchRequest buildSearchRequest(KeywordSearchRequest request) {
    // NOTE(review): for a null request the keyword comes from a default-constructed
    // KeywordSearchRequest — confirm that it yields a keyword multi_match accepts.
    final KeywordSearchRequest effective = request != null ? request : new KeywordSearchRequest();

    final String rawKnowledgeId = effective.getKnowledgeId();
    final String knowledgeId = rawKnowledgeId == null ? "" : rawKnowledgeId.trim();
    final boolean restrictToKnowledge = !knowledgeId.isEmpty();

    final SourceConfig returnedFields = SourceConfig.of(
            cfg -> cfg.filter(filter -> filter.includes("id", "title", "content", "metadataMap")));

    return SearchRequest.of(search -> search
            .index(esConfig.getIndexName())
            .size(effective.getCount())
            .source(returnedFields)
            .query(query -> query.bool(bool -> {
                bool.must(must -> must.multiMatch(match -> match
                        .query(effective.getKeyword())
                        .fields("title", "content")));
                if (restrictToKnowledge) {
                    // NOTE(review): term is an exact match; presumably the field is
                    // mapped as keyword — confirm against the index mapping.
                    bool.filter(filter -> filter.term(term -> term
                            .field(KeywordSearchMetadataKeys.KNOWLEDGE_ID)
                            .value(value -> value.stringValue(knowledgeId))));
                }
                return bool;
            })));
}
/**
 * Checks whether Elasticsearch can be reached with the current configuration.
 *
 * <p>A client is built for this call only and an info request is issued. The
 * transport and REST client are handed to {@code closeResources} afterwards,
 * whether or not the request succeeded.
 *
 * @return {@code true} when the info request returns a non-null response;
 *         {@code false} when any exception is thrown (the exception is logged)
 */
public boolean checkAvailable() {
    RestClient restClient = null;
    ElasticsearchTransport transport = null;
    boolean available;
    try {
        restClient = buildRestClient();
        transport = new RestClientTransport(restClient, new JacksonJsonpMapper());
        ElasticsearchClient probe = new ElasticsearchClient(transport);
        available = probe.info() != null;
    } catch (Exception e) {
        LOG.error("Elasticsearch availability check failed", e);
        available = false;
    } finally {
        closeResources(transport, restClient);
    }
    return available;
}
}

View File

@@ -0,0 +1,54 @@
package com.easyagents.engine.es;
import co.elastic.clients.elasticsearch.core.SearchRequest;
import com.easyagents.core.document.Document;
import com.easyagents.search.engine.service.KeywordSearchMetadataKeys;
import com.easyagents.search.engine.service.KeywordSearchRequest;
import org.junit.Assert;
import org.junit.Test;
import java.util.Map;
/**
 * Tests for {@link ElasticSearcher#buildSearchRequest} and
 * {@link ElasticSearcher#buildSource}, called directly on a searcher
 * created from a fixed local configuration.
 */
public class ElasticSearcherQueryBuilderTest {

    /** A keyword request with a knowledge id yields one multi_match must clause and one term filter. */
    @Test
    public void shouldBuildSearchRequestWithMultiMatchAndKnowledgeFilter() {
        KeywordSearchRequest keywordRequest = KeywordSearchRequest.of("客服", 5);
        keywordRequest.setKnowledgeId("100");

        SearchRequest built = newSearcher().buildSearchRequest(keywordRequest);

        Assert.assertEquals(5, built.size().intValue());
        Assert.assertNotNull(built.query().bool());
        Assert.assertEquals(1, built.query().bool().must().size());
        Assert.assertNotNull(built.query().bool().must().get(0).multiMatch());
        Assert.assertEquals(2, built.query().bool().must().get(0).multiMatch().fields().size());
        Assert.assertEquals(1, built.query().bool().filter().size());
        Assert.assertEquals("knowledgeId", built.query().bool().filter().get(0).term().field());
    }

    /** The knowledge id metadata value is copied to the top level while the metadata map is kept. */
    @Test
    public void shouldExtractKnowledgeIdToTopLevelSource() {
        Document doc = new Document();
        doc.setId("1");
        doc.setTitle("title");
        doc.setContent("content");
        doc.addMetadata(KeywordSearchMetadataKeys.KNOWLEDGE_ID, "100");

        Map<String, Object> fields = newSearcher().buildSource(doc);

        Assert.assertEquals("100", fields.get(KeywordSearchMetadataKeys.KNOWLEDGE_ID));
        Assert.assertTrue(fields.get("metadataMap") instanceof Map);
    }

    /** Creates the searcher under test from the fixed test configuration. */
    private ElasticSearcher newSearcher() {
        return new ElasticSearcher(localConfig());
    }

    /** Configuration values used by every test in this class. */
    private ESConfig localConfig() {
        ESConfig esConfig = new ESConfig();
        esConfig.setHost("http://127.0.0.1:9200");
        esConfig.setUserName("elastic");
        esConfig.setPassword("elastic");
        esConfig.setIndexName("easyflow");
        return esConfig;
    }
}