feat: 下沉知识库检索编排能力
- 新增 rag retrieval 核心协议、RRF 融合与相关度归一化
- 支持关键词检索按 knowledgeId 过滤并补充 ES/Lucene 单测
- 扩展 KnowledgeNode 检索模式与 Milvus 检索参数透传
This commit is contained in:
@@ -37,6 +37,11 @@
|
||||
<artifactId>jackson-databind</artifactId>
|
||||
<version>2.15.2</version> <!-- 或与Elasticsearch客户端兼容的版本 -->
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>junit</groupId>
|
||||
<artifactId>junit</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
</project>
|
||||
|
||||
@@ -4,12 +4,15 @@ import co.elastic.clients.elasticsearch.ElasticsearchClient;
|
||||
import co.elastic.clients.elasticsearch.core.*;
|
||||
import co.elastic.clients.elasticsearch.core.bulk.BulkOperation;
|
||||
import co.elastic.clients.elasticsearch.core.bulk.IndexOperation;
|
||||
import co.elastic.clients.elasticsearch.core.search.SourceConfig;
|
||||
import co.elastic.clients.json.JsonData;
|
||||
import co.elastic.clients.json.jackson.JacksonJsonpMapper;
|
||||
import co.elastic.clients.transport.ElasticsearchTransport;
|
||||
import co.elastic.clients.transport.rest_client.RestClientTransport;
|
||||
import com.easyagents.core.document.Document;
|
||||
import com.easyagents.search.engine.service.DocumentSearcher;
|
||||
import com.easyagents.search.engine.service.KeywordSearchMetadataKeys;
|
||||
import com.easyagents.search.engine.service.KeywordSearchRequest;
|
||||
import org.apache.http.HttpHost;
|
||||
import org.apache.http.auth.AuthScope;
|
||||
import org.apache.http.auth.UsernamePasswordCredentials;
|
||||
@@ -88,13 +91,7 @@ public class ElasticSearcher implements DocumentSearcher {
|
||||
transport = new RestClientTransport(restClient, new JacksonJsonpMapper());
|
||||
ElasticsearchClient client = new ElasticsearchClient(transport);
|
||||
|
||||
Map<String, Object> source = new HashMap<>();
|
||||
source.put("id", document.getId());
|
||||
source.put("content", document.getContent());
|
||||
if (document.getTitle() != null) {
|
||||
source.put("title", document.getTitle());
|
||||
}
|
||||
|
||||
Map<String, Object> source = buildSource(document);
|
||||
String documentId = document.getId().toString();
|
||||
IndexOperation<?> indexOp = IndexOperation.of(i -> i
|
||||
.index(esConfig.getIndexName())
|
||||
@@ -116,7 +113,7 @@ public class ElasticSearcher implements DocumentSearcher {
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<Document> searchDocuments(String keyword, int count) {
|
||||
public List<Document> searchDocuments(KeywordSearchRequest request) {
|
||||
RestClient restClient = null;
|
||||
ElasticsearchTransport transport = null;
|
||||
|
||||
@@ -125,21 +122,16 @@ public class ElasticSearcher implements DocumentSearcher {
|
||||
transport = new RestClientTransport(restClient, new JacksonJsonpMapper());
|
||||
ElasticsearchClient client = new ElasticsearchClient(transport);
|
||||
|
||||
SearchRequest request = SearchRequest.of(s -> s
|
||||
.index(esConfig.getIndexName())
|
||||
.size(count)
|
||||
.query(q -> q
|
||||
.match(m -> m
|
||||
.field("title")
|
||||
.field("content")
|
||||
.query(keyword)
|
||||
)
|
||||
)
|
||||
);
|
||||
|
||||
SearchResponse<Document> response = client.search(request, Document.class);
|
||||
SearchResponse<Map> response = client.search(buildSearchRequest(request), Map.class);
|
||||
List<Document> results = new ArrayList<>();
|
||||
response.hits().hits().forEach(hit -> results.add(hit.source()));
|
||||
response.hits().hits().forEach(hit -> {
|
||||
Map source = hit.source();
|
||||
Document document = toDocument(hit.id(), source, hit.score());
|
||||
if (document == null) {
|
||||
return;
|
||||
}
|
||||
results.add(document);
|
||||
});
|
||||
return results;
|
||||
|
||||
} catch (Exception e) {
|
||||
@@ -193,14 +185,17 @@ public class ElasticSearcher implements DocumentSearcher {
|
||||
transport = new RestClientTransport(restClient, new JacksonJsonpMapper());
|
||||
ElasticsearchClient client = new ElasticsearchClient(transport);
|
||||
|
||||
UpdateRequest<Document, Object> request = UpdateRequest.of(u -> u
|
||||
UpdateRequest<Map<String, Object>, Map<String, Object>> request = UpdateRequest.of(u -> u
|
||||
.index(esConfig.getIndexName())
|
||||
.id(document.getId().toString())
|
||||
.doc(document)
|
||||
.doc(buildSource(document))
|
||||
);
|
||||
|
||||
UpdateResponse<Document> response = client.update(request, Object.class);
|
||||
return response.result() == co.elastic.clients.elasticsearch._types.Result.Updated;
|
||||
@SuppressWarnings("unchecked")
|
||||
Class<Map<String, Object>> documentClass = (Class<Map<String, Object>>) (Class<?>) Map.class;
|
||||
UpdateResponse<Map<String, Object>> response = client.update(request, documentClass);
|
||||
return response.result() == co.elastic.clients.elasticsearch._types.Result.Updated
|
||||
|| response.result() == co.elastic.clients.elasticsearch._types.Result.NoOp;
|
||||
} catch (Exception e) {
|
||||
LOG.error("Error updating document with id: " + document.getId(), e);
|
||||
return false;
|
||||
@@ -220,4 +215,88 @@ public class ElasticSearcher implements DocumentSearcher {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
private Document toDocument(String hitId, Map source, Double score) {
|
||||
if (source == null || source.isEmpty()) {
|
||||
return null;
|
||||
}
|
||||
|
||||
Document document = new Document();
|
||||
Object id = source.get("id");
|
||||
document.setId(id != null ? id : hitId);
|
||||
|
||||
Object title = source.get("title");
|
||||
if (title != null) {
|
||||
document.setTitle(String.valueOf(title));
|
||||
}
|
||||
|
||||
Object content = source.get("content");
|
||||
if (content != null) {
|
||||
document.setContent(String.valueOf(content));
|
||||
}
|
||||
|
||||
Object metadataMap = source.get("metadataMap");
|
||||
if (metadataMap instanceof Map<?, ?>) {
|
||||
document.setMetadataMap(new HashMap<>((Map<String, Object>) metadataMap));
|
||||
}
|
||||
|
||||
document.setScore(score);
|
||||
return document;
|
||||
}
|
||||
|
||||
Map<String, Object> buildSource(Document document) {
|
||||
Map<String, Object> source = new HashMap<String, Object>();
|
||||
source.put("id", document.getId());
|
||||
source.put("content", document.getContent());
|
||||
if (document.getTitle() != null) {
|
||||
source.put("title", document.getTitle());
|
||||
}
|
||||
if (document.getMetadataMap() != null && !document.getMetadataMap().isEmpty()) {
|
||||
source.put("metadataMap", new HashMap<String, Object>(document.getMetadataMap()));
|
||||
Object knowledgeId = document.getMetadata(KeywordSearchMetadataKeys.KNOWLEDGE_ID);
|
||||
if (knowledgeId != null) {
|
||||
source.put(KeywordSearchMetadataKeys.KNOWLEDGE_ID, String.valueOf(knowledgeId));
|
||||
}
|
||||
}
|
||||
return source;
|
||||
}
|
||||
|
||||
SearchRequest buildSearchRequest(KeywordSearchRequest request) {
|
||||
KeywordSearchRequest effectiveRequest = request == null ? new KeywordSearchRequest() : request;
|
||||
return SearchRequest.of(s -> s
|
||||
.index(esConfig.getIndexName())
|
||||
.size(effectiveRequest.getCount())
|
||||
.source(SourceConfig.of(sc -> sc.filter(f -> f.includes("id", "title", "content", "metadataMap"))))
|
||||
.query(q -> q.bool(b -> {
|
||||
b.must(m -> m.multiMatch(mm -> mm
|
||||
.query(effectiveRequest.getKeyword())
|
||||
.fields("title", "content")
|
||||
));
|
||||
if (effectiveRequest.getKnowledgeId() != null && !effectiveRequest.getKnowledgeId().trim().isEmpty()) {
|
||||
b.filter(f -> f.term(t -> t
|
||||
.field(KeywordSearchMetadataKeys.KNOWLEDGE_ID)
|
||||
.value(v -> v.stringValue(effectiveRequest.getKnowledgeId().trim()))
|
||||
));
|
||||
}
|
||||
return b;
|
||||
}))
|
||||
);
|
||||
}
|
||||
|
||||
public boolean checkAvailable() {
|
||||
RestClient restClient = null;
|
||||
ElasticsearchTransport transport = null;
|
||||
try {
|
||||
restClient = buildRestClient();
|
||||
transport = new RestClientTransport(restClient, new JacksonJsonpMapper());
|
||||
ElasticsearchClient client = new ElasticsearchClient(transport);
|
||||
return client.info() != null;
|
||||
} catch (Exception e) {
|
||||
LOG.error("Elasticsearch availability check failed", e);
|
||||
return false;
|
||||
} finally {
|
||||
closeResources(transport, restClient);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,54 @@
|
||||
package com.easyagents.engine.es;
|
||||
|
||||
import co.elastic.clients.elasticsearch.core.SearchRequest;
|
||||
import com.easyagents.core.document.Document;
|
||||
import com.easyagents.search.engine.service.KeywordSearchMetadataKeys;
|
||||
import com.easyagents.search.engine.service.KeywordSearchRequest;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
public class ElasticSearcherQueryBuilderTest {
|
||||
|
||||
@Test
|
||||
public void shouldBuildSearchRequestWithMultiMatchAndKnowledgeFilter() {
|
||||
ElasticSearcher searcher = new ElasticSearcher(config());
|
||||
KeywordSearchRequest request = KeywordSearchRequest.of("客服", 5);
|
||||
request.setKnowledgeId("100");
|
||||
|
||||
SearchRequest searchRequest = searcher.buildSearchRequest(request);
|
||||
|
||||
Assert.assertEquals(5, searchRequest.size().intValue());
|
||||
Assert.assertNotNull(searchRequest.query().bool());
|
||||
Assert.assertEquals(1, searchRequest.query().bool().must().size());
|
||||
Assert.assertNotNull(searchRequest.query().bool().must().get(0).multiMatch());
|
||||
Assert.assertEquals(2, searchRequest.query().bool().must().get(0).multiMatch().fields().size());
|
||||
Assert.assertEquals(1, searchRequest.query().bool().filter().size());
|
||||
Assert.assertEquals("knowledgeId", searchRequest.query().bool().filter().get(0).term().field());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void shouldExtractKnowledgeIdToTopLevelSource() {
|
||||
ElasticSearcher searcher = new ElasticSearcher(config());
|
||||
Document document = new Document();
|
||||
document.setId("1");
|
||||
document.setTitle("title");
|
||||
document.setContent("content");
|
||||
document.addMetadata(KeywordSearchMetadataKeys.KNOWLEDGE_ID, "100");
|
||||
|
||||
Map<String, Object> source = searcher.buildSource(document);
|
||||
|
||||
Assert.assertEquals("100", source.get(KeywordSearchMetadataKeys.KNOWLEDGE_ID));
|
||||
Assert.assertTrue(source.get("metadataMap") instanceof Map);
|
||||
}
|
||||
|
||||
private ESConfig config() {
|
||||
ESConfig config = new ESConfig();
|
||||
config.setHost("http://127.0.0.1:9200");
|
||||
config.setUserName("elastic");
|
||||
config.setPassword("elastic");
|
||||
config.setIndexName("easyflow");
|
||||
return config;
|
||||
}
|
||||
}
|
||||
@@ -51,5 +51,10 @@
|
||||
<groupId>com.easyagents</groupId>
|
||||
<artifactId>easy-agents-search-engine-service</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>junit</groupId>
|
||||
<artifactId>junit</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
</project>
|
||||
|
||||
@@ -17,6 +17,8 @@ package com.easyagents.search.engine.lucene;
|
||||
|
||||
import com.easyagents.core.document.Document;
|
||||
import com.easyagents.search.engine.service.DocumentSearcher;
|
||||
import com.easyagents.search.engine.service.KeywordSearchMetadataKeys;
|
||||
import com.easyagents.search.engine.service.KeywordSearchRequest;
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.StringField;
|
||||
@@ -78,7 +80,7 @@ public class LuceneSearcher implements DocumentSearcher {
|
||||
if (document.getTitle() != null) {
|
||||
luceneDoc.add(new TextField("title", document.getTitle(), Field.Store.YES));
|
||||
}
|
||||
|
||||
appendKnowledgeId(document, luceneDoc);
|
||||
|
||||
indexWriter.addDocument(luceneDoc);
|
||||
indexWriter.commit();
|
||||
@@ -127,7 +129,7 @@ public class LuceneSearcher implements DocumentSearcher {
|
||||
if (document.getTitle() != null) {
|
||||
luceneDoc.add(new TextField("title", document.getTitle(), Field.Store.YES));
|
||||
}
|
||||
|
||||
appendKnowledgeId(document, luceneDoc);
|
||||
indexWriter.updateDocument(term, luceneDoc);
|
||||
indexWriter.commit();
|
||||
return true;
|
||||
@@ -140,18 +142,21 @@ public class LuceneSearcher implements DocumentSearcher {
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<Document> searchDocuments(String keyword, int count) {
|
||||
public List<Document> searchDocuments(KeywordSearchRequest request) {
|
||||
List<Document> results = new ArrayList<>();
|
||||
try (IndexReader reader = DirectoryReader.open(directory)) {
|
||||
IndexSearcher searcher = new IndexSearcher(reader);
|
||||
Query query = buildQuery(keyword);
|
||||
TopDocs topDocs = searcher.search(query, count);
|
||||
Query query = buildQuery(request);
|
||||
TopDocs topDocs = searcher.search(query, request == null ? 10 : request.getCount());
|
||||
for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
|
||||
org.apache.lucene.document.Document doc = searcher.doc(scoreDoc.doc);
|
||||
Document resultDoc = new Document();
|
||||
resultDoc.setId(doc.get("id"));
|
||||
resultDoc.setContent(doc.get("content"));
|
||||
resultDoc.setTitle(doc.get("title"));
|
||||
if (doc.get(KeywordSearchMetadataKeys.KNOWLEDGE_ID) != null) {
|
||||
resultDoc.addMetadata(KeywordSearchMetadataKeys.KNOWLEDGE_ID, doc.get(KeywordSearchMetadataKeys.KNOWLEDGE_ID));
|
||||
}
|
||||
|
||||
resultDoc.setScore((double) scoreDoc.score);
|
||||
|
||||
@@ -164,9 +169,10 @@ public class LuceneSearcher implements DocumentSearcher {
|
||||
return results;
|
||||
}
|
||||
|
||||
private static Query buildQuery(String keyword) {
|
||||
Query buildQuery(KeywordSearchRequest request) {
|
||||
try {
|
||||
Analyzer analyzer = createAnalyzer();
|
||||
String keyword = request == null ? null : request.getKeyword();
|
||||
|
||||
QueryParser titleQueryParser = new QueryParser("title", analyzer);
|
||||
Query titleQuery = titleQueryParser.parse(keyword);
|
||||
@@ -179,6 +185,9 @@ public class LuceneSearcher implements DocumentSearcher {
|
||||
BooleanQuery.Builder builder = new BooleanQuery.Builder();
|
||||
builder.add(titleBooleanClause)
|
||||
.add(contentBooleanClause);
|
||||
if (request != null && request.getKnowledgeId() != null && !request.getKnowledgeId().trim().isEmpty()) {
|
||||
builder.add(new TermQuery(new Term(KeywordSearchMetadataKeys.KNOWLEDGE_ID, request.getKnowledgeId().trim())), BooleanClause.Occur.MUST);
|
||||
}
|
||||
return builder.build();
|
||||
} catch (ParseException e) {
|
||||
LOG.error(e.toString(), e);
|
||||
@@ -200,6 +209,16 @@ public class LuceneSearcher implements DocumentSearcher {
|
||||
return new JcsegAnalyzer(ISegment.Type.NLP, config, DictionaryFactory.createSingletonDictionary(config));
|
||||
}
|
||||
|
||||
private void appendKnowledgeId(Document document, org.apache.lucene.document.Document luceneDoc) {
|
||||
if (document == null || document.getMetadataMap() == null) {
|
||||
return;
|
||||
}
|
||||
Object knowledgeId = document.getMetadata(KeywordSearchMetadataKeys.KNOWLEDGE_ID);
|
||||
if (knowledgeId != null) {
|
||||
luceneDoc.add(new StringField(KeywordSearchMetadataKeys.KNOWLEDGE_ID, String.valueOf(knowledgeId), Field.Store.YES));
|
||||
}
|
||||
}
|
||||
|
||||
public void close(IndexWriter indexWriter) {
|
||||
try {
|
||||
if (indexWriter != null) {
|
||||
|
||||
@@ -0,0 +1,45 @@
|
||||
package com.easyagents.search.engine.lucene;
|
||||
|
||||
import com.easyagents.core.document.Document;
|
||||
import com.easyagents.search.engine.service.KeywordSearchMetadataKeys;
|
||||
import com.easyagents.search.engine.service.KeywordSearchRequest;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.List;
|
||||
|
||||
public class LuceneSearcherTest {
|
||||
|
||||
@Test
|
||||
public void shouldFilterByKnowledgeIdAndSearchTitleAndContent() throws Exception {
|
||||
Path tempDir = Files.createTempDirectory("lucene-searcher-test");
|
||||
LuceneConfig config = new LuceneConfig();
|
||||
config.setIndexDirPath(tempDir.toString());
|
||||
LuceneSearcher searcher = new LuceneSearcher(config);
|
||||
|
||||
Document first = new Document();
|
||||
first.setId("1");
|
||||
first.setTitle("客服标题");
|
||||
first.setContent("这里没有关键字");
|
||||
first.addMetadata(KeywordSearchMetadataKeys.KNOWLEDGE_ID, "100");
|
||||
|
||||
Document second = new Document();
|
||||
second.setId("2");
|
||||
second.setTitle("别的知识库");
|
||||
second.setContent("客服内容");
|
||||
second.addMetadata(KeywordSearchMetadataKeys.KNOWLEDGE_ID, "200");
|
||||
|
||||
Assert.assertTrue(searcher.addDocument(first));
|
||||
Assert.assertTrue(searcher.addDocument(second));
|
||||
|
||||
KeywordSearchRequest request = KeywordSearchRequest.of("客服", 10);
|
||||
request.setKnowledgeId("100");
|
||||
List<Document> results = searcher.searchDocuments(request);
|
||||
|
||||
Assert.assertEquals(1, results.size());
|
||||
Assert.assertEquals("1", String.valueOf(results.get(0).getId()));
|
||||
Assert.assertEquals("100", String.valueOf(results.get(0).getMetadata(KeywordSearchMetadataKeys.KNOWLEDGE_ID)));
|
||||
}
|
||||
}
|
||||
@@ -28,8 +28,12 @@ public interface DocumentSearcher {
|
||||
boolean updateDocument(Document document);
|
||||
|
||||
default List<Document> searchDocuments(String keyword) {
|
||||
return searchDocuments(keyword, 10);
|
||||
return searchDocuments(KeywordSearchRequest.of(keyword, 10));
|
||||
}
|
||||
|
||||
List<Document> searchDocuments(String keyword, int count);
|
||||
default List<Document> searchDocuments(String keyword, int count) {
|
||||
return searchDocuments(KeywordSearchRequest.of(keyword, count));
|
||||
}
|
||||
|
||||
List<Document> searchDocuments(KeywordSearchRequest request);
|
||||
}
|
||||
|
||||
@@ -0,0 +1,9 @@
|
||||
package com.easyagents.search.engine.service;
|
||||
|
||||
/**
 * Holder for the metadata key names used by keyword search. Not instantiable.
 */
public final class KeywordSearchMetadataKeys {

    /** Metadata key under which a document's knowledge id is stored. */
    public static final String KNOWLEDGE_ID = "knowledgeId";

    /** Constants only; no instances. */
    private KeywordSearchMetadataKeys() {
    }
}
|
||||
@@ -0,0 +1,39 @@
|
||||
package com.easyagents.search.engine.service;
|
||||
|
||||
/**
 * Parameters of a keyword search: the keyword, the maximum number of results and an
 * optional knowledge id.
 */
public class KeywordSearchRequest {

    /** Result count used initially and whenever a non-positive count is supplied. */
    private static final int FALLBACK_COUNT = 10;

    private String keyword;
    private int count = FALLBACK_COUNT;
    private String knowledgeId;

    /**
     * Creates a request for the given keyword and result count.
     *
     * @param keyword the keyword to search for
     * @param count   the requested number of results; non-positive values become 10
     * @return a new request without a knowledge id
     */
    public static KeywordSearchRequest of(String keyword, int count) {
        KeywordSearchRequest created = new KeywordSearchRequest();
        created.keyword = keyword;
        created.setCount(count);
        return created;
    }

    /** @return the keyword, or null when none was set */
    public String getKeyword() {
        return this.keyword;
    }

    /** @param keyword the keyword to search for */
    public void setKeyword(String keyword) {
        this.keyword = keyword;
    }

    /** @return the number of results to return; always positive */
    public int getCount() {
        return this.count;
    }

    /**
     * Sets the number of results to return.
     *
     * @param count the requested count; zero or negative values are replaced by 10
     */
    public void setCount(int count) {
        if (count > 0) {
            this.count = count;
        } else {
            this.count = FALLBACK_COUNT;
        }
    }

    /** @return the knowledge id, or null when none was set */
    public String getKnowledgeId() {
        return this.knowledgeId;
    }

    /** @param knowledgeId the knowledge id; stored as given */
    public void setKnowledgeId(String knowledgeId) {
        this.knowledgeId = knowledgeId;
    }
}
|
||||
Reference in New Issue
Block a user