初始化
This commit is contained in:
49
easy-agents-store/easy-agents-store-elasticsearch/pom.xml
Normal file
49
easy-agents-store/easy-agents-store-elasticsearch/pom.xml
Normal file
@@ -0,0 +1,49 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven module providing the Elasticsearch-backed store for easy-agents. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <!-- Inherits from the store aggregator; the version is resolved from the ${revision} property. -->
    <parent>
        <groupId>com.easyagents</groupId>
        <artifactId>easy-agents-store</artifactId>
        <version>${revision}</version>
    </parent>

    <artifactId>easy-agents-store-elasticsearch</artifactId>
    <name>easy-agents-store-elasticsearch</name>

    <properties>
        <!-- Versions of the third-party libraries pinned by this module. -->
        <elasticsearch.version>8.15.0</elasticsearch.version>
        <jackson.version>2.17.0</jackson.version>

        <!-- Build settings: Java 8 source/target, UTF-8 sources. -->
        <maven.compiler.source>8</maven.compiler.source>
        <maven.compiler.target>8</maven.compiler.target>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    </properties>

    <dependencies>
        <!-- Core abstractions (DocumentStore, Document, StoreOptions, ...).
             No version is declared here, so it has to come from inherited dependency management. -->
        <dependency>
            <groupId>com.easyagents</groupId>
            <artifactId>easy-agents-core</artifactId>
        </dependency>

        <!-- Elasticsearch Java API client. -->
        <dependency>
            <groupId>co.elastic.clients</groupId>
            <artifactId>elasticsearch-java</artifactId>
            <version>${elasticsearch.version}</version>
        </dependency>

        <!-- JSON mapping used by the client's JacksonJsonpMapper. -->
        <dependency>
            <groupId>com.fasterxml.jackson.core</groupId>
            <artifactId>jackson-databind</artifactId>
            <version>${jackson.version}</version>
        </dependency>

        <!-- Test only; no version is declared here, so it has to come from inherited dependency management. -->
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <scope>test</scope>
        </dependency>
    </dependencies>

</project>
|
||||
@@ -0,0 +1,282 @@
|
||||
/*
|
||||
* Copyright (c) 2023-2026, Easy-Agents (fuhai999@gmail.com).
|
||||
* <p>
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
* <p>
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* <p>
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package com.easyagents.store.elasticsearch;
|
||||
|
||||
import co.elastic.clients.elasticsearch.ElasticsearchClient;
|
||||
import co.elastic.clients.elasticsearch._types.ErrorCause;
|
||||
import co.elastic.clients.elasticsearch._types.mapping.DenseVectorProperty;
|
||||
import co.elastic.clients.elasticsearch._types.mapping.Property;
|
||||
import co.elastic.clients.elasticsearch._types.mapping.TextProperty;
|
||||
import co.elastic.clients.elasticsearch._types.mapping.TypeMapping;
|
||||
import co.elastic.clients.elasticsearch._types.query_dsl.Query;
|
||||
import co.elastic.clients.elasticsearch._types.query_dsl.ScriptScoreQuery;
|
||||
import co.elastic.clients.elasticsearch.core.BulkRequest;
|
||||
import co.elastic.clients.elasticsearch.core.BulkResponse;
|
||||
import co.elastic.clients.elasticsearch.core.SearchRequest;
|
||||
import co.elastic.clients.elasticsearch.core.SearchResponse;
|
||||
import co.elastic.clients.elasticsearch.core.bulk.BulkResponseItem;
|
||||
import co.elastic.clients.json.JsonData;
|
||||
import co.elastic.clients.json.jackson.JacksonJsonpMapper;
|
||||
import co.elastic.clients.transport.ElasticsearchTransport;
|
||||
import co.elastic.clients.transport.endpoints.BooleanResponse;
|
||||
import co.elastic.clients.transport.rest_client.RestClientTransport;
|
||||
import com.easyagents.core.document.Document;
|
||||
import com.easyagents.core.store.DocumentStore;
|
||||
import com.easyagents.core.store.SearchWrapper;
|
||||
import com.easyagents.core.store.StoreOptions;
|
||||
import com.easyagents.core.store.StoreResult;
|
||||
import com.easyagents.core.store.exception.StoreException;
|
||||
import com.easyagents.core.util.StringUtil;
|
||||
import org.apache.http.Header;
|
||||
import org.apache.http.HttpHost;
|
||||
import org.apache.http.auth.AuthScope;
|
||||
import org.apache.http.auth.UsernamePasswordCredentials;
|
||||
import org.apache.http.client.CredentialsProvider;
|
||||
import org.apache.http.impl.client.BasicCredentialsProvider;
|
||||
import org.apache.http.message.BasicHeader;
|
||||
import org.apache.http.ssl.SSLContextBuilder;
|
||||
import org.elasticsearch.client.RestClient;
|
||||
import org.elasticsearch.client.RestClientBuilder;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import javax.net.ssl.SSLContext;
|
||||
import java.io.IOException;
|
||||
import java.security.KeyManagementException;
|
||||
import java.security.KeyStoreException;
|
||||
import java.security.NoSuchAlgorithmException;
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
/**
|
||||
* es 向量存储:<a href="https://www.elastic.co/guide/en/elasticsearch/client/java-api-client/current/introduction.html">elasticsearch-java</a>
|
||||
*
|
||||
* @author songyinyin
|
||||
* @since 2024/8/12 下午4:17
|
||||
*/
|
||||
public class ElasticSearchVectorStore extends DocumentStore {
|
||||
|
||||
private static final Logger log = LoggerFactory.getLogger(ElasticSearchVectorStore.class);
|
||||
|
||||
private final ElasticsearchClient client;
|
||||
|
||||
private final ElasticSearchVectorStoreConfig config;
|
||||
|
||||
public ElasticSearchVectorStore(ElasticSearchVectorStoreConfig config) {
|
||||
this.config = config;
|
||||
RestClientBuilder restClientBuilder = RestClient.builder(HttpHost.create(config.getServerUrl()));
|
||||
|
||||
try {
|
||||
SSLContext sslContext = SSLContextBuilder.create().loadTrustMaterial(null, (chains, authType) -> true).build();
|
||||
|
||||
if (StringUtil.hasText(config.getUsername())) {
|
||||
CredentialsProvider provider = new BasicCredentialsProvider();
|
||||
provider.setCredentials(AuthScope.ANY, new UsernamePasswordCredentials(config.getUsername(), config.getPassword()));
|
||||
restClientBuilder.setHttpClientConfigCallback(httpClientBuilder -> {
|
||||
httpClientBuilder.setSSLContext(sslContext);
|
||||
httpClientBuilder.setDefaultCredentialsProvider(provider);
|
||||
return httpClientBuilder;
|
||||
});
|
||||
}
|
||||
|
||||
if (StringUtil.hasText(config.getApiKey())) {
|
||||
restClientBuilder.setDefaultHeaders(new Header[]{
|
||||
new BasicHeader("Authorization", "Apikey " + config.getApiKey())
|
||||
});
|
||||
}
|
||||
|
||||
ElasticsearchTransport transport = new RestClientTransport(restClientBuilder.build(), new JacksonJsonpMapper());
|
||||
|
||||
this.client = new ElasticsearchClient(transport);
|
||||
} catch (NoSuchAlgorithmException | KeyManagementException | KeyStoreException e) {
|
||||
throw new StoreException("Elasticsearch init error", e);
|
||||
}
|
||||
try {
|
||||
client.ping();
|
||||
} catch (IOException e) {
|
||||
log.error("[I/O Elasticsearch Exception]", e);
|
||||
throw new StoreException(e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
public ElasticSearchVectorStore(ElasticSearchVectorStoreConfig config, ElasticsearchClient client) {
|
||||
this.config = config;
|
||||
this.client = client;
|
||||
}
|
||||
|
||||
private static void throwIfError(BulkResponse bulkResponse) {
|
||||
if (bulkResponse.errors()) {
|
||||
for (BulkResponseItem item : bulkResponse.items()) {
|
||||
if (item.error() == null) {
|
||||
continue;
|
||||
}
|
||||
ErrorCause errorCause = item.error();
|
||||
throw new StoreException("type: " + errorCause.type() + "," + "reason: " + errorCause.reason());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public StoreResult doStore(List<Document> documents, StoreOptions options) {
|
||||
String indexName;
|
||||
if (StringUtil.hasText(options.getCollectionName())){
|
||||
indexName = options.getCollectionName();
|
||||
} else {
|
||||
indexName = options.getIndexNameOrDefault(config.getDefaultIndexName());
|
||||
}
|
||||
createIndexIfNotExist(indexName);
|
||||
return saveOrUpdate(documents, indexName);
|
||||
}
|
||||
|
||||
@Override
|
||||
public StoreResult doDelete(Collection<?> ids, StoreOptions options) {
|
||||
String indexName = options.getIndexNameOrDefault(config.getDefaultIndexName());
|
||||
BulkRequest.Builder bulkBuilder = new BulkRequest.Builder();
|
||||
for (Object id : ids) {
|
||||
bulkBuilder.operations(op -> op.delete(d -> d.index(indexName).id(id.toString())));
|
||||
}
|
||||
bulk(bulkBuilder.build());
|
||||
return StoreResult.success();
|
||||
}
|
||||
|
||||
@Override
|
||||
public StoreResult doUpdate(List<Document> documents, StoreOptions options) {
|
||||
String indexName = options.getIndexNameOrDefault(config.getDefaultIndexName());
|
||||
return saveOrUpdate(documents, indexName);
|
||||
}
|
||||
|
||||
public List<Document> doSearch(SearchWrapper wrapper, StoreOptions options) {
|
||||
// 最小匹配分数,无值则默认0
|
||||
Double minScore = wrapper.getMinScore();
|
||||
// 获取索引名,无指定则使用配置的默认索引
|
||||
String indexName = options.getIndexNameOrDefault(config.getDefaultIndexName());
|
||||
|
||||
// 公式:(cosineSimilarity + 1.0) / 2 将相似度映射到 0~1 区间
|
||||
ScriptScoreQuery scriptScoreQuery = ScriptScoreQuery.of(fn -> fn
|
||||
.minScore(minScore == null ? 0 : minScore.floatValue())
|
||||
.query(Query.of(q -> q.matchAll(m -> m)))
|
||||
.script(s -> s
|
||||
.source("(cosineSimilarity(params.query_vector, 'vector') + 1.0) / 2")
|
||||
.params("query_vector", JsonData.of(wrapper.getVector()))
|
||||
)
|
||||
);
|
||||
|
||||
try {
|
||||
SearchResponse<JsonData> response = client.search(
|
||||
SearchRequest.of(s -> s.index(indexName)
|
||||
.query(n -> n.scriptScore(scriptScoreQuery))
|
||||
.size(wrapper.getMaxResults())),
|
||||
JsonData.class
|
||||
);
|
||||
|
||||
return response.hits().hits().stream()
|
||||
.filter(hit -> hit.source() != null) // 过滤_source为空的无效结果
|
||||
.map(hit -> parseFromJsonData(hit.source(), hit.score()))
|
||||
.collect(Collectors.toList());
|
||||
} catch (IOException e) {
|
||||
log.error("[es/search] Elasticsearch I/O exception occurred", e);
|
||||
throw new StoreException(e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
private StoreResult saveOrUpdate(List<Document> documents, String indexName) {
|
||||
BulkRequest.Builder bulkBuilder = new BulkRequest.Builder();
|
||||
for (Document document : documents) {
|
||||
bulkBuilder.operations(op -> op.index(
|
||||
idx -> idx.index(indexName).id(document.getId().toString()).document(document))
|
||||
);
|
||||
}
|
||||
bulk(bulkBuilder.build());
|
||||
return StoreResult.successWithIds(documents);
|
||||
}
|
||||
|
||||
private void bulk(BulkRequest bulkRequest) {
|
||||
try {
|
||||
BulkResponse bulkResponse = client.bulk(bulkRequest);
|
||||
throwIfError(bulkResponse);
|
||||
} catch (IOException e) {
|
||||
log.error("[I/O Elasticsearch Exception]", e);
|
||||
throw new StoreException(e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
private void createIndexIfNotExist(String indexName) {
|
||||
try {
|
||||
BooleanResponse response = client.indices().exists(c -> c.index(indexName));
|
||||
if (!response.value()) {
|
||||
log.info("[ElasticSearch] Index {} not exists, creating...", indexName);
|
||||
client.indices().create(c -> c.index(indexName)
|
||||
.mappings(getDefaultMappings(this.getEmbeddingModel().dimensions())));
|
||||
}
|
||||
} catch (IOException e) {
|
||||
log.error("[I/O ElasticSearch Exception]", e);
|
||||
throw new StoreException(e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
private TypeMapping getDefaultMappings(int dimension) {
|
||||
Map<String, Property> properties = new HashMap<>(4);
|
||||
properties.put("content", Property.of(p -> p.text(TextProperty.of(t -> t))));
|
||||
properties.put("vector", Property.of(p -> p.denseVector(DenseVectorProperty.of(d -> d.dims(dimension)))));
|
||||
return TypeMapping.of(c -> c.properties(properties));
|
||||
}
|
||||
|
||||
private Document parseFromJsonData(JsonData source, Double score) {
|
||||
Document document = new Document();
|
||||
Map<String, Object> dataMap = source.to(Map.class);
|
||||
|
||||
document.setId(dataMap.get("id"));
|
||||
document.setTitle((String) dataMap.get("title"));
|
||||
document.setContent((String) dataMap.get("content"));
|
||||
document.setScore(score);
|
||||
|
||||
Object vectorObj = dataMap.get("vector");
|
||||
if (vectorObj instanceof List<?>) {
|
||||
List<?> vectorList = (List<?>) vectorObj;
|
||||
float[] vector = new float[vectorList.size()];
|
||||
for (int i = 0; i < vectorList.size(); i++) {
|
||||
Object val = vectorList.get(i);
|
||||
if (val instanceof Number) {
|
||||
vector[i] = ((Number) val).floatValue();
|
||||
}
|
||||
}
|
||||
document.setVector(vector);
|
||||
}
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
Map<String, Object> metadataMap = (Map<String, Object>) dataMap.get("metadataMap");
|
||||
if (metadataMap != null && !metadataMap.isEmpty()) {
|
||||
document.setMetadataMap(metadataMap);
|
||||
} else {
|
||||
Map<String, Object> otherMetadata = new HashMap<>();
|
||||
for (Map.Entry<String, Object> entry : dataMap.entrySet()) {
|
||||
String key = entry.getKey();
|
||||
if (!"id".equals(key) && !"title".equals(key)
|
||||
&& !"content".equals(key) && !"vector".equals(key)) {
|
||||
otherMetadata.put(key, entry.getValue());
|
||||
}
|
||||
}
|
||||
if (!otherMetadata.isEmpty()) {
|
||||
document.setMetadataMap(otherMetadata);
|
||||
}
|
||||
}
|
||||
|
||||
return document;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,82 @@
|
||||
/*
|
||||
* Copyright (c) 2023-2026, Easy-Agents (fuhai999@gmail.com).
|
||||
* <p>
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
* <p>
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* <p>
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package com.easyagents.store.elasticsearch;
|
||||
|
||||
import com.easyagents.core.store.DocumentStoreConfig;
|
||||
import com.easyagents.core.util.StringUtil;
|
||||
|
||||
/**
|
||||
* 连接 elasticsearch 配置:<a href="https://www.elastic.co/guide/en/elasticsearch/client/java-api-client/current/getting-started-java.html">elasticsearch-java</a>
|
||||
*
|
||||
* @author songyinyin
|
||||
*/
|
||||
public class ElasticSearchVectorStoreConfig implements DocumentStoreConfig {
|
||||
|
||||
private String serverUrl = "https://localhost:9200";
|
||||
|
||||
private String apiKey;
|
||||
|
||||
private String username;
|
||||
|
||||
private String password;
|
||||
|
||||
private String defaultIndexName = "easy-agents-default";
|
||||
|
||||
public String getServerUrl() {
|
||||
return serverUrl;
|
||||
}
|
||||
|
||||
public void setServerUrl(String serverUrl) {
|
||||
this.serverUrl = serverUrl;
|
||||
}
|
||||
|
||||
public String getApiKey() {
|
||||
return apiKey;
|
||||
}
|
||||
|
||||
public void setApiKey(String apiKey) {
|
||||
this.apiKey = apiKey;
|
||||
}
|
||||
|
||||
public String getUsername() {
|
||||
return username;
|
||||
}
|
||||
|
||||
public void setUsername(String username) {
|
||||
this.username = username;
|
||||
}
|
||||
|
||||
public String getPassword() {
|
||||
return password;
|
||||
}
|
||||
|
||||
public void setPassword(String password) {
|
||||
this.password = password;
|
||||
}
|
||||
|
||||
public String getDefaultIndexName() {
|
||||
return defaultIndexName;
|
||||
}
|
||||
|
||||
public void setDefaultIndexName(String defaultIndexName) {
|
||||
this.defaultIndexName = defaultIndexName;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean checkAvailable() {
|
||||
return StringUtil.hasText(this.serverUrl, this.apiKey, this.defaultIndexName);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,99 @@
|
||||
/*
|
||||
* Copyright (c) 2023-2026, Easy-Agents (fuhai999@gmail.com).
|
||||
* <p>
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
* <p>
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* <p>
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package com.easyagents.store.opensearch;
|
||||
|
||||
import com.easyagents.core.document.Document;
|
||||
import com.easyagents.core.model.embedding.EmbeddingModel;
|
||||
import com.easyagents.core.model.embedding.EmbeddingOptions;
|
||||
import com.easyagents.core.store.SearchWrapper;
|
||||
import com.easyagents.core.store.StoreOptions;
|
||||
import com.easyagents.core.store.VectorData;
|
||||
import com.easyagents.core.store.exception.StoreException;
|
||||
import com.easyagents.store.elasticsearch.ElasticSearchVectorStore;
|
||||
import com.easyagents.store.elasticsearch.ElasticSearchVectorStoreConfig;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* @author songyinyin
|
||||
*/
|
||||
public class ElasticSearchVectorStoreTest {
|
||||
|
||||
private static ElasticSearchVectorStore getVectorStore() {
|
||||
ElasticSearchVectorStoreConfig config = new ElasticSearchVectorStoreConfig();
|
||||
// config.setApiKey("bmtXRVNaRUJNMEZXZzMzcnNvSXk6MlNMVmFnT0hRVVNUSmN3UXpoNWp4Zw==");
|
||||
config.setUsername("elastic");
|
||||
config.setPassword("Dd2024a10");
|
||||
ElasticSearchVectorStore store = new ElasticSearchVectorStore(config);
|
||||
store.setEmbeddingModel(new EmbeddingModel() {
|
||||
@Override
|
||||
public VectorData embed(Document document, EmbeddingOptions options) {
|
||||
VectorData vectorData = new VectorData();
|
||||
vectorData.setVector(new float[]{0, 0});
|
||||
return vectorData;
|
||||
}
|
||||
});
|
||||
return store;
|
||||
}
|
||||
|
||||
@Test(expected = StoreException.class)
|
||||
public void test01() {
|
||||
ElasticSearchVectorStore store = getVectorStore();
|
||||
|
||||
// https://opensearch.org/docs/latest/search-plugins/vector-search/#example
|
||||
List<Document> list = new ArrayList<>();
|
||||
Document doc1 = new Document();
|
||||
doc1.setId(1);
|
||||
doc1.setContent("test1");
|
||||
doc1.setVector(new float[]{5.2f, 4.4f});
|
||||
list.add(doc1);
|
||||
Document doc2 = new Document();
|
||||
doc2.setId(2);
|
||||
doc2.setContent("test2");
|
||||
doc2.setVector(new float[]{5.2f, 3.9f});
|
||||
list.add(doc2);
|
||||
Document doc3 = new Document();
|
||||
doc3.setId(3);
|
||||
doc3.setContent("test3");
|
||||
doc3.setVector(new float[]{4.9f, 3.4f});
|
||||
list.add(doc3);
|
||||
Document doc4 = new Document();
|
||||
doc4.setId(4);
|
||||
doc4.setContent("test4");
|
||||
doc4.setVector(new float[]{4.2f, 4.6f});
|
||||
list.add(doc4);
|
||||
Document doc5 = new Document();
|
||||
doc5.setId(5);
|
||||
doc5.setContent("test5");
|
||||
doc5.setVector(new float[]{3.3f, 4.5f});
|
||||
list.add(doc5);
|
||||
store.doStore(list, StoreOptions.DEFAULT);
|
||||
|
||||
// 可能要等一会 才能查出结果
|
||||
SearchWrapper searchWrapper = new SearchWrapper();
|
||||
searchWrapper.setVector(new float[]{5, 4});
|
||||
searchWrapper.setMaxResults(3);
|
||||
List<Document> documents = store.doSearch(searchWrapper, StoreOptions.DEFAULT);
|
||||
for (Document document : documents) {
|
||||
System.out.printf("id=%s, content=%s, vector=%s, metadata=%s\n",
|
||||
document.getId(), document.getContent(), Arrays.toString(document.getVector()), document.getMetadataMap());
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user