初始化

This commit is contained in:
2026-02-22 18:55:40 +08:00
commit 8392cdd861
496 changed files with 45020 additions and 0 deletions

View File

@@ -0,0 +1,41 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<!-- Maven module for the pgvector-backed document store of easy-agents. -->
<modelVersion>4.0.0</modelVersion>
<!-- The module version is taken from the parent through the ${revision} placeholder. -->
<parent>
<groupId>com.easyagents</groupId>
<artifactId>easy-agents-store</artifactId>
<version>${revision}</version>
</parent>
<name>easy-agents-store-pgvector</name>
<artifactId>easy-agents-store-pgvector</artifactId>
<!-- Sources are compiled for Java 8 and read as UTF-8. -->
<properties>
<maven.compiler.source>8</maven.compiler.source>
<maven.compiler.target>8</maven.compiler.target>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
</properties>
<dependencies>
<!-- No version is declared here; presumably managed by the parent POM (TODO confirm). -->
<dependency>
<groupId>com.easyagents</groupId>
<artifactId>easy-agents-core</artifactId>
<scope>compile</scope>
</dependency>
<!-- PostgreSQL JDBC driver, pinned explicitly in this module. -->
<dependency>
<groupId>org.postgresql</groupId>
<artifactId>postgresql</artifactId>
<version>42.7.5</version>
</dependency>
<!-- Test-only dependency; no version declared here, presumably managed by the parent POM (TODO confirm). -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<scope>test</scope>
</dependency>
</dependencies>
</project>

View File

@@ -0,0 +1,63 @@
/*
* Copyright (c) 2023-2026, Easy-Agents (fuhai999@gmail.com).
* <p>
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* <p>
* http://www.apache.org/licenses/LICENSE-2.0
* <p>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.easyagents.store.pgvector;
import org.postgresql.util.PGobject;
import java.sql.SQLException;
/**
 * Converts between Java {@code double[]} embeddings and the bracketed,
 * comma-separated text form used for the {@code vector} type
 * (for example {@code [1.0,2.5,3.0]}).
 */
public class PgvectorUtil {

    /**
     * Converts an embedding into a {@link PGobject} of type {@code vector}.
     * Half vectors or sparse vectors would need their own converter written
     * along the same lines.
     *
     * @param src the embedding; must not be {@code null}
     * @return a {@code PGobject} whose type is {@code vector} and whose value is
     *         the bracketed, comma-separated text form of {@code src}
     *         ({@code "[]"} for an empty array)
     * @throws SQLException if the driver rejects the type or value
     */
    public static PGobject toPgVector(double[] src) throws SQLException {
        PGobject vector = new PGobject();
        vector.setType("vector");

        StringBuilder text = new StringBuilder("[");
        for (int i = 0; i < src.length; i++) {
            if (i > 0) {
                text.append(',');
            }
            text.append(src[i]);
        }
        text.append(']');

        vector.setValue(text.toString());
        return vector;
    }

    /**
     * Parses the text form of a vector back into a {@code double[]}.
     * <p>
     * Parsing is deliberately lenient: surrounding whitespace is ignored, the
     * enclosing brackets are stripped when present, and a component that is
     * not a valid number becomes {@code 0} instead of aborting the whole
     * conversion.
     *
     * @param src the text form, e.g. {@code "[1.0,2.0]"}; may be {@code null}
     * @return the parsed components; an empty array when {@code src} is
     *         {@code null}, blank, or contains no components
     */
    public static double[] fromPgVector(String src) {
        if (src == null) {
            return new double[0];
        }

        String body = src.trim();
        // Only strip the brackets that are actually there, so short or
        // unbracketed input cannot push substring() out of range.
        if (body.startsWith("[")) {
            body = body.substring(1);
        }
        if (body.endsWith("]")) {
            body = body.substring(0, body.length() - 1);
        }
        body = body.trim();
        if (body.isEmpty()) {
            return new double[0];
        }

        String[] parts = body.split(",");
        double[] output = new double[parts.length];
        for (int i = 0; i < parts.length; i++) {
            try {
                // Double.parseDouble ignores leading/trailing whitespace itself.
                output[i] = Double.parseDouble(parts[i]);
            } catch (NumberFormatException ignored) {
                // Best effort: keep the remaining components usable.
                output[i] = 0;
            }
        }
        return output;
    }
}

View File

@@ -0,0 +1,231 @@
/*
* Copyright (c) 2023-2026, Easy-Agents (fuhai999@gmail.com).
* <p>
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* <p>
* http://www.apache.org/licenses/LICENSE-2.0
* <p>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.easyagents.store.pgvector;
import com.easyagents.core.document.Document;
import com.easyagents.core.store.DocumentStore;
import com.easyagents.core.store.SearchWrapper;
import com.easyagents.core.store.StoreOptions;
import com.easyagents.core.store.StoreResult;
import com.alibaba.fastjson2.JSON;
import com.alibaba.fastjson2.JSONObject;
import org.postgresql.ds.PGSimpleDataSource;
import org.postgresql.util.PGobject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.sql.*;
import java.util.*;
/**
 * {@link DocumentStore} implementation that keeps documents in PostgreSQL
 * tables using the pgvector {@code vector} column type.
 * <p>
 * Every collection is one table with the columns {@code id}, {@code content},
 * {@code vector} and {@code metadata} (jsonb). Collection names are
 * concatenated into the SQL text as identifiers (they cannot be bound as
 * parameters), so they must come from trusted configuration and never from
 * end-user input.
 */
public class PgvectorVectorStore extends DocumentStore {
    private static final Logger logger = LoggerFactory.getLogger(PgvectorVectorStore.class);

    /** Value used in {@link #doSearch} when the search wrapper supplies no minimum score. */
    public static final double DEFAULT_SIMILARITY_THRESHOLD = 0.3;

    private final PGSimpleDataSource dataSource;
    private final String defaultCollectionName;
    private final PgvectorVectorStoreConfig config;

    /**
     * Builds the data source from {@code config} and starts the collection
     * initialization on a background thread.
     *
     * @param config connection and collection settings
     */
    public PgvectorVectorStore(PgvectorVectorStoreConfig config) {
        dataSource = new PGSimpleDataSource();
        dataSource.setServerNames(new String[]{config.getHost() + ":" + config.getPort()});
        dataSource.setUser(config.getUsername());
        dataSource.setPassword(config.getPassword());
        dataSource.setDatabaseName(config.getDatabaseName());

        // The properties map can be replaced through the setter, so guard against null.
        Map<String, String> properties = config.getProperties();
        if (properties != null) {
            properties.forEach((k, v) -> {
                try {
                    dataSource.setProperty(k, v);
                } catch (SQLException e) {
                    logger.error("set pg property error", e);
                }
            });
        }

        this.defaultCollectionName = config.getDefaultCollectionName();
        this.config = config;

        // Initialize the database asynchronously.
        // NOTE(review): operations issued before this thread finishes may run
        // against a table that does not exist yet.
        new Thread(this::initDb).start();
    }

    /**
     * Creates the default collection when auto-creation is enabled.
     * <p>
     * The database must have the pgvector extension available; an
     * administrator has to run {@code CREATE EXTENSION IF NOT EXISTS vector;}
     * on the target database beforehand.
     */
    public void initDb() {
        if (config.isAutoCreateCollection()) {
            createCollection(defaultCollectionName);
        }
    }

    /**
     * Opens a connection with auto-commit disabled; callers must commit explicitly.
     */
    private Connection getConnection() throws SQLException {
        Connection connection = dataSource.getConnection();
        connection.setAutoCommit(false);
        return connection;
    }

    /**
     * Serializes a document's metadata map to JSON text; a missing map becomes {@code {}}.
     */
    private static String toMetadataJson(Document doc) {
        Map<String, Object> metadata = doc.getMetadataMap();
        JSONObject json = JSON.parseObject(
                JSON.toJSONBytes(metadata == null ? Collections.<String, Object>emptyMap() : metadata));
        return json.toString();
    }

    /**
     * Inserts all documents into the collection as one batch and commits.
     *
     * @param documents documents to insert
     * @param options   supplies the collection (table) name, falling back to the default
     * @return a success result carrying the documents, or a failure result
     *         when a {@link SQLException} occurs
     */
    @Override
    public StoreResult doStore(List<Document> documents, StoreOptions options) {
        // Table name.
        String collectionName = options.getCollectionNameOrDefault(defaultCollectionName);
        String sql = "insert into " + collectionName + " (id, content, vector, metadata) values (?, ?, ?, ?::jsonb)";

        try (Connection connection = getConnection();
             PreparedStatement pstmt = connection.prepareStatement(sql)) {
            for (Document doc : documents) {
                pstmt.setString(1, String.valueOf(doc.getId()));
                pstmt.setString(2, doc.getContent());
                pstmt.setObject(3, PgvectorUtil.toPgVector(doc.getVectorAsDoubleArray()));
                pstmt.setString(4, toMetadataJson(doc));
                pstmt.addBatch();
            }
            pstmt.executeBatch();
            connection.commit();
        } catch (SQLException e) {
            logger.error("store vector error", e);
            return StoreResult.fail();
        }
        return StoreResult.successWithIds(documents);
    }

    /**
     * Creates the collection table and, when configured, its HNSW index.
     *
     * @param collectionName table name
     * @return {@code true} on success, {@code false} when a {@link SQLException} occurs
     */
    private Boolean createCollection(String collectionName) {
        try (Connection connection = getConnection()) {
            try (Statement statement = connection.createStatement()) {
                statement.execute("CREATE TABLE IF NOT EXISTS " + collectionName +
                        " (id varchar(100) PRIMARY KEY, content text, vector vector(" + config.getVectorDimension() + "), metadata jsonb)");
            }
            // Auto-commit is off and DDL is transactional in PostgreSQL, so
            // without this commit the table would be discarded on close.
            connection.commit();

            // By default, pgvector performs exact nearest neighbor search, which provides perfect recall.
            // An index changes how pgvector searches in exchange for better performance.
            if (config.isUseHnswIndex()) {
                try (Statement stmt = connection.createStatement()) {
                    stmt.execute("CREATE INDEX IF NOT EXISTS " + collectionName + "_vector_idx ON " + collectionName +
                            " USING hnsw (vector vector_cosine_ops)");
                }
                connection.commit();
            }
        } catch (SQLException e) {
            logger.error("create collection error", e);
            return false;
        }
        return true;
    }

    /**
     * Deletes the rows whose {@code id} is in {@code ids} and commits.
     *
     * @param ids     ids to delete; each is bound via {@link String#valueOf(Object)},
     *                matching how ids are written by {@link #doStore}
     * @param options supplies the collection (table) name, falling back to the default
     * @return a success result (also when there is nothing to delete), or a
     *         failure result when an exception occurs
     */
    @Override
    public StoreResult doDelete(Collection<?> ids, StoreOptions options) {
        // "IN ()" is not valid SQL, and there is nothing to delete anyway.
        if (ids == null || ids.isEmpty()) {
            return StoreResult.success();
        }

        String placeholders = String.join(",", Collections.nCopies(ids.size(), "?"));
        String sql = "DELETE FROM " + options.getCollectionNameOrDefault(defaultCollectionName)
                + " WHERE id IN (" + placeholders + ")";

        try (Connection connection = getConnection();
             PreparedStatement pstmt = connection.prepareStatement(sql)) {
            int index = 1;
            for (Object id : ids) {
                pstmt.setString(index++, String.valueOf(id));
            }
            pstmt.executeUpdate();
            connection.commit();
        } catch (Exception e) {
            logger.error("delete document error: " + e, e);
            return StoreResult.fail();
        }
        return StoreResult.success();
    }

    /**
     * Returns the documents closest to the query vector, ordered by the
     * {@code <=>} (cosine distance) operator and limited to the wrapper's
     * maximum result count.
     *
     * @param searchWrapper query vector, threshold, limit and whether to return vectors
     * @param options       supplies the collection (table) name, falling back to the default
     * @return the matching documents, or an empty list when an exception occurs
     */
    @Override
    public List<Document> doSearch(SearchWrapper searchWrapper, StoreOptions options) {
        StringBuilder sql = new StringBuilder("select ");
        if (searchWrapper.isOutputVector()) {
            sql.append("id, vector, content, metadata");
        } else {
            sql.append("id, content, metadata");
        }
        sql.append(" from ").append(options.getCollectionNameOrDefault(defaultCollectionName));
        // NOTE(review): the threshold is applied as an upper bound on cosine
        // DISTANCE. If minScore is meant as a minimum similarity, this
        // comparison is inverted (a minScore of 0 matches nothing) - confirm
        // against SearchWrapper's contract before changing.
        sql.append(" where vector <=> ? < ? order by vector <=> ? LIMIT ?");

        try (Connection connection = getConnection();
             PreparedStatement stmt = connection.prepareStatement(sql.toString())) {
            // Use cosine distance to find the most similar documents.
            PGobject vector = PgvectorUtil.toPgVector(searchWrapper.getVectorAsDoubleArray());
            stmt.setObject(1, vector);
            stmt.setObject(2, Optional.ofNullable(searchWrapper.getMinScore()).orElse(DEFAULT_SIMILARITY_THRESHOLD));
            stmt.setObject(3, vector);
            stmt.setObject(4, searchWrapper.getMaxResults());

            try (ResultSet resultSet = stmt.executeQuery()) {
                List<Document> documents = new ArrayList<>();
                while (resultSet.next()) {
                    Document doc = new Document();
                    doc.setId(resultSet.getString("id"));
                    doc.setContent(resultSet.getString("content"));
                    doc.addMetadata(JSON.parseObject(resultSet.getString("metadata")));
                    if (searchWrapper.isOutputVector()) {
                        String vectorStr = resultSet.getString("vector");
                        doc.setVector(PgvectorUtil.fromPgVector(vectorStr));
                    }
                    documents.add(doc);
                }
                return documents;
            }
        } catch (Exception e) {
            logger.error("Error searching in pgvector", e);
            return Collections.emptyList();
        }
    }

    /**
     * Updates content, vector and metadata of every document (matched by id)
     * as one batch and commits.
     *
     * @param documents documents to update; {@code null} or empty is a no-op success
     * @param options   supplies the collection (table) name, falling back to the default
     * @return a success result carrying the documents, or a failure result
     *         when an exception occurs
     */
    @Override
    public StoreResult doUpdate(List<Document> documents, StoreOptions options) {
        if (documents == null || documents.isEmpty()) {
            return StoreResult.success();
        }

        String sql = "UPDATE " + options.getCollectionNameOrDefault(defaultCollectionName) + " SET "
                + "content = ?, vector = ?, metadata = ?::jsonb WHERE id = ?";

        try (Connection connection = getConnection();
             PreparedStatement pstmt = connection.prepareStatement(sql)) {
            for (Document doc : documents) {
                pstmt.setString(1, doc.getContent());
                pstmt.setObject(2, PgvectorUtil.toPgVector(doc.getVectorAsDoubleArray()));
                pstmt.setString(3, toMetadataJson(doc));
                pstmt.setString(4, String.valueOf(doc.getId()));
                pstmt.addBatch();
            }
            // executeBatch(), not executeUpdate(): the latter does not run the
            // parameter sets queued with addBatch().
            pstmt.executeBatch();
            connection.commit();
        } catch (Exception e) {
            logger.error("Error update in pgvector", e);
            return StoreResult.fail();
        }
        return StoreResult.successWithIds(documents);
    }
}

View File

@@ -0,0 +1,127 @@
/*
* Copyright (c) 2023-2026, Easy-Agents (fuhai999@gmail.com).
* <p>
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* <p>
* http://www.apache.org/licenses/LICENSE-2.0
* <p>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.easyagents.store.pgvector;
import com.easyagents.core.store.DocumentStoreConfig;
import com.easyagents.core.util.StringUtil;
import java.util.HashMap;
import java.util.Map;
/**
 * Connection and collection settings for the PostgreSQL / pgvector document store.
 *
 * @see <a href="https://github.com/pgvector/pgvector">pgvector</a>
 */
public class PgvectorVectorStoreConfig implements DocumentStoreConfig {

    // ---- connection ----
    private String host;
    private int port = 5432;
    private String databaseName = "agent_vector";
    private String username;
    private String password;
    /** Extra data source properties, keyed by property name. */
    private Map<String, String> properties = new HashMap<>();

    // ---- collection ----
    private String defaultCollectionName;
    private boolean autoCreateCollection = true;
    private boolean useHnswIndex = false;
    private int vectorDimension = 1024;

    /** Creates a configuration holding the field defaults declared above. */
    public PgvectorVectorStoreConfig() {
    }

    /**
     * Reports whether the minimum connection settings are present.
     *
     * @return the result of {@code StringUtil.hasText} for host, username,
     *         password and database name
     */
    @Override
    public boolean checkAvailable() {
        return StringUtil.hasText(host, username, password, databaseName);
    }

    /** @return the database server host */
    public String getHost() {
        return this.host;
    }

    /** @param host the database server host */
    public void setHost(String host) {
        this.host = host;
    }

    /** @return the database server port (5432 unless changed) */
    public int getPort() {
        return this.port;
    }

    /** @param port the database server port */
    public void setPort(int port) {
        this.port = port;
    }

    /** @return the database name ({@code agent_vector} unless changed) */
    public String getDatabaseName() {
        return this.databaseName;
    }

    /** @param databaseName the database name */
    public void setDatabaseName(String databaseName) {
        this.databaseName = databaseName;
    }

    /** @return the login user */
    public String getUsername() {
        return this.username;
    }

    /** @param username the login user */
    public void setUsername(String username) {
        this.username = username;
    }

    /** @return the login password */
    public String getPassword() {
        return this.password;
    }

    /** @param password the login password */
    public void setPassword(String password) {
        this.password = password;
    }

    /** @return the extra data source properties (the internal map, not a copy) */
    public Map<String, String> getProperties() {
        return this.properties;
    }

    /** @param properties the extra data source properties */
    public void setProperties(Map<String, String> properties) {
        this.properties = properties;
    }

    /** @return the collection name used when none is given per call */
    public String getDefaultCollectionName() {
        return this.defaultCollectionName;
    }

    /** @param defaultCollectionName the collection name used when none is given per call */
    public void setDefaultCollectionName(String defaultCollectionName) {
        this.defaultCollectionName = defaultCollectionName;
    }

    /** @return whether the default collection should be created automatically ({@code true} unless changed) */
    public boolean isAutoCreateCollection() {
        return this.autoCreateCollection;
    }

    /** @param autoCreateCollection whether the default collection should be created automatically */
    public void setAutoCreateCollection(boolean autoCreateCollection) {
        this.autoCreateCollection = autoCreateCollection;
    }

    /** @return whether an HNSW index should be used ({@code false} unless changed) */
    public boolean isUseHnswIndex() {
        return this.useHnswIndex;
    }

    /** @param useHnswIndex whether an HNSW index should be used */
    public void setUseHnswIndex(boolean useHnswIndex) {
        this.useHnswIndex = useHnswIndex;
    }

    /** @return the vector dimension (1024 unless changed) */
    public int getVectorDimension() {
        return this.vectorDimension;
    }

    /** @param vectorDimension the vector dimension */
    public void setVectorDimension(int vectorDimension) {
        this.vectorDimension = vectorDimension;
    }
}

View File

@@ -0,0 +1,134 @@
package com.easyagents.store.pgvector;
import com.easyagents.core.document.Document;
import com.easyagents.core.store.SearchWrapper;
import com.easyagents.core.store.StoreResult;
import com.easyagents.core.util.Maps;
import org.junit.Test;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
 * Manual smoke tests for {@link PgvectorVectorStore}.
 * <p>
 * Every test connects to the database {@code pgvector_test} on
 * {@code 127.0.0.1:5432} with the credentials configured in
 * {@link #newStore()}. The tests make no assertions; search, update and delete
 * print their result instead.
 */
public class PgvectorDbTest {

    /** Dimension of every vector used by these tests. */
    private static final int DIMENSION = 1024;

    /** Builds a store against the local test database, with HNSW index and auto-creation enabled. */
    private static PgvectorVectorStore newStore() {
        PgvectorVectorStoreConfig cfg = new PgvectorVectorStoreConfig();
        cfg.setHost("127.0.0.1");
        cfg.setPort(5432);
        cfg.setDatabaseName("pgvector_test");
        cfg.setUsername("test");
        cfg.setPassword("123456");
        cfg.setVectorDimension(DIMENSION);
        cfg.setUseHnswIndex(true);
        cfg.setAutoCreateCollection(true);
        cfg.setDefaultCollectionName("test");
        return new PgvectorVectorStore(cfg);
    }

    /** Returns a vector of {@link #DIMENSION} components, all equal to {@code value}. */
    private static float[] filledVector(float value) {
        float[] components = new float[DIMENSION];
        Arrays.fill(components, value);
        return components;
    }

    /** Stores one document whose vector consists of 1024 ones. */
    @Test
    public void testInsert() {
        PgvectorVectorStore store = newStore();

        Document single = new Document("测试数据");
        single.setVector(filledVector(1.0f));
        single.setMetadataMap(Maps.of("test", "test"));

        store.store(single);
    }

    /** Stores 100 documents; each vector repeats one random value 1024 times. */
    @Test
    public void testInsertMany() {
        PgvectorVectorStore store = newStore();

        List<Document> batch = new ArrayList<>(100);
        for (int n = 0; n < 100; n++) {
            Document item = new Document("测试数据" + n);
            item.setVector(filledVector((float) Math.random()));
            item.setMetadataMap(Maps.of("test", "test" + n));
            batch.add(item);
        }

        store.store(batch);
    }

    /** Searches with an all-ones query vector, a minimum score of 0.0 and vector output enabled. */
    @Test
    public void testSearch() {
        PgvectorVectorStore store = newStore();

        SearchWrapper query = new SearchWrapper().text("测试数据");
        query.setVector(filledVector(1.0f));
        query.setMinScore(0.0);
        query.setOutputVector(true);

        List<Document> found = store.search(query);
        System.out.println(found);
    }

    /** Updates the document with a fixed id, giving it new metadata and a vector of 1.1 values. */
    @Test
    public void testUpdate() {
        PgvectorVectorStore store = newStore();

        Document changed = new Document("测试数据");
        changed.setId("145314895749100ae8306079519b3393");
        changed.setMetadataMap(Maps.of("test", "test0"));
        changed.setVector(filledVector(1.1f));

        StoreResult outcome = store.update(changed);
        System.out.println(outcome);
    }

    /** Deletes two documents by fixed id. */
    @Test
    public void testDelete() {
        PgvectorVectorStore store = newStore();

        StoreResult outcome = store.delete("145314895749100ae8306079519b3393", "e83518d36b6d5de8199b40e3ef4e4ce1");
        System.out.println(outcome);
    }
}