初始化
This commit is contained in:
@@ -0,0 +1,55 @@
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <parent>
        <groupId>com.easyagents</groupId>
        <artifactId>easy-agents-search-engine</artifactId>
        <version>${revision}</version>
    </parent>

    <name>easy-agents-search-engine-lucene</name>
    <artifactId>easy-agents-search-engine-lucene</artifactId>

    <properties>
        <!-- Third-party versions are declared once here and referenced by the dependencies below. -->
        <jcseg.version>2.6.3</jcseg.version>
        <lucene.version>8.11.1</lucene.version>
        <maven.compiler.source>8</maven.compiler.source>
        <maven.compiler.target>8</maven.compiler.target>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    </properties>

    <dependencies>
        <!-- No version is declared for the com.easyagents artifacts in this file. -->
        <dependency>
            <groupId>com.easyagents</groupId>
            <artifactId>easy-agents-core</artifactId>
        </dependency>

        <!-- Lucene: index storage, analysis support and the classic query parser. -->
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-core</artifactId>
            <version>${lucene.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-analyzers-common</artifactId>
            <version>${lucene.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-queryparser</artifactId>
            <version>${lucene.version}</version>
        </dependency>

        <!-- Jcseg: both artifacts share the jcseg.version property so they are upgraded together. -->
        <dependency>
            <groupId>org.lionsoul</groupId>
            <artifactId>jcseg-core</artifactId>
            <version>${jcseg.version}</version>
        </dependency>
        <dependency>
            <groupId>org.lionsoul</groupId>
            <artifactId>jcseg-analyzer</artifactId>
            <version>${jcseg.version}</version>
        </dependency>

        <dependency>
            <groupId>com.easyagents</groupId>
            <artifactId>easy-agents-search-engine-service</artifactId>
        </dependency>
    </dependencies>
</project>
|
||||
@@ -0,0 +1,29 @@
|
||||
/*
|
||||
* Copyright (c) 2023-2026, Easy-Agents (fuhai999@gmail.com).
|
||||
* <p>
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
* <p>
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* <p>
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package com.easyagents.search.engine.lucene;
|
||||
|
||||
/**
 * Configuration holder for the Lucene based search engine.
 */
public class LuceneConfig {

    /** File-system path of the directory that holds the Lucene index. */
    private String indexDirPath;

    /**
     * Returns the configured index directory path.
     *
     * @return the index directory path, or {@code null} if it has not been set
     */
    public String getIndexDirPath() {
        return this.indexDirPath;
    }

    /**
     * Sets the index directory path.
     *
     * @param indexDirPath path of the directory that holds the Lucene index
     */
    public void setIndexDirPath(final String indexDirPath) {
        this.indexDirPath = indexDirPath;
    }
}
|
||||
@@ -0,0 +1,212 @@
|
||||
/*
|
||||
* Copyright (c) 2023-2026, Easy-Agents (fuhai999@gmail.com).
|
||||
* <p>
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
* <p>
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* <p>
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package com.easyagents.search.engine.lucene;
|
||||
|
||||
import com.easyagents.core.document.Document;
|
||||
import com.easyagents.search.engine.service.DocumentSearcher;
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.StringField;
|
||||
import org.apache.lucene.document.TextField;
|
||||
import org.apache.lucene.index.*;
|
||||
import org.apache.lucene.queryparser.classic.ParseException;
|
||||
import org.apache.lucene.queryparser.classic.QueryParser;
|
||||
import org.apache.lucene.search.*;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.FSDirectory;
|
||||
import org.jetbrains.annotations.NotNull;
|
||||
import org.lionsoul.jcseg.ISegment;
|
||||
import org.lionsoul.jcseg.analyzer.JcsegAnalyzer;
|
||||
import org.lionsoul.jcseg.dic.DictionaryFactory;
|
||||
import org.lionsoul.jcseg.segmenter.SegmenterConfig;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
|
||||
public class LuceneSearcher implements DocumentSearcher {
|
||||
|
||||
private static final Logger LOG = LoggerFactory.getLogger(LuceneSearcher.class);
|
||||
|
||||
private Directory directory;
|
||||
|
||||
public LuceneSearcher(LuceneConfig config) {
|
||||
Objects.requireNonNull(config, "LuceneConfig 不能为 null");
|
||||
try {
|
||||
String indexDirPath = config.getIndexDirPath(); // 索引目录路径
|
||||
File indexDir = new File(indexDirPath);
|
||||
if (!indexDir.exists() && !indexDir.mkdirs()) {
|
||||
throw new IllegalStateException("can not mkdirs for path: " + indexDirPath);
|
||||
}
|
||||
|
||||
this.directory = FSDirectory.open(indexDir.toPath());
|
||||
} catch (IOException e) {
|
||||
LOG.error("初始化 Lucene 索引失败", e);
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean addDocument(Document document) {
|
||||
if (document == null || document.getContent() == null) return false;
|
||||
|
||||
IndexWriter indexWriter = null;
|
||||
try {
|
||||
indexWriter = createIndexWriter();
|
||||
|
||||
org.apache.lucene.document.Document luceneDoc = new org.apache.lucene.document.Document();
|
||||
luceneDoc.add(new StringField("id", document.getId().toString(), Field.Store.YES));
|
||||
luceneDoc.add(new TextField("content", document.getContent(), Field.Store.YES));
|
||||
|
||||
if (document.getTitle() != null) {
|
||||
luceneDoc.add(new TextField("title", document.getTitle(), Field.Store.YES));
|
||||
}
|
||||
|
||||
|
||||
indexWriter.addDocument(luceneDoc);
|
||||
indexWriter.commit();
|
||||
return true;
|
||||
} catch (Exception e) {
|
||||
LOG.error("添加文档失败", e);
|
||||
return false;
|
||||
} finally {
|
||||
close(indexWriter);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public boolean deleteDocument(Object id) {
|
||||
if (id == null) return false;
|
||||
|
||||
IndexWriter indexWriter = null;
|
||||
try {
|
||||
indexWriter = createIndexWriter();
|
||||
Term term = new Term("id", id.toString());
|
||||
indexWriter.deleteDocuments(term);
|
||||
indexWriter.commit();
|
||||
return true;
|
||||
} catch (IOException e) {
|
||||
LOG.error("删除文档失败", e);
|
||||
return false;
|
||||
} finally {
|
||||
close(indexWriter);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean updateDocument(Document document) {
|
||||
if (document == null || document.getId() == null) return false;
|
||||
|
||||
IndexWriter indexWriter = null;
|
||||
try {
|
||||
indexWriter = createIndexWriter();
|
||||
Term term = new Term("id", document.getId().toString());
|
||||
|
||||
org.apache.lucene.document.Document luceneDoc = new org.apache.lucene.document.Document();
|
||||
luceneDoc.add(new StringField("id", document.getId().toString(), Field.Store.YES));
|
||||
luceneDoc.add(new TextField("content", document.getContent(), Field.Store.YES));
|
||||
|
||||
if (document.getTitle() != null) {
|
||||
luceneDoc.add(new TextField("title", document.getTitle(), Field.Store.YES));
|
||||
}
|
||||
|
||||
indexWriter.updateDocument(term, luceneDoc);
|
||||
indexWriter.commit();
|
||||
return true;
|
||||
} catch (IOException e) {
|
||||
LOG.error("更新文档失败", e);
|
||||
return false;
|
||||
} finally {
|
||||
close(indexWriter);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<Document> searchDocuments(String keyword, int count) {
|
||||
List<Document> results = new ArrayList<>();
|
||||
try (IndexReader reader = DirectoryReader.open(directory)) {
|
||||
IndexSearcher searcher = new IndexSearcher(reader);
|
||||
Query query = buildQuery(keyword);
|
||||
TopDocs topDocs = searcher.search(query, count);
|
||||
for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
|
||||
org.apache.lucene.document.Document doc = searcher.doc(scoreDoc.doc);
|
||||
Document resultDoc = new Document();
|
||||
resultDoc.setId(doc.get("id"));
|
||||
resultDoc.setContent(doc.get("content"));
|
||||
resultDoc.setTitle(doc.get("title"));
|
||||
|
||||
resultDoc.setScore((double) scoreDoc.score);
|
||||
|
||||
results.add(resultDoc);
|
||||
}
|
||||
} catch (Exception e) {
|
||||
LOG.error("搜索文档失败", e);
|
||||
}
|
||||
|
||||
return results;
|
||||
}
|
||||
|
||||
private static Query buildQuery(String keyword) {
|
||||
try {
|
||||
Analyzer analyzer = createAnalyzer();
|
||||
|
||||
QueryParser titleQueryParser = new QueryParser("title", analyzer);
|
||||
Query titleQuery = titleQueryParser.parse(keyword);
|
||||
BooleanClause titleBooleanClause = new BooleanClause(titleQuery, BooleanClause.Occur.SHOULD);
|
||||
|
||||
QueryParser contentQueryParser = new QueryParser("content", analyzer);
|
||||
Query contentQuery = contentQueryParser.parse(keyword);
|
||||
BooleanClause contentBooleanClause = new BooleanClause(contentQuery, BooleanClause.Occur.SHOULD);
|
||||
|
||||
BooleanQuery.Builder builder = new BooleanQuery.Builder();
|
||||
builder.add(titleBooleanClause)
|
||||
.add(contentBooleanClause);
|
||||
return builder.build();
|
||||
} catch (ParseException e) {
|
||||
LOG.error(e.toString(), e);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
|
||||
@NotNull
|
||||
private IndexWriter createIndexWriter() throws IOException {
|
||||
Analyzer analyzer = createAnalyzer();
|
||||
IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer);
|
||||
return new IndexWriter(directory, indexWriterConfig);
|
||||
}
|
||||
|
||||
|
||||
private static Analyzer createAnalyzer() {
|
||||
SegmenterConfig config = new SegmenterConfig(true);
|
||||
return new JcsegAnalyzer(ISegment.Type.NLP, config, DictionaryFactory.createSingletonDictionary(config));
|
||||
}
|
||||
|
||||
public void close(IndexWriter indexWriter) {
|
||||
try {
|
||||
if (indexWriter != null) {
|
||||
indexWriter.close();
|
||||
}
|
||||
} catch (IOException e) {
|
||||
LOG.error("关闭 Lucene 失败", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,96 @@
|
||||
/*
|
||||
* Copyright (c) 2023-2026, Easy-Agents (fuhai999@gmail.com).
|
||||
* <p>
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
* <p>
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* <p>
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package com.easyagents.engines.test;
|
||||
|
||||
import com.easyagents.core.document.Document;
|
||||
import com.easyagents.search.engine.lucene.LuceneConfig;
|
||||
import com.easyagents.search.engine.lucene.LuceneSearcher;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
public class TestLuceneCRUD {
|
||||
public static void main(String[] args) {
|
||||
// 1. 配置 Lucene 索引路径
|
||||
LuceneConfig config = new LuceneConfig();
|
||||
config.setIndexDirPath("./2lucene_index"); // 设置索引目录路径
|
||||
// 2. 创建 LuceneSearcher 实例
|
||||
|
||||
LuceneSearcher luceneSearcher = new LuceneSearcher(config);
|
||||
|
||||
// 文档ID(用于更新和删除)
|
||||
// ---- Step 1: 添加文档 ----
|
||||
System.out.println("【添加文档】");
|
||||
Document doc1 = new Document();
|
||||
doc1.setId(1);
|
||||
doc1.setTitle("利润最大化的原则");
|
||||
doc1.setContent("平台客服工具:是指拼多多平台开发并向企业提供的功能或工具,商家通过其专属账号登录平台客服工具后,可以与平台消费者取得\n" +
|
||||
"联系并为消费者提供客户服务");
|
||||
|
||||
boolean addSuccess = luceneSearcher.addDocument(doc1);
|
||||
System.out.println("添加文档1结果:" + (addSuccess ? "成功" : "失败"));
|
||||
|
||||
|
||||
Document doc2 = new Document();
|
||||
doc2.setId(2);
|
||||
doc2.setTitle("企业获取报酬的活动");
|
||||
doc2.setContent("研究如何最合理地分配稀缺资源及不同的用途");
|
||||
|
||||
boolean addSuccess1 = luceneSearcher.addDocument(doc2);
|
||||
System.out.println("添加文档2结果:" + (addSuccess1 ? "成功" : "失败"));
|
||||
|
||||
// 查询添加后的结果
|
||||
testSearch(luceneSearcher, "企业");
|
||||
testSearch(luceneSearcher, "报酬");
|
||||
|
||||
// ---- Step 2: 更新文档 ----
|
||||
System.out.println("\n【更新文档】");
|
||||
Document updatedDoc = new Document();
|
||||
updatedDoc.setId(1);
|
||||
updatedDoc.setContent("平台客服工具:是指拼多多平台开发并向商家提供的功能或工具,商家通过其专属账号登录平台客服工具后,可以与平台消费者取得\n" +
|
||||
"联系并为消费者提供客户服务2");
|
||||
|
||||
boolean updateSuccess = luceneSearcher.updateDocument(updatedDoc);
|
||||
System.out.println("更新文档结果:" + (updateSuccess ? "成功" : "失败"));
|
||||
|
||||
// 查询更新后的结果
|
||||
testSearch(luceneSearcher, "消费者");
|
||||
|
||||
// ---- Step 3: 删除文档 ----
|
||||
System.out.println("\n【删除文档】");
|
||||
boolean deleteSuccess = luceneSearcher.deleteDocument(2);
|
||||
System.out.println("删除文档结果:" + (deleteSuccess ? "成功" : "失败"));
|
||||
|
||||
// 查询删除后的结果
|
||||
testSearch(luceneSearcher, "报酬");
|
||||
|
||||
}
|
||||
|
||||
// 封装一个搜索方法,打印搜索结果
|
||||
private static void testSearch(LuceneSearcher searcher, String keyword) {
|
||||
List<com.easyagents.core.document.Document> results = searcher.searchDocuments(keyword);
|
||||
if (results.isEmpty()) {
|
||||
System.out.println("没有找到匹配的文档。");
|
||||
} else {
|
||||
System.out.println("找到 " + results.size() + " 个匹配文档:");
|
||||
for (com.easyagents.core.document.Document doc : results) {
|
||||
System.out.println("ID: " + doc.getId());
|
||||
System.out.println("标题: " + doc.getTitle());
|
||||
System.out.println("内容: " + doc.getContent());
|
||||
System.out.println("-----------------------------");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user