feat: RAG分块策略增强
This commit is contained in:
@@ -56,6 +56,30 @@
|
|||||||
</exclusions>
|
</exclusions>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.easyagents</groupId>
|
||||||
|
<artifactId>easy-agents-rag-core</artifactId>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.easyagents</groupId>
|
||||||
|
<artifactId>easy-agents-rag-ingestion</artifactId>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.easyagents</groupId>
|
||||||
|
<artifactId>easy-agents-rag-ocr</artifactId>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.easyagents</groupId>
|
||||||
|
<artifactId>easy-agents-rag-enhance</artifactId>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.easyagents</groupId>
|
||||||
|
<artifactId>easy-agents-rag-retrieval</artifactId>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
<!--image model start-->
|
<!--image model start-->
|
||||||
<dependency>
|
<dependency>
|
||||||
|
|||||||
102
easy-agents-rag/TECH-PLAN.md
Normal file
102
easy-agents-rag/TECH-PLAN.md
Normal file
@@ -0,0 +1,102 @@
|
|||||||
|
# easy-agents-rag 技术规划
|
||||||
|
|
||||||
|
## 目标
|
||||||
|
|
||||||
|
`easy-agents-rag` 用于承载 Easy-Agents 的 RAG 领域能力,逐步将知识入库、文档预处理、分块、索引增强、检索增强等能力从业务工程中抽离,形成可复用的框架层模块。
|
||||||
|
|
||||||
|
当前阶段先完成模块骨架建设,并优先承接本次文档导入链路中的预处理与分块能力迁移。
|
||||||
|
|
||||||
|
## 模块规划
|
||||||
|
|
||||||
|
### `easy-agents-rag-core`
|
||||||
|
|
||||||
|
定位:RAG 域共享契约层。
|
||||||
|
|
||||||
|
负责内容:
|
||||||
|
- 通用常量与元数据 key
|
||||||
|
- 结构类型、策略类型、chunk 类型定义
|
||||||
|
- 少量稳定共享模型与接口
|
||||||
|
|
||||||
|
不负责内容:
|
||||||
|
- 具体 OCR 实现
|
||||||
|
- 具体分块实现
|
||||||
|
- 具体召回编排
|
||||||
|
|
||||||
|
### `easy-agents-rag-ingestion`
|
||||||
|
|
||||||
|
定位:入库前处理链路。
|
||||||
|
|
||||||
|
负责内容:
|
||||||
|
- 文本标准化与清洗
|
||||||
|
- 文档结构分析
|
||||||
|
- 拆分策略推荐
|
||||||
|
- 文档分块与 chunk 元信息补全
|
||||||
|
- 入库前质量控制
|
||||||
|
|
||||||
|
当前迁移优先承接:
|
||||||
|
- 文档结构分析
|
||||||
|
- 章节/问答/段落分块
|
||||||
|
- 自动推荐拆分策略
|
||||||
|
|
||||||
|
### `easy-agents-rag-ocr`
|
||||||
|
|
||||||
|
定位:OCR 与版面恢复能力。
|
||||||
|
|
||||||
|
负责内容:
|
||||||
|
- 图片/PDF OCR
|
||||||
|
- 页面版面解析
|
||||||
|
- 标题、段落、表格等结构恢复
|
||||||
|
- PDF 到结构化文本或 Markdown 的转换
|
||||||
|
|
||||||
|
### `easy-agents-rag-enhance`
|
||||||
|
|
||||||
|
定位:索引前增强能力。
|
||||||
|
|
||||||
|
负责内容:
|
||||||
|
- 图增强
|
||||||
|
- RAPTOR
|
||||||
|
- parent-child chunk
|
||||||
|
- window chunk
|
||||||
|
- 摘要、关键词、标签等增强信息生成
|
||||||
|
- 索引前的知识单元增强
|
||||||
|
|
||||||
|
### `easy-agents-rag-retrieval`
|
||||||
|
|
||||||
|
定位:查询侧增强与召回编排。
|
||||||
|
|
||||||
|
负责内容:
|
||||||
|
- query rewrite / expansion
|
||||||
|
- hybrid recall 编排
|
||||||
|
- metadata filter 策略
|
||||||
|
- graph recall
|
||||||
|
- rerank 编排
|
||||||
|
- chunk merge / window expand / context assemble
|
||||||
|
|
||||||
|
## 当前迁移范围
|
||||||
|
|
||||||
|
本次优先迁移到 `easy-agents-rag-ingestion` 的能力:
|
||||||
|
- 文档结构分析
|
||||||
|
- 拆分策略推荐
|
||||||
|
- 标题型 / QA 型 / 段落型分块
|
||||||
|
|
||||||
|
本次不迁移的能力:
|
||||||
|
- 业务侧预览会话
|
||||||
|
- 控制器与接口 DTO
|
||||||
|
- 业务库持久化
|
||||||
|
- 前端导入页面
|
||||||
|
|
||||||
|
这些能力继续留在业务工程,由业务层依赖 `easy-agents-rag` 提供的能力完成编排。
|
||||||
|
|
||||||
|
## 后续演进
|
||||||
|
|
||||||
|
后续演进顺序建议如下:
|
||||||
|
|
||||||
|
1. 完成 `rag-ingestion` 首批能力迁移并稳定对外接口
|
||||||
|
2. 补充 `rag-ocr`,接入 OCR 与版面恢复
|
||||||
|
3. 补充 `rag-enhance`,支持图增强、RAPTOR、索引增强
|
||||||
|
4. 补充 `rag-retrieval`,统一查询增强与召回后处理
|
||||||
|
|
||||||
|
整体原则:
|
||||||
|
- `easy-agents-core` 保持基础抽象
|
||||||
|
- `easy-agents-rag` 聚合 RAG 领域实现
|
||||||
|
- 业务工程只保留编排、持久化与产品层逻辑
|
||||||
28
easy-agents-rag/easy-agents-rag-core/pom.xml
Normal file
28
easy-agents-rag/easy-agents-rag-core/pom.xml
Normal file
@@ -0,0 +1,28 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project xmlns="http://maven.apache.org/POM/4.0.0"
|
||||||
|
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||||
|
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||||
|
<modelVersion>4.0.0</modelVersion>
|
||||||
|
|
||||||
|
<parent>
|
||||||
|
<groupId>com.easyagents</groupId>
|
||||||
|
<artifactId>easy-agents-rag</artifactId>
|
||||||
|
<version>${revision}</version>
|
||||||
|
</parent>
|
||||||
|
|
||||||
|
<artifactId>easy-agents-rag-core</artifactId>
|
||||||
|
<name>easy-agents-rag-core</name>
|
||||||
|
|
||||||
|
<properties>
|
||||||
|
<maven.compiler.source>8</maven.compiler.source>
|
||||||
|
<maven.compiler.target>8</maven.compiler.target>
|
||||||
|
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
|
||||||
|
</properties>
|
||||||
|
|
||||||
|
<dependencies>
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.easyagents</groupId>
|
||||||
|
<artifactId>easy-agents-core</artifactId>
|
||||||
|
</dependency>
|
||||||
|
</dependencies>
|
||||||
|
</project>
|
||||||
@@ -0,0 +1,128 @@
|
|||||||
|
package com.easyagents.rag.core;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.LinkedHashMap;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
/**
 * Mutable, serializable value object describing one chunk of a document.
 *
 * <p>Collection-typed properties ({@code headingPath}, {@code warnings},
 * {@code options}) are never {@code null}: they start out empty, and passing
 * {@code null} to their setters resets them to a fresh empty collection so
 * that callers can always iterate or append without a null check.</p>
 *
 * <p>{@code partNo} and {@code partTotal} both default to {@code 1}.</p>
 */
public class RagChunk implements Serializable {

    /** Explicit version id so the serialized form does not depend on compiler-generated values. */
    private static final long serialVersionUID = 1L;

    private String chunkId;
    private String chunkType;
    private String sourceLabel;
    private List<String> headingPath = new ArrayList<>();
    private String content;
    private String question;
    private String answer;
    private Integer charCount;
    private Integer tokenEstimate;
    private Integer partNo = 1;
    private Integer partTotal = 1;
    private List<String> warnings = new ArrayList<>();
    private Map<String, Object> options = new LinkedHashMap<>();

    /** @return the chunk identifier, or {@code null} if none was set */
    public String getChunkId() {
        return chunkId;
    }

    /** @param chunkId the chunk identifier */
    public void setChunkId(String chunkId) {
        this.chunkId = chunkId;
    }

    /** @return the chunk type code, or {@code null} if none was set */
    public String getChunkType() {
        return chunkType;
    }

    /** @param chunkType the chunk type code */
    public void setChunkType(String chunkType) {
        this.chunkType = chunkType;
    }

    /** @return the source label, or {@code null} if none was set */
    public String getSourceLabel() {
        return sourceLabel;
    }

    /** @param sourceLabel the source label */
    public void setSourceLabel(String sourceLabel) {
        this.sourceLabel = sourceLabel;
    }

    /** @return the heading path; never {@code null} */
    public List<String> getHeadingPath() {
        return headingPath;
    }

    /**
     * @param headingPath the heading path; the given list is stored as-is (no copy),
     *                    and {@code null} is replaced by a new empty list
     */
    public void setHeadingPath(List<String> headingPath) {
        this.headingPath = headingPath != null ? headingPath : new ArrayList<String>();
    }

    /** @return the chunk text, or {@code null} if none was set */
    public String getContent() {
        return content;
    }

    /** @param content the chunk text */
    public void setContent(String content) {
        this.content = content;
    }

    /** @return the question text, or {@code null} if none was set */
    public String getQuestion() {
        return question;
    }

    /** @param question the question text */
    public void setQuestion(String question) {
        this.question = question;
    }

    /** @return the answer text, or {@code null} if none was set */
    public String getAnswer() {
        return answer;
    }

    /** @param answer the answer text */
    public void setAnswer(String answer) {
        this.answer = answer;
    }

    /** @return the character count, or {@code null} if none was set */
    public Integer getCharCount() {
        return charCount;
    }

    /** @param charCount the character count */
    public void setCharCount(Integer charCount) {
        this.charCount = charCount;
    }

    /** @return the token estimate, or {@code null} if none was set */
    public Integer getTokenEstimate() {
        return tokenEstimate;
    }

    /** @param tokenEstimate the token estimate */
    public void setTokenEstimate(Integer tokenEstimate) {
        this.tokenEstimate = tokenEstimate;
    }

    /** @return the part number; {@code 1} unless changed */
    public Integer getPartNo() {
        return partNo;
    }

    /** @param partNo the part number */
    public void setPartNo(Integer partNo) {
        this.partNo = partNo;
    }

    /** @return the total number of parts; {@code 1} unless changed */
    public Integer getPartTotal() {
        return partTotal;
    }

    /** @param partTotal the total number of parts */
    public void setPartTotal(Integer partTotal) {
        this.partTotal = partTotal;
    }

    /** @return the warnings attached to this chunk; never {@code null} */
    public List<String> getWarnings() {
        return warnings;
    }

    /**
     * @param warnings the warnings; the given list is stored as-is (no copy),
     *                 and {@code null} is replaced by a new empty list
     */
    public void setWarnings(List<String> warnings) {
        this.warnings = warnings != null ? warnings : new ArrayList<String>();
    }

    /** @return the free-form options map; never {@code null} */
    public Map<String, Object> getOptions() {
        return options;
    }

    /**
     * @param options the options map; the given map is stored as-is (no copy),
     *                and {@code null} is replaced by a new empty insertion-ordered map
     */
    public void setOptions(Map<String, Object> options) {
        this.options = options != null ? options : new LinkedHashMap<String, Object>();
    }
}
|
||||||
@@ -0,0 +1,11 @@
|
|||||||
|
package com.easyagents.rag.core;
|
||||||
|
|
||||||
|
/**
 * String codes identifying the type of a chunk.
 *
 * <p>Pure constant holder; it cannot be instantiated.</p>
 */
public final class RagChunkTypes {

    /** Chunk type code {@code "section"}. */
    public static final String SECTION = "section";

    /** Chunk type code {@code "qa_pair"}. */
    public static final String QA_PAIR = "qa_pair";

    /** Chunk type code {@code "paragraph"}. */
    public static final String PARAGRAPH = "paragraph";

    // Not instantiable: constants only.
    private RagChunkTypes() {
    }
}
|
||||||
@@ -0,0 +1,12 @@
|
|||||||
|
package com.easyagents.rag.core;
|
||||||
|
|
||||||
|
/**
 * Default numeric settings shared by the RAG modules.
 *
 * <p>Pure constant holder; it cannot be instantiated.</p>
 */
public final class RagDefaults {

    /** Default chunk size ({@code 512}). NOTE(review): unit (characters vs. tokens) is not visible here - confirm. */
    public static final int CHUNK_SIZE = 512;

    /** Default overlap size ({@code 128}); presumably in the same unit as {@link #CHUNK_SIZE} - confirm. */
    public static final int OVERLAP_SIZE = 128;

    /** Default Markdown splitter level ({@code 2}). */
    public static final int MD_SPLITTER_LEVEL = 2;

    /** Default number of rows per chunk ({@code 1}). */
    public static final int ROWS_PER_CHUNK = 1;

    // Not instantiable: constants only.
    private RagDefaults() {
    }
}
|
||||||
@@ -0,0 +1,20 @@
|
|||||||
|
package com.easyagents.rag.core;
|
||||||
|
|
||||||
|
/**
 * Names of the metadata keys used for chunk metadata.
 *
 * <p>Pure constant holder; it cannot be instantiated.</p>
 */
public final class RagMetadataKeys {

    // --- general chunk description ---
    public static final String CHUNK_TYPE = "chunkType";
    public static final String SOURCE_LABEL = "sourceLabel";
    public static final String HEADING_PATH = "headingPath";
    public static final String PAGE_NO = "pageNo";

    // --- size information ---
    public static final String CHAR_COUNT = "charCount";
    public static final String TOKEN_ESTIMATE = "tokenEstimate";

    // --- question/answer information ---
    public static final String QA_QUESTION = "qaQuestion";
    public static final String QA_ANSWER = "qaAnswer";
    public static final String QA_GROUP_ID = "qaGroupId";

    // --- multi-part chunks ---
    public static final String PART_NO = "partNo";
    public static final String PART_TOTAL = "partTotal";

    // --- diagnostics ---
    public static final String WARNINGS = "warnings";

    // Not instantiable: constants only.
    private RagMetadataKeys() {
    }
}
|
||||||
@@ -0,0 +1,14 @@
|
|||||||
|
package com.easyagents.rag.core;
|
||||||
|
|
||||||
|
/**
 * String codes naming the available split strategies.
 *
 * <p>Pure constant holder; it cannot be instantiated.</p>
 */
public final class RagStrategyCodes {

    /** Strategy code {@code "AUTO"}. */
    public static final String AUTO = "AUTO";

    /** Strategy code {@code "MARKDOWN_SECTION"}. */
    public static final String MARKDOWN_SECTION = "MARKDOWN_SECTION";

    /** Strategy code {@code "OUTLINE_SECTION"}. */
    public static final String OUTLINE_SECTION = "OUTLINE_SECTION";

    /** Strategy code {@code "QA_PAIR"}. */
    public static final String QA_PAIR = "QA_PAIR";

    /** Strategy code {@code "PARAGRAPH_LENGTH"}. */
    public static final String PARAGRAPH_LENGTH = "PARAGRAPH_LENGTH";

    /** Strategy code {@code "CUSTOM_REGEX"}. */
    public static final String CUSTOM_REGEX = "CUSTOM_REGEX";

    // Not instantiable: constants only.
    private RagStrategyCodes() {
    }
}
|
||||||
@@ -0,0 +1,12 @@
|
|||||||
|
package com.easyagents.rag.core;
|
||||||
|
|
||||||
|
/**
 * String codes naming the document structure types.
 *
 * <p>Pure constant holder; it cannot be instantiated.</p>
 */
public final class RagStructureTypes {

    /** Structure type code {@code "markdown_heading"}. */
    public static final String MARKDOWN_HEADING = "markdown_heading";

    /** Structure type code {@code "outline_section"}. */
    public static final String OUTLINE_SECTION = "outline_section";

    /** Structure type code {@code "qa_pair"}. */
    public static final String QA_PAIR = "qa_pair";

    /** Structure type code {@code "plain_paragraph"}. */
    public static final String PLAIN_PARAGRAPH = "plain_paragraph";

    // Not instantiable: constants only.
    private RagStructureTypes() {
    }
}
|
||||||
36
easy-agents-rag/easy-agents-rag-enhance/pom.xml
Normal file
36
easy-agents-rag/easy-agents-rag-enhance/pom.xml
Normal file
@@ -0,0 +1,36 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project xmlns="http://maven.apache.org/POM/4.0.0"
|
||||||
|
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||||
|
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||||
|
<modelVersion>4.0.0</modelVersion>
|
||||||
|
|
||||||
|
<parent>
|
||||||
|
<groupId>com.easyagents</groupId>
|
||||||
|
<artifactId>easy-agents-rag</artifactId>
|
||||||
|
<version>${revision}</version>
|
||||||
|
</parent>
|
||||||
|
|
||||||
|
<artifactId>easy-agents-rag-enhance</artifactId>
|
||||||
|
<name>easy-agents-rag-enhance</name>
|
||||||
|
|
||||||
|
<properties>
|
||||||
|
<maven.compiler.source>8</maven.compiler.source>
|
||||||
|
<maven.compiler.target>8</maven.compiler.target>
|
||||||
|
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
|
||||||
|
</properties>
|
||||||
|
|
||||||
|
<dependencies>
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.easyagents</groupId>
|
||||||
|
<artifactId>easy-agents-core</artifactId>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.easyagents</groupId>
|
||||||
|
<artifactId>easy-agents-rag-core</artifactId>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.easyagents</groupId>
|
||||||
|
<artifactId>easy-agents-rag-ingestion</artifactId>
|
||||||
|
</dependency>
|
||||||
|
</dependencies>
|
||||||
|
</project>
|
||||||
37
easy-agents-rag/easy-agents-rag-ingestion/pom.xml
Normal file
37
easy-agents-rag/easy-agents-rag-ingestion/pom.xml
Normal file
@@ -0,0 +1,37 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project xmlns="http://maven.apache.org/POM/4.0.0"
|
||||||
|
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||||
|
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||||
|
<modelVersion>4.0.0</modelVersion>
|
||||||
|
|
||||||
|
<parent>
|
||||||
|
<groupId>com.easyagents</groupId>
|
||||||
|
<artifactId>easy-agents-rag</artifactId>
|
||||||
|
<version>${revision}</version>
|
||||||
|
</parent>
|
||||||
|
|
||||||
|
<artifactId>easy-agents-rag-ingestion</artifactId>
|
||||||
|
<name>easy-agents-rag-ingestion</name>
|
||||||
|
|
||||||
|
<properties>
|
||||||
|
<maven.compiler.source>8</maven.compiler.source>
|
||||||
|
<maven.compiler.target>8</maven.compiler.target>
|
||||||
|
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
|
||||||
|
</properties>
|
||||||
|
|
||||||
|
<dependencies>
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.easyagents</groupId>
|
||||||
|
<artifactId>easy-agents-core</artifactId>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.easyagents</groupId>
|
||||||
|
<artifactId>easy-agents-rag-core</artifactId>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>junit</groupId>
|
||||||
|
<artifactId>junit</artifactId>
|
||||||
|
<scope>test</scope>
|
||||||
|
</dependency>
|
||||||
|
</dependencies>
|
||||||
|
</project>
|
||||||
@@ -0,0 +1,41 @@
|
|||||||
|
package com.easyagents.rag.ingestion;
|
||||||
|
|
||||||
|
import com.easyagents.rag.core.RagChunk;
|
||||||
|
import com.easyagents.rag.ingestion.analysis.DocumentStructureAnalyzer;
|
||||||
|
import com.easyagents.rag.ingestion.chunk.RagSplitStrategyRegistry;
|
||||||
|
import com.easyagents.rag.ingestion.model.AnalysisResult;
|
||||||
|
import com.easyagents.rag.ingestion.model.StrategyConfig;
|
||||||
|
import com.easyagents.rag.ingestion.recommend.SplitStrategyRecommender;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
public class DefaultRagIngestionService implements RagIngestionService {
|
||||||
|
|
||||||
|
private final DocumentStructureAnalyzer documentStructureAnalyzer;
|
||||||
|
private final SplitStrategyRecommender splitStrategyRecommender;
|
||||||
|
private final RagSplitStrategyRegistry ragSplitStrategyRegistry;
|
||||||
|
|
||||||
|
public DefaultRagIngestionService(DocumentStructureAnalyzer documentStructureAnalyzer,
|
||||||
|
SplitStrategyRecommender splitStrategyRecommender,
|
||||||
|
RagSplitStrategyRegistry ragSplitStrategyRegistry) {
|
||||||
|
this.documentStructureAnalyzer = documentStructureAnalyzer;
|
||||||
|
this.splitStrategyRecommender = splitStrategyRecommender;
|
||||||
|
this.ragSplitStrategyRegistry = ragSplitStrategyRegistry;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public AnalysisResult analyze(String rawContent, String sourceFormat) {
|
||||||
|
AnalysisResult result = documentStructureAnalyzer.analyze(rawContent, sourceFormat);
|
||||||
|
return splitStrategyRecommender.recommend(result);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public List<RagChunk> split(AnalysisResult analysis, StrategyConfig config) {
|
||||||
|
return ragSplitStrategyRegistry.split(analysis, config);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toStrategyLabel(String strategyCode) {
|
||||||
|
return splitStrategyRecommender.toStrategyLabel(strategyCode);
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,16 @@
|
|||||||
|
package com.easyagents.rag.ingestion;
|
||||||
|
|
||||||
|
import com.easyagents.rag.core.RagChunk;
|
||||||
|
import com.easyagents.rag.ingestion.model.AnalysisResult;
|
||||||
|
import com.easyagents.rag.ingestion.model.StrategyConfig;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
public interface RagIngestionService {
|
||||||
|
|
||||||
|
AnalysisResult analyze(String rawContent, String sourceFormat);
|
||||||
|
|
||||||
|
List<RagChunk> split(AnalysisResult analysis, StrategyConfig config);
|
||||||
|
|
||||||
|
String toStrategyLabel(String strategyCode);
|
||||||
|
}
|
||||||
@@ -0,0 +1,234 @@
|
|||||||
|
package com.easyagents.rag.ingestion.analysis;
|
||||||
|
|
||||||
|
import com.easyagents.core.util.StringUtil;
|
||||||
|
import com.easyagents.rag.ingestion.model.AnalysisResult;
|
||||||
|
|
||||||
|
import java.util.*;
|
||||||
|
import java.util.regex.Matcher;
|
||||||
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
|
public class DocumentStructureAnalyzer {
|
||||||
|
|
||||||
|
private static final Pattern MARKDOWN_HEADING = Pattern.compile("^#{1,6}\\s+\\S+.*$");
|
||||||
|
private static final Pattern CHINESE_CHAPTER = Pattern.compile("^第[一二三四五六七八九十百零两0-9]+[章节篇部分卷]\\s*.*$");
|
||||||
|
private static final Pattern CHINESE_SECTION = Pattern.compile("^[一二三四五六七八九十百零]+[、..]\\s*\\S+.*$");
|
||||||
|
private static final Pattern CHINESE_SUBSECTION = Pattern.compile("^[((][一二三四五六七八九十百零0-9]+[))]\\s*\\S+.*$");
|
||||||
|
private static final Pattern NUMERIC_SECTION = Pattern.compile("^[0-9]+(\\.[0-9]+){0,4}\\s+\\S+.*$");
|
||||||
|
private static final Pattern ENGLISH_SECTION = Pattern.compile("^(Chapter|Section|Part)\\s+[0-9IVXLC]+([.:\\-\\s].*)?$", Pattern.CASE_INSENSITIVE);
|
||||||
|
private static final Pattern ENGLISH_ROMAN = Pattern.compile("^[IVXLC]+[.、)\\s-]+\\S+.*$");
|
||||||
|
private static final Pattern QUESTION_LINE = Pattern.compile("^(Q|QUESTION|问|问题|FAQ\\s*[0-9]+)\\s*[.::-]\\s*.+$", Pattern.CASE_INSENSITIVE);
|
||||||
|
private static final Pattern ANSWER_LINE = Pattern.compile("^(A|ANSWER|答|答案)\\s*[.::-]\\s*.+$", Pattern.CASE_INSENSITIVE);
|
||||||
|
private static final Pattern PAGE_NUMBER = Pattern.compile("^(第?\\s*\\d+\\s*页|page\\s+\\d+|\\d+)\\s*$", Pattern.CASE_INSENSITIVE);
|
||||||
|
private static final Pattern TOC_LINE = Pattern.compile("^.{2,80}[.·•…]{2,}\\s*\\d+\\s*$");
|
||||||
|
|
||||||
|
public AnalysisResult analyze(String rawContent, String sourceFormat) {
|
||||||
|
String normalizedContent = normalize(rawContent);
|
||||||
|
List<String> lines = toLines(normalizedContent);
|
||||||
|
|
||||||
|
int markdownHeadingCount = 0;
|
||||||
|
int outlineHeadingCount = 0;
|
||||||
|
int qaQuestionCount = 0;
|
||||||
|
int qaAnswerCount = 0;
|
||||||
|
int pairedQaCount = 0;
|
||||||
|
int tocLineCount = 0;
|
||||||
|
int shortLineCount = 0;
|
||||||
|
Set<Integer> markdownLevels = new HashSet<Integer>();
|
||||||
|
|
||||||
|
for (int i = 0; i < lines.size(); i++) {
|
||||||
|
String line = lines.get(i);
|
||||||
|
if (MARKDOWN_HEADING.matcher(line).matches()) {
|
||||||
|
markdownHeadingCount++;
|
||||||
|
markdownLevels.add(Integer.valueOf(countMarkdownLevel(line)));
|
||||||
|
}
|
||||||
|
if (isOutlineHeading(line)) {
|
||||||
|
outlineHeadingCount++;
|
||||||
|
}
|
||||||
|
if (QUESTION_LINE.matcher(line).matches()) {
|
||||||
|
qaQuestionCount++;
|
||||||
|
if (hasAnswerNearby(lines, i)) {
|
||||||
|
pairedQaCount++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (ANSWER_LINE.matcher(line).matches()) {
|
||||||
|
qaAnswerCount++;
|
||||||
|
}
|
||||||
|
if (TOC_LINE.matcher(line).matches()) {
|
||||||
|
tocLineCount++;
|
||||||
|
}
|
||||||
|
if (line.length() <= 20) {
|
||||||
|
shortLineCount++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int paragraphCount = 0;
|
||||||
|
int longParagraphCount = 0;
|
||||||
|
List<String> paragraphs = splitParagraphs(normalizedContent);
|
||||||
|
for (String paragraph : paragraphs) {
|
||||||
|
if (StringUtil.hasText(paragraph)) {
|
||||||
|
paragraphCount++;
|
||||||
|
if (paragraph.length() > 800) {
|
||||||
|
longParagraphCount++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Map<String, Object> features = new LinkedHashMap<String, Object>();
|
||||||
|
features.put("lineCount", Integer.valueOf(lines.size()));
|
||||||
|
features.put("paragraphCount", Integer.valueOf(paragraphCount));
|
||||||
|
features.put("markdownHeadingCount", Integer.valueOf(markdownHeadingCount));
|
||||||
|
features.put("markdownLevelVariety", Integer.valueOf(markdownLevels.size()));
|
||||||
|
features.put("outlineHeadingCount", Integer.valueOf(outlineHeadingCount));
|
||||||
|
features.put("qaQuestionCount", Integer.valueOf(qaQuestionCount));
|
||||||
|
features.put("qaAnswerCount", Integer.valueOf(qaAnswerCount));
|
||||||
|
features.put("pairedQaCount", Integer.valueOf(pairedQaCount));
|
||||||
|
features.put("tocLineCount", Integer.valueOf(tocLineCount));
|
||||||
|
features.put("shortLineRatio", lines.isEmpty() ? Double.valueOf(0D) : Double.valueOf((double) shortLineCount / (double) lines.size()));
|
||||||
|
features.put("longParagraphCount", Integer.valueOf(longParagraphCount));
|
||||||
|
|
||||||
|
AnalysisResult result = new AnalysisResult();
|
||||||
|
result.setSourceFormat(sourceFormat == null ? "" : sourceFormat.toLowerCase(Locale.ROOT));
|
||||||
|
result.setNormalizedContent(normalizedContent);
|
||||||
|
result.setFeatures(features);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
private String normalize(String rawContent) {
|
||||||
|
if (!StringUtil.hasText(rawContent)) {
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
|
||||||
|
String content = rawContent
|
||||||
|
.replace("\uFEFF", "")
|
||||||
|
.replace("\u200B", "")
|
||||||
|
.replace("\r\n", "\n")
|
||||||
|
.replace('\r', '\n')
|
||||||
|
.replace('\u00A0', ' ')
|
||||||
|
.replace(':', ':');
|
||||||
|
|
||||||
|
List<String> originalLines = toLines(content);
|
||||||
|
Map<String, Integer> lineCounts = new HashMap<String, Integer>();
|
||||||
|
for (String line : originalLines) {
|
||||||
|
if (line.length() >= 4 && line.length() <= 60) {
|
||||||
|
Integer count = lineCounts.get(line);
|
||||||
|
lineCounts.put(line, count == null ? Integer.valueOf(1) : Integer.valueOf(count.intValue() + 1));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
List<String> filteredLines = new ArrayList<String>();
|
||||||
|
for (String line : originalLines) {
|
||||||
|
if (!StringUtil.hasText(line)) {
|
||||||
|
filteredLines.add("");
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (PAGE_NUMBER.matcher(line).matches()) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
Integer repeated = lineCounts.get(line);
|
||||||
|
if (repeated != null && repeated.intValue() >= 3 && line.length() <= 40) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
filteredLines.add(line);
|
||||||
|
}
|
||||||
|
|
||||||
|
List<String> mergedLines = new ArrayList<String>();
|
||||||
|
for (String line : filteredLines) {
|
||||||
|
if (mergedLines.isEmpty()) {
|
||||||
|
mergedLines.add(line);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
String previous = mergedLines.get(mergedLines.size() - 1);
|
||||||
|
if (!StringUtil.hasText(previous) || !StringUtil.hasText(line)) {
|
||||||
|
mergedLines.add(line);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (shouldMerge(previous, line)) {
|
||||||
|
mergedLines.set(mergedLines.size() - 1, previous + joinToken(previous, line) + line);
|
||||||
|
} else {
|
||||||
|
mergedLines.add(line);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return String.join("\n", mergedLines)
|
||||||
|
.replaceAll("[ \\t]{2,}", " ")
|
||||||
|
.replaceAll("\\n{3,}", "\n\n")
|
||||||
|
.trim();
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean shouldMerge(String previous, String current) {
|
||||||
|
if (isHeading(previous) || isHeading(current)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (QUESTION_LINE.matcher(current).matches() || ANSWER_LINE.matcher(current).matches()) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (TOC_LINE.matcher(previous).matches() || TOC_LINE.matcher(current).matches()) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
char previousChar = previous.charAt(previous.length() - 1);
|
||||||
|
if ("。!?.!?:;:;".indexOf(previousChar) >= 0) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return current.length() < 80;
|
||||||
|
}
|
||||||
|
|
||||||
|
private String joinToken(String previous, String current) {
|
||||||
|
char last = previous.charAt(previous.length() - 1);
|
||||||
|
char first = current.charAt(0);
|
||||||
|
if (Character.isLetterOrDigit(last) && Character.isLetterOrDigit(first)) {
|
||||||
|
return " ";
|
||||||
|
}
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean hasAnswerNearby(List<String> lines, int index) {
|
||||||
|
int end = Math.min(lines.size(), index + 4);
|
||||||
|
for (int i = index + 1; i < end; i++) {
|
||||||
|
if (ANSWER_LINE.matcher(lines.get(i)).matches()) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean isHeading(String line) {
|
||||||
|
return MARKDOWN_HEADING.matcher(line).matches() || isOutlineHeading(line);
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean isOutlineHeading(String line) {
|
||||||
|
return CHINESE_CHAPTER.matcher(line).matches()
|
||||||
|
|| CHINESE_SECTION.matcher(line).matches()
|
||||||
|
|| CHINESE_SUBSECTION.matcher(line).matches()
|
||||||
|
|| NUMERIC_SECTION.matcher(line).matches()
|
||||||
|
|| ENGLISH_SECTION.matcher(line).matches()
|
||||||
|
|| ENGLISH_ROMAN.matcher(line).matches();
|
||||||
|
}
|
||||||
|
|
||||||
|
private int countMarkdownLevel(String line) {
|
||||||
|
Matcher matcher = Pattern.compile("^(#{1,6})\\s+").matcher(line);
|
||||||
|
if (!matcher.find()) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
return matcher.group(1).length();
|
||||||
|
}
|
||||||
|
|
||||||
|
private List<String> splitParagraphs(String normalizedContent) {
|
||||||
|
String[] parts = normalizedContent.split("\\n\\s*\\n");
|
||||||
|
List<String> paragraphs = new ArrayList<String>();
|
||||||
|
for (String part : parts) {
|
||||||
|
String paragraph = part.trim();
|
||||||
|
if (StringUtil.hasText(paragraph)) {
|
||||||
|
paragraphs.add(paragraph);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return paragraphs;
|
||||||
|
}
|
||||||
|
|
||||||
|
private List<String> toLines(String content) {
|
||||||
|
String[] rawLines = content.split("\\n");
|
||||||
|
List<String> lines = new ArrayList<String>(rawLines.length);
|
||||||
|
for (String rawLine : rawLines) {
|
||||||
|
lines.add(rawLine == null ? "" : rawLine.trim());
|
||||||
|
}
|
||||||
|
return lines;
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,388 @@
|
|||||||
|
package com.easyagents.rag.ingestion.chunk;
|
||||||
|
|
||||||
|
import com.easyagents.core.document.Document;
|
||||||
|
import com.easyagents.core.document.DocumentSplitter;
|
||||||
|
import com.easyagents.core.document.splitter.RegexDocumentSplitter;
|
||||||
|
import com.easyagents.core.document.splitter.SimpleDocumentSplitter;
|
||||||
|
import com.easyagents.core.util.StringUtil;
|
||||||
|
import com.easyagents.rag.core.*;
|
||||||
|
import com.easyagents.rag.ingestion.model.AnalysisResult;
|
||||||
|
import com.easyagents.rag.ingestion.model.StrategyConfig;
|
||||||
|
|
||||||
|
import java.util.*;
|
||||||
|
import java.util.regex.Matcher;
|
||||||
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
|
public class RagSplitStrategyRegistry {
|
||||||
|
|
||||||
|
private static final Pattern MARKDOWN_HEADING = Pattern.compile("^(#{1,6})\\s+(.*)$");
|
||||||
|
private static final Pattern QUESTION_PREFIX = Pattern.compile("^(Q|QUESTION|问|问题|FAQ\\s*[0-9]+)\\s*[.::-]\\s*(.+)$", Pattern.CASE_INSENSITIVE);
|
||||||
|
private static final Pattern ANSWER_PREFIX = Pattern.compile("^(A|ANSWER|答|答案)\\s*[.::-]\\s*(.+)$", Pattern.CASE_INSENSITIVE);
|
||||||
|
|
||||||
|
public List<RagChunk> split(AnalysisResult analysisResult, StrategyConfig strategyConfig) {
|
||||||
|
String strategyCode = strategyConfig.getStrategyCode();
|
||||||
|
if (!StringUtil.hasText(strategyCode) || RagStrategyCodes.AUTO.equals(strategyCode)) {
|
||||||
|
strategyCode = analysisResult.getRecommendedStrategyCode();
|
||||||
|
}
|
||||||
|
String normalizedContent = analysisResult.getNormalizedContent();
|
||||||
|
if (RagStrategyCodes.MARKDOWN_SECTION.equals(strategyCode)) {
|
||||||
|
return buildMarkdownChunks(normalizedContent, strategyConfig);
|
||||||
|
}
|
||||||
|
if (RagStrategyCodes.OUTLINE_SECTION.equals(strategyCode)) {
|
||||||
|
return buildOutlineChunks(normalizedContent, strategyConfig);
|
||||||
|
}
|
||||||
|
if (RagStrategyCodes.QA_PAIR.equals(strategyCode)) {
|
||||||
|
return buildQaChunks(normalizedContent, strategyConfig);
|
||||||
|
}
|
||||||
|
if (RagStrategyCodes.CUSTOM_REGEX.equals(strategyCode)) {
|
||||||
|
return buildRegexChunks(normalizedContent, strategyConfig);
|
||||||
|
}
|
||||||
|
return buildParagraphChunks(normalizedContent, strategyConfig);
|
||||||
|
}
|
||||||
|
|
||||||
|
private List<RagChunk> buildMarkdownChunks(String content, StrategyConfig strategyConfig) {
|
||||||
|
List<String> lines = Arrays.asList(content.split("\\n"));
|
||||||
|
List<SectionChunk> sections = new ArrayList<SectionChunk>();
|
||||||
|
Deque<HeadingLevel> stack = new ArrayDeque<HeadingLevel>();
|
||||||
|
SectionChunk current = null;
|
||||||
|
for (String rawLine : lines) {
|
||||||
|
String line = rawLine.trim();
|
||||||
|
Matcher matcher = MARKDOWN_HEADING.matcher(line);
|
||||||
|
if (matcher.matches()) {
|
||||||
|
if (current != null) {
|
||||||
|
sections.add(current);
|
||||||
|
}
|
||||||
|
int level = matcher.group(1).length();
|
||||||
|
while (!stack.isEmpty() && stack.peekLast().level >= level) {
|
||||||
|
stack.removeLast();
|
||||||
|
}
|
||||||
|
stack.addLast(new HeadingLevel(level, matcher.group(2).trim()));
|
||||||
|
current = new SectionChunk(copyPath(stack), matcher.group(2).trim());
|
||||||
|
current.lines.add(line);
|
||||||
|
} else {
|
||||||
|
if (current == null) {
|
||||||
|
current = new SectionChunk(Collections.singletonList("未命名段落"), "未命名段落");
|
||||||
|
}
|
||||||
|
current.lines.add(rawLine);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (current != null) {
|
||||||
|
sections.add(current);
|
||||||
|
}
|
||||||
|
return finalizeSectionChunks(sections, strategyConfig);
|
||||||
|
}
|
||||||
|
|
||||||
|
private List<RagChunk> buildOutlineChunks(String content, StrategyConfig strategyConfig) {
|
||||||
|
List<String> lines = Arrays.asList(content.split("\\n"));
|
||||||
|
List<SectionChunk> sections = new ArrayList<SectionChunk>();
|
||||||
|
Deque<HeadingLevel> stack = new ArrayDeque<HeadingLevel>();
|
||||||
|
SectionChunk current = null;
|
||||||
|
for (String rawLine : lines) {
|
||||||
|
String line = rawLine.trim();
|
||||||
|
OutlineHeading heading = OutlineHeading.parse(line);
|
||||||
|
if (heading != null) {
|
||||||
|
if (current != null) {
|
||||||
|
sections.add(current);
|
||||||
|
}
|
||||||
|
while (!stack.isEmpty() && stack.peekLast().level >= heading.level) {
|
||||||
|
stack.removeLast();
|
||||||
|
}
|
||||||
|
stack.addLast(new HeadingLevel(heading.level, heading.title));
|
||||||
|
current = new SectionChunk(copyPath(stack), heading.title);
|
||||||
|
current.lines.add(line);
|
||||||
|
} else {
|
||||||
|
if (current == null) {
|
||||||
|
current = new SectionChunk(Collections.singletonList("未命名段落"), "未命名段落");
|
||||||
|
}
|
||||||
|
current.lines.add(rawLine);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (current != null) {
|
||||||
|
sections.add(current);
|
||||||
|
}
|
||||||
|
return finalizeSectionChunks(sections, strategyConfig);
|
||||||
|
}
|
||||||
|
|
||||||
|
private List<RagChunk> finalizeSectionChunks(List<SectionChunk> sections, StrategyConfig strategyConfig) {
|
||||||
|
List<RagChunk> result = new ArrayList<RagChunk>();
|
||||||
|
int index = 1;
|
||||||
|
for (SectionChunk section : sections) {
|
||||||
|
String content = joinAndTrim(section.lines);
|
||||||
|
if (!StringUtil.hasText(content) || content.equals(section.sourceLabel)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (content.length() <= safeChunkSize(strategyConfig)) {
|
||||||
|
result.add(createChunk(RagChunkTypes.SECTION, section.sourceLabel, section.headingPath, content, index++, 1, 1));
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
List<String> subContents = splitLongContent(content, strategyConfig.getChunkSize());
|
||||||
|
int total = subContents.size();
|
||||||
|
for (int i = 0; i < subContents.size(); i++) {
|
||||||
|
result.add(createChunk(RagChunkTypes.SECTION, section.sourceLabel, section.headingPath, subContents.get(i), index++, i + 1, total));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return postProcess(result);
|
||||||
|
}
|
||||||
|
|
||||||
|
private List<RagChunk> buildQaChunks(String content, StrategyConfig strategyConfig) {
|
||||||
|
List<RagChunk> result = new ArrayList<RagChunk>();
|
||||||
|
String currentQuestion = null;
|
||||||
|
StringBuilder answerBuilder = new StringBuilder();
|
||||||
|
StringBuilder questionBuilder = new StringBuilder();
|
||||||
|
int qaIndex = 1;
|
||||||
|
|
||||||
|
for (String rawLine : content.split("\\n")) {
|
||||||
|
String line = rawLine.trim();
|
||||||
|
if (!StringUtil.hasText(line)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
Matcher questionMatcher = QUESTION_PREFIX.matcher(line);
|
||||||
|
Matcher answerMatcher = ANSWER_PREFIX.matcher(line);
|
||||||
|
if (questionMatcher.matches()) {
|
||||||
|
qaIndex = flushQaChunk(result, currentQuestion, questionBuilder, answerBuilder, qaIndex, strategyConfig);
|
||||||
|
currentQuestion = questionMatcher.group(2).trim();
|
||||||
|
questionBuilder = new StringBuilder(currentQuestion);
|
||||||
|
answerBuilder = new StringBuilder();
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (answerMatcher.matches()) {
|
||||||
|
if (answerBuilder.length() > 0) {
|
||||||
|
answerBuilder.append('\n');
|
||||||
|
}
|
||||||
|
answerBuilder.append(answerMatcher.group(2).trim());
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (answerBuilder.length() > 0) {
|
||||||
|
answerBuilder.append('\n').append(rawLine.trim());
|
||||||
|
} else if (questionBuilder.length() > 0) {
|
||||||
|
questionBuilder.append('\n').append(rawLine.trim());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
flushQaChunk(result, currentQuestion, questionBuilder, answerBuilder, qaIndex, strategyConfig);
|
||||||
|
return postProcess(result);
|
||||||
|
}
|
||||||
|
|
||||||
|
private int flushQaChunk(List<RagChunk> result,
|
||||||
|
String currentQuestion,
|
||||||
|
StringBuilder questionBuilder,
|
||||||
|
StringBuilder answerBuilder,
|
||||||
|
int qaIndex,
|
||||||
|
StrategyConfig strategyConfig) {
|
||||||
|
if (!StringUtil.hasText(currentQuestion)) {
|
||||||
|
return qaIndex;
|
||||||
|
}
|
||||||
|
if (!StringUtil.hasText(answerBuilder.toString())) {
|
||||||
|
return qaIndex;
|
||||||
|
}
|
||||||
|
String question = questionBuilder.toString().trim();
|
||||||
|
String answer = answerBuilder.toString().trim();
|
||||||
|
String baseContent = "问题:" + question + "\n答案:" + answer;
|
||||||
|
List<String> subContents = baseContent.length() > safeChunkSize(strategyConfig)
|
||||||
|
? splitLongContent(baseContent, strategyConfig.getChunkSize())
|
||||||
|
: Collections.singletonList(baseContent);
|
||||||
|
int total = subContents.size();
|
||||||
|
for (int i = 0; i < subContents.size(); i++) {
|
||||||
|
RagChunk chunk = createChunk(RagChunkTypes.QA_PAIR, "Q" + qaIndex + " " + question, Collections.<String>emptyList(), subContents.get(i), result.size() + 1, i + 1, total);
|
||||||
|
chunk.setQuestion(question);
|
||||||
|
chunk.setAnswer(answer);
|
||||||
|
chunk.getOptions().put(RagMetadataKeys.QA_GROUP_ID, "qa-" + qaIndex);
|
||||||
|
result.add(chunk);
|
||||||
|
}
|
||||||
|
return qaIndex + 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
private List<RagChunk> buildParagraphChunks(String content, StrategyConfig strategyConfig) {
|
||||||
|
List<RagChunk> result = new ArrayList<RagChunk>();
|
||||||
|
DocumentSplitter splitter = new SimpleDocumentSplitter(safeChunkSize(strategyConfig), safeOverlap(strategyConfig));
|
||||||
|
List<Document> docs = splitter.split(new Document(content));
|
||||||
|
int index = 1;
|
||||||
|
for (Document doc : docs) {
|
||||||
|
result.add(createChunk(RagChunkTypes.PARAGRAPH, "分块 " + index, Collections.<String>emptyList(), doc.getContent(), index, 1, 1));
|
||||||
|
index++;
|
||||||
|
}
|
||||||
|
return postProcess(result);
|
||||||
|
}
|
||||||
|
|
||||||
|
private List<RagChunk> buildRegexChunks(String content, StrategyConfig strategyConfig) {
|
||||||
|
String regex = StringUtil.hasText(strategyConfig.getRegex()) ? strategyConfig.getRegex() : "\\n\\s*\\n";
|
||||||
|
DocumentSplitter splitter = new RegexDocumentSplitter(regex);
|
||||||
|
List<Document> docs = splitter.split(new Document(content));
|
||||||
|
List<RagChunk> result = new ArrayList<RagChunk>();
|
||||||
|
int index = 1;
|
||||||
|
for (Document doc : docs) {
|
||||||
|
result.add(createChunk(RagChunkTypes.PARAGRAPH, "正则分块 " + index, Collections.<String>emptyList(), doc.getContent(), index, 1, 1));
|
||||||
|
index++;
|
||||||
|
}
|
||||||
|
return postProcess(result);
|
||||||
|
}
|
||||||
|
|
||||||
|
private List<String> splitLongContent(String content, Integer chunkSize) {
|
||||||
|
int size = chunkSize == null || chunkSize.intValue() <= 0 ? RagDefaults.CHUNK_SIZE : chunkSize.intValue();
|
||||||
|
String[] paragraphs = content.split("\\n\\s*\\n");
|
||||||
|
List<String> parts = new ArrayList<String>();
|
||||||
|
StringBuilder current = new StringBuilder();
|
||||||
|
for (String paragraph : paragraphs) {
|
||||||
|
String text = paragraph.trim();
|
||||||
|
if (!StringUtil.hasText(text)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (current.length() > 0 && current.length() + text.length() + 2 > size) {
|
||||||
|
parts.add(current.toString().trim());
|
||||||
|
current = new StringBuilder();
|
||||||
|
}
|
||||||
|
if (current.length() > 0) {
|
||||||
|
current.append("\n\n");
|
||||||
|
}
|
||||||
|
current.append(text);
|
||||||
|
}
|
||||||
|
if (current.length() > 0) {
|
||||||
|
parts.add(current.toString().trim());
|
||||||
|
}
|
||||||
|
if (parts.isEmpty()) {
|
||||||
|
parts.add(content);
|
||||||
|
}
|
||||||
|
return parts;
|
||||||
|
}
|
||||||
|
|
||||||
|
private List<RagChunk> postProcess(List<RagChunk> chunks) {
|
||||||
|
List<RagChunk> result = new ArrayList<RagChunk>();
|
||||||
|
Set<String> dedup = new HashSet<String>();
|
||||||
|
int index = 1;
|
||||||
|
for (RagChunk chunk : chunks) {
|
||||||
|
String content = chunk.getContent() == null ? "" : chunk.getContent().trim();
|
||||||
|
if (!StringUtil.hasText(content)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (content.length() < 10 && !RagChunkTypes.QA_PAIR.equals(chunk.getChunkType())) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
String dedupKey = content.replaceAll("\\s+", " ");
|
||||||
|
if (!dedup.add(dedupKey)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
chunk.setChunkId("chunk-" + index);
|
||||||
|
chunk.setCharCount(Integer.valueOf(content.length()));
|
||||||
|
chunk.setTokenEstimate(Integer.valueOf(Math.max(1, content.length() / 4)));
|
||||||
|
result.add(chunk);
|
||||||
|
index++;
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
private RagChunk createChunk(String chunkType,
|
||||||
|
String sourceLabel,
|
||||||
|
List<String> headingPath,
|
||||||
|
String content,
|
||||||
|
int index,
|
||||||
|
int partNo,
|
||||||
|
int partTotal) {
|
||||||
|
RagChunk chunk = new RagChunk();
|
||||||
|
chunk.setChunkId("chunk-" + index);
|
||||||
|
chunk.setChunkType(chunkType);
|
||||||
|
chunk.setSourceLabel(sourceLabel);
|
||||||
|
chunk.setHeadingPath(new ArrayList<String>(headingPath));
|
||||||
|
chunk.setContent(content.trim());
|
||||||
|
chunk.setPartNo(Integer.valueOf(partNo));
|
||||||
|
chunk.setPartTotal(Integer.valueOf(partTotal));
|
||||||
|
if (!headingPath.isEmpty()) {
|
||||||
|
chunk.getOptions().put(RagMetadataKeys.HEADING_PATH, new ArrayList<String>(headingPath));
|
||||||
|
}
|
||||||
|
if (RagChunkTypes.SECTION.equals(chunkType)) {
|
||||||
|
chunk.getOptions().put(RagMetadataKeys.SOURCE_LABEL, sourceLabel);
|
||||||
|
}
|
||||||
|
return chunk;
|
||||||
|
}
|
||||||
|
|
||||||
|
private int safeChunkSize(StrategyConfig strategyConfig) {
|
||||||
|
Integer chunkSize = strategyConfig.getChunkSize();
|
||||||
|
return chunkSize == null || chunkSize.intValue() <= 0 ? RagDefaults.CHUNK_SIZE : chunkSize.intValue();
|
||||||
|
}
|
||||||
|
|
||||||
|
private int safeOverlap(StrategyConfig strategyConfig) {
|
||||||
|
Integer overlapSize = strategyConfig.getOverlapSize();
|
||||||
|
return overlapSize == null || overlapSize.intValue() < 0 ? RagDefaults.OVERLAP_SIZE : overlapSize.intValue();
|
||||||
|
}
|
||||||
|
|
||||||
|
private String joinAndTrim(List<String> lines) {
|
||||||
|
String value = String.join("\n", lines).trim();
|
||||||
|
return value.replaceAll("\\n{3,}", "\n\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
private List<String> copyPath(Deque<HeadingLevel> stack) {
|
||||||
|
List<String> path = new ArrayList<String>();
|
||||||
|
for (HeadingLevel item : stack) {
|
||||||
|
path.add(0, item.title);
|
||||||
|
}
|
||||||
|
return path;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static class SectionChunk {
|
||||||
|
private final List<String> headingPath;
|
||||||
|
private final String sourceLabel;
|
||||||
|
private final List<String> lines = new ArrayList<String>();
|
||||||
|
|
||||||
|
private SectionChunk(List<String> headingPath, String sourceLabel) {
|
||||||
|
this.headingPath = headingPath;
|
||||||
|
this.sourceLabel = sourceLabel;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static class HeadingLevel {
|
||||||
|
private final int level;
|
||||||
|
private final String title;
|
||||||
|
|
||||||
|
private HeadingLevel(int level, String title) {
|
||||||
|
this.level = level;
|
||||||
|
this.title = title;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static class OutlineHeading {
|
||||||
|
private static final Pattern CHINESE_CHAPTER = Pattern.compile("^第[一二三四五六七八九十百零两0-9]+[章节篇部分卷]\\s*(.*)$");
|
||||||
|
private static final Pattern CHINESE_SECTION = Pattern.compile("^([一二三四五六七八九十百零]+[、..])\\s*(\\S+.*)$");
|
||||||
|
private static final Pattern CHINESE_SUBSECTION = Pattern.compile("^[((]([一二三四五六七八九十百零0-9]+)[))]\\s*(\\S+.*)$");
|
||||||
|
private static final Pattern NUMERIC_SECTION = Pattern.compile("^([0-9]+(?:\\.[0-9]+){0,4})\\s+(\\S+.*)$");
|
||||||
|
private static final Pattern ENGLISH_SECTION = Pattern.compile("^(Chapter|Section|Part)\\s+([0-9IVXLC]+)(.*)$", Pattern.CASE_INSENSITIVE);
|
||||||
|
private static final Pattern ENGLISH_ROMAN = Pattern.compile("^([IVXLC]+)[.、)\\s-]+(\\S+.*)$");
|
||||||
|
|
||||||
|
private final int level;
|
||||||
|
private final String title;
|
||||||
|
|
||||||
|
private OutlineHeading(int level, String title) {
|
||||||
|
this.level = level;
|
||||||
|
this.title = title;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static OutlineHeading parse(String line) {
|
||||||
|
Matcher matcher = CHINESE_CHAPTER.matcher(line);
|
||||||
|
if (matcher.matches()) {
|
||||||
|
return new OutlineHeading(1, line.trim());
|
||||||
|
}
|
||||||
|
matcher = CHINESE_SECTION.matcher(line);
|
||||||
|
if (matcher.matches()) {
|
||||||
|
return new OutlineHeading(2, line.trim());
|
||||||
|
}
|
||||||
|
matcher = CHINESE_SUBSECTION.matcher(line);
|
||||||
|
if (matcher.matches()) {
|
||||||
|
return new OutlineHeading(3, line.trim());
|
||||||
|
}
|
||||||
|
matcher = NUMERIC_SECTION.matcher(line);
|
||||||
|
if (matcher.matches()) {
|
||||||
|
String code = matcher.group(1);
|
||||||
|
int level = code.split("\\.").length;
|
||||||
|
return new OutlineHeading(level, line.trim());
|
||||||
|
}
|
||||||
|
matcher = ENGLISH_SECTION.matcher(line);
|
||||||
|
if (matcher.matches()) {
|
||||||
|
String prefix = matcher.group(1).toLowerCase();
|
||||||
|
int level = "chapter".equals(prefix) ? 1 : ("section".equals(prefix) ? 2 : 1);
|
||||||
|
return new OutlineHeading(level, line.trim());
|
||||||
|
}
|
||||||
|
matcher = ENGLISH_ROMAN.matcher(line);
|
||||||
|
if (matcher.matches()) {
|
||||||
|
return new OutlineHeading(2, line.trim());
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,92 @@
|
|||||||
|
package com.easyagents.rag.ingestion.model;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.LinkedHashMap;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
public class AnalysisResult implements Serializable {
|
||||||
|
|
||||||
|
private String sourceFormat;
|
||||||
|
private String normalizedContent;
|
||||||
|
private String recommendedStructureType;
|
||||||
|
private String recommendedStrategyCode;
|
||||||
|
private String recommendedStrategyLabel;
|
||||||
|
private Double confidence;
|
||||||
|
private List<String> reasons = new ArrayList<String>();
|
||||||
|
private List<CandidateStrategy> candidateStrategies = new ArrayList<CandidateStrategy>();
|
||||||
|
private Map<String, Object> features = new LinkedHashMap<String, Object>();
|
||||||
|
|
||||||
|
public String getSourceFormat() {
|
||||||
|
return sourceFormat;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setSourceFormat(String sourceFormat) {
|
||||||
|
this.sourceFormat = sourceFormat;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getNormalizedContent() {
|
||||||
|
return normalizedContent;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setNormalizedContent(String normalizedContent) {
|
||||||
|
this.normalizedContent = normalizedContent;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getRecommendedStructureType() {
|
||||||
|
return recommendedStructureType;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setRecommendedStructureType(String recommendedStructureType) {
|
||||||
|
this.recommendedStructureType = recommendedStructureType;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getRecommendedStrategyCode() {
|
||||||
|
return recommendedStrategyCode;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setRecommendedStrategyCode(String recommendedStrategyCode) {
|
||||||
|
this.recommendedStrategyCode = recommendedStrategyCode;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getRecommendedStrategyLabel() {
|
||||||
|
return recommendedStrategyLabel;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setRecommendedStrategyLabel(String recommendedStrategyLabel) {
|
||||||
|
this.recommendedStrategyLabel = recommendedStrategyLabel;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Double getConfidence() {
|
||||||
|
return confidence;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setConfidence(Double confidence) {
|
||||||
|
this.confidence = confidence;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<String> getReasons() {
|
||||||
|
return reasons;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setReasons(List<String> reasons) {
|
||||||
|
this.reasons = reasons;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<CandidateStrategy> getCandidateStrategies() {
|
||||||
|
return candidateStrategies;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setCandidateStrategies(List<CandidateStrategy> candidateStrategies) {
|
||||||
|
this.candidateStrategies = candidateStrategies;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Map<String, Object> getFeatures() {
|
||||||
|
return features;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setFeatures(Map<String, Object> features) {
|
||||||
|
this.features = features;
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,43 @@
|
|||||||
|
package com.easyagents.rag.ingestion.model;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
|
||||||
|
/**
 * One scored split-strategy candidate: the strategy code, its display label and the score
 * it received.
 */
public class CandidateStrategy implements Serializable {

    /** Strategy code. */
    private String strategyCode;

    /** Display label of the strategy. */
    private String strategyLabel;

    /** Score assigned to the strategy. */
    private Double score;

    /** Creates a candidate with all properties unset. */
    public CandidateStrategy() {
        this(null, null, null);
    }

    /**
     * Creates a fully populated candidate.
     *
     * @param strategyCode  strategy code
     * @param strategyLabel display label of the strategy
     * @param score         score assigned to the strategy
     */
    public CandidateStrategy(String strategyCode, String strategyLabel, Double score) {
        this.strategyCode = strategyCode;
        this.strategyLabel = strategyLabel;
        this.score = score;
    }

    public String getStrategyCode() {
        return this.strategyCode;
    }

    public void setStrategyCode(String strategyCode) {
        this.strategyCode = strategyCode;
    }

    public String getStrategyLabel() {
        return this.strategyLabel;
    }

    public void setStrategyLabel(String strategyLabel) {
        this.strategyLabel = strategyLabel;
    }

    public Double getScore() {
        return this.score;
    }

    public void setScore(Double score) {
        this.score = score;
    }
}
|
||||||
@@ -0,0 +1,79 @@
|
|||||||
|
package com.easyagents.rag.ingestion.model;
|
||||||
|
|
||||||
|
import com.easyagents.rag.core.RagDefaults;
|
||||||
|
import com.easyagents.rag.core.RagStrategyCodes;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
|
||||||
|
public class StrategyConfig implements Serializable {
|
||||||
|
|
||||||
|
private String strategyCode = RagStrategyCodes.AUTO;
|
||||||
|
private Integer chunkSize = RagDefaults.CHUNK_SIZE;
|
||||||
|
private Integer overlapSize = RagDefaults.OVERLAP_SIZE;
|
||||||
|
private String regex;
|
||||||
|
private Integer rowsPerChunk = RagDefaults.ROWS_PER_CHUNK;
|
||||||
|
private Integer mdSplitterLevel = RagDefaults.MD_SPLITTER_LEVEL;
|
||||||
|
|
||||||
|
public static StrategyConfig defaults() {
|
||||||
|
return new StrategyConfig();
|
||||||
|
}
|
||||||
|
|
||||||
|
public StrategyConfig copy() {
|
||||||
|
StrategyConfig copy = new StrategyConfig();
|
||||||
|
copy.setStrategyCode(this.strategyCode);
|
||||||
|
copy.setChunkSize(this.chunkSize);
|
||||||
|
copy.setOverlapSize(this.overlapSize);
|
||||||
|
copy.setRegex(this.regex);
|
||||||
|
copy.setRowsPerChunk(this.rowsPerChunk);
|
||||||
|
copy.setMdSplitterLevel(this.mdSplitterLevel);
|
||||||
|
return copy;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getStrategyCode() {
|
||||||
|
return strategyCode;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setStrategyCode(String strategyCode) {
|
||||||
|
this.strategyCode = strategyCode;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Integer getChunkSize() {
|
||||||
|
return chunkSize;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setChunkSize(Integer chunkSize) {
|
||||||
|
this.chunkSize = chunkSize;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Integer getOverlapSize() {
|
||||||
|
return overlapSize;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setOverlapSize(Integer overlapSize) {
|
||||||
|
this.overlapSize = overlapSize;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getRegex() {
|
||||||
|
return regex;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setRegex(String regex) {
|
||||||
|
this.regex = regex;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Integer getRowsPerChunk() {
|
||||||
|
return rowsPerChunk;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setRowsPerChunk(Integer rowsPerChunk) {
|
||||||
|
this.rowsPerChunk = rowsPerChunk;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Integer getMdSplitterLevel() {
|
||||||
|
return mdSplitterLevel;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setMdSplitterLevel(Integer mdSplitterLevel) {
|
||||||
|
this.mdSplitterLevel = mdSplitterLevel;
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,133 @@
|
|||||||
|
package com.easyagents.rag.ingestion.recommend;
|
||||||
|
|
||||||
|
import com.easyagents.rag.core.RagStrategyCodes;
|
||||||
|
import com.easyagents.rag.core.RagStructureTypes;
|
||||||
|
import com.easyagents.rag.ingestion.model.AnalysisResult;
|
||||||
|
import com.easyagents.rag.ingestion.model.CandidateStrategy;
|
||||||
|
|
||||||
|
import java.math.BigDecimal;
|
||||||
|
import java.math.RoundingMode;
|
||||||
|
import java.util.*;
|
||||||
|
|
||||||
|
public class SplitStrategyRecommender {
|
||||||
|
|
||||||
|
public AnalysisResult recommend(AnalysisResult analysisResult) {
|
||||||
|
Map<String, Object> features = analysisResult.getFeatures();
|
||||||
|
String sourceFormat = safeLowercase(analysisResult.getSourceFormat());
|
||||||
|
|
||||||
|
double markdownScore = number(features.get("markdownHeadingCount")) * 12
|
||||||
|
+ number(features.get("markdownLevelVariety")) * 8
|
||||||
|
+ ("md".equals(sourceFormat) ? 20 : 0);
|
||||||
|
double outlineScore = number(features.get("outlineHeadingCount")) * 10
|
||||||
|
+ (("pdf".equals(sourceFormat) || "docx".equals(sourceFormat)) ? 5 : 0)
|
||||||
|
- number(features.get("tocLineCount")) * 4;
|
||||||
|
double qaScore = number(features.get("qaQuestionCount")) * 10
|
||||||
|
+ number(features.get("qaAnswerCount")) * 10
|
||||||
|
+ number(features.get("pairedQaCount")) * 18;
|
||||||
|
double plainScore = 18
|
||||||
|
+ number(features.get("paragraphCount")) * 2
|
||||||
|
+ number(features.get("longParagraphCount")) * 3;
|
||||||
|
|
||||||
|
Map<String, Double> scoreMap = new LinkedHashMap<String, Double>();
|
||||||
|
scoreMap.put(RagStrategyCodes.MARKDOWN_SECTION, Double.valueOf(markdownScore));
|
||||||
|
scoreMap.put(RagStrategyCodes.OUTLINE_SECTION, Double.valueOf(outlineScore));
|
||||||
|
scoreMap.put(RagStrategyCodes.QA_PAIR, Double.valueOf(qaScore));
|
||||||
|
scoreMap.put(RagStrategyCodes.PARAGRAPH_LENGTH, Double.valueOf(plainScore));
|
||||||
|
|
||||||
|
List<Map.Entry<String, Double>> ranking = new ArrayList<Map.Entry<String, Double>>(scoreMap.entrySet());
|
||||||
|
ranking.sort((left, right) -> Double.compare(right.getValue().doubleValue(), left.getValue().doubleValue()));
|
||||||
|
|
||||||
|
Map.Entry<String, Double> best = ranking.get(0);
|
||||||
|
Map.Entry<String, Double> second = ranking.size() > 1 ? ranking.get(1) : best;
|
||||||
|
double confidence = computeConfidence(best.getValue().doubleValue(), second.getValue().doubleValue());
|
||||||
|
|
||||||
|
String recommendedStrategy = confidence < 0.45D ? RagStrategyCodes.PARAGRAPH_LENGTH : best.getKey();
|
||||||
|
analysisResult.setRecommendedStrategyCode(recommendedStrategy);
|
||||||
|
analysisResult.setRecommendedStrategyLabel(toStrategyLabel(recommendedStrategy));
|
||||||
|
analysisResult.setRecommendedStructureType(toStructureType(recommendedStrategy));
|
||||||
|
analysisResult.setConfidence(Double.valueOf(scale(confidence)));
|
||||||
|
analysisResult.setReasons(buildReasons(features, recommendedStrategy, confidence));
|
||||||
|
|
||||||
|
List<CandidateStrategy> candidates = new ArrayList<CandidateStrategy>();
|
||||||
|
for (Map.Entry<String, Double> entry : ranking) {
|
||||||
|
candidates.add(new CandidateStrategy(entry.getKey(), toStrategyLabel(entry.getKey()), Double.valueOf(scale(entry.getValue().doubleValue()))));
|
||||||
|
}
|
||||||
|
analysisResult.setCandidateStrategies(candidates);
|
||||||
|
return analysisResult;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String toStructureType(String strategyCode) {
|
||||||
|
if (RagStrategyCodes.MARKDOWN_SECTION.equals(strategyCode)) {
|
||||||
|
return RagStructureTypes.MARKDOWN_HEADING;
|
||||||
|
}
|
||||||
|
if (RagStrategyCodes.OUTLINE_SECTION.equals(strategyCode)) {
|
||||||
|
return RagStructureTypes.OUTLINE_SECTION;
|
||||||
|
}
|
||||||
|
if (RagStrategyCodes.QA_PAIR.equals(strategyCode)) {
|
||||||
|
return RagStructureTypes.QA_PAIR;
|
||||||
|
}
|
||||||
|
return RagStructureTypes.PLAIN_PARAGRAPH;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String toStrategyLabel(String strategyCode) {
|
||||||
|
if (RagStrategyCodes.MARKDOWN_SECTION.equals(strategyCode)) {
|
||||||
|
return "Markdown 标题拆分";
|
||||||
|
}
|
||||||
|
if (RagStrategyCodes.OUTLINE_SECTION.equals(strategyCode)) {
|
||||||
|
return "章节标题拆分";
|
||||||
|
}
|
||||||
|
if (RagStrategyCodes.QA_PAIR.equals(strategyCode)) {
|
||||||
|
return "问答对拆分";
|
||||||
|
}
|
||||||
|
if (RagStrategyCodes.CUSTOM_REGEX.equals(strategyCode)) {
|
||||||
|
return "自定义正则拆分";
|
||||||
|
}
|
||||||
|
if (RagStrategyCodes.AUTO.equals(strategyCode)) {
|
||||||
|
return "自动推荐";
|
||||||
|
}
|
||||||
|
return "自然段长度拆分";
|
||||||
|
}
|
||||||
|
|
||||||
|
private List<String> buildReasons(Map<String, Object> features, String strategyCode, double confidence) {
|
||||||
|
List<String> reasons = new ArrayList<String>();
|
||||||
|
if (RagStrategyCodes.MARKDOWN_SECTION.equals(strategyCode)) {
|
||||||
|
reasons.add("检测到 Markdown 标题结构,适合按标题层级拆分");
|
||||||
|
reasons.add("标题层级数:" + number(features.get("markdownLevelVariety")) + ",标题数量:" + number(features.get("markdownHeadingCount")));
|
||||||
|
return reasons;
|
||||||
|
}
|
||||||
|
if (RagStrategyCodes.OUTLINE_SECTION.equals(strategyCode)) {
|
||||||
|
reasons.add("检测到中英文标题/章节编号,适合按章节拆分");
|
||||||
|
reasons.add("章节标题数量:" + number(features.get("outlineHeadingCount")));
|
||||||
|
return reasons;
|
||||||
|
}
|
||||||
|
if (RagStrategyCodes.QA_PAIR.equals(strategyCode)) {
|
||||||
|
reasons.add("检测到问答结构,适合按一问一答拆分");
|
||||||
|
reasons.add("问题数量:" + number(features.get("qaQuestionCount")) + ",成对问答数量:" + number(features.get("pairedQaCount")));
|
||||||
|
return reasons;
|
||||||
|
}
|
||||||
|
reasons.add("结构特征不够集中,回退为自然段长度拆分");
|
||||||
|
reasons.add("推荐置信度:" + scale(confidence));
|
||||||
|
return reasons;
|
||||||
|
}
|
||||||
|
|
||||||
|
private double computeConfidence(double bestScore, double secondScore) {
|
||||||
|
double delta = Math.max(0D, bestScore - secondScore);
|
||||||
|
double base = Math.min(1D, bestScore / 100D);
|
||||||
|
return Math.min(1D, Math.max(0.25D, base * 0.6D + Math.min(0.4D, delta / 50D)));
|
||||||
|
}
|
||||||
|
|
||||||
|
private double number(Object value) {
|
||||||
|
if (value instanceof Number) {
|
||||||
|
return ((Number) value).doubleValue();
|
||||||
|
}
|
||||||
|
return 0D;
|
||||||
|
}
|
||||||
|
|
||||||
|
private String safeLowercase(String value) {
|
||||||
|
return value == null ? "" : value.toLowerCase(Locale.ROOT);
|
||||||
|
}
|
||||||
|
|
||||||
|
private double scale(double value) {
|
||||||
|
return BigDecimal.valueOf(value).setScale(2, RoundingMode.HALF_UP).doubleValue();
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,80 @@
|
|||||||
|
package com.easyagents.rag.ingestion;
|
||||||
|
|
||||||
|
import com.easyagents.rag.core.RagChunk;
|
||||||
|
import com.easyagents.rag.core.RagChunkTypes;
|
||||||
|
import com.easyagents.rag.core.RagStrategyCodes;
|
||||||
|
import com.easyagents.rag.ingestion.analysis.DocumentStructureAnalyzer;
|
||||||
|
import com.easyagents.rag.ingestion.chunk.RagSplitStrategyRegistry;
|
||||||
|
import com.easyagents.rag.ingestion.model.AnalysisResult;
|
||||||
|
import com.easyagents.rag.ingestion.model.StrategyConfig;
|
||||||
|
import com.easyagents.rag.ingestion.recommend.SplitStrategyRecommender;
|
||||||
|
import org.junit.Assert;
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
public class RagIngestionPipelineTest {
|
||||||
|
|
||||||
|
private final DocumentStructureAnalyzer analyzer = new DocumentStructureAnalyzer();
|
||||||
|
private final SplitStrategyRecommender recommender = new SplitStrategyRecommender();
|
||||||
|
private final RagSplitStrategyRegistry registry = new RagSplitStrategyRegistry();
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void shouldRecommendMarkdownStrategy() {
|
||||||
|
String markdown = "# Quick Start\n"
|
||||||
|
+ "Welcome\n\n"
|
||||||
|
+ "## Install\n"
|
||||||
|
+ "Run npm install\n\n"
|
||||||
|
+ "## Usage\n"
|
||||||
|
+ "Run pnpm dev";
|
||||||
|
|
||||||
|
AnalysisResult analysis = recommender.recommend(analyzer.analyze(markdown, "md"));
|
||||||
|
|
||||||
|
Assert.assertEquals(RagStrategyCodes.MARKDOWN_SECTION, analysis.getRecommendedStrategyCode());
|
||||||
|
Assert.assertTrue(analysis.getConfidence().doubleValue() > 0.4D);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void shouldRecommendQaStrategyForEnglishAndChinese() {
|
||||||
|
String qa = "Q: How to reset password?\n"
|
||||||
|
+ "A: Open admin page and click reset.\n\n"
|
||||||
|
+ "问:默认密码是什么?\n"
|
||||||
|
+ "答:由系统配置统一决定。";
|
||||||
|
|
||||||
|
AnalysisResult analysis = recommender.recommend(analyzer.analyze(qa, "txt"));
|
||||||
|
|
||||||
|
Assert.assertEquals(RagStrategyCodes.QA_PAIR, analysis.getRecommendedStrategyCode());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void shouldSplitOutlineDocumentByHeadingPath() {
|
||||||
|
String outline = "第1章 总则\n适用范围说明。\n\n1.1 目标\n定义系统目标。\n\n1.2 范围\n定义系统范围。";
|
||||||
|
AnalysisResult analysis = recommender.recommend(analyzer.analyze(outline, "docx"));
|
||||||
|
StrategyConfig config = StrategyConfig.defaults();
|
||||||
|
config.setStrategyCode(RagStrategyCodes.OUTLINE_SECTION);
|
||||||
|
|
||||||
|
List<RagChunk> chunks = registry.split(analysis, config);
|
||||||
|
|
||||||
|
Assert.assertEquals(3, chunks.size());
|
||||||
|
Assert.assertEquals("第1章 总则", chunks.get(0).getSourceLabel());
|
||||||
|
Assert.assertEquals(2, chunks.get(1).getHeadingPath().size());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void shouldSplitQaDocumentByPair() {
|
||||||
|
String qa = "Q: How to reset password?\n"
|
||||||
|
+ "A: Open admin page and click reset.\n\n"
|
||||||
|
+ "问:默认密码是什么?\n"
|
||||||
|
+ "答:由系统配置统一决定。";
|
||||||
|
AnalysisResult analysis = recommender.recommend(analyzer.analyze(qa, "txt"));
|
||||||
|
StrategyConfig config = StrategyConfig.defaults();
|
||||||
|
config.setStrategyCode(RagStrategyCodes.QA_PAIR);
|
||||||
|
|
||||||
|
List<RagChunk> chunks = registry.split(analysis, config);
|
||||||
|
|
||||||
|
Assert.assertEquals(2, chunks.size());
|
||||||
|
Assert.assertEquals(RagChunkTypes.QA_PAIR, chunks.get(0).getChunkType());
|
||||||
|
Assert.assertTrue(chunks.get(0).getContent().contains("问题"));
|
||||||
|
Assert.assertTrue(chunks.get(1).getAnswer().contains("系统配置"));
|
||||||
|
}
|
||||||
|
}
|
||||||
32
easy-agents-rag/easy-agents-rag-ocr/pom.xml
Normal file
32
easy-agents-rag/easy-agents-rag-ocr/pom.xml
Normal file
@@ -0,0 +1,32 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project xmlns="http://maven.apache.org/POM/4.0.0"
|
||||||
|
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||||
|
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||||
|
<modelVersion>4.0.0</modelVersion>
|
||||||
|
|
||||||
|
<parent>
|
||||||
|
<groupId>com.easyagents</groupId>
|
||||||
|
<artifactId>easy-agents-rag</artifactId>
|
||||||
|
<version>${revision}</version>
|
||||||
|
</parent>
|
||||||
|
|
||||||
|
<artifactId>easy-agents-rag-ocr</artifactId>
|
||||||
|
<name>easy-agents-rag-ocr</name>
|
||||||
|
|
||||||
|
<properties>
|
||||||
|
<maven.compiler.source>8</maven.compiler.source>
|
||||||
|
<maven.compiler.target>8</maven.compiler.target>
|
||||||
|
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
|
||||||
|
</properties>
|
||||||
|
|
||||||
|
<dependencies>
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.easyagents</groupId>
|
||||||
|
<artifactId>easy-agents-core</artifactId>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.easyagents</groupId>
|
||||||
|
<artifactId>easy-agents-rag-core</artifactId>
|
||||||
|
</dependency>
|
||||||
|
</dependencies>
|
||||||
|
</project>
|
||||||
36
easy-agents-rag/easy-agents-rag-retrieval/pom.xml
Normal file
36
easy-agents-rag/easy-agents-rag-retrieval/pom.xml
Normal file
@@ -0,0 +1,36 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project xmlns="http://maven.apache.org/POM/4.0.0"
|
||||||
|
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||||
|
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||||
|
<modelVersion>4.0.0</modelVersion>
|
||||||
|
|
||||||
|
<parent>
|
||||||
|
<groupId>com.easyagents</groupId>
|
||||||
|
<artifactId>easy-agents-rag</artifactId>
|
||||||
|
<version>${revision}</version>
|
||||||
|
</parent>
|
||||||
|
|
||||||
|
<artifactId>easy-agents-rag-retrieval</artifactId>
|
||||||
|
<name>easy-agents-rag-retrieval</name>
|
||||||
|
|
||||||
|
<properties>
|
||||||
|
<maven.compiler.source>8</maven.compiler.source>
|
||||||
|
<maven.compiler.target>8</maven.compiler.target>
|
||||||
|
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
|
||||||
|
</properties>
|
||||||
|
|
||||||
|
<dependencies>
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.easyagents</groupId>
|
||||||
|
<artifactId>easy-agents-core</artifactId>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.easyagents</groupId>
|
||||||
|
<artifactId>easy-agents-rag-core</artifactId>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.easyagents</groupId>
|
||||||
|
<artifactId>easy-agents-rag-enhance</artifactId>
|
||||||
|
</dependency>
|
||||||
|
</dependencies>
|
||||||
|
</project>
|
||||||
24
easy-agents-rag/pom.xml
Normal file
24
easy-agents-rag/pom.xml
Normal file
@@ -0,0 +1,24 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project xmlns="http://maven.apache.org/POM/4.0.0"
|
||||||
|
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||||
|
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||||
|
<modelVersion>4.0.0</modelVersion>
|
||||||
|
|
||||||
|
<parent>
|
||||||
|
<groupId>com.easyagents</groupId>
|
||||||
|
<artifactId>easy-agents</artifactId>
|
||||||
|
<version>${revision}</version>
|
||||||
|
</parent>
|
||||||
|
|
||||||
|
<artifactId>easy-agents-rag</artifactId>
|
||||||
|
<packaging>pom</packaging>
|
||||||
|
<name>easy-agents-rag</name>
|
||||||
|
|
||||||
|
<modules>
|
||||||
|
<module>easy-agents-rag-core</module>
|
||||||
|
<module>easy-agents-rag-ingestion</module>
|
||||||
|
<module>easy-agents-rag-ocr</module>
|
||||||
|
<module>easy-agents-rag-enhance</module>
|
||||||
|
<module>easy-agents-rag-retrieval</module>
|
||||||
|
</modules>
|
||||||
|
</project>
|
||||||
@@ -51,6 +51,15 @@
|
|||||||
<artifactId>easy-agents-bom</artifactId>
|
<artifactId>easy-agents-bom</artifactId>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.easyagents</groupId>
|
||||||
|
<artifactId>easy-agents-rag-core</artifactId>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.easyagents</groupId>
|
||||||
|
<artifactId>easy-agents-rag-ingestion</artifactId>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.springframework.boot</groupId>
|
<groupId>org.springframework.boot</groupId>
|
||||||
|
|||||||
@@ -0,0 +1,42 @@
|
|||||||
|
package com.easyagents.spring.boot.rag.ingestion;
|
||||||
|
|
||||||
|
import com.easyagents.rag.ingestion.DefaultRagIngestionService;
|
||||||
|
import com.easyagents.rag.ingestion.RagIngestionService;
|
||||||
|
import com.easyagents.rag.ingestion.analysis.DocumentStructureAnalyzer;
|
||||||
|
import com.easyagents.rag.ingestion.chunk.RagSplitStrategyRegistry;
|
||||||
|
import com.easyagents.rag.ingestion.recommend.SplitStrategyRecommender;
|
||||||
|
import org.springframework.boot.autoconfigure.condition.ConditionalOnClass;
|
||||||
|
import org.springframework.boot.autoconfigure.condition.ConditionalOnMissingBean;
|
||||||
|
import org.springframework.context.annotation.Bean;
|
||||||
|
import org.springframework.context.annotation.Configuration;
|
||||||
|
|
||||||
|
@ConditionalOnClass(RagIngestionService.class)
|
||||||
|
@Configuration(proxyBeanMethods = false)
|
||||||
|
public class RagIngestionAutoConfiguration {
|
||||||
|
|
||||||
|
@Bean
|
||||||
|
@ConditionalOnMissingBean
|
||||||
|
public DocumentStructureAnalyzer documentStructureAnalyzer() {
|
||||||
|
return new DocumentStructureAnalyzer();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Bean
|
||||||
|
@ConditionalOnMissingBean
|
||||||
|
public SplitStrategyRecommender splitStrategyRecommender() {
|
||||||
|
return new SplitStrategyRecommender();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Bean
|
||||||
|
@ConditionalOnMissingBean
|
||||||
|
public RagSplitStrategyRegistry ragSplitStrategyRegistry() {
|
||||||
|
return new RagSplitStrategyRegistry();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Bean
|
||||||
|
@ConditionalOnMissingBean
|
||||||
|
public RagIngestionService ragIngestionService(DocumentStructureAnalyzer documentStructureAnalyzer,
|
||||||
|
SplitStrategyRecommender splitStrategyRecommender,
|
||||||
|
RagSplitStrategyRegistry ragSplitStrategyRegistry) {
|
||||||
|
return new DefaultRagIngestionService(documentStructureAnalyzer, splitStrategyRecommender, ragSplitStrategyRegistry);
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -1,10 +1,11 @@
|
|||||||
org.springframework.boot.autoconfigure.EnableAutoConfiguration=\
|
org.springframework.boot.autoconfigure.EnableAutoConfiguration=\
|
||||||
com.easyagents.spring.boot.chatModel.chatglm.ChatglmAutoConfiguration,\
|
com.easyagents.spring.boot.llm.openai.OpenAIAutoConfiguration,\
|
||||||
com.easyagents.spring.boot.chatModel.openai.OpenAIAutoConfiguration,\
|
com.easyagents.spring.boot.llm.qwen.QwenAutoConfiguration,\
|
||||||
com.easyagents.spring.boot.chatModel.qwen.QwenAutoConfiguration,\
|
|
||||||
com.easyagents.spring.boot.chatModel.spark.SparkAutoConfiguration,\
|
|
||||||
com.easyagents.spring.boot.store.aliyun.AliyunAutoConfiguration,\
|
com.easyagents.spring.boot.store.aliyun.AliyunAutoConfiguration,\
|
||||||
com.easyagents.spring.boot.store.qcloud.QCloudStoreAutoConfiguration,\
|
com.easyagents.spring.boot.store.qcloud.QCloudStoreAutoConfiguration,\
|
||||||
com.easyagents.spring.boot.chatModel.ollama.OllamaAutoConfiguration,\
|
com.easyagents.spring.boot.llm.ollama.OllamaAutoConfiguration,\
|
||||||
com.easyagents.spring.boot.chatModel.deepseek.DeepSeekAutoConfiguration,\
|
com.easyagents.spring.boot.llm.deepseek.DeepSeekAutoConfiguration,\
|
||||||
com.easyagents.spring.boot.store.chroma.ChromaAutoConfiguration
|
com.easyagents.spring.boot.store.chroma.ChromaAutoConfiguration,\
|
||||||
|
com.easyagents.spring.boot.store.elasticsearch.ElasticSearchAutoConfiguration,\
|
||||||
|
com.easyagents.spring.boot.store.opensearch.OpenSearchAutoConfiguration,\
|
||||||
|
com.easyagents.spring.boot.rag.ingestion.RagIngestionAutoConfiguration
|
||||||
|
|||||||
@@ -1,8 +1,10 @@
|
|||||||
com.easyagents.spring.boot.chatModel.chatglm.ChatglmAutoConfiguration
|
com.easyagents.spring.boot.llm.openai.OpenAIAutoConfiguration
|
||||||
com.easyagents.spring.boot.chatModel.openai.OpenAIAutoConfiguration
|
com.easyagents.spring.boot.llm.qwen.QwenAutoConfiguration
|
||||||
com.easyagents.spring.boot.chatModel.qwen.QwenAutoConfiguration
|
|
||||||
com.easyagents.spring.boot.chatModel.spark.SparkAutoConfiguration
|
|
||||||
com.easyagents.spring.boot.store.aliyun.AliyunAutoConfiguration
|
com.easyagents.spring.boot.store.aliyun.AliyunAutoConfiguration
|
||||||
com.easyagents.spring.boot.store.qcloud.QCloudStoreAutoConfiguration
|
com.easyagents.spring.boot.store.qcloud.QCloudStoreAutoConfiguration
|
||||||
com.easyagents.spring.boot.chatModel.ollama.OllamaAutoConfiguration
|
com.easyagents.spring.boot.llm.ollama.OllamaAutoConfiguration
|
||||||
|
com.easyagents.spring.boot.llm.deepseek.DeepSeekAutoConfiguration
|
||||||
com.easyagents.spring.boot.store.chroma.ChromaAutoConfiguration
|
com.easyagents.spring.boot.store.chroma.ChromaAutoConfiguration
|
||||||
|
com.easyagents.spring.boot.store.elasticsearch.ElasticSearchAutoConfiguration
|
||||||
|
com.easyagents.spring.boot.store.opensearch.OpenSearchAutoConfiguration
|
||||||
|
com.easyagents.spring.boot.rag.ingestion.RagIngestionAutoConfiguration
|
||||||
|
|||||||
31
pom.xml
31
pom.xml
@@ -16,6 +16,7 @@
|
|||||||
<modules>
|
<modules>
|
||||||
<module>easy-agents-bom</module>
|
<module>easy-agents-bom</module>
|
||||||
<module>easy-agents-core</module>
|
<module>easy-agents-core</module>
|
||||||
|
<module>easy-agents-rag</module>
|
||||||
<module>easy-agents-chat</module>
|
<module>easy-agents-chat</module>
|
||||||
<module>easy-agents-store</module>
|
<module>easy-agents-store</module>
|
||||||
<module>easy-agents-spring-boot-starter</module>
|
<module>easy-agents-spring-boot-starter</module>
|
||||||
@@ -118,6 +119,36 @@
|
|||||||
<version>${revision}</version>
|
<version>${revision}</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.easyagents</groupId>
|
||||||
|
<artifactId>easy-agents-rag-core</artifactId>
|
||||||
|
<version>${revision}</version>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.easyagents</groupId>
|
||||||
|
<artifactId>easy-agents-rag-ingestion</artifactId>
|
||||||
|
<version>${revision}</version>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.easyagents</groupId>
|
||||||
|
<artifactId>easy-agents-rag-ocr</artifactId>
|
||||||
|
<version>${revision}</version>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.easyagents</groupId>
|
||||||
|
<artifactId>easy-agents-rag-enhance</artifactId>
|
||||||
|
<version>${revision}</version>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.easyagents</groupId>
|
||||||
|
<artifactId>easy-agents-rag-retrieval</artifactId>
|
||||||
|
<version>${revision}</version>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>com.easyagents</groupId>
|
<groupId>com.easyagents</groupId>
|
||||||
<artifactId>easy-agents-bom</artifactId>
|
<artifactId>easy-agents-bom</artifactId>
|
||||||
|
|||||||
Reference in New Issue
Block a user