Compare commits

...

2 Commits

Author SHA1 Message Date
941995d1b8 feat: RAG分块策略增强 2026-03-29 17:28:12 +08:00
2f20064ee1 fix: 收紧 starter 自动装配并修复默认连接问题
- 为 llm 与 store 自动配置增加显式属性门槛

- 修复 ollama think 为空时的启动空指针

- 补充 starter 条件装配测试
2026-03-25 15:26:42 +08:00
39 changed files with 1790 additions and 16 deletions

View File

@@ -56,6 +56,30 @@
</exclusions>
</dependency>
<dependency>
<groupId>com.easyagents</groupId>
<artifactId>easy-agents-rag-core</artifactId>
</dependency>
<dependency>
<groupId>com.easyagents</groupId>
<artifactId>easy-agents-rag-ingestion</artifactId>
</dependency>
<dependency>
<groupId>com.easyagents</groupId>
<artifactId>easy-agents-rag-ocr</artifactId>
</dependency>
<dependency>
<groupId>com.easyagents</groupId>
<artifactId>easy-agents-rag-enhance</artifactId>
</dependency>
<dependency>
<groupId>com.easyagents</groupId>
<artifactId>easy-agents-rag-retrieval</artifactId>
</dependency>
<!--image model start-->
<dependency>

View File

@@ -0,0 +1,102 @@
# easy-agents-rag 技术规划
## 目标
`easy-agents-rag` 用于承载 Easy-Agents 的 RAG 领域能力,逐步将知识入库、文档预处理、分块、索引增强、检索增强等能力从业务工程中抽离,形成可复用的框架层模块。
当前阶段先完成模块骨架建设,并优先承接本次文档导入链路中的预处理与分块能力迁移。
## 模块规划
### `easy-agents-rag-core`
定位:RAG 域共享契约层。
负责内容:
- 通用常量与元数据 key
- 结构类型、策略类型、chunk 类型定义
- 少量稳定共享模型与接口
不负责内容:
- 具体 OCR 实现
- 具体分块实现
- 具体召回编排
### `easy-agents-rag-ingestion`
定位:入库前处理链路。
负责内容:
- 文本标准化与清洗
- 文档结构分析
- 拆分策略推荐
- 文档分块与 chunk 元信息补全
- 入库前质量控制
当前迁移优先承接:
- 文档结构分析
- 章节/问答/段落分块
- 自动推荐拆分策略
### `easy-agents-rag-ocr`
定位:OCR 与版面恢复能力。
负责内容:
- 图片/PDF OCR
- 页面版面解析
- 标题、段落、表格等结构恢复
- PDF 到结构化文本或 Markdown 的转换
### `easy-agents-rag-enhance`
定位:索引前增强能力。
负责内容:
- 图增强
- RAPTOR
- parent-child chunk
- window chunk
- 摘要、关键词、标签等增强信息生成
- 索引前的知识单元增强
### `easy-agents-rag-retrieval`
定位:查询侧增强与召回编排。
负责内容:
- query rewrite / expansion
- hybrid recall 编排
- metadata filter 策略
- graph recall
- rerank 编排
- chunk merge / window expand / context assemble
## 当前迁移范围
本次优先迁移到 `easy-agents-rag-ingestion` 的能力:
- 文档结构分析
- 拆分策略推荐
- 标题型 / QA 型 / 段落型分块
本次不迁移的能力:
- 业务侧预览会话
- 控制器与接口 DTO
- 业务库持久化
- 前端导入页面
这些能力继续留在业务工程,由业务层依赖 `easy-agents-rag` 提供的能力完成编排。
## 后续演进
后续演进顺序建议如下:
1. 完成 `rag-ingestion` 首批能力迁移并稳定对外接口
2. 补充 `rag-ocr`,接入 OCR 与版面恢复
3. 补充 `rag-enhance`,支持图增强、RAPTOR、索引增强
4. 补充 `rag-retrieval`,统一查询增强与召回后处理
整体原则:
- `easy-agents-core` 保持基础抽象
- `easy-agents-rag` 聚合 RAG 领域实现
- 业务工程只保留编排、持久化与产品层逻辑

View File

@@ -0,0 +1,28 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>com.easyagents</groupId>
<artifactId>easy-agents-rag</artifactId>
<version>${revision}</version>
</parent>
<artifactId>easy-agents-rag-core</artifactId>
<name>easy-agents-rag-core</name>
<properties>
<maven.compiler.source>8</maven.compiler.source>
<maven.compiler.target>8</maven.compiler.target>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
</properties>
<dependencies>
<dependency>
<groupId>com.easyagents</groupId>
<artifactId>easy-agents-core</artifactId>
</dependency>
</dependencies>
</project>

View File

@@ -0,0 +1,128 @@
package com.easyagents.rag.core;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
/**
 * Mutable, serializable value object describing one chunk produced by a split strategy.
 *
 * <p>The collection-valued properties ({@code headingPath}, {@code warnings},
 * {@code options}) are never {@code null}: they start out empty and a {@code null}
 * passed to their setters is replaced by a fresh empty collection, so callers can
 * always write {@code chunk.getOptions().put(...)} without a null check.
 * {@code partNo} and {@code partTotal} default to 1.</p>
 */
public class RagChunk implements Serializable {

    private String chunkId;
    private String chunkType;
    private String sourceLabel;
    private List<String> headingPath = new ArrayList<String>();
    private String content;
    private String question;
    private String answer;
    private Integer charCount;
    private Integer tokenEstimate;
    private Integer partNo = 1;
    private Integer partTotal = 1;
    private List<String> warnings = new ArrayList<String>();
    private Map<String, Object> options = new LinkedHashMap<String, Object>();

    /** @return the chunk identifier, or {@code null} if none has been assigned */
    public String getChunkId() {
        return chunkId;
    }

    public void setChunkId(String chunkId) {
        this.chunkId = chunkId;
    }

    /** @return the chunk type code, or {@code null} if unset */
    public String getChunkType() {
        return chunkType;
    }

    public void setChunkType(String chunkType) {
        this.chunkType = chunkType;
    }

    /** @return the label describing where the chunk came from, or {@code null} if unset */
    public String getSourceLabel() {
        return sourceLabel;
    }

    public void setSourceLabel(String sourceLabel) {
        this.sourceLabel = sourceLabel;
    }

    /** @return the heading path list; never {@code null} */
    public List<String> getHeadingPath() {
        return headingPath;
    }

    /**
     * @param headingPath the list to store (kept by reference); {@code null} resets the
     *                    property to a new empty list
     */
    public void setHeadingPath(List<String> headingPath) {
        this.headingPath = headingPath == null ? new ArrayList<String>() : headingPath;
    }

    /** @return the chunk text, or {@code null} if unset */
    public String getContent() {
        return content;
    }

    public void setContent(String content) {
        this.content = content;
    }

    /** @return the question text, or {@code null} if unset */
    public String getQuestion() {
        return question;
    }

    public void setQuestion(String question) {
        this.question = question;
    }

    /** @return the answer text, or {@code null} if unset */
    public String getAnswer() {
        return answer;
    }

    public void setAnswer(String answer) {
        this.answer = answer;
    }

    /** @return the stored character count, or {@code null} if unset */
    public Integer getCharCount() {
        return charCount;
    }

    public void setCharCount(Integer charCount) {
        this.charCount = charCount;
    }

    /** @return the stored token estimate, or {@code null} if unset */
    public Integer getTokenEstimate() {
        return tokenEstimate;
    }

    public void setTokenEstimate(Integer tokenEstimate) {
        this.tokenEstimate = tokenEstimate;
    }

    /** @return the part number of this chunk; defaults to 1 */
    public Integer getPartNo() {
        return partNo;
    }

    public void setPartNo(Integer partNo) {
        this.partNo = partNo;
    }

    /** @return the total number of parts; defaults to 1 */
    public Integer getPartTotal() {
        return partTotal;
    }

    public void setPartTotal(Integer partTotal) {
        this.partTotal = partTotal;
    }

    /** @return the warnings list; never {@code null} */
    public List<String> getWarnings() {
        return warnings;
    }

    /**
     * @param warnings the list to store (kept by reference); {@code null} resets the
     *                 property to a new empty list
     */
    public void setWarnings(List<String> warnings) {
        this.warnings = warnings == null ? new ArrayList<String>() : warnings;
    }

    /** @return the free-form options/metadata map; never {@code null} */
    public Map<String, Object> getOptions() {
        return options;
    }

    /**
     * @param options the map to store (kept by reference); {@code null} resets the
     *                property to a new empty insertion-ordered map
     */
    public void setOptions(Map<String, Object> options) {
        this.options = options == null ? new LinkedHashMap<String, Object>() : options;
    }
}

View File

@@ -0,0 +1,11 @@
package com.easyagents.rag.core;
/**
 * String codes identifying the kind of a chunk. Constants only; not instantiable.
 */
public final class RagChunkTypes {

    /** Chunk type code {@code "section"}. */
    public static final String SECTION = "section";

    /** Chunk type code {@code "qa_pair"}. */
    public static final String QA_PAIR = "qa_pair";

    /** Chunk type code {@code "paragraph"}. */
    public static final String PARAGRAPH = "paragraph";

    // Constant holder: no instances.
    private RagChunkTypes() {
    }
}

View File

@@ -0,0 +1,12 @@
package com.easyagents.rag.core;
/**
 * Default numeric settings shared across the RAG modules. Constants only; not instantiable.
 */
public final class RagDefaults {

    /** Default chunk size (512). */
    public static final int CHUNK_SIZE = 512;

    /** Default overlap size (128). */
    public static final int OVERLAP_SIZE = 128;

    /** Default Markdown splitter level (2). */
    public static final int MD_SPLITTER_LEVEL = 2;

    /** Default number of rows per chunk (1). */
    public static final int ROWS_PER_CHUNK = 1;

    // Constant holder: no instances.
    private RagDefaults() {
    }
}

View File

@@ -0,0 +1,20 @@
package com.easyagents.rag.core;
/**
 * Key names used for chunk metadata entries. Constants only; not instantiable.
 */
public final class RagMetadataKeys {

    // --- general chunk description ---
    public static final String CHUNK_TYPE = "chunkType";
    public static final String SOURCE_LABEL = "sourceLabel";
    public static final String HEADING_PATH = "headingPath";
    public static final String PAGE_NO = "pageNo";

    // --- size information ---
    public static final String CHAR_COUNT = "charCount";
    public static final String TOKEN_ESTIMATE = "tokenEstimate";

    // --- question/answer chunks ---
    public static final String QA_QUESTION = "qaQuestion";
    public static final String QA_ANSWER = "qaAnswer";
    public static final String QA_GROUP_ID = "qaGroupId";

    // --- multi-part chunks ---
    public static final String PART_NO = "partNo";
    public static final String PART_TOTAL = "partTotal";

    // --- diagnostics ---
    public static final String WARNINGS = "warnings";

    // Constant holder: no instances.
    private RagMetadataKeys() {
    }
}

View File

@@ -0,0 +1,14 @@
package com.easyagents.rag.core;
/**
 * Codes naming the available split strategies. Constants only; not instantiable.
 */
public final class RagStrategyCodes {

    /** Strategy code {@code "AUTO"}. */
    public static final String AUTO = "AUTO";

    /** Strategy code {@code "MARKDOWN_SECTION"}. */
    public static final String MARKDOWN_SECTION = "MARKDOWN_SECTION";

    /** Strategy code {@code "OUTLINE_SECTION"}. */
    public static final String OUTLINE_SECTION = "OUTLINE_SECTION";

    /** Strategy code {@code "QA_PAIR"}. */
    public static final String QA_PAIR = "QA_PAIR";

    /** Strategy code {@code "PARAGRAPH_LENGTH"}. */
    public static final String PARAGRAPH_LENGTH = "PARAGRAPH_LENGTH";

    /** Strategy code {@code "CUSTOM_REGEX"}. */
    public static final String CUSTOM_REGEX = "CUSTOM_REGEX";

    // Constant holder: no instances.
    private RagStrategyCodes() {
    }
}

View File

@@ -0,0 +1,12 @@
package com.easyagents.rag.core;
/**
 * Codes naming the document structure types. Constants only; not instantiable.
 */
public final class RagStructureTypes {

    /** Structure type code {@code "markdown_heading"}. */
    public static final String MARKDOWN_HEADING = "markdown_heading";

    /** Structure type code {@code "outline_section"}. */
    public static final String OUTLINE_SECTION = "outline_section";

    /** Structure type code {@code "qa_pair"}. */
    public static final String QA_PAIR = "qa_pair";

    /** Structure type code {@code "plain_paragraph"}. */
    public static final String PLAIN_PARAGRAPH = "plain_paragraph";

    // Constant holder: no instances.
    private RagStructureTypes() {
    }
}

View File

@@ -0,0 +1,36 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>com.easyagents</groupId>
<artifactId>easy-agents-rag</artifactId>
<version>${revision}</version>
</parent>
<artifactId>easy-agents-rag-enhance</artifactId>
<name>easy-agents-rag-enhance</name>
<properties>
<maven.compiler.source>8</maven.compiler.source>
<maven.compiler.target>8</maven.compiler.target>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
</properties>
<dependencies>
<dependency>
<groupId>com.easyagents</groupId>
<artifactId>easy-agents-core</artifactId>
</dependency>
<dependency>
<groupId>com.easyagents</groupId>
<artifactId>easy-agents-rag-core</artifactId>
</dependency>
<dependency>
<groupId>com.easyagents</groupId>
<artifactId>easy-agents-rag-ingestion</artifactId>
</dependency>
</dependencies>
</project>

View File

@@ -0,0 +1,37 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>com.easyagents</groupId>
<artifactId>easy-agents-rag</artifactId>
<version>${revision}</version>
</parent>
<artifactId>easy-agents-rag-ingestion</artifactId>
<name>easy-agents-rag-ingestion</name>
<properties>
<maven.compiler.source>8</maven.compiler.source>
<maven.compiler.target>8</maven.compiler.target>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
</properties>
<dependencies>
<dependency>
<groupId>com.easyagents</groupId>
<artifactId>easy-agents-core</artifactId>
</dependency>
<dependency>
<groupId>com.easyagents</groupId>
<artifactId>easy-agents-rag-core</artifactId>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<scope>test</scope>
</dependency>
</dependencies>
</project>

View File

@@ -0,0 +1,41 @@
package com.easyagents.rag.ingestion;
import com.easyagents.rag.core.RagChunk;
import com.easyagents.rag.ingestion.analysis.DocumentStructureAnalyzer;
import com.easyagents.rag.ingestion.chunk.RagSplitStrategyRegistry;
import com.easyagents.rag.ingestion.model.AnalysisResult;
import com.easyagents.rag.ingestion.model.StrategyConfig;
import com.easyagents.rag.ingestion.recommend.SplitStrategyRecommender;
import java.util.List;
/**
 * {@link RagIngestionService} implementation that delegates every operation to the
 * three collaborators supplied at construction time.
 */
public class DefaultRagIngestionService implements RagIngestionService {

    private final DocumentStructureAnalyzer analyzer;
    private final SplitStrategyRecommender recommender;
    private final RagSplitStrategyRegistry registry;

    /**
     * @param documentStructureAnalyzer produces the initial analysis of raw content
     * @param splitStrategyRecommender  post-processes the analysis and resolves strategy labels
     * @param ragSplitStrategyRegistry  performs the actual splitting
     */
    public DefaultRagIngestionService(DocumentStructureAnalyzer documentStructureAnalyzer,
                                      SplitStrategyRecommender splitStrategyRecommender,
                                      RagSplitStrategyRegistry ragSplitStrategyRegistry) {
        analyzer = documentStructureAnalyzer;
        recommender = splitStrategyRecommender;
        registry = ragSplitStrategyRegistry;
    }

    /**
     * Runs the analyzer on the raw content and passes its result through the recommender.
     *
     * @return whatever the recommender returns for the analyzer's result
     */
    @Override
    public AnalysisResult analyze(String rawContent, String sourceFormat) {
        return recommender.recommend(analyzer.analyze(rawContent, sourceFormat));
    }

    /** Delegates to the split strategy registry. */
    @Override
    public List<RagChunk> split(AnalysisResult analysis, StrategyConfig config) {
        return registry.split(analysis, config);
    }

    /** Delegates to the recommender's label lookup. */
    @Override
    public String toStrategyLabel(String strategyCode) {
        return recommender.toStrategyLabel(strategyCode);
    }
}

View File

@@ -0,0 +1,16 @@
package com.easyagents.rag.ingestion;
import com.easyagents.rag.core.RagChunk;
import com.easyagents.rag.ingestion.model.AnalysisResult;
import com.easyagents.rag.ingestion.model.StrategyConfig;
import java.util.List;
/**
 * Entry point for the pre-indexing steps of document ingestion: analysing raw
 * content and splitting it into chunks.
 */
public interface RagIngestionService {

    /**
     * Analyses raw document content.
     *
     * @param rawContent   the document text as received
     * @param sourceFormat a format hint for the document
     * @return the analysis result
     */
    AnalysisResult analyze(String rawContent, String sourceFormat);

    /**
     * Splits previously analysed content into chunks.
     *
     * @param analysis the result of {@link #analyze(String, String)}
     * @param config   the strategy settings to apply
     * @return the produced chunks
     */
    List<RagChunk> split(AnalysisResult analysis, StrategyConfig config);

    /**
     * Resolves the display label for a strategy code.
     *
     * @param strategyCode the strategy code
     * @return the label for that code
     */
    String toStrategyLabel(String strategyCode);
}

View File

@@ -0,0 +1,234 @@
package com.easyagents.rag.ingestion.analysis;
import com.easyagents.core.util.StringUtil;
import com.easyagents.rag.ingestion.model.AnalysisResult;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class DocumentStructureAnalyzer {
private static final Pattern MARKDOWN_HEADING = Pattern.compile("^#{1,6}\\s+\\S+.*$");
private static final Pattern CHINESE_CHAPTER = Pattern.compile("^第[一二三四五六七八九十百零两0-9]+[章节篇部分卷]\\s*.*$");
private static final Pattern CHINESE_SECTION = Pattern.compile("^[一二三四五六七八九十百零]+[、.]\\s*\\S+.*$");
private static final Pattern CHINESE_SUBSECTION = Pattern.compile("^[(][一二三四五六七八九十百零0-9]+[)]\\s*\\S+.*$");
private static final Pattern NUMERIC_SECTION = Pattern.compile("^[0-9]+(\\.[0-9]+){0,4}\\s+\\S+.*$");
private static final Pattern ENGLISH_SECTION = Pattern.compile("^(Chapter|Section|Part)\\s+[0-9IVXLC]+([.:\\-\\s].*)?$", Pattern.CASE_INSENSITIVE);
private static final Pattern ENGLISH_ROMAN = Pattern.compile("^[IVXLC]+[.、)\\s-]+\\S+.*$");
private static final Pattern QUESTION_LINE = Pattern.compile("^(Q|QUESTION|问|问题|FAQ\\s*[0-9]+)\\s*[.:-]\\s*.+$", Pattern.CASE_INSENSITIVE);
private static final Pattern ANSWER_LINE = Pattern.compile("^(A|ANSWER|答|答案)\\s*[.:-]\\s*.+$", Pattern.CASE_INSENSITIVE);
private static final Pattern PAGE_NUMBER = Pattern.compile("^(第?\\s*\\d+\\s*页|page\\s+\\d+|\\d+)\\s*$", Pattern.CASE_INSENSITIVE);
private static final Pattern TOC_LINE = Pattern.compile("^.{2,80}[.·•…]{2,}\\s*\\d+\\s*$");
public AnalysisResult analyze(String rawContent, String sourceFormat) {
String normalizedContent = normalize(rawContent);
List<String> lines = toLines(normalizedContent);
int markdownHeadingCount = 0;
int outlineHeadingCount = 0;
int qaQuestionCount = 0;
int qaAnswerCount = 0;
int pairedQaCount = 0;
int tocLineCount = 0;
int shortLineCount = 0;
Set<Integer> markdownLevels = new HashSet<Integer>();
for (int i = 0; i < lines.size(); i++) {
String line = lines.get(i);
if (MARKDOWN_HEADING.matcher(line).matches()) {
markdownHeadingCount++;
markdownLevels.add(Integer.valueOf(countMarkdownLevel(line)));
}
if (isOutlineHeading(line)) {
outlineHeadingCount++;
}
if (QUESTION_LINE.matcher(line).matches()) {
qaQuestionCount++;
if (hasAnswerNearby(lines, i)) {
pairedQaCount++;
}
}
if (ANSWER_LINE.matcher(line).matches()) {
qaAnswerCount++;
}
if (TOC_LINE.matcher(line).matches()) {
tocLineCount++;
}
if (line.length() <= 20) {
shortLineCount++;
}
}
int paragraphCount = 0;
int longParagraphCount = 0;
List<String> paragraphs = splitParagraphs(normalizedContent);
for (String paragraph : paragraphs) {
if (StringUtil.hasText(paragraph)) {
paragraphCount++;
if (paragraph.length() > 800) {
longParagraphCount++;
}
}
}
Map<String, Object> features = new LinkedHashMap<String, Object>();
features.put("lineCount", Integer.valueOf(lines.size()));
features.put("paragraphCount", Integer.valueOf(paragraphCount));
features.put("markdownHeadingCount", Integer.valueOf(markdownHeadingCount));
features.put("markdownLevelVariety", Integer.valueOf(markdownLevels.size()));
features.put("outlineHeadingCount", Integer.valueOf(outlineHeadingCount));
features.put("qaQuestionCount", Integer.valueOf(qaQuestionCount));
features.put("qaAnswerCount", Integer.valueOf(qaAnswerCount));
features.put("pairedQaCount", Integer.valueOf(pairedQaCount));
features.put("tocLineCount", Integer.valueOf(tocLineCount));
features.put("shortLineRatio", lines.isEmpty() ? Double.valueOf(0D) : Double.valueOf((double) shortLineCount / (double) lines.size()));
features.put("longParagraphCount", Integer.valueOf(longParagraphCount));
AnalysisResult result = new AnalysisResult();
result.setSourceFormat(sourceFormat == null ? "" : sourceFormat.toLowerCase(Locale.ROOT));
result.setNormalizedContent(normalizedContent);
result.setFeatures(features);
return result;
}
private String normalize(String rawContent) {
if (!StringUtil.hasText(rawContent)) {
return "";
}
String content = rawContent
.replace("\uFEFF", "")
.replace("\u200B", "")
.replace("\r\n", "\n")
.replace('\r', '\n')
.replace('\u00A0', ' ')
.replace('', ':');
List<String> originalLines = toLines(content);
Map<String, Integer> lineCounts = new HashMap<String, Integer>();
for (String line : originalLines) {
if (line.length() >= 4 && line.length() <= 60) {
Integer count = lineCounts.get(line);
lineCounts.put(line, count == null ? Integer.valueOf(1) : Integer.valueOf(count.intValue() + 1));
}
}
List<String> filteredLines = new ArrayList<String>();
for (String line : originalLines) {
if (!StringUtil.hasText(line)) {
filteredLines.add("");
continue;
}
if (PAGE_NUMBER.matcher(line).matches()) {
continue;
}
Integer repeated = lineCounts.get(line);
if (repeated != null && repeated.intValue() >= 3 && line.length() <= 40) {
continue;
}
filteredLines.add(line);
}
List<String> mergedLines = new ArrayList<String>();
for (String line : filteredLines) {
if (mergedLines.isEmpty()) {
mergedLines.add(line);
continue;
}
String previous = mergedLines.get(mergedLines.size() - 1);
if (!StringUtil.hasText(previous) || !StringUtil.hasText(line)) {
mergedLines.add(line);
continue;
}
if (shouldMerge(previous, line)) {
mergedLines.set(mergedLines.size() - 1, previous + joinToken(previous, line) + line);
} else {
mergedLines.add(line);
}
}
return String.join("\n", mergedLines)
.replaceAll("[ \\t]{2,}", " ")
.replaceAll("\\n{3,}", "\n\n")
.trim();
}
private boolean shouldMerge(String previous, String current) {
if (isHeading(previous) || isHeading(current)) {
return false;
}
if (QUESTION_LINE.matcher(current).matches() || ANSWER_LINE.matcher(current).matches()) {
return false;
}
if (TOC_LINE.matcher(previous).matches() || TOC_LINE.matcher(current).matches()) {
return false;
}
char previousChar = previous.charAt(previous.length() - 1);
if ("。!?.!?:;".indexOf(previousChar) >= 0) {
return false;
}
return current.length() < 80;
}
private String joinToken(String previous, String current) {
char last = previous.charAt(previous.length() - 1);
char first = current.charAt(0);
if (Character.isLetterOrDigit(last) && Character.isLetterOrDigit(first)) {
return " ";
}
return "";
}
private boolean hasAnswerNearby(List<String> lines, int index) {
int end = Math.min(lines.size(), index + 4);
for (int i = index + 1; i < end; i++) {
if (ANSWER_LINE.matcher(lines.get(i)).matches()) {
return true;
}
}
return false;
}
private boolean isHeading(String line) {
return MARKDOWN_HEADING.matcher(line).matches() || isOutlineHeading(line);
}
private boolean isOutlineHeading(String line) {
return CHINESE_CHAPTER.matcher(line).matches()
|| CHINESE_SECTION.matcher(line).matches()
|| CHINESE_SUBSECTION.matcher(line).matches()
|| NUMERIC_SECTION.matcher(line).matches()
|| ENGLISH_SECTION.matcher(line).matches()
|| ENGLISH_ROMAN.matcher(line).matches();
}
private int countMarkdownLevel(String line) {
Matcher matcher = Pattern.compile("^(#{1,6})\\s+").matcher(line);
if (!matcher.find()) {
return 0;
}
return matcher.group(1).length();
}
private List<String> splitParagraphs(String normalizedContent) {
String[] parts = normalizedContent.split("\\n\\s*\\n");
List<String> paragraphs = new ArrayList<String>();
for (String part : parts) {
String paragraph = part.trim();
if (StringUtil.hasText(paragraph)) {
paragraphs.add(paragraph);
}
}
return paragraphs;
}
private List<String> toLines(String content) {
String[] rawLines = content.split("\\n");
List<String> lines = new ArrayList<String>(rawLines.length);
for (String rawLine : rawLines) {
lines.add(rawLine == null ? "" : rawLine.trim());
}
return lines;
}
}

View File

@@ -0,0 +1,388 @@
package com.easyagents.rag.ingestion.chunk;
import com.easyagents.core.document.Document;
import com.easyagents.core.document.DocumentSplitter;
import com.easyagents.core.document.splitter.RegexDocumentSplitter;
import com.easyagents.core.document.splitter.SimpleDocumentSplitter;
import com.easyagents.core.util.StringUtil;
import com.easyagents.rag.core.*;
import com.easyagents.rag.ingestion.model.AnalysisResult;
import com.easyagents.rag.ingestion.model.StrategyConfig;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Splits normalized document text into {@link RagChunk}s according to a strategy code.
 *
 * <p>Supported strategies: Markdown sections, outline sections, question/answer pairs,
 * custom regex, and (as the fallback for every other code) fixed-length paragraphs.
 * Every strategy finishes with {@link #postProcess(List)}, which drops blank, very short
 * and duplicate chunks and assigns the final chunk ids and size estimates.</p>
 */
public class RagSplitStrategyRegistry {

    private static final Pattern MARKDOWN_HEADING = Pattern.compile("^(#{1,6})\\s+(.*)$");
    private static final Pattern QUESTION_PREFIX = Pattern.compile("^(Q|QUESTION|问|问题|FAQ\\s*[0-9]+)\\s*[.:-]\\s*(.+)$", Pattern.CASE_INSENSITIVE);
    private static final Pattern ANSWER_PREFIX = Pattern.compile("^(A|ANSWER|答|答案)\\s*[.:-]\\s*(.+)$", Pattern.CASE_INSENSITIVE);

    /** Label and heading path given to text that appears before the first heading. */
    private static final String UNTITLED_SECTION = "未命名段落";

    /**
     * Splits the analysed content with the configured strategy.
     *
     * <p>A blank or {@code AUTO} strategy code is replaced by the recommended code stored in
     * the analysis result. Codes that match none of the known strategies fall back to
     * paragraph splitting.</p>
     *
     * @param analysisResult analysis carrying the normalized content and recommended strategy
     * @param strategyConfig strategy code plus size/overlap/regex settings
     * @return the chunks; an empty list when the normalized content is {@code null} or blank
     * @throws NullPointerException if either argument is {@code null}
     */
    public List<RagChunk> split(AnalysisResult analysisResult, StrategyConfig strategyConfig) {
        if (analysisResult == null) {
            throw new NullPointerException("analysisResult must not be null");
        }
        if (strategyConfig == null) {
            throw new NullPointerException("strategyConfig must not be null");
        }
        String normalizedContent = analysisResult.getNormalizedContent();
        if (!StringUtil.hasText(normalizedContent)) {
            // Nothing to split; avoids a NullPointerException on missing content.
            return new ArrayList<RagChunk>();
        }

        String strategyCode = strategyConfig.getStrategyCode();
        if (!StringUtil.hasText(strategyCode) || RagStrategyCodes.AUTO.equals(strategyCode)) {
            strategyCode = analysisResult.getRecommendedStrategyCode();
        }

        if (RagStrategyCodes.MARKDOWN_SECTION.equals(strategyCode)) {
            return buildMarkdownChunks(normalizedContent, strategyConfig);
        }
        if (RagStrategyCodes.OUTLINE_SECTION.equals(strategyCode)) {
            return buildOutlineChunks(normalizedContent, strategyConfig);
        }
        if (RagStrategyCodes.QA_PAIR.equals(strategyCode)) {
            return buildQaChunks(normalizedContent, strategyConfig);
        }
        if (RagStrategyCodes.CUSTOM_REGEX.equals(strategyCode)) {
            return buildRegexChunks(normalizedContent, strategyConfig);
        }
        return buildParagraphChunks(normalizedContent, strategyConfig);
    }

    /** Builds one section per Markdown heading ('#' to '######'). */
    private List<RagChunk> buildMarkdownChunks(String content, StrategyConfig strategyConfig) {
        return buildSectionChunks(content, strategyConfig, true);
    }

    /** Builds one section per outline heading (see {@link OutlineHeading#parse(String)}). */
    private List<RagChunk> buildOutlineChunks(String content, StrategyConfig strategyConfig) {
        return buildSectionChunks(content, strategyConfig, false);
    }

    /**
     * Shared section builder: walks the lines, starts a new section at every heading and
     * maintains a stack of open headings so that each section knows its heading path.
     *
     * @param markdown {@code true} to detect Markdown headings, {@code false} for outline headings
     */
    private List<RagChunk> buildSectionChunks(String content, StrategyConfig strategyConfig, boolean markdown) {
        List<SectionChunk> sections = new ArrayList<SectionChunk>();
        Deque<HeadingLevel> stack = new ArrayDeque<HeadingLevel>();
        SectionChunk current = null;

        for (String rawLine : content.split("\\n")) {
            String line = rawLine.trim();
            HeadingLevel heading = markdown ? parseMarkdownHeading(line) : parseOutlineHeading(line);
            if (heading != null) {
                if (current != null) {
                    sections.add(current);
                }
                // Close every open heading at the same or a deeper level.
                while (!stack.isEmpty() && stack.peekLast().level >= heading.level) {
                    stack.removeLast();
                }
                stack.addLast(heading);
                current = new SectionChunk(copyPath(stack), heading.title, line);
                current.lines.add(line);
            } else {
                if (current == null) {
                    current = new SectionChunk(Collections.singletonList(UNTITLED_SECTION), UNTITLED_SECTION, null);
                }
                current.lines.add(rawLine);
            }
        }
        if (current != null) {
            sections.add(current);
        }
        return finalizeSectionChunks(sections, strategyConfig);
    }

    /** Returns level and title of a Markdown heading line, or {@code null} if it is not one. */
    private HeadingLevel parseMarkdownHeading(String line) {
        Matcher matcher = MARKDOWN_HEADING.matcher(line);
        if (!matcher.matches()) {
            return null;
        }
        return new HeadingLevel(matcher.group(1).length(), matcher.group(2).trim());
    }

    /** Returns level and title of an outline heading line, or {@code null} if it is not one. */
    private HeadingLevel parseOutlineHeading(String line) {
        OutlineHeading heading = OutlineHeading.parse(line);
        return heading == null ? null : new HeadingLevel(heading.level, heading.title);
    }

    /**
     * Turns collected sections into chunks, splitting sections longer than the chunk size.
     * Sections that are blank or consist of nothing but their own heading are skipped.
     */
    private List<RagChunk> finalizeSectionChunks(List<SectionChunk> sections, StrategyConfig strategyConfig) {
        List<RagChunk> result = new ArrayList<RagChunk>();
        int index = 1;
        for (SectionChunk section : sections) {
            String content = joinAndTrim(section.lines);
            // The heading-line comparison covers Markdown, where the label lacks the '#' prefix
            // and therefore never equals the content.
            if (!StringUtil.hasText(content)
                    || content.equals(section.sourceLabel)
                    || content.equals(section.headingLine)) {
                continue;
            }
            if (content.length() <= safeChunkSize(strategyConfig)) {
                result.add(createChunk(RagChunkTypes.SECTION, section.sourceLabel, section.headingPath, content, index++, 1, 1));
                continue;
            }
            List<String> subContents = splitLongContent(content, strategyConfig.getChunkSize());
            int total = subContents.size();
            for (int i = 0; i < subContents.size(); i++) {
                result.add(createChunk(RagChunkTypes.SECTION, section.sourceLabel, section.headingPath, subContents.get(i), index++, i + 1, total));
            }
        }
        return postProcess(result);
    }

    /**
     * Builds question/answer chunks. A question line opens a new pair; answer lines and any
     * following plain lines extend the answer; plain lines before the first answer line
     * extend the question. Blank lines are ignored.
     */
    private List<RagChunk> buildQaChunks(String content, StrategyConfig strategyConfig) {
        List<RagChunk> result = new ArrayList<RagChunk>();
        String currentQuestion = null;
        StringBuilder answerBuilder = new StringBuilder();
        StringBuilder questionBuilder = new StringBuilder();
        int qaIndex = 1;

        for (String rawLine : content.split("\\n")) {
            String line = rawLine.trim();
            if (!StringUtil.hasText(line)) {
                continue;
            }
            Matcher questionMatcher = QUESTION_PREFIX.matcher(line);
            Matcher answerMatcher = ANSWER_PREFIX.matcher(line);
            if (questionMatcher.matches()) {
                // Emit the previous pair (if complete) before starting the next one.
                qaIndex = flushQaChunk(result, currentQuestion, questionBuilder, answerBuilder, qaIndex, strategyConfig);
                currentQuestion = questionMatcher.group(2).trim();
                questionBuilder = new StringBuilder(currentQuestion);
                answerBuilder = new StringBuilder();
                continue;
            }
            if (answerMatcher.matches()) {
                if (answerBuilder.length() > 0) {
                    answerBuilder.append('\n');
                }
                answerBuilder.append(answerMatcher.group(2).trim());
                continue;
            }
            if (answerBuilder.length() > 0) {
                answerBuilder.append('\n').append(line);
            } else if (questionBuilder.length() > 0) {
                questionBuilder.append('\n').append(line);
            }
        }
        flushQaChunk(result, currentQuestion, questionBuilder, answerBuilder, qaIndex, strategyConfig);
        return postProcess(result);
    }

    /**
     * Appends the chunk(s) for one completed question/answer pair.
     *
     * @return the index for the next pair: {@code qaIndex + 1} if chunks were added,
     *         otherwise {@code qaIndex} unchanged (no question or no answer text)
     */
    private int flushQaChunk(List<RagChunk> result,
                             String currentQuestion,
                             StringBuilder questionBuilder,
                             StringBuilder answerBuilder,
                             int qaIndex,
                             StrategyConfig strategyConfig) {
        if (!StringUtil.hasText(currentQuestion)) {
            return qaIndex;
        }
        if (!StringUtil.hasText(answerBuilder.toString())) {
            return qaIndex;
        }
        String question = questionBuilder.toString().trim();
        String answer = answerBuilder.toString().trim();
        // Both labels carry a colon so that the answer label is separated from its text.
        String baseContent = "问题:" + question + "\n答案:" + answer;
        List<String> subContents = baseContent.length() > safeChunkSize(strategyConfig)
                ? splitLongContent(baseContent, strategyConfig.getChunkSize())
                : Collections.singletonList(baseContent);
        int total = subContents.size();
        for (int i = 0; i < subContents.size(); i++) {
            RagChunk chunk = createChunk(RagChunkTypes.QA_PAIR, "Q" + qaIndex + " " + question, Collections.<String>emptyList(), subContents.get(i), result.size() + 1, i + 1, total);
            // Every part keeps the full question/answer and a shared group id.
            chunk.setQuestion(question);
            chunk.setAnswer(answer);
            chunk.getOptions().put(RagMetadataKeys.QA_GROUP_ID, "qa-" + qaIndex);
            result.add(chunk);
        }
        return qaIndex + 1;
    }

    /** Splits with {@link SimpleDocumentSplitter} using the configured size and overlap. */
    private List<RagChunk> buildParagraphChunks(String content, StrategyConfig strategyConfig) {
        List<RagChunk> result = new ArrayList<RagChunk>();
        // NOTE(review): overlap is not checked against chunk size here; confirm that
        // SimpleDocumentSplitter tolerates overlap >= chunk size.
        DocumentSplitter splitter = new SimpleDocumentSplitter(safeChunkSize(strategyConfig), safeOverlap(strategyConfig));
        List<Document> docs = splitter.split(new Document(content));
        int index = 1;
        for (Document doc : docs) {
            result.add(createChunk(RagChunkTypes.PARAGRAPH, "分块 " + index, Collections.<String>emptyList(), doc.getContent(), index, 1, 1));
            index++;
        }
        return postProcess(result);
    }

    /**
     * Splits with {@link RegexDocumentSplitter}; a blank regex in the config falls back to
     * splitting on blank lines.
     */
    private List<RagChunk> buildRegexChunks(String content, StrategyConfig strategyConfig) {
        String regex = StringUtil.hasText(strategyConfig.getRegex()) ? strategyConfig.getRegex() : "\\n\\s*\\n";
        DocumentSplitter splitter = new RegexDocumentSplitter(regex);
        List<Document> docs = splitter.split(new Document(content));
        List<RagChunk> result = new ArrayList<RagChunk>();
        int index = 1;
        for (Document doc : docs) {
            result.add(createChunk(RagChunkTypes.PARAGRAPH, "正则分块 " + index, Collections.<String>emptyList(), doc.getContent(), index, 1, 1));
            index++;
        }
        return postProcess(result);
    }

    /**
     * Splits long content into parts of at most {@code chunkSize} characters.
     *
     * <p>Paragraphs (separated by blank lines) are packed greedily into parts. A single
     * paragraph longer than the limit is cut into fixed-size slices, so content without
     * blank lines (for example a question/answer pair) is split as well.</p>
     *
     * @param chunkSize maximum part length; {@code null} or non-positive uses the default
     * @return the parts; never empty
     */
    private List<String> splitLongContent(String content, Integer chunkSize) {
        int size = chunkSize == null || chunkSize.intValue() <= 0 ? RagDefaults.CHUNK_SIZE : chunkSize.intValue();
        String[] paragraphs = content.split("\\n\\s*\\n");
        List<String> parts = new ArrayList<String>();
        StringBuilder current = new StringBuilder();
        for (String paragraph : paragraphs) {
            String text = paragraph.trim();
            if (!StringUtil.hasText(text)) {
                continue;
            }
            // "+ 2" accounts for the blank-line separator inserted between paragraphs.
            if (current.length() > 0 && current.length() + text.length() + 2 > size) {
                parts.add(current.toString().trim());
                current = new StringBuilder();
            }
            if (text.length() > size) {
                // The buffer is empty here: an oversized paragraph always triggers the flush above.
                text = sliceOversizedParagraph(text, size, parts);
            }
            if (current.length() > 0) {
                current.append("\n\n");
            }
            current.append(text);
        }
        if (current.length() > 0) {
            parts.add(current.toString().trim());
        }
        if (parts.isEmpty()) {
            parts.add(content);
        }
        return parts;
    }

    /**
     * Cuts full-size slices off the front of an oversized paragraph and appends them to
     * {@code parts}.
     *
     * @return the remaining tail (at most {@code size} characters, never empty)
     */
    private String sliceOversizedParagraph(String text, int size, List<String> parts) {
        int start = 0;
        while (text.length() - start > size) {
            int end = start + size;
            // Do not cut between the two halves of a surrogate pair.
            if (end - start > 1 && Character.isHighSurrogate(text.charAt(end - 1))) {
                end--;
            }
            String piece = text.substring(start, end).trim();
            if (!piece.isEmpty()) {
                parts.add(piece);
            }
            start = end;
        }
        return text.substring(start);
    }

    /**
     * Final clean-up shared by all strategies: drops blank chunks, non-QA chunks shorter
     * than 10 characters and chunks whose whitespace-normalized content was already seen,
     * then assigns sequential ids ("chunk-1", ...), the character count and a token
     * estimate of one token per four characters (at least 1).
     */
    private List<RagChunk> postProcess(List<RagChunk> chunks) {
        List<RagChunk> result = new ArrayList<RagChunk>();
        Set<String> dedup = new HashSet<String>();
        int index = 1;
        for (RagChunk chunk : chunks) {
            String content = chunk.getContent() == null ? "" : chunk.getContent().trim();
            if (!StringUtil.hasText(content)) {
                continue;
            }
            if (content.length() < 10 && !RagChunkTypes.QA_PAIR.equals(chunk.getChunkType())) {
                continue;
            }
            String dedupKey = content.replaceAll("\\s+", " ");
            if (!dedup.add(dedupKey)) {
                continue;
            }
            chunk.setChunkId("chunk-" + index);
            chunk.setCharCount(Integer.valueOf(content.length()));
            chunk.setTokenEstimate(Integer.valueOf(Math.max(1, content.length() / 4)));
            result.add(chunk);
            index++;
        }
        return result;
    }

    /**
     * Creates a chunk with a provisional id (replaced in {@link #postProcess(List)}).
     * A {@code null} content is stored as an empty string so that post-processing can
     * discard the chunk instead of failing.
     */
    private RagChunk createChunk(String chunkType,
                                 String sourceLabel,
                                 List<String> headingPath,
                                 String content,
                                 int index,
                                 int partNo,
                                 int partTotal) {
        RagChunk chunk = new RagChunk();
        chunk.setChunkId("chunk-" + index);
        chunk.setChunkType(chunkType);
        chunk.setSourceLabel(sourceLabel);
        chunk.setHeadingPath(new ArrayList<String>(headingPath));
        chunk.setContent(content == null ? "" : content.trim());
        chunk.setPartNo(Integer.valueOf(partNo));
        chunk.setPartTotal(Integer.valueOf(partTotal));
        if (!headingPath.isEmpty()) {
            chunk.getOptions().put(RagMetadataKeys.HEADING_PATH, new ArrayList<String>(headingPath));
        }
        if (RagChunkTypes.SECTION.equals(chunkType)) {
            chunk.getOptions().put(RagMetadataKeys.SOURCE_LABEL, sourceLabel);
        }
        return chunk;
    }

    /** Returns the configured chunk size, or the default when it is {@code null} or non-positive. */
    private int safeChunkSize(StrategyConfig strategyConfig) {
        Integer chunkSize = strategyConfig.getChunkSize();
        return chunkSize == null || chunkSize.intValue() <= 0 ? RagDefaults.CHUNK_SIZE : chunkSize.intValue();
    }

    /** Returns the configured overlap, or the default when it is {@code null} or negative. */
    private int safeOverlap(StrategyConfig strategyConfig) {
        Integer overlapSize = strategyConfig.getOverlapSize();
        return overlapSize == null || overlapSize.intValue() < 0 ? RagDefaults.OVERLAP_SIZE : overlapSize.intValue();
    }

    /** Joins lines with '\n', trims the result and collapses runs of three or more newlines. */
    private String joinAndTrim(List<String> lines) {
        String value = String.join("\n", lines).trim();
        return value.replaceAll("\\n{3,}", "\n\n");
    }

    /**
     * Copies the titles of the open headings into a new list.
     *
     * <p>The deque is iterated from the outermost to the innermost heading and each title is
     * inserted at the front, so the returned list starts with the innermost heading.</p>
     */
    private List<String> copyPath(Deque<HeadingLevel> stack) {
        // NOTE(review): the resulting order is innermost-first; confirm that consumers of
        // headingPath do not expect outermost-first before changing this.
        List<String> path = new ArrayList<String>();
        for (HeadingLevel item : stack) {
            path.add(0, item.title);
        }
        return path;
    }

    /** Lines collected for one section together with its label and heading path. */
    private static class SectionChunk {
        private final List<String> headingPath;
        private final String sourceLabel;
        /** The trimmed heading line that opened the section; {@code null} for untitled text. */
        private final String headingLine;
        private final List<String> lines = new ArrayList<String>();

        private SectionChunk(List<String> headingPath, String sourceLabel, String headingLine) {
            this.headingPath = headingPath;
            this.sourceLabel = sourceLabel;
            this.headingLine = headingLine;
        }
    }

    /** A heading on the open-heading stack: nesting level plus title. */
    private static class HeadingLevel {
        private final int level;
        private final String title;

        private HeadingLevel(int level, String title) {
            this.level = level;
            this.title = title;
        }
    }

    /** Recognizes outline-style headings and assigns them a nesting level. */
    private static class OutlineHeading {
        private static final Pattern CHINESE_CHAPTER = Pattern.compile("^第[一二三四五六七八九十百零两0-9]+[章节篇部分卷]\\s*(.*)$");
        private static final Pattern CHINESE_SECTION = Pattern.compile("^([一二三四五六七八九十百零]+[、.])\\s*(\\S+.*)$");
        private static final Pattern CHINESE_SUBSECTION = Pattern.compile("^[(]([一二三四五六七八九十百零0-9]+)[)]\\s*(\\S+.*)$");
        private static final Pattern NUMERIC_SECTION = Pattern.compile("^([0-9]+(?:\\.[0-9]+){0,4})\\s+(\\S+.*)$");
        private static final Pattern ENGLISH_SECTION = Pattern.compile("^(Chapter|Section|Part)\\s+([0-9IVXLC]+)(.*)$", Pattern.CASE_INSENSITIVE);
        private static final Pattern ENGLISH_ROMAN = Pattern.compile("^([IVXLC]+)[.、)\\s-]+(\\S+.*)$");

        private final int level;
        private final String title;

        private OutlineHeading(int level, String title) {
            this.level = level;
            this.title = title;
        }

        /**
         * Parses a line as an outline heading. The title is always the whole trimmed line.
         *
         * <p>Levels: Chinese chapter 1, Chinese section 2, Chinese subsection 3, numeric
         * headings by the number of dot-separated components, "Chapter"/"Part" 1,
         * "Section" 2, Roman-numeral headings 2. The patterns are tried in that order.</p>
         *
         * @return the heading, or {@code null} if the line matches no pattern
         */
        private static OutlineHeading parse(String line) {
            Matcher matcher = CHINESE_CHAPTER.matcher(line);
            if (matcher.matches()) {
                return new OutlineHeading(1, line.trim());
            }
            matcher = CHINESE_SECTION.matcher(line);
            if (matcher.matches()) {
                return new OutlineHeading(2, line.trim());
            }
            matcher = CHINESE_SUBSECTION.matcher(line);
            if (matcher.matches()) {
                return new OutlineHeading(3, line.trim());
            }
            matcher = NUMERIC_SECTION.matcher(line);
            if (matcher.matches()) {
                String code = matcher.group(1);
                int level = code.split("\\.").length;
                return new OutlineHeading(level, line.trim());
            }
            matcher = ENGLISH_SECTION.matcher(line);
            if (matcher.matches()) {
                // Locale.ROOT keeps the comparison below independent of the default locale
                // (e.g. the Turkish dotless i in "SECTION").
                String prefix = matcher.group(1).toLowerCase(Locale.ROOT);
                int level = "section".equals(prefix) ? 2 : 1;
                return new OutlineHeading(level, line.trim());
            }
            matcher = ENGLISH_ROMAN.matcher(line);
            if (matcher.matches()) {
                return new OutlineHeading(2, line.trim());
            }
            return null;
        }
    }
}

View File

@@ -0,0 +1,92 @@
package com.easyagents.rag.ingestion.model;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
/**
 * Mutable result of analysing a document before splitting: the source format,
 * the normalized text, the extracted features and the strategy recommendation
 * derived from them.
 */
public class AnalysisResult implements Serializable {

    /** Format of the source document (for example "md", "pdf", "docx"). */
    private String sourceFormat;
    /** Normalized document content. */
    private String normalizedContent;
    /** Structure type matching the recommended strategy. */
    private String recommendedStructureType;
    /** Code of the recommended split strategy. */
    private String recommendedStrategyCode;
    /** Display label of the recommended split strategy. */
    private String recommendedStrategyLabel;
    /** Confidence of the recommendation. */
    private Double confidence;
    /** Human-readable reasons for the recommendation; empty by default. */
    private List<String> reasons = new ArrayList<>();
    /** Scored candidate strategies; empty by default. */
    private List<CandidateStrategy> candidateStrategies = new ArrayList<>();
    /** Extracted document features keyed by name, in insertion order; empty by default. */
    private Map<String, Object> features = new LinkedHashMap<>();

    /** @return the source format */
    public String getSourceFormat() {
        return this.sourceFormat;
    }

    /** @param sourceFormat the source format to store */
    public void setSourceFormat(String sourceFormat) {
        this.sourceFormat = sourceFormat;
    }

    /** @return the normalized content */
    public String getNormalizedContent() {
        return this.normalizedContent;
    }

    /** @param normalizedContent the normalized content to store */
    public void setNormalizedContent(String normalizedContent) {
        this.normalizedContent = normalizedContent;
    }

    /** @return the recommended structure type */
    public String getRecommendedStructureType() {
        return this.recommendedStructureType;
    }

    /** @param recommendedStructureType the recommended structure type to store */
    public void setRecommendedStructureType(String recommendedStructureType) {
        this.recommendedStructureType = recommendedStructureType;
    }

    /** @return the recommended strategy code */
    public String getRecommendedStrategyCode() {
        return this.recommendedStrategyCode;
    }

    /** @param recommendedStrategyCode the recommended strategy code to store */
    public void setRecommendedStrategyCode(String recommendedStrategyCode) {
        this.recommendedStrategyCode = recommendedStrategyCode;
    }

    /** @return the recommended strategy label */
    public String getRecommendedStrategyLabel() {
        return this.recommendedStrategyLabel;
    }

    /** @param recommendedStrategyLabel the recommended strategy label to store */
    public void setRecommendedStrategyLabel(String recommendedStrategyLabel) {
        this.recommendedStrategyLabel = recommendedStrategyLabel;
    }

    /** @return the recommendation confidence, or {@code null} when not set */
    public Double getConfidence() {
        return this.confidence;
    }

    /** @param confidence the recommendation confidence to store */
    public void setConfidence(Double confidence) {
        this.confidence = confidence;
    }

    /** @return the stored reasons list itself (not a copy) */
    public List<String> getReasons() {
        return this.reasons;
    }

    /** @param reasons the reasons list to store; kept by reference */
    public void setReasons(List<String> reasons) {
        this.reasons = reasons;
    }

    /** @return the stored candidate list itself (not a copy) */
    public List<CandidateStrategy> getCandidateStrategies() {
        return this.candidateStrategies;
    }

    /** @param candidateStrategies the candidate list to store; kept by reference */
    public void setCandidateStrategies(List<CandidateStrategy> candidateStrategies) {
        this.candidateStrategies = candidateStrategies;
    }

    /** @return the stored feature map itself (not a copy) */
    public Map<String, Object> getFeatures() {
        return this.features;
    }

    /** @param features the feature map to store; kept by reference */
    public void setFeatures(Map<String, Object> features) {
        this.features = features;
    }
}

View File

@@ -0,0 +1,43 @@
package com.easyagents.rag.ingestion.model;
import java.io.Serializable;
/**
 * One scored split-strategy candidate: strategy code, display label and score.
 */
public class CandidateStrategy implements Serializable {

    /** Code identifying the strategy. */
    private String strategyCode;
    /** Display label of the strategy. */
    private String strategyLabel;
    /** Score assigned to the strategy. */
    private Double score;

    /** Creates an empty candidate; all properties are {@code null}. */
    public CandidateStrategy() {
    }

    /**
     * Creates a fully populated candidate.
     *
     * @param strategyCode  code identifying the strategy
     * @param strategyLabel display label of the strategy
     * @param score         score assigned to the strategy
     */
    public CandidateStrategy(String strategyCode, String strategyLabel, Double score) {
        this.score = score;
        this.strategyLabel = strategyLabel;
        this.strategyCode = strategyCode;
    }

    /** @return the strategy code */
    public String getStrategyCode() {
        return this.strategyCode;
    }

    /** @param strategyCode the strategy code to store */
    public void setStrategyCode(String strategyCode) {
        this.strategyCode = strategyCode;
    }

    /** @return the strategy label */
    public String getStrategyLabel() {
        return this.strategyLabel;
    }

    /** @param strategyLabel the strategy label to store */
    public void setStrategyLabel(String strategyLabel) {
        this.strategyLabel = strategyLabel;
    }

    /** @return the score, or {@code null} when not set */
    public Double getScore() {
        return this.score;
    }

    /** @param score the score to store */
    public void setScore(Double score) {
        this.score = score;
    }
}

View File

@@ -0,0 +1,79 @@
package com.easyagents.rag.ingestion.model;
import com.easyagents.rag.core.RagDefaults;
import com.easyagents.rag.core.RagStrategyCodes;
import java.io.Serializable;
/**
 * Mutable settings for a split strategy. A new instance starts with the
 * {@code AUTO} strategy code and the defaults from {@code RagDefaults};
 * {@code regex} starts as {@code null}.
 */
public class StrategyConfig implements Serializable {

    /** Code of the strategy to apply. */
    private String strategyCode = RagStrategyCodes.AUTO;
    /** Requested chunk size. */
    private Integer chunkSize = RagDefaults.CHUNK_SIZE;
    /** Requested overlap between chunks. */
    private Integer overlapSize = RagDefaults.OVERLAP_SIZE;
    /** Regular expression setting; no default. */
    private String regex;
    /** Requested number of rows per chunk. */
    private Integer rowsPerChunk = RagDefaults.ROWS_PER_CHUNK;
    /** Requested Markdown splitter level. */
    private Integer mdSplitterLevel = RagDefaults.MD_SPLITTER_LEVEL;

    /**
     * @return a new configuration holding only default values
     */
    public static StrategyConfig defaults() {
        return new StrategyConfig();
    }

    /**
     * Creates an independent configuration with the same property values.
     *
     * @return a new instance populated through the setters from this one
     */
    public StrategyConfig copy() {
        StrategyConfig duplicate = new StrategyConfig();
        duplicate.setStrategyCode(strategyCode);
        duplicate.setChunkSize(chunkSize);
        duplicate.setOverlapSize(overlapSize);
        duplicate.setRegex(regex);
        duplicate.setRowsPerChunk(rowsPerChunk);
        duplicate.setMdSplitterLevel(mdSplitterLevel);
        return duplicate;
    }

    /** @return the strategy code */
    public String getStrategyCode() {
        return this.strategyCode;
    }

    /** @param strategyCode the strategy code to store */
    public void setStrategyCode(String strategyCode) {
        this.strategyCode = strategyCode;
    }

    /** @return the chunk size, possibly {@code null} */
    public Integer getChunkSize() {
        return this.chunkSize;
    }

    /** @param chunkSize the chunk size to store */
    public void setChunkSize(Integer chunkSize) {
        this.chunkSize = chunkSize;
    }

    /** @return the overlap size, possibly {@code null} */
    public Integer getOverlapSize() {
        return this.overlapSize;
    }

    /** @param overlapSize the overlap size to store */
    public void setOverlapSize(Integer overlapSize) {
        this.overlapSize = overlapSize;
    }

    /** @return the regular expression, or {@code null} when not set */
    public String getRegex() {
        return this.regex;
    }

    /** @param regex the regular expression to store */
    public void setRegex(String regex) {
        this.regex = regex;
    }

    /** @return the rows per chunk, possibly {@code null} */
    public Integer getRowsPerChunk() {
        return this.rowsPerChunk;
    }

    /** @param rowsPerChunk the rows per chunk to store */
    public void setRowsPerChunk(Integer rowsPerChunk) {
        this.rowsPerChunk = rowsPerChunk;
    }

    /** @return the Markdown splitter level, possibly {@code null} */
    public Integer getMdSplitterLevel() {
        return this.mdSplitterLevel;
    }

    /** @param mdSplitterLevel the Markdown splitter level to store */
    public void setMdSplitterLevel(Integer mdSplitterLevel) {
        this.mdSplitterLevel = mdSplitterLevel;
    }
}

View File

@@ -0,0 +1,133 @@
package com.easyagents.rag.ingestion.recommend;
import com.easyagents.rag.core.RagStrategyCodes;
import com.easyagents.rag.core.RagStructureTypes;
import com.easyagents.rag.ingestion.model.AnalysisResult;
import com.easyagents.rag.ingestion.model.CandidateStrategy;
import java.math.BigDecimal;
import java.math.RoundingMode;
import java.util.*;
/**
 * Scores the Markdown, outline, QA and paragraph split strategies from the
 * features of an {@link AnalysisResult} and writes the recommendation
 * (strategy, structure type, confidence, reasons, ranked candidates) back
 * onto that result.
 */
public class SplitStrategyRecommender {

    /** Confidence below which the recommendation falls back to paragraph-length splitting. */
    private static final double FALLBACK_CONFIDENCE = 0.45D;

    /**
     * Computes the recommendation and stores it on {@code analysisResult}.
     *
     * @param analysisResult analysis to enrich; its source format and features are read.
     *                       A {@code null} feature map is treated as empty.
     * @return the same {@code analysisResult} instance, now carrying the recommendation
     * @throws NullPointerException if {@code analysisResult} is {@code null}
     */
    public AnalysisResult recommend(AnalysisResult analysisResult) {
        Objects.requireNonNull(analysisResult, "analysisResult");
        Map<String, Object> features = analysisResult.getFeatures();
        if (features == null) {
            // setFeatures(null) is allowed by the model; score it like a document without features.
            features = Collections.<String, Object>emptyMap();
        }
        String sourceFormat = safeLowercase(analysisResult.getSourceFormat());

        double markdownScore = number(features.get("markdownHeadingCount")) * 12
                + number(features.get("markdownLevelVariety")) * 8
                + ("md".equals(sourceFormat) ? 20 : 0);
        // Table-of-contents lines look like headings, so they count against the outline score.
        double outlineScore = number(features.get("outlineHeadingCount")) * 10
                + (("pdf".equals(sourceFormat) || "docx".equals(sourceFormat)) ? 5 : 0)
                - number(features.get("tocLineCount")) * 4;
        double qaScore = number(features.get("qaQuestionCount")) * 10
                + number(features.get("qaAnswerCount")) * 10
                + number(features.get("pairedQaCount")) * 18;
        double plainScore = 18
                + number(features.get("paragraphCount")) * 2
                + number(features.get("longParagraphCount")) * 3;

        Map<String, Double> scoreMap = new LinkedHashMap<String, Double>();
        scoreMap.put(RagStrategyCodes.MARKDOWN_SECTION, Double.valueOf(markdownScore));
        scoreMap.put(RagStrategyCodes.OUTLINE_SECTION, Double.valueOf(outlineScore));
        scoreMap.put(RagStrategyCodes.QA_PAIR, Double.valueOf(qaScore));
        scoreMap.put(RagStrategyCodes.PARAGRAPH_LENGTH, Double.valueOf(plainScore));

        // Highest score first; the sort is stable, so ties keep the insertion order above.
        List<Map.Entry<String, Double>> ranking = new ArrayList<Map.Entry<String, Double>>(scoreMap.entrySet());
        ranking.sort((left, right) -> Double.compare(right.getValue().doubleValue(), left.getValue().doubleValue()));

        Map.Entry<String, Double> best = ranking.get(0);
        Map.Entry<String, Double> second = ranking.size() > 1 ? ranking.get(1) : best;
        double confidence = computeConfidence(best.getValue().doubleValue(), second.getValue().doubleValue());
        String recommendedStrategy = confidence < FALLBACK_CONFIDENCE ? RagStrategyCodes.PARAGRAPH_LENGTH : best.getKey();

        analysisResult.setRecommendedStrategyCode(recommendedStrategy);
        analysisResult.setRecommendedStrategyLabel(toStrategyLabel(recommendedStrategy));
        analysisResult.setRecommendedStructureType(toStructureType(recommendedStrategy));
        analysisResult.setConfidence(Double.valueOf(scale(confidence)));
        analysisResult.setReasons(buildReasons(features, recommendedStrategy, confidence));

        List<CandidateStrategy> candidates = new ArrayList<CandidateStrategy>();
        for (Map.Entry<String, Double> entry : ranking) {
            candidates.add(new CandidateStrategy(entry.getKey(), toStrategyLabel(entry.getKey()), Double.valueOf(scale(entry.getValue().doubleValue()))));
        }
        analysisResult.setCandidateStrategies(candidates);
        return analysisResult;
    }

    /**
     * Maps a strategy code to its structure type.
     *
     * @param strategyCode strategy code, may be {@code null}
     * @return the matching structure type; plain paragraph for any unrecognised code
     */
    public String toStructureType(String strategyCode) {
        if (RagStrategyCodes.MARKDOWN_SECTION.equals(strategyCode)) {
            return RagStructureTypes.MARKDOWN_HEADING;
        }
        if (RagStrategyCodes.OUTLINE_SECTION.equals(strategyCode)) {
            return RagStructureTypes.OUTLINE_SECTION;
        }
        if (RagStrategyCodes.QA_PAIR.equals(strategyCode)) {
            return RagStructureTypes.QA_PAIR;
        }
        return RagStructureTypes.PLAIN_PARAGRAPH;
    }

    /**
     * Maps a strategy code to its display label.
     *
     * @param strategyCode strategy code, may be {@code null}
     * @return the display label; the paragraph-length label for any unrecognised code
     */
    public String toStrategyLabel(String strategyCode) {
        if (RagStrategyCodes.MARKDOWN_SECTION.equals(strategyCode)) {
            return "Markdown 标题拆分";
        }
        if (RagStrategyCodes.OUTLINE_SECTION.equals(strategyCode)) {
            return "章节标题拆分";
        }
        if (RagStrategyCodes.QA_PAIR.equals(strategyCode)) {
            return "问答对拆分";
        }
        if (RagStrategyCodes.CUSTOM_REGEX.equals(strategyCode)) {
            return "自定义正则拆分";
        }
        if (RagStrategyCodes.AUTO.equals(strategyCode)) {
            return "自动推荐";
        }
        return "自然段长度拆分";
    }

    /** Builds the user-facing explanation for the chosen strategy. */
    private List<String> buildReasons(Map<String, Object> features, String strategyCode, double confidence) {
        List<String> reasons = new ArrayList<String>();
        if (RagStrategyCodes.MARKDOWN_SECTION.equals(strategyCode)) {
            reasons.add("检测到 Markdown 标题结构,适合按标题层级拆分");
            reasons.add("标题层级数:" + count(features.get("markdownLevelVariety")) + ",标题数量:" + count(features.get("markdownHeadingCount")));
            return reasons;
        }
        if (RagStrategyCodes.OUTLINE_SECTION.equals(strategyCode)) {
            reasons.add("检测到中英文标题/章节编号,适合按章节拆分");
            reasons.add("章节标题数量:" + count(features.get("outlineHeadingCount")));
            return reasons;
        }
        if (RagStrategyCodes.QA_PAIR.equals(strategyCode)) {
            reasons.add("检测到问答结构,适合按一问一答拆分");
            reasons.add("问题数量:" + count(features.get("qaQuestionCount")) + ",成对问答数量:" + count(features.get("pairedQaCount")));
            return reasons;
        }
        reasons.add("结构特征不够集中,回退为自然段长度拆分");
        reasons.add("推荐置信度:" + scale(confidence));
        return reasons;
    }

    /**
     * Combines the absolute strength of the best score (60% weight, saturating at 100)
     * with its lead over the runner-up (at most 0.4, saturating at a lead of 20).
     * The result is clamped to the range 0.25 .. 1.
     */
    private double computeConfidence(double bestScore, double secondScore) {
        double delta = Math.max(0D, bestScore - secondScore);
        double base = Math.min(1D, bestScore / 100D);
        return Math.min(1D, Math.max(0.25D, base * 0.6D + Math.min(0.4D, delta / 50D)));
    }

    /** Reads a feature as a double; anything that is not a {@link Number} counts as 0. */
    private double number(Object value) {
        if (value instanceof Number) {
            return ((Number) value).doubleValue();
        }
        return 0D;
    }

    /**
     * Renders a count feature for display. Integral values are printed without a
     * fractional part ("3" rather than "3.0"); other values are printed as doubles.
     */
    private String count(Object value) {
        double numeric = number(value);
        if (numeric == Math.rint(numeric) && Math.abs(numeric) < 1e15D) {
            return String.valueOf((long) numeric);
        }
        return String.valueOf(numeric);
    }

    /** Lower-cases with {@link Locale#ROOT}; {@code null} becomes the empty string. */
    private String safeLowercase(String value) {
        return value == null ? "" : value.toLowerCase(Locale.ROOT);
    }

    /** Rounds to two decimal places, half up. */
    private double scale(double value) {
        return BigDecimal.valueOf(value).setScale(2, RoundingMode.HALF_UP).doubleValue();
    }
}

View File

@@ -0,0 +1,80 @@
package com.easyagents.rag.ingestion;
import com.easyagents.rag.core.RagChunk;
import com.easyagents.rag.core.RagChunkTypes;
import com.easyagents.rag.core.RagStrategyCodes;
import com.easyagents.rag.ingestion.analysis.DocumentStructureAnalyzer;
import com.easyagents.rag.ingestion.chunk.RagSplitStrategyRegistry;
import com.easyagents.rag.ingestion.model.AnalysisResult;
import com.easyagents.rag.ingestion.model.StrategyConfig;
import com.easyagents.rag.ingestion.recommend.SplitStrategyRecommender;
import org.junit.Assert;
import org.junit.Test;
import java.util.List;
/**
 * End-to-end checks of the ingestion pipeline: structure analysis, strategy
 * recommendation and chunk splitting.
 */
public class RagIngestionPipelineTest {

    /** Mixed English/Chinese question-answer sample shared by the QA tests. */
    private static final String QA_SAMPLE = "Q: How to reset password?\n"
            + "A: Open admin page and click reset.\n\n"
            + "问:默认密码是什么?\n"
            + "答:由系统配置统一决定。";

    private final DocumentStructureAnalyzer analyzer = new DocumentStructureAnalyzer();
    private final SplitStrategyRecommender recommender = new SplitStrategyRecommender();
    private final RagSplitStrategyRegistry registry = new RagSplitStrategyRegistry();

    /** Runs analysis followed by recommendation on the given content. */
    private AnalysisResult analyzeAndRecommend(String content, String format) {
        return recommender.recommend(analyzer.analyze(content, format));
    }

    /** Creates a default configuration pinned to the given strategy code. */
    private StrategyConfig configFor(String strategyCode) {
        StrategyConfig config = StrategyConfig.defaults();
        config.setStrategyCode(strategyCode);
        return config;
    }

    /** A Markdown document with headings is recommended the Markdown strategy. */
    @Test
    public void shouldRecommendMarkdownStrategy() {
        String document = "# Quick Start\n"
                + "Welcome\n\n"
                + "## Install\n"
                + "Run npm install\n\n"
                + "## Usage\n"
                + "Run pnpm dev";

        AnalysisResult result = analyzeAndRecommend(document, "md");

        Assert.assertEquals(RagStrategyCodes.MARKDOWN_SECTION, result.getRecommendedStrategyCode());
        Assert.assertTrue(result.getConfidence().doubleValue() > 0.4D);
    }

    /** Question/answer markers in both languages lead to the QA strategy. */
    @Test
    public void shouldRecommendQaStrategyForEnglishAndChinese() {
        AnalysisResult result = analyzeAndRecommend(QA_SAMPLE, "txt");

        Assert.assertEquals(RagStrategyCodes.QA_PAIR, result.getRecommendedStrategyCode());
    }

    /** Outline splitting yields one chunk per heading and tracks the heading path. */
    @Test
    public void shouldSplitOutlineDocumentByHeadingPath() {
        String document = "第1章 总则\n适用范围说明。\n\n1.1 目标\n定义系统目标。\n\n1.2 范围\n定义系统范围。";
        AnalysisResult result = analyzeAndRecommend(document, "docx");

        List<RagChunk> pieces = registry.split(result, configFor(RagStrategyCodes.OUTLINE_SECTION));

        Assert.assertEquals(3, pieces.size());
        Assert.assertEquals("第1章 总则", pieces.get(0).getSourceLabel());
        Assert.assertEquals(2, pieces.get(1).getHeadingPath().size());
    }

    /** QA splitting yields one chunk per question/answer pair. */
    @Test
    public void shouldSplitQaDocumentByPair() {
        AnalysisResult result = analyzeAndRecommend(QA_SAMPLE, "txt");

        List<RagChunk> pieces = registry.split(result, configFor(RagStrategyCodes.QA_PAIR));

        Assert.assertEquals(2, pieces.size());
        Assert.assertEquals(RagChunkTypes.QA_PAIR, pieces.get(0).getChunkType());
        Assert.assertTrue(pieces.get(0).getContent().contains("问题"));
        Assert.assertTrue(pieces.get(1).getAnswer().contains("系统配置"));
    }
}

View File

@@ -0,0 +1,32 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>com.easyagents</groupId>
<artifactId>easy-agents-rag</artifactId>
<version>${revision}</version>
</parent>
<artifactId>easy-agents-rag-ocr</artifactId>
<name>easy-agents-rag-ocr</name>
<properties>
<maven.compiler.source>8</maven.compiler.source>
<maven.compiler.target>8</maven.compiler.target>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
</properties>
<dependencies>
<dependency>
<groupId>com.easyagents</groupId>
<artifactId>easy-agents-core</artifactId>
</dependency>
<dependency>
<groupId>com.easyagents</groupId>
<artifactId>easy-agents-rag-core</artifactId>
</dependency>
</dependencies>
</project>

View File

@@ -0,0 +1,36 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>com.easyagents</groupId>
<artifactId>easy-agents-rag</artifactId>
<version>${revision}</version>
</parent>
<artifactId>easy-agents-rag-retrieval</artifactId>
<name>easy-agents-rag-retrieval</name>
<properties>
<maven.compiler.source>8</maven.compiler.source>
<maven.compiler.target>8</maven.compiler.target>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
</properties>
<dependencies>
<dependency>
<groupId>com.easyagents</groupId>
<artifactId>easy-agents-core</artifactId>
</dependency>
<dependency>
<groupId>com.easyagents</groupId>
<artifactId>easy-agents-rag-core</artifactId>
</dependency>
<dependency>
<groupId>com.easyagents</groupId>
<artifactId>easy-agents-rag-enhance</artifactId>
</dependency>
</dependencies>
</project>

24
easy-agents-rag/pom.xml Normal file
View File

@@ -0,0 +1,24 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>com.easyagents</groupId>
<artifactId>easy-agents</artifactId>
<version>${revision}</version>
</parent>
<artifactId>easy-agents-rag</artifactId>
<packaging>pom</packaging>
<name>easy-agents-rag</name>
<modules>
<module>easy-agents-rag-core</module>
<module>easy-agents-rag-ingestion</module>
<module>easy-agents-rag-ocr</module>
<module>easy-agents-rag-enhance</module>
<module>easy-agents-rag-retrieval</module>
</modules>
</project>

View File

@@ -51,6 +51,34 @@
<artifactId>easy-agents-bom</artifactId>
</dependency>
<dependency>
<groupId>com.easyagents</groupId>
<artifactId>easy-agents-rag-core</artifactId>
</dependency>
<dependency>
<groupId>com.easyagents</groupId>
<artifactId>easy-agents-rag-ingestion</artifactId>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-test</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.assertj</groupId>
<artifactId>assertj-core</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<scope>test</scope>
</dependency>
</dependencies>
</project>

View File

@@ -4,6 +4,7 @@ import com.easyagents.llm.deepseek.DeepseekConfig;
import com.easyagents.llm.deepseek.DeepseekChatModel;
import org.springframework.boot.autoconfigure.condition.ConditionalOnClass;
import org.springframework.boot.autoconfigure.condition.ConditionalOnMissingBean;
import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
import org.springframework.boot.context.properties.EnableConfigurationProperties;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
@@ -13,6 +14,7 @@ import org.springframework.context.annotation.Configuration;
* DeepSeek
*/
@ConditionalOnClass(DeepseekChatModel.class)
@ConditionalOnProperty(prefix = "easy-agents.llm.deepseek", name = "api-key")
@Configuration(proxyBeanMethods = false)
@EnableConfigurationProperties(DeepSeekProperties.class)
public class DeepSeekAutoConfiguration {

View File

@@ -4,6 +4,7 @@ import com.easyagents.llm.ollama.OllamaChatModel;
import com.easyagents.llm.ollama.OllamaChatConfig;
import org.springframework.boot.autoconfigure.condition.ConditionalOnClass;
import org.springframework.boot.autoconfigure.condition.ConditionalOnMissingBean;
import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
import org.springframework.boot.context.properties.EnableConfigurationProperties;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
@@ -15,6 +16,7 @@ import org.springframework.context.annotation.Configuration;
* @since 2025-02-11
*/
@ConditionalOnClass(OllamaChatModel.class)
@ConditionalOnProperty(prefix = "easy-agents.llm.ollama", name = "model")
@Configuration(proxyBeanMethods = false)
@EnableConfigurationProperties(OllamaProperties.class)
public class OllamaAutoConfiguration {
@@ -26,7 +28,7 @@ public class OllamaAutoConfiguration {
config.setApiKey(properties.getApiKey());
config.setEndpoint(properties.getEndpoint());
config.setModel(properties.getModel());
config.setThinkingEnabled(properties.getThink());
config.setThinkingEnabled(Boolean.TRUE.equals(properties.getThink()));
return new OllamaChatModel(config);
}

View File

@@ -12,7 +12,7 @@ public class OllamaProperties {
private String model;
private String endpoint = "http://localhost:11434";
private String apiKey;
private Boolean think;
private Boolean think = Boolean.FALSE;
public String getModel() {
return model;

View File

@@ -4,6 +4,7 @@ import com.easyagents.llm.openai.OpenAIChatModel;
import com.easyagents.llm.openai.OpenAIChatConfig;
import org.springframework.boot.autoconfigure.condition.ConditionalOnClass;
import org.springframework.boot.autoconfigure.condition.ConditionalOnMissingBean;
import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
import org.springframework.boot.context.properties.EnableConfigurationProperties;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
@@ -15,6 +16,7 @@ import org.springframework.context.annotation.Configuration;
* @since 2024-04-10
*/
@ConditionalOnClass(OpenAIChatModel.class)
@ConditionalOnProperty(prefix = "easy-agents.llm.openai", name = "api-key")
@Configuration(proxyBeanMethods = false)
@EnableConfigurationProperties(OpenAIProperties.class)
public class OpenAIAutoConfiguration {

View File

@@ -4,6 +4,7 @@ import com.easyagents.llm.qwen.QwenChatModel;
import com.easyagents.llm.qwen.QwenChatConfig;
import org.springframework.boot.autoconfigure.condition.ConditionalOnClass;
import org.springframework.boot.autoconfigure.condition.ConditionalOnMissingBean;
import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
import org.springframework.boot.context.properties.EnableConfigurationProperties;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
@@ -15,6 +16,7 @@ import org.springframework.context.annotation.Configuration;
* @since 2024-04-10
*/
@ConditionalOnClass(QwenChatModel.class)
@ConditionalOnProperty(prefix = "easy-agents.llm.qwen", name = "api-key")
@Configuration(proxyBeanMethods = false)
@EnableConfigurationProperties(QwenProperties.class)
public class QwenAutoConfiguration {

View File

@@ -0,0 +1,42 @@
package com.easyagents.spring.boot.rag.ingestion;
import com.easyagents.rag.ingestion.DefaultRagIngestionService;
import com.easyagents.rag.ingestion.RagIngestionService;
import com.easyagents.rag.ingestion.analysis.DocumentStructureAnalyzer;
import com.easyagents.rag.ingestion.chunk.RagSplitStrategyRegistry;
import com.easyagents.rag.ingestion.recommend.SplitStrategyRecommender;
import org.springframework.boot.autoconfigure.condition.ConditionalOnClass;
import org.springframework.boot.autoconfigure.condition.ConditionalOnMissingBean;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
/**
 * Registers the RAG ingestion beans: structure analyzer, strategy recommender,
 * split-strategy registry and the ingestion service built from them.
 * The configuration is conditional on {@link RagIngestionService} being on the
 * classpath, and every bean is conditional on no bean of that type being
 * defined already.
 */
@ConditionalOnClass(RagIngestionService.class)
@Configuration(proxyBeanMethods = false)
public class RagIngestionAutoConfiguration {
/** Default {@link DocumentStructureAnalyzer}, created with its no-argument constructor. */
@Bean
@ConditionalOnMissingBean
public DocumentStructureAnalyzer documentStructureAnalyzer() {
return new DocumentStructureAnalyzer();
}
/** Default {@link SplitStrategyRecommender}, created with its no-argument constructor. */
@Bean
@ConditionalOnMissingBean
public SplitStrategyRecommender splitStrategyRecommender() {
return new SplitStrategyRecommender();
}
/** Default {@link RagSplitStrategyRegistry}, created with its no-argument constructor. */
@Bean
@ConditionalOnMissingBean
public RagSplitStrategyRegistry ragSplitStrategyRegistry() {
return new RagSplitStrategyRegistry();
}
/**
 * Default {@link RagIngestionService}. Collaborators are injected as method
 * parameters because proxyBeanMethods is disabled for this configuration.
 *
 * @param documentStructureAnalyzer analyzer bean to use
 * @param splitStrategyRecommender  recommender bean to use
 * @param ragSplitStrategyRegistry  registry bean to use
 * @return a {@link DefaultRagIngestionService} wired with the given collaborators
 */
@Bean
@ConditionalOnMissingBean
public RagIngestionService ragIngestionService(DocumentStructureAnalyzer documentStructureAnalyzer,
SplitStrategyRecommender splitStrategyRecommender,
RagSplitStrategyRegistry ragSplitStrategyRegistry) {
return new DefaultRagIngestionService(documentStructureAnalyzer, splitStrategyRecommender, ragSplitStrategyRegistry);
}
}

View File

@@ -4,6 +4,7 @@ import com.easyagents.store.aliyun.AliyunVectorStore;
import com.easyagents.store.aliyun.AliyunVectorStoreConfig;
import org.springframework.boot.autoconfigure.condition.ConditionalOnClass;
import org.springframework.boot.autoconfigure.condition.ConditionalOnMissingBean;
import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
import org.springframework.boot.context.properties.EnableConfigurationProperties;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
@@ -14,6 +15,7 @@ import org.springframework.context.annotation.Configuration;
*/
@Configuration(proxyBeanMethods = false)
@ConditionalOnClass(AliyunVectorStore.class)
@ConditionalOnProperty(prefix = "easy-agents.store.aliyun", name = "endpoint")
@EnableConfigurationProperties(AliyunProperties.class)
public class AliyunAutoConfiguration {

View File

@@ -17,9 +17,9 @@ package com.easyagents.spring.boot.store.chroma;
import com.easyagents.store.chroma.ChromaVectorStore;
import com.easyagents.store.chroma.ChromaVectorStoreConfig;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.autoconfigure.condition.ConditionalOnClass;
import org.springframework.boot.autoconfigure.condition.ConditionalOnMissingBean;
import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
import org.springframework.boot.context.properties.EnableConfigurationProperties;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
@@ -31,6 +31,7 @@ import org.springframework.context.annotation.Configuration;
*/
@Configuration(proxyBeanMethods = false)
@ConditionalOnClass(ChromaVectorStore.class)
@ConditionalOnProperty(prefix = "easy-agents.store.chroma", name = "host")
@EnableConfigurationProperties(ChromaProperties.class)
public class ChromaAutoConfiguration {

View File

@@ -21,6 +21,7 @@ import com.easyagents.store.elasticsearch.ElasticSearchVectorStoreConfig;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.autoconfigure.condition.ConditionalOnClass;
import org.springframework.boot.autoconfigure.condition.ConditionalOnMissingBean;
import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
import org.springframework.boot.context.properties.EnableConfigurationProperties;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
@@ -31,6 +32,7 @@ import org.springframework.context.annotation.Configuration;
*/
@Configuration(proxyBeanMethods = false)
@ConditionalOnClass(ElasticSearchVectorStore.class)
@ConditionalOnProperty(prefix = "easy-agents.store.elasticsearch", name = "server-url")
@EnableConfigurationProperties(ElasticSearchProperties.class)
public class ElasticSearchAutoConfiguration {

View File

@@ -21,6 +21,7 @@ import org.opensearch.client.opensearch.OpenSearchClient;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.autoconfigure.condition.ConditionalOnClass;
import org.springframework.boot.autoconfigure.condition.ConditionalOnMissingBean;
import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
import org.springframework.boot.context.properties.EnableConfigurationProperties;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
@@ -31,13 +32,14 @@ import org.springframework.context.annotation.Configuration;
*/
@Configuration(proxyBeanMethods = false)
@ConditionalOnClass(OpenSearchVectorStore.class)
@ConditionalOnProperty(prefix = "easy-agents.store.opensearch", name = "server-url")
@EnableConfigurationProperties(OpenSearchProperties.class)
public class OpenSearchAutoConfiguration {
@Bean
@ConditionalOnMissingBean
public OpenSearchVectorStore openSearchVectorStore(OpenSearchProperties properties,
@Autowired(required = false) OpenSearchClient client) {
@Autowired(required = false) OpenSearchClient client) {
OpenSearchVectorStoreConfig config = new OpenSearchVectorStoreConfig();
config.setServerUrl(properties.getServerUrl());
config.setApiKey(properties.getApiKey());

View File

@@ -4,6 +4,7 @@ import com.easyagents.store.qcloud.QCloudVectorStore;
import com.easyagents.store.qcloud.QCloudVectorStoreConfig;
import org.springframework.boot.autoconfigure.condition.ConditionalOnClass;
import org.springframework.boot.autoconfigure.condition.ConditionalOnMissingBean;
import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
import org.springframework.boot.context.properties.EnableConfigurationProperties;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
@@ -14,6 +15,7 @@ import org.springframework.context.annotation.Configuration;
*/
@Configuration(proxyBeanMethods = false)
@ConditionalOnClass(QCloudVectorStore.class)
@ConditionalOnProperty(prefix = "easy-agents.store.qcloud", name = "host")
@EnableConfigurationProperties(QCloudProperties.class)
public class QCloudStoreAutoConfiguration {

View File

@@ -1,10 +1,11 @@
org.springframework.boot.autoconfigure.EnableAutoConfiguration=\
com.easyagents.spring.boot.chatModel.chatglm.ChatglmAutoConfiguration,\
com.easyagents.spring.boot.chatModel.openai.OpenAIAutoConfiguration,\
com.easyagents.spring.boot.chatModel.qwen.QwenAutoConfiguration,\
com.easyagents.spring.boot.chatModel.spark.SparkAutoConfiguration,\
com.easyagents.spring.boot.llm.openai.OpenAIAutoConfiguration,\
com.easyagents.spring.boot.llm.qwen.QwenAutoConfiguration,\
com.easyagents.spring.boot.store.aliyun.AliyunAutoConfiguration,\
com.easyagents.spring.boot.store.qcloud.QCloudStoreAutoConfiguration,\
com.easyagents.spring.boot.chatModel.ollama.OllamaAutoConfiguration,\
com.easyagents.spring.boot.chatModel.deepseek.DeepSeekAutoConfiguration,\
com.easyagents.spring.boot.store.chroma.ChromaAutoConfiguration
com.easyagents.spring.boot.llm.ollama.OllamaAutoConfiguration,\
com.easyagents.spring.boot.llm.deepseek.DeepSeekAutoConfiguration,\
com.easyagents.spring.boot.store.chroma.ChromaAutoConfiguration,\
com.easyagents.spring.boot.store.elasticsearch.ElasticSearchAutoConfiguration,\
com.easyagents.spring.boot.store.opensearch.OpenSearchAutoConfiguration,\
com.easyagents.spring.boot.rag.ingestion.RagIngestionAutoConfiguration

View File

@@ -1,8 +1,10 @@
com.easyagents.spring.boot.chatModel.chatglm.ChatglmAutoConfiguration
com.easyagents.spring.boot.chatModel.openai.OpenAIAutoConfiguration
com.easyagents.spring.boot.chatModel.qwen.QwenAutoConfiguration
com.easyagents.spring.boot.chatModel.spark.SparkAutoConfiguration
com.easyagents.spring.boot.llm.openai.OpenAIAutoConfiguration
com.easyagents.spring.boot.llm.qwen.QwenAutoConfiguration
com.easyagents.spring.boot.store.aliyun.AliyunAutoConfiguration
com.easyagents.spring.boot.store.qcloud.QCloudStoreAutoConfiguration
com.easyagents.spring.boot.chatModel.ollama.OllamaAutoConfiguration
com.easyagents.spring.boot.llm.ollama.OllamaAutoConfiguration
com.easyagents.spring.boot.llm.deepseek.DeepSeekAutoConfiguration
com.easyagents.spring.boot.store.chroma.ChromaAutoConfiguration
com.easyagents.spring.boot.store.elasticsearch.ElasticSearchAutoConfiguration
com.easyagents.spring.boot.store.opensearch.OpenSearchAutoConfiguration
com.easyagents.spring.boot.rag.ingestion.RagIngestionAutoConfiguration

View File

@@ -0,0 +1,31 @@
package com.easyagents.spring.boot.autoconfigure;
import com.easyagents.llm.ollama.OllamaChatModel;
import com.easyagents.spring.boot.llm.ollama.OllamaAutoConfiguration;
import com.easyagents.spring.boot.rag.ingestion.RagIngestionAutoConfiguration;
import com.easyagents.spring.boot.store.opensearch.OpenSearchAutoConfiguration;
import org.junit.Assert;
import org.junit.Test;
import org.springframework.boot.test.context.runner.ApplicationContextRunner;
/**
 * Verifies the property gates of the starter's auto-configurations: RAG
 * ingestion is always registered, while the Ollama model and the OpenSearch
 * store require explicit properties.
 */
public class StarterConditionalAutoConfigurationTest {

    private final ApplicationContextRunner contextRunner = new ApplicationContextRunner()
            .withUserConfiguration(RagIngestionAutoConfiguration.class, OllamaAutoConfiguration.class, OpenSearchAutoConfiguration.class);

    /** Without any properties only the ingestion service is present. */
    @Test
    public void shouldNotCreateOptionalBeansWithoutExplicitProperties() {
        contextRunner.run(context -> {
            Assert.assertTrue(context.containsBean("ragIngestionService"));
            Assert.assertFalse(context.containsBean("ollamaLlm"));
            // A name-only check passes vacuously if the bean is registered under a
            // different name, so the absence is also asserted by type.
            Assert.assertTrue(context.getBeansOfType(OllamaChatModel.class).isEmpty());
            Assert.assertFalse(context.containsBean("openSearchVectorStore"));
        });
    }

    /** Setting the Ollama model property enables the Ollama chat model bean. */
    @Test
    public void shouldCreateOllamaBeanWhenModelConfigured() {
        contextRunner
                .withPropertyValues("easy-agents.llm.ollama.model=qwen3:8b")
                .run(context -> Assert.assertNotNull(context.getBean(OllamaChatModel.class)));
    }
}

31
pom.xml
View File

@@ -16,6 +16,7 @@
<modules>
<module>easy-agents-bom</module>
<module>easy-agents-core</module>
<module>easy-agents-rag</module>
<module>easy-agents-chat</module>
<module>easy-agents-store</module>
<module>easy-agents-spring-boot-starter</module>
@@ -118,6 +119,36 @@
<version>${revision}</version>
</dependency>
<dependency>
<groupId>com.easyagents</groupId>
<artifactId>easy-agents-rag-core</artifactId>
<version>${revision}</version>
</dependency>
<dependency>
<groupId>com.easyagents</groupId>
<artifactId>easy-agents-rag-ingestion</artifactId>
<version>${revision}</version>
</dependency>
<dependency>
<groupId>com.easyagents</groupId>
<artifactId>easy-agents-rag-ocr</artifactId>
<version>${revision}</version>
</dependency>
<dependency>
<groupId>com.easyagents</groupId>
<artifactId>easy-agents-rag-enhance</artifactId>
<version>${revision}</version>
</dependency>
<dependency>
<groupId>com.easyagents</groupId>
<artifactId>easy-agents-rag-retrieval</artifactId>
<version>${revision}</version>
</dependency>
<dependency>
<groupId>com.easyagents</groupId>
<artifactId>easy-agents-bom</artifactId>