feat: RAG分块策略增强

This commit is contained in:
2026-03-29 17:28:12 +08:00
parent 2f20064ee1
commit 941995d1b8
28 changed files with 1719 additions and 12 deletions

View File

@@ -0,0 +1,28 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<!-- Maven descriptor for the easy-agents-rag-core module. -->
<!-- No packaging element is declared, so Maven's default packaging (jar) applies. -->
<modelVersion>4.0.0</modelVersion>
<!-- Parent module. The version comes from the ${revision} property, which is not defined in this file. -->
<parent>
<groupId>com.easyagents</groupId>
<artifactId>easy-agents-rag</artifactId>
<version>${revision}</version>
</parent>
<!-- groupId and version are not declared here and are therefore inherited from the parent. -->
<artifactId>easy-agents-rag-core</artifactId>
<name>easy-agents-rag-core</name>
<properties>
<!-- Compile for Java 8: sources must avoid newer language features and APIs. -->
<maven.compiler.source>8</maven.compiler.source>
<maven.compiler.target>8</maven.compiler.target>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
</properties>
<dependencies>
<!-- No version is given for this dependency; presumably it is managed by a dependencyManagement section in a parent POM. TODO confirm. -->
<dependency>
<groupId>com.easyagents</groupId>
<artifactId>easy-agents-core</artifactId>
</dependency>
</dependencies>
</project>

View File

@@ -0,0 +1,128 @@
package com.easyagents.rag.core;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
/**
 * Mutable, serializable data holder describing one chunk of a document.
 *
 * <p>The class performs no validation or computation of its own: every value
 * is stored exactly as supplied, with one exception. The three collection
 * properties ({@code headingPath}, {@code warnings}, {@code options}) are never
 * {@code null}; passing {@code null} to their setters resets them to a fresh
 * empty collection.
 *
 * <p>Instances are not thread-safe.
 */
public class RagChunk implements Serializable {

    /**
     * Explicit stream version so that later edits to this class do not
     * silently change the compiler-derived identifier and break
     * deserialization of previously written instances.
     */
    private static final long serialVersionUID = 1L;

    /** Identifier of this chunk. */
    private String chunkId;

    /** Chunk type code; presumably one of the {@code RagChunkTypes} constants (not enforced here). */
    private String chunkType;

    /** Label describing where the chunk came from. */
    private String sourceLabel;

    /** Headings associated with the chunk; ordering convention is not defined in this class. */
    private List<String> headingPath = new ArrayList<>();

    /** Text content of the chunk. */
    private String content;

    /** Question text; presumably only populated for question/answer chunks. */
    private String question;

    /** Answer text; presumably only populated for question/answer chunks. */
    private String answer;

    /** Character count supplied by the producer; not computed by this class. */
    private Integer charCount;

    /** Token estimate supplied by the producer; not computed by this class. */
    private Integer tokenEstimate;

    /** Part number of this chunk; defaults to 1. */
    private Integer partNo = 1;

    /** Total number of parts; defaults to 1. */
    private Integer partTotal = 1;

    /** Warning messages attached to the chunk. */
    private List<String> warnings = new ArrayList<>();

    /**
     * Free-form options, kept in insertion order.
     * NOTE(review): values are typed {@code Object}; Java serialization of this
     * chunk will fail at runtime if a stored value is not itself serializable.
     */
    private Map<String, Object> options = new LinkedHashMap<>();

    /** @return the chunk identifier, or {@code null} if unset */
    public String getChunkId() {
        return chunkId;
    }

    /** @param chunkId the chunk identifier */
    public void setChunkId(String chunkId) {
        this.chunkId = chunkId;
    }

    /** @return the chunk type code, or {@code null} if unset */
    public String getChunkType() {
        return chunkType;
    }

    /** @param chunkType the chunk type code */
    public void setChunkType(String chunkType) {
        this.chunkType = chunkType;
    }

    /** @return the source label, or {@code null} if unset */
    public String getSourceLabel() {
        return sourceLabel;
    }

    /** @param sourceLabel the source label */
    public void setSourceLabel(String sourceLabel) {
        this.sourceLabel = sourceLabel;
    }

    /** @return the live, mutable heading list; never {@code null} */
    public List<String> getHeadingPath() {
        return headingPath;
    }

    /**
     * @param headingPath the heading list to store by reference; {@code null}
     *                    is replaced by a new empty list
     */
    public void setHeadingPath(List<String> headingPath) {
        this.headingPath = headingPath != null ? headingPath : new ArrayList<String>();
    }

    /** @return the chunk text, or {@code null} if unset */
    public String getContent() {
        return content;
    }

    /** @param content the chunk text */
    public void setContent(String content) {
        this.content = content;
    }

    /** @return the question text, or {@code null} if unset */
    public String getQuestion() {
        return question;
    }

    /** @param question the question text */
    public void setQuestion(String question) {
        this.question = question;
    }

    /** @return the answer text, or {@code null} if unset */
    public String getAnswer() {
        return answer;
    }

    /** @param answer the answer text */
    public void setAnswer(String answer) {
        this.answer = answer;
    }

    /** @return the stored character count, or {@code null} if unset */
    public Integer getCharCount() {
        return charCount;
    }

    /** @param charCount the character count to store */
    public void setCharCount(Integer charCount) {
        this.charCount = charCount;
    }

    /** @return the stored token estimate, or {@code null} if unset */
    public Integer getTokenEstimate() {
        return tokenEstimate;
    }

    /** @param tokenEstimate the token estimate to store */
    public void setTokenEstimate(Integer tokenEstimate) {
        this.tokenEstimate = tokenEstimate;
    }

    /** @return the part number; 1 unless changed */
    public Integer getPartNo() {
        return partNo;
    }

    /** @param partNo the part number */
    public void setPartNo(Integer partNo) {
        this.partNo = partNo;
    }

    /** @return the total number of parts; 1 unless changed */
    public Integer getPartTotal() {
        return partTotal;
    }

    /** @param partTotal the total number of parts */
    public void setPartTotal(Integer partTotal) {
        this.partTotal = partTotal;
    }

    /** @return the live, mutable warning list; never {@code null} */
    public List<String> getWarnings() {
        return warnings;
    }

    /**
     * @param warnings the warning list to store by reference; {@code null} is
     *                 replaced by a new empty list
     */
    public void setWarnings(List<String> warnings) {
        this.warnings = warnings != null ? warnings : new ArrayList<String>();
    }

    /** @return the live, mutable options map; never {@code null} */
    public Map<String, Object> getOptions() {
        return options;
    }

    /**
     * @param options the options map to store by reference; {@code null} is
     *                replaced by a new empty insertion-ordered map
     */
    public void setOptions(Map<String, Object> options) {
        this.options = options != null ? options : new LinkedHashMap<String, Object>();
    }
}

View File

@@ -0,0 +1,11 @@
package com.easyagents.rag.core;
/**
 * String codes naming the kinds of chunk.
 *
 * <p>Constants-only holder; it cannot be instantiated. Presumably these are
 * the values stored in a chunk's {@code chunkType} property (verify against
 * the callers).
 */
public final class RagChunkTypes {

    /** Chunk type code {@code "section"}. */
    public static final String SECTION = "section";

    /** Chunk type code {@code "qa_pair"}. */
    public static final String QA_PAIR = "qa_pair";

    /** Chunk type code {@code "paragraph"}. */
    public static final String PARAGRAPH = "paragraph";

    /** Hidden: this class only hosts constants. */
    private RagChunkTypes() {
    }
}

View File

@@ -0,0 +1,12 @@
package com.easyagents.rag.core;
/**
 * Default numeric settings for chunking.
 *
 * <p>Constants-only holder; it cannot be instantiated. The units of the size
 * values (characters vs. tokens) are not defined in this class.
 */
public final class RagDefaults {

    /** Default chunk size: 512. */
    public static final int CHUNK_SIZE = 512;

    /** Default overlap size: 128. */
    public static final int OVERLAP_SIZE = 128;

    /** Default Markdown splitter level: 2 (presumably a heading depth; verify against the splitter). */
    public static final int MD_SPLITTER_LEVEL = 2;

    /** Default number of rows per chunk: 1. */
    public static final int ROWS_PER_CHUNK = 1;

    /** Hidden: this class only hosts constants. */
    private RagDefaults() {
    }
}

View File

@@ -0,0 +1,20 @@
package com.easyagents.rag.core;
/**
 * Key names for chunk metadata entries.
 *
 * <p>Constants-only holder; it cannot be instantiated. Where these keys are
 * written and read is not visible from this class.
 */
public final class RagMetadataKeys {

    /** Metadata key {@code "chunkType"}. */
    public static final String CHUNK_TYPE = "chunkType";

    /** Metadata key {@code "sourceLabel"}. */
    public static final String SOURCE_LABEL = "sourceLabel";

    /** Metadata key {@code "headingPath"}. */
    public static final String HEADING_PATH = "headingPath";

    /** Metadata key {@code "pageNo"}. */
    public static final String PAGE_NO = "pageNo";

    /** Metadata key {@code "charCount"}. */
    public static final String CHAR_COUNT = "charCount";

    /** Metadata key {@code "tokenEstimate"}. */
    public static final String TOKEN_ESTIMATE = "tokenEstimate";

    /** Metadata key {@code "qaQuestion"}. */
    public static final String QA_QUESTION = "qaQuestion";

    /** Metadata key {@code "qaAnswer"}. */
    public static final String QA_ANSWER = "qaAnswer";

    /** Metadata key {@code "qaGroupId"}. */
    public static final String QA_GROUP_ID = "qaGroupId";

    /** Metadata key {@code "partNo"}. */
    public static final String PART_NO = "partNo";

    /** Metadata key {@code "partTotal"}. */
    public static final String PART_TOTAL = "partTotal";

    /** Metadata key {@code "warnings"}. */
    public static final String WARNINGS = "warnings";

    /** Hidden: this class only hosts constants. */
    private RagMetadataKeys() {
    }
}

View File

@@ -0,0 +1,14 @@
package com.easyagents.rag.core;
/**
 * String codes identifying the available chunking strategies.
 *
 * <p>Constants-only holder; it cannot be instantiated. How each code is
 * resolved to an implementation is not visible from this class.
 */
public final class RagStrategyCodes {

    /** Strategy code {@code "AUTO"}. */
    public static final String AUTO = "AUTO";

    /** Strategy code {@code "MARKDOWN_SECTION"}. */
    public static final String MARKDOWN_SECTION = "MARKDOWN_SECTION";

    /** Strategy code {@code "OUTLINE_SECTION"}. */
    public static final String OUTLINE_SECTION = "OUTLINE_SECTION";

    /** Strategy code {@code "QA_PAIR"}. */
    public static final String QA_PAIR = "QA_PAIR";

    /** Strategy code {@code "PARAGRAPH_LENGTH"}. */
    public static final String PARAGRAPH_LENGTH = "PARAGRAPH_LENGTH";

    /** Strategy code {@code "CUSTOM_REGEX"}. */
    public static final String CUSTOM_REGEX = "CUSTOM_REGEX";

    /** Hidden: this class only hosts constants. */
    private RagStrategyCodes() {
    }
}

View File

@@ -0,0 +1,12 @@
package com.easyagents.rag.core;
/**
 * String codes naming document structure types.
 *
 * <p>Constants-only holder; it cannot be instantiated. Presumably these label
 * the structure detected in a document before a strategy is chosen (verify
 * against the callers).
 */
public final class RagStructureTypes {

    /** Structure type code {@code "markdown_heading"}. */
    public static final String MARKDOWN_HEADING = "markdown_heading";

    /** Structure type code {@code "outline_section"}. */
    public static final String OUTLINE_SECTION = "outline_section";

    /** Structure type code {@code "qa_pair"}. */
    public static final String QA_PAIR = "qa_pair";

    /** Structure type code {@code "plain_paragraph"}. */
    public static final String PLAIN_PARAGRAPH = "plain_paragraph";

    /** Hidden: this class only hosts constants. */
    private RagStructureTypes() {
    }
}