feat: RAG分块策略增强
This commit is contained in:
28
easy-agents-rag/easy-agents-rag-core/pom.xml
Normal file
28
easy-agents-rag/easy-agents-rag-core/pom.xml
Normal file
@@ -0,0 +1,28 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven descriptor for the easy-agents-rag-core module. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <!-- The parent version comes from the ${revision} property, which is defined outside this file. -->
    <parent>
        <groupId>com.easyagents</groupId>
        <artifactId>easy-agents-rag</artifactId>
        <version>${revision}</version>
    </parent>

    <!-- No groupId/version/packaging declared here: Maven inherits the first two from the parent
         and defaults packaging to jar. -->
    <artifactId>easy-agents-rag-core</artifactId>
    <name>easy-agents-rag-core</name>

    <properties>
        <!-- Compile for Java 8: sources in this module must avoid newer language features. -->
        <maven.compiler.source>8</maven.compiler.source>
        <maven.compiler.target>8</maven.compiler.target>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    </properties>

    <dependencies>
        <!-- No version given; presumably supplied by dependencyManagement in a parent POM
             (TODO confirm, the parent is not visible from this file). -->
        <dependency>
            <groupId>com.easyagents</groupId>
            <artifactId>easy-agents-core</artifactId>
        </dependency>
    </dependencies>
</project>
|
||||
@@ -0,0 +1,128 @@
|
||||
package com.easyagents.rag.core;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.ArrayList;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
/**
 * Mutable, serializable bean holding the data of a single chunk: identity, type, source label,
 * heading path, text content, an optional question/answer pair, size figures, part numbering,
 * warnings and free-form options.
 *
 * <p>The three collection-valued properties ({@code headingPath}, {@code warnings},
 * {@code options}) start out as empty mutable collections and are never {@code null}: passing
 * {@code null} to their setters resets them to a fresh empty collection. Getters return the
 * internal collection itself (no defensive copy), so callers may mutate it in place.
 *
 * <p>Instances are not synchronized.
 */
public class RagChunk implements Serializable {

    /** Explicit stream version so that recompiling the class does not change its serialized form id. */
    private static final long serialVersionUID = 1L;

    private String chunkId;
    private String chunkType;
    private String sourceLabel;
    private List<String> headingPath = new ArrayList<String>();
    private String content;
    private String question;
    private String answer;
    private Integer charCount;
    private Integer tokenEstimate;
    private Integer partNo = 1;
    private Integer partTotal = 1;
    private List<String> warnings = new ArrayList<String>();
    // Values are typed as Object: Java serialization of this bean fails at runtime
    // if a stored value is not itself serializable.
    private Map<String, Object> options = new LinkedHashMap<String, Object>();

    /** @return the chunk identifier, or {@code null} if none has been set */
    public String getChunkId() {
        return chunkId;
    }

    /** @param chunkId the chunk identifier; may be {@code null} */
    public void setChunkId(String chunkId) {
        this.chunkId = chunkId;
    }

    /** @return the chunk type, or {@code null} if none has been set */
    public String getChunkType() {
        return chunkType;
    }

    /** @param chunkType the chunk type; may be {@code null} */
    public void setChunkType(String chunkType) {
        this.chunkType = chunkType;
    }

    /** @return the source label, or {@code null} if none has been set */
    public String getSourceLabel() {
        return sourceLabel;
    }

    /** @param sourceLabel the source label; may be {@code null} */
    public void setSourceLabel(String sourceLabel) {
        this.sourceLabel = sourceLabel;
    }

    /** @return the live heading-path list; never {@code null}, empty by default */
    public List<String> getHeadingPath() {
        return headingPath;
    }

    /**
     * Replaces the heading path.
     *
     * @param headingPath the list to store (kept by reference, not copied); {@code null} resets
     *                    the property to a new empty list
     */
    public void setHeadingPath(List<String> headingPath) {
        this.headingPath = headingPath != null ? headingPath : new ArrayList<String>();
    }

    /** @return the chunk text, or {@code null} if none has been set */
    public String getContent() {
        return content;
    }

    /** @param content the chunk text; may be {@code null} */
    public void setContent(String content) {
        this.content = content;
    }

    /** @return the question text, or {@code null} if none has been set */
    public String getQuestion() {
        return question;
    }

    /** @param question the question text; may be {@code null} */
    public void setQuestion(String question) {
        this.question = question;
    }

    /** @return the answer text, or {@code null} if none has been set */
    public String getAnswer() {
        return answer;
    }

    /** @param answer the answer text; may be {@code null} */
    public void setAnswer(String answer) {
        this.answer = answer;
    }

    /** @return the stored character count, or {@code null} if none has been set */
    public Integer getCharCount() {
        return charCount;
    }

    /** @param charCount the character count to store; may be {@code null} */
    public void setCharCount(Integer charCount) {
        this.charCount = charCount;
    }

    /** @return the stored token estimate, or {@code null} if none has been set */
    public Integer getTokenEstimate() {
        return tokenEstimate;
    }

    /** @param tokenEstimate the token estimate to store; may be {@code null} */
    public void setTokenEstimate(Integer tokenEstimate) {
        this.tokenEstimate = tokenEstimate;
    }

    /** @return the part number; {@code 1} unless changed */
    public Integer getPartNo() {
        return partNo;
    }

    /** @param partNo the part number to store; stored as given, including {@code null} */
    public void setPartNo(Integer partNo) {
        this.partNo = partNo;
    }

    /** @return the total number of parts; {@code 1} unless changed */
    public Integer getPartTotal() {
        return partTotal;
    }

    /** @param partTotal the part total to store; stored as given, including {@code null} */
    public void setPartTotal(Integer partTotal) {
        this.partTotal = partTotal;
    }

    /** @return the live warnings list; never {@code null}, empty by default */
    public List<String> getWarnings() {
        return warnings;
    }

    /**
     * Replaces the warnings.
     *
     * @param warnings the list to store (kept by reference, not copied); {@code null} resets
     *                 the property to a new empty list
     */
    public void setWarnings(List<String> warnings) {
        this.warnings = warnings != null ? warnings : new ArrayList<String>();
    }

    /** @return the live options map; never {@code null}, empty by default */
    public Map<String, Object> getOptions() {
        return options;
    }

    /**
     * Replaces the options.
     *
     * @param options the map to store (kept by reference, not copied); {@code null} resets
     *                the property to a new empty insertion-ordered map
     */
    public void setOptions(Map<String, Object> options) {
        this.options = options != null ? options : new LinkedHashMap<String, Object>();
    }
}
|
||||
@@ -0,0 +1,11 @@
|
||||
package com.easyagents.rag.core;
|
||||
|
||||
/**
 * Holder for chunk-type string constants.
 *
 * <p>NOTE(review): presumably these are the values stored in {@code RagChunk#chunkType};
 * confirm against the code that assigns that field.
 */
public final class RagChunkTypes {

    /** Literal {@code "section"}. */
    public static final String SECTION = "section";

    /** Literal {@code "qa_pair"}. */
    public static final String QA_PAIR = "qa_pair";

    /** Literal {@code "paragraph"}. */
    public static final String PARAGRAPH = "paragraph";

    /** Constants only; not meant to be instantiated. */
    private RagChunkTypes() {
    }
}
|
||||
@@ -0,0 +1,12 @@
|
||||
package com.easyagents.rag.core;
|
||||
|
||||
/**
 * Holder for default numeric settings.
 *
 * <p>NOTE(review): the units of the size values (characters vs. tokens) are not stated in
 * this file; confirm against the code that consumes them.
 */
public final class RagDefaults {

    /** Default chunk size: {@code 512}. */
    public static final int CHUNK_SIZE = 512;

    /** Default overlap size: {@code 128}. */
    public static final int OVERLAP_SIZE = 128;

    /** Default splitter level: {@code 2}. Presumably a Markdown heading depth; TODO confirm. */
    public static final int MD_SPLITTER_LEVEL = 2;

    /** Default rows per chunk: {@code 1}. */
    public static final int ROWS_PER_CHUNK = 1;

    /** Constants only; not meant to be instantiated. */
    private RagDefaults() {
    }
}
|
||||
@@ -0,0 +1,20 @@
|
||||
package com.easyagents.rag.core;
|
||||
|
||||
/**
 * Holder for metadata key names, all in lowerCamelCase.
 *
 * <p>NOTE(review): where these keys are written and read is not visible in this file;
 * presumably they index a per-chunk metadata map. Confirm against the callers.
 */
public final class RagMetadataKeys {

    // Chunk description.
    public static final String CHUNK_TYPE = "chunkType";
    public static final String SOURCE_LABEL = "sourceLabel";
    public static final String HEADING_PATH = "headingPath";
    public static final String PAGE_NO = "pageNo";

    // Size figures.
    public static final String CHAR_COUNT = "charCount";
    public static final String TOKEN_ESTIMATE = "tokenEstimate";

    // Question/answer data.
    public static final String QA_QUESTION = "qaQuestion";
    public static final String QA_ANSWER = "qaAnswer";
    public static final String QA_GROUP_ID = "qaGroupId";

    // Part numbering.
    public static final String PART_NO = "partNo";
    public static final String PART_TOTAL = "partTotal";

    // Diagnostics.
    public static final String WARNINGS = "warnings";

    /** Constants only; not meant to be instantiated. */
    private RagMetadataKeys() {
    }
}
|
||||
@@ -0,0 +1,14 @@
|
||||
package com.easyagents.rag.core;
|
||||
|
||||
/**
 * Holder for strategy code strings. Every constant's value is identical to its own name.
 *
 * <p>NOTE(review): how each code is interpreted (including what {@code AUTO} resolves to) is
 * decided elsewhere; confirm against the code that dispatches on these values.
 */
public final class RagStrategyCodes {

    /** Literal {@code "AUTO"}. */
    public static final String AUTO = "AUTO";

    /** Literal {@code "MARKDOWN_SECTION"}. */
    public static final String MARKDOWN_SECTION = "MARKDOWN_SECTION";

    /** Literal {@code "OUTLINE_SECTION"}. */
    public static final String OUTLINE_SECTION = "OUTLINE_SECTION";

    /** Literal {@code "QA_PAIR"}. */
    public static final String QA_PAIR = "QA_PAIR";

    /** Literal {@code "PARAGRAPH_LENGTH"}. */
    public static final String PARAGRAPH_LENGTH = "PARAGRAPH_LENGTH";

    /** Literal {@code "CUSTOM_REGEX"}. */
    public static final String CUSTOM_REGEX = "CUSTOM_REGEX";

    /** Constants only; not meant to be instantiated. */
    private RagStrategyCodes() {
    }
}
|
||||
@@ -0,0 +1,12 @@
|
||||
package com.easyagents.rag.core;
|
||||
|
||||
/**
 * Holder for structure-type string constants, all in lower snake case.
 *
 * <p>NOTE(review): presumably these label the document structure detected before chunking;
 * the producer of these values is not visible in this file, so confirm against it.
 */
public final class RagStructureTypes {

    /** Literal {@code "markdown_heading"}. */
    public static final String MARKDOWN_HEADING = "markdown_heading";

    /** Literal {@code "outline_section"}. */
    public static final String OUTLINE_SECTION = "outline_section";

    /** Literal {@code "qa_pair"}. */
    public static final String QA_PAIR = "qa_pair";

    /** Literal {@code "plain_paragraph"}. */
    public static final String PLAIN_PARAGRAPH = "plain_paragraph";

    /** Constants only; not meant to be instantiated. */
    private RagStructureTypes() {
    }
}
|
||||
Reference in New Issue
Block a user