feat: 完成工作流多文件文档解析闭环

- 支持文档解析节点批量解析并收口为 documents 轻量输出

- 收口引用树、节点输出展示与旧工作流固定输出兼容

- 修复共享按钮点击事件，恢复多个节点加号交互
This commit is contained in:
2026-04-19 16:05:40 +08:00
parent a5aab86de2
commit 1d8b9d9662
15 changed files with 496 additions and 48 deletions

View File

@@ -6,6 +6,7 @@ import com.easyagents.flow.core.chain.Parameter;
import com.easyagents.flow.core.node.BaseNode;
import tech.easyflow.common.util.SpringContextUtil;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@@ -30,17 +31,42 @@ public class DocNode extends BaseNode {
/**
 * Runs the document node: resolves this node's parameters from the chain state, passes the
 * {@code file} parameter to {@link DocNodeFileContentExtractor} and returns the node output map.
 *
 * NOTE(review): this span reads like a rendered diff in which lines of the previous
 * implementation (a single text output stored under {@code key}, default {@code "content"})
 * and lines of the new implementation (a list of per-document maps stored under the key that
 * {@code resolveOutputKeyMapping()} returns for {@code "documents"}) are interleaved.
 * {@code res} is declared twice and the braces do not balance as shown here; confirm against
 * the real file before relying on this text.
 *
 * @param chain the chain whose state supplies this node's resolved parameters
 * @return the map holding this node's output
 */
@Override
public Map<String, Object> execute(Chain chain) {
// Resolve the node's input parameters from the current chain state.
Map<String, Object> map = chain.getState().resolveParameters(this);
Map<String, Object> res = new HashMap<>();
// The extractor is looked up from the Spring context rather than injected.
DocNodeFileContentExtractor extractor = SpringContextUtil.getBean(DocNodeFileContentExtractor.class);
String docContent = extractor.extract(map.get("file"));
List<DocNodeFileContentExtractor.DocExtractedDocument> documents = extractor.extractDocuments(map.get("file"));
String key = "content";
List<Parameter> outputDefs = getOutputDefs();
if (outputDefs != null && !outputDefs.isEmpty()) {
String defName = outputDefs.get(0).getName();
if (StringUtil.hasText(defName)) key = defName;
// Convert every extracted document into its map form for the node output.
List<Map<String, Object>> documentMaps = new ArrayList<>();
for (DocNodeFileContentExtractor.DocExtractedDocument document : documents) {
documentMaps.add(document.toMap());
}
res.put(key, docContent);
// The output key for the logical "documents" field comes from the node's output definitions.
Map<String, String> outputKeyMapping = resolveOutputKeyMapping();
Map<String, Object> res = new HashMap<>();
res.put(outputKeyMapping.get("documents"), documentMaps);
return res;
}
/**
 * Resolves the runtime output key for each logical output field.
 *
 * <p>Every logical field starts out mapped to its own name. Output definitions are then matched
 * to logical fields by position, and a definition that carries a usable name overrides the
 * default key of the field in the same slot.
 *
 * @return mapping from logical field name to the key actually used in the node output
 */
private Map<String, String> resolveOutputKeyMapping() {
    Map<String, String> keyByField = new HashMap<>();
    keyByField.put("documents", "documents");

    List<Parameter> defs = getOutputDefs();
    if (defs != null && !defs.isEmpty()) {
        // Slot order is fixed: the n-th output definition names the n-th logical field.
        String[] slots = {"documents"};
        int limit = Math.min(defs.size(), slots.length);
        for (int slot = 0; slot < limit; slot++) {
            Parameter def = defs.get(slot);
            String configuredName = null;
            if (def != null) {
                configuredName = def.getName();
            }
            if (StringUtil.hasText(configuredName)) {
                keyByField.put(slots[slot], configuredName);
            }
        }
    }
    return keyByField;
}
}

View File

@@ -14,7 +14,13 @@ import tech.easyflow.common.web.exceptions.BusinessException;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Collection;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
/**
* {@link DocNode} 文件内容提取器。
@@ -27,6 +33,9 @@ import java.util.Map;
*/
@Component
public class DocNodeFileContentExtractor {
private static final int FILE_MAX_COUNT = 10;
private static final long FILE_MAX_SINGLE_SIZE = 5L * 1024 * 1024;
private static final long FILE_MAX_TOTAL_SIZE = 50L * 1024 * 1024;
private final DocumentParseBridgeService documentParseBridgeService;
private final FileStorageService fileStorageService;
@@ -62,6 +71,33 @@ public class DocNodeFileContentExtractor {
return extractDefaultContent(sourceRef);
}
/**
 * Extracts text content from every file contained in a runtime file value.
 *
 * <p>The value is first normalized with {@code toDocumentSourceRefs}. Each source is then parsed
 * through the document bridge when {@code shouldUseDocumentBridge} accepts it, and through the
 * default extraction path otherwise. Results follow the order of the normalized sources.
 *
 * @param fileValue single-file or multi-file value taken from the workflow runtime state
 * @return one extraction result per file
 * @throws BusinessException if parsing a file fails with a {@code BusinessException}; the new
 *     message is prefixed with the file name, or with {@code #<1-based position>} when the
 *     source has no usable file name
 * @throws RuntimeException if parsing a file fails with any other exception; the original
 *     exception is kept as the cause
 */
public List<DocExtractedDocument> extractDocuments(Object fileValue) {
    List<DocumentSourceRef> refs = toDocumentSourceRefs(fileValue);
    List<DocExtractedDocument> extracted = new ArrayList<>();
    int position = 0;
    for (DocumentSourceRef ref : refs) {
        position++;
        try {
            String text;
            if (shouldUseDocumentBridge(ref)) {
                text = extractBridgeContent(ref);
            } else {
                text = extractDefaultContent(ref);
            }
            extracted.add(new DocExtractedDocument(ref.getFileName(), text));
        } catch (Exception failure) {
            // Identify the failing file by name, or by its 1-based position when unnamed.
            String label;
            if (StringUtil.hasText(ref.getFileName())) {
                label = ref.getFileName();
            } else {
                label = "#" + position;
            }
            if (!(failure instanceof BusinessException)) {
                throw new RuntimeException("文件解析失败(" + label + ")", failure);
            }
            throw new BusinessException("文件解析失败(" + label + "): " + failure.getMessage());
        }
    }
    return extracted;
}
/**
* 将运行时文件值转换为统一文档源。
*
@@ -84,6 +120,50 @@ public class DocNodeFileContentExtractor {
return sourceRef;
}
/**
 * Normalizes a single-file or multi-file runtime value into a list of document sources.
 *
 * <p>Nested collections are flattened, every candidate is converted and validated, and sources
 * whose trimmed file path was already seen are dropped. Per-file size, file count and total size
 * limits are enforced; sources without a size are not counted towards the total.
 *
 * @param fileValue runtime file value
 * @return de-duplicated document sources in input order
 * @throws BusinessException if no file is supplied or one of the limits is exceeded
 */
List<DocumentSourceRef> toDocumentSourceRefs(Object fileValue) {
    List<Object> rawValues = new ArrayList<>();
    collectFileValues(fileValue, rawValues);
    if (rawValues.isEmpty()) {
        throw new BusinessException("文件输入不能为空");
    }

    Set<String> visitedPaths = new LinkedHashSet<>();
    List<DocumentSourceRef> accepted = new ArrayList<>();
    long accumulatedBytes = 0L;
    for (Object raw : rawValues) {
        DocumentSourceRef ref = toDocumentSourceRef(raw);
        validateSourceRef(ref);
        boolean firstOccurrence = visitedPaths.add(ref.getFilePath().trim());
        if (firstOccurrence) {
            Long declaredSize = ref.getSize();
            if (declaredSize != null) {
                if (declaredSize > FILE_MAX_SINGLE_SIZE) {
                    throw new BusinessException("单个文件不能超过 5MB: " + ref.getFileName());
                }
                // Only positive sizes contribute to the running total.
                if (declaredSize > 0) {
                    accumulatedBytes += declaredSize;
                }
            }
            accepted.add(ref);
        }
    }

    // Count and total-size limits apply to the de-duplicated list.
    if (accepted.size() > FILE_MAX_COUNT) {
        throw new BusinessException("最多上传 10 个文件");
    }
    if (accumulatedBytes > FILE_MAX_TOTAL_SIZE) {
        throw new BusinessException("文件总大小不能超过 50MB");
    }
    // Defensive guard: with a non-empty candidate list the first candidate is either kept
    // or rejected with an exception above.
    if (accepted.isEmpty()) {
        throw new BusinessException("文件输入不能为空");
    }
    return accepted;
}
private void validateSourceRef(DocumentSourceRef sourceRef) {
if (sourceRef == null) {
throw new BusinessException("文件输入不能为空");
@@ -96,6 +176,19 @@ public class DocNodeFileContentExtractor {
}
}
/**
 * Flattens a possibly nested file value into a flat list of candidates.
 *
 * <p>{@code null} values are ignored, collections are walked recursively in iteration order,
 * and any other value is appended as-is.
 *
 * @param value  value to flatten; may be {@code null}, a collection, or a single file value
 * @param result list that receives the flattened candidates
 */
private void collectFileValues(Object value, List<Object> result) {
    if (value instanceof Collection<?> nested) {
        for (Object element : nested) {
            collectFileValues(element, result);
        }
    } else if (value != null) {
        result.add(value);
    }
}
/**
* 判断当前文件类型是否应优先走统一文档解析桥接。
*
@@ -172,4 +265,49 @@ public class DocNodeFileContentExtractor {
}
return null;
}
/**
 * Extraction result for a single file: the file name together with its extracted text.
 */
public static final class DocExtractedDocument {

    private final String fileName;
    private final String content;

    /**
     * Creates the extraction result for one file.
     *
     * @param fileName name of the file
     * @param content  text extracted from the file
     */
    public DocExtractedDocument(String fileName, String content) {
        this.content = content;
        this.fileName = fileName;
    }

    /**
     * @return name of the file
     */
    public String getFileName() {
        return this.fileName;
    }

    /**
     * @return text extracted from the file
     */
    public String getContent() {
        return this.content;
    }

    /**
     * Converts this result into a lightweight map with the entries {@code fileName} and
     * {@code content}, in that order.
     *
     * @return new map holding the file name and the extracted text
     */
    public Map<String, Object> toMap() {
        Map<String, Object> view = new LinkedHashMap<>();
        view.put("fileName", this.fileName);
        view.put("content", this.content);
        return view;
    }
}
}

View File

@@ -16,7 +16,9 @@ import java.io.IOException;
import java.io.InputStream;
import java.net.InetSocketAddress;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import com.sun.net.httpserver.HttpServer;
@@ -211,6 +213,56 @@ public class DocNodeFileContentExtractorTest {
}
}
/**
 * Verifies that multi-file input yields one result per file, in input order.
 */
@Test
public void shouldExtractDocumentsForMultipleFiles() {
    DocNodeFileContentExtractor extractor = new DocNodeFileContentExtractor(
            new RecordingDocumentParseBridgeService(),
            new FakeFileStorageService(),
            new FakeReaderManager("plain text")
    );
    Map<String, Object> pdfFile = buildFileValue("demo.pdf", "/files/demo.pdf", "application/pdf");
    Map<String, Object> textFile = buildFileValue("note.txt", "/files/note.txt", "text/plain");

    List<DocNodeFileContentExtractor.DocExtractedDocument> parsed =
            extractor.extractDocuments(Arrays.asList(pdfFile, textFile));

    Assert.assertEquals(2, parsed.size());

    DocNodeFileContentExtractor.DocExtractedDocument first = parsed.get(0);
    Assert.assertEquals("demo.pdf", first.getFileName());
    Assert.assertEquals("# parsed", first.getContent());

    DocNodeFileContentExtractor.DocExtractedDocument second = parsed.get(1);
    Assert.assertEquals("note.txt", second.getFileName());
    Assert.assertEquals("plain text", second.getContent());
}
/**
 * Verifies that when any file of a multi-file input fails, the error names that file.
 */
@Test
public void shouldExposeFileNameWhenMultipleDocumentsFail() {
    // A bridge response without any text makes the bridged file fail.
    RecordingDocumentParseBridgeService emptyBridge = new RecordingDocumentParseBridgeService();
    emptyBridge.response.setPreferredText(null);
    emptyBridge.response.setMarkdown(null);
    emptyBridge.response.setPlainText(null);
    DocNodeFileContentExtractor extractor = new DocNodeFileContentExtractor(
            emptyBridge,
            new FakeFileStorageService(),
            new FakeReaderManager("plain text")
    );

    BusinessException thrown = null;
    try {
        extractor.extractDocuments(Arrays.asList(
                buildFileValue("broken.pdf", "/files/broken.pdf", "application/pdf"),
                buildFileValue("note.txt", "/files/note.txt", "text/plain")
        ));
    } catch (BusinessException e) {
        thrown = e;
    }

    Assert.assertNotNull("expected BusinessException", thrown);
    Assert.assertEquals("文件解析失败(broken.pdf): 文档解析结果为空", thrown.getMessage());
}
private Map<String, Object> buildFileValue(String fileName, String filePath, String contentType) {
Map<String, Object> value = new HashMap<String, Object>();
value.put("fileName", fileName);

View File

@@ -0,0 +1,40 @@
package tech.easyflow.ai.node;
import com.easyagents.flow.core.chain.Parameter;
import org.junit.Assert;
import org.junit.Test;
import java.lang.reflect.Method;
import java.util.Arrays;
import java.util.Map;
/**
 * Unit tests for {@link DocNode}.
 */
public class DocNodeTest {

    /**
     * A legacy workflow that renamed its output must still get its runtime result key
     * mapped by fixed output slot order.
     *
     * @throws Exception if the reflective call fails
     */
    @Test
    public void shouldResolveOutputKeyMappingByOutputOrder() throws Exception {
        DocNode node = new DocNode();
        node.setOutputDefs(Arrays.asList(
                parameter("documentItems")
        ));

        // The method under test is looked up by name and made accessible through reflection.
        Method method = DocNode.class.getDeclaredMethod("resolveOutputKeyMapping");
        method.setAccessible(true);
        // A wildcard cast is fully checked at runtime, so no unchecked-cast warning is raised.
        Map<?, ?> mapping = (Map<?, ?>) method.invoke(node);

        Assert.assertEquals("documentItems", mapping.get("documents"));
    }

    /**
     * Builds a parameter that carries only a name.
     *
     * @param name parameter name
     * @return new parameter with the given name
     */
    private static Parameter parameter(String name) {
        Parameter parameter = new Parameter();
        parameter.setName(name);
        return parameter;
    }
}