feat: 完成工作流多文件文档解析闭环

- 支持文档解析节点批量解析并收口为 documents 轻量输出
- 收口引用树、节点输出展示与旧工作流固定输出兼容
- 修复共享按钮点击事件，恢复多个节点加号交互
2026-04-19 16:05:40 +08:00
parent a5aab86de2
commit 1d8b9d9662
15 changed files with 496 additions and 48 deletions
--- a/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/node/DocNode.java
+++ b/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/node/DocNode.java
@@ -6,6 +6,7 @@ import com.easyagents.flow.core.chain.Parameter;
 import com.easyagents.flow.core.node.BaseNode;
 import tech.easyflow.common.util.SpringContextUtil;

+import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
@@ -30,17 +31,42 @@ public class DocNode extends BaseNode {
    @Override
    public Map<String, Object> execute(Chain chain) {
        Map<String, Object> map = chain.getState().resolveParameters(this);
-        Map<String, Object> res = new HashMap<>();
        DocNodeFileContentExtractor extractor = SpringContextUtil.getBean(DocNodeFileContentExtractor.class);
-        String docContent = extractor.extract(map.get("file"));
+        List<DocNodeFileContentExtractor.DocExtractedDocument> documents = extractor.extractDocuments(map.get("file")); // "file" may be a single file value or a collection of them

-        String key = "content";
-        List<Parameter> outputDefs = getOutputDefs();
-        if (outputDefs != null && !outputDefs.isEmpty()) {
-            String defName = outputDefs.get(0).getName();
-            if (StringUtil.hasText(defName)) key = defName;
+        List<Map<String, Object>> documentMaps = new ArrayList<>();
+        for (DocNodeFileContentExtractor.DocExtractedDocument document : documents) {
+            documentMaps.add(document.toMap()); // each entry carries "fileName" and "content"
        }
-        res.put(key, docContent);
+
+        Map<String, String> outputKeyMapping = resolveOutputKeyMapping();
+        Map<String, Object> res = new HashMap<>();
+        res.put(outputKeyMapping.get("documents"), documentMaps); // the list of per-file maps is the only entry put into the result
        return res;
    }
+
+    /**
+     * Resolves the runtime output key names from this node's output definitions.
+     *
+     * @return mapping from logical field name to the actual output key name
+     */
+    private Map<String, String> resolveOutputKeyMapping() {
+        Map<String, String> mapping = new HashMap<>();
+        mapping.put("documents", "documents"); // default key, kept when no usable output definition exists
+
+        List<Parameter> outputDefs = getOutputDefs();
+        if (outputDefs == null || outputDefs.isEmpty()) {
+            return mapping;
+        }
+
+        String[] logicalKeys = {"documents"};
+        for (int i = 0; i < outputDefs.size() && i < logicalKeys.length; i++) { // slots are matched by position, not by name
+            Parameter outputDef = outputDefs.get(i);
+            String name = outputDef == null ? null : outputDef.getName();
+            if (StringUtil.hasText(name)) { // a blank or missing name leaves the default key in place
+                mapping.put(logicalKeys[i], name);
+            }
+        }
+        return mapping;
+    }
 }
--- a/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/node/DocNodeFileContentExtractor.java
+++ b/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/node/DocNodeFileContentExtractor.java
@@ -14,7 +14,13 @@ import tech.easyflow.common.web.exceptions.BusinessException;

 import java.io.IOException;
 import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.LinkedHashMap;
+import java.util.LinkedHashSet;
+import java.util.List;
 import java.util.Map;
+import java.util.Set;

 /**
 * {@link DocNode} 文件内容提取器。
@@ -27,6 +33,9 @@ import java.util.Map;
 */
@Component
 public class DocNodeFileContentExtractor {
+    private static final int FILE_MAX_COUNT = 10; // max number of files accepted per call, checked after de-duplication by path
+    private static final long FILE_MAX_SINGLE_SIZE = 5L * 1024 * 1024; // per-file size cap; presumably bytes — confirm the unit of DocumentSourceRef#getSize
+    private static final long FILE_MAX_TOTAL_SIZE = 50L * 1024 * 1024; // cap on the sum of the known, positive file sizes

    private final DocumentParseBridgeService documentParseBridgeService;
    private final FileStorageService fileStorageService;
@@ -62,6 +71,33 @@ public class DocNodeFileContentExtractor {
        return extractDefaultContent(sourceRef);
    }

+    /**
+     * Extracts text content for a batch of files.
+     *
+     * @param fileValue single-file or multi-file value from the workflow runtime state
+     * @return per-file extraction results
+     */
+    public List<DocExtractedDocument> extractDocuments(Object fileValue) {
+        List<DocumentSourceRef> sourceRefs = toDocumentSourceRefs(fileValue);
+        List<DocExtractedDocument> results = new ArrayList<>();
+        for (int index = 0; index < sourceRefs.size(); index++) {
+            DocumentSourceRef sourceRef = sourceRefs.get(index);
+            try {
+                String content = shouldUseDocumentBridge(sourceRef)
+                        ? extractBridgeContent(sourceRef)
+                        : extractDefaultContent(sourceRef);
+                results.add(new DocExtractedDocument(sourceRef.getFileName(), content));
+            } catch (Exception e) { // the first failing file aborts the whole batch; no partial results are returned
+                String fileName = StringUtil.hasText(sourceRef.getFileName()) ? sourceRef.getFileName() : ("#" + (index + 1));
+                if (e instanceof BusinessException businessException) { // NOTE(review): the rethrown BusinessException does not chain the original as its cause
+                    throw new BusinessException("文件解析失败(" + fileName + "): " + businessException.getMessage());
+                }
+                throw new RuntimeException("文件解析失败(" + fileName + ")", e);
+            }
+        }
+        return results;
+    }
+
    /**
     * 将运行时文件值转换为统一文档源。
     *
@@ -84,6 +120,50 @@ public class DocNodeFileContentExtractor {
        return sourceRef;
    }

+    /**
+     * Normalizes a single-file or multi-file runtime value into a list of document sources.
+     *
+     * @param fileValue runtime file value
+     * @return list of document sources
+     */
+    List<DocumentSourceRef> toDocumentSourceRefs(Object fileValue) {
+        List<Object> candidates = new ArrayList<>();
+        collectFileValues(fileValue, candidates);
+        if (candidates.isEmpty()) {
+            throw new BusinessException("文件输入不能为空");
+        }
+
+        List<DocumentSourceRef> sourceRefs = new ArrayList<>();
+        Set<String> seenFilePaths = new LinkedHashSet<>();
+        long totalSize = 0L;
+        for (Object candidate : candidates) {
+            DocumentSourceRef sourceRef = toDocumentSourceRef(candidate);
+            validateSourceRef(sourceRef);
+            String filePath = sourceRef.getFilePath().trim();
+            if (!seenFilePaths.add(filePath)) { // a file whose trimmed path was already seen is dropped
+                continue;
+            }
+            Long size = sourceRef.getSize();
+            if (size != null && size > FILE_MAX_SINGLE_SIZE) {
+                throw new BusinessException("单个文件不能超过 5MB: " + sourceRef.getFileName());
+            }
+            if (size != null && size > 0) { // unknown or non-positive sizes do not count toward the total
+                totalSize += size;
+            }
+            sourceRefs.add(sourceRef);
+        }
+        if (sourceRefs.size() > FILE_MAX_COUNT) { // the count limit applies to the de-duplicated list
+            throw new BusinessException("最多上传 10 个文件");
+        }
+        if (totalSize > FILE_MAX_TOTAL_SIZE) {
+            throw new BusinessException("文件总大小不能超过 50MB");
+        }
+        if (sourceRefs.isEmpty()) { // NOTE(review): looks unreachable — candidates is non-empty and the first candidate is never skipped as a duplicate; confirm
+            throw new BusinessException("文件输入不能为空");
+        }
+        return sourceRefs;
+    }
+
    private void validateSourceRef(DocumentSourceRef sourceRef) {
        if (sourceRef == null) {
            throw new BusinessException("文件输入不能为空");
@@ -96,6 +176,19 @@ public class DocNodeFileContentExtractor {
        }
    }

+    private void collectFileValues(Object value, List<Object> result) { // recursively flattens value into result
+        if (value == null) { // nulls, including null items inside a collection, contribute nothing
+            return;
+        }
+        if (value instanceof Collection<?> collection) { // nested collections are expanded in iteration order
+            for (Object item : collection) {
+                collectFileValues(item, result);
+            }
+            return;
+        }
+        result.add(value); // any non-collection value is kept as one candidate
+    }
+
    /**
     * 判断当前文件类型是否应优先走统一文档解析桥接。
     *
@@ -172,4 +265,49 @@ public class DocNodeFileContentExtractor {
        }
        return null;
    }
+
+    /**
+     * Per-file extraction result.
+     */
+    public static final class DocExtractedDocument {
+        private final String fileName;
+        private final String content;
+
+        /**
+         * Creates a per-file extraction result.
+         *
+         * @param fileName file name
+         * @param content extracted text
+         */
+        public DocExtractedDocument(String fileName, String content) {
+            this.fileName = fileName;
+            this.content = content;
+        }
+
+        /**
+         * @return file name
+         */
+        public String getFileName() {
+            return fileName;
+        }
+
+        /**
+         * @return text content
+         */
+        public String getContent() {
+            return content;
+        }
+
+        /**
+         * Converts this result to a lightweight Map for consumption by workflow results and the reference tree.
+         *
+         * @return lightweight result object
+         */
+        public Map<String, Object> toMap() {
+            Map<String, Object> result = new LinkedHashMap<>();
+            result.put("fileName", fileName);
+            result.put("content", content);
+            return result;
+        }
+    }
 }
--- a/easyflow-modules/easyflow-module-ai/src/test/java/tech/easyflow/ai/node/DocNodeFileContentExtractorTest.java
+++ b/easyflow-modules/easyflow-module-ai/src/test/java/tech/easyflow/ai/node/DocNodeFileContentExtractorTest.java
@@ -16,7 +16,9 @@ import java.io.IOException;
 import java.io.InputStream;
 import java.net.InetSocketAddress;
 import java.nio.charset.StandardCharsets;
+import java.util.Arrays;
 import java.util.HashMap;
+import java.util.List;
 import java.util.Map;
 import com.sun.net.httpserver.HttpServer;

@@ -211,6 +213,56 @@ public class DocNodeFileContentExtractorTest {
        }
    }

+    /**
+     * Verifies that multi-file input returns per-file results in input order.
+     */
+    @Test
+    public void shouldExtractDocumentsForMultipleFiles() {
+        RecordingDocumentParseBridgeService bridgeService = new RecordingDocumentParseBridgeService();
+        DocNodeFileContentExtractor extractor = new DocNodeFileContentExtractor(
+            bridgeService,
+            new FakeFileStorageService(),
+            new FakeReaderManager("plain text")
+        );
+
+        List<DocNodeFileContentExtractor.DocExtractedDocument> documents = extractor.extractDocuments(Arrays.asList(
+            buildFileValue("demo.pdf", "/files/demo.pdf", "application/pdf"),
+            buildFileValue("note.txt", "/files/note.txt", "text/plain")
+        ));
+
+        Assert.assertEquals(2, documents.size());
+        Assert.assertEquals("demo.pdf", documents.get(0).getFileName());
+        Assert.assertEquals("# parsed", documents.get(0).getContent());
+        Assert.assertEquals("note.txt", documents.get(1).getFileName());
+        Assert.assertEquals("plain text", documents.get(1).getContent());
+    }
+
+    /**
+     * Verifies that the failing file's name is exposed when any file in a multi-file input fails.
+     */
+    @Test
+    public void shouldExposeFileNameWhenMultipleDocumentsFail() {
+        RecordingDocumentParseBridgeService bridgeService = new RecordingDocumentParseBridgeService();
+        bridgeService.response.setPreferredText(null);
+        bridgeService.response.setMarkdown(null);
+        bridgeService.response.setPlainText(null);
+        DocNodeFileContentExtractor extractor = new DocNodeFileContentExtractor(
+            bridgeService,
+            new FakeFileStorageService(),
+            new FakeReaderManager("plain text")
+        );
+
+        try {
+            extractor.extractDocuments(Arrays.asList(
+                buildFileValue("broken.pdf", "/files/broken.pdf", "application/pdf"),
+                buildFileValue("note.txt", "/files/note.txt", "text/plain")
+            ));
+            Assert.fail("expected BusinessException");
+        } catch (BusinessException e) {
+            Assert.assertEquals("文件解析失败(broken.pdf): 文档解析结果为空", e.getMessage());
+        }
+    }
+
    private Map<String, Object> buildFileValue(String fileName, String filePath, String contentType) {
        Map<String, Object> value = new HashMap<String, Object>();
        value.put("fileName", fileName);
--- a/easyflow-modules/easyflow-module-ai/src/test/java/tech/easyflow/ai/node/DocNodeTest.java
+++ b/easyflow-modules/easyflow-module-ai/src/test/java/tech/easyflow/ai/node/DocNodeTest.java
@@ -0,0 +1,40 @@
+package tech.easyflow.ai.node;
+
+import com.easyagents.flow.core.chain.Parameter;
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.lang.reflect.Method;
+import java.util.Arrays;
+import java.util.Map;
+
+/**
+ * Unit tests for {@link DocNode}.
+ */
+public class DocNodeTest {
+
+    /**
+     * If a legacy workflow renamed its output, the runtime result key should still be mapped by fixed output slot order.
+     *
+     * @throws Exception if the reflective call fails
+     */
+    @Test
+    public void shouldResolveOutputKeyMappingByOutputOrder() throws Exception {
+        DocNode node = new DocNode();
+        node.setOutputDefs(Arrays.asList(
+                parameter("documentItems")
+        ));
+
+        Method method = DocNode.class.getDeclaredMethod("resolveOutputKeyMapping"); // the method is private, hence reflection
+        method.setAccessible(true);
+        Map<String, String> mapping = (Map<String, String>) method.invoke(node); // NOTE(review): unchecked cast; consider a narrowly scoped @SuppressWarnings("unchecked")
+
+        Assert.assertEquals("documentItems", mapping.get("documents"));
+    }
+
+    private static Parameter parameter(String name) {
+        Parameter parameter = new Parameter();
+        parameter.setName(name);
+        return parameter;
+    }
+}