桥接层负责把底层文档服务异常和文件加载异常转换为稳定语义, + * 供上层业务按场景进行处理。
+ *
+ * @author Codex
+ * @since 2026-04-14
+ */
+public class DocumentParseBridgeException extends RuntimeException {
+
+    /** Stable, machine-readable error code supplied at construction time. */
+    private final String code;
+
+    /**
+     * Creates an exception carrying an error code and a message.
+     *
+     * @param code    stable error code
+     * @param message human-readable description
+     */
+    public DocumentParseBridgeException(String code, String message) {
+        super(message);
+        this.code = code;
+    }
+
+    /**
+     * Creates an exception carrying an error code, a message and the underlying cause.
+     *
+     * @param code    stable error code
+     * @param message human-readable description
+     * @param cause   original failure, preserved as the exception cause
+     */
+    public DocumentParseBridgeException(String code, String message, Throwable cause) {
+        super(message, cause);
+        this.code = code;
+    }
+
+    /**
+     * Returns the stable error code.
+     *
+     * @return the error code
+     */
+    public String getCode() {
+        return code;
+    }
+
+    /**
+     * Builds the exception used when no document parse service is available.
+     *
+     * @return exception with code {@code service_not_enabled} and a fixed message
+     */
+    public static DocumentParseBridgeException serviceNotEnabled() {
+        return new DocumentParseBridgeException(
+                "service_not_enabled",
+                "统一文档解析服务未启用,请先配置 easy-agents.document.pdf.provider"
+        );
+    }
+
+    /**
+     * Builds an exception with code {@code unsupported_source}.
+     *
+     * @param message human-readable description
+     * @return the exception
+     */
+    public static DocumentParseBridgeException unsupportedSource(String message) {
+        return new DocumentParseBridgeException("unsupported_source", message);
+    }
+
+    /**
+     * Builds an exception with code {@code source_load_failed}.
+     *
+     * @param message human-readable description
+     * @param cause   original failure
+     * @return the exception
+     */
+    public static DocumentParseBridgeException sourceLoadFailed(String message, Throwable cause) {
+        return new DocumentParseBridgeException("source_load_failed", message, cause);
+    }
+
+    /**
+     * Builds an exception with code {@code request_build_failed} and a cause.
+     *
+     * @param message human-readable description
+     * @param cause   original failure
+     * @return the exception
+     */
+    public static DocumentParseBridgeException requestBuildFailed(String message, Throwable cause) {
+        return new DocumentParseBridgeException("request_build_failed", message, cause);
+    }
+
+    /**
+     * Builds an exception with code {@code request_build_failed}.
+     *
+     * @param message human-readable description
+     * @return the exception
+     */
+    public static DocumentParseBridgeException requestBuildFailed(String message) {
+        return new DocumentParseBridgeException("request_build_failed", message);
+    }
+
+    /**
+     * Builds an exception with code {@code parse_failed} and a cause.
+     *
+     * @param message human-readable description
+     * @param cause   original failure
+     * @return the exception
+     */
+    public static DocumentParseBridgeException parseFailed(String message, Throwable cause) {
+        return new DocumentParseBridgeException("parse_failed", message, cause);
+    }
+
+    /**
+     * Builds an exception with code {@code parse_failed}.
+     *
+     * @param message human-readable description
+     * @return the exception
+     */
+    public static DocumentParseBridgeException parseFailed(String message) {
+        return new DocumentParseBridgeException("parse_failed", message);
+    }
+
+    /**
+     * Builds an exception with code {@code task_failed} and a cause.
+     *
+     * @param message human-readable description
+     * @param cause   original failure
+     * @return the exception
+     */
+    public static DocumentParseBridgeException taskFailed(String message, Throwable cause) {
+        return new DocumentParseBridgeException("task_failed", message, cause);
+    }
+
+    /**
+     * Builds an exception with code {@code task_failed}.
+     *
+     * @param message human-readable description
+     * @return the exception
+     */
+    public static DocumentParseBridgeException taskFailed(String message) {
+        return new DocumentParseBridgeException("task_failed", message);
+    }
+
+    /**
+     * Builds an exception with code {@code result_fetch_failed} and a cause.
+     *
+     * @param message human-readable description
+     * @param cause   original failure
+     * @return the exception
+     */
+    public static DocumentParseBridgeException resultFetchFailed(String message, Throwable cause) {
+        return new DocumentParseBridgeException("result_fetch_failed", message, cause);
+    }
+
+    /**
+     * Builds an exception with code {@code result_fetch_failed}.
+     *
+     * @param message human-readable description
+     * @return the exception
+     */
+    public static DocumentParseBridgeException resultFetchFailed(String message) {
+        return new DocumentParseBridgeException("result_fetch_failed", message);
+    }
+}
diff --git a/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/document/model/DocumentParseArtifacts.java b/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/document/model/DocumentParseArtifacts.java new file mode 100644 index 0000000..269e258 --- /dev/null +++ b/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/document/model/DocumentParseArtifacts.java @@ -0,0 +1,63 @@ +package tech.easyflow.ai.document.model; + +import java.util.LinkedHashMap; +import java.util.Map; + +/** + * 文档解析工件。 + * + * @author Codex + * @since 2026-04-14 + */ +public class DocumentParseArtifacts { + + private Object middleJson; + private Object contentList; + private Object modelOutput; + private Map场景由 easyflow 业务层传入,桥接层负责将场景映射为底层解析请求参数, + * 避免业务模块直接感知多个布尔开关。
+ *
+ * @author Codex
+ * @since 2026-04-14
+ */
+public enum DocumentParseScenario {
+
+    /**
+     * Workflow text-extraction scenario: only requires returning directly consumable text
+     * as quickly as possible.
+     */
+    WORKFLOW_TEXT,
+
+    /**
+     * Knowledge-base import scenario: keeps more structured artifacts for later chunking
+     * and analysis.
+     */
+    KNOWLEDGE_IMPORT,
+
+    /**
+     * Returns artifacts that are as complete as possible, for later advanced consumers.
+     */
+    FULL_ARTIFACTS
+}
diff --git a/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/document/model/DocumentParseTaskInfo.java b/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/document/model/DocumentParseTaskInfo.java new file mode 100644 index 0000000..a2b163c --- /dev/null +++ b/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/document/model/DocumentParseTaskInfo.java @@ -0,0 +1,33 @@ +package tech.easyflow.ai.document.model; + +/** + * 任务聚合查询结果。 + * + *该对象在任务状态基础上按需附带最终解析结果。 + * 当任务尚未完成时仅返回状态信息;当任务已完成时可同时返回标准化结果。
+ *
+ * @author Codex
+ * @since 2026-04-14
+ */
+public class DocumentParseTaskInfo extends DocumentParseTaskStatus {
+
+    /** Normalized parse result attached to the task status; unset until a result is supplied. */
+    private DocumentParsedResult result;
+
+    /**
+     * Returns the normalized parse result.
+     *
+     * @return the parse result; may be {@code null} while the task has not completed
+     */
+    public DocumentParsedResult getResult() {
+        return result;
+    }
+
+    /**
+     * Sets the normalized parse result.
+     *
+     * @param result the parse result
+     */
+    public void setResult(DocumentParsedResult result) {
+        this.result = result;
+    }
+}
diff --git a/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/document/model/DocumentParseTaskStatus.java b/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/document/model/DocumentParseTaskStatus.java new file mode 100644 index 0000000..580dc95 --- /dev/null +++ b/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/document/model/DocumentParseTaskStatus.java @@ -0,0 +1,113 @@ +package tech.easyflow.ai.document.model; + +import java.util.ArrayList; +import java.util.List; + +/** + * 标准化异步解析任务状态。 + * + * @author Codex + * @since 2026-04-14 + */ +public class DocumentParseTaskStatus { + + private String taskId; + private String status; + private String backend; + private List该模型用于屏蔽业务模块和底层解析框架的文件输入差异,业务方只需要描述文件来自哪里, + * 具体由桥接层负责加载字节内容并转成统一解析请求。
+ *
+ * @author Codex
+ * @since 2026-04-14
+ */
+public class DocumentSourceRef {
+
+    private String fileName;
+    private String filePath;
+    private String contentType;
+    private Long size;
+    private String url;
+    private byte[] contentBytes;
+
+    /**
+     * Creates a document source backed by a file storage path.
+     *
+     * @param filePath storage path
+     * @return a document source with only {@code filePath} set
+     */
+    public static DocumentSourceRef ofPath(String filePath) {
+        DocumentSourceRef sourceRef = new DocumentSourceRef();
+        sourceRef.setFilePath(filePath);
+        return sourceRef;
+    }
+
+    /**
+     * Creates a document source backed by in-memory bytes.
+     *
+     * @param fileName     file name
+     * @param contentBytes file bytes; the array is stored as-is, not copied
+     * @return a document source with {@code fileName} and {@code contentBytes} set
+     */
+    public static DocumentSourceRef ofBytes(String fileName, byte[] contentBytes) {
+        DocumentSourceRef sourceRef = new DocumentSourceRef();
+        sourceRef.setFileName(fileName);
+        sourceRef.setContentBytes(contentBytes);
+        return sourceRef;
+    }
+
+    /**
+     * Creates a document source backed by a URL.
+     *
+     * @param url file URL
+     * @return a document source with only {@code url} set
+     */
+    public static DocumentSourceRef ofUrl(String url) {
+        DocumentSourceRef sourceRef = new DocumentSourceRef();
+        sourceRef.setUrl(url);
+        return sourceRef;
+    }
+
+    /** @return the file name, or {@code null} when not set */
+    public String getFileName() {
+        return fileName;
+    }
+
+    /** @param fileName the file name */
+    public void setFileName(String fileName) {
+        this.fileName = fileName;
+    }
+
+    /** @return the storage path, or {@code null} when not set */
+    public String getFilePath() {
+        return filePath;
+    }
+
+    /** @param filePath the storage path */
+    public void setFilePath(String filePath) {
+        this.filePath = filePath;
+    }
+
+    /** @return the content type, or {@code null} when not set */
+    public String getContentType() {
+        return contentType;
+    }
+
+    /** @param contentType the content type */
+    public void setContentType(String contentType) {
+        this.contentType = contentType;
+    }
+
+    /** @return the size, or {@code null} when not set */
+    public Long getSize() {
+        return size;
+    }
+
+    /** @param size the size */
+    public void setSize(Long size) {
+        this.size = size;
+    }
+
+    /** @return the URL, or {@code null} when not set */
+    public String getUrl() {
+        return url;
+    }
+
+    /** @param url the URL */
+    public void setUrl(String url) {
+        this.url = url;
+    }
+
+    /** @return the stored byte array itself (no defensive copy), or {@code null} when not set */
+    public byte[] getContentBytes() {
+        return contentBytes;
+    }
+
+    /** @param contentBytes the file bytes; the reference is stored without copying */
+    public void setContentBytes(byte[] contentBytes) {
+        this.contentBytes = contentBytes;
+    }
+}
diff --git 
a/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/document/service/DocumentParseBridgeService.java b/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/document/service/DocumentParseBridgeService.java new file mode 100644 index 0000000..88af40a --- /dev/null +++ b/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/document/service/DocumentParseBridgeService.java @@ -0,0 +1,67 @@ +package tech.easyflow.ai.document.service; + +import tech.easyflow.ai.document.model.DocumentParseScenario; +import tech.easyflow.ai.document.model.DocumentSourceRef; +import tech.easyflow.ai.document.model.DocumentParseTaskInfo; +import tech.easyflow.ai.document.model.DocumentParseTaskStatus; +import tech.easyflow.ai.document.model.DocumentParsedResult; + +/** + * 统一文档解析桥接门面。 + * + *业务模块通过该门面使用文档解析能力,而不是直接依赖 easy-agents 的原始请求和结果模型。
+ *
+ * @author Codex
+ * @since 2026-04-14
+ */
+public interface DocumentParseBridgeService {
+
+    /**
+     * Parses a single document synchronously.
+     *
+     * @param source   document source
+     * @param scenario parse scenario
+     * @return normalized parse result
+     */
+    DocumentParsedResult parse(DocumentSourceRef source, DocumentParseScenario scenario);
+
+    /**
+     * Submits an asynchronous parse task for a single document.
+     *
+     * @param source   document source
+     * @param scenario parse scenario
+     * @return asynchronous task status
+     */
+    DocumentParseTaskStatus submit(DocumentSourceRef source, DocumentParseScenario scenario);
+
+    /**
+     * Queries the status of an asynchronous task.
+     *
+     * @param taskId task ID
+     * @return asynchronous task status
+     */
+    DocumentParseTaskStatus queryTask(String taskId);
+
+    /**
+     * Fetches the final result of an asynchronous task.
+     *
+     * This method has "result read" semantics: the underlying provider may wait internally
+     * for the task to finish before returning the final result, so it is not suitable as a
+     * lightweight status-polling endpoint. Callers that want a combined
+     * "current status + completed result" view should prefer {@link #queryTaskInfo(String)}.
+     *
+     * @param taskId task ID
+     * @return normalized parse result
+     */
+    DocumentParsedResult queryResult(String taskId);
+
+    /**
+     * Queries aggregated information about an asynchronous task.
+     *
+     * While the task is still processing only the status is returned; once the task has
+     * completed the normalized result is attached. Suitable for pages or business code that
+     * want to read "current status + available result" in one call.
+     *
+     * @param taskId task ID
+     * @return aggregated task information
+     */
+    DocumentParseTaskInfo queryTaskInfo(String taskId);
+}
diff --git a/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/document/service/impl/DocumentParseBridgeServiceImpl.java b/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/document/service/impl/DocumentParseBridgeServiceImpl.java new file mode 100644 index 0000000..c9503ac --- /dev/null +++ b/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/document/service/impl/DocumentParseBridgeServiceImpl.java @@ -0,0 +1,170 @@ +package tech.easyflow.ai.document.service.impl; + +import com.easyagents.document.core.DocumentParseService; +import com.easyagents.document.core.model.ParseResponse; +import com.easyagents.document.core.model.ParseResult; +import com.easyagents.document.core.model.ParseTaskInfo; +import com.easyagents.document.core.model.ParseTaskStatus; +import org.springframework.lang.Nullable; +import org.springframework.stereotype.Service; +import org.springframework.util.StringUtils; +import tech.easyflow.ai.document.exception.DocumentParseBridgeException; +import tech.easyflow.ai.document.model.DocumentParseScenario; +import tech.easyflow.ai.document.model.DocumentSourceRef; +import tech.easyflow.ai.document.model.DocumentParseTaskInfo; +import tech.easyflow.ai.document.model.DocumentParseTaskStatus; +import tech.easyflow.ai.document.model.DocumentParsedResult; +import tech.easyflow.ai.document.service.DocumentParseBridgeService; +import tech.easyflow.ai.document.support.DocumentSourceLoader; +import tech.easyflow.ai.document.support.DocumentParseRequestFactory; +import tech.easyflow.ai.document.support.DocumentParseResultMapper; +import tech.easyflow.ai.document.support.LoadedDocumentSource; +import tech.easyflow.ai.utils.DocUtil; + +/** + * 统一文档解析桥接门面默认实现。 + * + * @author Codex + * @since 2026-04-14 + */ +@Service +public class DocumentParseBridgeServiceImpl implements DocumentParseBridgeService { + + 
@Nullable
+    private final DocumentParseService documentParseService;
+    private final DocumentSourceLoader documentSourceLoader;
+    private final DocumentParseRequestFactory parseRequestFactory;
+    private final DocumentParseResultMapper parseResultMapper;
+
+    /**
+     * Wires the bridge collaborators.
+     *
+     * @param documentParseService underlying parse service; may be {@code null}, in which case
+     *                             every operation fails with {@code service_not_enabled}
+     * @param documentSourceLoader loader that turns a source reference into bytes and metadata
+     * @param parseRequestFactory  factory for the underlying parse request
+     * @param parseResultMapper    mapper from underlying results to bridge DTOs
+     */
+    public DocumentParseBridgeServiceImpl(@Nullable DocumentParseService documentParseService,
+                                          DocumentSourceLoader documentSourceLoader,
+                                          DocumentParseRequestFactory parseRequestFactory,
+                                          DocumentParseResultMapper parseResultMapper) {
+        this.documentParseService = documentParseService;
+        this.documentSourceLoader = documentSourceLoader;
+        this.parseRequestFactory = parseRequestFactory;
+        this.parseResultMapper = parseResultMapper;
+    }
+
+    /**
+     * Loads the source, checks it is a PDF, parses it and maps the first result.
+     * Bridge exceptions pass through; any other exception is wrapped as {@code parse_failed}.
+     */
+    @Override
+    public DocumentParsedResult parse(DocumentSourceRef source, DocumentParseScenario scenario) {
+        try {
+            LoadedDocumentSource pdf = preparePdfSource(source);
+            ParseResponse parsed = requireService().parse(parseRequestFactory.build(pdf, scenario));
+            ParseResult first = extractSingleResult(parsed, false);
+            return parseResultMapper.map(first);
+        } catch (DocumentParseBridgeException bridgeFailure) {
+            throw bridgeFailure;
+        } catch (Exception unexpected) {
+            throw DocumentParseBridgeException.parseFailed("同步文档解析失败", unexpected);
+        }
+    }
+
+    /**
+     * Loads the source, checks it is a PDF and submits it as an asynchronous task.
+     * Bridge exceptions pass through; any other exception is wrapped as {@code task_failed}.
+     */
+    @Override
+    public DocumentParseTaskStatus submit(DocumentSourceRef source, DocumentParseScenario scenario) {
+        try {
+            LoadedDocumentSource pdf = preparePdfSource(source);
+            ParseTaskStatus submitted = requireService().submit(parseRequestFactory.build(pdf, scenario));
+            return parseResultMapper.map(submitted);
+        } catch (DocumentParseBridgeException bridgeFailure) {
+            throw bridgeFailure;
+        } catch (Exception unexpected) {
+            throw DocumentParseBridgeException.taskFailed("提交异步文档解析任务失败", unexpected);
+        }
+    }
+
+    /**
+     * Rejects a blank task ID, then maps the underlying task status.
+     * Bridge exceptions pass through; any other exception is wrapped as {@code task_failed}.
+     */
+    @Override
+    public DocumentParseTaskStatus queryTask(String taskId) {
+        boolean blankTaskId = !StringUtils.hasText(taskId);
+        if (blankTaskId) {
+            throw DocumentParseBridgeException.taskFailed("taskId 不能为空");
+        }
+        try {
+            return parseResultMapper.map(requireService().queryTask(taskId));
+        } catch (DocumentParseBridgeException bridgeFailure) {
+            throw bridgeFailure;
+        } catch (Exception unexpected) {
+            throw DocumentParseBridgeException.taskFailed("查询异步文档解析任务状态失败", unexpected);
+        }
+    }
+
+    /**
+     * Rejects a blank task ID, then maps the first result of the underlying response.
+     * Bridge exceptions pass through; any other exception is wrapped as {@code result_fetch_failed}.
+     */
+    @Override
+    public DocumentParsedResult queryResult(String taskId) {
+        boolean blankTaskId = !StringUtils.hasText(taskId);
+        if (blankTaskId) {
+            throw DocumentParseBridgeException.resultFetchFailed("taskId 不能为空");
+        }
+        try {
+            ParseResponse fetched = requireService().queryResult(taskId);
+            ParseResult first = extractSingleResult(fetched, true);
+            return parseResultMapper.map(first);
+        } catch (DocumentParseBridgeException bridgeFailure) {
+            throw bridgeFailure;
+        } catch (Exception unexpected) {
+            throw DocumentParseBridgeException.resultFetchFailed("获取异步文档解析结果失败", unexpected);
+        }
+    }
+
+    /**
+     * Rejects a blank task ID, then maps the underlying aggregated task information.
+     * Bridge exceptions pass through; any other exception is wrapped as {@code task_failed}.
+     */
+    @Override
+    public DocumentParseTaskInfo queryTaskInfo(String taskId) {
+        boolean blankTaskId = !StringUtils.hasText(taskId);
+        if (blankTaskId) {
+            throw DocumentParseBridgeException.taskFailed("taskId 不能为空");
+        }
+        try {
+            ParseTaskInfo aggregated = requireService().queryTaskInfo(taskId);
+            return parseResultMapper.map(aggregated);
+        } catch (DocumentParseBridgeException bridgeFailure) {
+            throw bridgeFailure;
+        } catch (Exception unexpected) {
+            throw DocumentParseBridgeException.taskFailed("聚合查询异步文档解析任务信息失败", unexpected);
+        }
+    }
+
+    /** Returns the injected parse service, or fails with {@code service_not_enabled} when it is absent. */
+    private DocumentParseService requireService() {
+        DocumentParseService service = documentParseService;
+        if (service != null) {
+            return service;
+        }
+        throw DocumentParseBridgeException.serviceNotEnabled();
+    }
+
+    /** Loads the source and rejects anything that is not recognised as a PDF. */
+    private LoadedDocumentSource preparePdfSource(DocumentSourceRef source) {
+        LoadedDocumentSource candidate = documentSourceLoader.load(source);
+        if (isPdf(candidate)) {
+            return candidate;
+        }
+        throw DocumentParseBridgeException.unsupportedSource("统一文档解析桥接首版仅支持 PDF 文件");
+    }
+
+    /** Treats a source as PDF when its content type mentions "pdf" or its file suffix normalizes to "pdf". */
+    private boolean isPdf(LoadedDocumentSource loadedSource) {
+        String declaredType = loadedSource.getContentType();
+        boolean typeSaysPdf = StringUtils.hasText(declaredType) && declaredType.toLowerCase().contains("pdf");
+        if (typeSaysPdf) {
+            return true;
+        }
+        String name = loadedSource.getFileName();
+        boolean hasExtension = StringUtils.hasText(name) && name.contains(".");
+        return hasExtension && "pdf".equals(DocUtil.normalizeSuffix(DocUtil.getSuffix(name)));
+    }
+
+    /**
+     * Returns the first result of a response, failing when there is none.
+     * The flag only selects which error code the failure carries.
+     */
+    private ParseResult extractSingleResult(ParseResponse response, boolean resultFetchPhase) {
+        boolean hasResults = response != null
+                && response.getResults() != null
+                && !response.getResults().isEmpty();
+        if (hasResults) {
+            return response.getResults().get(0);
+        }
+        if (resultFetchPhase) {
+            throw DocumentParseBridgeException.resultFetchFailed("异步文档解析结果为空");
+        }
+        throw DocumentParseBridgeException.parseFailed("同步文档解析结果为空");
+    }
+}
diff --git a/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/document/support/DocumentParseRequestFactory.java b/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/document/support/DocumentParseRequestFactory.java new file mode 100644 index 0000000..0b49319 --- /dev/null +++ b/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/document/support/DocumentParseRequestFactory.java @@ -0,0 +1,71 @@ +package tech.easyflow.ai.document.support; + +import com.easyagents.document.core.model.ParseFile; +import com.easyagents.document.core.model.ParseRequest; +import org.springframework.stereotype.Component; +import tech.easyflow.ai.document.exception.DocumentParseBridgeException; +import tech.easyflow.ai.document.model.DocumentParseScenario; + +/** + * easy-agents 解析请求工厂。 + * + *负责把 easyflow 业务场景预设映射为底层统一解析请求。
+ *
+ * @author Codex
+ * @since 2026-04-14
+ */
+@Component
+public class DocumentParseRequestFactory {
+
+    /**
+     * Builds the underlying parse request for one loaded document.
+     *
+     * @param source   loaded document source; must carry non-empty bytes
+     * @param scenario parse scenario; must not be {@code null}
+     * @return the parse request with one file and the scenario's return flags applied
+     */
+    public ParseRequest build(LoadedDocumentSource source, DocumentParseScenario scenario) {
+        boolean emptySource = source == null
+                || source.getContentBytes() == null
+                || source.getContentBytes().length == 0;
+        if (emptySource) {
+            throw DocumentParseBridgeException.requestBuildFailed("文档源内容为空,无法构建解析请求");
+        }
+        if (scenario == null) {
+            throw DocumentParseBridgeException.requestBuildFailed("解析场景不能为空");
+        }
+        ParseRequest parseRequest = new ParseRequest();
+        // Left null on purpose so the easy-agents provider fills in its environment defaults.
+        parseRequest.setParseMethod(null);
+        parseRequest.setFormulaEnabled(null);
+        parseRequest.setTableEnabled(null);
+        ParseFile file = ParseFile.of(source.getFileName(), source.getContentBytes(), source.getContentType());
+        parseRequest.addFile(file);
+        applyScenario(parseRequest, scenario);
+        return parseRequest;
+    }
+
+    /** Translates a scenario preset into the request's return flags. */
+    private void applyScenario(ParseRequest request, DocumentParseScenario scenario) {
+        switch (scenario) {
+            case WORKFLOW_TEXT ->
+                    applyReturnFlags(request, Boolean.FALSE, Boolean.FALSE, Boolean.FALSE, Boolean.FALSE);
+            case KNOWLEDGE_IMPORT ->
+                    applyReturnFlags(request, Boolean.TRUE, Boolean.TRUE, Boolean.FALSE, Boolean.TRUE);
+            case FULL_ARTIFACTS ->
+                    applyReturnFlags(request, Boolean.TRUE, Boolean.TRUE, Boolean.TRUE, Boolean.TRUE);
+            default ->
+                    throw DocumentParseBridgeException.requestBuildFailed("不支持的文档解析场景: " + scenario);
+        }
+    }
+
+    /** Sets the five return flags in a fixed order; markdown is requested by every scenario. */
+    private void applyReturnFlags(ParseRequest request,
+                                  Boolean middleJson,
+                                  Boolean contentList,
+                                  Boolean modelOutput,
+                                  Boolean images) {
+        request.setReturnMarkdown(Boolean.TRUE);
+        request.setReturnMiddleJson(middleJson);
+        request.setReturnContentList(contentList);
+        request.setReturnModelOutput(modelOutput);
+        request.setReturnImages(images);
+    }
+}
diff --git 
a/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/document/support/DocumentParseResultMapper.java b/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/document/support/DocumentParseResultMapper.java new file mode 100644 index 0000000..d1b4eae --- /dev/null +++ b/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/document/support/DocumentParseResultMapper.java @@ -0,0 +1,128 @@ +package tech.easyflow.ai.document.support; + +import com.easyagents.document.core.model.ParseArtifacts; +import com.easyagents.document.core.model.ParseResult; +import com.easyagents.document.core.model.ParseTaskInfo; +import com.easyagents.document.core.model.ParseTaskStatus; +import org.springframework.stereotype.Component; +import org.springframework.util.StringUtils; +import tech.easyflow.ai.document.model.DocumentParseArtifacts; +import tech.easyflow.ai.document.model.DocumentParseTaskInfo; +import tech.easyflow.ai.document.model.DocumentParseTaskStatus; +import tech.easyflow.ai.document.model.DocumentParsedResult; + +/** + * easy-agents 结果映射器。 + * + *负责把底层解析结果转换为 easyflow 侧稳定 DTO,并统一 preferredText 规则。
+ *
+ * @author Codex
+ * @since 2026-04-14
+ */
+@Component
+public class DocumentParseResultMapper {
+
+    /**
+     * Maps a single-file parse result.
+     *
+     * @param parseResult underlying result; may be {@code null}
+     * @return the bridge result; an empty object when the input is {@code null}
+     */
+    public DocumentParsedResult map(ParseResult parseResult) {
+        DocumentParsedResult document = new DocumentParsedResult();
+        if (parseResult == null) {
+            return document;
+        }
+        document.setFileName(parseResult.getFileName());
+        document.setMarkdown(parseResult.getMarkdown());
+        document.setPlainText(parseResult.getPlainText());
+        document.setPreferredText(resolvePreferredText(parseResult));
+        document.setPages(parseResult.getPages());
+        document.setBlocks(parseResult.getBlocks());
+        document.setTables(parseResult.getTables());
+        document.setImages(parseResult.getImages());
+        document.setWarnings(parseResult.getWarnings());
+        document.setMetadata(parseResult.getMetadata());
+        document.setArtifacts(mapArtifacts(parseResult.getArtifacts()));
+        return document;
+    }
+
+    /**
+     * Maps an asynchronous task status.
+     *
+     * @param taskStatus underlying task status; may be {@code null}
+     * @return the bridge task status; an empty object when the input is {@code null}
+     */
+    public DocumentParseTaskStatus map(ParseTaskStatus taskStatus) {
+        DocumentParseTaskStatus status = new DocumentParseTaskStatus();
+        if (taskStatus == null) {
+            return status;
+        }
+        // Single source of truth for the field copy, shared with map(ParseTaskInfo),
+        // so the two mappings cannot drift apart when a field is added.
+        fillTaskStatus(status, taskStatus);
+        return status;
+    }
+
+    /**
+     * Maps an aggregated task query result.
+     *
+     * @param taskInfo underlying aggregated task information; may be {@code null}
+     * @return the bridge task information; the result is attached only when the underlying
+     *         response carries at least one result, in which case the first one is mapped
+     */
+    public DocumentParseTaskInfo map(ParseTaskInfo taskInfo) {
+        DocumentParseTaskInfo mapped = new DocumentParseTaskInfo();
+        if (taskInfo == null) {
+            return mapped;
+        }
+        fillTaskStatus(mapped, taskInfo);
+        if (taskInfo.getResult() != null
+                && taskInfo.getResult().getResults() != null
+                && !taskInfo.getResult().getResults().isEmpty()) {
+            mapped.setResult(map(taskInfo.getResult().getResults().get(0)));
+        }
+        return mapped;
+    }
+
+    /** Copies every status field from the underlying status onto the bridge status. */
+    private void fillTaskStatus(DocumentParseTaskStatus status, ParseTaskStatus taskStatus) {
+        status.setTaskId(taskStatus.getTaskId());
+        status.setStatus(taskStatus.getStatus());
+        status.setBackend(taskStatus.getBackend());
+        status.setFileNames(taskStatus.getFileNames());
+        status.setCreatedAt(taskStatus.getCreatedAt());
+        status.setStartedAt(taskStatus.getStartedAt());
+        status.setCompletedAt(taskStatus.getCompletedAt());
+        status.setError(taskStatus.getError());
+        status.setStatusUrl(taskStatus.getStatusUrl());
+        status.setResultUrl(taskStatus.getResultUrl());
+        status.setQueuedAhead(taskStatus.getQueuedAhead());
+    }
+
+    /** Prefers markdown when it has text; otherwise falls back to plain text. */
+    private String resolvePreferredText(ParseResult parseResult) {
+        if (StringUtils.hasText(parseResult.getMarkdown())) {
+            return parseResult.getMarkdown();
+        }
+        return parseResult.getPlainText();
+    }
+
+    /** Copies the artifact fields; returns an empty artifacts object when the input is {@code null}. */
+    private DocumentParseArtifacts mapArtifacts(ParseArtifacts artifacts) {
+        DocumentParseArtifacts mappedArtifacts = new DocumentParseArtifacts();
+        if (artifacts == null) {
+            return mappedArtifacts;
+        }
+        mappedArtifacts.setMiddleJson(artifacts.getMiddleJson());
+        mappedArtifacts.setContentList(artifacts.getContentList());
+        mappedArtifacts.setModelOutput(artifacts.getModelOutput());
+        mappedArtifacts.setExtraJsonArtifacts(artifacts.getExtraJsonArtifacts());
+        mappedArtifacts.setExtraBinaryArtifacts(artifacts.getExtraBinaryArtifacts());
+        return mappedArtifacts;
+    }
+}
diff --git a/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/document/support/DocumentSourceLoader.java b/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/document/support/DocumentSourceLoader.java new file mode 100644 index 0000000..48f2ecb 
--- /dev/null +++ b/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/document/support/DocumentSourceLoader.java @@ -0,0 +1,146 @@ +package tech.easyflow.ai.document.support; + +import cn.hutool.http.HttpUtil; +import org.springframework.beans.factory.annotation.Qualifier; +import org.springframework.stereotype.Component; +import org.springframework.util.StringUtils; +import tech.easyflow.ai.document.exception.DocumentParseBridgeException; +import tech.easyflow.ai.document.model.DocumentSourceRef; +import tech.easyflow.common.filestorage.FileStorageService; +import tech.easyflow.common.filestorage.utils.PathGeneratorUtil; + +import java.io.IOException; +import java.io.InputStream; +import java.net.URLConnection; + +/** + * 文档源加载器。 + * + *负责把不同来源的文件引用统一转换为内存字节和标准文件元数据。
+ *
+ * @author Codex
+ * @since 2026-04-14
+ */
+@Component
+public class DocumentSourceLoader {
+
+    private final FileStorageService fileStorageService;
+
+    /**
+     * Creates the loader.
+     *
+     * @param fileStorageService storage service qualified as {@code "default"}, used to read stored files
+     */
+    public DocumentSourceLoader(@Qualifier("default") FileStorageService fileStorageService) {
+        this.fileStorageService = fileStorageService;
+    }
+
+    /**
+     * Loads a document source into memory.
+     *
+     * Resolution order: in-memory bytes first, then {@code filePath} (downloaded when it starts
+     * with {@code http://} or {@code https://}, otherwise read from file storage), then {@code url}.
+     *
+     * @param sourceRef easyflow document source
+     * @return the loaded document with file name, content type, size and bytes
+     * @throws DocumentParseBridgeException with code {@code unsupported_source} when the reference
+     *         is {@code null} or carries none of the three inputs, or {@code source_load_failed}
+     *         when reading or downloading fails
+     */
+    public LoadedDocumentSource load(DocumentSourceRef sourceRef) {
+        if (sourceRef == null) {
+            throw DocumentParseBridgeException.unsupportedSource("文档源不能为空");
+        }
+        if (hasContentBytes(sourceRef)) {
+            String fileName = resolveFileName(sourceRef);
+            byte[] contentBytes = sourceRef.getContentBytes();
+            return buildLoadedSource(
+                    fileName,
+                    resolveContentType(sourceRef, fileName),
+                    resolveSize(sourceRef, contentBytes.length),
+                    contentBytes
+            );
+        }
+        if (StringUtils.hasText(sourceRef.getFilePath())) {
+            if (isRemoteUrl(sourceRef.getFilePath())) {
+                return loadFromRemoteValue(sourceRef, sourceRef.getFilePath());
+            }
+            return loadFromFilePath(sourceRef);
+        }
+        if (StringUtils.hasText(sourceRef.getUrl())) {
+            return loadFromUrl(sourceRef);
+        }
+        throw DocumentParseBridgeException.unsupportedSource("文档源缺少 filePath、url 或 contentBytes");
+    }
+
+    /** Reads the whole stored file into memory; I/O failures become {@code source_load_failed}. */
+    private LoadedDocumentSource loadFromFilePath(DocumentSourceRef sourceRef) {
+        String fileName = resolveFileName(sourceRef);
+        try (InputStream inputStream = fileStorageService.readStream(sourceRef.getFilePath())) {
+            byte[] contentBytes = inputStream.readAllBytes();
+            // The bytes are already in memory, so their length is the fallback size. This avoids
+            // a second storage call and keeps the reported size consistent with contentBytes,
+            // matching the in-memory and remote branches.
+            return buildLoadedSource(
+                    fileName,
+                    resolveContentType(sourceRef, fileName),
+                    resolveSize(sourceRef, contentBytes.length),
+                    contentBytes
+            );
+        } catch (IOException e) {
+            throw DocumentParseBridgeException.sourceLoadFailed(
+                    "读取文档存储文件失败: " + sourceRef.getFilePath(),
+                    e
+            );
+        }
+    }
+
+    /** Downloads the document referenced by {@code url}. */
+    private LoadedDocumentSource loadFromUrl(DocumentSourceRef sourceRef) {
+        return loadFromRemoteValue(sourceRef, sourceRef.getUrl());
+    }
+
+    /**
+     * Downloads a remote document into memory; any failure becomes {@code source_load_failed}.
+     *
+     * NOTE(review): the URL is fetched as given, with no host or scheme allow-list. If the
+     * reference can be influenced by end users this is a server-side request forgery risk;
+     * confirm that callers validate URLs or add validation here.
+     */
+    private LoadedDocumentSource loadFromRemoteValue(DocumentSourceRef sourceRef, String remoteUrl) {
+        String fileName = resolveFileName(sourceRef);
+        try {
+            byte[] contentBytes = HttpUtil.downloadBytes(remoteUrl);
+            return buildLoadedSource(
+                    fileName,
+                    resolveContentType(sourceRef, fileName),
+                    resolveSize(sourceRef, contentBytes.length),
+                    contentBytes
+            );
+        } catch (Exception e) {
+            throw DocumentParseBridgeException.sourceLoadFailed(
+                    "下载文档 URL 失败: " + remoteUrl,
+                    e
+            );
+        }
+    }
+
+    /** Assembles the internal loaded-source object from its four parts. */
+    private LoadedDocumentSource buildLoadedSource(String fileName, String contentType, Long size, byte[] contentBytes) {
+        LoadedDocumentSource loadedSource = new LoadedDocumentSource();
+        loadedSource.setFileName(fileName);
+        loadedSource.setContentType(contentType);
+        loadedSource.setSize(size);
+        loadedSource.setContentBytes(contentBytes);
+        return loadedSource;
+    }
+
+    /**
+     * Resolves the file name from, in order, the explicit file name, the file path and the URL.
+     *
+     * A remote {@code filePath} is treated like a URL: its query string and fragment are removed,
+     * so {@code https://host/a.pdf?token=x} yields {@code a.pdf} instead of {@code a.pdf?token=x}.
+     * Without that, suffix-based PDF detection and content-type guessing fail for remote paths.
+     */
+    private String resolveFileName(DocumentSourceRef sourceRef) {
+        if (StringUtils.hasText(sourceRef.getFileName())) {
+            return PathGeneratorUtil.getPureFileName(sourceRef.getFileName());
+        }
+        if (StringUtils.hasText(sourceRef.getFilePath())) {
+            String filePath = sourceRef.getFilePath();
+            // Only remote paths are trimmed; a storage path is passed through unchanged.
+            String namedPath = isRemoteUrl(filePath) ? stripQueryAndFragment(filePath) : filePath;
+            return PathGeneratorUtil.getPureFileName(namedPath);
+        }
+        if (StringUtils.hasText(sourceRef.getUrl())) {
+            // Trim before extracting the name so a '/' inside the query cannot be taken for a
+            // path separator (presumably getPureFileName keeps the last path segment - confirm).
+            return PathGeneratorUtil.getPureFileName(stripQueryAndFragment(sourceRef.getUrl()));
+        }
+        throw DocumentParseBridgeException.unsupportedSource("文档源缺少可用文件名");
+    }
+
+    /** Returns the URL up to, but excluding, the first {@code '?'} or {@code '#'}. */
+    private String stripQueryAndFragment(String remoteUrl) {
+        int cut = remoteUrl.length();
+        int queryIndex = remoteUrl.indexOf('?');
+        if (queryIndex >= 0) {
+            cut = queryIndex;
+        }
+        int fragmentIndex = remoteUrl.indexOf('#');
+        if (fragmentIndex >= 0 && fragmentIndex < cut) {
+            cut = fragmentIndex;
+        }
+        return remoteUrl.substring(0, cut);
+    }
+
+    /** Uses the declared content type when present; otherwise guesses it from the file name (may be {@code null}). */
+    private String resolveContentType(DocumentSourceRef sourceRef, String fileName) {
+        if (StringUtils.hasText(sourceRef.getContentType())) {
+            return sourceRef.getContentType();
+        }
+        return URLConnection.guessContentTypeFromName(fileName);
+    }
+
+    /** Uses the declared size when present; otherwise the supplied fallback. */
+    private Long resolveSize(DocumentSourceRef sourceRef, long fallbackSize) {
+        return sourceRef.getSize() != null ? sourceRef.getSize() : fallbackSize;
+    }
+
+    /** True when the reference carries a non-empty byte array. */
+    private boolean hasContentBytes(DocumentSourceRef sourceRef) {
+        return sourceRef.getContentBytes() != null && sourceRef.getContentBytes().length > 0;
+    }
+
+    /** True when the value starts with {@code http://} or {@code https://}. */
+    private boolean isRemoteUrl(String value) {
+        return value.startsWith("http://") || value.startsWith("https://");
+    }
+}
diff --git a/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/document/support/LoadedDocumentSource.java b/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/document/support/LoadedDocumentSource.java new file mode 100644 index 0000000..ee81be7 --- /dev/null +++ b/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/document/support/LoadedDocumentSource.java @@ -0,0 +1,49 @@ +package tech.easyflow.ai.document.support; + +/** + * 桥接层内部已加载文档源。 + * + *该对象只在桥接层内部流转,用于承接已解析出的文件名、类型和字节内容。
+ *
+ * @author Codex
+ * @since 2026-04-14
+ */
+public class LoadedDocumentSource {
+
+    private String fileName;
+    private String contentType;
+    private Long size;
+    private byte[] contentBytes;
+
+    /** @return the file name, or {@code null} when not set */
+    public String getFileName() {
+        return fileName;
+    }
+
+    /** @param fileName the file name */
+    public void setFileName(String fileName) {
+        this.fileName = fileName;
+    }
+
+    /** @return the content type, or {@code null} when not set */
+    public String getContentType() {
+        return contentType;
+    }
+
+    /** @param contentType the content type */
+    public void setContentType(String contentType) {
+        this.contentType = contentType;
+    }
+
+    /** @return the size, or {@code null} when not set */
+    public Long getSize() {
+        return size;
+    }
+
+    /** @param size the size */
+    public void setSize(Long size) {
+        this.size = size;
+    }
+
+    /** @return the stored byte array itself (no defensive copy), or {@code null} when not set */
+    public byte[] getContentBytes() {
+        return contentBytes;
+    }
+
+    /** @param contentBytes the document bytes; the reference is stored without copying */
+    public void setContentBytes(byte[] contentBytes) {
+        this.contentBytes = contentBytes;
+    }
+}
diff --git a/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/node/DocNode.java b/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/node/DocNode.java index 7f157cc..8ed4403 100644 --- a/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/node/DocNode.java +++ b/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/node/DocNode.java @@ -4,28 +4,35 @@ import com.easyagents.core.util.StringUtil; import com.easyagents.flow.core.chain.Chain; import com.easyagents.flow.core.chain.Parameter; import com.easyagents.flow.core.node.BaseNode; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import tech.easyflow.ai.utils.DocUtil; import tech.easyflow.common.util.SpringContextUtil; -import java.io.ByteArrayInputStream; import java.util.HashMap; import java.util.List; import java.util.Map; +/** + * 工作流文件内容提取节点。 + * + *节点输入为统一文件对象,PDF 交给统一文档解析桥接服务, + * 其他类型继续走默认文档读取器。
+ * + * @author Codex + * @since 2026-04-14 + */ public class DocNode extends BaseNode { - private static final Logger log = LoggerFactory.getLogger(DocNode.class); - + /** + * 执行文件内容提取。 + * + * @param chain 当前流程链 + * @return 节点输出 + */ @Override public Map负责把工作流运行态中的文件对象转换为统一文档源,并根据文件类型选择 + * 统一文档解析桥接服务或默认读取器。
+ * + * @author Codex + * @since 2026-04-14 + */ +@Component +public class DocNodeFileContentExtractor { + + private final DocumentParseBridgeService documentParseBridgeService; + private final FileStorageService fileStorageService; + private final ReaderManager readerManager; + + /** + * 创建文件内容提取器。 + * + * @param documentParseBridgeService 统一文档解析桥接服务 + * @param fileStorageService 文件存储服务 + * @param readerManager 默认读取器管理器 + */ + public DocNodeFileContentExtractor(DocumentParseBridgeService documentParseBridgeService, + @Qualifier("default") FileStorageService fileStorageService, + ReaderManager readerManager) { + this.documentParseBridgeService = documentParseBridgeService; + this.fileStorageService = fileStorageService; + this.readerManager = readerManager; + } + + /** + * 提取文件文本内容。 + * + * @param fileValue 工作流运行态中的文件对象 + * @return 可供下游节点直接消费的文本 + */ + public String extract(Object fileValue) { + DocumentSourceRef sourceRef = toDocumentSourceRef(fileValue); + validateSourceRef(sourceRef); + if (isPdf(sourceRef)) { + return extractPdfContent(sourceRef); + } + return extractDefaultContent(sourceRef); + } + + /** + * 将运行时文件值转换为统一文档源。 + * + * @param fileValue 运行时文件值 + * @return 文档源 + */ + DocumentSourceRef toDocumentSourceRef(Object fileValue) { + if (fileValue instanceof DocumentSourceRef sourceRef) { + return sourceRef; + } + if (!(fileValue instanceof Map, ?> fileMap)) { + throw new BusinessException("文件输入格式不正确,必须为文件对象"); + } + DocumentSourceRef sourceRef = new DocumentSourceRef(); + sourceRef.setFileName(asText(fileMap.get("fileName"))); + sourceRef.setFilePath(asText(fileMap.get("filePath"))); + sourceRef.setContentType(asText(fileMap.get("contentType"))); + sourceRef.setUrl(asText(fileMap.get("url"))); + sourceRef.setSize(asLong(fileMap.get("size"))); + return sourceRef; + } + + private void validateSourceRef(DocumentSourceRef sourceRef) { + if (sourceRef == null) { + throw new BusinessException("文件输入不能为空"); + } + if (!StringUtil.hasText(sourceRef.getFileName())) 
{ + throw new BusinessException("文件输入缺少 fileName"); + } + if (!StringUtil.hasText(sourceRef.getFilePath())) { + throw new BusinessException("文件输入缺少 filePath"); + } + } + + private boolean isPdf(DocumentSourceRef sourceRef) { + if (StringUtil.hasText(sourceRef.getContentType()) + && sourceRef.getContentType().toLowerCase().contains("pdf")) { + return true; + } + String fileName = sourceRef.getFileName(); + if (!StringUtil.hasText(fileName) || !fileName.contains(".")) { + return false; + } + return "pdf".equals(DocUtil.normalizeSuffix(DocUtil.getSuffix(fileName))); + } + + private String extractPdfContent(DocumentSourceRef sourceRef) { + DocumentParsedResult parsedResult = documentParseBridgeService.parse(sourceRef, DocumentParseScenario.WORKFLOW_TEXT); + String preferredText = parsedResult == null ? null : parsedResult.getPreferredText(); + if (StringUtil.hasText(preferredText)) { + return preferredText; + } + if (parsedResult != null && StringUtil.hasText(parsedResult.getMarkdown())) { + return parsedResult.getMarkdown(); + } + if (parsedResult != null && StringUtil.hasText(parsedResult.getPlainText())) { + return parsedResult.getPlainText(); + } + throw new BusinessException("PDF 文档解析结果为空"); + } + + private String extractDefaultContent(DocumentSourceRef sourceRef) { + try (InputStream inputStream = openInputStream(sourceRef)) { + return readerManager.getReader().read(sourceRef.getFileName(), inputStream); + } catch (IOException e) { + throw new RuntimeException("读取文件内容失败: " + sourceRef.getFilePath(), e); + } + } + + private InputStream openInputStream(DocumentSourceRef sourceRef) throws IOException { + String filePath = sourceRef.getFilePath(); + if (StringUtil.hasText(filePath) && isRemoteUrl(filePath)) { + byte[] bytes = HttpUtil.downloadBytes(filePath); + return new java.io.ByteArrayInputStream(bytes); + } + if (StringUtil.hasText(filePath)) { + return fileStorageService.readStream(filePath); + } + if (StringUtil.hasText(sourceRef.getUrl())) { + byte[] bytes = 
HttpUtil.downloadBytes(sourceRef.getUrl()); + return new java.io.ByteArrayInputStream(bytes); + } + throw new IOException("文件输入缺少可读取路径"); + } + + private boolean isRemoteUrl(String value) { + return value.startsWith("http://") || value.startsWith("https://"); + } + + private String asText(Object value) { + return value == null ? null : String.valueOf(value).trim(); + } + + private Long asLong(Object value) { + if (value == null) { + return null; + } + if (value instanceof Number number) { + return number.longValue(); + } + if (value instanceof String text && StringUtil.hasText(text)) { + return Long.parseLong(text.trim()); + } + return null; + } +} diff --git a/easyflow-modules/easyflow-module-ai/src/test/java/tech/easyflow/ai/document/service/impl/DocumentParseBridgeServiceImplTest.java b/easyflow-modules/easyflow-module-ai/src/test/java/tech/easyflow/ai/document/service/impl/DocumentParseBridgeServiceImplTest.java new file mode 100644 index 0000000..7854901 --- /dev/null +++ b/easyflow-modules/easyflow-module-ai/src/test/java/tech/easyflow/ai/document/service/impl/DocumentParseBridgeServiceImplTest.java @@ -0,0 +1,190 @@ +package tech.easyflow.ai.document.service.impl; + +import com.easyagents.document.core.DocumentParseService; +import com.easyagents.document.core.model.ParseRequest; +import com.easyagents.document.core.model.ParseResponse; +import com.easyagents.document.core.model.ParseResult; +import com.easyagents.document.core.model.ParseTaskInfo; +import com.easyagents.document.core.model.ParseTaskStatus; +import org.junit.Assert; +import org.junit.Test; +import tech.easyflow.ai.document.exception.DocumentParseBridgeException; +import tech.easyflow.ai.document.model.DocumentParseScenario; +import tech.easyflow.ai.document.model.DocumentSourceRef; +import tech.easyflow.ai.document.model.DocumentParseTaskInfo; +import tech.easyflow.ai.document.model.DocumentParseTaskStatus; +import tech.easyflow.ai.document.model.DocumentParsedResult; +import 
tech.easyflow.ai.document.support.DocumentSourceLoader; +import tech.easyflow.ai.document.support.DocumentParseRequestFactory; +import tech.easyflow.ai.document.support.DocumentParseResultMapper; +import tech.easyflow.common.filestorage.FileStorageService; + +import java.io.ByteArrayInputStream; +import java.io.InputStream; +import java.nio.charset.StandardCharsets; +import java.util.Collections; + +/** + * {@link DocumentParseBridgeServiceImpl} 单元测试。 + * + * @author Codex + * @since 2026-04-14 + */ +public class DocumentParseBridgeServiceImplTest { + + /** + * 验证同步解析成功透传并返回标准化结果。 + */ + @Test + public void shouldParseSuccessfully() { + FakeDocumentParseService parseService = new FakeDocumentParseService(); + DocumentParseBridgeServiceImpl bridgeService = buildBridgeService(parseService); + + DocumentParsedResult document = bridgeService.parse(buildSource(), DocumentParseScenario.WORKFLOW_TEXT); + + Assert.assertEquals("# demo", document.getPreferredText()); + Assert.assertFalse(parseService.lastParseRequest.getReturnMiddleJson()); + Assert.assertFalse(parseService.lastParseRequest.getReturnImages()); + } + + /** + * 验证异步提交、状态查询和结果查询链路可用。 + */ + @Test + public void shouldSupportAsyncFlow() { + FakeDocumentParseService parseService = new FakeDocumentParseService(); + DocumentParseBridgeServiceImpl bridgeService = buildBridgeService(parseService); + + DocumentParseTaskStatus taskStatus = bridgeService.submit(buildSource(), DocumentParseScenario.KNOWLEDGE_IMPORT); + DocumentParseTaskStatus queriedStatus = bridgeService.queryTask("task-1"); + DocumentParsedResult queriedResult = bridgeService.queryResult("task-1"); + + Assert.assertEquals("task-1", taskStatus.getTaskId()); + Assert.assertEquals("running", queriedStatus.getStatus()); + Assert.assertEquals("# demo", queriedResult.getPreferredText()); + } + + /** + * 验证聚合查询在完成状态下会附带标准化结果。 + */ + @Test + public void shouldQueryTaskInfoSuccessfully() { + FakeDocumentParseService parseService = new 
FakeDocumentParseService(); + parseService.taskStatusValue = "completed"; + DocumentParseBridgeServiceImpl bridgeService = buildBridgeService(parseService); + + DocumentParseTaskInfo taskInfo = bridgeService.queryTaskInfo("task-1"); + + Assert.assertEquals("completed", taskInfo.getStatus()); + Assert.assertNotNull(taskInfo.getResult()); + Assert.assertEquals("# demo", taskInfo.getResult().getPreferredText()); + } + + /** + * 验证缺少底层服务时抛出稳定错误码。 + */ + @Test + public void shouldThrowWhenServiceDisabled() { + DocumentParseBridgeServiceImpl bridgeService = buildBridgeService(null); + + try { + bridgeService.parse(buildSource(), DocumentParseScenario.WORKFLOW_TEXT); + Assert.fail("expected DocumentParseBridgeException"); + } catch (DocumentParseBridgeException e) { + Assert.assertEquals("service_not_enabled", e.getCode()); + } + } + + private DocumentParseBridgeServiceImpl buildBridgeService(DocumentParseService parseService) { + return new DocumentParseBridgeServiceImpl( + parseService, + new DocumentSourceLoader(new InMemoryFileStorageService()), + new DocumentParseRequestFactory(), + new DocumentParseResultMapper() + ); + } + + private DocumentSourceRef buildSource() { + DocumentSourceRef sourceRef = DocumentSourceRef.ofBytes("demo.pdf", "pdf-data".getBytes(StandardCharsets.UTF_8)); + sourceRef.setContentType("application/pdf"); + sourceRef.setSize(8L); + return sourceRef; + } + + private static class InMemoryFileStorageService implements FileStorageService { + + @Override + public String save(org.springframework.web.multipart.MultipartFile file) { + return null; + } + + @Override + public void delete(String path) { + } + + @Override + public InputStream readStream(String path) { + return new ByteArrayInputStream("pdf-data".getBytes(StandardCharsets.UTF_8)); + } + + @Override + public long getFileSize(String path) { + return 8L; + } + } + + private static class FakeDocumentParseService implements DocumentParseService { + + private ParseRequest lastParseRequest; + 
private String taskStatusValue = "running"; + + @Override + public ParseResponse parse(ParseRequest request) { + this.lastParseRequest = request; + return buildResponse(); + } + + @Override + public ParseTaskStatus submit(ParseRequest request) { + this.lastParseRequest = request; + ParseTaskStatus status = new ParseTaskStatus(); + status.setTaskId("task-1"); + status.setStatus("submitted"); + status.setFileNames(Collections.singletonList("demo.pdf")); + return status; + } + + @Override + public ParseTaskStatus queryTask(String taskId) { + ParseTaskStatus status = new ParseTaskStatus(); + status.setTaskId(taskId); + status.setStatus(taskStatusValue); + status.setFileNames(Collections.singletonList("demo.pdf")); + return status; + } + + @Override + public ParseResponse queryResult(String taskId) { + return buildResponse(); + } + + @Override + public ParseTaskInfo queryTaskInfo(String taskId) { + ParseTaskInfo taskInfo = ParseTaskInfo.fromStatus(queryTask(taskId)); + if ("completed".equals(taskStatusValue)) { + taskInfo.setResult(buildResponse()); + } + return taskInfo; + } + + private ParseResponse buildResponse() { + ParseResult result = new ParseResult(); + result.setFileName("demo.pdf"); + result.setMarkdown("# demo"); + result.setPlainText("demo"); + ParseResponse response = new ParseResponse(); + response.setResults(Collections.singletonList(result)); + return response; + } + } +} diff --git a/easyflow-modules/easyflow-module-ai/src/test/java/tech/easyflow/ai/document/support/DocumentParseRequestFactoryTest.java b/easyflow-modules/easyflow-module-ai/src/test/java/tech/easyflow/ai/document/support/DocumentParseRequestFactoryTest.java new file mode 100644 index 0000000..8e42d82 --- /dev/null +++ b/easyflow-modules/easyflow-module-ai/src/test/java/tech/easyflow/ai/document/support/DocumentParseRequestFactoryTest.java @@ -0,0 +1,57 @@ +package tech.easyflow.ai.document.support; + +import com.easyagents.document.core.model.ParseRequest; +import org.junit.Assert; 
+import org.junit.Test; +import tech.easyflow.ai.document.model.DocumentParseScenario; + +/** + * {@link DocumentParseRequestFactory} 单元测试。 + * + * @author Codex + * @since 2026-04-14 + */ +public class DocumentParseRequestFactoryTest { + + /** + * 验证工作流文本场景只请求最小文本结果。 + */ + @Test + public void shouldBuildWorkflowTextScenarioRequest() { + DocumentParseRequestFactory factory = new DocumentParseRequestFactory(); + + ParseRequest request = factory.build(buildSource(), DocumentParseScenario.WORKFLOW_TEXT); + + Assert.assertNull(request.getParseMethod()); + Assert.assertNull(request.getFormulaEnabled()); + Assert.assertNull(request.getTableEnabled()); + Assert.assertTrue(request.getReturnMarkdown()); + Assert.assertFalse(request.getReturnMiddleJson()); + Assert.assertFalse(request.getReturnContentList()); + Assert.assertFalse(request.getReturnImages()); + } + + /** + * 验证知识库导入场景保留结构化工件。 + */ + @Test + public void shouldBuildKnowledgeImportScenarioRequest() { + DocumentParseRequestFactory factory = new DocumentParseRequestFactory(); + + ParseRequest request = factory.build(buildSource(), DocumentParseScenario.KNOWLEDGE_IMPORT); + + Assert.assertTrue(request.getReturnMarkdown()); + Assert.assertTrue(request.getReturnMiddleJson()); + Assert.assertTrue(request.getReturnContentList()); + Assert.assertTrue(request.getReturnImages()); + } + + private LoadedDocumentSource buildSource() { + LoadedDocumentSource source = new LoadedDocumentSource(); + source.setFileName("demo.pdf"); + source.setContentType("application/pdf"); + source.setContentBytes("pdf-data".getBytes()); + source.setSize(8L); + return source; + } +} diff --git a/easyflow-modules/easyflow-module-ai/src/test/java/tech/easyflow/ai/document/support/DocumentParseResultMapperTest.java b/easyflow-modules/easyflow-module-ai/src/test/java/tech/easyflow/ai/document/support/DocumentParseResultMapperTest.java new file mode 100644 index 0000000..7fcf9ed --- /dev/null +++ 
b/easyflow-modules/easyflow-module-ai/src/test/java/tech/easyflow/ai/document/support/DocumentParseResultMapperTest.java @@ -0,0 +1,82 @@ +package tech.easyflow.ai.document.support; + +import com.easyagents.document.core.model.ParseArtifacts; +import com.easyagents.document.core.model.ParseResult; +import com.easyagents.document.core.model.ParseResponse; +import com.easyagents.document.core.model.ParseTaskInfo; +import org.junit.Assert; +import org.junit.Test; +import tech.easyflow.ai.document.model.DocumentParseTaskInfo; +import tech.easyflow.ai.document.model.DocumentParsedResult; + +import java.util.Collections; + +/** + * {@link DocumentParseResultMapper} 单元测试。 + * + * @author Codex + * @since 2026-04-14 + */ +public class DocumentParseResultMapperTest { + + /** + * 验证 preferredText 按 markdown 优先、plainText 回退。 + */ + @Test + public void shouldPreferMarkdown() { + DocumentParseResultMapper mapper = new DocumentParseResultMapper(); + ParseResult result = new ParseResult(); + result.setFileName("demo.pdf"); + result.setMarkdown("# title"); + result.setPlainText("plain"); + + DocumentParsedResult mapped = mapper.map(result); + + Assert.assertEquals("# title", mapped.getPreferredText()); + } + + /** + * 验证结构化工件被正确映射。 + */ + @Test + public void shouldMapArtifacts() { + DocumentParseResultMapper mapper = new DocumentParseResultMapper(); + ParseResult result = new ParseResult(); + ParseArtifacts artifacts = new ParseArtifacts(); + artifacts.setMiddleJson(Collections.singletonMap("page", 1)); + artifacts.setContentList(Collections.singletonList("block")); + artifacts.setModelOutput(Collections.singletonMap("raw", "ok")); + result.setArtifacts(artifacts); + + DocumentParsedResult mapped = mapper.map(result); + + Assert.assertNotNull(mapped.getArtifacts()); + Assert.assertEquals(Collections.singletonMap("page", 1), mapped.getArtifacts().getMiddleJson()); + Assert.assertEquals(Collections.singletonList("block"), mapped.getArtifacts().getContentList()); + 
Assert.assertEquals(Collections.singletonMap("raw", "ok"), mapped.getArtifacts().getModelOutput()); + } + + /** + * 验证任务聚合结果被正确映射。 + */ + @Test + public void shouldMapTaskInfo() { + DocumentParseResultMapper mapper = new DocumentParseResultMapper(); + ParseTaskInfo taskInfo = new ParseTaskInfo(); + taskInfo.setTaskId("task-1"); + taskInfo.setStatus("completed"); + + ParseResult result = new ParseResult(); + result.setFileName("demo.pdf"); + result.setMarkdown("# title"); + ParseResponse response = new ParseResponse(); + response.setResults(Collections.singletonList(result)); + taskInfo.setResult(response); + + DocumentParseTaskInfo mapped = mapper.map(taskInfo); + + Assert.assertEquals("task-1", mapped.getTaskId()); + Assert.assertNotNull(mapped.getResult()); + Assert.assertEquals("# title", mapped.getResult().getPreferredText()); + } +} diff --git a/easyflow-modules/easyflow-module-ai/src/test/java/tech/easyflow/ai/document/support/DocumentSourceLoaderTest.java b/easyflow-modules/easyflow-module-ai/src/test/java/tech/easyflow/ai/document/support/DocumentSourceLoaderTest.java new file mode 100644 index 0000000..85983ca --- /dev/null +++ b/easyflow-modules/easyflow-module-ai/src/test/java/tech/easyflow/ai/document/support/DocumentSourceLoaderTest.java @@ -0,0 +1,133 @@ +package tech.easyflow.ai.document.support; + +import org.junit.Assert; +import org.junit.Test; +import tech.easyflow.ai.document.exception.DocumentParseBridgeException; +import tech.easyflow.ai.document.model.DocumentSourceRef; +import tech.easyflow.common.filestorage.FileStorageService; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.net.InetSocketAddress; +import java.nio.charset.StandardCharsets; +import com.sun.net.httpserver.HttpServer; + +/** + * {@link DocumentSourceLoader} 单元测试。 + * + * @author Codex + * @since 2026-04-14 + */ +public class DocumentSourceLoaderTest { + + /** + * 验证可从 filePath 正常读取文件内容。 + * + * @throws IOException 
测试读取异常 + */ + @Test + public void shouldLoadContentFromFilePath() throws IOException { + DocumentSourceLoader loader = new DocumentSourceLoader(new FakeFileStorageService("demo-pdf".getBytes(StandardCharsets.UTF_8))); + DocumentSourceRef sourceRef = DocumentSourceRef.ofPath("/attachment/test/demo.pdf"); + + LoadedDocumentSource loadedSource = loader.load(sourceRef); + + Assert.assertEquals("demo.pdf", loadedSource.getFileName()); + Assert.assertEquals(8L, loadedSource.getSize().longValue()); + Assert.assertArrayEquals("demo-pdf".getBytes(StandardCharsets.UTF_8), loadedSource.getContentBytes()); + } + + /** + * 验证缺少有效来源时抛出明确异常。 + */ + @Test + public void shouldThrowWhenSourceMissing() { + DocumentSourceLoader loader = new DocumentSourceLoader(new FakeFileStorageService(new byte[0])); + + try { + loader.load(new DocumentSourceRef()); + Assert.fail("expected DocumentParseBridgeException"); + } catch (DocumentParseBridgeException e) { + Assert.assertEquals("unsupported_source", e.getCode()); + } + } + + /** + * 验证 filePath 为远端 URL 时不会误走存储读取。 + */ + @Test + public void shouldPreferRemoteDownloadWhenFilePathIsRemoteUrl() throws IOException { + DocumentSourceLoader loader = new DocumentSourceLoader(new FailingFileStorageService()); + HttpServer server = HttpServer.create(new InetSocketAddress(0), 0); + byte[] body = "demo-pdf".getBytes(StandardCharsets.UTF_8); + server.createContext("/demo.pdf", exchange -> { + exchange.sendResponseHeaders(200, body.length); + exchange.getResponseBody().write(body); + exchange.close(); + }); + server.start(); + try { + DocumentSourceRef sourceRef = new DocumentSourceRef(); + sourceRef.setFileName("demo.pdf"); + sourceRef.setFilePath("http://127.0.0.1:" + server.getAddress().getPort() + "/demo.pdf"); + + LoadedDocumentSource loadedSource = loader.load(sourceRef); + + Assert.assertEquals("demo.pdf", loadedSource.getFileName()); + Assert.assertArrayEquals(body, loadedSource.getContentBytes()); + } finally { + server.stop(0); + } + } + + 
private static class FakeFileStorageService implements FileStorageService { + + private final byte[] content; + + private FakeFileStorageService(byte[] content) { + this.content = content; + } + + @Override + public String save(org.springframework.web.multipart.MultipartFile file) { + return null; + } + + @Override + public void delete(String path) { + } + + @Override + public InputStream readStream(String path) { + return new ByteArrayInputStream(content); + } + + @Override + public long getFileSize(String path) { + return content.length; + } + } + + private static class FailingFileStorageService implements FileStorageService { + + @Override + public String save(org.springframework.web.multipart.MultipartFile file) { + return null; + } + + @Override + public void delete(String path) { + } + + @Override + public InputStream readStream(String path) throws IOException { + throw new IOException("should not read remote url from storage"); + } + + @Override + public long getFileSize(String path) { + return 0L; + } + } +} diff --git a/easyflow-modules/easyflow-module-ai/src/test/java/tech/easyflow/ai/node/DocNodeFileContentExtractorTest.java b/easyflow-modules/easyflow-module-ai/src/test/java/tech/easyflow/ai/node/DocNodeFileContentExtractorTest.java new file mode 100644 index 0000000..b1b2e25 --- /dev/null +++ b/easyflow-modules/easyflow-module-ai/src/test/java/tech/easyflow/ai/node/DocNodeFileContentExtractorTest.java @@ -0,0 +1,278 @@ +package tech.easyflow.ai.node; + +import org.junit.Assert; +import org.junit.Test; +import tech.easyflow.ai.document.model.DocumentParseTaskInfo; +import tech.easyflow.ai.document.model.DocumentParseTaskStatus; +import tech.easyflow.ai.document.model.DocumentParsedResult; +import tech.easyflow.ai.document.model.DocumentSourceRef; +import tech.easyflow.ai.document.service.DocumentParseBridgeService; +import tech.easyflow.common.filestorage.FileStorageService; +import tech.easyflow.common.web.exceptions.BusinessException; + +import 
java.io.ByteArrayInputStream; +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.net.InetSocketAddress; +import java.nio.charset.StandardCharsets; +import java.util.HashMap; +import java.util.Map; +import com.sun.net.httpserver.HttpServer; + +/** + * {@link DocNodeFileContentExtractor} 单元测试。 + * + * @author Codex + * @since 2026-04-14 + */ +public class DocNodeFileContentExtractorTest { + + /** + * 验证 PDF 文件会走统一文档解析桥接服务。 + */ + @Test + public void shouldUseDocumentBridgeForPdf() { + RecordingDocumentParseBridgeService bridgeService = new RecordingDocumentParseBridgeService(); + DocNodeFileContentExtractor extractor = new DocNodeFileContentExtractor( + bridgeService, + new FakeFileStorageService(), + new FakeReaderManager("ignored") + ); + + String content = extractor.extract(buildFileValue("demo.pdf", "/files/demo.pdf", "application/pdf")); + + Assert.assertEquals("# parsed", content); + Assert.assertNotNull(bridgeService.lastSource); + Assert.assertEquals("demo.pdf", bridgeService.lastSource.getFileName()); + } + + /** + * 验证非 PDF 文件会继续走默认读取器。 + */ + @Test + public void shouldUseDefaultReaderForNonPdf() { + RecordingDocumentParseBridgeService bridgeService = new RecordingDocumentParseBridgeService(); + DocNodeFileContentExtractor extractor = new DocNodeFileContentExtractor( + bridgeService, + new FakeFileStorageService(), + new FakeReaderManager("plain text") + ); + + String content = extractor.extract(buildFileValue("demo.docx", "/files/demo.docx", "application/vnd.openxmlformats-officedocument.wordprocessingml.document")); + + Assert.assertEquals("plain text", content); + Assert.assertNull(bridgeService.lastSource); + } + + /** + * 验证缺少 filePath 时会抛出明确异常。 + */ + @Test + public void shouldRejectMissingFilePath() { + DocNodeFileContentExtractor extractor = new DocNodeFileContentExtractor( + new RecordingDocumentParseBridgeService(), + new FakeFileStorageService(), + new FakeReaderManager("plain text") + ); + + try { + 
extractor.extract(buildFileValue("demo.pdf", null, "application/pdf")); + Assert.fail("expected BusinessException"); + } catch (BusinessException e) { + Assert.assertEquals("文件输入缺少 filePath", e.getMessage()); + } + } + + /** + * 验证解析结果为空时不会回退旧 PDF 读取链路。 + */ + @Test + public void shouldFailWhenPdfParseResultIsEmpty() { + RecordingDocumentParseBridgeService bridgeService = new RecordingDocumentParseBridgeService(); + bridgeService.response.setPreferredText(null); + bridgeService.response.setMarkdown(null); + bridgeService.response.setPlainText(null); + DocNodeFileContentExtractor extractor = new DocNodeFileContentExtractor( + bridgeService, + new FakeFileStorageService(), + new FakeReaderManager("pdf fallback") + ); + + try { + extractor.extract(buildFileValue("demo.pdf", "/files/demo.pdf", "application/pdf")); + Assert.fail("expected BusinessException"); + } catch (BusinessException e) { + Assert.assertEquals("PDF 文档解析结果为空", e.getMessage()); + } + } + + /** + * 验证远端素材 URL 的非 PDF 文件不会误走本地存储读取。 + */ + @Test + public void shouldReadRemoteUrlForNonPdf() { + RecordingDocumentParseBridgeService bridgeService = new RecordingDocumentParseBridgeService(); + HttpServer server; + try { + server = HttpServer.create(new InetSocketAddress(0), 0); + } catch (IOException e) { + throw new RuntimeException(e); + } + byte[] body = "remote text".getBytes(StandardCharsets.UTF_8); + server.createContext("/demo.docx", exchange -> { + exchange.sendResponseHeaders(200, body.length); + exchange.getResponseBody().write(body); + exchange.close(); + }); + server.start(); + try { + DocNodeFileContentExtractor extractor = new DocNodeFileContentExtractor( + bridgeService, + new FailingFileStorageService(), + new ReadingReaderManager() + ); + + String content = extractor.extract(buildFileValue( + "demo.docx", + "http://127.0.0.1:" + server.getAddress().getPort() + "/demo.docx", + "" + )); + + Assert.assertEquals("remote text", content); + Assert.assertNull(bridgeService.lastSource); + } finally { 
+ server.stop(0); + } + } + + private Map