feat: 支持知识库导入 PPTX 与 XLSX 文档

- 打通 Office 文档桥接解析、解析进度承接与图片引用改写
- 落地 PPTX 按页分块、XLSX 行窗口分块以及预览与检索渲染闭环
2026-04-18 13:01:17 +08:00
parent ad67ba85ad
commit 4130381658
28 changed files with 2876 additions and 120 deletions
--- a/easyflow-modules/easyflow-module-ai/pom.xml
+++ b/easyflow-modules/easyflow-module-ai/pom.xml
@@ -112,7 +112,6 @@
            <groupId>com.easyagents</groupId>
            <artifactId>easy-agents-mcp</artifactId>
        </dependency>
-
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
--- a/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/document/exception/DocumentParseBridgeException.java
+++ b/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/document/exception/DocumentParseBridgeException.java
@@ -35,10 +35,14 @@ public class DocumentParseBridgeException extends RuntimeException {
    public static DocumentParseBridgeException serviceNotEnabled() {
        return new DocumentParseBridgeException(
            "service_not_enabled",
-            "统一文档解析服务未启用，请先配置 easy-agents.document.pdf.provider"
+            "统一文档解析服务未启用，请先配置 easy-agents.document.ocr.provider=mineru"
        );
    }

+    public static DocumentParseBridgeException serviceNotEnabled(String message) {
+        return new DocumentParseBridgeException("service_not_enabled", message);
+    }
+
    public static DocumentParseBridgeException unsupportedSource(String message) {
        return new DocumentParseBridgeException("unsupported_source", message);
    }
--- a/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/document/model/DocumentParseTaskStatus.java
+++ b/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/document/model/DocumentParseTaskStatus.java
@@ -22,6 +22,11 @@ public class DocumentParseTaskStatus {
    private String statusUrl;
    private String resultUrl;
    private Integer queuedAhead;
+    private Integer progressPercent;
+    private String currentStage;
+    private Integer processedItems;
+    private Integer totalItems;
+    private String statusMessage;

    public String getTaskId() {
        return taskId;
@@ -110,4 +115,44 @@ public class DocumentParseTaskStatus {
    public void setQueuedAhead(Integer queuedAhead) {
        this.queuedAhead = queuedAhead;
    }
+
+    public Integer getProgressPercent() {
+        return progressPercent;
+    }
+
+    public void setProgressPercent(Integer progressPercent) {
+        this.progressPercent = progressPercent;
+    }
+
+    public String getCurrentStage() {
+        return currentStage;
+    }
+
+    public void setCurrentStage(String currentStage) {
+        this.currentStage = currentStage;
+    }
+
+    public Integer getProcessedItems() {
+        return processedItems;
+    }
+
+    public void setProcessedItems(Integer processedItems) {
+        this.processedItems = processedItems;
+    }
+
+    public Integer getTotalItems() {
+        return totalItems;
+    }
+
+    public void setTotalItems(Integer totalItems) {
+        this.totalItems = totalItems;
+    }
+
+    public String getStatusMessage() {
+        return statusMessage;
+    }
+
+    public void setStatusMessage(String statusMessage) {
+        this.statusMessage = statusMessage;
+    }
 }
--- a/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/document/service/impl/DocumentParseBridgeServiceImpl.java
+++ b/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/document/service/impl/DocumentParseBridgeServiceImpl.java
@@ -5,6 +5,10 @@ import com.easyagents.document.core.entity.ParseResponse;
 import com.easyagents.document.core.entity.ParseResult;
 import com.easyagents.document.core.entity.ParseTaskInfo;
 import com.easyagents.document.core.entity.ParseTaskStatus;
+import com.easyagents.document.pdf.PdfDocumentParseService;
+import com.easyagents.document.pptx.PptxDocumentParseService;
+import com.easyagents.document.xlsx.XlsxDocumentParseService;
+import org.springframework.beans.factory.annotation.Qualifier;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.springframework.lang.Nullable;
@@ -20,8 +24,13 @@ import tech.easyflow.ai.document.service.DocumentParseBridgeService;
 import tech.easyflow.ai.document.support.DocumentSourceLoader;
 import tech.easyflow.ai.document.support.DocumentParseRequestFactory;
 import tech.easyflow.ai.document.support.DocumentParseResultMapper;
+import tech.easyflow.ai.document.support.DocumentParseSourceType;
 import tech.easyflow.ai.document.support.LoadedDocumentSource;
-import tech.easyflow.ai.utils.DocUtil;
+
+import java.util.ArrayList;
+import java.util.LinkedHashSet;
+import java.util.List;
+import java.util.function.Function;

 /**
 * 统一文档解析桥接门面默认实现。
@@ -33,18 +42,33 @@ import tech.easyflow.ai.utils.DocUtil;
 public class DocumentParseBridgeServiceImpl implements DocumentParseBridgeService {

    private static final Logger LOG = LoggerFactory.getLogger(DocumentParseBridgeServiceImpl.class);
+    private static final String DEFAULT_DOCUMENT_PARSE_SERVICE_BEAN_NAME = "documentParseService";

    @Nullable
-    private final DocumentParseService documentParseService;
+    private final DocumentParseService defaultDocumentParseService;
+    @Nullable
+    private final PdfDocumentParseService pdfDocumentParseService;
+    @Nullable
+    private final PptxDocumentParseService pptxDocumentParseService;
+    @Nullable
+    private final XlsxDocumentParseService xlsxDocumentParseService;
    private final DocumentSourceLoader documentSourceLoader;
    private final DocumentParseRequestFactory parseRequestFactory;
    private final DocumentParseResultMapper parseResultMapper;

-    public DocumentParseBridgeServiceImpl(@Nullable DocumentParseService documentParseService,
+    public DocumentParseBridgeServiceImpl(@Nullable
+                                          @Qualifier(DEFAULT_DOCUMENT_PARSE_SERVICE_BEAN_NAME)
+                                          DocumentParseService defaultDocumentParseService,
+                                          @Nullable PdfDocumentParseService pdfDocumentParseService,
+                                          @Nullable PptxDocumentParseService pptxDocumentParseService,
+                                          @Nullable XlsxDocumentParseService xlsxDocumentParseService,
                                          DocumentSourceLoader documentSourceLoader,
                                          DocumentParseRequestFactory parseRequestFactory,
                                          DocumentParseResultMapper parseResultMapper) {
-        this.documentParseService = documentParseService;
+        this.defaultDocumentParseService = defaultDocumentParseService;
+        this.pdfDocumentParseService = pdfDocumentParseService;
+        this.pptxDocumentParseService = pptxDocumentParseService;
+        this.xlsxDocumentParseService = xlsxDocumentParseService;
        this.documentSourceLoader = documentSourceLoader;
        this.parseRequestFactory = parseRequestFactory;
        this.parseResultMapper = parseResultMapper;
@@ -59,7 +83,8 @@ public class DocumentParseBridgeServiceImpl implements DocumentParseBridgeServic
            LoadedDocumentSource loadedSource = prepareSupportedSource(source);
            LOG.info("桥接服务开始同步解析文档: fileName={}, contentType={}, scenario={}",
                loadedSource.getFileName(), loadedSource.getContentType(), scenario);
-            ParseResponse response = requireService().parse(parseRequestFactory.build(loadedSource, scenario));
+            DocumentParseService parseService = resolveService(loadedSource);
+            ParseResponse response = parseService.parse(parseRequestFactory.build(loadedSource, scenario));
            DocumentParsedResult result = parseResultMapper.map(extractSingleResult(response, false));
            LOG.info("桥接服务同步解析完成: fileName={}, scenario={}, preferredTextLength={}",
                loadedSource.getFileName(), scenario, resolveTextLength(result));
@@ -84,7 +109,8 @@ public class DocumentParseBridgeServiceImpl implements DocumentParseBridgeServic
            LoadedDocumentSource loadedSource = prepareSupportedSource(source);
            LOG.info("桥接服务开始提交异步解析任务: fileName={}, contentType={}, scenario={}",
                loadedSource.getFileName(), loadedSource.getContentType(), scenario);
-            ParseTaskStatus taskStatus = requireService().submit(parseRequestFactory.build(loadedSource, scenario));
+            DocumentParseService parseService = resolveService(loadedSource);
+            ParseTaskStatus taskStatus = parseService.submit(parseRequestFactory.build(loadedSource, scenario));
            DocumentParseTaskStatus mappedStatus = parseResultMapper.map(taskStatus);
            LOG.info("桥接服务异步解析任务提交完成: fileName={}, scenario={}, providerTaskId={}, status={}",
                loadedSource.getFileName(), scenario, mappedStatus.getTaskId(), mappedStatus.getStatus());
@@ -109,7 +135,8 @@ public class DocumentParseBridgeServiceImpl implements DocumentParseBridgeServic
            throw DocumentParseBridgeException.taskFailed("taskId 不能为空");
        }
        try {
-            return parseResultMapper.map(requireService().queryTask(taskId));
+            ParseTaskStatus taskStatus = executeAgainstTaskService(taskId, service -> service.queryTask(taskId));
+            return parseResultMapper.map(taskStatus);
        } catch (DocumentParseBridgeException e) {
            throw e;
        } catch (Exception e) {
@@ -127,7 +154,7 @@ public class DocumentParseBridgeServiceImpl implements DocumentParseBridgeServic
        }
        try {
            LOG.info("桥接服务开始获取异步解析结果: providerTaskId={}", taskId);
-            ParseResponse response = requireService().queryResult(taskId);
+            ParseResponse response = executeAgainstTaskService(taskId, service -> service.queryResult(taskId));
            DocumentParsedResult result = parseResultMapper.map(extractSingleResult(response, true));
            LOG.info("桥接服务获取异步解析结果完成: providerTaskId={}, preferredTextLength={}",
                taskId, resolveTextLength(result));
@@ -150,7 +177,7 @@ public class DocumentParseBridgeServiceImpl implements DocumentParseBridgeServic
            throw DocumentParseBridgeException.taskFailed("taskId 不能为空");
        }
        try {
-            ParseTaskInfo taskInfo = requireService().queryTaskInfo(taskId);
+            ParseTaskInfo taskInfo = executeAgainstTaskService(taskId, service -> service.queryTaskInfo(taskId));
            DocumentParseTaskInfo mappedTaskInfo = parseResultMapper.map(taskInfo);
            LOG.info("桥接服务查询异步解析任务状态: providerTaskId={}, status={}, hasResult={}",
                taskId,
@@ -177,39 +204,84 @@ public class DocumentParseBridgeServiceImpl implements DocumentParseBridgeServic
        return text == null ? 0 : text.length();
    }

-    private DocumentParseService requireService() {
-        if (documentParseService == null) {
-            throw DocumentParseBridgeException.serviceNotEnabled();
-        }
-        return documentParseService;
-    }
-
    private LoadedDocumentSource prepareSupportedSource(DocumentSourceRef source) {
        LoadedDocumentSource loadedSource = documentSourceLoader.load(source);
        if (!isSupportedByBridge(loadedSource)) {
-            throw DocumentParseBridgeException.unsupportedSource("统一文档解析桥接当前仅支持 PDF、DOCX 文件");
+            throw DocumentParseBridgeException.unsupportedSource("统一文档解析桥接当前仅支持 PDF、DOCX、PPTX、XLSX 文件");
        }
        return loadedSource;
    }

    private boolean isSupportedByBridge(LoadedDocumentSource loadedSource) {
-        String contentType = loadedSource.getContentType();
-        if (StringUtils.hasText(contentType)) {
-            String normalizedContentType = contentType.toLowerCase();
-            if (normalizedContentType.contains("pdf")
-                || normalizedContentType.contains("wordprocessingml.document")) {
-                return true;
+        return DocumentParseSourceType.resolve(loadedSource.getFileName(), loadedSource.getContentType()) != DocumentParseSourceType.UNSUPPORTED;
+    }
+
+    private DocumentParseService resolveService(LoadedDocumentSource loadedSource) {
+        DocumentParseSourceType sourceType = DocumentParseSourceType.resolve(loadedSource.getFileName(), loadedSource.getContentType());
+        switch (sourceType) {
+            case PDF:
+                return requireSpecificService(pdfDocumentParseService, defaultDocumentParseService, "PDF");
+            case DOCX:
+                return requireSpecificService(defaultDocumentParseService, pdfDocumentParseService, "DOCX");
+            case PPTX:
+                return requireSpecificService(pptxDocumentParseService, null, "PPTX");
+            case XLSX:
+                return requireSpecificService(xlsxDocumentParseService, null, "XLSX");
+            default:
+                throw DocumentParseBridgeException.unsupportedSource("当前文件类型暂不支持桥接解析");
+        }
+    }
+
+    private DocumentParseService requireSpecificService(@Nullable DocumentParseService primaryService,
+                                                        @Nullable DocumentParseService fallbackService,
+                                                        String sourceType) {
+        if (primaryService != null) {
+            return primaryService;
+        }
+        if (fallbackService != null) {
+            return fallbackService;
+        }
+        throw DocumentParseBridgeException.serviceNotEnabled("未启用 " + sourceType + " 文档解析服务");
+    }
+
+    private <T> T executeAgainstTaskService(String taskId, Function<DocumentParseService, T> action) {
+        List<DocumentParseService> services = availableServices();
+        if (services.isEmpty()) {
+            throw DocumentParseBridgeException.serviceNotEnabled();
+        }
+        Exception lastException = null;
+        for (DocumentParseService service : services) {
+            try {
+                return action.apply(service);
+            } catch (Exception exception) {
+                lastException = exception;
+                LOG.debug("桥接服务任务查询尝试失败，准备切换下一个解析服务: taskId={}, service={}",
+                    taskId,
+                    service.getClass().getSimpleName(),
+                    exception);
            }
        }
-        String fileName = loadedSource.getFileName();
-        if (!StringUtils.hasText(fileName) || !fileName.contains(".")) {
-            return false;
+        if (lastException instanceof RuntimeException) {
+            throw (RuntimeException) lastException;
        }
-        String suffix = DocUtil.normalizeSuffix(DocUtil.getSuffix(fileName));
-        if ("pdf".equals(suffix) || "docx".equals(suffix)) {
-            return true;
+        throw DocumentParseBridgeException.taskFailed("未找到可处理当前任务ID的文档解析服务", lastException);
+    }
+
+    private List<DocumentParseService> availableServices() {
+        LinkedHashSet<DocumentParseService> services = new LinkedHashSet<DocumentParseService>();
+        if (pptxDocumentParseService != null) {
+            services.add(pptxDocumentParseService);
        }
-        return false;
+        if (xlsxDocumentParseService != null) {
+            services.add(xlsxDocumentParseService);
+        }
+        if (pdfDocumentParseService != null) {
+            services.add(pdfDocumentParseService);
+        }
+        if (defaultDocumentParseService != null) {
+            services.add(defaultDocumentParseService);
+        }
+        return new ArrayList<DocumentParseService>(services);
    }

    private ParseResult extractSingleResult(ParseResponse response, boolean resultFetchPhase) {
--- a/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/document/support/DocumentParseRequestFactory.java
+++ b/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/document/support/DocumentParseRequestFactory.java
@@ -2,6 +2,9 @@ package tech.easyflow.ai.document.support;

 import com.easyagents.document.core.entity.ParseFile;
 import com.easyagents.document.core.entity.ParseRequest;
+import com.easyagents.document.core.entity.PdfParseRequest;
+import com.easyagents.document.core.entity.PptxParseRequest;
+import com.easyagents.document.core.entity.XlsxParseRequest;
 import org.springframework.stereotype.Component;
 import tech.easyflow.ai.document.exception.DocumentParseBridgeException;
 import tech.easyflow.ai.document.model.DocumentParseScenario;
@@ -31,12 +34,28 @@ public class DocumentParseRequestFactory {
        if (scenario == null) {
            throw DocumentParseBridgeException.requestBuildFailed("解析场景不能为空");
        }
-        ParseRequest request = new ParseRequest();
+        ParseRequest request = createTypedRequest(source);
        request.addFile(ParseFile.of(source.getFileName(), source.getContentBytes(), source.getContentType()));
        applyScenario(request, scenario);
        return request;
    }

+    private ParseRequest createTypedRequest(LoadedDocumentSource source) {
+        DocumentParseSourceType sourceType = DocumentParseSourceType.resolve(source.getFileName(), source.getContentType());
+        switch (sourceType) {
+            case PDF:
+                return new PdfParseRequest();
+            case PPTX:
+                return new PptxParseRequest();
+            case XLSX:
+                return new XlsxParseRequest();
+            case DOCX:
+                return new ParseRequest();
+            default:
+                throw DocumentParseBridgeException.requestBuildFailed("当前文件类型暂不支持桥接解析");
+        }
+    }
+
    private void applyScenario(ParseRequest request, DocumentParseScenario scenario) {
        switch (scenario) {
            case WORKFLOW_TEXT:
--- a/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/document/support/DocumentParseResultMapper.java
+++ b/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/document/support/DocumentParseResultMapper.java
@@ -69,6 +69,11 @@ public class DocumentParseResultMapper {
        status.setStatusUrl(taskStatus.getStatusUrl());
        status.setResultUrl(taskStatus.getResultUrl());
        status.setQueuedAhead(taskStatus.getQueuedAhead());
+        status.setProgressPercent(taskStatus.getProgressPercent());
+        status.setCurrentStage(taskStatus.getCurrentStage());
+        status.setProcessedItems(taskStatus.getProcessedItems());
+        status.setTotalItems(taskStatus.getTotalItems());
+        status.setStatusMessage(taskStatus.getStatusMessage());
        return status;
    }

@@ -104,6 +109,11 @@ public class DocumentParseResultMapper {
        status.setStatusUrl(taskStatus.getStatusUrl());
        status.setResultUrl(taskStatus.getResultUrl());
        status.setQueuedAhead(taskStatus.getQueuedAhead());
+        status.setProgressPercent(taskStatus.getProgressPercent());
+        status.setCurrentStage(taskStatus.getCurrentStage());
+        status.setProcessedItems(taskStatus.getProcessedItems());
+        status.setTotalItems(taskStatus.getTotalItems());
+        status.setStatusMessage(taskStatus.getStatusMessage());
    }

    private String resolvePreferredText(ParseResult parseResult) {
--- a/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/document/support/DocumentParseSourceType.java
+++ b/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/document/support/DocumentParseSourceType.java
@@ -0,0 +1,70 @@
+package tech.easyflow.ai.document.support;
+
+import org.springframework.util.StringUtils;
+import tech.easyflow.ai.utils.DocUtil;
+
+/**
+ * Source file types supported by the unified document-parse bridge.
+ *
+ * @author Codex
+ * @since 2026-04-17
+ */
+public enum DocumentParseSourceType {
+
+    PDF,
+    DOCX,
+    PPTX,
+    XLSX,
+    UNSUPPORTED;
+
+    /**
+     * Infers the document type: the lower-cased content type is matched by substring first, then the file-name suffix.
+     *
+     * @param fileName file name; its normalized suffix is consulted only when the content type yields no match
+     * @param contentType MIME type; skipped when it has no text
+     * @return the resolved type, or {@link #UNSUPPORTED} when neither the content type nor the suffix matches
+     */
+    public static DocumentParseSourceType resolve(String fileName, String contentType) {
+        if (StringUtils.hasText(contentType)) {
+            String normalizedContentType = contentType.toLowerCase();
+            if (normalizedContentType.contains("pdf")) {
+                return PDF;
+            }
+            if (normalizedContentType.contains("wordprocessingml.document")) {
+                return DOCX;
+            }
+            if (normalizedContentType.contains("presentationml.presentation")) {
+                return PPTX;
+            }
+            if (normalizedContentType.contains("spreadsheetml.sheet")) {
+                return XLSX;
+            }
+        }
+        if (!StringUtils.hasText(fileName) || !fileName.contains(".")) {
+            return UNSUPPORTED;
+        }
+        String suffix = DocUtil.normalizeSuffix(DocUtil.getSuffix(fileName));
+        if ("pdf".equals(suffix)) {
+            return PDF;
+        }
+        if ("docx".equals(suffix)) {
+            return DOCX;
+        }
+        if ("pptx".equals(suffix)) {
+            return PPTX;
+        }
+        if ("xlsx".equals(suffix)) {
+            return XLSX;
+        }
+        return UNSUPPORTED;
+    }
+
+    /**
+     * Tells whether this type is one of the Office types covered by this first integration.
+     *
+     * @return {@code true} when this is {@link #PPTX} or {@link #XLSX}
+     */
+    public boolean isOffice() {
+        return this == PPTX || this == XLSX;
+    }
+}
--- a/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/documentimport/DocumentImportDtos.java
+++ b/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/documentimport/DocumentImportDtos.java
@@ -286,6 +286,7 @@ public final class DocumentImportDtos {
        private String chunkId;
        private String chunkType;
        private String content;
+        private String renderMarkdown;
        private List<String> headingPath = new ArrayList<String>();
        private Integer partNo;
        private Integer partTotal;
@@ -335,6 +336,14 @@ public final class DocumentImportDtos {
            this.content = content;
        }

+        public String getRenderMarkdown() {
+            return renderMarkdown;
+        }
+
+        public void setRenderMarkdown(String renderMarkdown) {
+            this.renderMarkdown = renderMarkdown;
+        }
+
        public List<String> getHeadingPath() {
            return headingPath;
        }
--- a/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/documentimport/DocumentImportKeys.java
+++ b/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/documentimport/DocumentImportKeys.java
@@ -22,4 +22,19 @@ public final class DocumentImportKeys {
    public static final String KEY_DOCUMENT_PARSE_METADATA = "parse.metadata";
    public static final String KEY_DOCUMENT_PARSE_WARNINGS = "parse.warnings";
    public static final String KEY_DOCUMENT_PROVIDER_TASK_ID = "parse.providerTaskId";
+    public static final String KEY_DOCUMENT_PARSE_IMAGE_URLS = "parse.imageUrls";
+    public static final String KEY_DOCUMENT_PARSE_IMAGE_COUNT = "parse.imageCount";
+    public static final String KEY_DOCUMENT_PARSE_IMAGE_STORAGE_PREFIX = "parse.imageStoragePrefix";
+    public static final String KEY_DOCUMENT_PARSE_PROGRESS_PERCENT = "parse.progressPercent";
+    public static final String KEY_DOCUMENT_PARSE_CURRENT_STAGE = "parse.currentStage";
+    public static final String KEY_DOCUMENT_PARSE_PROCESSED_ITEMS = "parse.processedItems";
+    public static final String KEY_DOCUMENT_PARSE_TOTAL_ITEMS = "parse.totalItems";
+    public static final String KEY_DOCUMENT_PARSE_STATUS_MESSAGE = "parse.statusMessage";
+    public static final String KEY_DOCUMENT_RENDER_MARKDOWN = "renderMarkdown";
+    public static final String KEY_DOCUMENT_PAGE_INDEX = "pageIndex";
+    public static final String KEY_DOCUMENT_SHEET_NAME = "sheetName";
+    public static final String KEY_DOCUMENT_ROW_START = "rowStart";
+    public static final String KEY_DOCUMENT_ROW_END = "rowEnd";
+    public static final String KEY_DOCUMENT_IMAGE_REFS = "imageRefs";
+    public static final String KEY_DOCUMENT_PARSE_ARTIFACT_SUMMARY = "parseArtifactSummary";
 }
--- a/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/documentimport/task/DocumentImportTaskStatusStreamService.java
+++ b/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/documentimport/task/DocumentImportTaskStatusStreamService.java
@@ -6,6 +6,7 @@ import org.springframework.stereotype.Service;
 import org.springframework.transaction.support.TransactionSynchronization;
 import org.springframework.transaction.support.TransactionSynchronizationManager;
 import org.springframework.web.servlet.mvc.method.annotation.SseEmitter;
+import tech.easyflow.ai.documentimport.DocumentImportKeys;
 import tech.easyflow.ai.entity.Document;
 import tech.easyflow.ai.mapper.DocumentMapper;
 import tech.easyflow.common.web.exceptions.BusinessException;
@@ -116,11 +117,21 @@ public class DocumentImportTaskStatusStreamService {
        payload.put("totalChunks", document.getTotalChunks());
        payload.put("completedChunks", document.getCompletedChunks());
        payload.put("failedChunks", document.getFailedChunks());
+        payload.put("parseCurrentStage", readOptionAsString(document, DocumentImportKeys.KEY_DOCUMENT_PARSE_CURRENT_STAGE));
+        payload.put("parseStatusMessage", readOptionAsString(document, DocumentImportKeys.KEY_DOCUMENT_PARSE_STATUS_MESSAGE));
        payload.put("lastTaskError", document.getLastTaskError());
        payload.put("taskModifiedAt", document.getTaskModifiedAt());
        return payload;
    }

+    private String readOptionAsString(Document document, String key) {
+        if (document == null || document.getOptions() == null || key == null) {
+            return null;
+        }
+        Object value = document.getOptions().get(key);
+        return value == null ? null : String.valueOf(value);
+    }
+
    private void sendAsync(String topicKey, SseEmitter emitter, String eventName, Map<String, Object> payload) {
        sseThreadPool.execute(() -> {
            try {
--- a/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/documentimport/task/KnowledgeDocumentImportTaskAppService.java
+++ b/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/documentimport/task/KnowledgeDocumentImportTaskAppService.java
--- a/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/dto/KnowledgeSearchResultItem.java
+++ b/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/dto/KnowledgeSearchResultItem.java
@@ -4,6 +4,8 @@ public class KnowledgeSearchResultItem {

    private Integer sorting;
    private String content;
+    private String renderMarkdown;
+    private String sourceFileName;
    private Double score;
    private String hitSource;
    private Double vectorScore;
@@ -25,6 +27,22 @@ public class KnowledgeSearchResultItem {
        this.content = content;
    }

+    public String getRenderMarkdown() {
+        return renderMarkdown;
+    }
+
+    public void setRenderMarkdown(String renderMarkdown) {
+        this.renderMarkdown = renderMarkdown;
+    }
+
+    public String getSourceFileName() {
+        return sourceFileName;
+    }
+
+    public void setSourceFileName(String sourceFileName) {
+        this.sourceFileName = sourceFileName;
+    }
+
    public Double getScore() {
        return score;
    }
--- a/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/service/impl/DocumentCollectionServiceImpl.java
+++ b/easyflow-modules/easyflow-module-ai/src/main/java/tech/easyflow/ai/service/impl/DocumentCollectionServiceImpl.java
@@ -32,6 +32,7 @@ import tech.easyflow.ai.entity.FaqItem;
 import tech.easyflow.ai.entity.Model;
 import tech.easyflow.ai.enums.DocumentProcessStatus;
 import tech.easyflow.ai.enums.PublishStatus;
+import tech.easyflow.ai.documentimport.DocumentImportKeys;
 import tech.easyflow.ai.mapper.DocumentChunkMapper;
 import tech.easyflow.ai.mapper.DocumentCollectionMapper;
 import tech.easyflow.ai.mapper.DocumentMapper;
@@ -406,6 +407,14 @@ public class DocumentCollectionServiceImpl extends ServiceImpl<DocumentCollectio
                    return false;
                }
                item.setContent(content);
+                String renderMarkdown = hitSnapshot.findChunkRenderMarkdown(item.getId());
+                if (StringUtil.hasText(renderMarkdown)) {
+                    item.addMetadata("renderMarkdown", renderMarkdown);
+                }
+                String sourceFileName = hitSnapshot.findSourceFileName(item.getId());
+                if (StringUtil.hasText(sourceFileName)) {
+                    item.addMetadata("sourceFileName", sourceFileName);
+                }
                return true;
            })
            .collect(Collectors.toList());
@@ -596,6 +605,30 @@ public class DocumentCollectionServiceImpl extends ServiceImpl<DocumentCollectio
            }
            return StringUtil.noText(documentChunk.getContent()) ? null : documentChunk.getContent();
        }
+
+        private String findChunkRenderMarkdown(Object chunkId) {
+            DocumentChunk documentChunk = chunkMap.get(String.valueOf(chunkId));
+            if (documentChunk == null || documentChunk.getDocumentId() == null || documentChunk.getOptions() == null) {
+                return null;
+            }
+            if (!documentMap.containsKey(String.valueOf(documentChunk.getDocumentId()))) {
+                return null;
+            }
+            Object renderMarkdown = documentChunk.getOptions().get(DocumentImportKeys.KEY_DOCUMENT_RENDER_MARKDOWN);
+            return renderMarkdown == null ? null : String.valueOf(renderMarkdown);
+        }
+
+        private String findSourceFileName(Object chunkId) {
+            DocumentChunk documentChunk = chunkMap.get(String.valueOf(chunkId));
+            if (documentChunk == null || documentChunk.getDocumentId() == null) {
+                return null;
+            }
+            tech.easyflow.ai.entity.Document sourceDocument = documentMap.get(String.valueOf(documentChunk.getDocumentId()));
+            if (sourceDocument == null || StringUtil.noText(sourceDocument.getTitle())) {
+                return null;
+            }
+            return sourceDocument.getTitle();
+        }
    }

    private String buildFaqPromptContent(FaqItem faqItem, List<Map<String, String>> images) {
--- a/easyflow-modules/easyflow-module-ai/src/test/java/tech/easyflow/ai/document/service/impl/DocumentParseBridgeServiceImplTest.java
+++ b/easyflow-modules/easyflow-module-ai/src/test/java/tech/easyflow/ai/document/service/impl/DocumentParseBridgeServiceImplTest.java
@@ -6,6 +6,9 @@ import com.easyagents.document.core.entity.ParseResponse;
 import com.easyagents.document.core.entity.ParseResult;
 import com.easyagents.document.core.entity.ParseTaskInfo;
 import com.easyagents.document.core.entity.ParseTaskStatus;
+import com.easyagents.document.pdf.PdfDocumentParseService;
+import com.easyagents.document.pptx.PptxDocumentParseService;
+import com.easyagents.document.xlsx.XlsxDocumentParseService;
 import org.junit.Assert;
 import org.junit.Test;
 import tech.easyflow.ai.document.exception.DocumentParseBridgeException;
@@ -37,8 +40,8 @@ public class DocumentParseBridgeServiceImplTest {
     */
    @Test
    public void shouldParseSuccessfully() {
-        FakeDocumentParseService parseService = new FakeDocumentParseService();
-        DocumentParseBridgeServiceImpl bridgeService = buildBridgeService(parseService);
+        FakePdfDocumentParseService parseService = new FakePdfDocumentParseService();
+        DocumentParseBridgeServiceImpl bridgeService = buildBridgeService(parseService, null, null, parseService);

        DocumentParsedResult document = bridgeService.parse(buildSource(), DocumentParseScenario.WORKFLOW_TEXT);

@@ -52,8 +55,8 @@ public class DocumentParseBridgeServiceImplTest {
     */
    @Test
    public void shouldSupportAsyncFlow() {
-        FakeDocumentParseService parseService = new FakeDocumentParseService();
-        DocumentParseBridgeServiceImpl bridgeService = buildBridgeService(parseService);
+        FakePdfDocumentParseService parseService = new FakePdfDocumentParseService();
+        DocumentParseBridgeServiceImpl bridgeService = buildBridgeService(parseService, null, null, parseService);

        DocumentParseTaskStatus taskStatus = bridgeService.submit(buildSource(), DocumentParseScenario.KNOWLEDGE_IMPORT);
        DocumentParseTaskStatus queriedStatus = bridgeService.queryTask("task-1");
@@ -69,9 +72,9 @@ public class DocumentParseBridgeServiceImplTest {
     */
    @Test
    public void shouldQueryTaskInfoSuccessfully() {
-        FakeDocumentParseService parseService = new FakeDocumentParseService();
+        FakePdfDocumentParseService parseService = new FakePdfDocumentParseService();
        parseService.taskStatusValue = "completed";
-        DocumentParseBridgeServiceImpl bridgeService = buildBridgeService(parseService);
+        DocumentParseBridgeServiceImpl bridgeService = buildBridgeService(parseService, null, null, parseService);

        DocumentParseTaskInfo taskInfo = bridgeService.queryTaskInfo("task-1");

@@ -85,7 +88,7 @@ public class DocumentParseBridgeServiceImplTest {
     */
    @Test
    public void shouldThrowWhenServiceDisabled() {
-        DocumentParseBridgeServiceImpl bridgeService = buildBridgeService(null);
+        DocumentParseBridgeServiceImpl bridgeService = buildBridgeService(null, null, null, null);

        try {
            bridgeService.parse(buildSource(), DocumentParseScenario.WORKFLOW_TEXT);
@@ -95,9 +98,29 @@ public class DocumentParseBridgeServiceImplTest {
        }
    }

-    private DocumentParseBridgeServiceImpl buildBridgeService(DocumentParseService parseService) {
+    @Test // a .pptx source must be handled by the PPTX service, never by the generic default service
+    public void shouldRoutePptxToDedicatedService() {
+        FakePptxDocumentParseService pptxService = new FakePptxDocumentParseService();
+        FakePdfDocumentParseService defaultService = new FakePdfDocumentParseService();
+        DocumentParseBridgeServiceImpl bridgeService = buildBridgeService(null, pptxService, null, defaultService); // no PDF or XLSX service wired in
+
+        DocumentParsedResult result = bridgeService.parse(buildSource("slides.pptx",
+            "application/vnd.openxmlformats-officedocument.presentationml.presentation"), DocumentParseScenario.KNOWLEDGE_IMPORT);
+
+        Assert.assertEquals("# pptx", result.getPreferredText()); // markdown set by FakePptxDocumentParseService
+        Assert.assertEquals(1, pptxService.parseCallCount);
+        Assert.assertEquals(0, defaultService.parseCallCount); // the generic service must stay untouched
+    }
+
+    private DocumentParseBridgeServiceImpl buildBridgeService(PdfDocumentParseService pdfDocumentParseService,
+                                                              PptxDocumentParseService pptxDocumentParseService,
+                                                              XlsxDocumentParseService xlsxDocumentParseService,
+                                                              DocumentParseService parseService) {
        return new DocumentParseBridgeServiceImpl(
            parseService,
+            pdfDocumentParseService,
+            pptxDocumentParseService,
+            xlsxDocumentParseService,
            new DocumentSourceLoader(new InMemoryFileStorageService()),
            new DocumentParseRequestFactory(),
            new DocumentParseResultMapper()
@@ -105,8 +128,12 @@ public class DocumentParseBridgeServiceImplTest {
    }

    private DocumentSourceRef buildSource() {
-        DocumentSourceRef sourceRef = DocumentSourceRef.ofBytes("demo.pdf", "pdf-data".getBytes(StandardCharsets.UTF_8));
-        sourceRef.setContentType("application/pdf");
+        return buildSource("demo.pdf", "application/pdf");
+    }
+
+    private DocumentSourceRef buildSource(String fileName, String contentType) {
+        DocumentSourceRef sourceRef = DocumentSourceRef.ofBytes(fileName, "pdf-data".getBytes(StandardCharsets.UTF_8));
+        sourceRef.setContentType(contentType);
        sourceRef.setSize(8L);
        return sourceRef;
    }
@@ -133,13 +160,15 @@ public class DocumentParseBridgeServiceImplTest {
        }
    }

-    private static class FakeDocumentParseService implements DocumentParseService {
+    private static class FakePdfDocumentParseService implements PdfDocumentParseService {

        private ParseRequest lastParseRequest;
        private String taskStatusValue = "running";
+        private int parseCallCount;

        @Override
        public ParseResponse parse(ParseRequest request) {
+            parseCallCount++;
            this.lastParseRequest = request;
            return buildResponse();
        }
@@ -187,4 +216,36 @@ public class DocumentParseBridgeServiceImplTest {
            return response;
        }
    }
+
+    private static class FakePptxDocumentParseService implements PptxDocumentParseService { // test double with a canned parse() result
+
+        private int parseCallCount; // incremented on every parse() call so tests can assert routing
+
+        @Override
+        public ParseResponse parse(ParseRequest request) {
+            parseCallCount++;
+            ParseResult result = new ParseResult();
+            result.setFileName("slides.pptx");
+            result.setMarkdown("# pptx");
+            result.setPlainText("pptx");
+            ParseResponse response = new ParseResponse();
+            response.setResults(Collections.singletonList(result)); // exactly one result, regardless of the request
+            return response;
+        }
+
+        @Override
+        public ParseTaskStatus submit(ParseRequest request) {
+            throw new UnsupportedOperationException(); // async submit is not stubbed in this fake
+        }
+
+        @Override
+        public ParseTaskStatus queryTask(String taskId) {
+            throw new UnsupportedOperationException(); // task polling is not stubbed in this fake
+        }
+
+        @Override
+        public ParseResponse queryResult(String taskId) {
+            throw new UnsupportedOperationException(); // result lookup is not stubbed in this fake
+        }
+    }
 }
--- a/easyflow-modules/easyflow-module-ai/src/test/java/tech/easyflow/ai/document/support/DocumentParseRequestFactoryTest.java
+++ b/easyflow-modules/easyflow-module-ai/src/test/java/tech/easyflow/ai/document/support/DocumentParseRequestFactoryTest.java
@@ -1,6 +1,9 @@
 package tech.easyflow.ai.document.support;

 import com.easyagents.document.core.entity.ParseRequest;
+import com.easyagents.document.core.entity.PdfParseRequest;
+import com.easyagents.document.core.entity.PptxParseRequest;
+import com.easyagents.document.core.entity.XlsxParseRequest;
 import org.junit.Assert;
 import org.junit.Test;
 import tech.easyflow.ai.document.model.DocumentParseScenario;
@@ -26,6 +29,7 @@ public class DocumentParseRequestFactoryTest {
        Assert.assertFalse(request.getReturnMiddleJson());
        Assert.assertFalse(request.getReturnContentList());
        Assert.assertFalse(request.getReturnImages());
+        Assert.assertTrue(request instanceof PdfParseRequest);
    }

    /**
@@ -41,12 +45,33 @@ public class DocumentParseRequestFactoryTest {
        Assert.assertTrue(request.getReturnMiddleJson());
        Assert.assertTrue(request.getReturnContentList());
        Assert.assertTrue(request.getReturnImages());
+        Assert.assertTrue(request instanceof PdfParseRequest);
+    }
+
+    /**
+     * Verifies that PPTX / XLSX sources are built into the corresponding strongly typed requests.
+     */
+    @Test
+    public void shouldBuildOfficeTypedRequests() {
+        DocumentParseRequestFactory factory = new DocumentParseRequestFactory();
+
+        ParseRequest pptxRequest = factory.build(buildSource("slides.pptx",
+            "application/vnd.openxmlformats-officedocument.presentationml.presentation"), DocumentParseScenario.KNOWLEDGE_IMPORT);
+        ParseRequest xlsxRequest = factory.build(buildSource("table.xlsx",
+            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"), DocumentParseScenario.KNOWLEDGE_IMPORT);
+
+        Assert.assertTrue(pptxRequest instanceof PptxParseRequest); // only the concrete request subtype is checked here
+        Assert.assertTrue(xlsxRequest instanceof XlsxParseRequest);
    }

    private LoadedDocumentSource buildSource() {
+        return buildSource("demo.pdf", "application/pdf");
+    }
+
+    private LoadedDocumentSource buildSource(String fileName, String contentType) {
        LoadedDocumentSource source = new LoadedDocumentSource();
-        source.setFileName("demo.pdf");
-        source.setContentType("application/pdf");
+        source.setFileName(fileName);
+        source.setContentType(contentType);
        source.setContentBytes("pdf-data".getBytes());
        source.setSize(8L);
        return source;
--- a/easyflow-modules/easyflow-module-ai/src/test/java/tech/easyflow/ai/document/support/DocumentParseResultMapperTest.java
+++ b/easyflow-modules/easyflow-module-ai/src/test/java/tech/easyflow/ai/document/support/DocumentParseResultMapperTest.java
@@ -4,6 +4,7 @@ import com.easyagents.document.core.entity.ParseArtifacts;
 import com.easyagents.document.core.entity.ParseResult;
 import com.easyagents.document.core.entity.ParseResponse;
 import com.easyagents.document.core.entity.ParseTaskInfo;
+import com.easyagents.document.core.entity.ParseTaskStatus;
 import org.junit.Assert;
 import org.junit.Test;
 import tech.easyflow.ai.document.model.DocumentParseTaskInfo;
@@ -65,6 +66,8 @@ public class DocumentParseResultMapperTest {
        ParseTaskInfo taskInfo = new ParseTaskInfo();
        taskInfo.setTaskId("task-1");
        taskInfo.setStatus("completed");
+        taskInfo.setProgressPercent(100);
+        taskInfo.setCurrentStage("completed");

        ParseResult result = new ParseResult();
        result.setFileName("demo.pdf");
@@ -76,7 +79,33 @@ public class DocumentParseResultMapperTest {
        DocumentParseTaskInfo mapped = mapper.map(taskInfo);

        Assert.assertEquals("task-1", mapped.getTaskId());
+        Assert.assertEquals(Integer.valueOf(100), mapped.getProgressPercent());
+        Assert.assertEquals("completed", mapped.getCurrentStage());
        Assert.assertNotNull(mapped.getResult());
        Assert.assertEquals("# title", mapped.getResult().getPreferredText());
    }
+
+    /**
+     * Verifies that the asynchronous progress fields are passed through intact by the mapper.
+     */
+    @Test
+    public void shouldMapTaskStatusProgressFields() {
+        DocumentParseResultMapper mapper = new DocumentParseResultMapper();
+        ParseTaskStatus status = new ParseTaskStatus();
+        status.setTaskId("task-2");
+        status.setStatus("running");
+        status.setProgressPercent(45);
+        status.setCurrentStage("ocr");
+        status.setProcessedItems(9);
+        status.setTotalItems(20);
+        status.setStatusMessage("正在识别图片");
+
+        tech.easyflow.ai.document.model.DocumentParseTaskStatus mapped = mapper.map(status);
+
+        Assert.assertEquals(Integer.valueOf(45), mapped.getProgressPercent()); // expected values are boxed explicitly
+        Assert.assertEquals("ocr", mapped.getCurrentStage());
+        Assert.assertEquals(Integer.valueOf(9), mapped.getProcessedItems());
+        Assert.assertEquals(Integer.valueOf(20), mapped.getTotalItems());
+        Assert.assertEquals("正在识别图片", mapped.getStatusMessage()); // message must come back unchanged
+    }
 }
--- a/easyflow-modules/easyflow-module-ai/src/test/java/tech/easyflow/ai/documentimport/task/KnowledgeDocumentImportTaskAppServiceTest.java
+++ b/easyflow-modules/easyflow-module-ai/src/test/java/tech/easyflow/ai/documentimport/task/KnowledgeDocumentImportTaskAppServiceTest.java
@@ -1,17 +1,33 @@
 package tech.easyflow.ai.documentimport.task;

+import com.easyagents.document.core.entity.DocumentBlock;
+import com.easyagents.document.core.entity.DocumentImage;
+import com.easyagents.document.core.entity.DocumentTable;
+import com.easyagents.rag.ingestion.model.StrategyConfig;
 import org.junit.Assert;
 import org.junit.Test;
+import org.springframework.web.multipart.MultipartFile;
+import tech.easyflow.ai.document.model.DocumentParseArtifacts;
+import tech.easyflow.ai.document.model.DocumentParsedResult;
+import tech.easyflow.ai.documentimport.DocumentImportKeys;
+import tech.easyflow.ai.entity.DocumentChunk;
 import tech.easyflow.ai.entity.DocumentImportTask;
 import tech.easyflow.ai.enums.DocumentImportTaskStatus;
 import tech.easyflow.ai.enums.DocumentProcessStatus;
 import tech.easyflow.ai.mapper.DocumentMapper;
 import tech.easyflow.ai.service.DocumentImportTaskService;
+import tech.easyflow.common.filestorage.FileStorageService;

 import java.lang.reflect.Field;
 import java.lang.reflect.Method;
 import java.lang.reflect.Proxy;
 import java.math.BigInteger;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.Base64;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
 import java.util.concurrent.atomic.AtomicReference;

 /**
@@ -84,6 +100,258 @@ public class KnowledgeDocumentImportTaskAppServiceTest {
        Assert.assertEquals("新错误", updatedTask.getErrorSummary());
    }

+    /**
+     * Verifies that knowledge-base import saves parsed images through the storage service and rewrites the Markdown and structured references to the stored URL.
+     *
+     * @throws Exception if the reflective field access or method invocation fails
+     */
+    @Test
+    public void normalizeParsedImagesForKnowledgeImportShouldUploadAndRewriteReferences() throws Exception {
+        KnowledgeDocumentImportTaskAppService service = new KnowledgeDocumentImportTaskAppService();
+        AtomicReference<String> savedPrePathRef = new AtomicReference<String>();
+        AtomicReference<String> savedFilenameRef = new AtomicReference<String>();
+        setField(service, "storageService", mockFileStorageService(savedPrePathRef, savedFilenameRef)); // inject the storage mock via the reflective helper
+
+        tech.easyflow.ai.entity.Document document = new tech.easyflow.ai.entity.Document();
+        document.setId(BigInteger.valueOf(88));
+        document.setTitle("产品说明书（终版）.pdf"); // id and title both show up in the expected storage pre-path asserted below
+
+        DocumentParsedResult parsedResult = new DocumentParsedResult();
+        parsedResult.setMarkdown("图例如下：\n![](images/sample-image.png)"); // the same relative image path is reused in every structure below
+        parsedResult.setPreferredText(parsedResult.getMarkdown());
+        parsedResult.setPlainText(parsedResult.getMarkdown());
+
+        DocumentImage image = new DocumentImage();
+        image.setName("sample-image.png");
+        image.setSourcePath("images/sample-image.png");
+        image.setMimeType("image/png");
+        image.setDataUrl("data:image/png;base64," + Base64.getEncoder().encodeToString("demo".getBytes(StandardCharsets.UTF_8))); // inline payload; asserted to be null after normalization
+        parsedResult.setImages(new ArrayList<DocumentImage>(List.of(image)));
+
+        DocumentBlock block = new DocumentBlock();
+        block.setImagePath("images/sample-image.png");
+        parsedResult.setBlocks(new ArrayList<DocumentBlock>(List.of(block)));
+
+        DocumentTable table = new DocumentTable();
+        table.setImagePath("images/sample-image.png");
+        parsedResult.setTables(new ArrayList<DocumentTable>(List.of(table)));
+
+        DocumentParseArtifacts artifacts = new DocumentParseArtifacts();
+        List<Map<String, Object>> contentList = new ArrayList<Map<String, Object>>();
+        Map<String, Object> contentItem = new LinkedHashMap<String, Object>();
+        contentItem.put("img_path", "images/sample-image.png");
+        contentList.add(contentItem);
+        artifacts.setContentList(contentList);
+        Map<String, Object> xlsxArtifact = new LinkedHashMap<String, Object>();
+        List<Map<String, Object>> sheetImages = new ArrayList<Map<String, Object>>();
+        sheetImages.add(new LinkedHashMap<String, Object>() {{
+            put("sheetName", "Sheet1");
+            put("sourcePaths", new ArrayList<String>(List.of("images/sample-image.png")));
+        }});
+        xlsxArtifact.put("sheetImages", sheetImages);
+        artifacts.setExtraJsonArtifacts(new LinkedHashMap<String, Object>() {{
+            put("xlsx", xlsxArtifact);
+        }});
+        parsedResult.setArtifacts(artifacts);
+
+        Method method = KnowledgeDocumentImportTaskAppService.class.getDeclaredMethod( // method under test is resolved and invoked reflectively
+            "normalizeParsedImagesForKnowledgeImport",
+            tech.easyflow.ai.entity.Document.class,
+            DocumentParsedResult.class
+        );
+        method.setAccessible(true);
+        DocumentParsedResult normalized = (DocumentParsedResult) method.invoke(service, document, parsedResult);
+
+        Assert.assertNotNull(normalized);
+        Assert.assertEquals("knowledge-parse/88_产品说明书_终版/images", savedPrePathRef.get()); // pre-path captured by the storage mock
+        Assert.assertEquals("sample-image.png", savedFilenameRef.get());
+
+        String expectedUrl = "http://localhost:39000/easyflow/attachment/knowledge-parse/88_产品说明书_终版/images/sample-image.png"; // URL shape fabricated by mockFileStorageService
+        Assert.assertTrue(normalized.getMarkdown().contains(expectedUrl));
+        Assert.assertEquals(expectedUrl, normalized.getBlocks().get(0).getImagePath());
+        Assert.assertEquals(expectedUrl, normalized.getTables().get(0).getImagePath());
+        Assert.assertEquals(expectedUrl, normalized.getImages().get(0).getSourcePath());
+        Assert.assertNull(normalized.getImages().get(0).getDataUrl());
+        Object rewrittenContentList = normalized.getArtifacts().getContentList();
+        Assert.assertTrue(rewrittenContentList instanceof List<?>);
+        Assert.assertEquals(expectedUrl, ((Map<?, ?>) ((List<?>) rewrittenContentList).get(0)).get("img_path"));
+        Object rewrittenSheetImages = ((Map<?, ?>) normalized.getArtifacts().getExtraJsonArtifacts().get("xlsx")).get("sheetImages");
+        Assert.assertTrue(rewrittenSheetImages instanceof List<?>);
+        Object sourcePaths = ((Map<?, ?>) ((List<?>) rewrittenSheetImages).get(0)).get("sourcePaths");
+        Assert.assertEquals(expectedUrl, ((List<?>) sourcePaths).get(0));
+    }
+
+    /**
+     * Verifies that PPTX input yields one knowledge-base chunk per slide, built from the slide-level artifacts.
+     *
+     * @throws Exception if the reflective method invocation fails
+     */
+    @Test
+    public void buildOfficeDocumentChunksShouldSplitPptxBySlide() throws Exception {
+        KnowledgeDocumentImportTaskAppService service = new KnowledgeDocumentImportTaskAppService();
+        tech.easyflow.ai.entity.Document document = new tech.easyflow.ai.entity.Document();
+        document.setId(BigInteger.valueOf(101));
+        document.setCollectionId(BigInteger.valueOf(201));
+        document.setTitle("季度汇报.pptx");
+
+        Map<String, Object> parseArtifactSummary = new LinkedHashMap<String, Object>();
+        List<Map<String, Object>> slides = new ArrayList<Map<String, Object>>();
+        slides.add(new LinkedHashMap<String, Object>() {{
+            put("slideIndex", 0); // zero-based input; the assertions below expect "Slide 1" and page index 1
+            put("title", "封面");
+            put("ocrMarkdown", "本页介绍季度目标。");
+            put("imagePath", "https://example.com/slides/slide-001.png");
+            put("imageName", "slide-001-page");
+        }});
+        slides.add(new LinkedHashMap<String, Object>() {{
+            put("slideIndex", 1);
+            put("title", "经营分析");
+            put("ocrMarkdown", "收入同比增长 18%。");
+            put("imagePath", "https://example.com/slides/slide-002.png");
+            put("imageName", "slide-002-page");
+        }});
+        parseArtifactSummary.put("slides", slides);
+
+        Method method = KnowledgeDocumentImportTaskAppService.class.getDeclaredMethod( // method under test is resolved and invoked reflectively
+            "buildOfficeDocumentChunks",
+            tech.easyflow.ai.entity.Document.class,
+            String.class,
+            StrategyConfig.class,
+            Map.class
+        );
+        method.setAccessible(true);
+
+        @SuppressWarnings("unchecked")
+        List<DocumentChunk> chunks = (List<DocumentChunk>) method.invoke(
+            service,
+            document,
+            "pptx",
+            null, // no StrategyConfig is supplied for the PPTX case
+            parseArtifactSummary
+        );
+
+        Assert.assertEquals(2, chunks.size()); // two slides in, two chunks out
+        DocumentChunk firstChunk = chunks.get(0);
+        Assert.assertTrue(firstChunk.getContent().contains("Slide 1"));
+        Assert.assertTrue(firstChunk.getContent().contains("本页介绍季度目标"));
+        Assert.assertEquals("https://example.com/slides/slide-001.png",
+            ((List<?>) firstChunk.getOptions().get(DocumentImportKeys.KEY_DOCUMENT_IMAGE_REFS)).get(0));
+        Assert.assertEquals(1, firstChunk.getOptions().get(DocumentImportKeys.KEY_DOCUMENT_PAGE_INDEX));
+        Assert.assertTrue(String.valueOf(firstChunk.getOptions().get(DocumentImportKeys.KEY_DOCUMENT_RENDER_MARKDOWN))
+            .contains("slide-001.png"));
+    }
+
+    /**
+     * Verifies that an image-only XLSX sheet does not degrade to empty content and still emits stable image references.
+     *
+     * @throws Exception if the reflective method invocation fails
+     */
+    @Test
+    public void buildOfficeDocumentChunksShouldKeepImageOnlyXlsxSheetReferences() throws Exception {
+        KnowledgeDocumentImportTaskAppService service = new KnowledgeDocumentImportTaskAppService();
+        tech.easyflow.ai.entity.Document document = new tech.easyflow.ai.entity.Document();
+        document.setId(BigInteger.valueOf(102));
+        document.setCollectionId(BigInteger.valueOf(202));
+        document.setTitle("巡检记录.xlsx");
+
+        Map<String, Object> parseArtifactSummary = new LinkedHashMap<String, Object>();
+        List<Map<String, Object>> sheets = new ArrayList<Map<String, Object>>();
+        sheets.add(new LinkedHashMap<String, Object>() {{
+            put("sheetName", "图片页");
+            put("sheetIndex", 0);
+            put("rows", new ArrayList<Map<String, Object>>()); // no data rows: the sheet content comes from images only
+        }});
+        parseArtifactSummary.put("sheets", sheets);
+
+        List<Map<String, Object>> cellImages = new ArrayList<Map<String, Object>>();
+        cellImages.add(new LinkedHashMap<String, Object>() {{
+            put("sheetName", "图片页");
+            put("referenceKey", "image-sheet-r2c2-001"); // expected back as the [IMG:...] marker and the image alt text
+            put("sourcePath", "https://example.com/xlsx/sheet/image-001.jpeg");
+            put("anchorCell", "B2");
+            put("ocrText", "设备状态正常");
+            put("fromRow", 1);
+        }});
+        parseArtifactSummary.put("cellImages", cellImages);
+
+        StrategyConfig strategyConfig = new StrategyConfig();
+        strategyConfig.setRowsPerChunk(10);
+
+        Method method = KnowledgeDocumentImportTaskAppService.class.getDeclaredMethod( // method under test is resolved and invoked reflectively
+            "buildOfficeDocumentChunks",
+            tech.easyflow.ai.entity.Document.class,
+            String.class,
+            StrategyConfig.class,
+            Map.class
+        );
+        method.setAccessible(true);
+
+        @SuppressWarnings("unchecked")
+        List<DocumentChunk> chunks = (List<DocumentChunk>) method.invoke(
+            service,
+            document,
+            "xlsx",
+            strategyConfig,
+            parseArtifactSummary
+        );
+
+        Assert.assertEquals(1, chunks.size()); // the image-only sheet must still produce a chunk
+        DocumentChunk onlyChunk = chunks.get(0);
+        Assert.assertTrue(onlyChunk.getContent().contains("图片 OCR"));
+        Assert.assertTrue(onlyChunk.getContent().contains("设备状态正常"));
+        Assert.assertEquals("图片页", onlyChunk.getOptions().get(DocumentImportKeys.KEY_DOCUMENT_SHEET_NAME));
+        Assert.assertEquals("https://example.com/xlsx/sheet/image-001.jpeg",
+            ((List<?>) onlyChunk.getOptions().get(DocumentImportKeys.KEY_DOCUMENT_IMAGE_REFS)).get(0));
+        String renderMarkdown = String.valueOf(onlyChunk.getOptions().get(DocumentImportKeys.KEY_DOCUMENT_RENDER_MARKDOWN));
+        Assert.assertTrue(renderMarkdown.contains("[IMG:image-sheet-r2c2-001]"));
+        Assert.assertTrue(renderMarkdown.contains("![image-sheet-r2c2-001](https://example.com/xlsx/sheet/image-001.jpeg)"));
+    }
+
+    /**
+     * Verifies that a blank sheet is not mistaken for an image-only sheet and therefore produces no chunk.
+     *
+     * @throws Exception if the reflective method invocation fails
+     */
+    @Test
+    public void buildOfficeDocumentChunksShouldSkipBlankXlsxSheetWithoutImages() throws Exception {
+        KnowledgeDocumentImportTaskAppService service = new KnowledgeDocumentImportTaskAppService();
+        tech.easyflow.ai.entity.Document document = new tech.easyflow.ai.entity.Document();
+        document.setId(BigInteger.valueOf(103));
+        document.setCollectionId(BigInteger.valueOf(203));
+        document.setTitle("空白工作簿.xlsx");
+
+        Map<String, Object> parseArtifactSummary = new LinkedHashMap<String, Object>();
+        parseArtifactSummary.put("sheets", new ArrayList<Map<String, Object>>(List.of(new LinkedHashMap<String, Object>() {{
+            put("sheetName", "空白页");
+            put("sheetIndex", 0);
+            put("rows", new ArrayList<Map<String, Object>>()); // no data rows
+        }})));
+        parseArtifactSummary.put("cellImages", new ArrayList<Map<String, Object>>()); // and no cell images either
+
+        StrategyConfig strategyConfig = new StrategyConfig();
+        strategyConfig.setRowsPerChunk(10);
+
+        Method method = KnowledgeDocumentImportTaskAppService.class.getDeclaredMethod( // method under test is resolved and invoked reflectively
+            "buildOfficeDocumentChunks",
+            tech.easyflow.ai.entity.Document.class,
+            String.class,
+            StrategyConfig.class,
+            Map.class
+        );
+        method.setAccessible(true);
+
+        @SuppressWarnings("unchecked")
+        List<DocumentChunk> chunks = (List<DocumentChunk>) method.invoke(
+            service,
+            document,
+            "xlsx",
+            strategyConfig,
+            parseArtifactSummary
+        );
+
+        Assert.assertTrue(chunks.isEmpty()); // nothing to index for an empty sheet
+    }
+
    private static DocumentMapper mockDocumentMapper(tech.easyflow.ai.entity.Document persistedDocument,
                                                     AtomicReference<tech.easyflow.ai.entity.Document> updatedDocumentRef) {
        return (DocumentMapper) Proxy.newProxyInstance(
@@ -116,6 +384,22 @@ public class KnowledgeDocumentImportTaskAppServiceTest {
        );
    }

+    private static FileStorageService mockFileStorageService(AtomicReference<String> savedPrePathRef,
+                                                             AtomicReference<String> savedFilenameRef) { // refs receive the arguments of the latest save() call
+        return (FileStorageService) Proxy.newProxyInstance( // dynamic proxy standing in for FileStorageService
+            FileStorageService.class.getClassLoader(),
+            new Class<?>[]{FileStorageService.class},
+            (proxy, method, args) -> {
+                if ("save".equals(method.getName()) && args != null && args.length == 2 && args[0] instanceof MultipartFile file) { // only the two-argument save(file, prePath) call is intercepted
+                    savedPrePathRef.set((String) args[1]);
+                    savedFilenameRef.set(file.getOriginalFilename());
+                    return "http://localhost:39000/easyflow/attachment/" + args[1] + "/" + file.getOriginalFilename(); // fabricated URL: fixed base + prePath + "/" + original filename
+                }
+                return defaultValue(method.getReturnType()); // every other call falls back to the defaultValue helper
+            }
+        );
+    }
+
    private static void setField(Object target, String fieldName, Object value) throws Exception {
        Field field = KnowledgeDocumentImportTaskAppService.class.getDeclaredField(fieldName);
        field.setAccessible(true);