diff --git a/easy-agents-document/easy-agents-document-core/src/main/java/com/easyagents/document/core/mineru/MineruClient.java b/easy-agents-document/easy-agents-document-core/src/main/java/com/easyagents/document/core/mineru/MineruClient.java
index 1cb756c..be2fb6a 100644
--- a/easy-agents-document/easy-agents-document-core/src/main/java/com/easyagents/document/core/mineru/MineruClient.java
+++ b/easy-agents-document/easy-agents-document-core/src/main/java/com/easyagents/document/core/mineru/MineruClient.java
@@ -1,6 +1,5 @@
package com.easyagents.document.core.mineru;
-import com.alibaba.fastjson2.JSON;
import com.alibaba.fastjson2.JSONObject;
import com.easyagents.core.util.StringUtil;
import com.easyagents.document.core.exception.DocumentParseException;
@@ -16,6 +15,7 @@ import okhttp3.ResponseBody;
import java.io.IOException;
import java.net.URLConnection;
+import java.nio.charset.StandardCharsets;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;
@@ -115,7 +115,10 @@ public class MineruClient {
}
String contentType = response.header("Content-Type");
if (contentType != null && contentType.contains("application/json")) {
- JSONObject jsonObject = JSON.parseObject(new String(responseBytes));
+ JSONObject jsonObject = MineruJsonSupport.parseObject(
+ new String(responseBytes, StandardCharsets.UTF_8),
+ "MinerU async result response: " + path
+ );
throw new DocumentParseException("MinerU async result is not ready: " + jsonObject.toJSONString());
}
if (responseBytes.length < 2 || responseBytes[0] != 'P' || responseBytes[1] != 'K') {
@@ -148,9 +151,13 @@ public class MineruClient {
ResponseBody body = response.body();
String bodyText = body == null ? "" : body.string();
if (!response.isSuccessful()) {
- throw buildHttpException(path, response.code(), bodyText == null ? new byte[0] : bodyText.getBytes());
+ throw buildHttpException(
+ path,
+ response.code(),
+ bodyText == null ? new byte[0] : bodyText.getBytes(StandardCharsets.UTF_8)
+ );
}
- return JSON.parseObject(bodyText);
+ return MineruJsonSupport.parseObject(bodyText, "MinerU response body: " + path);
} catch (IOException exception) {
throw new DocumentParseException("Failed to call MinerU endpoint: " + path, exception);
}
@@ -197,7 +204,7 @@ public class MineruClient {
}
private DocumentParseException buildHttpException(String path, int statusCode, byte[] bodyBytes) {
- String bodyText = bodyBytes == null ? "" : new String(bodyBytes);
+ String bodyText = bodyBytes == null ? "" : new String(bodyBytes, StandardCharsets.UTF_8);
return new DocumentParseException(
"MinerU request failed: path=" + path + ", status=" + statusCode + ", body=" + bodyText
);
diff --git a/easy-agents-document/easy-agents-document-core/src/main/java/com/easyagents/document/core/mineru/MineruJsonSupport.java b/easy-agents-document/easy-agents-document-core/src/main/java/com/easyagents/document/core/mineru/MineruJsonSupport.java
new file mode 100644
index 0000000..2c8d07d
--- /dev/null
+++ b/easy-agents-document/easy-agents-document-core/src/main/java/com/easyagents/document/core/mineru/MineruJsonSupport.java
@@ -0,0 +1,199 @@
+package com.easyagents.document.core.mineru;
+
+import com.alibaba.fastjson2.JSON;
+import com.alibaba.fastjson2.JSONArray;
+import com.alibaba.fastjson2.JSONObject;
+import com.easyagents.core.util.StringUtil;
+import com.easyagents.document.core.exception.DocumentParseException;
+
+/**
+ * MinerU JSON compatibility parsing utility.
+ *
+ * <p>Some deployments wrap structured fields in an extra JSON string layer,
+ * for example {@code middle_json="{}"}, or return the entire response body as
+ * {@code "{\"results\":{...}}"}. This utility unwraps such values in one place
+ * so that upper-layer modules need not repeat these compatibility branches.
+ *
+ * @author Codex
+ * @since 2026-04-17
+ */
+final class MineruJsonSupport {
+
+ private static final int MAX_JSON_UNWRAP_DEPTH = 4;
+ private static final int PREVIEW_LIMIT = 160;
+
+ private MineruJsonSupport() {
+ }
+
+ /**
+ * Parses JSON text and requires the final result to be an object.
+ *
+ * @param text JSON text
+ * @param context error context included in exception messages
+ * @return JSON object
+ */
+ static JSONObject parseObject(String text, String context) {
+ Object value = parseValue(text, context);
+ if (value instanceof JSONObject) {
+ return (JSONObject) value;
+ }
+ throw unexpectedType(context, "JSONObject", value);
+ }
+
+ /**
+ * Normalizes an arbitrary JSON value.
+ *
+ * @param value raw value; {@code null} and blank text yield {@code null}
+ * @param context error context included in exception messages
+ * @return the normalized JSON value
+ */
+ static Object normalizeValue(Object value, String context) {
+ if (value == null) {
+ return null;
+ }
+ if (value instanceof JSONObject || value instanceof JSONArray) {
+ return value;
+ }
+ if (value instanceof CharSequence) {
+ String text = value.toString().trim();
+ if (!StringUtil.hasText(text)) {
+ return null;
+ }
+ return parseValue(text, context);
+ }
+ try {
+ return unwrapNestedJsonString(JSON.parse(JSON.toJSONString(value)), context);
+ } catch (Exception exception) {
+ throw new DocumentParseException(
+ "Failed to normalize MinerU JSON value: " + context + ", preview=" + preview(String.valueOf(value)),
+ exception
+ );
+ }
+ }
+
+ /**
+ * Attempts to unwrap only when the value looks like JSON; otherwise keeps the original value.
+ *
+ * @param value raw value
+ * @param context error context included in exception messages
+ * @return the normalized value, or the original value
+ */
+ static Object normalizeValueIfJsonLike(Object value, String context) {
+ if (!(value instanceof CharSequence)) {
+ return normalizeValue(value, context);
+ }
+ String text = value.toString().trim();
+ if (!looksLikeJson(text) && !(text.startsWith("\"") && text.endsWith("\""))) {
+ return value;
+ }
+ return normalizeValue(value, context);
+ }
+
+ /**
+ * Converts an arbitrary JSON value to an object.
+ *
+ * @param value raw value
+ * @param context error context included in exception messages
+ * @return JSON object, or {@code null} when the value normalizes to {@code null}
+ */
+ static JSONObject asObject(Object value, String context) {
+ Object normalized = normalizeValue(value, context);
+ if (normalized == null) {
+ return null;
+ }
+ if (normalized instanceof JSONObject) {
+ return (JSONObject) normalized;
+ }
+ throw unexpectedType(context, "JSONObject", normalized);
+ }
+
+ /**
+ * Converts an arbitrary JSON value to an array.
+ *
+ * @param value raw value
+ * @param context error context included in exception messages
+ * @return JSON array, or {@code null} when the value normalizes to {@code null}
+ */
+ static JSONArray asArray(Object value, String context) {
+ Object normalized = normalizeValue(value, context);
+ if (normalized == null) {
+ return null;
+ }
+ if (normalized instanceof JSONArray) {
+ return (JSONArray) normalized;
+ }
+ throw unexpectedType(context, "JSONArray", normalized);
+ }
+
+ /**
+ * Parses JSON text and automatically unwraps string values that were double-wrapped.
+ *
+ * @param text JSON text; blank or {@code null} text is rejected with an exception
+ * @param context error context included in exception messages
+ * @return the parsed result
+ */
+ static Object parseValue(String text, String context) {
+ String trimmed = text == null ? null : text.trim();
+ if (!StringUtil.hasText(trimmed)) {
+ throw new DocumentParseException("MinerU JSON payload is empty: " + context);
+ }
+ try {
+ return unwrapNestedJsonString(JSON.parse(trimmed), context);
+ } catch (DocumentParseException exception) {
+ throw exception;
+ } catch (Exception exception) {
+ throw new DocumentParseException(
+ "Failed to parse MinerU JSON payload: " + context + ", preview=" + preview(trimmed),
+ exception
+ );
+ }
+ }
+
+ private static Object unwrapNestedJsonString(Object value, String context) {
+ Object current = value;
+ for (int depth = 0; depth < MAX_JSON_UNWRAP_DEPTH; depth++) {
+ if (!(current instanceof String)) {
+ return current;
+ }
+ String text = ((String) current).trim();
+ if (!looksLikeJson(text)) {
+ return current;
+ }
+ try {
+ current = JSON.parse(text);
+ } catch (Exception exception) {
+ throw new DocumentParseException(
+ "Failed to unwrap MinerU nested JSON string: " + context + ", preview=" + preview(text),
+ exception
+ );
+ }
+ }
+ return current;
+ }
+
+ private static boolean looksLikeJson(String text) {
+ if (!StringUtil.hasText(text)) {
+ return false;
+ }
+ char first = text.charAt(0);
+ return first == '{' || first == '[';
+ }
+
+ private static DocumentParseException unexpectedType(String context, String expectedType, Object actualValue) {
+ String actualType = actualValue == null ? "null" : actualValue.getClass().getSimpleName();
+ return new DocumentParseException(
+ "MinerU JSON payload type mismatch: " + context + ", expected=" + expectedType + ", actual=" + actualType
+ );
+ }
+
+ private static String preview(String text) {
+ if (text == null) {
+ return "";
+ }
+ String normalized = text.replace('\n', ' ').replace('\r', ' ');
+ if (normalized.length() <= PREVIEW_LIMIT) {
+ return normalized;
+ }
+ return normalized.substring(0, PREVIEW_LIMIT) + "...";
+ }
+}
diff --git a/easy-agents-document/easy-agents-document-core/src/main/java/com/easyagents/document/core/mineru/MineruMapper.java b/easy-agents-document/easy-agents-document-core/src/main/java/com/easyagents/document/core/mineru/MineruMapper.java
index 26d1db6..1f88ebc 100644
--- a/easy-agents-document/easy-agents-document-core/src/main/java/com/easyagents/document/core/mineru/MineruMapper.java
+++ b/easy-agents-document/easy-agents-document-core/src/main/java/com/easyagents/document/core/mineru/MineruMapper.java
@@ -116,10 +116,16 @@ public class MineruMapper {
payload.setBackend(jsonObject.getString("backend"));
payload.setVersion(jsonObject.getString("version"));
Map results = new LinkedHashMap();
- JSONObject resultJson = jsonObject.getJSONObject("results");
+ JSONObject resultJson = MineruJsonSupport.asObject(jsonObject.get("results"), "MinerU sync results");
if (resultJson != null) {
for (String key : resultJson.keySet()) {
- results.put(key, resultJson.getJSONObject(key));
+ JSONObject result = MineruJsonSupport.asObject(
+ resultJson.get(key),
+ "MinerU sync result entry: " + key
+ );
+ if (result != null) {
+ results.put(key, result);
+ }
}
}
payload.setResults(results);
@@ -239,12 +245,24 @@ public class MineruMapper {
result.setPlainText(result.getMarkdown());
ParseArtifacts artifacts = new ParseArtifacts();
- artifacts.setMiddleJson(fileResult.get("middle_json"));
- artifacts.setContentList(fileResult.get("content_list"));
- artifacts.setModelOutput(fileResult.get("model_output"));
+ artifacts.setMiddleJson(MineruJsonSupport.normalizeValue(
+ fileResult.get("middle_json"),
+ "MinerU result " + fileName + " middle_json"
+ ));
+ artifacts.setContentList(MineruJsonSupport.normalizeValue(
+ fileResult.get("content_list"),
+ "MinerU result " + fileName + " content_list"
+ ));
+ artifacts.setModelOutput(MineruJsonSupport.normalizeValueIfJsonLike(
+ fileResult.get("model_output"),
+ "MinerU result " + fileName + " model_output"
+ ));
result.setArtifacts(artifacts);
- Map imageDataUrls = toStringMap(fileResult.getJSONObject("images"));
+ Map imageDataUrls = toStringMap(MineruJsonSupport.asObject(
+ fileResult.get("images"),
+ "MinerU result " + fileName + " images"
+ ));
Map imageContents = toBinaryMap(imageDataUrls);
applyStructuredArtifacts(result, imageDataUrls, imageContents);
if (result.getMarkdown() == null && result.getArtifacts().getMiddleJson() == null && result.getArtifacts().getContentList() == null) {
@@ -266,8 +284,8 @@ public class MineruMapper {
Object contentListArtifact = firstJsonValue(bundle.entriesBySuffix, "_content_list.json");
Object modelOutputArtifact = firstJsonValue(bundle.entriesBySuffix, "_model.json");
- JSONObject middleJson = asObject(middleArtifact);
- JSONArray contentList = asArray(contentListArtifact);
+ JSONObject middleJson = asObject(middleArtifact, "MinerU ZIP middle artifact: " + fileName);
+ JSONArray contentList = asArray(contentListArtifact, "MinerU ZIP content_list artifact: " + fileName);
Object modelOutput = modelOutputArtifact;
if (contentList == null && middleArtifact instanceof JSONArray) {
@@ -283,7 +301,10 @@ public class MineruMapper {
artifacts.setContentList(contentList == null ? contentListArtifact : contentList);
artifacts.setModelOutput(modelOutput);
- JSONArray contentListV2 = asArray(firstJsonValue(bundle.entriesBySuffix, "_content_list_v2.json"));
+ JSONArray contentListV2 = asArray(
+ firstJsonValue(bundle.entriesBySuffix, "_content_list_v2.json"),
+ "MinerU ZIP content_list_v2 artifact: " + fileName
+ );
if (contentListV2 != null) {
artifacts.getExtraJsonArtifacts().put("contentListV2", contentListV2);
}
@@ -308,8 +329,8 @@ public class MineruMapper {
}
private void applyStructuredArtifacts(ParseResult result, Map imageDataUrls, Map imageContents) {
- JSONObject middleJson = asObject(result.getArtifacts().getMiddleJson());
- JSONArray contentList = asArray(result.getArtifacts().getContentList());
+ JSONObject middleJson = asObject(result.getArtifacts().getMiddleJson(), "MinerU middle_json artifact");
+ JSONArray contentList = asArray(result.getArtifacts().getContentList(), "MinerU content_list artifact");
if (middleJson != null) {
fillPages(result, middleJson);
@@ -576,34 +597,18 @@ public class MineruMapper {
if (!StringUtil.hasText(text)) {
return null;
}
- try {
- return JSON.parse(text);
- } catch (Exception exception) {
- throw new DocumentParseException("Failed to parse MinerU JSON artifact: suffix=" + suffix, exception);
- }
+ return MineruJsonSupport.parseValue(text, "MinerU ZIP artifact " + suffix);
}
- private JSONObject asObject(Object value) {
- if (value instanceof JSONObject) {
- return (JSONObject) value;
- }
- if (value == null) {
- return null;
- }
+ private JSONObject asObject(Object value, String context) {
if (value instanceof JSONArray) {
return null;
}
- return JSON.parseObject(JSON.toJSONString(value));
+ return MineruJsonSupport.asObject(value, context);
}
- private JSONArray asArray(Object value) {
- if (value instanceof JSONArray) {
- return (JSONArray) value;
- }
- if (value == null) {
- return null;
- }
- return JSON.parseArray(JSON.toJSONString(value));
+ private JSONArray asArray(Object value, String context) {
+ return MineruJsonSupport.asArray(value, context);
}
private List toStringList(JSONArray jsonArray) {
diff --git a/easy-agents-document/easy-agents-document-pdf/src/test/java/com/easyagents/document/pdf/mineru/MineruMapperTest.java b/easy-agents-document/easy-agents-document-pdf/src/test/java/com/easyagents/document/pdf/mineru/MineruMapperTest.java
index 115a803..2ce637d 100644
--- a/easy-agents-document/easy-agents-document-pdf/src/test/java/com/easyagents/document/pdf/mineru/MineruMapperTest.java
+++ b/easy-agents-document/easy-agents-document-pdf/src/test/java/com/easyagents/document/pdf/mineru/MineruMapperTest.java
@@ -49,6 +49,25 @@ public class MineruMapperTest {
Assert.assertNotNull(result.getArtifacts().getContentList());
}
+ @Test
+ public void shouldMapStringifiedSyncArtifacts() {
+ MineruMapper mapper = new MineruMapper(defaultProperties());
+ MineruResultPayload payload = mapper.toResultPayload(syncPayloadWithStringifiedArtifacts());
+
+ ParseResponse response = mapper.toParseResponse(payload);
+
+ Assert.assertEquals(1, response.getResults().size());
+ ParseResult result = response.getResults().get(0);
+ Assert.assertEquals("# title", result.getMarkdown());
+ Assert.assertFalse(result.getBlocks().isEmpty());
+ Assert.assertEquals(1, result.getTables().size());
+ Assert.assertEquals(2, result.getImages().size());
+ Assert.assertNotNull(result.getImages().get(0).getContent());
+ Assert.assertTrue(result.getArtifacts().getMiddleJson() instanceof JSONObject);
+ Assert.assertTrue(result.getArtifacts().getContentList() instanceof JSONArray);
+ Assert.assertEquals("plain-model-output", result.getArtifacts().getModelOutput());
+ }
+
@Test
public void shouldMapZipResponse() throws IOException {
MineruMapper mapper = new MineruMapper(defaultProperties());
@@ -292,6 +311,28 @@ public class MineruMapperTest {
return payload;
}
+ private JSONObject syncPayloadWithStringifiedArtifacts() {
+ JSONObject payload = new JSONObject();
+ payload.put("backend", "vlm-http-client");
+ payload.put("version", "3.0.9");
+
+ JSONObject result = new JSONObject();
+ result.put("md_content", "# title");
+ result.put("middle_json", middleJson().toJSONString());
+ result.put("content_list", contentList().toJSONString());
+ result.put("model_output", "plain-model-output");
+
+ JSONObject images = new JSONObject();
+ images.put("figure.png", "data:image/png;base64,ZmFrZQ==");
+ images.put("table.png", "data:image/png;base64,ZmFrZQ==");
+ result.put("images", images.toJSONString());
+
+ JSONObject results = new JSONObject();
+ results.put("demo", result.toJSONString());
+ payload.put("results", results);
+ return payload;
+ }
+
private JSONObject middleBlock(String type, String imagePath) {
JSONObject block = new JSONObject();
block.put("type", type);
diff --git a/easy-agents-document/easy-agents-document-pptx/src/test/java/com/easyagents/document/pptx/mineru/MineruPptxDocumentParseServiceTest.java b/easy-agents-document/easy-agents-document-pptx/src/test/java/com/easyagents/document/pptx/mineru/MineruPptxDocumentParseServiceTest.java
index f71607e..9bcad2c 100644
--- a/easy-agents-document/easy-agents-document-pptx/src/test/java/com/easyagents/document/pptx/mineru/MineruPptxDocumentParseServiceTest.java
+++ b/easy-agents-document/easy-agents-document-pptx/src/test/java/com/easyagents/document/pptx/mineru/MineruPptxDocumentParseServiceTest.java
@@ -81,6 +81,29 @@ public class MineruPptxDocumentParseServiceTest {
Assert.assertEquals(1, taskInfo.getResult().getResults().size());
}
+ @Test
+ public void shouldSupportStringifiedMineruSlideArtifacts() throws IOException {
+ RecordingClient client = new RecordingClient(defaultProperties(), true);
+ MineruMapper mapper = new MineruMapper(defaultProperties());
+ MineruPptxDocumentParseService service = new MineruPptxDocumentParseService(
+ defaultProperties(),
+ client,
+ mapper,
+ new DocumentAsyncTaskManager(new InMemoryDocumentAsyncTaskRepository(), directExecutor())
+ );
+
+ PptxParseRequest request = new PptxParseRequest();
+ request.addFile(ParseFile.of("demo.pptx", buildPptxBytes()));
+
+ ParseResponse response = service.parse(request);
+
+ Assert.assertEquals(1, response.getResults().size());
+ ParseResult result = response.getResults().get(0);
+ Assert.assertTrue(result.getMarkdown().contains("slide-ocr-1"));
+ Assert.assertFalse(result.getBlocks().isEmpty());
+ Assert.assertEquals(2, result.getImages().size());
+ }
+
private byte[] buildPptxBytes() throws IOException {
XMLSlideShow slideShow = new XMLSlideShow();
slideShow.setPageSize(new java.awt.Dimension(640, 360));
@@ -117,9 +140,15 @@ public class MineruPptxDocumentParseServiceTest {
private static class RecordingClient extends MineruClient {
private int parseCount;
+ private final boolean stringifyArtifacts;
private RecordingClient(MineruProperties properties) {
+ this(properties, false);
+ }
+
+ private RecordingClient(MineruProperties properties, boolean stringifyArtifacts) {
super(properties, new MineruMapper(properties));
+ this.stringifyArtifacts = stringifyArtifacts;
}
@Override
@@ -134,10 +163,10 @@ public class MineruPptxDocumentParseServiceTest {
payload.put("version", "3.0.9");
JSONObject result = new JSONObject();
result.put("md_content", "slide-ocr-" + index);
- result.put("middle_json", middleJson());
- result.put("content_list", contentList(index));
+ result.put("middle_json", stringifyArtifacts ? middleJson().toJSONString() : middleJson());
+ result.put("content_list", stringifyArtifacts ? contentList(index).toJSONString() : contentList(index));
JSONObject results = new JSONObject();
- results.put("slide-" + index, result);
+ results.put("slide-" + index, stringifyArtifacts ? result.toJSONString() : result);
payload.put("results", results);
return payload;
}
diff --git a/easy-agents-document/easy-agents-document-xlsx/src/main/java/com/easyagents/document/xlsx/mineru/MineruXlsxDocumentParseService.java b/easy-agents-document/easy-agents-document-xlsx/src/main/java/com/easyagents/document/xlsx/mineru/MineruXlsxDocumentParseService.java
index 8d68499..5062f58 100644
--- a/easy-agents-document/easy-agents-document-xlsx/src/main/java/com/easyagents/document/xlsx/mineru/MineruXlsxDocumentParseService.java
+++ b/easy-agents-document/easy-agents-document-xlsx/src/main/java/com/easyagents/document/xlsx/mineru/MineruXlsxDocumentParseService.java
@@ -253,10 +253,11 @@ public class MineruXlsxDocumentParseService extends AbstractAsyncDocumentParseSe
appendSheetHeader(extraction.markdown, sheet.getSheetName());
if (maxRow < 0 || maxCol <= 0) {
- extraction.markdown.append("_empty sheet_");
- if (Boolean.TRUE.equals(request.getIncludeImageAppendix()) && !imageArtifacts.isEmpty()) {
- appendImageAppendix(extraction.markdown, sheet.getSheetName(), imageArtifacts);
+ if (!imageArtifacts.isEmpty()) {
+ appendImageOnlySheet(extraction.markdown, sheet.getSheetName(), request, imageArtifacts);
+ return extraction;
}
+ extraction.markdown.append("_empty sheet_");
return extraction;
}
@@ -488,6 +489,29 @@ public class MineruXlsxDocumentParseService extends AbstractAsyncDocumentParseSe
}
}
+ private void appendImageOnlySheet(StringBuilder markdownBuilder,
+ String sheetName,
+ XlsxParseRequest request,
+ List imageArtifacts) {
+ markdownBuilder.append("## ").append(sheetName).append(" 图片内容\n\n");
+ for (XlsxCellImageArtifact imageArtifact : imageArtifacts) {
+ markdownBuilder.append("[IMG:")
+ .append(imageArtifact.getReferenceKey())
+ .append("]\n\n");
+ }
+ if (Boolean.TRUE.equals(request.getIncludeImageAppendix())) {
+ appendImageAppendix(markdownBuilder, sheetName, imageArtifacts);
+ return;
+ }
+ for (XlsxCellImageArtifact imageArtifact : imageArtifacts) {
+ markdownBuilder.append("
+ .append(imageArtifact.getSourcePath())
+ .append(")\n\n");
+ }
+ }
+
private List extractMergedRanges(XSSFSheet sheet) {
List mergedRanges = new ArrayList();
for (int index = 0; index < sheet.getNumMergedRegions(); index++) {
diff --git a/easy-agents-document/easy-agents-document-xlsx/src/test/java/com/easyagents/document/xlsx/mineru/MineruXlsxDocumentParseServiceTest.java b/easy-agents-document/easy-agents-document-xlsx/src/test/java/com/easyagents/document/xlsx/mineru/MineruXlsxDocumentParseServiceTest.java
index 5bc4c85..6d3a9df 100644
--- a/easy-agents-document/easy-agents-document-xlsx/src/test/java/com/easyagents/document/xlsx/mineru/MineruXlsxDocumentParseServiceTest.java
+++ b/easy-agents-document/easy-agents-document-xlsx/src/test/java/com/easyagents/document/xlsx/mineru/MineruXlsxDocumentParseServiceTest.java
@@ -138,7 +138,9 @@ public class MineruXlsxDocumentParseServiceTest {
XlsxParseArtifact artifact = extractXlsxArtifact(result);
Assert.assertTrue(result.getMarkdown().contains("# Sheet1"));
- Assert.assertTrue(result.getMarkdown().contains("_empty sheet_"));
+ Assert.assertFalse(result.getMarkdown().contains("_empty sheet_"));
+ Assert.assertTrue(result.getMarkdown().contains("## Sheet1 图片内容"));
+ Assert.assertTrue(result.getMarkdown().contains("[IMG:sheet1-r2c2-001]"));
Assert.assertTrue(result.getMarkdown().contains("## Sheet1 图片说明"));
Assert.assertTrue(result.getMarkdown().contains(""));
Assert.assertTrue(result.getMarkdown().contains("- 占位符:[IMG:sheet1-r2c2-001]"));
@@ -147,6 +149,30 @@ public class MineruXlsxDocumentParseServiceTest {
Assert.assertEquals("sheet1-r2c2-001", artifact.getSheetImages().get(0).getReferenceKeys().get(0));
}
+ @Test
+ public void shouldKeepMarkdownImageReferenceWhenImageAppendixDisabled() throws Exception {
+ RecordingClient client = new RecordingClient(defaultProperties());
+ MineruMapper mapper = new MineruMapper(defaultProperties());
+ MineruXlsxDocumentParseService service = new MineruXlsxDocumentParseService(
+ defaultProperties(),
+ client,
+ mapper,
+ new DocumentAsyncTaskManager(new InMemoryDocumentAsyncTaskRepository(), directExecutor())
+ );
+
+ XlsxParseRequest request = new XlsxParseRequest();
+ request.setIncludeImageAppendix(Boolean.FALSE);
+ request.addFile(ParseFile.of("image-only.xlsx", buildWorkbookBytesWithImageOnlySheet()));
+
+ ParseResponse response = service.parse(request);
+ ParseResult result = response.getResults().get(0);
+
+ Assert.assertTrue(result.getMarkdown().contains("## Sheet1 图片内容"));
+ Assert.assertTrue(result.getMarkdown().contains("[IMG:sheet1-r2c2-001]"));
+ Assert.assertTrue(result.getMarkdown().contains(""));
+ Assert.assertFalse(result.getMarkdown().contains("## Sheet1 图片说明"));
+ }
+
@Test
public void shouldTrackAsyncLifecycleAndExposeResult() throws Exception {
RecordingClient client = new RecordingClient(defaultProperties());
diff --git a/easy-agents-spring-boot-starter/src/main/java/com/easyagents/spring/boot/document/mineru/CommonMineruDocumentProperties.java b/easy-agents-spring-boot-starter/src/main/java/com/easyagents/spring/boot/document/mineru/CommonMineruDocumentProperties.java
index efd79a5..1e26e56 100644
--- a/easy-agents-spring-boot-starter/src/main/java/com/easyagents/spring/boot/document/mineru/CommonMineruDocumentProperties.java
+++ b/easy-agents-spring-boot-starter/src/main/java/com/easyagents/spring/boot/document/mineru/CommonMineruDocumentProperties.java
@@ -12,7 +12,7 @@ import java.util.List;
* @author Codex
* @since 2026-04-16
*/
-@ConfigurationProperties(prefix = "easy-agents.document.mineru")
+@ConfigurationProperties(prefix = "easy-agents.document.ocr.mineru")
public class CommonMineruDocumentProperties {
private String baseUrl;
diff --git a/easy-agents-spring-boot-starter/src/main/java/com/easyagents/spring/boot/document/pdf/mineru/MineruDocumentProperties.java b/easy-agents-spring-boot-starter/src/main/java/com/easyagents/spring/boot/document/pdf/mineru/MineruDocumentProperties.java
deleted file mode 100644
index ba30f29..0000000
--- a/easy-agents-spring-boot-starter/src/main/java/com/easyagents/spring/boot/document/pdf/mineru/MineruDocumentProperties.java
+++ /dev/null
@@ -1,119 +0,0 @@
-package com.easyagents.spring.boot.document.pdf.mineru;
-
-import org.springframework.boot.context.properties.ConfigurationProperties;
-
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-
-/**
- * MinerU Spring Boot 配置。
- *
- * @author Codex
- * @since 2026-04-14
- */
-@ConfigurationProperties(prefix = "easy-agents.document.pdf.mineru")
-public class MineruDocumentProperties {
-
- private String baseUrl;
- private Integer connectTimeoutMs = 3000;
- private Integer readTimeoutMs = 600000;
- private Integer writeTimeoutMs = 600000;
- private Integer pollIntervalMs = 1000;
- private Integer resultTimeoutMs = 1800000;
- private String defaultBackend = "vlm-http-client";
- private String defaultParseMethod = "auto";
- private List defaultLangList = new ArrayList(Arrays.asList("ch"));
- private Boolean defaultFormulaEnable = true;
- private Boolean defaultTableEnable = true;
-
- public String getBaseUrl() {
- return baseUrl;
- }
-
- public void setBaseUrl(String baseUrl) {
- this.baseUrl = baseUrl;
- }
-
- public Integer getConnectTimeoutMs() {
- return connectTimeoutMs;
- }
-
- public void setConnectTimeoutMs(Integer connectTimeoutMs) {
- this.connectTimeoutMs = connectTimeoutMs;
- }
-
- public Integer getReadTimeoutMs() {
- return readTimeoutMs;
- }
-
- public void setReadTimeoutMs(Integer readTimeoutMs) {
- this.readTimeoutMs = readTimeoutMs;
- }
-
- public Integer getWriteTimeoutMs() {
- return writeTimeoutMs;
- }
-
- public void setWriteTimeoutMs(Integer writeTimeoutMs) {
- this.writeTimeoutMs = writeTimeoutMs;
- }
-
- public Integer getPollIntervalMs() {
- return pollIntervalMs;
- }
-
- public void setPollIntervalMs(Integer pollIntervalMs) {
- this.pollIntervalMs = pollIntervalMs;
- }
-
- public Integer getResultTimeoutMs() {
- return resultTimeoutMs;
- }
-
- public void setResultTimeoutMs(Integer resultTimeoutMs) {
- this.resultTimeoutMs = resultTimeoutMs;
- }
-
- public String getDefaultBackend() {
- return defaultBackend;
- }
-
- public void setDefaultBackend(String defaultBackend) {
- this.defaultBackend = defaultBackend;
- }
-
- public String getDefaultParseMethod() {
- return defaultParseMethod;
- }
-
- public void setDefaultParseMethod(String defaultParseMethod) {
- this.defaultParseMethod = defaultParseMethod;
- }
-
- public List getDefaultLangList() {
- return defaultLangList;
- }
-
- public void setDefaultLangList(List defaultLangList) {
- this.defaultLangList = defaultLangList == null
- ? new ArrayList(Arrays.asList("ch"))
- : defaultLangList;
- }
-
- public Boolean getDefaultFormulaEnable() {
- return defaultFormulaEnable;
- }
-
- public void setDefaultFormulaEnable(Boolean defaultFormulaEnable) {
- this.defaultFormulaEnable = defaultFormulaEnable;
- }
-
- public Boolean getDefaultTableEnable() {
- return defaultTableEnable;
- }
-
- public void setDefaultTableEnable(Boolean defaultTableEnable) {
- this.defaultTableEnable = defaultTableEnable;
- }
-}
diff --git a/easy-agents-spring-boot-starter/src/main/java/com/easyagents/spring/boot/document/pdf/mineru/MineruPdfAutoConfiguration.java b/easy-agents-spring-boot-starter/src/main/java/com/easyagents/spring/boot/document/pdf/mineru/MineruPdfAutoConfiguration.java
index 58ed9ec..9cdcc74 100644
--- a/easy-agents-spring-boot-starter/src/main/java/com/easyagents/spring/boot/document/pdf/mineru/MineruPdfAutoConfiguration.java
+++ b/easy-agents-spring-boot-starter/src/main/java/com/easyagents/spring/boot/document/pdf/mineru/MineruPdfAutoConfiguration.java
@@ -10,8 +10,11 @@ import org.springframework.boot.autoconfigure.condition.ConditionalOnClass;
import org.springframework.boot.autoconfigure.condition.ConditionalOnMissingBean;
import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
import org.springframework.boot.context.properties.EnableConfigurationProperties;
+import org.springframework.beans.factory.config.BeanFactoryPostProcessor;
+import org.springframework.beans.factory.support.BeanDefinitionRegistry;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
+import org.springframework.context.annotation.Primary;
/**
* MinerU PDF 文档解析自动装配。
@@ -21,50 +24,94 @@ import org.springframework.context.annotation.Configuration;
*/
@Configuration(proxyBeanMethods = false)
@ConditionalOnClass(MineruPdfDocumentParseService.class)
-@ConditionalOnProperty(prefix = "easy-agents.document.pdf", name = "provider", havingValue = "mineru")
-@EnableConfigurationProperties({MineruDocumentProperties.class, CommonMineruDocumentProperties.class})
+@ConditionalOnProperty(prefix = "easy-agents.document.ocr", name = "provider", havingValue = "mineru")
+@EnableConfigurationProperties(CommonMineruDocumentProperties.class)
public class MineruPdfAutoConfiguration {
+ public static final String DEFAULT_DOCUMENT_PARSE_SERVICE_BEAN_NAME = "documentParseService";
+
/**
* 注册统一 PDF 解析服务。
*
- * @param properties Spring Boot 配置
+ * @param commonProperties Spring Boot 配置
* @return PDF 解析服务
*/
@Bean
+ @Primary
@ConditionalOnMissingBean(PdfDocumentParseService.class)
- public PdfDocumentParseService pdfDocumentParseService(MineruDocumentProperties properties,
- CommonMineruDocumentProperties commonProperties) {
- return new MineruPdfDocumentParseService(toMineruProperties(properties, commonProperties));
+ public PdfDocumentParseService pdfDocumentParseService(CommonMineruDocumentProperties commonProperties) {
+ return new MineruPdfDocumentParseService(toMineruProperties(commonProperties));
}
/**
- * 将 PDF 服务以统一文档解析服务类型暴露,便于调用方直接按抽象注入。
+ * Registers the default document parse service name as an alias of the PDF service
+ * instead of creating a second bean of the same type.
+ * An alias is used rather than an additional {@link DocumentParseService} bean so that
+ * the default name stays available without breaking unique injection by
+ * {@link PdfDocumentParseService} type.
+ * The alias target's bean definition is also marked primary when it is not already.
*
- * @param pdfDocumentParseService PDF 解析服务
- * @return 统一文档解析服务
+ * @return the bean factory post-processor that registers the alias
*/
@Bean
- @ConditionalOnMissingBean(DocumentParseService.class)
- public DocumentParseService documentParseService(PdfDocumentParseService pdfDocumentParseService) {
- return pdfDocumentParseService;
+    public static BeanFactoryPostProcessor defaultDocumentParseServiceAliasPostProcessor() {
+        return beanFactory -> {
+            // Aliases and bean definitions are only reachable through a BeanDefinitionRegistry.
+            if (beanFactory instanceof BeanDefinitionRegistry) {
+                BeanDefinitionRegistry definitions = (BeanDefinitionRegistry) beanFactory;
+                String pdfBeanName = resolveAliasTarget(beanFactory, definitions);
+                if (!StringUtil.hasText(pdfBeanName)) {
+                    // No unambiguous PDF service to point the alias at.
+                    return;
+                }
+                // Leave an existing bean or alias with the default name untouched.
+                boolean defaultNameTaken =
+                        definitions.containsBeanDefinition(DEFAULT_DOCUMENT_PARSE_SERVICE_BEAN_NAME)
+                                || definitions.isAlias(DEFAULT_DOCUMENT_PARSE_SERVICE_BEAN_NAME);
+                if (defaultNameTaken) {
+                    return;
+                }
+                if (definitions.containsBeanDefinition(pdfBeanName)) {
+                    if (!definitions.getBeanDefinition(pdfBeanName).isPrimary()) {
+                        definitions.getBeanDefinition(pdfBeanName).setPrimary(true);
+                    }
+                }
+                definitions.registerAlias(pdfBeanName, DEFAULT_DOCUMENT_PARSE_SERVICE_BEAN_NAME);
+            }
+        };
}
- private MineruProperties toMineruProperties(MineruDocumentProperties properties,
- CommonMineruDocumentProperties commonProperties) {
+    /**
+     * Picks the bean name that the default document parse service alias should point to.
+     * <p>
+     * A single {@link PdfDocumentParseService} candidate is used directly. With several
+     * candidates, the one whose bean definition is marked primary is chosen; candidates
+     * without a bean definition in the registry are ignored. If no candidate or more than
+     * one candidate is primary, no target is chosen.
+     *
+     * @param beanFactory bean factory queried for {@link PdfDocumentParseService} bean names
+     * @param registry    registry used to inspect the candidates' bean definitions
+     * @return the alias target bean name, or {@code null} when it cannot be determined unambiguously
+     */
+    private static String resolveAliasTarget(org.springframework.beans.factory.config.ConfigurableListableBeanFactory beanFactory,
+                                             BeanDefinitionRegistry registry) {
+        // includeNonSingletons = true, allowEagerInit = false
+        String[] pdfBeanNames = beanFactory.getBeanNamesForType(PdfDocumentParseService.class, true, false);
+        int candidateCount = pdfBeanNames == null ? 0 : pdfBeanNames.length;
+        if (candidateCount <= 1) {
+            return candidateCount == 1 ? pdfBeanNames[0] : null;
+        }
+        String uniquePrimary = null;
+        for (int i = 0; i < candidateCount; i++) {
+            String name = pdfBeanNames[i];
+            boolean markedPrimary = registry.containsBeanDefinition(name)
+                    && registry.getBeanDefinition(name).isPrimary();
+            if (markedPrimary) {
+                if (uniquePrimary != null) {
+                    // A second primary candidate makes the choice ambiguous.
+                    return null;
+                }
+                uniquePrimary = name;
+            }
+        }
+        return uniquePrimary;
+    }
+
+    /**
+     * Converts the starter-level MinerU configuration into core {@link MineruProperties}.
+     * <p>
+     * Values are copied only when {@code commonProperties} is non-null and carries a non-blank
+     * base URL. Otherwise a freshly constructed {@link MineruProperties} is returned untouched,
+     * so none of its fields are overwritten with {@code null}.
+     *
+     * @param commonProperties Spring Boot configuration properties, may be {@code null}
+     * @return the MinerU properties handed to the parse service
+     */
+    private MineruProperties toMineruProperties(CommonMineruDocumentProperties commonProperties) {
MineruProperties mineruProperties = new MineruProperties();
boolean useCommon = commonProperties != null && StringUtil.hasText(commonProperties.getBaseUrl());
- mineruProperties.setBaseUrl(useCommon ? commonProperties.getBaseUrl() : properties.getBaseUrl());
- mineruProperties.setConnectTimeoutMs(useCommon ? commonProperties.getConnectTimeoutMs() : properties.getConnectTimeoutMs());
- mineruProperties.setReadTimeoutMs(useCommon ? commonProperties.getReadTimeoutMs() : properties.getReadTimeoutMs());
- mineruProperties.setWriteTimeoutMs(useCommon ? commonProperties.getWriteTimeoutMs() : properties.getWriteTimeoutMs());
- mineruProperties.setPollIntervalMs(useCommon ? commonProperties.getPollIntervalMs() : properties.getPollIntervalMs());
- mineruProperties.setResultTimeoutMs(useCommon ? commonProperties.getResultTimeoutMs() : properties.getResultTimeoutMs());
- mineruProperties.setDefaultBackend(useCommon ? commonProperties.getDefaultBackend() : properties.getDefaultBackend());
- mineruProperties.setDefaultParseMethod(useCommon ? commonProperties.getDefaultParseMethod() : properties.getDefaultParseMethod());
- mineruProperties.setDefaultLangList(useCommon ? commonProperties.getDefaultLangList() : properties.getDefaultLangList());
- mineruProperties.setDefaultFormulaEnable(useCommon ? commonProperties.getDefaultFormulaEnable() : properties.getDefaultFormulaEnable());
- mineruProperties.setDefaultTableEnable(useCommon ? commonProperties.getDefaultTableEnable() : properties.getDefaultTableEnable());
+        if (!useCommon) {
+            // Nothing usable is configured: keep the fresh instance as constructed instead of
+            // pushing null into every setter.
+            return mineruProperties;
+        }
+        mineruProperties.setBaseUrl(commonProperties.getBaseUrl());
+        mineruProperties.setConnectTimeoutMs(commonProperties.getConnectTimeoutMs());
+        mineruProperties.setReadTimeoutMs(commonProperties.getReadTimeoutMs());
+        mineruProperties.setWriteTimeoutMs(commonProperties.getWriteTimeoutMs());
+        mineruProperties.setPollIntervalMs(commonProperties.getPollIntervalMs());
+        mineruProperties.setResultTimeoutMs(commonProperties.getResultTimeoutMs());
+        mineruProperties.setDefaultBackend(commonProperties.getDefaultBackend());
+        mineruProperties.setDefaultParseMethod(commonProperties.getDefaultParseMethod());
+        mineruProperties.setDefaultLangList(commonProperties.getDefaultLangList());
+        mineruProperties.setDefaultFormulaEnable(commonProperties.getDefaultFormulaEnable());
+        mineruProperties.setDefaultTableEnable(commonProperties.getDefaultTableEnable());
return mineruProperties;
}
}
diff --git a/easy-agents-spring-boot-starter/src/main/java/com/easyagents/spring/boot/document/pptx/MineruPptxAutoConfiguration.java b/easy-agents-spring-boot-starter/src/main/java/com/easyagents/spring/boot/document/pptx/MineruPptxAutoConfiguration.java
index 3e1078a..c367776 100644
--- a/easy-agents-spring-boot-starter/src/main/java/com/easyagents/spring/boot/document/pptx/MineruPptxAutoConfiguration.java
+++ b/easy-agents-spring-boot-starter/src/main/java/com/easyagents/spring/boot/document/pptx/MineruPptxAutoConfiguration.java
@@ -6,6 +6,7 @@ import com.easyagents.document.core.mineru.MineruProperties;
import com.easyagents.document.pptx.PptxDocumentParseService;
import com.easyagents.document.pptx.mineru.MineruPptxDocumentParseService;
import com.easyagents.spring.boot.document.mineru.CommonMineruDocumentProperties;
+import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.boot.autoconfigure.condition.ConditionalOnClass;
import org.springframework.boot.autoconfigure.condition.ConditionalOnMissingBean;
import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
@@ -24,21 +25,24 @@ import java.util.concurrent.Executors;
*/
@Configuration(proxyBeanMethods = false)
@ConditionalOnClass(MineruPptxDocumentParseService.class)
-@ConditionalOnProperty(prefix = "easy-agents.document.pptx", name = "enabled", havingValue = "true")
-@EnableConfigurationProperties({CommonMineruDocumentProperties.class, PptxDocumentProperties.class})
+@ConditionalOnProperty(prefix = "easy-agents.document.ocr", name = "provider", havingValue = "mineru")
+@EnableConfigurationProperties(CommonMineruDocumentProperties.class)
public class MineruPptxAutoConfiguration {
+ public static final String PPTX_DOCUMENT_ASYNC_TASK_MANAGER_BEAN_NAME = "pptxDocumentAsyncTaskManager";
+ private static final int DEFAULT_ASYNC_THREADS = 2;
+
+ /**
+ * Creates the async task manager injected into the PPTX parse service. It is built from an
+ * in-memory task repository and a fixed thread pool of {@code DEFAULT_ASYNC_THREADS} threads.
+ * The bean is skipped when a bean named {@code pptxDocumentAsyncTaskManager} already exists.
+ *
+ * @return the PPTX async task manager
+ */
@Bean
- @ConditionalOnMissingBean(name = "pptxDocumentAsyncTaskManager")
- public DocumentAsyncTaskManager pptxDocumentAsyncTaskManager(PptxDocumentProperties properties) {
- int threadCount = properties.getAsyncThreads() == null || properties.getAsyncThreads() <= 0 ? 2 : properties.getAsyncThreads();
- ExecutorService executorService = Executors.newFixedThreadPool(threadCount);
+ @ConditionalOnMissingBean(name = PPTX_DOCUMENT_ASYNC_TASK_MANAGER_BEAN_NAME)
+ public DocumentAsyncTaskManager pptxDocumentAsyncTaskManager() {
+ // The pool size is now a fixed constant; the removed code read it from PptxDocumentProperties.
+ // NOTE(review): nothing in this method shuts the pool down - confirm that
+ // DocumentAsyncTaskManager (or the container) stops it when the context closes.
+ ExecutorService executorService = Executors.newFixedThreadPool(DEFAULT_ASYNC_THREADS);
return new DocumentAsyncTaskManager(new InMemoryDocumentAsyncTaskRepository(), executorService);
}
+ /**
+ * Creates the MinerU-backed PPTX parse service unless another
+ * {@link PptxDocumentParseService} bean is already defined.
+ *
+ * @param commonProperties Spring Boot configuration properties holding the MinerU settings
+ * @param pptxDocumentAsyncTaskManager task manager selected by bean name through the qualifier,
+ * so the injection does not depend on it being the only {@link DocumentAsyncTaskManager} bean
+ * @return the PPTX parse service
+ */
@Bean
@ConditionalOnMissingBean(PptxDocumentParseService.class)
public PptxDocumentParseService pptxDocumentParseService(CommonMineruDocumentProperties commonProperties,
+ @Qualifier(PPTX_DOCUMENT_ASYNC_TASK_MANAGER_BEAN_NAME)
DocumentAsyncTaskManager pptxDocumentAsyncTaskManager) {
return new MineruPptxDocumentParseService(toMineruProperties(commonProperties), pptxDocumentAsyncTaskManager);
}
diff --git a/easy-agents-spring-boot-starter/src/main/java/com/easyagents/spring/boot/document/pptx/PptxDocumentProperties.java b/easy-agents-spring-boot-starter/src/main/java/com/easyagents/spring/boot/document/pptx/PptxDocumentProperties.java
deleted file mode 100644
index d170f82..0000000
--- a/easy-agents-spring-boot-starter/src/main/java/com/easyagents/spring/boot/document/pptx/PptxDocumentProperties.java
+++ /dev/null
@@ -1,32 +0,0 @@
-package com.easyagents.spring.boot.document.pptx;
-
-import org.springframework.boot.context.properties.ConfigurationProperties;
-
-/**
- * PPTX 文档配置。
- *
- * @author Codex
- * @since 2026-04-16
- */
-@ConfigurationProperties(prefix = "easy-agents.document.pptx")
-public class PptxDocumentProperties {
-
- private Boolean enabled = false;
- private Integer asyncThreads = 2;
-
- public Boolean getEnabled() {
- return enabled;
- }
-
- public void setEnabled(Boolean enabled) {
- this.enabled = enabled;
- }
-
- public Integer getAsyncThreads() {
- return asyncThreads;
- }
-
- public void setAsyncThreads(Integer asyncThreads) {
- this.asyncThreads = asyncThreads;
- }
-}
diff --git a/easy-agents-spring-boot-starter/src/main/java/com/easyagents/spring/boot/document/xlsx/MineruXlsxAutoConfiguration.java b/easy-agents-spring-boot-starter/src/main/java/com/easyagents/spring/boot/document/xlsx/MineruXlsxAutoConfiguration.java
index 519b5ef..848e7a2 100644
--- a/easy-agents-spring-boot-starter/src/main/java/com/easyagents/spring/boot/document/xlsx/MineruXlsxAutoConfiguration.java
+++ b/easy-agents-spring-boot-starter/src/main/java/com/easyagents/spring/boot/document/xlsx/MineruXlsxAutoConfiguration.java
@@ -6,6 +6,7 @@ import com.easyagents.document.core.mineru.MineruProperties;
import com.easyagents.document.xlsx.XlsxDocumentParseService;
import com.easyagents.document.xlsx.mineru.MineruXlsxDocumentParseService;
import com.easyagents.spring.boot.document.mineru.CommonMineruDocumentProperties;
+import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.boot.autoconfigure.condition.ConditionalOnClass;
import org.springframework.boot.autoconfigure.condition.ConditionalOnMissingBean;
import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
@@ -24,21 +25,24 @@ import java.util.concurrent.Executors;
*/
@Configuration(proxyBeanMethods = false)
@ConditionalOnClass(MineruXlsxDocumentParseService.class)
-@ConditionalOnProperty(prefix = "easy-agents.document.xlsx", name = "enabled", havingValue = "true")
-@EnableConfigurationProperties({CommonMineruDocumentProperties.class, XlsxDocumentProperties.class})
+@ConditionalOnProperty(prefix = "easy-agents.document.ocr", name = "provider", havingValue = "mineru")
+@EnableConfigurationProperties(CommonMineruDocumentProperties.class)
public class MineruXlsxAutoConfiguration {
+ public static final String XLSX_DOCUMENT_ASYNC_TASK_MANAGER_BEAN_NAME = "xlsxDocumentAsyncTaskManager";
+ private static final int DEFAULT_ASYNC_THREADS = 2;
+
+ /**
+ * Creates the async task manager injected into the XLSX parse service. It is built from an
+ * in-memory task repository and a fixed thread pool of {@code DEFAULT_ASYNC_THREADS} threads.
+ * The bean is skipped when a bean named {@code xlsxDocumentAsyncTaskManager} already exists.
+ *
+ * @return the XLSX async task manager
+ */
@Bean
- @ConditionalOnMissingBean(name = "xlsxDocumentAsyncTaskManager")
- public DocumentAsyncTaskManager xlsxDocumentAsyncTaskManager(XlsxDocumentProperties properties) {
- int threadCount = properties.getAsyncThreads() == null || properties.getAsyncThreads() <= 0 ? 2 : properties.getAsyncThreads();
- ExecutorService executorService = Executors.newFixedThreadPool(threadCount);
+ @ConditionalOnMissingBean(name = XLSX_DOCUMENT_ASYNC_TASK_MANAGER_BEAN_NAME)
+ public DocumentAsyncTaskManager xlsxDocumentAsyncTaskManager() {
+ // The pool size is now a fixed constant; the removed code read it from XlsxDocumentProperties.
+ // NOTE(review): nothing in this method shuts the pool down - confirm that
+ // DocumentAsyncTaskManager (or the container) stops it when the context closes.
+ ExecutorService executorService = Executors.newFixedThreadPool(DEFAULT_ASYNC_THREADS);
return new DocumentAsyncTaskManager(new InMemoryDocumentAsyncTaskRepository(), executorService);
}
+ /**
+ * Creates the MinerU-backed XLSX parse service unless another
+ * {@link XlsxDocumentParseService} bean is already defined.
+ *
+ * @param commonProperties Spring Boot configuration properties holding the MinerU settings
+ * @param xlsxDocumentAsyncTaskManager task manager selected by bean name through the qualifier,
+ * so the injection does not depend on it being the only {@link DocumentAsyncTaskManager} bean
+ * @return the XLSX parse service
+ */
@Bean
@ConditionalOnMissingBean(XlsxDocumentParseService.class)
public XlsxDocumentParseService xlsxDocumentParseService(CommonMineruDocumentProperties commonProperties,
+ @Qualifier(XLSX_DOCUMENT_ASYNC_TASK_MANAGER_BEAN_NAME)
DocumentAsyncTaskManager xlsxDocumentAsyncTaskManager) {
return new MineruXlsxDocumentParseService(toMineruProperties(commonProperties), xlsxDocumentAsyncTaskManager);
}
diff --git a/easy-agents-spring-boot-starter/src/main/java/com/easyagents/spring/boot/document/xlsx/XlsxDocumentProperties.java b/easy-agents-spring-boot-starter/src/main/java/com/easyagents/spring/boot/document/xlsx/XlsxDocumentProperties.java
deleted file mode 100644
index 7c6818b..0000000
--- a/easy-agents-spring-boot-starter/src/main/java/com/easyagents/spring/boot/document/xlsx/XlsxDocumentProperties.java
+++ /dev/null
@@ -1,32 +0,0 @@
-package com.easyagents.spring.boot.document.xlsx;
-
-import org.springframework.boot.context.properties.ConfigurationProperties;
-
-/**
- * XLSX 文档配置。
- *
- * @author Codex
- * @since 2026-04-16
- */
-@ConfigurationProperties(prefix = "easy-agents.document.xlsx")
-public class XlsxDocumentProperties {
-
- private Boolean enabled = false;
- private Integer asyncThreads = 2;
-
- public Boolean getEnabled() {
- return enabled;
- }
-
- public void setEnabled(Boolean enabled) {
- this.enabled = enabled;
- }
-
- public Integer getAsyncThreads() {
- return asyncThreads;
- }
-
- public void setAsyncThreads(Integer asyncThreads) {
- this.asyncThreads = asyncThreads;
- }
-}
diff --git a/easy-agents-spring-boot-starter/src/test/java/com/easyagents/spring/boot/autoconfigure/StarterConditionalAutoConfigurationTest.java b/easy-agents-spring-boot-starter/src/test/java/com/easyagents/spring/boot/autoconfigure/StarterConditionalAutoConfigurationTest.java
index 8eb5092..1874118 100644
--- a/easy-agents-spring-boot-starter/src/test/java/com/easyagents/spring/boot/autoconfigure/StarterConditionalAutoConfigurationTest.java
+++ b/easy-agents-spring-boot-starter/src/test/java/com/easyagents/spring/boot/autoconfigure/StarterConditionalAutoConfigurationTest.java
@@ -1,6 +1,10 @@
package com.easyagents.spring.boot.autoconfigure;
import com.easyagents.document.core.DocumentParseService;
+import com.easyagents.document.core.entity.ParseRequest;
+import com.easyagents.document.core.entity.ParseResponse;
+import com.easyagents.document.core.entity.ParseTaskInfo;
+import com.easyagents.document.core.entity.ParseTaskStatus;
import com.easyagents.document.pdf.PdfDocumentParseService;
import com.easyagents.document.pptx.PptxDocumentParseService;
import com.easyagents.document.xlsx.XlsxDocumentParseService;
@@ -13,6 +17,8 @@ import com.easyagents.spring.boot.rag.ingestion.RagIngestionAutoConfiguration;
import com.easyagents.spring.boot.store.opensearch.OpenSearchAutoConfiguration;
import org.junit.Assert;
import org.junit.Test;
+import org.springframework.context.annotation.Bean;
+import org.springframework.context.annotation.Configuration;
import org.springframework.boot.test.context.runner.ApplicationContextRunner;
public class StarterConditionalAutoConfigurationTest {
@@ -49,27 +55,105 @@ public class StarterConditionalAutoConfigurationTest {
public void shouldCreateMineruDocumentBeansWhenConfigured() {
contextRunner
.withPropertyValues(
- "easy-agents.document.pdf.provider=mineru",
- "easy-agents.document.pdf.mineru.base-url=https://hub.wust.edu.cn/modelServer/mineru-api"
+ // The provider switch and the MinerU settings now live under easy-agents.document.ocr.
+ "easy-agents.document.ocr.provider=mineru",
+ "easy-agents.document.ocr.mineru.base-url=https://hub.wust.edu.cn/modelServer/mineru-api"
)
.run(context -> {
Assert.assertNotNull(context.getBean(PdfDocumentParseService.class));
+ // The same provider switch is expected to enable the PPTX and XLSX services as well.
+ Assert.assertNotNull(context.getBean(PptxDocumentParseService.class));
+ Assert.assertNotNull(context.getBean(XlsxDocumentParseService.class));
Assert.assertNotNull(context.getBean(DocumentParseService.class));
});
}
@Test
- public void shouldCreatePptxAndXlsxBeansWhenEnabled() {
+ public void shouldCreatePptxAndXlsxBeansWhenMineruOcrConfigured() {
contextRunner
.withPropertyValues(
- "easy-agents.document.mineru.base-url=https://hub.wust.edu.cn/modelServer/mineru-api",
- "easy-agents.document.pptx.enabled=true",
- "easy-agents.document.xlsx.enabled=true"
+ // The per-format "enabled" flags are gone; selecting the MinerU OCR provider is the only switch.
+ "easy-agents.document.ocr.provider=mineru",
+ "easy-agents.document.ocr.mineru.base-url=https://hub.wust.edu.cn/modelServer/mineru-api"
)
.run(context -> {
Assert.assertNotNull(context.getBean(PptxDocumentParseService.class));
Assert.assertNotNull(context.getBean(XlsxDocumentParseService.class));
- Assert.assertFalse(context.containsBean("documentParseService"));
+ // A lookup by the shared DocumentParseService type must now resolve to a bean.
+ Assert.assertNotNull(context.getBean(DocumentParseService.class));
});
}
+
+    /**
+     * Verifies that, with MinerU selected as the OCR provider, the PDF service is the bean behind
+     * the {@code documentParseService} name and also the one returned for a lookup by
+     * {@link DocumentParseService} type, while the PPTX and XLSX services are present too.
+     */
+    @Test
+    public void shouldKeepPdfAsDefaultDocumentParseServiceWhenMineruOcrConfigured() {
+        contextRunner
+                .withPropertyValues(
+                        "easy-agents.document.ocr.provider=mineru",
+                        "easy-agents.document.ocr.mineru.base-url=https://hub.wust.edu.cn/modelServer/mineru-api"
+                )
+                .run(context -> {
+                    PdfDocumentParseService pdfParser = context.getBean(PdfDocumentParseService.class);
+                    Assert.assertNotNull(pdfParser);
+
+                    PptxDocumentParseService pptxParser = context.getBean(PptxDocumentParseService.class);
+                    Assert.assertNotNull(pptxParser);
+
+                    XlsxDocumentParseService xlsxParser = context.getBean(XlsxDocumentParseService.class);
+                    Assert.assertNotNull(xlsxParser);
+
+                    // Lookup by the default name must yield the very same PDF instance.
+                    Object byDefaultName = context.getBean("documentParseService");
+                    Assert.assertSame(pdfParser, byDefaultName);
+
+                    // Lookup by the shared abstraction must also select the PDF instance.
+                    DocumentParseService byAbstractType = context.getBean(DocumentParseService.class);
+                    Assert.assertSame(pdfParser, byAbstractType);
+                });
+    }
+
+    /**
+     * Verifies that a user-defined PDF service registered under a custom bean name is reachable
+     * through the {@code documentParseService} name and through a lookup by
+     * {@link DocumentParseService} type.
+     */
+    @Test
+    public void shouldAliasCustomNamedPdfServiceAsDefaultDocumentParseService() {
+        // The custom configuration is registered before the starter configurations.
+        ApplicationContextRunner runnerWithCustomPdfService = new ApplicationContextRunner()
+                .withUserConfiguration(CustomPdfParseServiceConfiguration.class)
+                .withUserConfiguration(
+                        RagIngestionAutoConfiguration.class,
+                        OllamaAutoConfiguration.class,
+                        OpenSearchAutoConfiguration.class,
+                        MineruPdfAutoConfiguration.class,
+                        MineruPptxAutoConfiguration.class,
+                        MineruXlsxAutoConfiguration.class
+                )
+                .withPropertyValues(
+                        "easy-agents.document.ocr.provider=mineru",
+                        "easy-agents.document.ocr.mineru.base-url=https://hub.wust.edu.cn/modelServer/mineru-api"
+                );
+
+        runnerWithCustomPdfService.run(context -> {
+            PdfDocumentParseService resolvedPdfService = context.getBean(PdfDocumentParseService.class);
+
+            Object byDefaultName = context.getBean("documentParseService");
+            Assert.assertSame(resolvedPdfService, byDefaultName);
+
+            DocumentParseService byAbstractType = context.getBean(DocumentParseService.class);
+            Assert.assertSame(resolvedPdfService, byAbstractType);
+        });
+    }
+
+ /**
+ * Test configuration that contributes a user-defined PDF parse service under the
+ * non-default bean name {@code customPdfService}.
+ */
+ @Configuration(proxyBeanMethods = false)
+ static class CustomPdfParseServiceConfiguration {
+
+ // Explicit bean name, deliberately different from "documentParseService".
+ @Bean("customPdfService")
+ PdfDocumentParseService customPdfService() {
+ return new NoopPdfDocumentParseService();
+ }
+ }
+
+ /**
+ * Stub {@link PdfDocumentParseService} for wiring tests: every method returns a freshly
+ * constructed result object and ignores its argument.
+ */
+ static class NoopPdfDocumentParseService implements PdfDocumentParseService {
+
+ @Override
+ public ParseResponse parse(ParseRequest request) {
+ return new ParseResponse();
+ }
+
+ @Override
+ public ParseTaskStatus submit(ParseRequest request) {
+ return new ParseTaskStatus();
+ }
+
+ @Override
+ public ParseTaskStatus queryTask(String taskId) {
+ return new ParseTaskStatus();
+ }
+
+ @Override
+ public ParseResponse queryResult(String taskId) {
+ return new ParseResponse();
+ }
+
+ @Override
+ public ParseTaskInfo queryTaskInfo(String taskId) {
+ return new ParseTaskInfo();
+ }
+ }
}