refactor: 收敛文档 OCR 配置并补齐 Office 解析兼容
- 统一 MinerU OCR 配置结构并移除分模块冗余属性类
- 补齐 JSON 字符串化结果拆包、XLSX 图片兼容与对应回归测试
This commit is contained in:
@@ -81,6 +81,29 @@ public class MineruPptxDocumentParseServiceTest {
|
||||
Assert.assertEquals(1, taskInfo.getResult().getResults().size());
|
||||
}
|
||||
|
||||
/**
 * Regression test: parsing a PPTX still succeeds when the stubbed MinerU client
 * returns its artifacts as JSON strings instead of JSON objects.
 *
 * <p>The {@code RecordingClient} is created with {@code stringifyArtifacts = true};
 * the service is then asked to parse a single file named {@code demo.pptx}.
 * The test expects exactly one parse result whose markdown contains
 * {@code "slide-ocr-1"}, whose block list is non-empty, and which exposes
 * exactly two images.
 *
 * @throws IOException declared by the test signature; presumably raised while
 *         building the in-memory PPTX fixture (the helper is only partly visible here)
 */
@Test
|
||||
public void shouldSupportStringifiedMineruSlideArtifacts() throws IOException {
|
||||
RecordingClient client = new RecordingClient(defaultProperties(), true);
|
||||
MineruMapper mapper = new MineruMapper(defaultProperties());
|
||||
MineruPptxDocumentParseService service = new MineruPptxDocumentParseService(
|
||||
defaultProperties(),
|
||||
client,
|
||||
mapper,
|
||||
new DocumentAsyncTaskManager(new InMemoryDocumentAsyncTaskRepository(), directExecutor())
|
||||
);
|
||||
|
||||
PptxParseRequest request = new PptxParseRequest();
|
||||
request.addFile(ParseFile.of("demo.pptx", buildPptxBytes()));
|
||||
|
||||
ParseResponse response = service.parse(request);
|
||||
|
||||
Assert.assertEquals(1, response.getResults().size());
|
||||
ParseResult result = response.getResults().get(0);
|
||||
Assert.assertTrue(result.getMarkdown().contains("slide-ocr-1"));
|
||||
Assert.assertFalse(result.getBlocks().isEmpty());
|
||||
Assert.assertEquals(2, result.getImages().size());
|
||||
}
|
||||
|
||||
private byte[] buildPptxBytes() throws IOException {
|
||||
XMLSlideShow slideShow = new XMLSlideShow();
|
||||
slideShow.setPageSize(new java.awt.Dimension(640, 360));
|
||||
@@ -117,9 +140,15 @@ public class MineruPptxDocumentParseServiceTest {
|
||||
private static class RecordingClient extends MineruClient {
|
||||
|
||||
private int parseCount;
|
||||
private final boolean stringifyArtifacts;
|
||||
|
||||
/**
 * Creates a recording client with {@code stringifyArtifacts} set to {@code false}
 * by delegating to the two-argument constructor.
 *
 * @param properties MinerU configuration forwarded to the two-argument constructor
 */
private RecordingClient(MineruProperties properties) {
|
||||
this(properties, false);
|
||||
}
|
||||
|
||||
/**
 * Creates a recording client and stores whether stubbed artifacts should be stringified.
 *
 * <p>The superclass is initialised with {@code properties} and a fresh
 * {@code MineruMapper} built from the same properties.
 *
 * @param properties         MinerU configuration passed to the superclass and the mapper
 * @param stringifyArtifacts when {@code true}, the stubbed payload stores
 *                           {@code middle_json}, {@code content_list} and each per-slide
 *                           result via {@code toJSONString()} rather than as JSON objects
 */
private RecordingClient(MineruProperties properties, boolean stringifyArtifacts) {
|
||||
super(properties, new MineruMapper(properties));
|
||||
this.stringifyArtifacts = stringifyArtifacts;
|
||||
}
|
||||
|
||||
@Override
|
||||
@@ -134,10 +163,10 @@ public class MineruPptxDocumentParseServiceTest {
|
||||
payload.put("version", "3.0.9");
|
||||
JSONObject result = new JSONObject();
|
||||
result.put("md_content", "slide-ocr-" + index);
|
||||
result.put("middle_json", middleJson());
|
||||
result.put("content_list", contentList(index));
|
||||
result.put("middle_json", stringifyArtifacts ? middleJson().toJSONString() : middleJson());
|
||||
result.put("content_list", stringifyArtifacts ? contentList(index).toJSONString() : contentList(index));
|
||||
JSONObject results = new JSONObject();
|
||||
results.put("slide-" + index, result);
|
||||
results.put("slide-" + index, stringifyArtifacts ? result.toJSONString() : result);
|
||||
payload.put("results", results);
|
||||
return payload;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user