feat: 扩展 Office 文档解析能力
- 重构 document-core 与 MinerU 公共层,补齐 Office 异步任务基础设施
- 新增 PPTX/XLSX 解析模块与 starter 自动装配
- 补充 README 与相关测试覆盖
This commit is contained in:
@@ -0,0 +1,13 @@
|
||||
package com.easyagents.document.pptx;
|
||||
|
||||
import com.easyagents.document.core.DocumentParseService;
|
||||
import com.easyagents.document.core.entity.PptxParseRequest;
|
||||
|
||||
/**
|
||||
* PPTX 文档解析服务。
|
||||
*
|
||||
* @author Codex
|
||||
* @since 2026-04-16
|
||||
*/
|
||||
public interface PptxDocumentParseService extends DocumentParseService<PptxParseRequest> {
|
||||
}
|
||||
@@ -0,0 +1,17 @@
|
||||
package com.easyagents.document.pptx;
|
||||
|
||||
/**
|
||||
* PPTX provider SPI。
|
||||
*
|
||||
* @author Codex
|
||||
* @since 2026-04-16
|
||||
*/
|
||||
public interface PptxDocumentProvider extends PptxDocumentParseService {
|
||||
|
||||
/**
|
||||
* 获取 provider 标识。
|
||||
*
|
||||
* @return provider 名称
|
||||
*/
|
||||
String getProvider();
|
||||
}
|
||||
@@ -0,0 +1,408 @@
|
||||
package com.easyagents.document.pptx.mineru;
|
||||
|
||||
import com.easyagents.core.util.StringUtil;
|
||||
import com.easyagents.document.core.async.DocumentAsyncTaskManager;
|
||||
import com.easyagents.document.core.async.DocumentAsyncTaskRepository;
|
||||
import com.easyagents.document.core.async.DocumentAsyncTaskUpdater;
|
||||
import com.easyagents.document.core.async.InMemoryDocumentAsyncTaskRepository;
|
||||
import com.easyagents.document.core.mineru.MineruClient;
|
||||
import com.easyagents.document.core.mineru.MineruMapper;
|
||||
import com.easyagents.document.core.mineru.MineruProperties;
|
||||
import com.easyagents.document.core.entity.DocumentBlock;
|
||||
import com.easyagents.document.core.entity.DocumentImage;
|
||||
import com.easyagents.document.core.entity.DocumentPage;
|
||||
import com.easyagents.document.core.entity.DocumentTable;
|
||||
import com.easyagents.document.core.entity.ParseFile;
|
||||
import com.easyagents.document.core.entity.ParseRequest;
|
||||
import com.easyagents.document.core.entity.ParseResponse;
|
||||
import com.easyagents.document.core.entity.ParseResult;
|
||||
import com.easyagents.document.core.entity.PptxParseRequest;
|
||||
import com.easyagents.document.core.support.AbstractAsyncDocumentParseService;
|
||||
import com.easyagents.document.pptx.PptxDocumentProvider;
|
||||
import com.easyagents.document.pptx.model.PptxParseArtifact;
|
||||
import com.easyagents.document.pptx.model.PptxSlideArtifact;
|
||||
import org.apache.poi.xslf.usermodel.XMLSlideShow;
|
||||
import org.apache.poi.xslf.usermodel.XSLFSlide;
|
||||
|
||||
import javax.imageio.ImageIO;
|
||||
import java.awt.Color;
|
||||
import java.awt.Dimension;
|
||||
import java.awt.Graphics2D;
|
||||
import java.awt.RenderingHints;
|
||||
import java.awt.image.BufferedImage;
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.ExecutorService;
|
||||
import java.util.concurrent.Executors;
|
||||
|
||||
/**
|
||||
* 基于 MinerU 的 PPTX 文档解析服务。
|
||||
*
|
||||
* @author Codex
|
||||
* @since 2026-04-16
|
||||
*/
|
||||
public class MineruPptxDocumentParseService extends AbstractAsyncDocumentParseService<PptxParseRequest> implements PptxDocumentProvider {
|
||||
|
||||
public static final String PROVIDER_NAME = "mineru";
|
||||
|
||||
private final MineruProperties properties;
|
||||
private final MineruClient client;
|
||||
private final MineruMapper mapper;
|
||||
|
||||
/**
|
||||
* 创建服务实例。
|
||||
*
|
||||
* @param properties MinerU 配置
|
||||
*/
|
||||
public MineruPptxDocumentParseService(MineruProperties properties) {
|
||||
this(properties, new MineruMapper(properties));
|
||||
}
|
||||
|
||||
/**
|
||||
* 创建服务实例。
|
||||
*
|
||||
* @param properties MinerU 配置
|
||||
* @param mapper MinerU 映射器
|
||||
*/
|
||||
public MineruPptxDocumentParseService(MineruProperties properties, MineruMapper mapper) {
|
||||
this(properties, new MineruClient(properties, mapper), mapper, defaultTaskManager());
|
||||
}
|
||||
|
||||
/**
|
||||
* 创建服务实例。
|
||||
*
|
||||
* @param properties MinerU 配置
|
||||
* @param taskManager 异步任务管理器
|
||||
*/
|
||||
public MineruPptxDocumentParseService(MineruProperties properties, DocumentAsyncTaskManager taskManager) {
|
||||
this(properties, new MineruMapper(properties), taskManager);
|
||||
}
|
||||
|
||||
/**
|
||||
* 创建服务实例。
|
||||
*
|
||||
* @param properties MinerU 配置
|
||||
* @param mapper MinerU 映射器
|
||||
* @param taskManager 异步任务管理器
|
||||
*/
|
||||
public MineruPptxDocumentParseService(MineruProperties properties,
|
||||
MineruMapper mapper,
|
||||
DocumentAsyncTaskManager taskManager) {
|
||||
this(properties, new MineruClient(properties, mapper), mapper, taskManager);
|
||||
}
|
||||
|
||||
/**
|
||||
* 创建服务实例。
|
||||
*
|
||||
* @param properties MinerU 配置
|
||||
* @param client MinerU 客户端
|
||||
* @param mapper MinerU 映射器
|
||||
* @param taskManager 异步任务管理器
|
||||
*/
|
||||
public MineruPptxDocumentParseService(MineruProperties properties,
|
||||
MineruClient client,
|
||||
MineruMapper mapper,
|
||||
DocumentAsyncTaskManager taskManager) {
|
||||
super(taskManager);
|
||||
this.properties = properties;
|
||||
this.client = client;
|
||||
this.mapper = mapper;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getProvider() {
|
||||
return PROVIDER_NAME;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected PptxParseRequest normalizeRequest(ParseRequest request) {
|
||||
PptxParseRequest normalized = PptxParseRequest.from(request);
|
||||
if (normalized.getFiles() == null || normalized.getFiles().isEmpty()) {
|
||||
throw new IllegalArgumentException("PptxParseRequest files must not be empty");
|
||||
}
|
||||
normalized.setBackend(StringUtil.hasText(normalized.getBackend()) ? normalized.getBackend() : properties.getDefaultBackend());
|
||||
if (normalized.getLanguages() == null || normalized.getLanguages().isEmpty()) {
|
||||
normalized.setLanguages(new ArrayList<String>(properties.getDefaultLangList()));
|
||||
}
|
||||
normalized.setReturnMarkdown(normalized.getReturnMarkdown() == null ? Boolean.TRUE : normalized.getReturnMarkdown());
|
||||
normalized.setReturnMiddleJson(normalized.getReturnMiddleJson() == null ? Boolean.TRUE : normalized.getReturnMiddleJson());
|
||||
normalized.setReturnContentList(normalized.getReturnContentList() == null ? Boolean.TRUE : normalized.getReturnContentList());
|
||||
normalized.setReturnModelOutput(normalized.getReturnModelOutput() == null ? Boolean.FALSE : normalized.getReturnModelOutput());
|
||||
normalized.setReturnImages(normalized.getReturnImages() == null ? Boolean.TRUE : normalized.getReturnImages());
|
||||
normalized.setRenderScale(normalized.getRenderScale() == null || normalized.getRenderScale() <= 0 ? 2.0d : normalized.getRenderScale());
|
||||
normalized.setImageFormat(normalizeImageFormat(normalized.getImageFormat()));
|
||||
normalized.setIncludeSlideImageReference(
|
||||
normalized.getIncludeSlideImageReference() == null ? Boolean.TRUE : normalized.getIncludeSlideImageReference()
|
||||
);
|
||||
return normalized;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected ParseResponse doParse(PptxParseRequest request, DocumentAsyncTaskUpdater updater) {
|
||||
ParseResponse response = new ParseResponse();
|
||||
List<ParseResult> results = new ArrayList<ParseResult>();
|
||||
int totalSlides = countSlides(request);
|
||||
int processedSlides = 0;
|
||||
String backend = null;
|
||||
String version = null;
|
||||
|
||||
for (ParseFile file : request.getFiles()) {
|
||||
ParseResult result = parseSinglePptx(file, request, updater, processedSlides, totalSlides);
|
||||
processedSlides += Integer.parseInt(String.valueOf(result.getMetadata().get("slideCount")));
|
||||
if (backend == null) {
|
||||
backend = (String) result.getMetadata().get("ocrBackend");
|
||||
}
|
||||
if (version == null) {
|
||||
version = (String) result.getMetadata().get("ocrVersion");
|
||||
}
|
||||
result.getMetadata().remove("slideCount");
|
||||
result.getMetadata().remove("ocrBackend");
|
||||
result.getMetadata().remove("ocrVersion");
|
||||
results.add(result);
|
||||
}
|
||||
response.setBackend(StringUtil.hasText(backend) ? backend : request.getBackend());
|
||||
response.setVersion(version);
|
||||
response.setResults(results);
|
||||
return response;
|
||||
}
|
||||
|
||||
private ParseResult parseSinglePptx(ParseFile file,
|
||||
PptxParseRequest request,
|
||||
DocumentAsyncTaskUpdater updater,
|
||||
int processedSlidesBefore,
|
||||
int totalSlides) {
|
||||
ParseResult aggregate = new ParseResult();
|
||||
aggregate.setFileName(file.getFileName());
|
||||
StringBuilder markdownBuilder = new StringBuilder();
|
||||
PptxParseArtifact artifact = new PptxParseArtifact();
|
||||
String backend = null;
|
||||
String version = null;
|
||||
int slideCount = 0;
|
||||
|
||||
try (XMLSlideShow slideShow = new XMLSlideShow(new ByteArrayInputStream(file.getContent()))) {
|
||||
List<XSLFSlide> slides = slideShow.getSlides();
|
||||
Dimension pageSize = slideShow.getPageSize();
|
||||
int startSlide = request.getStartSlideIndex() == null ? 0 : Math.max(request.getStartSlideIndex(), 0);
|
||||
int endSlide = request.getEndSlideIndex() == null
|
||||
? slides.size() - 1
|
||||
: Math.min(request.getEndSlideIndex(), slides.size() - 1);
|
||||
if (endSlide < startSlide) {
|
||||
endSlide = startSlide - 1;
|
||||
}
|
||||
|
||||
for (int slideIndex = startSlide; slideIndex <= endSlide; slideIndex++) {
|
||||
XSLFSlide slide = slides.get(slideIndex);
|
||||
slideCount++;
|
||||
updateProgress(updater, "extracting", processedSlidesBefore + slideCount - 1, totalSlides,
|
||||
"正在渲染第 " + (slideIndex + 1) + " 页幻灯片");
|
||||
|
||||
byte[] imageBytes = renderSlide(slide, pageSize, request.getRenderScale(), request.getImageFormat());
|
||||
String imagePath = buildImagePath(slideIndex, request.getImageFormat());
|
||||
String imageName = buildImageName(slideIndex);
|
||||
|
||||
updateProgress(updater, "ocr", processedSlidesBefore + slideCount - 1, totalSlides,
|
||||
"正在识别第 " + (slideIndex + 1) + " 页幻灯片");
|
||||
ParseResult ocrResult = parseSlideImage(slideIndex, imageBytes, request, imagePath);
|
||||
|
||||
if (!StringUtil.hasText(backend)) {
|
||||
backend = (String) ocrResult.getMetadata().get("middleBackend");
|
||||
}
|
||||
if (!StringUtil.hasText(version)) {
|
||||
version = (String) ocrResult.getMetadata().get("middleVersion");
|
||||
}
|
||||
|
||||
appendSlideMarkdown(markdownBuilder, slideIndex, imageName, imagePath, request, ocrResult.getMarkdown());
|
||||
aggregate.getImages().add(buildSlideImage(slideIndex, imageName, imagePath, request.getImageFormat(), imageBytes));
|
||||
aggregate.getPages().add(buildPage(slideIndex, pageSize, request.getRenderScale()));
|
||||
mergeOcrResult(aggregate, slideIndex, ocrResult);
|
||||
artifact.getSlides().add(buildSlideArtifact(slideIndex, slide, imageName, imagePath, ocrResult));
|
||||
}
|
||||
} catch (IOException exception) {
|
||||
throw new IllegalStateException("Failed to parse PPTX file: " + file.getFileName(), exception);
|
||||
}
|
||||
|
||||
updateProgress(updater, "assembling", processedSlidesBefore + slideCount, totalSlides, "正在汇总 PPTX 解析结果");
|
||||
aggregate.setMarkdown(markdownBuilder.toString().trim());
|
||||
aggregate.setPlainText(aggregate.getMarkdown());
|
||||
aggregate.getArtifacts().getExtraJsonArtifacts().put("pptx", artifact);
|
||||
aggregate.getMetadata().put("slideCount", slideCount);
|
||||
aggregate.getMetadata().put("ocrBackend", backend);
|
||||
aggregate.getMetadata().put("ocrVersion", version);
|
||||
return aggregate;
|
||||
}
|
||||
|
||||
private ParseResult parseSlideImage(int slideIndex, byte[] imageBytes, PptxParseRequest request, String imagePath) {
|
||||
ParseRequest imageRequest = new ParseRequest();
|
||||
imageRequest.addFile(ParseFile.of("slide-" + (slideIndex + 1) + "." + request.getImageFormat(), imageBytes, "image/" + request.getImageFormat()));
|
||||
imageRequest.setBackend(request.getBackend());
|
||||
imageRequest.setLanguages(request.getLanguages());
|
||||
imageRequest.setReturnMarkdown(true);
|
||||
imageRequest.setReturnMiddleJson(true);
|
||||
imageRequest.setReturnContentList(true);
|
||||
imageRequest.setReturnModelOutput(false);
|
||||
imageRequest.setReturnImages(false);
|
||||
ParseResponse response = mapper.toParseResponse(client.parse(imageRequest));
|
||||
ParseResult result = response.getResults().isEmpty() ? new ParseResult() : response.getResults().get(0);
|
||||
if (!StringUtil.hasText(result.getMarkdown())) {
|
||||
result.setMarkdown(result.getPlainText());
|
||||
}
|
||||
result.getMetadata().put("slideImagePath", imagePath);
|
||||
return result;
|
||||
}
|
||||
|
||||
private void appendSlideMarkdown(StringBuilder markdownBuilder,
|
||||
int slideIndex,
|
||||
String imageName,
|
||||
String imagePath,
|
||||
PptxParseRequest request,
|
||||
String ocrMarkdown) {
|
||||
if (markdownBuilder.length() > 0) {
|
||||
markdownBuilder.append("\n\n");
|
||||
}
|
||||
markdownBuilder.append("# Slide ").append(slideIndex + 1).append("\n\n");
|
||||
if (Boolean.TRUE.equals(request.getIncludeSlideImageReference())) {
|
||||
markdownBuilder.append(".append(imagePath).append(")\n\n");
|
||||
}
|
||||
if (StringUtil.hasText(ocrMarkdown)) {
|
||||
markdownBuilder.append(ocrMarkdown.trim());
|
||||
}
|
||||
}
|
||||
|
||||
private DocumentImage buildSlideImage(int slideIndex, String imageName, String imagePath, String imageFormat, byte[] imageBytes) {
|
||||
DocumentImage image = new DocumentImage();
|
||||
image.setPageIndex(slideIndex);
|
||||
image.setName(imageName);
|
||||
image.setSourcePath(imagePath);
|
||||
image.setMimeType("image/" + imageFormat);
|
||||
image.setContent(imageBytes);
|
||||
return image;
|
||||
}
|
||||
|
||||
private DocumentPage buildPage(int slideIndex, Dimension pageSize, Double renderScale) {
|
||||
DocumentPage page = new DocumentPage();
|
||||
page.setPageIndex(slideIndex);
|
||||
page.setWidth(pageSize.getWidth() * renderScale);
|
||||
page.setHeight(pageSize.getHeight() * renderScale);
|
||||
return page;
|
||||
}
|
||||
|
||||
private void mergeOcrResult(ParseResult aggregate, int slideIndex, ParseResult ocrResult) {
|
||||
for (DocumentBlock block : ocrResult.getBlocks()) {
|
||||
block.setPageIndex(slideIndex);
|
||||
aggregate.getBlocks().add(block);
|
||||
}
|
||||
for (DocumentTable table : ocrResult.getTables()) {
|
||||
table.setPageIndex(slideIndex);
|
||||
aggregate.getTables().add(table);
|
||||
}
|
||||
for (String warning : ocrResult.getWarnings()) {
|
||||
aggregate.getWarnings().add("Slide " + (slideIndex + 1) + ": " + warning);
|
||||
}
|
||||
}
|
||||
|
||||
private PptxSlideArtifact buildSlideArtifact(int slideIndex,
|
||||
XSLFSlide slide,
|
||||
String imageName,
|
||||
String imagePath,
|
||||
ParseResult ocrResult) {
|
||||
PptxSlideArtifact artifact = new PptxSlideArtifact();
|
||||
artifact.setSlideIndex(slideIndex);
|
||||
artifact.setTitle(slide.getTitle());
|
||||
artifact.setImageName(imageName);
|
||||
artifact.setImagePath(imagePath);
|
||||
artifact.setOcrMarkdown(ocrResult.getMarkdown());
|
||||
artifact.setMiddleJson(ocrResult.getArtifacts().getMiddleJson());
|
||||
artifact.setContentList(ocrResult.getArtifacts().getContentList());
|
||||
artifact.setWarnings(new ArrayList<String>(ocrResult.getWarnings()));
|
||||
return artifact;
|
||||
}
|
||||
|
||||
private byte[] renderSlide(XSLFSlide slide, Dimension pageSize, Double renderScale, String imageFormat) throws IOException {
|
||||
double scale = renderScale == null ? 2.0d : renderScale;
|
||||
int width = Math.max(1, (int) Math.round(pageSize.getWidth() * scale));
|
||||
int height = Math.max(1, (int) Math.round(pageSize.getHeight() * scale));
|
||||
BufferedImage image = new BufferedImage(width, height, BufferedImage.TYPE_INT_RGB);
|
||||
Graphics2D graphics = image.createGraphics();
|
||||
try {
|
||||
graphics.setColor(Color.WHITE);
|
||||
graphics.fillRect(0, 0, width, height);
|
||||
graphics.scale(scale, scale);
|
||||
graphics.setRenderingHint(RenderingHints.KEY_ANTIALIASING, RenderingHints.VALUE_ANTIALIAS_ON);
|
||||
graphics.setRenderingHint(RenderingHints.KEY_RENDERING, RenderingHints.VALUE_RENDER_QUALITY);
|
||||
graphics.setRenderingHint(RenderingHints.KEY_INTERPOLATION, RenderingHints.VALUE_INTERPOLATION_BICUBIC);
|
||||
slide.draw(graphics);
|
||||
} finally {
|
||||
graphics.dispose();
|
||||
}
|
||||
ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
|
||||
ImageIO.write(image, imageFormat, outputStream);
|
||||
return outputStream.toByteArray();
|
||||
}
|
||||
|
||||
private int countSlides(PptxParseRequest request) {
|
||||
int totalSlides = 0;
|
||||
for (ParseFile file : request.getFiles()) {
|
||||
try (XMLSlideShow slideShow = new XMLSlideShow(new ByteArrayInputStream(file.getContent()))) {
|
||||
int slideSize = slideShow.getSlides().size();
|
||||
int startSlide = request.getStartSlideIndex() == null ? 0 : Math.max(request.getStartSlideIndex(), 0);
|
||||
int endSlide = request.getEndSlideIndex() == null
|
||||
? slideSize - 1
|
||||
: Math.min(request.getEndSlideIndex(), slideSize - 1);
|
||||
if (endSlide >= startSlide) {
|
||||
totalSlides += endSlide - startSlide + 1;
|
||||
}
|
||||
} catch (IOException exception) {
|
||||
throw new IllegalStateException("Failed to inspect PPTX slide count: " + file.getFileName(), exception);
|
||||
}
|
||||
}
|
||||
return totalSlides;
|
||||
}
|
||||
|
||||
private void updateProgress(DocumentAsyncTaskUpdater updater,
|
||||
String stage,
|
||||
int processedItems,
|
||||
int totalItems,
|
||||
String message) {
|
||||
if (updater == null) {
|
||||
return;
|
||||
}
|
||||
int safeTotal = totalItems <= 0 ? 1 : totalItems;
|
||||
int percent = (int) Math.min(99, Math.round(processedItems * 100.0d / safeTotal));
|
||||
updater.update(stage, percent, processedItems, totalItems, message);
|
||||
}
|
||||
|
||||
private String normalizeImageFormat(String imageFormat) {
|
||||
if ("jpg".equalsIgnoreCase(imageFormat) || "jpeg".equalsIgnoreCase(imageFormat)) {
|
||||
return "jpg";
|
||||
}
|
||||
return "png";
|
||||
}
|
||||
|
||||
private String buildImagePath(int slideIndex, String imageFormat) {
|
||||
return "images/slide-" + formatIndex(slideIndex) + "/page." + imageFormat;
|
||||
}
|
||||
|
||||
private String buildImageName(int slideIndex) {
|
||||
return "slide-" + formatIndex(slideIndex) + "-page";
|
||||
}
|
||||
|
||||
private String formatIndex(int slideIndex) {
|
||||
int displayIndex = slideIndex + 1;
|
||||
if (displayIndex < 10) {
|
||||
return "00" + displayIndex;
|
||||
}
|
||||
if (displayIndex < 100) {
|
||||
return "0" + displayIndex;
|
||||
}
|
||||
return String.valueOf(displayIndex);
|
||||
}
|
||||
|
||||
private static DocumentAsyncTaskManager defaultTaskManager() {
|
||||
DocumentAsyncTaskRepository repository = new InMemoryDocumentAsyncTaskRepository();
|
||||
ExecutorService executorService = Executors.newFixedThreadPool(2);
|
||||
return new DocumentAsyncTaskManager(repository, executorService);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,23 @@
|
||||
package com.easyagents.document.pptx.model;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* PPTX 结构化工件。
|
||||
*
|
||||
* @author Codex
|
||||
* @since 2026-04-16
|
||||
*/
|
||||
public class PptxParseArtifact {
|
||||
|
||||
private List<PptxSlideArtifact> slides = new ArrayList<PptxSlideArtifact>();
|
||||
|
||||
public List<PptxSlideArtifact> getSlides() {
|
||||
return slides;
|
||||
}
|
||||
|
||||
public void setSlides(List<PptxSlideArtifact> slides) {
|
||||
this.slides = slides == null ? new ArrayList<PptxSlideArtifact>() : slides;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,86 @@
|
||||
package com.easyagents.document.pptx.model;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
/**
 * Artifact describing a single slide: its index, title, rendered image and recognition output.
 *
 * @author Codex
 * @since 2026-04-16
 */
public class PptxSlideArtifact {

    private Integer slideIndex;
    private String title;
    private String imageName;
    private String imagePath;
    private String ocrMarkdown;
    private Object middleJson;
    private Object contentList;

    /** Warnings attached to the slide; never {@code null}. */
    private List<String> warnings = new ArrayList<>();

    /** @return the slide index, or {@code null} if unset */
    public Integer getSlideIndex() {
        return this.slideIndex;
    }

    /** @param slideIndex the slide index */
    public void setSlideIndex(Integer slideIndex) {
        this.slideIndex = slideIndex;
    }

    /** @return the slide title, or {@code null} if unset */
    public String getTitle() {
        return this.title;
    }

    /** @param title the slide title */
    public void setTitle(String title) {
        this.title = title;
    }

    /** @return the name of the slide image, or {@code null} if unset */
    public String getImageName() {
        return this.imageName;
    }

    /** @param imageName the name of the slide image */
    public void setImageName(String imageName) {
        this.imageName = imageName;
    }

    /** @return the path of the slide image, or {@code null} if unset */
    public String getImagePath() {
        return this.imagePath;
    }

    /** @param imagePath the path of the slide image */
    public void setImagePath(String imagePath) {
        this.imagePath = imagePath;
    }

    /** @return the recognition markdown of the slide, or {@code null} if unset */
    public String getOcrMarkdown() {
        return this.ocrMarkdown;
    }

    /** @param ocrMarkdown the recognition markdown of the slide */
    public void setOcrMarkdown(String ocrMarkdown) {
        this.ocrMarkdown = ocrMarkdown;
    }

    /** @return the middle JSON object, or {@code null} if unset */
    public Object getMiddleJson() {
        return this.middleJson;
    }

    /** @param middleJson the middle JSON object */
    public void setMiddleJson(Object middleJson) {
        this.middleJson = middleJson;
    }

    /** @return the content list object, or {@code null} if unset */
    public Object getContentList() {
        return this.contentList;
    }

    /** @param contentList the content list object */
    public void setContentList(Object contentList) {
        this.contentList = contentList;
    }

    /**
     * Returns the warnings. The internal list itself is returned, not a copy.
     *
     * @return the warnings, never {@code null}
     */
    public List<String> getWarnings() {
        return this.warnings;
    }

    /**
     * Replaces the warnings.
     *
     * @param warnings the new list; {@code null} is replaced by a fresh empty list
     */
    public void setWarnings(List<String> warnings) {
        if (warnings == null) {
            this.warnings = new ArrayList<>();
        } else {
            this.warnings = warnings;
        }
    }
}
|
||||
@@ -0,0 +1,170 @@
|
||||
package com.easyagents.document.pptx.mineru;
|
||||
|
||||
import com.alibaba.fastjson2.JSONArray;
|
||||
import com.alibaba.fastjson2.JSONObject;
|
||||
import com.easyagents.document.core.async.DocumentAsyncTaskManager;
|
||||
import com.easyagents.document.core.async.InMemoryDocumentAsyncTaskRepository;
|
||||
import com.easyagents.document.core.mineru.MineruClient;
|
||||
import com.easyagents.document.core.mineru.MineruMapper;
|
||||
import com.easyagents.document.core.mineru.MineruProperties;
|
||||
import com.easyagents.document.core.mineru.MineruResultPayload;
|
||||
import com.easyagents.document.core.entity.ParseFile;
|
||||
import com.easyagents.document.core.entity.ParseResponse;
|
||||
import com.easyagents.document.core.entity.ParseResult;
|
||||
import com.easyagents.document.core.entity.ParseTaskInfo;
|
||||
import com.easyagents.document.core.entity.ParseTaskStatus;
|
||||
import com.easyagents.document.core.entity.PptxParseRequest;
|
||||
import org.apache.poi.xslf.usermodel.XMLSlideShow;
|
||||
import org.apache.poi.xslf.usermodel.XSLFSlide;
|
||||
import org.apache.poi.xslf.usermodel.XSLFTextBox;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.awt.Rectangle;
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.util.concurrent.Executor;
|
||||
|
||||
/**
|
||||
* PPTX MinerU 服务测试。
|
||||
*
|
||||
* @author Codex
|
||||
* @since 2026-04-16
|
||||
*/
|
||||
public class MineruPptxDocumentParseServiceTest {
|
||||
|
||||
@Test
|
||||
public void shouldBuildMarkdownAndImagesForSlides() throws IOException {
|
||||
RecordingClient client = new RecordingClient(defaultProperties());
|
||||
MineruMapper mapper = new MineruMapper(defaultProperties());
|
||||
MineruPptxDocumentParseService service = new MineruPptxDocumentParseService(
|
||||
defaultProperties(),
|
||||
client,
|
||||
mapper,
|
||||
new DocumentAsyncTaskManager(new InMemoryDocumentAsyncTaskRepository(), directExecutor())
|
||||
);
|
||||
|
||||
PptxParseRequest request = new PptxParseRequest();
|
||||
request.addFile(ParseFile.of("demo.pptx", buildPptxBytes()));
|
||||
|
||||
ParseResponse response = service.parse(request);
|
||||
|
||||
Assert.assertEquals(1, response.getResults().size());
|
||||
ParseResult result = response.getResults().get(0);
|
||||
Assert.assertTrue(result.getMarkdown().contains("# Slide 1"));
|
||||
Assert.assertTrue(result.getMarkdown().contains("images/slide-001/page.png"));
|
||||
Assert.assertTrue(result.getMarkdown().contains("slide-ocr-1"));
|
||||
Assert.assertEquals(2, result.getImages().size());
|
||||
Assert.assertNotNull(result.getImages().get(0).getContent());
|
||||
Assert.assertNotNull(result.getArtifacts().getExtraJsonArtifacts().get("pptx"));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void shouldSupportAsyncTaskFlow() throws IOException {
|
||||
RecordingClient client = new RecordingClient(defaultProperties());
|
||||
MineruMapper mapper = new MineruMapper(defaultProperties());
|
||||
MineruPptxDocumentParseService service = new MineruPptxDocumentParseService(
|
||||
defaultProperties(),
|
||||
client,
|
||||
mapper,
|
||||
new DocumentAsyncTaskManager(new InMemoryDocumentAsyncTaskRepository(), directExecutor())
|
||||
);
|
||||
|
||||
PptxParseRequest request = new PptxParseRequest();
|
||||
request.addFile(ParseFile.of("demo.pptx", buildPptxBytes()));
|
||||
|
||||
ParseTaskStatus status = service.submit(request);
|
||||
ParseTaskInfo taskInfo = service.queryTaskInfo(status.getTaskId());
|
||||
|
||||
Assert.assertEquals("completed", taskInfo.getStatus());
|
||||
Assert.assertNotNull(taskInfo.getResult());
|
||||
Assert.assertEquals(1, taskInfo.getResult().getResults().size());
|
||||
}
|
||||
|
||||
private byte[] buildPptxBytes() throws IOException {
|
||||
XMLSlideShow slideShow = new XMLSlideShow();
|
||||
slideShow.setPageSize(new java.awt.Dimension(640, 360));
|
||||
createSlide(slideShow, "第一页");
|
||||
createSlide(slideShow, "第二页");
|
||||
ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
|
||||
slideShow.write(outputStream);
|
||||
slideShow.close();
|
||||
return outputStream.toByteArray();
|
||||
}
|
||||
|
||||
private void createSlide(XMLSlideShow slideShow, String text) {
|
||||
XSLFSlide slide = slideShow.createSlide();
|
||||
XSLFTextBox textBox = slide.createTextBox();
|
||||
textBox.setAnchor(new Rectangle(20, 20, 300, 80));
|
||||
textBox.setText(text);
|
||||
}
|
||||
|
||||
private MineruProperties defaultProperties() {
|
||||
MineruProperties properties = new MineruProperties();
|
||||
properties.setBaseUrl("http://127.0.0.1:8000");
|
||||
return properties;
|
||||
}
|
||||
|
||||
private Executor directExecutor() {
|
||||
return new Executor() {
|
||||
@Override
|
||||
public void execute(Runnable command) {
|
||||
command.run();
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
private static class RecordingClient extends MineruClient {
|
||||
|
||||
private int parseCount;
|
||||
|
||||
private RecordingClient(MineruProperties properties) {
|
||||
super(properties, new MineruMapper(properties));
|
||||
}
|
||||
|
||||
@Override
|
||||
public MineruResultPayload parse(com.easyagents.document.core.entity.ParseRequest request) {
|
||||
parseCount++;
|
||||
return new MineruMapper(testProperties()).toResultPayload(syncPayload(parseCount));
|
||||
}
|
||||
|
||||
private JSONObject syncPayload(int index) {
|
||||
JSONObject payload = new JSONObject();
|
||||
payload.put("backend", "vlm-http-client");
|
||||
payload.put("version", "3.0.9");
|
||||
JSONObject result = new JSONObject();
|
||||
result.put("md_content", "slide-ocr-" + index);
|
||||
result.put("middle_json", middleJson());
|
||||
result.put("content_list", contentList(index));
|
||||
JSONObject results = new JSONObject();
|
||||
results.put("slide-" + index, result);
|
||||
payload.put("results", results);
|
||||
return payload;
|
||||
}
|
||||
|
||||
private JSONObject middleJson() {
|
||||
JSONObject middleJson = new JSONObject();
|
||||
middleJson.put("_backend", "vlm-http-client");
|
||||
middleJson.put("_version_name", "3.0.9");
|
||||
middleJson.put("pdf_info", new JSONArray());
|
||||
return middleJson;
|
||||
}
|
||||
|
||||
private JSONArray contentList(int index) {
|
||||
JSONArray contentList = new JSONArray();
|
||||
JSONObject text = new JSONObject();
|
||||
text.put("type", "text");
|
||||
text.put("text", "slide-ocr-" + index);
|
||||
text.put("page_idx", 0);
|
||||
text.put("bbox", new JSONArray());
|
||||
contentList.add(text);
|
||||
return contentList;
|
||||
}
|
||||
|
||||
private static MineruProperties testProperties() {
|
||||
MineruProperties properties = new MineruProperties();
|
||||
properties.setBaseUrl("http://127.0.0.1:8000");
|
||||
return properties;
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user