refactor: 收敛文档 OCR 配置并补齐 Office 解析兼容
- 统一 MinerU OCR 配置结构并移除分模块冗余属性类
- 补齐 JSON 字符串化结果拆包、XLSX 图片兼容与对应回归测试
This commit is contained in:
@@ -12,7 +12,7 @@ import java.util.List;
|
||||
* @author Codex
|
||||
* @since 2026-04-16
|
||||
*/
|
||||
@ConfigurationProperties(prefix = "easy-agents.document.mineru")
|
||||
@ConfigurationProperties(prefix = "easy-agents.document.ocr.mineru")
|
||||
public class CommonMineruDocumentProperties {
|
||||
|
||||
private String baseUrl;
|
||||
|
||||
@@ -1,119 +0,0 @@
|
||||
package com.easyagents.spring.boot.document.pdf.mineru;
|
||||
|
||||
import org.springframework.boot.context.properties.ConfigurationProperties;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* MinerU Spring Boot 配置。
|
||||
*
|
||||
* @author Codex
|
||||
* @since 2026-04-14
|
||||
*/
|
||||
@ConfigurationProperties(prefix = "easy-agents.document.pdf.mineru")
|
||||
public class MineruDocumentProperties {
|
||||
|
||||
private String baseUrl;
|
||||
private Integer connectTimeoutMs = 3000;
|
||||
private Integer readTimeoutMs = 600000;
|
||||
private Integer writeTimeoutMs = 600000;
|
||||
private Integer pollIntervalMs = 1000;
|
||||
private Integer resultTimeoutMs = 1800000;
|
||||
private String defaultBackend = "vlm-http-client";
|
||||
private String defaultParseMethod = "auto";
|
||||
private List<String> defaultLangList = new ArrayList<String>(Arrays.asList("ch"));
|
||||
private Boolean defaultFormulaEnable = true;
|
||||
private Boolean defaultTableEnable = true;
|
||||
|
||||
public String getBaseUrl() {
|
||||
return baseUrl;
|
||||
}
|
||||
|
||||
public void setBaseUrl(String baseUrl) {
|
||||
this.baseUrl = baseUrl;
|
||||
}
|
||||
|
||||
public Integer getConnectTimeoutMs() {
|
||||
return connectTimeoutMs;
|
||||
}
|
||||
|
||||
public void setConnectTimeoutMs(Integer connectTimeoutMs) {
|
||||
this.connectTimeoutMs = connectTimeoutMs;
|
||||
}
|
||||
|
||||
public Integer getReadTimeoutMs() {
|
||||
return readTimeoutMs;
|
||||
}
|
||||
|
||||
public void setReadTimeoutMs(Integer readTimeoutMs) {
|
||||
this.readTimeoutMs = readTimeoutMs;
|
||||
}
|
||||
|
||||
public Integer getWriteTimeoutMs() {
|
||||
return writeTimeoutMs;
|
||||
}
|
||||
|
||||
public void setWriteTimeoutMs(Integer writeTimeoutMs) {
|
||||
this.writeTimeoutMs = writeTimeoutMs;
|
||||
}
|
||||
|
||||
public Integer getPollIntervalMs() {
|
||||
return pollIntervalMs;
|
||||
}
|
||||
|
||||
public void setPollIntervalMs(Integer pollIntervalMs) {
|
||||
this.pollIntervalMs = pollIntervalMs;
|
||||
}
|
||||
|
||||
public Integer getResultTimeoutMs() {
|
||||
return resultTimeoutMs;
|
||||
}
|
||||
|
||||
public void setResultTimeoutMs(Integer resultTimeoutMs) {
|
||||
this.resultTimeoutMs = resultTimeoutMs;
|
||||
}
|
||||
|
||||
public String getDefaultBackend() {
|
||||
return defaultBackend;
|
||||
}
|
||||
|
||||
public void setDefaultBackend(String defaultBackend) {
|
||||
this.defaultBackend = defaultBackend;
|
||||
}
|
||||
|
||||
public String getDefaultParseMethod() {
|
||||
return defaultParseMethod;
|
||||
}
|
||||
|
||||
public void setDefaultParseMethod(String defaultParseMethod) {
|
||||
this.defaultParseMethod = defaultParseMethod;
|
||||
}
|
||||
|
||||
public List<String> getDefaultLangList() {
|
||||
return defaultLangList;
|
||||
}
|
||||
|
||||
public void setDefaultLangList(List<String> defaultLangList) {
|
||||
this.defaultLangList = defaultLangList == null
|
||||
? new ArrayList<String>(Arrays.asList("ch"))
|
||||
: defaultLangList;
|
||||
}
|
||||
|
||||
public Boolean getDefaultFormulaEnable() {
|
||||
return defaultFormulaEnable;
|
||||
}
|
||||
|
||||
public void setDefaultFormulaEnable(Boolean defaultFormulaEnable) {
|
||||
this.defaultFormulaEnable = defaultFormulaEnable;
|
||||
}
|
||||
|
||||
public Boolean getDefaultTableEnable() {
|
||||
return defaultTableEnable;
|
||||
}
|
||||
|
||||
public void setDefaultTableEnable(Boolean defaultTableEnable) {
|
||||
this.defaultTableEnable = defaultTableEnable;
|
||||
}
|
||||
}
|
||||
@@ -10,8 +10,11 @@ import org.springframework.boot.autoconfigure.condition.ConditionalOnClass;
|
||||
import org.springframework.boot.autoconfigure.condition.ConditionalOnMissingBean;
|
||||
import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
|
||||
import org.springframework.boot.context.properties.EnableConfigurationProperties;
|
||||
import org.springframework.beans.factory.config.BeanFactoryPostProcessor;
|
||||
import org.springframework.beans.factory.support.BeanDefinitionRegistry;
|
||||
import org.springframework.context.annotation.Bean;
|
||||
import org.springframework.context.annotation.Configuration;
|
||||
import org.springframework.context.annotation.Primary;
|
||||
|
||||
/**
|
||||
* MinerU PDF 文档解析自动装配。
|
||||
@@ -21,50 +24,94 @@ import org.springframework.context.annotation.Configuration;
|
||||
*/
|
||||
@Configuration(proxyBeanMethods = false)
|
||||
@ConditionalOnClass(MineruPdfDocumentParseService.class)
|
||||
@ConditionalOnProperty(prefix = "easy-agents.document.pdf", name = "provider", havingValue = "mineru")
|
||||
@EnableConfigurationProperties({MineruDocumentProperties.class, CommonMineruDocumentProperties.class})
|
||||
@ConditionalOnProperty(prefix = "easy-agents.document.ocr", name = "provider", havingValue = "mineru")
|
||||
@EnableConfigurationProperties(CommonMineruDocumentProperties.class)
|
||||
public class MineruPdfAutoConfiguration {
|
||||
|
||||
public static final String DEFAULT_DOCUMENT_PARSE_SERVICE_BEAN_NAME = "documentParseService";
|
||||
|
||||
/**
|
||||
* 注册统一 PDF 解析服务。
|
||||
*
|
||||
* @param properties Spring Boot 配置
|
||||
* @param commonProperties Spring Boot 配置
|
||||
* @return PDF 解析服务
|
||||
*/
|
||||
@Bean
|
||||
@Primary
|
||||
@ConditionalOnMissingBean(PdfDocumentParseService.class)
|
||||
public PdfDocumentParseService pdfDocumentParseService(MineruDocumentProperties properties,
|
||||
CommonMineruDocumentProperties commonProperties) {
|
||||
return new MineruPdfDocumentParseService(toMineruProperties(properties, commonProperties));
|
||||
public PdfDocumentParseService pdfDocumentParseService(CommonMineruDocumentProperties commonProperties) {
|
||||
return new MineruPdfDocumentParseService(toMineruProperties(commonProperties));
|
||||
}
|
||||
|
||||
/**
|
||||
* 将 PDF 服务以统一文档解析服务类型暴露,便于调用方直接按抽象注入。
|
||||
* 将默认文档解析服务名注册为 PDF 服务别名,避免重复创建同类型 Bean。
|
||||
* 这里显式走 alias,而不是第二个 {@link DocumentParseService} Bean,
|
||||
* 这样既能保持默认契约,也不会破坏按 {@link PdfDocumentParseService} 类型的唯一注入。
|
||||
*
|
||||
* @param pdfDocumentParseService PDF 解析服务
|
||||
* @return 统一文档解析服务
|
||||
* @return BeanFactory 后置处理器
|
||||
*/
|
||||
@Bean
|
||||
@ConditionalOnMissingBean(DocumentParseService.class)
|
||||
public DocumentParseService documentParseService(PdfDocumentParseService pdfDocumentParseService) {
|
||||
return pdfDocumentParseService;
|
||||
public static BeanFactoryPostProcessor defaultDocumentParseServiceAliasPostProcessor() {
|
||||
return beanFactory -> {
|
||||
if (!(beanFactory instanceof BeanDefinitionRegistry)) {
|
||||
return;
|
||||
}
|
||||
BeanDefinitionRegistry registry = (BeanDefinitionRegistry) beanFactory;
|
||||
String aliasTarget = resolveAliasTarget(beanFactory, registry);
|
||||
if (!StringUtil.hasText(aliasTarget)) {
|
||||
return;
|
||||
}
|
||||
if (registry.containsBeanDefinition(DEFAULT_DOCUMENT_PARSE_SERVICE_BEAN_NAME)
|
||||
|| registry.isAlias(DEFAULT_DOCUMENT_PARSE_SERVICE_BEAN_NAME)) {
|
||||
return;
|
||||
}
|
||||
if (registry.containsBeanDefinition(aliasTarget)
|
||||
&& !registry.getBeanDefinition(aliasTarget).isPrimary()) {
|
||||
registry.getBeanDefinition(aliasTarget).setPrimary(true);
|
||||
}
|
||||
registry.registerAlias(aliasTarget, DEFAULT_DOCUMENT_PARSE_SERVICE_BEAN_NAME);
|
||||
};
|
||||
}
|
||||
|
||||
private MineruProperties toMineruProperties(MineruDocumentProperties properties,
|
||||
CommonMineruDocumentProperties commonProperties) {
|
||||
private static String resolveAliasTarget(org.springframework.beans.factory.config.ConfigurableListableBeanFactory beanFactory,
|
||||
BeanDefinitionRegistry registry) {
|
||||
String[] candidateNames = beanFactory.getBeanNamesForType(PdfDocumentParseService.class, true, false);
|
||||
if (candidateNames == null || candidateNames.length == 0) {
|
||||
return null;
|
||||
}
|
||||
if (candidateNames.length == 1) {
|
||||
return candidateNames[0];
|
||||
}
|
||||
String primaryBeanName = null;
|
||||
for (String candidateName : candidateNames) {
|
||||
if (!registry.containsBeanDefinition(candidateName)) {
|
||||
continue;
|
||||
}
|
||||
if (!registry.getBeanDefinition(candidateName).isPrimary()) {
|
||||
continue;
|
||||
}
|
||||
if (primaryBeanName != null) {
|
||||
return null;
|
||||
}
|
||||
primaryBeanName = candidateName;
|
||||
}
|
||||
return primaryBeanName;
|
||||
}
|
||||
|
||||
private MineruProperties toMineruProperties(CommonMineruDocumentProperties commonProperties) {
|
||||
MineruProperties mineruProperties = new MineruProperties();
|
||||
boolean useCommon = commonProperties != null && StringUtil.hasText(commonProperties.getBaseUrl());
|
||||
mineruProperties.setBaseUrl(useCommon ? commonProperties.getBaseUrl() : properties.getBaseUrl());
|
||||
mineruProperties.setConnectTimeoutMs(useCommon ? commonProperties.getConnectTimeoutMs() : properties.getConnectTimeoutMs());
|
||||
mineruProperties.setReadTimeoutMs(useCommon ? commonProperties.getReadTimeoutMs() : properties.getReadTimeoutMs());
|
||||
mineruProperties.setWriteTimeoutMs(useCommon ? commonProperties.getWriteTimeoutMs() : properties.getWriteTimeoutMs());
|
||||
mineruProperties.setPollIntervalMs(useCommon ? commonProperties.getPollIntervalMs() : properties.getPollIntervalMs());
|
||||
mineruProperties.setResultTimeoutMs(useCommon ? commonProperties.getResultTimeoutMs() : properties.getResultTimeoutMs());
|
||||
mineruProperties.setDefaultBackend(useCommon ? commonProperties.getDefaultBackend() : properties.getDefaultBackend());
|
||||
mineruProperties.setDefaultParseMethod(useCommon ? commonProperties.getDefaultParseMethod() : properties.getDefaultParseMethod());
|
||||
mineruProperties.setDefaultLangList(useCommon ? commonProperties.getDefaultLangList() : properties.getDefaultLangList());
|
||||
mineruProperties.setDefaultFormulaEnable(useCommon ? commonProperties.getDefaultFormulaEnable() : properties.getDefaultFormulaEnable());
|
||||
mineruProperties.setDefaultTableEnable(useCommon ? commonProperties.getDefaultTableEnable() : properties.getDefaultTableEnable());
|
||||
mineruProperties.setBaseUrl(useCommon ? commonProperties.getBaseUrl() : null);
|
||||
mineruProperties.setConnectTimeoutMs(useCommon ? commonProperties.getConnectTimeoutMs() : null);
|
||||
mineruProperties.setReadTimeoutMs(useCommon ? commonProperties.getReadTimeoutMs() : null);
|
||||
mineruProperties.setWriteTimeoutMs(useCommon ? commonProperties.getWriteTimeoutMs() : null);
|
||||
mineruProperties.setPollIntervalMs(useCommon ? commonProperties.getPollIntervalMs() : null);
|
||||
mineruProperties.setResultTimeoutMs(useCommon ? commonProperties.getResultTimeoutMs() : null);
|
||||
mineruProperties.setDefaultBackend(useCommon ? commonProperties.getDefaultBackend() : null);
|
||||
mineruProperties.setDefaultParseMethod(useCommon ? commonProperties.getDefaultParseMethod() : null);
|
||||
mineruProperties.setDefaultLangList(useCommon ? commonProperties.getDefaultLangList() : null);
|
||||
mineruProperties.setDefaultFormulaEnable(useCommon ? commonProperties.getDefaultFormulaEnable() : null);
|
||||
mineruProperties.setDefaultTableEnable(useCommon ? commonProperties.getDefaultTableEnable() : null);
|
||||
return mineruProperties;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -6,6 +6,7 @@ import com.easyagents.document.core.mineru.MineruProperties;
|
||||
import com.easyagents.document.pptx.PptxDocumentParseService;
|
||||
import com.easyagents.document.pptx.mineru.MineruPptxDocumentParseService;
|
||||
import com.easyagents.spring.boot.document.mineru.CommonMineruDocumentProperties;
|
||||
import org.springframework.beans.factory.annotation.Qualifier;
|
||||
import org.springframework.boot.autoconfigure.condition.ConditionalOnClass;
|
||||
import org.springframework.boot.autoconfigure.condition.ConditionalOnMissingBean;
|
||||
import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
|
||||
@@ -24,21 +25,24 @@ import java.util.concurrent.Executors;
|
||||
*/
|
||||
@Configuration(proxyBeanMethods = false)
|
||||
@ConditionalOnClass(MineruPptxDocumentParseService.class)
|
||||
@ConditionalOnProperty(prefix = "easy-agents.document.pptx", name = "enabled", havingValue = "true")
|
||||
@EnableConfigurationProperties({CommonMineruDocumentProperties.class, PptxDocumentProperties.class})
|
||||
@ConditionalOnProperty(prefix = "easy-agents.document.ocr", name = "provider", havingValue = "mineru")
|
||||
@EnableConfigurationProperties(CommonMineruDocumentProperties.class)
|
||||
public class MineruPptxAutoConfiguration {
|
||||
|
||||
public static final String PPTX_DOCUMENT_ASYNC_TASK_MANAGER_BEAN_NAME = "pptxDocumentAsyncTaskManager";
|
||||
private static final int DEFAULT_ASYNC_THREADS = 2;
|
||||
|
||||
@Bean
|
||||
@ConditionalOnMissingBean(name = "pptxDocumentAsyncTaskManager")
|
||||
public DocumentAsyncTaskManager pptxDocumentAsyncTaskManager(PptxDocumentProperties properties) {
|
||||
int threadCount = properties.getAsyncThreads() == null || properties.getAsyncThreads() <= 0 ? 2 : properties.getAsyncThreads();
|
||||
ExecutorService executorService = Executors.newFixedThreadPool(threadCount);
|
||||
@ConditionalOnMissingBean(name = PPTX_DOCUMENT_ASYNC_TASK_MANAGER_BEAN_NAME)
|
||||
public DocumentAsyncTaskManager pptxDocumentAsyncTaskManager() {
|
||||
ExecutorService executorService = Executors.newFixedThreadPool(DEFAULT_ASYNC_THREADS);
|
||||
return new DocumentAsyncTaskManager(new InMemoryDocumentAsyncTaskRepository(), executorService);
|
||||
}
|
||||
|
||||
@Bean
|
||||
@ConditionalOnMissingBean(PptxDocumentParseService.class)
|
||||
public PptxDocumentParseService pptxDocumentParseService(CommonMineruDocumentProperties commonProperties,
|
||||
@Qualifier(PPTX_DOCUMENT_ASYNC_TASK_MANAGER_BEAN_NAME)
|
||||
DocumentAsyncTaskManager pptxDocumentAsyncTaskManager) {
|
||||
return new MineruPptxDocumentParseService(toMineruProperties(commonProperties), pptxDocumentAsyncTaskManager);
|
||||
}
|
||||
|
||||
@@ -1,32 +0,0 @@
|
||||
package com.easyagents.spring.boot.document.pptx;
|
||||
|
||||
import org.springframework.boot.context.properties.ConfigurationProperties;
|
||||
|
||||
/**
|
||||
* PPTX 文档配置。
|
||||
*
|
||||
* @author Codex
|
||||
* @since 2026-04-16
|
||||
*/
|
||||
@ConfigurationProperties(prefix = "easy-agents.document.pptx")
|
||||
public class PptxDocumentProperties {
|
||||
|
||||
private Boolean enabled = false;
|
||||
private Integer asyncThreads = 2;
|
||||
|
||||
public Boolean getEnabled() {
|
||||
return enabled;
|
||||
}
|
||||
|
||||
public void setEnabled(Boolean enabled) {
|
||||
this.enabled = enabled;
|
||||
}
|
||||
|
||||
public Integer getAsyncThreads() {
|
||||
return asyncThreads;
|
||||
}
|
||||
|
||||
public void setAsyncThreads(Integer asyncThreads) {
|
||||
this.asyncThreads = asyncThreads;
|
||||
}
|
||||
}
|
||||
@@ -6,6 +6,7 @@ import com.easyagents.document.core.mineru.MineruProperties;
|
||||
import com.easyagents.document.xlsx.XlsxDocumentParseService;
|
||||
import com.easyagents.document.xlsx.mineru.MineruXlsxDocumentParseService;
|
||||
import com.easyagents.spring.boot.document.mineru.CommonMineruDocumentProperties;
|
||||
import org.springframework.beans.factory.annotation.Qualifier;
|
||||
import org.springframework.boot.autoconfigure.condition.ConditionalOnClass;
|
||||
import org.springframework.boot.autoconfigure.condition.ConditionalOnMissingBean;
|
||||
import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
|
||||
@@ -24,21 +25,24 @@ import java.util.concurrent.Executors;
|
||||
*/
|
||||
@Configuration(proxyBeanMethods = false)
|
||||
@ConditionalOnClass(MineruXlsxDocumentParseService.class)
|
||||
@ConditionalOnProperty(prefix = "easy-agents.document.xlsx", name = "enabled", havingValue = "true")
|
||||
@EnableConfigurationProperties({CommonMineruDocumentProperties.class, XlsxDocumentProperties.class})
|
||||
@ConditionalOnProperty(prefix = "easy-agents.document.ocr", name = "provider", havingValue = "mineru")
|
||||
@EnableConfigurationProperties(CommonMineruDocumentProperties.class)
|
||||
public class MineruXlsxAutoConfiguration {
|
||||
|
||||
public static final String XLSX_DOCUMENT_ASYNC_TASK_MANAGER_BEAN_NAME = "xlsxDocumentAsyncTaskManager";
|
||||
private static final int DEFAULT_ASYNC_THREADS = 2;
|
||||
|
||||
@Bean
|
||||
@ConditionalOnMissingBean(name = "xlsxDocumentAsyncTaskManager")
|
||||
public DocumentAsyncTaskManager xlsxDocumentAsyncTaskManager(XlsxDocumentProperties properties) {
|
||||
int threadCount = properties.getAsyncThreads() == null || properties.getAsyncThreads() <= 0 ? 2 : properties.getAsyncThreads();
|
||||
ExecutorService executorService = Executors.newFixedThreadPool(threadCount);
|
||||
@ConditionalOnMissingBean(name = XLSX_DOCUMENT_ASYNC_TASK_MANAGER_BEAN_NAME)
|
||||
public DocumentAsyncTaskManager xlsxDocumentAsyncTaskManager() {
|
||||
ExecutorService executorService = Executors.newFixedThreadPool(DEFAULT_ASYNC_THREADS);
|
||||
return new DocumentAsyncTaskManager(new InMemoryDocumentAsyncTaskRepository(), executorService);
|
||||
}
|
||||
|
||||
@Bean
|
||||
@ConditionalOnMissingBean(XlsxDocumentParseService.class)
|
||||
public XlsxDocumentParseService xlsxDocumentParseService(CommonMineruDocumentProperties commonProperties,
|
||||
@Qualifier(XLSX_DOCUMENT_ASYNC_TASK_MANAGER_BEAN_NAME)
|
||||
DocumentAsyncTaskManager xlsxDocumentAsyncTaskManager) {
|
||||
return new MineruXlsxDocumentParseService(toMineruProperties(commonProperties), xlsxDocumentAsyncTaskManager);
|
||||
}
|
||||
|
||||
@@ -1,32 +0,0 @@
|
||||
package com.easyagents.spring.boot.document.xlsx;
|
||||
|
||||
import org.springframework.boot.context.properties.ConfigurationProperties;
|
||||
|
||||
/**
|
||||
* XLSX 文档配置。
|
||||
*
|
||||
* @author Codex
|
||||
* @since 2026-04-16
|
||||
*/
|
||||
@ConfigurationProperties(prefix = "easy-agents.document.xlsx")
|
||||
public class XlsxDocumentProperties {
|
||||
|
||||
private Boolean enabled = false;
|
||||
private Integer asyncThreads = 2;
|
||||
|
||||
public Boolean getEnabled() {
|
||||
return enabled;
|
||||
}
|
||||
|
||||
public void setEnabled(Boolean enabled) {
|
||||
this.enabled = enabled;
|
||||
}
|
||||
|
||||
public Integer getAsyncThreads() {
|
||||
return asyncThreads;
|
||||
}
|
||||
|
||||
public void setAsyncThreads(Integer asyncThreads) {
|
||||
this.asyncThreads = asyncThreads;
|
||||
}
|
||||
}
|
||||
@@ -1,6 +1,10 @@
|
||||
package com.easyagents.spring.boot.autoconfigure;
|
||||
|
||||
import com.easyagents.document.core.DocumentParseService;
|
||||
import com.easyagents.document.core.entity.ParseRequest;
|
||||
import com.easyagents.document.core.entity.ParseResponse;
|
||||
import com.easyagents.document.core.entity.ParseTaskInfo;
|
||||
import com.easyagents.document.core.entity.ParseTaskStatus;
|
||||
import com.easyagents.document.pdf.PdfDocumentParseService;
|
||||
import com.easyagents.document.pptx.PptxDocumentParseService;
|
||||
import com.easyagents.document.xlsx.XlsxDocumentParseService;
|
||||
@@ -13,6 +17,8 @@ import com.easyagents.spring.boot.rag.ingestion.RagIngestionAutoConfiguration;
|
||||
import com.easyagents.spring.boot.store.opensearch.OpenSearchAutoConfiguration;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
import org.springframework.context.annotation.Bean;
|
||||
import org.springframework.context.annotation.Configuration;
|
||||
import org.springframework.boot.test.context.runner.ApplicationContextRunner;
|
||||
|
||||
public class StarterConditionalAutoConfigurationTest {
|
||||
@@ -49,27 +55,105 @@ public class StarterConditionalAutoConfigurationTest {
|
||||
public void shouldCreateMineruDocumentBeansWhenConfigured() {
|
||||
contextRunner
|
||||
.withPropertyValues(
|
||||
"easy-agents.document.pdf.provider=mineru",
|
||||
"easy-agents.document.pdf.mineru.base-url=https://hub.wust.edu.cn/modelServer/mineru-api"
|
||||
"easy-agents.document.ocr.provider=mineru",
|
||||
"easy-agents.document.ocr.mineru.base-url=https://hub.wust.edu.cn/modelServer/mineru-api"
|
||||
)
|
||||
.run(context -> {
|
||||
Assert.assertNotNull(context.getBean(PdfDocumentParseService.class));
|
||||
Assert.assertNotNull(context.getBean(PptxDocumentParseService.class));
|
||||
Assert.assertNotNull(context.getBean(XlsxDocumentParseService.class));
|
||||
Assert.assertNotNull(context.getBean(DocumentParseService.class));
|
||||
});
|
||||
}
|
||||
|
||||
@Test
|
||||
public void shouldCreatePptxAndXlsxBeansWhenEnabled() {
|
||||
public void shouldCreatePptxAndXlsxBeansWhenMineruOcrConfigured() {
|
||||
contextRunner
|
||||
.withPropertyValues(
|
||||
"easy-agents.document.mineru.base-url=https://hub.wust.edu.cn/modelServer/mineru-api",
|
||||
"easy-agents.document.pptx.enabled=true",
|
||||
"easy-agents.document.xlsx.enabled=true"
|
||||
"easy-agents.document.ocr.provider=mineru",
|
||||
"easy-agents.document.ocr.mineru.base-url=https://hub.wust.edu.cn/modelServer/mineru-api"
|
||||
)
|
||||
.run(context -> {
|
||||
Assert.assertNotNull(context.getBean(PptxDocumentParseService.class));
|
||||
Assert.assertNotNull(context.getBean(XlsxDocumentParseService.class));
|
||||
Assert.assertFalse(context.containsBean("documentParseService"));
|
||||
Assert.assertNotNull(context.getBean(DocumentParseService.class));
|
||||
});
|
||||
}
|
||||
|
||||
@Test
|
||||
public void shouldKeepPdfAsDefaultDocumentParseServiceWhenMineruOcrConfigured() {
|
||||
contextRunner
|
||||
.withPropertyValues(
|
||||
"easy-agents.document.ocr.provider=mineru",
|
||||
"easy-agents.document.ocr.mineru.base-url=https://hub.wust.edu.cn/modelServer/mineru-api"
|
||||
)
|
||||
.run(context -> {
|
||||
PdfDocumentParseService pdfService = context.getBean(PdfDocumentParseService.class);
|
||||
Assert.assertNotNull(pdfService);
|
||||
Assert.assertNotNull(context.getBean(PptxDocumentParseService.class));
|
||||
Assert.assertNotNull(context.getBean(XlsxDocumentParseService.class));
|
||||
Assert.assertSame(pdfService, context.getBean("documentParseService"));
|
||||
Assert.assertSame(pdfService, context.getBean(DocumentParseService.class));
|
||||
});
|
||||
}
|
||||
|
||||
@Test
|
||||
public void shouldAliasCustomNamedPdfServiceAsDefaultDocumentParseService() {
|
||||
new ApplicationContextRunner()
|
||||
.withUserConfiguration(CustomPdfParseServiceConfiguration.class)
|
||||
.withUserConfiguration(
|
||||
RagIngestionAutoConfiguration.class,
|
||||
OllamaAutoConfiguration.class,
|
||||
OpenSearchAutoConfiguration.class,
|
||||
MineruPdfAutoConfiguration.class,
|
||||
MineruPptxAutoConfiguration.class,
|
||||
MineruXlsxAutoConfiguration.class
|
||||
)
|
||||
.withPropertyValues(
|
||||
"easy-agents.document.ocr.provider=mineru",
|
||||
"easy-agents.document.ocr.mineru.base-url=https://hub.wust.edu.cn/modelServer/mineru-api"
|
||||
)
|
||||
.run(context -> {
|
||||
PdfDocumentParseService pdfService = context.getBean(PdfDocumentParseService.class);
|
||||
Assert.assertSame(pdfService, context.getBean("documentParseService"));
|
||||
Assert.assertSame(pdfService, context.getBean(DocumentParseService.class));
|
||||
});
|
||||
}
|
||||
|
||||
@Configuration(proxyBeanMethods = false)
|
||||
static class CustomPdfParseServiceConfiguration {
|
||||
|
||||
@Bean("customPdfService")
|
||||
PdfDocumentParseService customPdfService() {
|
||||
return new NoopPdfDocumentParseService();
|
||||
}
|
||||
}
|
||||
|
||||
static class NoopPdfDocumentParseService implements PdfDocumentParseService {
|
||||
|
||||
@Override
|
||||
public ParseResponse parse(ParseRequest request) {
|
||||
return new ParseResponse();
|
||||
}
|
||||
|
||||
@Override
|
||||
public ParseTaskStatus submit(ParseRequest request) {
|
||||
return new ParseTaskStatus();
|
||||
}
|
||||
|
||||
@Override
|
||||
public ParseTaskStatus queryTask(String taskId) {
|
||||
return new ParseTaskStatus();
|
||||
}
|
||||
|
||||
@Override
|
||||
public ParseResponse queryResult(String taskId) {
|
||||
return new ParseResponse();
|
||||
}
|
||||
|
||||
@Override
|
||||
public ParseTaskInfo queryTaskInfo(String taskId) {
|
||||
return new ParseTaskInfo();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user