feat: 支持知识库导入 PPTX 与 XLSX 文档
- 打通 Office 文档桥接解析、解析进度承接与图片引用改写
- 落地 PPTX 按页分块、XLSX 行窗口分块以及预览与检索渲染闭环
This commit is contained in:
@@ -407,6 +407,8 @@ public class DocumentCollectionController extends BaseCurdController<DocumentCol
|
|||||||
KnowledgeSearchResultItem item = new KnowledgeSearchResultItem();
|
KnowledgeSearchResultItem item = new KnowledgeSearchResultItem();
|
||||||
item.setSorting(index + 1);
|
item.setSorting(index + 1);
|
||||||
item.setContent(document.getContent());
|
item.setContent(document.getContent());
|
||||||
|
item.setRenderMarkdown(readMetadataAsString(document, "renderMarkdown"));
|
||||||
|
item.setSourceFileName(readMetadataAsString(document, "sourceFileName"));
|
||||||
item.setScore(roundScore(document.getScore()));
|
item.setScore(roundScore(document.getScore()));
|
||||||
item.setHitSource(readMetadataAsString(document, RagRetrievalMetadataKeys.HIT_SOURCE));
|
item.setHitSource(readMetadataAsString(document, RagRetrievalMetadataKeys.HIT_SOURCE));
|
||||||
item.setVectorScore(roundScore(readMetadataAsDouble(document, RagRetrievalMetadataKeys.VECTOR_SCORE)));
|
item.setVectorScore(roundScore(readMetadataAsDouble(document, RagRetrievalMetadataKeys.VECTOR_SCORE)));
|
||||||
|
|||||||
@@ -932,6 +932,10 @@ public class ShareKnowledgeController {
|
|||||||
KnowledgeSearchResultItem item = new KnowledgeSearchResultItem();
|
KnowledgeSearchResultItem item = new KnowledgeSearchResultItem();
|
||||||
item.setSorting(index + 1);
|
item.setSorting(index + 1);
|
||||||
item.setContent(document.getContent());
|
item.setContent(document.getContent());
|
||||||
|
Object renderMarkdown = document.getMetadata("renderMarkdown");
|
||||||
|
item.setRenderMarkdown(renderMarkdown == null ? null : String.valueOf(renderMarkdown));
|
||||||
|
Object sourceFileName = document.getMetadata("sourceFileName");
|
||||||
|
item.setSourceFileName(sourceFileName == null ? null : String.valueOf(sourceFileName));
|
||||||
item.setScore(document.getScore() == null ? null : document.getScore().doubleValue());
|
item.setScore(document.getScore() == null ? null : document.getScore().doubleValue());
|
||||||
Object hitSource = document.getMetadata("hitSource");
|
Object hitSource = document.getMetadata("hitSource");
|
||||||
item.setHitSource(hitSource == null ? null : String.valueOf(hitSource));
|
item.setHitSource(hitSource == null ? null : String.valueOf(hitSource));
|
||||||
|
|||||||
@@ -638,6 +638,10 @@ public class PublicKnowledgeShareController {
|
|||||||
for (com.easyagents.core.document.Document document : documents) {
|
for (com.easyagents.core.document.Document document : documents) {
|
||||||
KnowledgeSearchResultItem item = new KnowledgeSearchResultItem();
|
KnowledgeSearchResultItem item = new KnowledgeSearchResultItem();
|
||||||
item.setContent(document.getContent());
|
item.setContent(document.getContent());
|
||||||
|
Object renderMarkdown = document.getMetadata("renderMarkdown");
|
||||||
|
item.setRenderMarkdown(renderMarkdown == null ? null : String.valueOf(renderMarkdown));
|
||||||
|
Object sourceFileName = document.getMetadata("sourceFileName");
|
||||||
|
item.setSourceFileName(sourceFileName == null ? null : String.valueOf(sourceFileName));
|
||||||
item.setScore(document.getScore());
|
item.setScore(document.getScore());
|
||||||
Object hitSource = document.getMetadata("hitSource");
|
Object hitSource = document.getMetadata("hitSource");
|
||||||
item.setHitSource(hitSource == null ? null : String.valueOf(hitSource));
|
item.setHitSource(hitSource == null ? null : String.valueOf(hitSource));
|
||||||
|
|||||||
@@ -112,7 +112,6 @@
|
|||||||
<groupId>com.easyagents</groupId>
|
<groupId>com.easyagents</groupId>
|
||||||
<artifactId>easy-agents-mcp</artifactId>
|
<artifactId>easy-agents-mcp</artifactId>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>junit</groupId>
|
<groupId>junit</groupId>
|
||||||
<artifactId>junit</artifactId>
|
<artifactId>junit</artifactId>
|
||||||
|
|||||||
@@ -35,10 +35,14 @@ public class DocumentParseBridgeException extends RuntimeException {
|
|||||||
public static DocumentParseBridgeException serviceNotEnabled() {
|
public static DocumentParseBridgeException serviceNotEnabled() {
|
||||||
return new DocumentParseBridgeException(
|
return new DocumentParseBridgeException(
|
||||||
"service_not_enabled",
|
"service_not_enabled",
|
||||||
"统一文档解析服务未启用,请先配置 easy-agents.document.pdf.provider"
|
"统一文档解析服务未启用,请先配置 easy-agents.document.ocr.provider=mineru"
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static DocumentParseBridgeException serviceNotEnabled(String message) {
|
||||||
|
return new DocumentParseBridgeException("service_not_enabled", message);
|
||||||
|
}
|
||||||
|
|
||||||
public static DocumentParseBridgeException unsupportedSource(String message) {
|
public static DocumentParseBridgeException unsupportedSource(String message) {
|
||||||
return new DocumentParseBridgeException("unsupported_source", message);
|
return new DocumentParseBridgeException("unsupported_source", message);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -22,6 +22,11 @@ public class DocumentParseTaskStatus {
|
|||||||
private String statusUrl;
|
private String statusUrl;
|
||||||
private String resultUrl;
|
private String resultUrl;
|
||||||
private Integer queuedAhead;
|
private Integer queuedAhead;
|
||||||
|
private Integer progressPercent;
|
||||||
|
private String currentStage;
|
||||||
|
private Integer processedItems;
|
||||||
|
private Integer totalItems;
|
||||||
|
private String statusMessage;
|
||||||
|
|
||||||
public String getTaskId() {
|
public String getTaskId() {
|
||||||
return taskId;
|
return taskId;
|
||||||
@@ -110,4 +115,44 @@ public class DocumentParseTaskStatus {
|
|||||||
public void setQueuedAhead(Integer queuedAhead) {
|
public void setQueuedAhead(Integer queuedAhead) {
|
||||||
this.queuedAhead = queuedAhead;
|
this.queuedAhead = queuedAhead;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public Integer getProgressPercent() {
|
||||||
|
return progressPercent;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setProgressPercent(Integer progressPercent) {
|
||||||
|
this.progressPercent = progressPercent;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getCurrentStage() {
|
||||||
|
return currentStage;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setCurrentStage(String currentStage) {
|
||||||
|
this.currentStage = currentStage;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Integer getProcessedItems() {
|
||||||
|
return processedItems;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setProcessedItems(Integer processedItems) {
|
||||||
|
this.processedItems = processedItems;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Integer getTotalItems() {
|
||||||
|
return totalItems;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setTotalItems(Integer totalItems) {
|
||||||
|
this.totalItems = totalItems;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getStatusMessage() {
|
||||||
|
return statusMessage;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setStatusMessage(String statusMessage) {
|
||||||
|
this.statusMessage = statusMessage;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -5,6 +5,10 @@ import com.easyagents.document.core.entity.ParseResponse;
|
|||||||
import com.easyagents.document.core.entity.ParseResult;
|
import com.easyagents.document.core.entity.ParseResult;
|
||||||
import com.easyagents.document.core.entity.ParseTaskInfo;
|
import com.easyagents.document.core.entity.ParseTaskInfo;
|
||||||
import com.easyagents.document.core.entity.ParseTaskStatus;
|
import com.easyagents.document.core.entity.ParseTaskStatus;
|
||||||
|
import com.easyagents.document.pdf.PdfDocumentParseService;
|
||||||
|
import com.easyagents.document.pptx.PptxDocumentParseService;
|
||||||
|
import com.easyagents.document.xlsx.XlsxDocumentParseService;
|
||||||
|
import org.springframework.beans.factory.annotation.Qualifier;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
import org.springframework.lang.Nullable;
|
import org.springframework.lang.Nullable;
|
||||||
@@ -20,8 +24,13 @@ import tech.easyflow.ai.document.service.DocumentParseBridgeService;
|
|||||||
import tech.easyflow.ai.document.support.DocumentSourceLoader;
|
import tech.easyflow.ai.document.support.DocumentSourceLoader;
|
||||||
import tech.easyflow.ai.document.support.DocumentParseRequestFactory;
|
import tech.easyflow.ai.document.support.DocumentParseRequestFactory;
|
||||||
import tech.easyflow.ai.document.support.DocumentParseResultMapper;
|
import tech.easyflow.ai.document.support.DocumentParseResultMapper;
|
||||||
|
import tech.easyflow.ai.document.support.DocumentParseSourceType;
|
||||||
import tech.easyflow.ai.document.support.LoadedDocumentSource;
|
import tech.easyflow.ai.document.support.LoadedDocumentSource;
|
||||||
import tech.easyflow.ai.utils.DocUtil;
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.LinkedHashSet;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.function.Function;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 统一文档解析桥接门面默认实现。
|
* 统一文档解析桥接门面默认实现。
|
||||||
@@ -33,18 +42,33 @@ import tech.easyflow.ai.utils.DocUtil;
|
|||||||
public class DocumentParseBridgeServiceImpl implements DocumentParseBridgeService {
|
public class DocumentParseBridgeServiceImpl implements DocumentParseBridgeService {
|
||||||
|
|
||||||
private static final Logger LOG = LoggerFactory.getLogger(DocumentParseBridgeServiceImpl.class);
|
private static final Logger LOG = LoggerFactory.getLogger(DocumentParseBridgeServiceImpl.class);
|
||||||
|
private static final String DEFAULT_DOCUMENT_PARSE_SERVICE_BEAN_NAME = "documentParseService";
|
||||||
|
|
||||||
@Nullable
|
@Nullable
|
||||||
private final DocumentParseService documentParseService;
|
private final DocumentParseService defaultDocumentParseService;
|
||||||
|
@Nullable
|
||||||
|
private final PdfDocumentParseService pdfDocumentParseService;
|
||||||
|
@Nullable
|
||||||
|
private final PptxDocumentParseService pptxDocumentParseService;
|
||||||
|
@Nullable
|
||||||
|
private final XlsxDocumentParseService xlsxDocumentParseService;
|
||||||
private final DocumentSourceLoader documentSourceLoader;
|
private final DocumentSourceLoader documentSourceLoader;
|
||||||
private final DocumentParseRequestFactory parseRequestFactory;
|
private final DocumentParseRequestFactory parseRequestFactory;
|
||||||
private final DocumentParseResultMapper parseResultMapper;
|
private final DocumentParseResultMapper parseResultMapper;
|
||||||
|
|
||||||
public DocumentParseBridgeServiceImpl(@Nullable DocumentParseService documentParseService,
|
public DocumentParseBridgeServiceImpl(@Nullable
|
||||||
|
@Qualifier(DEFAULT_DOCUMENT_PARSE_SERVICE_BEAN_NAME)
|
||||||
|
DocumentParseService defaultDocumentParseService,
|
||||||
|
@Nullable PdfDocumentParseService pdfDocumentParseService,
|
||||||
|
@Nullable PptxDocumentParseService pptxDocumentParseService,
|
||||||
|
@Nullable XlsxDocumentParseService xlsxDocumentParseService,
|
||||||
DocumentSourceLoader documentSourceLoader,
|
DocumentSourceLoader documentSourceLoader,
|
||||||
DocumentParseRequestFactory parseRequestFactory,
|
DocumentParseRequestFactory parseRequestFactory,
|
||||||
DocumentParseResultMapper parseResultMapper) {
|
DocumentParseResultMapper parseResultMapper) {
|
||||||
this.documentParseService = documentParseService;
|
this.defaultDocumentParseService = defaultDocumentParseService;
|
||||||
|
this.pdfDocumentParseService = pdfDocumentParseService;
|
||||||
|
this.pptxDocumentParseService = pptxDocumentParseService;
|
||||||
|
this.xlsxDocumentParseService = xlsxDocumentParseService;
|
||||||
this.documentSourceLoader = documentSourceLoader;
|
this.documentSourceLoader = documentSourceLoader;
|
||||||
this.parseRequestFactory = parseRequestFactory;
|
this.parseRequestFactory = parseRequestFactory;
|
||||||
this.parseResultMapper = parseResultMapper;
|
this.parseResultMapper = parseResultMapper;
|
||||||
@@ -59,7 +83,8 @@ public class DocumentParseBridgeServiceImpl implements DocumentParseBridgeServic
|
|||||||
LoadedDocumentSource loadedSource = prepareSupportedSource(source);
|
LoadedDocumentSource loadedSource = prepareSupportedSource(source);
|
||||||
LOG.info("桥接服务开始同步解析文档: fileName={}, contentType={}, scenario={}",
|
LOG.info("桥接服务开始同步解析文档: fileName={}, contentType={}, scenario={}",
|
||||||
loadedSource.getFileName(), loadedSource.getContentType(), scenario);
|
loadedSource.getFileName(), loadedSource.getContentType(), scenario);
|
||||||
ParseResponse response = requireService().parse(parseRequestFactory.build(loadedSource, scenario));
|
DocumentParseService parseService = resolveService(loadedSource);
|
||||||
|
ParseResponse response = parseService.parse(parseRequestFactory.build(loadedSource, scenario));
|
||||||
DocumentParsedResult result = parseResultMapper.map(extractSingleResult(response, false));
|
DocumentParsedResult result = parseResultMapper.map(extractSingleResult(response, false));
|
||||||
LOG.info("桥接服务同步解析完成: fileName={}, scenario={}, preferredTextLength={}",
|
LOG.info("桥接服务同步解析完成: fileName={}, scenario={}, preferredTextLength={}",
|
||||||
loadedSource.getFileName(), scenario, resolveTextLength(result));
|
loadedSource.getFileName(), scenario, resolveTextLength(result));
|
||||||
@@ -84,7 +109,8 @@ public class DocumentParseBridgeServiceImpl implements DocumentParseBridgeServic
|
|||||||
LoadedDocumentSource loadedSource = prepareSupportedSource(source);
|
LoadedDocumentSource loadedSource = prepareSupportedSource(source);
|
||||||
LOG.info("桥接服务开始提交异步解析任务: fileName={}, contentType={}, scenario={}",
|
LOG.info("桥接服务开始提交异步解析任务: fileName={}, contentType={}, scenario={}",
|
||||||
loadedSource.getFileName(), loadedSource.getContentType(), scenario);
|
loadedSource.getFileName(), loadedSource.getContentType(), scenario);
|
||||||
ParseTaskStatus taskStatus = requireService().submit(parseRequestFactory.build(loadedSource, scenario));
|
DocumentParseService parseService = resolveService(loadedSource);
|
||||||
|
ParseTaskStatus taskStatus = parseService.submit(parseRequestFactory.build(loadedSource, scenario));
|
||||||
DocumentParseTaskStatus mappedStatus = parseResultMapper.map(taskStatus);
|
DocumentParseTaskStatus mappedStatus = parseResultMapper.map(taskStatus);
|
||||||
LOG.info("桥接服务异步解析任务提交完成: fileName={}, scenario={}, providerTaskId={}, status={}",
|
LOG.info("桥接服务异步解析任务提交完成: fileName={}, scenario={}, providerTaskId={}, status={}",
|
||||||
loadedSource.getFileName(), scenario, mappedStatus.getTaskId(), mappedStatus.getStatus());
|
loadedSource.getFileName(), scenario, mappedStatus.getTaskId(), mappedStatus.getStatus());
|
||||||
@@ -109,7 +135,8 @@ public class DocumentParseBridgeServiceImpl implements DocumentParseBridgeServic
|
|||||||
throw DocumentParseBridgeException.taskFailed("taskId 不能为空");
|
throw DocumentParseBridgeException.taskFailed("taskId 不能为空");
|
||||||
}
|
}
|
||||||
try {
|
try {
|
||||||
return parseResultMapper.map(requireService().queryTask(taskId));
|
ParseTaskStatus taskStatus = executeAgainstTaskService(taskId, service -> service.queryTask(taskId));
|
||||||
|
return parseResultMapper.map(taskStatus);
|
||||||
} catch (DocumentParseBridgeException e) {
|
} catch (DocumentParseBridgeException e) {
|
||||||
throw e;
|
throw e;
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
@@ -127,7 +154,7 @@ public class DocumentParseBridgeServiceImpl implements DocumentParseBridgeServic
|
|||||||
}
|
}
|
||||||
try {
|
try {
|
||||||
LOG.info("桥接服务开始获取异步解析结果: providerTaskId={}", taskId);
|
LOG.info("桥接服务开始获取异步解析结果: providerTaskId={}", taskId);
|
||||||
ParseResponse response = requireService().queryResult(taskId);
|
ParseResponse response = executeAgainstTaskService(taskId, service -> service.queryResult(taskId));
|
||||||
DocumentParsedResult result = parseResultMapper.map(extractSingleResult(response, true));
|
DocumentParsedResult result = parseResultMapper.map(extractSingleResult(response, true));
|
||||||
LOG.info("桥接服务获取异步解析结果完成: providerTaskId={}, preferredTextLength={}",
|
LOG.info("桥接服务获取异步解析结果完成: providerTaskId={}, preferredTextLength={}",
|
||||||
taskId, resolveTextLength(result));
|
taskId, resolveTextLength(result));
|
||||||
@@ -150,7 +177,7 @@ public class DocumentParseBridgeServiceImpl implements DocumentParseBridgeServic
|
|||||||
throw DocumentParseBridgeException.taskFailed("taskId 不能为空");
|
throw DocumentParseBridgeException.taskFailed("taskId 不能为空");
|
||||||
}
|
}
|
||||||
try {
|
try {
|
||||||
ParseTaskInfo taskInfo = requireService().queryTaskInfo(taskId);
|
ParseTaskInfo taskInfo = executeAgainstTaskService(taskId, service -> service.queryTaskInfo(taskId));
|
||||||
DocumentParseTaskInfo mappedTaskInfo = parseResultMapper.map(taskInfo);
|
DocumentParseTaskInfo mappedTaskInfo = parseResultMapper.map(taskInfo);
|
||||||
LOG.info("桥接服务查询异步解析任务状态: providerTaskId={}, status={}, hasResult={}",
|
LOG.info("桥接服务查询异步解析任务状态: providerTaskId={}, status={}, hasResult={}",
|
||||||
taskId,
|
taskId,
|
||||||
@@ -177,39 +204,84 @@ public class DocumentParseBridgeServiceImpl implements DocumentParseBridgeServic
|
|||||||
return text == null ? 0 : text.length();
|
return text == null ? 0 : text.length();
|
||||||
}
|
}
|
||||||
|
|
||||||
private DocumentParseService requireService() {
|
|
||||||
if (documentParseService == null) {
|
|
||||||
throw DocumentParseBridgeException.serviceNotEnabled();
|
|
||||||
}
|
|
||||||
return documentParseService;
|
|
||||||
}
|
|
||||||
|
|
||||||
private LoadedDocumentSource prepareSupportedSource(DocumentSourceRef source) {
|
private LoadedDocumentSource prepareSupportedSource(DocumentSourceRef source) {
|
||||||
LoadedDocumentSource loadedSource = documentSourceLoader.load(source);
|
LoadedDocumentSource loadedSource = documentSourceLoader.load(source);
|
||||||
if (!isSupportedByBridge(loadedSource)) {
|
if (!isSupportedByBridge(loadedSource)) {
|
||||||
throw DocumentParseBridgeException.unsupportedSource("统一文档解析桥接当前仅支持 PDF、DOCX 文件");
|
throw DocumentParseBridgeException.unsupportedSource("统一文档解析桥接当前仅支持 PDF、DOCX、PPTX、XLSX 文件");
|
||||||
}
|
}
|
||||||
return loadedSource;
|
return loadedSource;
|
||||||
}
|
}
|
||||||
|
|
||||||
private boolean isSupportedByBridge(LoadedDocumentSource loadedSource) {
|
private boolean isSupportedByBridge(LoadedDocumentSource loadedSource) {
|
||||||
String contentType = loadedSource.getContentType();
|
return DocumentParseSourceType.resolve(loadedSource.getFileName(), loadedSource.getContentType()) != DocumentParseSourceType.UNSUPPORTED;
|
||||||
if (StringUtils.hasText(contentType)) {
|
}
|
||||||
String normalizedContentType = contentType.toLowerCase();
|
|
||||||
if (normalizedContentType.contains("pdf")
|
private DocumentParseService resolveService(LoadedDocumentSource loadedSource) {
|
||||||
|| normalizedContentType.contains("wordprocessingml.document")) {
|
DocumentParseSourceType sourceType = DocumentParseSourceType.resolve(loadedSource.getFileName(), loadedSource.getContentType());
|
||||||
return true;
|
switch (sourceType) {
|
||||||
|
case PDF:
|
||||||
|
return requireSpecificService(pdfDocumentParseService, defaultDocumentParseService, "PDF");
|
||||||
|
case DOCX:
|
||||||
|
return requireSpecificService(defaultDocumentParseService, pdfDocumentParseService, "DOCX");
|
||||||
|
case PPTX:
|
||||||
|
return requireSpecificService(pptxDocumentParseService, null, "PPTX");
|
||||||
|
case XLSX:
|
||||||
|
return requireSpecificService(xlsxDocumentParseService, null, "XLSX");
|
||||||
|
default:
|
||||||
|
throw DocumentParseBridgeException.unsupportedSource("当前文件类型暂不支持桥接解析");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private DocumentParseService requireSpecificService(@Nullable DocumentParseService primaryService,
|
||||||
|
@Nullable DocumentParseService fallbackService,
|
||||||
|
String sourceType) {
|
||||||
|
if (primaryService != null) {
|
||||||
|
return primaryService;
|
||||||
|
}
|
||||||
|
if (fallbackService != null) {
|
||||||
|
return fallbackService;
|
||||||
|
}
|
||||||
|
throw DocumentParseBridgeException.serviceNotEnabled("未启用 " + sourceType + " 文档解析服务");
|
||||||
|
}
|
||||||
|
|
||||||
|
private <T> T executeAgainstTaskService(String taskId, Function<DocumentParseService, T> action) {
|
||||||
|
List<DocumentParseService> services = availableServices();
|
||||||
|
if (services.isEmpty()) {
|
||||||
|
throw DocumentParseBridgeException.serviceNotEnabled();
|
||||||
|
}
|
||||||
|
Exception lastException = null;
|
||||||
|
for (DocumentParseService service : services) {
|
||||||
|
try {
|
||||||
|
return action.apply(service);
|
||||||
|
} catch (Exception exception) {
|
||||||
|
lastException = exception;
|
||||||
|
LOG.debug("桥接服务任务查询尝试失败,准备切换下一个解析服务: taskId={}, service={}",
|
||||||
|
taskId,
|
||||||
|
service.getClass().getSimpleName(),
|
||||||
|
exception);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
String fileName = loadedSource.getFileName();
|
if (lastException instanceof RuntimeException) {
|
||||||
if (!StringUtils.hasText(fileName) || !fileName.contains(".")) {
|
throw (RuntimeException) lastException;
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
String suffix = DocUtil.normalizeSuffix(DocUtil.getSuffix(fileName));
|
throw DocumentParseBridgeException.taskFailed("未找到可处理当前任务ID的文档解析服务", lastException);
|
||||||
if ("pdf".equals(suffix) || "docx".equals(suffix)) {
|
}
|
||||||
return true;
|
|
||||||
|
private List<DocumentParseService> availableServices() {
|
||||||
|
LinkedHashSet<DocumentParseService> services = new LinkedHashSet<DocumentParseService>();
|
||||||
|
if (pptxDocumentParseService != null) {
|
||||||
|
services.add(pptxDocumentParseService);
|
||||||
}
|
}
|
||||||
return false;
|
if (xlsxDocumentParseService != null) {
|
||||||
|
services.add(xlsxDocumentParseService);
|
||||||
|
}
|
||||||
|
if (pdfDocumentParseService != null) {
|
||||||
|
services.add(pdfDocumentParseService);
|
||||||
|
}
|
||||||
|
if (defaultDocumentParseService != null) {
|
||||||
|
services.add(defaultDocumentParseService);
|
||||||
|
}
|
||||||
|
return new ArrayList<DocumentParseService>(services);
|
||||||
}
|
}
|
||||||
|
|
||||||
private ParseResult extractSingleResult(ParseResponse response, boolean resultFetchPhase) {
|
private ParseResult extractSingleResult(ParseResponse response, boolean resultFetchPhase) {
|
||||||
|
|||||||
@@ -2,6 +2,9 @@ package tech.easyflow.ai.document.support;
|
|||||||
|
|
||||||
import com.easyagents.document.core.entity.ParseFile;
|
import com.easyagents.document.core.entity.ParseFile;
|
||||||
import com.easyagents.document.core.entity.ParseRequest;
|
import com.easyagents.document.core.entity.ParseRequest;
|
||||||
|
import com.easyagents.document.core.entity.PdfParseRequest;
|
||||||
|
import com.easyagents.document.core.entity.PptxParseRequest;
|
||||||
|
import com.easyagents.document.core.entity.XlsxParseRequest;
|
||||||
import org.springframework.stereotype.Component;
|
import org.springframework.stereotype.Component;
|
||||||
import tech.easyflow.ai.document.exception.DocumentParseBridgeException;
|
import tech.easyflow.ai.document.exception.DocumentParseBridgeException;
|
||||||
import tech.easyflow.ai.document.model.DocumentParseScenario;
|
import tech.easyflow.ai.document.model.DocumentParseScenario;
|
||||||
@@ -31,12 +34,28 @@ public class DocumentParseRequestFactory {
|
|||||||
if (scenario == null) {
|
if (scenario == null) {
|
||||||
throw DocumentParseBridgeException.requestBuildFailed("解析场景不能为空");
|
throw DocumentParseBridgeException.requestBuildFailed("解析场景不能为空");
|
||||||
}
|
}
|
||||||
ParseRequest request = new ParseRequest();
|
ParseRequest request = createTypedRequest(source);
|
||||||
request.addFile(ParseFile.of(source.getFileName(), source.getContentBytes(), source.getContentType()));
|
request.addFile(ParseFile.of(source.getFileName(), source.getContentBytes(), source.getContentType()));
|
||||||
applyScenario(request, scenario);
|
applyScenario(request, scenario);
|
||||||
return request;
|
return request;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private ParseRequest createTypedRequest(LoadedDocumentSource source) {
|
||||||
|
DocumentParseSourceType sourceType = DocumentParseSourceType.resolve(source.getFileName(), source.getContentType());
|
||||||
|
switch (sourceType) {
|
||||||
|
case PDF:
|
||||||
|
return new PdfParseRequest();
|
||||||
|
case PPTX:
|
||||||
|
return new PptxParseRequest();
|
||||||
|
case XLSX:
|
||||||
|
return new XlsxParseRequest();
|
||||||
|
case DOCX:
|
||||||
|
return new ParseRequest();
|
||||||
|
default:
|
||||||
|
throw DocumentParseBridgeException.requestBuildFailed("当前文件类型暂不支持桥接解析");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private void applyScenario(ParseRequest request, DocumentParseScenario scenario) {
|
private void applyScenario(ParseRequest request, DocumentParseScenario scenario) {
|
||||||
switch (scenario) {
|
switch (scenario) {
|
||||||
case WORKFLOW_TEXT:
|
case WORKFLOW_TEXT:
|
||||||
|
|||||||
@@ -69,6 +69,11 @@ public class DocumentParseResultMapper {
|
|||||||
status.setStatusUrl(taskStatus.getStatusUrl());
|
status.setStatusUrl(taskStatus.getStatusUrl());
|
||||||
status.setResultUrl(taskStatus.getResultUrl());
|
status.setResultUrl(taskStatus.getResultUrl());
|
||||||
status.setQueuedAhead(taskStatus.getQueuedAhead());
|
status.setQueuedAhead(taskStatus.getQueuedAhead());
|
||||||
|
status.setProgressPercent(taskStatus.getProgressPercent());
|
||||||
|
status.setCurrentStage(taskStatus.getCurrentStage());
|
||||||
|
status.setProcessedItems(taskStatus.getProcessedItems());
|
||||||
|
status.setTotalItems(taskStatus.getTotalItems());
|
||||||
|
status.setStatusMessage(taskStatus.getStatusMessage());
|
||||||
return status;
|
return status;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -104,6 +109,11 @@ public class DocumentParseResultMapper {
|
|||||||
status.setStatusUrl(taskStatus.getStatusUrl());
|
status.setStatusUrl(taskStatus.getStatusUrl());
|
||||||
status.setResultUrl(taskStatus.getResultUrl());
|
status.setResultUrl(taskStatus.getResultUrl());
|
||||||
status.setQueuedAhead(taskStatus.getQueuedAhead());
|
status.setQueuedAhead(taskStatus.getQueuedAhead());
|
||||||
|
status.setProgressPercent(taskStatus.getProgressPercent());
|
||||||
|
status.setCurrentStage(taskStatus.getCurrentStage());
|
||||||
|
status.setProcessedItems(taskStatus.getProcessedItems());
|
||||||
|
status.setTotalItems(taskStatus.getTotalItems());
|
||||||
|
status.setStatusMessage(taskStatus.getStatusMessage());
|
||||||
}
|
}
|
||||||
|
|
||||||
private String resolvePreferredText(ParseResult parseResult) {
|
private String resolvePreferredText(ParseResult parseResult) {
|
||||||
|
|||||||
@@ -0,0 +1,70 @@
|
|||||||
|
package tech.easyflow.ai.document.support;
|
||||||
|
|
||||||
|
import org.springframework.util.StringUtils;
|
||||||
|
import tech.easyflow.ai.utils.DocUtil;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 统一文档解析桥接支持的源文件类型。
|
||||||
|
*
|
||||||
|
* @author Codex
|
||||||
|
* @since 2026-04-17
|
||||||
|
*/
|
||||||
|
public enum DocumentParseSourceType {
|
||||||
|
|
||||||
|
PDF,
|
||||||
|
DOCX,
|
||||||
|
PPTX,
|
||||||
|
XLSX,
|
||||||
|
UNSUPPORTED;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 根据文件名与内容类型推断文档类型。
|
||||||
|
*
|
||||||
|
* @param fileName 文件名
|
||||||
|
* @param contentType MIME 类型
|
||||||
|
* @return 文档类型
|
||||||
|
*/
|
||||||
|
public static DocumentParseSourceType resolve(String fileName, String contentType) {
|
||||||
|
if (StringUtils.hasText(contentType)) {
|
||||||
|
String normalizedContentType = contentType.toLowerCase();
|
||||||
|
if (normalizedContentType.contains("pdf")) {
|
||||||
|
return PDF;
|
||||||
|
}
|
||||||
|
if (normalizedContentType.contains("wordprocessingml.document")) {
|
||||||
|
return DOCX;
|
||||||
|
}
|
||||||
|
if (normalizedContentType.contains("presentationml.presentation")) {
|
||||||
|
return PPTX;
|
||||||
|
}
|
||||||
|
if (normalizedContentType.contains("spreadsheetml.sheet")) {
|
||||||
|
return XLSX;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (!StringUtils.hasText(fileName) || !fileName.contains(".")) {
|
||||||
|
return UNSUPPORTED;
|
||||||
|
}
|
||||||
|
String suffix = DocUtil.normalizeSuffix(DocUtil.getSuffix(fileName));
|
||||||
|
if ("pdf".equals(suffix)) {
|
||||||
|
return PDF;
|
||||||
|
}
|
||||||
|
if ("docx".equals(suffix)) {
|
||||||
|
return DOCX;
|
||||||
|
}
|
||||||
|
if ("pptx".equals(suffix)) {
|
||||||
|
return PPTX;
|
||||||
|
}
|
||||||
|
if ("xlsx".equals(suffix)) {
|
||||||
|
return XLSX;
|
||||||
|
}
|
||||||
|
return UNSUPPORTED;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 判断是否属于 Office 首版接入类型。
|
||||||
|
*
|
||||||
|
* @return 是否是本次 Office 类型
|
||||||
|
*/
|
||||||
|
public boolean isOffice() {
|
||||||
|
return this == PPTX || this == XLSX;
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -286,6 +286,7 @@ public final class DocumentImportDtos {
|
|||||||
private String chunkId;
|
private String chunkId;
|
||||||
private String chunkType;
|
private String chunkType;
|
||||||
private String content;
|
private String content;
|
||||||
|
private String renderMarkdown;
|
||||||
private List<String> headingPath = new ArrayList<String>();
|
private List<String> headingPath = new ArrayList<String>();
|
||||||
private Integer partNo;
|
private Integer partNo;
|
||||||
private Integer partTotal;
|
private Integer partTotal;
|
||||||
@@ -335,6 +336,14 @@ public final class DocumentImportDtos {
|
|||||||
this.content = content;
|
this.content = content;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public String getRenderMarkdown() {
|
||||||
|
return renderMarkdown;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setRenderMarkdown(String renderMarkdown) {
|
||||||
|
this.renderMarkdown = renderMarkdown;
|
||||||
|
}
|
||||||
|
|
||||||
public List<String> getHeadingPath() {
|
public List<String> getHeadingPath() {
|
||||||
return headingPath;
|
return headingPath;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -22,4 +22,19 @@ public final class DocumentImportKeys {
|
|||||||
public static final String KEY_DOCUMENT_PARSE_METADATA = "parse.metadata";
|
public static final String KEY_DOCUMENT_PARSE_METADATA = "parse.metadata";
|
||||||
public static final String KEY_DOCUMENT_PARSE_WARNINGS = "parse.warnings";
|
public static final String KEY_DOCUMENT_PARSE_WARNINGS = "parse.warnings";
|
||||||
public static final String KEY_DOCUMENT_PROVIDER_TASK_ID = "parse.providerTaskId";
|
public static final String KEY_DOCUMENT_PROVIDER_TASK_ID = "parse.providerTaskId";
|
||||||
|
// Keys for image bookkeeping of a parse run.
// NOTE(review): presumably describe images extracted while parsing (URLs, count, storage prefix);
// the writer is not visible here — confirm against the code that stores them.
public static final String KEY_DOCUMENT_PARSE_IMAGE_URLS = "parse.imageUrls";
|
||||||
|
public static final String KEY_DOCUMENT_PARSE_IMAGE_COUNT = "parse.imageCount";
|
||||||
|
public static final String KEY_DOCUMENT_PARSE_IMAGE_STORAGE_PREFIX = "parse.imageStoragePrefix";
|
||||||
|
// Keys for parse progress. currentStage and statusMessage are read from the Document options
// when the import task status stream payload is built.
// NOTE(review): the names mirror the progress fields of the parse task status
// (progressPercent, currentStage, processedItems, totalItems, statusMessage) — confirm the mapping.
public static final String KEY_DOCUMENT_PARSE_PROGRESS_PERCENT = "parse.progressPercent";
|
||||||
|
public static final String KEY_DOCUMENT_PARSE_CURRENT_STAGE = "parse.currentStage";
|
||||||
|
public static final String KEY_DOCUMENT_PARSE_PROCESSED_ITEMS = "parse.processedItems";
|
||||||
|
public static final String KEY_DOCUMENT_PARSE_TOTAL_ITEMS = "parse.totalItems";
|
||||||
|
public static final String KEY_DOCUMENT_PARSE_STATUS_MESSAGE = "parse.statusMessage";
|
||||||
|
// Chunk-level keys. renderMarkdown is read from DocumentChunk options when search hits are resolved.
public static final String KEY_DOCUMENT_RENDER_MARKDOWN = "renderMarkdown";
|
||||||
|
// The visible PPTX chunking test expects pageIndex 1 for the slide with slideIndex 0.
public static final String KEY_DOCUMENT_PAGE_INDEX = "pageIndex";
|
||||||
|
// NOTE(review): sheetName / rowStart / rowEnd presumably locate an XLSX row window — TODO confirm.
public static final String KEY_DOCUMENT_SHEET_NAME = "sheetName";
|
||||||
|
public static final String KEY_DOCUMENT_ROW_START = "rowStart";
|
||||||
|
public static final String KEY_DOCUMENT_ROW_END = "rowEnd";
|
||||||
|
// The visible PPTX chunking test reads this chunk option as a list whose first entry is the slide image URL.
public static final String KEY_DOCUMENT_IMAGE_REFS = "imageRefs";
|
||||||
|
// NOTE(review): presumably the key under which a summary of parse artifacts is stored — confirm.
public static final String KEY_DOCUMENT_PARSE_ARTIFACT_SUMMARY = "parseArtifactSummary";
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -6,6 +6,7 @@ import org.springframework.stereotype.Service;
|
|||||||
import org.springframework.transaction.support.TransactionSynchronization;
|
import org.springframework.transaction.support.TransactionSynchronization;
|
||||||
import org.springframework.transaction.support.TransactionSynchronizationManager;
|
import org.springframework.transaction.support.TransactionSynchronizationManager;
|
||||||
import org.springframework.web.servlet.mvc.method.annotation.SseEmitter;
|
import org.springframework.web.servlet.mvc.method.annotation.SseEmitter;
|
||||||
|
import tech.easyflow.ai.documentimport.DocumentImportKeys;
|
||||||
import tech.easyflow.ai.entity.Document;
|
import tech.easyflow.ai.entity.Document;
|
||||||
import tech.easyflow.ai.mapper.DocumentMapper;
|
import tech.easyflow.ai.mapper.DocumentMapper;
|
||||||
import tech.easyflow.common.web.exceptions.BusinessException;
|
import tech.easyflow.common.web.exceptions.BusinessException;
|
||||||
@@ -116,11 +117,21 @@ public class DocumentImportTaskStatusStreamService {
|
|||||||
payload.put("totalChunks", document.getTotalChunks());
|
payload.put("totalChunks", document.getTotalChunks());
|
||||||
payload.put("completedChunks", document.getCompletedChunks());
|
payload.put("completedChunks", document.getCompletedChunks());
|
||||||
payload.put("failedChunks", document.getFailedChunks());
|
payload.put("failedChunks", document.getFailedChunks());
|
||||||
|
payload.put("parseCurrentStage", readOptionAsString(document, DocumentImportKeys.KEY_DOCUMENT_PARSE_CURRENT_STAGE));
|
||||||
|
payload.put("parseStatusMessage", readOptionAsString(document, DocumentImportKeys.KEY_DOCUMENT_PARSE_STATUS_MESSAGE));
|
||||||
payload.put("lastTaskError", document.getLastTaskError());
|
payload.put("lastTaskError", document.getLastTaskError());
|
||||||
payload.put("taskModifiedAt", document.getTaskModifiedAt());
|
payload.put("taskModifiedAt", document.getTaskModifiedAt());
|
||||||
return payload;
|
return payload;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private String readOptionAsString(Document document, String key) {
|
||||||
|
if (document == null || document.getOptions() == null || key == null) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
Object value = document.getOptions().get(key);
|
||||||
|
return value == null ? null : String.valueOf(value);
|
||||||
|
}
|
||||||
|
|
||||||
private void sendAsync(String topicKey, SseEmitter emitter, String eventName, Map<String, Object> payload) {
|
private void sendAsync(String topicKey, SseEmitter emitter, String eventName, Map<String, Object> payload) {
|
||||||
sseThreadPool.execute(() -> {
|
sseThreadPool.execute(() -> {
|
||||||
try {
|
try {
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@@ -4,6 +4,8 @@ public class KnowledgeSearchResultItem {
|
|||||||
|
|
||||||
private Integer sorting;
|
private Integer sorting;
|
||||||
private String content;
|
private String content;
|
||||||
|
private String renderMarkdown;
|
||||||
|
private String sourceFileName;
|
||||||
private Double score;
|
private Double score;
|
||||||
private String hitSource;
|
private String hitSource;
|
||||||
private Double vectorScore;
|
private Double vectorScore;
|
||||||
@@ -25,6 +27,22 @@ public class KnowledgeSearchResultItem {
|
|||||||
this.content = content;
|
this.content = content;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public String getRenderMarkdown() {
|
||||||
|
return renderMarkdown;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setRenderMarkdown(String renderMarkdown) {
|
||||||
|
this.renderMarkdown = renderMarkdown;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getSourceFileName() {
|
||||||
|
return sourceFileName;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setSourceFileName(String sourceFileName) {
|
||||||
|
this.sourceFileName = sourceFileName;
|
||||||
|
}
|
||||||
|
|
||||||
public Double getScore() {
|
public Double getScore() {
|
||||||
return score;
|
return score;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -32,6 +32,7 @@ import tech.easyflow.ai.entity.FaqItem;
|
|||||||
import tech.easyflow.ai.entity.Model;
|
import tech.easyflow.ai.entity.Model;
|
||||||
import tech.easyflow.ai.enums.DocumentProcessStatus;
|
import tech.easyflow.ai.enums.DocumentProcessStatus;
|
||||||
import tech.easyflow.ai.enums.PublishStatus;
|
import tech.easyflow.ai.enums.PublishStatus;
|
||||||
|
import tech.easyflow.ai.documentimport.DocumentImportKeys;
|
||||||
import tech.easyflow.ai.mapper.DocumentChunkMapper;
|
import tech.easyflow.ai.mapper.DocumentChunkMapper;
|
||||||
import tech.easyflow.ai.mapper.DocumentCollectionMapper;
|
import tech.easyflow.ai.mapper.DocumentCollectionMapper;
|
||||||
import tech.easyflow.ai.mapper.DocumentMapper;
|
import tech.easyflow.ai.mapper.DocumentMapper;
|
||||||
@@ -406,6 +407,14 @@ public class DocumentCollectionServiceImpl extends ServiceImpl<DocumentCollectio
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
item.setContent(content);
|
item.setContent(content);
|
||||||
|
String renderMarkdown = hitSnapshot.findChunkRenderMarkdown(item.getId());
|
||||||
|
if (StringUtil.hasText(renderMarkdown)) {
|
||||||
|
item.addMetadata("renderMarkdown", renderMarkdown);
|
||||||
|
}
|
||||||
|
String sourceFileName = hitSnapshot.findSourceFileName(item.getId());
|
||||||
|
if (StringUtil.hasText(sourceFileName)) {
|
||||||
|
item.addMetadata("sourceFileName", sourceFileName);
|
||||||
|
}
|
||||||
return true;
|
return true;
|
||||||
})
|
})
|
||||||
.collect(Collectors.toList());
|
.collect(Collectors.toList());
|
||||||
@@ -596,6 +605,30 @@ public class DocumentCollectionServiceImpl extends ServiceImpl<DocumentCollectio
|
|||||||
}
|
}
|
||||||
return StringUtil.noText(documentChunk.getContent()) ? null : documentChunk.getContent();
|
return StringUtil.noText(documentChunk.getContent()) ? null : documentChunk.getContent();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private String findChunkRenderMarkdown(Object chunkId) {
|
||||||
|
DocumentChunk documentChunk = chunkMap.get(String.valueOf(chunkId));
|
||||||
|
if (documentChunk == null || documentChunk.getDocumentId() == null || documentChunk.getOptions() == null) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
if (!documentMap.containsKey(String.valueOf(documentChunk.getDocumentId()))) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
Object renderMarkdown = documentChunk.getOptions().get(DocumentImportKeys.KEY_DOCUMENT_RENDER_MARKDOWN);
|
||||||
|
return renderMarkdown == null ? null : String.valueOf(renderMarkdown);
|
||||||
|
}
|
||||||
|
|
||||||
|
private String findSourceFileName(Object chunkId) {
|
||||||
|
DocumentChunk documentChunk = chunkMap.get(String.valueOf(chunkId));
|
||||||
|
if (documentChunk == null || documentChunk.getDocumentId() == null) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
tech.easyflow.ai.entity.Document sourceDocument = documentMap.get(String.valueOf(documentChunk.getDocumentId()));
|
||||||
|
if (sourceDocument == null || StringUtil.noText(sourceDocument.getTitle())) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
return sourceDocument.getTitle();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private String buildFaqPromptContent(FaqItem faqItem, List<Map<String, String>> images) {
|
private String buildFaqPromptContent(FaqItem faqItem, List<Map<String, String>> images) {
|
||||||
|
|||||||
@@ -6,6 +6,9 @@ import com.easyagents.document.core.entity.ParseResponse;
|
|||||||
import com.easyagents.document.core.entity.ParseResult;
|
import com.easyagents.document.core.entity.ParseResult;
|
||||||
import com.easyagents.document.core.entity.ParseTaskInfo;
|
import com.easyagents.document.core.entity.ParseTaskInfo;
|
||||||
import com.easyagents.document.core.entity.ParseTaskStatus;
|
import com.easyagents.document.core.entity.ParseTaskStatus;
|
||||||
|
import com.easyagents.document.pdf.PdfDocumentParseService;
|
||||||
|
import com.easyagents.document.pptx.PptxDocumentParseService;
|
||||||
|
import com.easyagents.document.xlsx.XlsxDocumentParseService;
|
||||||
import org.junit.Assert;
|
import org.junit.Assert;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
import tech.easyflow.ai.document.exception.DocumentParseBridgeException;
|
import tech.easyflow.ai.document.exception.DocumentParseBridgeException;
|
||||||
@@ -37,8 +40,8 @@ public class DocumentParseBridgeServiceImplTest {
|
|||||||
*/
|
*/
|
||||||
@Test
|
@Test
|
||||||
public void shouldParseSuccessfully() {
|
public void shouldParseSuccessfully() {
|
||||||
FakeDocumentParseService parseService = new FakeDocumentParseService();
|
FakePdfDocumentParseService parseService = new FakePdfDocumentParseService();
|
||||||
DocumentParseBridgeServiceImpl bridgeService = buildBridgeService(parseService);
|
DocumentParseBridgeServiceImpl bridgeService = buildBridgeService(parseService, null, null, parseService);
|
||||||
|
|
||||||
DocumentParsedResult document = bridgeService.parse(buildSource(), DocumentParseScenario.WORKFLOW_TEXT);
|
DocumentParsedResult document = bridgeService.parse(buildSource(), DocumentParseScenario.WORKFLOW_TEXT);
|
||||||
|
|
||||||
@@ -52,8 +55,8 @@ public class DocumentParseBridgeServiceImplTest {
|
|||||||
*/
|
*/
|
||||||
@Test
|
@Test
|
||||||
public void shouldSupportAsyncFlow() {
|
public void shouldSupportAsyncFlow() {
|
||||||
FakeDocumentParseService parseService = new FakeDocumentParseService();
|
FakePdfDocumentParseService parseService = new FakePdfDocumentParseService();
|
||||||
DocumentParseBridgeServiceImpl bridgeService = buildBridgeService(parseService);
|
DocumentParseBridgeServiceImpl bridgeService = buildBridgeService(parseService, null, null, parseService);
|
||||||
|
|
||||||
DocumentParseTaskStatus taskStatus = bridgeService.submit(buildSource(), DocumentParseScenario.KNOWLEDGE_IMPORT);
|
DocumentParseTaskStatus taskStatus = bridgeService.submit(buildSource(), DocumentParseScenario.KNOWLEDGE_IMPORT);
|
||||||
DocumentParseTaskStatus queriedStatus = bridgeService.queryTask("task-1");
|
DocumentParseTaskStatus queriedStatus = bridgeService.queryTask("task-1");
|
||||||
@@ -69,9 +72,9 @@ public class DocumentParseBridgeServiceImplTest {
|
|||||||
*/
|
*/
|
||||||
@Test
|
@Test
|
||||||
public void shouldQueryTaskInfoSuccessfully() {
|
public void shouldQueryTaskInfoSuccessfully() {
|
||||||
FakeDocumentParseService parseService = new FakeDocumentParseService();
|
FakePdfDocumentParseService parseService = new FakePdfDocumentParseService();
|
||||||
parseService.taskStatusValue = "completed";
|
parseService.taskStatusValue = "completed";
|
||||||
DocumentParseBridgeServiceImpl bridgeService = buildBridgeService(parseService);
|
DocumentParseBridgeServiceImpl bridgeService = buildBridgeService(parseService, null, null, parseService);
|
||||||
|
|
||||||
DocumentParseTaskInfo taskInfo = bridgeService.queryTaskInfo("task-1");
|
DocumentParseTaskInfo taskInfo = bridgeService.queryTaskInfo("task-1");
|
||||||
|
|
||||||
@@ -85,7 +88,7 @@ public class DocumentParseBridgeServiceImplTest {
|
|||||||
*/
|
*/
|
||||||
@Test
|
@Test
|
||||||
public void shouldThrowWhenServiceDisabled() {
|
public void shouldThrowWhenServiceDisabled() {
|
||||||
DocumentParseBridgeServiceImpl bridgeService = buildBridgeService(null);
|
DocumentParseBridgeServiceImpl bridgeService = buildBridgeService(null, null, null, null);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
bridgeService.parse(buildSource(), DocumentParseScenario.WORKFLOW_TEXT);
|
bridgeService.parse(buildSource(), DocumentParseScenario.WORKFLOW_TEXT);
|
||||||
@@ -95,9 +98,29 @@ public class DocumentParseBridgeServiceImplTest {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private DocumentParseBridgeServiceImpl buildBridgeService(DocumentParseService parseService) {
|
@Test
|
||||||
|
public void shouldRoutePptxToDedicatedService() {
|
||||||
|
FakePptxDocumentParseService pptxService = new FakePptxDocumentParseService();
|
||||||
|
FakePdfDocumentParseService defaultService = new FakePdfDocumentParseService();
|
||||||
|
DocumentParseBridgeServiceImpl bridgeService = buildBridgeService(null, pptxService, null, defaultService);
|
||||||
|
|
||||||
|
DocumentParsedResult result = bridgeService.parse(buildSource("slides.pptx",
|
||||||
|
"application/vnd.openxmlformats-officedocument.presentationml.presentation"), DocumentParseScenario.KNOWLEDGE_IMPORT);
|
||||||
|
|
||||||
|
Assert.assertEquals("# pptx", result.getPreferredText());
|
||||||
|
Assert.assertEquals(1, pptxService.parseCallCount);
|
||||||
|
Assert.assertEquals(0, defaultService.parseCallCount);
|
||||||
|
}
|
||||||
|
|
||||||
|
private DocumentParseBridgeServiceImpl buildBridgeService(PdfDocumentParseService pdfDocumentParseService,
|
||||||
|
PptxDocumentParseService pptxDocumentParseService,
|
||||||
|
XlsxDocumentParseService xlsxDocumentParseService,
|
||||||
|
DocumentParseService parseService) {
|
||||||
return new DocumentParseBridgeServiceImpl(
|
return new DocumentParseBridgeServiceImpl(
|
||||||
parseService,
|
parseService,
|
||||||
|
pdfDocumentParseService,
|
||||||
|
pptxDocumentParseService,
|
||||||
|
xlsxDocumentParseService,
|
||||||
new DocumentSourceLoader(new InMemoryFileStorageService()),
|
new DocumentSourceLoader(new InMemoryFileStorageService()),
|
||||||
new DocumentParseRequestFactory(),
|
new DocumentParseRequestFactory(),
|
||||||
new DocumentParseResultMapper()
|
new DocumentParseResultMapper()
|
||||||
@@ -105,8 +128,12 @@ public class DocumentParseBridgeServiceImplTest {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private DocumentSourceRef buildSource() {
|
private DocumentSourceRef buildSource() {
|
||||||
DocumentSourceRef sourceRef = DocumentSourceRef.ofBytes("demo.pdf", "pdf-data".getBytes(StandardCharsets.UTF_8));
|
return buildSource("demo.pdf", "application/pdf");
|
||||||
sourceRef.setContentType("application/pdf");
|
}
|
||||||
|
|
||||||
|
private DocumentSourceRef buildSource(String fileName, String contentType) {
|
||||||
|
DocumentSourceRef sourceRef = DocumentSourceRef.ofBytes(fileName, "pdf-data".getBytes(StandardCharsets.UTF_8));
|
||||||
|
sourceRef.setContentType(contentType);
|
||||||
sourceRef.setSize(8L);
|
sourceRef.setSize(8L);
|
||||||
return sourceRef;
|
return sourceRef;
|
||||||
}
|
}
|
||||||
@@ -133,13 +160,15 @@ public class DocumentParseBridgeServiceImplTest {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static class FakeDocumentParseService implements DocumentParseService {
|
private static class FakePdfDocumentParseService implements PdfDocumentParseService {
|
||||||
|
|
||||||
private ParseRequest lastParseRequest;
|
private ParseRequest lastParseRequest;
|
||||||
private String taskStatusValue = "running";
|
private String taskStatusValue = "running";
|
||||||
|
private int parseCallCount;
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public ParseResponse parse(ParseRequest request) {
|
public ParseResponse parse(ParseRequest request) {
|
||||||
|
parseCallCount++;
|
||||||
this.lastParseRequest = request;
|
this.lastParseRequest = request;
|
||||||
return buildResponse();
|
return buildResponse();
|
||||||
}
|
}
|
||||||
@@ -187,4 +216,36 @@ public class DocumentParseBridgeServiceImplTest {
|
|||||||
return response;
|
return response;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static class FakePptxDocumentParseService implements PptxDocumentParseService {
|
||||||
|
|
||||||
|
private int parseCallCount;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public ParseResponse parse(ParseRequest request) {
|
||||||
|
parseCallCount++;
|
||||||
|
ParseResult result = new ParseResult();
|
||||||
|
result.setFileName("slides.pptx");
|
||||||
|
result.setMarkdown("# pptx");
|
||||||
|
result.setPlainText("pptx");
|
||||||
|
ParseResponse response = new ParseResponse();
|
||||||
|
response.setResults(Collections.singletonList(result));
|
||||||
|
return response;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public ParseTaskStatus submit(ParseRequest request) {
|
||||||
|
throw new UnsupportedOperationException();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public ParseTaskStatus queryTask(String taskId) {
|
||||||
|
throw new UnsupportedOperationException();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public ParseResponse queryResult(String taskId) {
|
||||||
|
throw new UnsupportedOperationException();
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,6 +1,9 @@
|
|||||||
package tech.easyflow.ai.document.support;
|
package tech.easyflow.ai.document.support;
|
||||||
|
|
||||||
import com.easyagents.document.core.entity.ParseRequest;
|
import com.easyagents.document.core.entity.ParseRequest;
|
||||||
|
import com.easyagents.document.core.entity.PdfParseRequest;
|
||||||
|
import com.easyagents.document.core.entity.PptxParseRequest;
|
||||||
|
import com.easyagents.document.core.entity.XlsxParseRequest;
|
||||||
import org.junit.Assert;
|
import org.junit.Assert;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
import tech.easyflow.ai.document.model.DocumentParseScenario;
|
import tech.easyflow.ai.document.model.DocumentParseScenario;
|
||||||
@@ -26,6 +29,7 @@ public class DocumentParseRequestFactoryTest {
|
|||||||
Assert.assertFalse(request.getReturnMiddleJson());
|
Assert.assertFalse(request.getReturnMiddleJson());
|
||||||
Assert.assertFalse(request.getReturnContentList());
|
Assert.assertFalse(request.getReturnContentList());
|
||||||
Assert.assertFalse(request.getReturnImages());
|
Assert.assertFalse(request.getReturnImages());
|
||||||
|
Assert.assertTrue(request instanceof PdfParseRequest);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -41,12 +45,33 @@ public class DocumentParseRequestFactoryTest {
|
|||||||
Assert.assertTrue(request.getReturnMiddleJson());
|
Assert.assertTrue(request.getReturnMiddleJson());
|
||||||
Assert.assertTrue(request.getReturnContentList());
|
Assert.assertTrue(request.getReturnContentList());
|
||||||
Assert.assertTrue(request.getReturnImages());
|
Assert.assertTrue(request.getReturnImages());
|
||||||
|
Assert.assertTrue(request instanceof PdfParseRequest);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 验证 PPTX / XLSX 会构建对应的强类型请求。
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void shouldBuildOfficeTypedRequests() {
|
||||||
|
DocumentParseRequestFactory factory = new DocumentParseRequestFactory();
|
||||||
|
|
||||||
|
ParseRequest pptxRequest = factory.build(buildSource("slides.pptx",
|
||||||
|
"application/vnd.openxmlformats-officedocument.presentationml.presentation"), DocumentParseScenario.KNOWLEDGE_IMPORT);
|
||||||
|
ParseRequest xlsxRequest = factory.build(buildSource("table.xlsx",
|
||||||
|
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"), DocumentParseScenario.KNOWLEDGE_IMPORT);
|
||||||
|
|
||||||
|
Assert.assertTrue(pptxRequest instanceof PptxParseRequest);
|
||||||
|
Assert.assertTrue(xlsxRequest instanceof XlsxParseRequest);
|
||||||
}
|
}
|
||||||
|
|
||||||
private LoadedDocumentSource buildSource() {
|
private LoadedDocumentSource buildSource() {
|
||||||
|
return buildSource("demo.pdf", "application/pdf");
|
||||||
|
}
|
||||||
|
|
||||||
|
private LoadedDocumentSource buildSource(String fileName, String contentType) {
|
||||||
LoadedDocumentSource source = new LoadedDocumentSource();
|
LoadedDocumentSource source = new LoadedDocumentSource();
|
||||||
source.setFileName("demo.pdf");
|
source.setFileName(fileName);
|
||||||
source.setContentType("application/pdf");
|
source.setContentType(contentType);
|
||||||
source.setContentBytes("pdf-data".getBytes());
|
source.setContentBytes("pdf-data".getBytes());
|
||||||
source.setSize(8L);
|
source.setSize(8L);
|
||||||
return source;
|
return source;
|
||||||
|
|||||||
@@ -4,6 +4,7 @@ import com.easyagents.document.core.entity.ParseArtifacts;
|
|||||||
import com.easyagents.document.core.entity.ParseResult;
|
import com.easyagents.document.core.entity.ParseResult;
|
||||||
import com.easyagents.document.core.entity.ParseResponse;
|
import com.easyagents.document.core.entity.ParseResponse;
|
||||||
import com.easyagents.document.core.entity.ParseTaskInfo;
|
import com.easyagents.document.core.entity.ParseTaskInfo;
|
||||||
|
import com.easyagents.document.core.entity.ParseTaskStatus;
|
||||||
import org.junit.Assert;
|
import org.junit.Assert;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
import tech.easyflow.ai.document.model.DocumentParseTaskInfo;
|
import tech.easyflow.ai.document.model.DocumentParseTaskInfo;
|
||||||
@@ -65,6 +66,8 @@ public class DocumentParseResultMapperTest {
|
|||||||
ParseTaskInfo taskInfo = new ParseTaskInfo();
|
ParseTaskInfo taskInfo = new ParseTaskInfo();
|
||||||
taskInfo.setTaskId("task-1");
|
taskInfo.setTaskId("task-1");
|
||||||
taskInfo.setStatus("completed");
|
taskInfo.setStatus("completed");
|
||||||
|
taskInfo.setProgressPercent(100);
|
||||||
|
taskInfo.setCurrentStage("completed");
|
||||||
|
|
||||||
ParseResult result = new ParseResult();
|
ParseResult result = new ParseResult();
|
||||||
result.setFileName("demo.pdf");
|
result.setFileName("demo.pdf");
|
||||||
@@ -76,7 +79,33 @@ public class DocumentParseResultMapperTest {
|
|||||||
DocumentParseTaskInfo mapped = mapper.map(taskInfo);
|
DocumentParseTaskInfo mapped = mapper.map(taskInfo);
|
||||||
|
|
||||||
Assert.assertEquals("task-1", mapped.getTaskId());
|
Assert.assertEquals("task-1", mapped.getTaskId());
|
||||||
|
Assert.assertEquals(Integer.valueOf(100), mapped.getProgressPercent());
|
||||||
|
Assert.assertEquals("completed", mapped.getCurrentStage());
|
||||||
Assert.assertNotNull(mapped.getResult());
|
Assert.assertNotNull(mapped.getResult());
|
||||||
Assert.assertEquals("# title", mapped.getResult().getPreferredText());
|
Assert.assertEquals("# title", mapped.getResult().getPreferredText());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 验证异步进度字段被完整透传。
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void shouldMapTaskStatusProgressFields() {
|
||||||
|
DocumentParseResultMapper mapper = new DocumentParseResultMapper();
|
||||||
|
ParseTaskStatus status = new ParseTaskStatus();
|
||||||
|
status.setTaskId("task-2");
|
||||||
|
status.setStatus("running");
|
||||||
|
status.setProgressPercent(45);
|
||||||
|
status.setCurrentStage("ocr");
|
||||||
|
status.setProcessedItems(9);
|
||||||
|
status.setTotalItems(20);
|
||||||
|
status.setStatusMessage("正在识别图片");
|
||||||
|
|
||||||
|
tech.easyflow.ai.document.model.DocumentParseTaskStatus mapped = mapper.map(status);
|
||||||
|
|
||||||
|
Assert.assertEquals(Integer.valueOf(45), mapped.getProgressPercent());
|
||||||
|
Assert.assertEquals("ocr", mapped.getCurrentStage());
|
||||||
|
Assert.assertEquals(Integer.valueOf(9), mapped.getProcessedItems());
|
||||||
|
Assert.assertEquals(Integer.valueOf(20), mapped.getTotalItems());
|
||||||
|
Assert.assertEquals("正在识别图片", mapped.getStatusMessage());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,17 +1,33 @@
|
|||||||
package tech.easyflow.ai.documentimport.task;
|
package tech.easyflow.ai.documentimport.task;
|
||||||
|
|
||||||
|
import com.easyagents.document.core.entity.DocumentBlock;
|
||||||
|
import com.easyagents.document.core.entity.DocumentImage;
|
||||||
|
import com.easyagents.document.core.entity.DocumentTable;
|
||||||
|
import com.easyagents.rag.ingestion.model.StrategyConfig;
|
||||||
import org.junit.Assert;
|
import org.junit.Assert;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
import org.springframework.web.multipart.MultipartFile;
|
||||||
|
import tech.easyflow.ai.document.model.DocumentParseArtifacts;
|
||||||
|
import tech.easyflow.ai.document.model.DocumentParsedResult;
|
||||||
|
import tech.easyflow.ai.documentimport.DocumentImportKeys;
|
||||||
|
import tech.easyflow.ai.entity.DocumentChunk;
|
||||||
import tech.easyflow.ai.entity.DocumentImportTask;
|
import tech.easyflow.ai.entity.DocumentImportTask;
|
||||||
import tech.easyflow.ai.enums.DocumentImportTaskStatus;
|
import tech.easyflow.ai.enums.DocumentImportTaskStatus;
|
||||||
import tech.easyflow.ai.enums.DocumentProcessStatus;
|
import tech.easyflow.ai.enums.DocumentProcessStatus;
|
||||||
import tech.easyflow.ai.mapper.DocumentMapper;
|
import tech.easyflow.ai.mapper.DocumentMapper;
|
||||||
import tech.easyflow.ai.service.DocumentImportTaskService;
|
import tech.easyflow.ai.service.DocumentImportTaskService;
|
||||||
|
import tech.easyflow.common.filestorage.FileStorageService;
|
||||||
|
|
||||||
import java.lang.reflect.Field;
|
import java.lang.reflect.Field;
|
||||||
import java.lang.reflect.Method;
|
import java.lang.reflect.Method;
|
||||||
import java.lang.reflect.Proxy;
|
import java.lang.reflect.Proxy;
|
||||||
import java.math.BigInteger;
|
import java.math.BigInteger;
|
||||||
|
import java.nio.charset.StandardCharsets;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Base64;
|
||||||
|
import java.util.LinkedHashMap;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
import java.util.concurrent.atomic.AtomicReference;
|
import java.util.concurrent.atomic.AtomicReference;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -84,6 +100,258 @@ public class KnowledgeDocumentImportTaskAppServiceTest {
|
|||||||
Assert.assertEquals("新错误", updatedTask.getErrorSummary());
|
Assert.assertEquals("新错误", updatedTask.getErrorSummary());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 验证知识库导入会把解析图片上传到对象存储,并同步改写 Markdown 与结构化引用。
|
||||||
|
*
|
||||||
|
* @throws Exception 反射调用异常
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void normalizeParsedImagesForKnowledgeImportShouldUploadAndRewriteReferences() throws Exception {
|
||||||
|
KnowledgeDocumentImportTaskAppService service = new KnowledgeDocumentImportTaskAppService();
|
||||||
|
AtomicReference<String> savedPrePathRef = new AtomicReference<String>();
|
||||||
|
AtomicReference<String> savedFilenameRef = new AtomicReference<String>();
|
||||||
|
setField(service, "storageService", mockFileStorageService(savedPrePathRef, savedFilenameRef));
|
||||||
|
|
||||||
|
tech.easyflow.ai.entity.Document document = new tech.easyflow.ai.entity.Document();
|
||||||
|
document.setId(BigInteger.valueOf(88));
|
||||||
|
document.setTitle("产品说明书(终版).pdf");
|
||||||
|
|
||||||
|
DocumentParsedResult parsedResult = new DocumentParsedResult();
|
||||||
|
parsedResult.setMarkdown("图例如下:\n");
|
||||||
|
parsedResult.setPreferredText(parsedResult.getMarkdown());
|
||||||
|
parsedResult.setPlainText(parsedResult.getMarkdown());
|
||||||
|
|
||||||
|
DocumentImage image = new DocumentImage();
|
||||||
|
image.setName("sample-image.png");
|
||||||
|
image.setSourcePath("images/sample-image.png");
|
||||||
|
image.setMimeType("image/png");
|
||||||
|
image.setDataUrl("data:image/png;base64," + Base64.getEncoder().encodeToString("demo".getBytes(StandardCharsets.UTF_8)));
|
||||||
|
parsedResult.setImages(new ArrayList<DocumentImage>(List.of(image)));
|
||||||
|
|
||||||
|
DocumentBlock block = new DocumentBlock();
|
||||||
|
block.setImagePath("images/sample-image.png");
|
||||||
|
parsedResult.setBlocks(new ArrayList<DocumentBlock>(List.of(block)));
|
||||||
|
|
||||||
|
DocumentTable table = new DocumentTable();
|
||||||
|
table.setImagePath("images/sample-image.png");
|
||||||
|
parsedResult.setTables(new ArrayList<DocumentTable>(List.of(table)));
|
||||||
|
|
||||||
|
DocumentParseArtifacts artifacts = new DocumentParseArtifacts();
|
||||||
|
List<Map<String, Object>> contentList = new ArrayList<Map<String, Object>>();
|
||||||
|
Map<String, Object> contentItem = new LinkedHashMap<String, Object>();
|
||||||
|
contentItem.put("img_path", "images/sample-image.png");
|
||||||
|
contentList.add(contentItem);
|
||||||
|
artifacts.setContentList(contentList);
|
||||||
|
Map<String, Object> xlsxArtifact = new LinkedHashMap<String, Object>();
|
||||||
|
List<Map<String, Object>> sheetImages = new ArrayList<Map<String, Object>>();
|
||||||
|
sheetImages.add(new LinkedHashMap<String, Object>() {{
|
||||||
|
put("sheetName", "Sheet1");
|
||||||
|
put("sourcePaths", new ArrayList<String>(List.of("images/sample-image.png")));
|
||||||
|
}});
|
||||||
|
xlsxArtifact.put("sheetImages", sheetImages);
|
||||||
|
artifacts.setExtraJsonArtifacts(new LinkedHashMap<String, Object>() {{
|
||||||
|
put("xlsx", xlsxArtifact);
|
||||||
|
}});
|
||||||
|
parsedResult.setArtifacts(artifacts);
|
||||||
|
|
||||||
|
Method method = KnowledgeDocumentImportTaskAppService.class.getDeclaredMethod(
|
||||||
|
"normalizeParsedImagesForKnowledgeImport",
|
||||||
|
tech.easyflow.ai.entity.Document.class,
|
||||||
|
DocumentParsedResult.class
|
||||||
|
);
|
||||||
|
method.setAccessible(true);
|
||||||
|
DocumentParsedResult normalized = (DocumentParsedResult) method.invoke(service, document, parsedResult);
|
||||||
|
|
||||||
|
Assert.assertNotNull(normalized);
|
||||||
|
Assert.assertEquals("knowledge-parse/88_产品说明书_终版/images", savedPrePathRef.get());
|
||||||
|
Assert.assertEquals("sample-image.png", savedFilenameRef.get());
|
||||||
|
|
||||||
|
String expectedUrl = "http://localhost:39000/easyflow/attachment/knowledge-parse/88_产品说明书_终版/images/sample-image.png";
|
||||||
|
Assert.assertTrue(normalized.getMarkdown().contains(expectedUrl));
|
||||||
|
Assert.assertEquals(expectedUrl, normalized.getBlocks().get(0).getImagePath());
|
||||||
|
Assert.assertEquals(expectedUrl, normalized.getTables().get(0).getImagePath());
|
||||||
|
Assert.assertEquals(expectedUrl, normalized.getImages().get(0).getSourcePath());
|
||||||
|
Assert.assertNull(normalized.getImages().get(0).getDataUrl());
|
||||||
|
Object rewrittenContentList = normalized.getArtifacts().getContentList();
|
||||||
|
Assert.assertTrue(rewrittenContentList instanceof List<?>);
|
||||||
|
Assert.assertEquals(expectedUrl, ((Map<?, ?>) ((List<?>) rewrittenContentList).get(0)).get("img_path"));
|
||||||
|
Object rewrittenSheetImages = ((Map<?, ?>) normalized.getArtifacts().getExtraJsonArtifacts().get("xlsx")).get("sheetImages");
|
||||||
|
Assert.assertTrue(rewrittenSheetImages instanceof List<?>);
|
||||||
|
Object sourcePaths = ((Map<?, ?>) ((List<?>) rewrittenSheetImages).get(0)).get("sourcePaths");
|
||||||
|
Assert.assertEquals(expectedUrl, ((List<?>) sourcePaths).get(0));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 验证 PPTX 会基于页级工件生成稳定的知识库分块。
|
||||||
|
*
|
||||||
|
* @throws Exception 反射调用异常
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void buildOfficeDocumentChunksShouldSplitPptxBySlide() throws Exception {
|
||||||
|
KnowledgeDocumentImportTaskAppService service = new KnowledgeDocumentImportTaskAppService();
|
||||||
|
tech.easyflow.ai.entity.Document document = new tech.easyflow.ai.entity.Document();
|
||||||
|
document.setId(BigInteger.valueOf(101));
|
||||||
|
document.setCollectionId(BigInteger.valueOf(201));
|
||||||
|
document.setTitle("季度汇报.pptx");
|
||||||
|
|
||||||
|
Map<String, Object> parseArtifactSummary = new LinkedHashMap<String, Object>();
|
||||||
|
List<Map<String, Object>> slides = new ArrayList<Map<String, Object>>();
|
||||||
|
slides.add(new LinkedHashMap<String, Object>() {{
|
||||||
|
put("slideIndex", 0);
|
||||||
|
put("title", "封面");
|
||||||
|
put("ocrMarkdown", "本页介绍季度目标。");
|
||||||
|
put("imagePath", "https://example.com/slides/slide-001.png");
|
||||||
|
put("imageName", "slide-001-page");
|
||||||
|
}});
|
||||||
|
slides.add(new LinkedHashMap<String, Object>() {{
|
||||||
|
put("slideIndex", 1);
|
||||||
|
put("title", "经营分析");
|
||||||
|
put("ocrMarkdown", "收入同比增长 18%。");
|
||||||
|
put("imagePath", "https://example.com/slides/slide-002.png");
|
||||||
|
put("imageName", "slide-002-page");
|
||||||
|
}});
|
||||||
|
parseArtifactSummary.put("slides", slides);
|
||||||
|
|
||||||
|
Method method = KnowledgeDocumentImportTaskAppService.class.getDeclaredMethod(
|
||||||
|
"buildOfficeDocumentChunks",
|
||||||
|
tech.easyflow.ai.entity.Document.class,
|
||||||
|
String.class,
|
||||||
|
StrategyConfig.class,
|
||||||
|
Map.class
|
||||||
|
);
|
||||||
|
method.setAccessible(true);
|
||||||
|
|
||||||
|
@SuppressWarnings("unchecked")
|
||||||
|
List<DocumentChunk> chunks = (List<DocumentChunk>) method.invoke(
|
||||||
|
service,
|
||||||
|
document,
|
||||||
|
"pptx",
|
||||||
|
null,
|
||||||
|
parseArtifactSummary
|
||||||
|
);
|
||||||
|
|
||||||
|
Assert.assertEquals(2, chunks.size());
|
||||||
|
DocumentChunk firstChunk = chunks.get(0);
|
||||||
|
Assert.assertTrue(firstChunk.getContent().contains("Slide 1"));
|
||||||
|
Assert.assertTrue(firstChunk.getContent().contains("本页介绍季度目标"));
|
||||||
|
Assert.assertEquals("https://example.com/slides/slide-001.png",
|
||||||
|
((List<?>) firstChunk.getOptions().get(DocumentImportKeys.KEY_DOCUMENT_IMAGE_REFS)).get(0));
|
||||||
|
Assert.assertEquals(1, firstChunk.getOptions().get(DocumentImportKeys.KEY_DOCUMENT_PAGE_INDEX));
|
||||||
|
Assert.assertTrue(String.valueOf(firstChunk.getOptions().get(DocumentImportKeys.KEY_DOCUMENT_RENDER_MARKDOWN))
|
||||||
|
.contains("slide-001.png"));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 验证 XLSX 纯图片 Sheet 不会退化为空内容,并会输出稳定图片引用。
|
||||||
|
*
|
||||||
|
* @throws Exception 反射调用异常
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void buildOfficeDocumentChunksShouldKeepImageOnlyXlsxSheetReferences() throws Exception {
|
||||||
|
KnowledgeDocumentImportTaskAppService service = new KnowledgeDocumentImportTaskAppService();
|
||||||
|
tech.easyflow.ai.entity.Document document = new tech.easyflow.ai.entity.Document();
|
||||||
|
document.setId(BigInteger.valueOf(102));
|
||||||
|
document.setCollectionId(BigInteger.valueOf(202));
|
||||||
|
document.setTitle("巡检记录.xlsx");
|
||||||
|
|
||||||
|
Map<String, Object> parseArtifactSummary = new LinkedHashMap<String, Object>();
|
||||||
|
List<Map<String, Object>> sheets = new ArrayList<Map<String, Object>>();
|
||||||
|
sheets.add(new LinkedHashMap<String, Object>() {{
|
||||||
|
put("sheetName", "图片页");
|
||||||
|
put("sheetIndex", 0);
|
||||||
|
put("rows", new ArrayList<Map<String, Object>>());
|
||||||
|
}});
|
||||||
|
parseArtifactSummary.put("sheets", sheets);
|
||||||
|
|
||||||
|
List<Map<String, Object>> cellImages = new ArrayList<Map<String, Object>>();
|
||||||
|
cellImages.add(new LinkedHashMap<String, Object>() {{
|
||||||
|
put("sheetName", "图片页");
|
||||||
|
put("referenceKey", "image-sheet-r2c2-001");
|
||||||
|
put("sourcePath", "https://example.com/xlsx/sheet/image-001.jpeg");
|
||||||
|
put("anchorCell", "B2");
|
||||||
|
put("ocrText", "设备状态正常");
|
||||||
|
put("fromRow", 1);
|
||||||
|
}});
|
||||||
|
parseArtifactSummary.put("cellImages", cellImages);
|
||||||
|
|
||||||
|
StrategyConfig strategyConfig = new StrategyConfig();
|
||||||
|
strategyConfig.setRowsPerChunk(10);
|
||||||
|
|
||||||
|
Method method = KnowledgeDocumentImportTaskAppService.class.getDeclaredMethod(
|
||||||
|
"buildOfficeDocumentChunks",
|
||||||
|
tech.easyflow.ai.entity.Document.class,
|
||||||
|
String.class,
|
||||||
|
StrategyConfig.class,
|
||||||
|
Map.class
|
||||||
|
);
|
||||||
|
method.setAccessible(true);
|
||||||
|
|
||||||
|
@SuppressWarnings("unchecked")
|
||||||
|
List<DocumentChunk> chunks = (List<DocumentChunk>) method.invoke(
|
||||||
|
service,
|
||||||
|
document,
|
||||||
|
"xlsx",
|
||||||
|
strategyConfig,
|
||||||
|
parseArtifactSummary
|
||||||
|
);
|
||||||
|
|
||||||
|
Assert.assertEquals(1, chunks.size());
|
||||||
|
DocumentChunk onlyChunk = chunks.get(0);
|
||||||
|
Assert.assertTrue(onlyChunk.getContent().contains("图片 OCR"));
|
||||||
|
Assert.assertTrue(onlyChunk.getContent().contains("设备状态正常"));
|
||||||
|
Assert.assertEquals("图片页", onlyChunk.getOptions().get(DocumentImportKeys.KEY_DOCUMENT_SHEET_NAME));
|
||||||
|
Assert.assertEquals("https://example.com/xlsx/sheet/image-001.jpeg",
|
||||||
|
((List<?>) onlyChunk.getOptions().get(DocumentImportKeys.KEY_DOCUMENT_IMAGE_REFS)).get(0));
|
||||||
|
String renderMarkdown = String.valueOf(onlyChunk.getOptions().get(DocumentImportKeys.KEY_DOCUMENT_RENDER_MARKDOWN));
|
||||||
|
Assert.assertTrue(renderMarkdown.contains("[IMG:image-sheet-r2c2-001]"));
|
||||||
|
Assert.assertTrue(renderMarkdown.contains(""));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 验证空白 Sheet 不会被误判成纯图片分块。
|
||||||
|
*
|
||||||
|
* @throws Exception 反射调用异常
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void buildOfficeDocumentChunksShouldSkipBlankXlsxSheetWithoutImages() throws Exception {
|
||||||
|
KnowledgeDocumentImportTaskAppService service = new KnowledgeDocumentImportTaskAppService();
|
||||||
|
tech.easyflow.ai.entity.Document document = new tech.easyflow.ai.entity.Document();
|
||||||
|
document.setId(BigInteger.valueOf(103));
|
||||||
|
document.setCollectionId(BigInteger.valueOf(203));
|
||||||
|
document.setTitle("空白工作簿.xlsx");
|
||||||
|
|
||||||
|
Map<String, Object> parseArtifactSummary = new LinkedHashMap<String, Object>();
|
||||||
|
parseArtifactSummary.put("sheets", new ArrayList<Map<String, Object>>(List.of(new LinkedHashMap<String, Object>() {{
|
||||||
|
put("sheetName", "空白页");
|
||||||
|
put("sheetIndex", 0);
|
||||||
|
put("rows", new ArrayList<Map<String, Object>>());
|
||||||
|
}})));
|
||||||
|
parseArtifactSummary.put("cellImages", new ArrayList<Map<String, Object>>());
|
||||||
|
|
||||||
|
StrategyConfig strategyConfig = new StrategyConfig();
|
||||||
|
strategyConfig.setRowsPerChunk(10);
|
||||||
|
|
||||||
|
Method method = KnowledgeDocumentImportTaskAppService.class.getDeclaredMethod(
|
||||||
|
"buildOfficeDocumentChunks",
|
||||||
|
tech.easyflow.ai.entity.Document.class,
|
||||||
|
String.class,
|
||||||
|
StrategyConfig.class,
|
||||||
|
Map.class
|
||||||
|
);
|
||||||
|
method.setAccessible(true);
|
||||||
|
|
||||||
|
@SuppressWarnings("unchecked")
|
||||||
|
List<DocumentChunk> chunks = (List<DocumentChunk>) method.invoke(
|
||||||
|
service,
|
||||||
|
document,
|
||||||
|
"xlsx",
|
||||||
|
strategyConfig,
|
||||||
|
parseArtifactSummary
|
||||||
|
);
|
||||||
|
|
||||||
|
Assert.assertTrue(chunks.isEmpty());
|
||||||
|
}
|
||||||
|
|
||||||
private static DocumentMapper mockDocumentMapper(tech.easyflow.ai.entity.Document persistedDocument,
|
private static DocumentMapper mockDocumentMapper(tech.easyflow.ai.entity.Document persistedDocument,
|
||||||
AtomicReference<tech.easyflow.ai.entity.Document> updatedDocumentRef) {
|
AtomicReference<tech.easyflow.ai.entity.Document> updatedDocumentRef) {
|
||||||
return (DocumentMapper) Proxy.newProxyInstance(
|
return (DocumentMapper) Proxy.newProxyInstance(
|
||||||
@@ -116,6 +384,22 @@ public class KnowledgeDocumentImportTaskAppServiceTest {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static FileStorageService mockFileStorageService(AtomicReference<String> savedPrePathRef,
|
||||||
|
AtomicReference<String> savedFilenameRef) {
|
||||||
|
return (FileStorageService) Proxy.newProxyInstance(
|
||||||
|
FileStorageService.class.getClassLoader(),
|
||||||
|
new Class<?>[]{FileStorageService.class},
|
||||||
|
(proxy, method, args) -> {
|
||||||
|
if ("save".equals(method.getName()) && args != null && args.length == 2 && args[0] instanceof MultipartFile file) {
|
||||||
|
savedPrePathRef.set((String) args[1]);
|
||||||
|
savedFilenameRef.set(file.getOriginalFilename());
|
||||||
|
return "http://localhost:39000/easyflow/attachment/" + args[1] + "/" + file.getOriginalFilename();
|
||||||
|
}
|
||||||
|
return defaultValue(method.getReturnType());
|
||||||
|
}
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
private static void setField(Object target, String fieldName, Object value) throws Exception {
|
private static void setField(Object target, String fieldName, Object value) throws Exception {
|
||||||
Field field = KnowledgeDocumentImportTaskAppService.class.getDeclaredField(fieldName);
|
Field field = KnowledgeDocumentImportTaskAppService.class.getDeclaredField(fieldName);
|
||||||
field.setAccessible(true);
|
field.setAccessible(true);
|
||||||
|
|||||||
@@ -166,7 +166,7 @@ dromara:
|
|||||||
# easy-agents 文档解析统一配置
|
# easy-agents 文档解析统一配置
|
||||||
easy-agents:
|
easy-agents:
|
||||||
document:
|
document:
|
||||||
pdf:
|
ocr:
|
||||||
provider: mineru
|
provider: mineru
|
||||||
mineru:
|
mineru:
|
||||||
# 统一文档解析桥接层直接复用 easy-agents 的 provider 配置,不在 easyflow 再复制一套配置体系
|
# 统一文档解析桥接层直接复用 easy-agents 的 provider 配置,不在 easyflow 再复制一套配置体系
|
||||||
|
|||||||
@@ -1,10 +1,11 @@
|
|||||||
<script setup lang="ts">
|
<script setup lang="ts">
|
||||||
import { ref } from 'vue';
|
import { ref } from 'vue';
|
||||||
|
import ElXMarkdown from 'vue-element-plus-x/es/XMarkdown/index.js';
|
||||||
|
|
||||||
import { EasyFlowFormModal } from '@easyflow/common-ui';
|
import { EasyFlowFormModal } from '@easyflow/common-ui';
|
||||||
import { $t } from '@easyflow/locales';
|
import { $t } from '@easyflow/locales';
|
||||||
|
|
||||||
import { Delete, MoreFilled } from '@element-plus/icons-vue';
|
import { Delete, EditPen, MoreFilled } from '@element-plus/icons-vue';
|
||||||
import {
|
import {
|
||||||
ElButton,
|
ElButton,
|
||||||
ElDropdown,
|
ElDropdown,
|
||||||
@@ -12,6 +13,7 @@ import {
|
|||||||
ElDropdownMenu,
|
ElDropdownMenu,
|
||||||
ElForm,
|
ElForm,
|
||||||
ElFormItem,
|
ElFormItem,
|
||||||
|
ElIcon,
|
||||||
ElInput,
|
ElInput,
|
||||||
ElMessage,
|
ElMessage,
|
||||||
ElMessageBox,
|
ElMessageBox,
|
||||||
@@ -21,6 +23,10 @@ import {
|
|||||||
|
|
||||||
import { api } from '#/api/request';
|
import { api } from '#/api/request';
|
||||||
import PageData from '#/components/page/PageData.vue';
|
import PageData from '#/components/page/PageData.vue';
|
||||||
|
import {
|
||||||
|
markdownRenderProps,
|
||||||
|
resolveMarkdownContent,
|
||||||
|
} from '#/views/ai/documentCollection/markdown-content';
|
||||||
import { buildKnowledgePath } from '#/views/ai/documentCollection/share-path';
|
import { buildKnowledgePath } from '#/views/ai/documentCollection/share-path';
|
||||||
|
|
||||||
const props = defineProps({
|
const props = defineProps({
|
||||||
@@ -122,6 +128,85 @@ const form = ref({
|
|||||||
id: '',
|
id: '',
|
||||||
content: '',
|
content: '',
|
||||||
});
|
});
|
||||||
|
|
||||||
|
/** Returns the chunk row's `options` object, or a fresh empty object when it is missing or falsy. */
const getChunkOptions = (row: any) => {
  const chunkOptions = row?.options;
  return chunkOptions ? chunkOptions : {};
};
|
||||||
|
|
||||||
|
/**
 * Resolves the markdown shown for a chunk row: the `renderMarkdown` option when it is truthy,
 * otherwise the row's plain `content`, passed through `resolveMarkdownContent`.
 */
const getMarkdown = (row: any) => {
  const renderMarkdown = getChunkOptions(row)?.renderMarkdown;
  const markdownSource = renderMarkdown ? renderMarkdown : row?.content;
  return resolveMarkdownContent(markdownSource);
};
|
||||||
|
|
||||||
|
/**
 * Tells whether a chunk row looks like it came from an XLSX import.
 *
 * A row qualifies when its source extension option (`sourceFileExt` or
 * `splitter.sourceFileExt`, compared case-insensitively) is `xlsx`, or when any of the
 * `sheetName`, `rowStart` or `rowEnd` options is truthy.
 */
const isExcelChunk = (row: any) => {
  const options = getChunkOptions(row);
  const rawExt = options?.sourceFileExt || options?.['splitter.sourceFileExt'] || '';
  if (String(rawExt).toLowerCase() === 'xlsx') {
    return true;
  }
  // Fall back to the sheet/row markers carried in the chunk options.
  return Boolean(options?.sheetName || options?.rowStart || options?.rowEnd);
};
|
||||||
|
|
||||||
|
/**
 * Decides whether the page should render the card layout: only when the page is non-empty
 * and every row on it is an Excel chunk.
 */
const shouldUseExcelChunkCards = (pageList: any[] = []) => {
  if (!(pageList.length > 0)) {
    return false;
  }
  return pageList.every((chunkRow) => isExcelChunk(chunkRow));
};
|
||||||
|
|
||||||
|
/** Returns the chunk's `sheetName` option as a string, or `''` when it is missing or falsy. */
const getSheetName = (row: any) => {
  const sheetName = getChunkOptions(row)?.sheetName;
  return sheetName ? String(sheetName) : '';
};
|
||||||
|
|
||||||
|
/**
 * Returns the chunk's `rowStart` option as a non-negative number.
 *
 * Missing, falsy, negative and non-numeric values all yield `0`, so callers never
 * receive `NaN` or `Infinity`.
 */
const getRowStart = (row: any) => {
  const rowStart = Number(getChunkOptions(row)?.rowStart || 0);
  // Math.max(NaN, 0) is NaN, so non-finite conversions are mapped to 0 explicitly.
  return Number.isFinite(rowStart) ? Math.max(rowStart, 0) : 0;
};
|
||||||
|
|
||||||
|
/**
 * Returns the chunk's `rowEnd` option as a non-negative number.
 *
 * Missing, falsy, negative and non-numeric values all yield `0`, so callers never
 * receive `NaN` or `Infinity`.
 */
const getRowEnd = (row: any) => {
  const rowEnd = Number(getChunkOptions(row)?.rowEnd || 0);
  // Math.max(NaN, 0) is NaN, so non-finite conversions are mapped to 0 explicitly.
  return Number.isFinite(rowEnd) ? Math.max(rowEnd, 0) : 0;
};
|
||||||
|
|
||||||
|
/**
 * Builds the row-range label for a chunk.
 *
 * - start and end both positive: a single-row label when they are equal, otherwise a range label
 * - only start positive: an open-ended "from row" label
 * - otherwise: an empty string
 */
const getRowRangeLabel = (row: any) => {
  const firstRow = getRowStart(row);
  const lastRow = getRowEnd(row);
  // Without a positive start there is nothing to label, whatever the end is.
  if (!(firstRow > 0)) {
    return '';
  }
  if (!(lastRow > 0)) {
    return `第 ${firstRow} 行起`;
  }
  if (firstRow === lastRow) {
    return `第 ${firstRow} 行`;
  }
  return `第 ${firstRow}-${lastRow} 行`;
};
|
||||||
|
|
||||||
|
/**
 * Picks the title shown for a chunk card.
 *
 * Preference order: the `sourceLabel` option, then "sheet · row range", then the sheet name
 * alone, then the row id, and finally `'-'`.
 */
const getChunkTitle = (row: any) => {
  const explicitLabel = getChunkOptions(row)?.sourceLabel;
  if (explicitLabel) {
    return String(explicitLabel);
  }
  const sheet = getSheetName(row);
  const rowRange = getRowRangeLabel(row);
  if (!sheet) {
    // No sheet name: fall back to the row id, then to a placeholder.
    return row?.id || '-';
  }
  return rowRange ? `${sheet} · ${rowRange}` : sheet;
};
|
||||||
|
|
||||||
|
/**
 * Formats the chunk's `sorting` value as a label left-padded with zeros to two characters.
 *
 * Returns `''` when `sorting` is missing, not a finite number, or not positive, so a
 * malformed value never surfaces as the text "NaN".
 */
const getChunkIndexLabel = (row: any) => {
  const sorting = Number(row?.sorting || 0);
  // NaN fails every comparison, so it must be rejected explicitly rather than via `<= 0`.
  if (!Number.isFinite(sorting) || sorting <= 0) {
    return '';
  }
  return String(sorting).padStart(2, '0');
};
|
||||||
|
|
||||||
|
/**
 * Builds the card header text: the chunk title, prefixed with the chunk index label
 * when one is available.
 */
const getChunkHeaderLabel = (row: any) => {
  const indexLabel = getChunkIndexLabel(row);
  const title = getChunkTitle(row);
  return indexLabel ? `分块 ${indexLabel} · ${title}` : title;
};
|
||||||
</script>
|
</script>
|
||||||
|
|
||||||
<template>
|
<template>
|
||||||
@@ -136,12 +221,76 @@ const form = ref({
|
|||||||
:extra-query-params="queryParams"
|
:extra-query-params="queryParams"
|
||||||
>
|
>
|
||||||
<template #default="{ pageList }">
|
<template #default="{ pageList }">
|
||||||
<ElTable :data="pageList" style="width: 100%" size="large">
|
<div v-if="shouldUseExcelChunkCards(pageList)" class="chunk-board">
|
||||||
|
<article v-for="row in pageList" :key="row.id" class="chunk-card">
|
||||||
|
<div v-if="props.manageable" class="chunk-card__toolbar">
|
||||||
|
<ElButton
|
||||||
|
circle
|
||||||
|
text
|
||||||
|
type="primary"
|
||||||
|
class="chunk-card__action"
|
||||||
|
@click="handleEdit(row)"
|
||||||
|
>
|
||||||
|
<ElIcon><EditPen /></ElIcon>
|
||||||
|
</ElButton>
|
||||||
|
<ElDropdown>
|
||||||
|
<ElButton
|
||||||
|
circle
|
||||||
|
text
|
||||||
|
class="chunk-card__action chunk-card__action--ghost"
|
||||||
|
>
|
||||||
|
<ElIcon><MoreFilled /></ElIcon>
|
||||||
|
</ElButton>
|
||||||
|
|
||||||
|
<template #dropdown>
|
||||||
|
<ElDropdownMenu>
|
||||||
|
<ElDropdownItem @click="handleDelete(row)">
|
||||||
|
<ElButton link type="danger" :icon="Delete">
|
||||||
|
{{ $t('button.delete') }}
|
||||||
|
</ElButton>
|
||||||
|
</ElDropdownItem>
|
||||||
|
</ElDropdownMenu>
|
||||||
|
</template>
|
||||||
|
</ElDropdown>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="chunk-card__header">
|
||||||
|
<div class="chunk-card__eyebrow">
|
||||||
|
<span class="chunk-card__eyebrow-dot"></span>
|
||||||
|
<span>{{ getChunkHeaderLabel(row) }}</span>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div v-if="getMarkdown(row)" class="chunk-card__content">
|
||||||
|
<div class="chunk-rich-content chunk-rich-content--card">
|
||||||
|
<ElXMarkdown
|
||||||
|
:markdown="getMarkdown(row)"
|
||||||
|
:allow-html="markdownRenderProps.allowHtml"
|
||||||
|
:sanitize="markdownRenderProps.sanitize"
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<span v-else class="chunk-table__empty">-</span>
|
||||||
|
</article>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<ElTable v-else :data="pageList" style="width: 100%" size="large">
|
||||||
<ElTableColumn
|
<ElTableColumn
|
||||||
prop="content"
|
prop="content"
|
||||||
:label="$t('documentCollection.content')"
|
:label="$t('documentCollection.content')"
|
||||||
min-width="240"
|
min-width="240"
|
||||||
/>
|
>
|
||||||
|
<template #default="{ row }">
|
||||||
|
<div v-if="getMarkdown(row)" class="chunk-rich-content">
|
||||||
|
<ElXMarkdown
|
||||||
|
:markdown="getMarkdown(row)"
|
||||||
|
:allow-html="markdownRenderProps.allowHtml"
|
||||||
|
:sanitize="markdownRenderProps.sanitize"
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
<span v-else class="chunk-table__empty">-</span>
|
||||||
|
</template>
|
||||||
|
</ElTableColumn>
|
||||||
<ElTableColumn
|
<ElTableColumn
|
||||||
v-if="props.manageable"
|
v-if="props.manageable"
|
||||||
:label="$t('common.handle')"
|
:label="$t('common.handle')"
|
||||||
@@ -198,4 +347,251 @@ const form = ref({
|
|||||||
</div>
|
</div>
|
||||||
</template>
|
</template>
|
||||||
|
|
||||||
<style scoped></style>
|
<style scoped>
|
||||||
|
.chunk-board {
|
||||||
|
display: grid;
|
||||||
|
gap: 14px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.chunk-card {
|
||||||
|
position: relative;
|
||||||
|
padding: 20px 20px 18px;
|
||||||
|
overflow: hidden;
|
||||||
|
background: linear-gradient(
|
||||||
|
135deg,
|
||||||
|
color-mix(in srgb, var(--el-color-primary-light-9) 80%, white) 0%,
|
||||||
|
var(--el-fill-color-blank) 38%
|
||||||
|
);
|
||||||
|
border: 1px solid color-mix(in srgb, var(--el-border-color-light) 78%, white);
|
||||||
|
border-radius: 18px;
|
||||||
|
box-shadow: 0 18px 40px rgb(15 23 42 / 6%);
|
||||||
|
}
|
||||||
|
|
||||||
|
.chunk-card::before {
|
||||||
|
position: absolute;
|
||||||
|
inset: 0 auto 0 0;
|
||||||
|
width: 4px;
|
||||||
|
content: '';
|
||||||
|
background: linear-gradient(
|
||||||
|
180deg,
|
||||||
|
var(--el-color-primary),
|
||||||
|
color-mix(in srgb, var(--el-color-primary) 44%, white)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
.chunk-card__toolbar {
|
||||||
|
position: absolute;
|
||||||
|
top: 14px;
|
||||||
|
right: 14px;
|
||||||
|
z-index: 1;
|
||||||
|
display: flex;
|
||||||
|
gap: 6px;
|
||||||
|
align-items: center;
|
||||||
|
}
|
||||||
|
|
||||||
|
.chunk-card__action {
|
||||||
|
width: 34px;
|
||||||
|
height: 34px;
|
||||||
|
color: var(--el-color-primary);
|
||||||
|
background: color-mix(in srgb, var(--el-color-primary-light-9) 66%, white);
|
||||||
|
border: 1px solid
|
||||||
|
color-mix(in srgb, var(--el-color-primary-light-8) 72%, white);
|
||||||
|
box-shadow: 0 8px 18px rgb(37 99 235 / 10%);
|
||||||
|
}
|
||||||
|
|
||||||
|
.chunk-card__action--ghost {
|
||||||
|
color: var(--el-text-color-secondary);
|
||||||
|
background: rgb(255 255 255 / 86%);
|
||||||
|
border-color: color-mix(in srgb, var(--el-border-color-light) 86%, white);
|
||||||
|
box-shadow: none;
|
||||||
|
}
|
||||||
|
|
||||||
|
.chunk-card__header {
|
||||||
|
display: flex;
|
||||||
|
flex-direction: column;
|
||||||
|
min-width: 0;
|
||||||
|
padding-right: 92px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.chunk-card__eyebrow {
|
||||||
|
display: inline-flex;
|
||||||
|
gap: 8px;
|
||||||
|
align-items: center;
|
||||||
|
min-height: 28px;
|
||||||
|
font-size: 12px;
|
||||||
|
font-weight: 600;
|
||||||
|
line-height: 1.5;
|
||||||
|
color: var(--el-text-color-secondary);
|
||||||
|
}
|
||||||
|
|
||||||
|
.chunk-card__eyebrow-dot {
|
||||||
|
width: 8px;
|
||||||
|
height: 8px;
|
||||||
|
border-radius: 999px;
|
||||||
|
background: var(--el-color-primary);
|
||||||
|
box-shadow: 0 0 0 4px
|
||||||
|
color-mix(in srgb, var(--el-color-primary-light-8) 50%, transparent);
|
||||||
|
}
|
||||||
|
|
||||||
|
.chunk-card__content {
|
||||||
|
padding-top: 14px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.chunk-rich-content {
|
||||||
|
min-width: 0;
|
||||||
|
padding: 4px 0;
|
||||||
|
font-size: 14px;
|
||||||
|
line-height: 1.72;
|
||||||
|
color: var(--el-text-color-regular);
|
||||||
|
}
|
||||||
|
|
||||||
|
.chunk-rich-content--card {
|
||||||
|
padding: 14px 16px;
|
||||||
|
overflow-x: auto;
|
||||||
|
background: rgb(255 255 255 / 82%);
|
||||||
|
border: 1px solid rgb(15 23 42 / 6%);
|
||||||
|
border-radius: 16px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.chunk-rich-content :deep(.markdown-body) {
|
||||||
|
font-size: inherit;
|
||||||
|
line-height: inherit;
|
||||||
|
color: inherit;
|
||||||
|
background: transparent;
|
||||||
|
}
|
||||||
|
|
||||||
|
.chunk-rich-content :deep(.markdown-body > :first-child) {
|
||||||
|
margin-top: 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
.chunk-rich-content :deep(.markdown-body > :last-child) {
|
||||||
|
margin-bottom: 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
.chunk-rich-content :deep(*) {
|
||||||
|
overflow-wrap: anywhere;
|
||||||
|
}
|
||||||
|
|
||||||
|
.chunk-rich-content :deep(p) {
|
||||||
|
margin: 0 0 10px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.chunk-rich-content :deep(p:last-child) {
|
||||||
|
margin-bottom: 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
.chunk-rich-content :deep(h1),
|
||||||
|
.chunk-rich-content :deep(h2),
|
||||||
|
.chunk-rich-content :deep(h3),
|
||||||
|
.chunk-rich-content :deep(h4),
|
||||||
|
.chunk-rich-content :deep(h5),
|
||||||
|
.chunk-rich-content :deep(h6) {
|
||||||
|
margin: 14px 0 10px;
|
||||||
|
font-weight: 600;
|
||||||
|
line-height: 1.45;
|
||||||
|
color: var(--el-text-color-primary);
|
||||||
|
}
|
||||||
|
|
||||||
|
.chunk-rich-content :deep(h1:first-child),
|
||||||
|
.chunk-rich-content :deep(h2:first-child),
|
||||||
|
.chunk-rich-content :deep(h3:first-child),
|
||||||
|
.chunk-rich-content :deep(h4:first-child),
|
||||||
|
.chunk-rich-content :deep(h5:first-child),
|
||||||
|
.chunk-rich-content :deep(h6:first-child) {
|
||||||
|
margin-top: 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
.chunk-rich-content :deep(ul),
|
||||||
|
.chunk-rich-content :deep(ol) {
|
||||||
|
padding-left: 20px;
|
||||||
|
margin: 0 0 12px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.chunk-rich-content :deep(li + li) {
|
||||||
|
margin-top: 4px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.chunk-rich-content :deep(a) {
|
||||||
|
color: var(--el-color-primary);
|
||||||
|
text-decoration: underline;
|
||||||
|
text-underline-offset: 2px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.chunk-rich-content :deep(img) {
|
||||||
|
display: block;
|
||||||
|
max-width: min(100%, 560px);
|
||||||
|
height: auto;
|
||||||
|
margin: 12px 0;
|
||||||
|
border: 1px solid rgb(15 23 42 / 8%);
|
||||||
|
border-radius: 12px;
|
||||||
|
box-shadow: 0 10px 24px rgb(15 23 42 / 8%);
|
||||||
|
}
|
||||||
|
|
||||||
|
.chunk-rich-content :deep(table) {
|
||||||
|
width: 100%;
|
||||||
|
margin: 12px 0;
|
||||||
|
overflow: hidden;
|
||||||
|
border-collapse: collapse;
|
||||||
|
background: rgb(255 255 255 / 92%);
|
||||||
|
border: 1px solid rgb(15 23 42 / 8%);
|
||||||
|
border-radius: 12px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.chunk-rich-content :deep(th),
|
||||||
|
.chunk-rich-content :deep(td) {
|
||||||
|
padding: 10px 12px;
|
||||||
|
text-align: left;
|
||||||
|
vertical-align: top;
|
||||||
|
border: 1px solid rgb(15 23 42 / 8%);
|
||||||
|
}
|
||||||
|
|
||||||
|
.chunk-rich-content :deep(th) {
|
||||||
|
font-weight: 600;
|
||||||
|
color: var(--el-text-color-primary);
|
||||||
|
background: rgb(37 99 235 / 4%);
|
||||||
|
}
|
||||||
|
|
||||||
|
.chunk-rich-content :deep(pre) {
|
||||||
|
max-width: 100%;
|
||||||
|
padding: 12px 14px;
|
||||||
|
overflow: auto;
|
||||||
|
background: rgb(15 23 42 / 4%);
|
||||||
|
border: 1px solid rgb(15 23 42 / 6%);
|
||||||
|
border-radius: 12px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.chunk-rich-content :deep(blockquote) {
|
||||||
|
padding-left: 12px;
|
||||||
|
margin: 12px 0;
|
||||||
|
color: var(--el-text-color-secondary);
|
||||||
|
border-left: 3px solid rgb(37 99 235 / 24%);
|
||||||
|
}
|
||||||
|
|
||||||
|
.chunk-table__empty {
|
||||||
|
color: var(--el-text-color-placeholder);
|
||||||
|
}
|
||||||
|
|
||||||
|
@media (max-width: 960px) {
|
||||||
|
.chunk-card {
|
||||||
|
padding: 18px 16px 16px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.chunk-card__toolbar {
|
||||||
|
position: static;
|
||||||
|
justify-content: flex-end;
|
||||||
|
padding-bottom: 12px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.chunk-card__header {
|
||||||
|
padding-right: 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
.chunk-rich-content--card {
|
||||||
|
padding: 12px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.chunk-card__eyebrow {
|
||||||
|
font-size: 12px;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
</style>
|
||||||
|
|||||||
@@ -40,6 +40,8 @@ interface DocumentStatusPayload {
|
|||||||
failedChunks?: number;
|
failedChunks?: number;
|
||||||
knowledgeId?: number | string;
|
knowledgeId?: number | string;
|
||||||
lastTaskError?: string;
|
lastTaskError?: string;
|
||||||
|
parseCurrentStage?: string;
|
||||||
|
parseStatusMessage?: string;
|
||||||
processStatus?: string;
|
processStatus?: string;
|
||||||
progressPercent?: number;
|
progressPercent?: number;
|
||||||
taskModifiedAt?: string;
|
taskModifiedAt?: string;
|
||||||
@@ -154,11 +156,24 @@ const statusMetaMap: Record<
|
|||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/** Fallback status presentation, taken from the `UPLOADED` entry of `statusMetaMap`. */
const defaultStatusMeta: { icon: Component; toneClass: string } =
  statusMetaMap['UPLOADED']!;
|
||||||
|
|
||||||
const getStatusLabel = (status?: string) =>
|
const getStatusLabel = (status?: string) =>
|
||||||
$t(`documentCollection.taskStatus.${status || 'UPLOADED'}`);
|
$t(`documentCollection.taskStatus.${status || 'UPLOADED'}`);
|
||||||
|
|
||||||
const getStatusMeta = (status?: string) =>
|
const getStatusMeta = (
|
||||||
statusMetaMap[status || 'UPLOADED'] || statusMetaMap.UPLOADED;
|
status?: string,
|
||||||
|
): {
|
||||||
|
icon: Component;
|
||||||
|
toneClass: string;
|
||||||
|
} => statusMetaMap[status || 'UPLOADED'] ?? defaultStatusMeta;
|
||||||
|
|
||||||
|
/** Returns the tone CSS class of the status metadata resolved for `status`. */
const getStatusToneClass = (status?: string) => {
  const { toneClass } = getStatusMeta(status);
  return toneClass;
};
|
||||||
|
|
||||||
|
/** Returns the icon component of the status metadata resolved for `status`. */
const getStatusIcon = (status?: string) => {
  const { icon } = getStatusMeta(status);
  return icon;
};
|
||||||
|
|
||||||
const getChunkCount = (row: any) => {
|
const getChunkCount = (row: any) => {
|
||||||
const totalChunks = Number(row.totalChunks || 0);
|
const totalChunks = Number(row.totalChunks || 0);
|
||||||
@@ -171,12 +186,28 @@ const getChunkCount = (row: any) => {
|
|||||||
const getProgressText = (row: any) => {
|
const getProgressText = (row: any) => {
|
||||||
const completed = Number(row.completedChunks || 0);
|
const completed = Number(row.completedChunks || 0);
|
||||||
const total = Number(row.totalChunks || 0);
|
const total = Number(row.totalChunks || 0);
|
||||||
|
if (row.processStatus === 'PARSING') {
|
||||||
|
return `${Number(row.progressPercent || 0)}%`;
|
||||||
|
}
|
||||||
if (total <= 0) {
|
if (total <= 0) {
|
||||||
return `${Number(row.progressPercent || 0)}%`;
|
return `${Number(row.progressPercent || 0)}%`;
|
||||||
}
|
}
|
||||||
return `${Number(row.progressPercent || 0)}% · ${completed}/${total}`;
|
return `${Number(row.progressPercent || 0)}% · ${completed}/${total}`;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/** Display labels for the parse stages, keyed by the `parseCurrentStage` value. */
const parseStageLabels: Record<string, string> = {
  assembling: '汇总中',
  extracting: '提取中',
  ocr: 'OCR 中',
  preparing: '准备中',
  queued: '排队中',
};

/**
 * Returns the hint shown next to the parsing progress.
 *
 * The row's `parseStatusMessage` wins when it is truthy; otherwise the label registered for
 * `parseCurrentStage` is used, and `''` is returned for missing or unknown stages.
 */
const getProcessingHint = (row: any) => {
  if (row.parseStatusMessage) {
    return row.parseStatusMessage;
  }
  const stage = row.parseCurrentStage || '';
  // Own-property check: a stage such as "toString" must not resolve to an inherited member.
  if (!Object.prototype.hasOwnProperty.call(parseStageLabels, stage)) {
    return '';
  }
  return parseStageLabels[stage] || '';
};
|
||||||
|
|
||||||
const clearReconnectTimer = () => {
|
const clearReconnectTimer = () => {
|
||||||
if (!reconnectTimer) {
|
if (!reconnectTimer) {
|
||||||
return;
|
return;
|
||||||
@@ -211,6 +242,8 @@ const patchDocumentRow = (payload: DocumentStatusPayload) => {
|
|||||||
completedChunks: payload.completedChunks,
|
completedChunks: payload.completedChunks,
|
||||||
failedChunks: payload.failedChunks,
|
failedChunks: payload.failedChunks,
|
||||||
lastTaskError: payload.lastTaskError,
|
lastTaskError: payload.lastTaskError,
|
||||||
|
parseCurrentStage: payload.parseCurrentStage,
|
||||||
|
parseStatusMessage: payload.parseStatusMessage,
|
||||||
processStatus: payload.processStatus,
|
processStatus: payload.processStatus,
|
||||||
progressPercent: payload.progressPercent,
|
progressPercent: payload.progressPercent,
|
||||||
taskModifiedAt: payload.taskModifiedAt,
|
taskModifiedAt: payload.taskModifiedAt,
|
||||||
@@ -529,7 +562,7 @@ watch(
|
|||||||
<div class="status-cell">
|
<div class="status-cell">
|
||||||
<div
|
<div
|
||||||
class="status-pill"
|
class="status-pill"
|
||||||
:class="getStatusMeta(row.processStatus).toneClass"
|
:class="getStatusToneClass(row.processStatus)"
|
||||||
>
|
>
|
||||||
<span class="status-pill__icon-shell">
|
<span class="status-pill__icon-shell">
|
||||||
<ElIcon
|
<ElIcon
|
||||||
@@ -540,7 +573,7 @@ watch(
|
|||||||
: ''
|
: ''
|
||||||
"
|
"
|
||||||
>
|
>
|
||||||
<component :is="getStatusMeta(row.processStatus).icon" />
|
<component :is="getStatusIcon(row.processStatus)" />
|
||||||
</ElIcon>
|
</ElIcon>
|
||||||
</span>
|
</span>
|
||||||
<span class="status-pill__label">
|
<span class="status-pill__label">
|
||||||
@@ -548,7 +581,10 @@ watch(
|
|||||||
</span>
|
</span>
|
||||||
</div>
|
</div>
|
||||||
<div
|
<div
|
||||||
v-if="row.processStatus === 'INDEXING'"
|
v-if="
|
||||||
|
row.processStatus === 'INDEXING' ||
|
||||||
|
row.processStatus === 'PARSING'
|
||||||
|
"
|
||||||
class="status-progress"
|
class="status-progress"
|
||||||
>
|
>
|
||||||
<ElProgress
|
<ElProgress
|
||||||
@@ -558,6 +594,12 @@ watch(
|
|||||||
<span class="status-progress__text">
|
<span class="status-progress__text">
|
||||||
{{ getProgressText(row) }}
|
{{ getProgressText(row) }}
|
||||||
</span>
|
</span>
|
||||||
|
<span
|
||||||
|
v-if="row.processStatus === 'PARSING' && getProcessingHint(row)"
|
||||||
|
class="status-progress__hint"
|
||||||
|
>
|
||||||
|
{{ getProcessingHint(row) }}
|
||||||
|
</span>
|
||||||
</div>
|
</div>
|
||||||
<div
|
<div
|
||||||
v-else-if="row.lastTaskError"
|
v-else-if="row.lastTaskError"
|
||||||
@@ -663,6 +705,12 @@ watch(
|
|||||||
text-align: left;
|
text-align: left;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
.status-progress__hint {
|
||||||
|
font-size: 12px;
|
||||||
|
color: var(--el-text-color-secondary);
|
||||||
|
text-align: left;
|
||||||
|
}
|
||||||
|
|
||||||
.status-error {
|
.status-error {
|
||||||
max-width: 176px;
|
max-width: 176px;
|
||||||
font-size: 12px;
|
font-size: 12px;
|
||||||
|
|||||||
@@ -21,6 +21,8 @@ type RetrievalMode = 'HYBRID' | 'KEYWORD' | 'VECTOR';
|
|||||||
interface SearchResultItem {
|
interface SearchResultItem {
|
||||||
sorting: number;
|
sorting: number;
|
||||||
content: string;
|
content: string;
|
||||||
|
renderMarkdown?: string;
|
||||||
|
sourceFileName?: string;
|
||||||
score?: number;
|
score?: number;
|
||||||
hitSource?: 'BOTH' | 'KEYWORD' | 'VECTOR';
|
hitSource?: 'BOTH' | 'KEYWORD' | 'VECTOR';
|
||||||
vectorScore?: number;
|
vectorScore?: number;
|
||||||
|
|||||||
@@ -1,17 +1,27 @@
|
|||||||
<script setup lang="ts">
|
<script setup lang="ts">
|
||||||
|
import type { PropType } from 'vue';
|
||||||
|
|
||||||
import { ref } from 'vue';
|
import { ref } from 'vue';
|
||||||
|
import ElXMarkdown from 'vue-element-plus-x/es/XMarkdown/index.js';
|
||||||
|
|
||||||
import { $t } from '@easyflow/locales';
|
import { $t } from '@easyflow/locales';
|
||||||
|
|
||||||
import { Document } from '@element-plus/icons-vue';
|
import { Document } from '@element-plus/icons-vue';
|
||||||
import { ElButton, ElEmpty, ElIcon, ElTag } from 'element-plus';
|
import { ElButton, ElEmpty, ElIcon, ElTag } from 'element-plus';
|
||||||
|
|
||||||
|
import {
|
||||||
|
markdownRenderProps,
|
||||||
|
resolveMarkdownContent,
|
||||||
|
} from '#/views/ai/documentCollection/markdown-content';
|
||||||
|
|
||||||
type RetrievalMode = 'HYBRID' | 'KEYWORD' | 'VECTOR';
|
type RetrievalMode = 'HYBRID' | 'KEYWORD' | 'VECTOR';
|
||||||
type HitSource = 'BOTH' | 'KEYWORD' | 'VECTOR';
|
type HitSource = 'BOTH' | 'KEYWORD' | 'VECTOR';
|
||||||
|
|
||||||
interface PreviewItem {
|
interface PreviewItem {
|
||||||
sorting: number | string;
|
sorting: number | string;
|
||||||
content: string;
|
content: string;
|
||||||
|
renderMarkdown?: string;
|
||||||
|
sourceFileName?: string;
|
||||||
score?: number | string;
|
score?: number | string;
|
||||||
hitSource?: HitSource;
|
hitSource?: HitSource;
|
||||||
}
|
}
|
||||||
@@ -42,12 +52,12 @@ const props = defineProps({
|
|||||||
default: false,
|
default: false,
|
||||||
},
|
},
|
||||||
onCancel: {
|
onCancel: {
|
||||||
type: Function,
|
type: Function as PropType<() => void>,
|
||||||
default: () => {},
|
default: () => undefined,
|
||||||
},
|
},
|
||||||
onConfirm: {
|
onConfirm: {
|
||||||
type: Function,
|
type: Function as PropType<() => void>,
|
||||||
default: () => {},
|
default: () => undefined,
|
||||||
},
|
},
|
||||||
isSearching: {
|
isSearching: {
|
||||||
type: Boolean,
|
type: Boolean,
|
||||||
@@ -100,18 +110,31 @@ const resolveHitSourceType = (hitSource?: HitSource) => {
|
|||||||
return 'info';
|
return 'info';
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
 * Picks the Markdown source for a hit: `renderMarkdown` when it is non-empty,
 * otherwise the plain `content`, and normalizes it for the Markdown component.
 */
const resolvePreviewMarkdown = (item: PreviewItem) => {
  const source = item.renderMarkdown || item.content;
  return resolveMarkdownContent(source);
};

/**
 * Builds the caption for a hit: the similarity score (unless scores are
 * hidden or the score is missing/blank) followed by the source file name,
 * joined with " · ". Returns an empty string when neither part applies.
 */
const resolveScoreLine = (item: PreviewItem) => {
  const segments: string[] = [];

  if (!props.hideScore) {
    const { score } = item;
    const isBlank = score === undefined || score === null || `${score}` === '';
    if (!isBlank) {
      segments.push(`${$t('documentCollection.similarityScore')}: ${score}`);
    }
  }

  if (item.sourceFileName) {
    segments.push(`来源: ${item.sourceFileName}`);
  }

  return segments.join(' · ');
};

/** Forwards a click on the cancel button to the `onCancel` prop, if provided. */
const handleCancel = () => {
  props.onCancel?.();
};

/** Forwards a click on the confirm button to the `onConfirm` prop, if provided. */
const handleConfirm = () => {
  props.onConfirm?.();
};
|
||||||
|
|
||||||
defineExpose({
|
defineExpose({
|
||||||
@@ -149,8 +172,8 @@ defineExpose({
|
|||||||
<div class="segment-badge">
|
<div class="segment-badge">
|
||||||
{{ item.sorting ?? index + 1 }}
|
{{ item.sorting ?? index + 1 }}
|
||||||
</div>
|
</div>
|
||||||
<div v-if="!hideScore" class="score-text">
|
<div v-if="resolveScoreLine(item)" class="score-text">
|
||||||
{{ $t('documentCollection.similarityScore') }}: {{ item.score }}
|
{{ resolveScoreLine(item) }}
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
<div
|
<div
|
||||||
@@ -174,8 +197,18 @@ defineExpose({
|
|||||||
</ElTag>
|
</ElTag>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
<div class="content-desc">
|
<div
|
||||||
{{ normalizePreviewContent(item.content) }}
|
v-if="resolvePreviewMarkdown(item)"
|
||||||
|
class="content-desc content-desc--markdown"
|
||||||
|
>
|
||||||
|
<ElXMarkdown
|
||||||
|
:markdown="resolvePreviewMarkdown(item)"
|
||||||
|
:allow-html="markdownRenderProps.allowHtml"
|
||||||
|
:sanitize="markdownRenderProps.sanitize"
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
<div v-else class="content-desc">
|
||||||
|
{{ item.content }}
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
@@ -193,17 +226,17 @@ defineExpose({
|
|||||||
<div class="action-buttons">
|
<div class="action-buttons">
|
||||||
<ElButton
|
<ElButton
|
||||||
:style="{ minWidth: '100px', height: '36px' }"
|
:style="{ minWidth: '100px', height: '36px' }"
|
||||||
@click="onCancel"
|
@click="handleCancel"
|
||||||
>
|
>
|
||||||
{{ $t('documentCollection.actions.confirmImport') }}
|
{{ $t('documentCollection.actions.cancelImport') }}
|
||||||
</ElButton>
|
</ElButton>
|
||||||
<ElButton
|
<ElButton
|
||||||
type="primary"
|
type="primary"
|
||||||
:style="{ minWidth: '100px', height: '36px' }"
|
:style="{ minWidth: '100px', height: '36px' }"
|
||||||
:loading="disabledConfirm"
|
:loading="disabledConfirm"
|
||||||
@click="onConfirm"
|
@click="handleConfirm"
|
||||||
>
|
>
|
||||||
{{ $t('documentCollection.actions.cancelImport') }}
|
{{ $t('documentCollection.actions.confirmImport') }}
|
||||||
</ElButton>
|
</ElButton>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
@@ -276,12 +309,26 @@ defineExpose({
|
|||||||
padding: 14px 16px;
|
padding: 14px 16px;
|
||||||
font-size: 14px;
|
font-size: 14px;
|
||||||
line-height: 1.6;
|
line-height: 1.6;
|
||||||
white-space: pre-wrap;
|
|
||||||
word-break: break-word;
|
word-break: break-word;
|
||||||
background: rgb(248 250 252 / 90%);
|
background: rgb(248 250 252 / 90%);
|
||||||
border-radius: 14px;
|
border-radius: 14px;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
.content-desc--markdown :deep(.markdown-body) {
|
||||||
|
font-size: inherit;
|
||||||
|
line-height: inherit;
|
||||||
|
color: inherit;
|
||||||
|
background: transparent;
|
||||||
|
}
|
||||||
|
|
||||||
|
.content-desc--markdown :deep(.markdown-body > :first-child) {
|
||||||
|
margin-top: 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
.content-desc--markdown :deep(.markdown-body > :last-child) {
|
||||||
|
margin-bottom: 0;
|
||||||
|
}
|
||||||
|
|
||||||
.score-text {
|
.score-text {
|
||||||
font-size: 13px;
|
font-size: 13px;
|
||||||
font-weight: 500;
|
font-weight: 500;
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
<script setup lang="ts">
|
<script setup lang="ts">
|
||||||
import { reactive, ref, watch } from 'vue';
|
import { computed, reactive, ref, watch } from 'vue';
|
||||||
|
|
||||||
import { $t } from '@easyflow/locales';
|
import { $t } from '@easyflow/locales';
|
||||||
|
|
||||||
@@ -8,6 +8,7 @@ import {
|
|||||||
ElForm,
|
ElForm,
|
||||||
ElFormItem,
|
ElFormItem,
|
||||||
ElInput,
|
ElInput,
|
||||||
|
ElInputNumber,
|
||||||
ElMessage,
|
ElMessage,
|
||||||
ElOption,
|
ElOption,
|
||||||
ElSelect,
|
ElSelect,
|
||||||
@@ -85,7 +86,7 @@ const createDefaultFormState = () => ({
|
|||||||
mdSplitterLevel: 2,
|
mdSplitterLevel: 2,
|
||||||
overlapSize: 128,
|
overlapSize: 128,
|
||||||
regex: '',
|
regex: '',
|
||||||
rowsPerChunk: 1,
|
rowsPerChunk: 10,
|
||||||
strategyCode: 'AUTO',
|
strategyCode: 'AUTO',
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -126,6 +127,17 @@ const strategyOptions = [
|
|||||||
},
|
},
|
||||||
];
|
];
|
||||||
|
|
||||||
|
/**
 * Lower-cased extension of the current document title, without the dot.
 *
 * Returns an empty string when the title contains no dot, so a bare title
 * such as "pptx" or "xlsx" is not mistaken for an Office file. Surrounding
 * whitespace is ignored so "report.xlsx " is still recognised.
 */
const fileExt = computed(() => {
  const title = String(props.documentTitle || '').trim();
  const dotIndex = title.lastIndexOf('.');
  return dotIndex === -1 ? '' : title.slice(dotIndex + 1).toLowerCase();
});

/** True when the document title ends in `.pptx`. */
const isPptx = computed(() => fileExt.value === 'pptx');

/** True when the document title ends in `.xlsx`. */
const isXlsx = computed(() => fileExt.value === 'xlsx');

/** The strategy selector is shown only for documents that are neither PPTX nor XLSX. */
const showStrategySelector = computed(() => !isPptx.value && !isXlsx.value);
|
||||||
|
|
||||||
const mdLevels = [1, 2, 3, 4, 5, 6];
|
const mdLevels = [1, 2, 3, 4, 5, 6];
|
||||||
|
|
||||||
const showLengthSettings = (strategyCode?: string) =>
|
const showLengthSettings = (strategyCode?: string) =>
|
||||||
@@ -150,9 +162,22 @@ const resetPreviewState = () => {
|
|||||||
previewError.value = '';
|
previewError.value = '';
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
 * Assembles the strategy config object for the current document.
 *
 * - PPTX: only the fixed page-based strategy code.
 * - XLSX: the fixed row-window strategy code plus a validated `rowsPerChunk`.
 * - Anything else: a shallow copy of the whole form state.
 */
const buildStrategyConfig = () => {
  if (isPptx.value) {
    return {
      strategyCode: 'OFFICE_PPTX_PAGE',
    };
  }

  if (isXlsx.value) {
    // The number input yields null once the user clears it; fall back to the
    // form default (10) and keep the value inside the input's 1–200 bounds so
    // a null or out-of-range row window is never submitted.
    const rows = formState.rowsPerChunk;
    const rowsPerChunk =
      typeof rows === 'number' && Number.isFinite(rows)
        ? Math.min(200, Math.max(1, Math.round(rows)))
        : 10;
    return {
      rowsPerChunk,
      strategyCode: 'OFFICE_XLSX_ROW_WINDOW',
    };
  }

  return {
    ...formState,
  };
};
|
||||||
|
|
||||||
const normalizeSourceRanges = (ranges?: SourceRange[]) =>
|
const normalizeSourceRanges = (ranges?: SourceRange[]) =>
|
||||||
Array.isArray(ranges)
|
Array.isArray(ranges)
|
||||||
@@ -292,6 +317,13 @@ watch(
|
|||||||
previewSequence += 1;
|
previewSequence += 1;
|
||||||
clearPreviewTimer();
|
clearPreviewTimer();
|
||||||
Object.assign(formState, createDefaultFormState());
|
Object.assign(formState, createDefaultFormState());
|
||||||
|
if (isPptx.value) {
|
||||||
|
formState.strategyCode = 'OFFICE_PPTX_PAGE';
|
||||||
|
}
|
||||||
|
if (isXlsx.value) {
|
||||||
|
formState.strategyCode = 'OFFICE_XLSX_ROW_WINDOW';
|
||||||
|
formState.rowsPerChunk = 10;
|
||||||
|
}
|
||||||
resetPreviewState();
|
resetPreviewState();
|
||||||
if (activeDocumentId.value) {
|
if (activeDocumentId.value) {
|
||||||
schedulePreviewGeneration();
|
schedulePreviewGeneration();
|
||||||
@@ -317,6 +349,7 @@ watch(
|
|||||||
<ElForm :model="formState" label-position="top" class="workbench__form">
|
<ElForm :model="formState" label-position="top" class="workbench__form">
|
||||||
<div class="workbench__form-grid">
|
<div class="workbench__form-grid">
|
||||||
<ElFormItem
|
<ElFormItem
|
||||||
|
v-if="showStrategySelector"
|
||||||
:label="$t('documentCollection.importDoc.strategySelection')"
|
:label="$t('documentCollection.importDoc.strategySelection')"
|
||||||
class="workbench__form-full"
|
class="workbench__form-full"
|
||||||
>
|
>
|
||||||
@@ -330,6 +363,20 @@ watch(
|
|||||||
</ElSelect>
|
</ElSelect>
|
||||||
</ElFormItem>
|
</ElFormItem>
|
||||||
|
|
||||||
|
<ElFormItem
|
||||||
|
v-if="isXlsx"
|
||||||
|
label="每多少行分一块"
|
||||||
|
class="workbench__form-full"
|
||||||
|
>
|
||||||
|
<ElInputNumber
|
||||||
|
v-model="formState.rowsPerChunk"
|
||||||
|
:min="1"
|
||||||
|
:max="200"
|
||||||
|
:step="1"
|
||||||
|
class="workbench__rows-input"
|
||||||
|
/>
|
||||||
|
</ElFormItem>
|
||||||
|
|
||||||
<ElFormItem
|
<ElFormItem
|
||||||
v-if="showLengthSettings(formState.strategyCode)"
|
v-if="showLengthSettings(formState.strategyCode)"
|
||||||
:label="$t('documentCollection.splitterDoc.chunkSize')"
|
:label="$t('documentCollection.splitterDoc.chunkSize')"
|
||||||
@@ -478,6 +525,10 @@ watch(
|
|||||||
box-shadow: 0 0 0 1px rgb(15 23 42 / 7%) inset;
|
box-shadow: 0 0 0 1px rgb(15 23 42 / 7%) inset;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
.workbench__rows-input {
|
||||||
|
width: 100%;
|
||||||
|
}
|
||||||
|
|
||||||
:deep(.workbench__form .el-slider__runway) {
|
:deep(.workbench__form .el-slider__runway) {
|
||||||
background: rgb(15 23 42 / 8%);
|
background: rgb(15 23 42 / 8%);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,10 +1,16 @@
|
|||||||
<script setup lang="ts">
|
<script setup lang="ts">
|
||||||
import { computed, ref, watch } from 'vue';
|
import { computed, ref, watch } from 'vue';
|
||||||
|
import ElXMarkdown from 'vue-element-plus-x/es/XMarkdown/index.js';
|
||||||
|
|
||||||
import { $t } from '@easyflow/locales';
|
import { $t } from '@easyflow/locales';
|
||||||
|
|
||||||
import { ElEmpty, ElSkeleton, ElTag } from 'element-plus';
|
import { ElEmpty, ElSkeleton, ElTag } from 'element-plus';
|
||||||
|
|
||||||
|
import {
|
||||||
|
markdownRenderProps,
|
||||||
|
resolveMarkdownContent,
|
||||||
|
} from '#/views/ai/documentCollection/markdown-content';
|
||||||
|
|
||||||
interface SourceRange {
|
interface SourceRange {
|
||||||
end: number;
|
end: number;
|
||||||
start: number;
|
start: number;
|
||||||
@@ -21,6 +27,7 @@ interface ChunkItem {
|
|||||||
partNo?: number;
|
partNo?: number;
|
||||||
partTotal?: number;
|
partTotal?: number;
|
||||||
question?: string;
|
question?: string;
|
||||||
|
renderMarkdown?: string;
|
||||||
sourceLabel?: string;
|
sourceLabel?: string;
|
||||||
sourceRanges?: SourceRange[];
|
sourceRanges?: SourceRange[];
|
||||||
tokenEstimate?: number;
|
tokenEstimate?: number;
|
||||||
@@ -180,15 +187,38 @@ const isActiveChunk = (chunk: ChunkItem) =>
|
|||||||
<div v-if="chunk.chunkType === 'qa_pair'" class="qa-block">
|
<div v-if="chunk.chunkType === 'qa_pair'" class="qa-block">
|
||||||
<div class="qa-block__item">
|
<div class="qa-block__item">
|
||||||
<span class="qa-block__label">Q</span>
|
<span class="qa-block__label">Q</span>
|
||||||
<span class="qa-block__text">{{ chunk.question }}</span>
|
<div class="qa-block__text">
|
||||||
|
<ElXMarkdown
|
||||||
|
:markdown="resolveMarkdownContent(chunk.question)"
|
||||||
|
:allow-html="markdownRenderProps.allowHtml"
|
||||||
|
:sanitize="markdownRenderProps.sanitize"
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
</div>
|
</div>
|
||||||
<div class="qa-block__item">
|
<div class="qa-block__item">
|
||||||
<span class="qa-block__label">A</span>
|
<span class="qa-block__label">A</span>
|
||||||
<span class="qa-block__text">{{ chunk.answer }}</span>
|
<div class="qa-block__text">
|
||||||
|
<ElXMarkdown
|
||||||
|
:markdown="resolveMarkdownContent(chunk.answer)"
|
||||||
|
:allow-html="markdownRenderProps.allowHtml"
|
||||||
|
:sanitize="markdownRenderProps.sanitize"
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<pre class="chunk-card__content">{{ chunk.content }}</pre>
|
<div
|
||||||
|
v-if="resolveMarkdownContent(chunk.renderMarkdown || chunk.content)"
|
||||||
|
class="chunk-card__content chunk-card__content--markdown"
|
||||||
|
>
|
||||||
|
<ElXMarkdown
|
||||||
|
:markdown="
|
||||||
|
resolveMarkdownContent(chunk.renderMarkdown || chunk.content)
|
||||||
|
"
|
||||||
|
:allow-html="markdownRenderProps.allowHtml"
|
||||||
|
:sanitize="markdownRenderProps.sanitize"
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
|
||||||
<div
|
<div
|
||||||
v-if="chunk.warnings && chunk.warnings.length > 0"
|
v-if="chunk.warnings && chunk.warnings.length > 0"
|
||||||
@@ -342,14 +372,129 @@ const isActiveChunk = (chunk: ChunkItem) =>
|
|||||||
|
|
||||||
.chunk-card__content {
|
.chunk-card__content {
|
||||||
margin: 14px 0 0;
|
margin: 14px 0 0;
|
||||||
font-family: inherit;
|
|
||||||
font-size: 13px;
|
font-size: 13px;
|
||||||
line-height: 1.75;
|
line-height: 1.75;
|
||||||
color: var(--el-text-color-regular);
|
color: var(--el-text-color-regular);
|
||||||
white-space: pre-wrap;
|
}
|
||||||
|
|
||||||
|
.chunk-card__content--markdown {
|
||||||
|
min-width: 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
.chunk-card__content--markdown :deep(.markdown-body) {
|
||||||
|
font-size: inherit;
|
||||||
|
line-height: inherit;
|
||||||
|
color: inherit;
|
||||||
|
background: transparent;
|
||||||
|
}
|
||||||
|
|
||||||
|
.chunk-card__content--markdown :deep(.markdown-body > :first-child) {
|
||||||
|
margin-top: 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
.chunk-card__content--markdown :deep(.markdown-body > :last-child) {
|
||||||
|
margin-bottom: 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
.chunk-card__content--markdown :deep(*) {
|
||||||
overflow-wrap: anywhere;
|
overflow-wrap: anywhere;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
.chunk-card__content--markdown :deep(p) {
|
||||||
|
margin: 0 0 10px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.chunk-card__content--markdown :deep(p:last-child) {
|
||||||
|
margin-bottom: 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
.chunk-card__content--markdown :deep(h1),
|
||||||
|
.chunk-card__content--markdown :deep(h2),
|
||||||
|
.chunk-card__content--markdown :deep(h3),
|
||||||
|
.chunk-card__content--markdown :deep(h4),
|
||||||
|
.chunk-card__content--markdown :deep(h5),
|
||||||
|
.chunk-card__content--markdown :deep(h6) {
|
||||||
|
margin: 14px 0 10px;
|
||||||
|
font-weight: 600;
|
||||||
|
line-height: 1.45;
|
||||||
|
color: var(--el-text-color-primary);
|
||||||
|
}
|
||||||
|
|
||||||
|
.chunk-card__content--markdown :deep(h1:first-child),
|
||||||
|
.chunk-card__content--markdown :deep(h2:first-child),
|
||||||
|
.chunk-card__content--markdown :deep(h3:first-child),
|
||||||
|
.chunk-card__content--markdown :deep(h4:first-child),
|
||||||
|
.chunk-card__content--markdown :deep(h5:first-child),
|
||||||
|
.chunk-card__content--markdown :deep(h6:first-child) {
|
||||||
|
margin-top: 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
.chunk-card__content--markdown :deep(ul),
|
||||||
|
.chunk-card__content--markdown :deep(ol) {
|
||||||
|
padding-left: 20px;
|
||||||
|
margin: 0 0 12px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.chunk-card__content--markdown :deep(li + li) {
|
||||||
|
margin-top: 4px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.chunk-card__content--markdown :deep(a) {
|
||||||
|
color: var(--el-color-primary);
|
||||||
|
text-decoration: underline;
|
||||||
|
text-underline-offset: 2px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.chunk-card__content--markdown :deep(img) {
|
||||||
|
display: block;
|
||||||
|
max-width: min(100%, 520px);
|
||||||
|
height: auto;
|
||||||
|
margin: 12px 0;
|
||||||
|
border: 1px solid rgb(15 23 42 / 8%);
|
||||||
|
border-radius: 12px;
|
||||||
|
box-shadow: 0 10px 24px rgb(15 23 42 / 8%);
|
||||||
|
}
|
||||||
|
|
||||||
|
.chunk-card__content--markdown :deep(table) {
|
||||||
|
width: 100%;
|
||||||
|
margin: 12px 0;
|
||||||
|
overflow: hidden;
|
||||||
|
border-collapse: collapse;
|
||||||
|
background: rgb(255 255 255 / 92%);
|
||||||
|
border: 1px solid rgb(15 23 42 / 8%);
|
||||||
|
border-radius: 12px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.chunk-card__content--markdown :deep(th),
|
||||||
|
.chunk-card__content--markdown :deep(td) {
|
||||||
|
padding: 10px 12px;
|
||||||
|
text-align: left;
|
||||||
|
vertical-align: top;
|
||||||
|
border: 1px solid rgb(15 23 42 / 8%);
|
||||||
|
}
|
||||||
|
|
||||||
|
.chunk-card__content--markdown :deep(th) {
|
||||||
|
font-weight: 600;
|
||||||
|
color: var(--el-text-color-primary);
|
||||||
|
background: rgb(37 99 235 / 4%);
|
||||||
|
}
|
||||||
|
|
||||||
|
.chunk-card__content--markdown :deep(pre) {
|
||||||
|
max-width: 100%;
|
||||||
|
padding: 12px 14px;
|
||||||
|
overflow: auto;
|
||||||
|
background: rgb(15 23 42 / 4%);
|
||||||
|
border: 1px solid rgb(15 23 42 / 6%);
|
||||||
|
border-radius: 12px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.chunk-card__content--markdown :deep(blockquote) {
|
||||||
|
padding-left: 12px;
|
||||||
|
margin: 12px 0;
|
||||||
|
color: var(--el-text-color-secondary);
|
||||||
|
border-left: 3px solid rgb(37 99 235 / 24%);
|
||||||
|
}
|
||||||
|
|
||||||
.chunk-card__warnings {
|
.chunk-card__warnings {
|
||||||
display: flex;
|
display: flex;
|
||||||
flex-wrap: wrap;
|
flex-wrap: wrap;
|
||||||
@@ -387,10 +532,32 @@ const isActiveChunk = (chunk: ChunkItem) =>
|
|||||||
}
|
}
|
||||||
|
|
||||||
.qa-block__text {
|
.qa-block__text {
|
||||||
|
min-width: 0;
|
||||||
font-size: 13px;
|
font-size: 13px;
|
||||||
line-height: 1.7;
|
line-height: 1.7;
|
||||||
color: var(--el-text-color-regular);
|
color: var(--el-text-color-regular);
|
||||||
white-space: pre-wrap;
|
}
|
||||||
|
|
||||||
|
.qa-block__text :deep(.markdown-body) {
|
||||||
|
font-size: inherit;
|
||||||
|
line-height: inherit;
|
||||||
|
background: transparent;
|
||||||
|
}
|
||||||
|
|
||||||
|
.qa-block__text :deep(.markdown-body > :first-child) {
|
||||||
|
margin-top: 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
.qa-block__text :deep(.markdown-body > :last-child) {
|
||||||
|
margin-bottom: 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
.qa-block__text :deep(img) {
|
||||||
|
display: block;
|
||||||
|
max-width: min(100%, 420px);
|
||||||
|
height: auto;
|
||||||
|
margin: 10px 0;
|
||||||
|
border-radius: 10px;
|
||||||
}
|
}
|
||||||
|
|
||||||
@media (max-width: 960px) {
|
@media (max-width: 960px) {
|
||||||
|
|||||||
@@ -0,0 +1,40 @@
|
|||||||
|
/**
 * Detects an entity-escaped opening or closing table-related tag
 * (e.g. `&lt;table`, `&lt;/td`). Raw `<table>` markup must NOT match:
 * it is already usable HTML and needs no entity decoding.
 */
const ESCAPED_TABLE_HTML_TAG_PATTERN =
  /&lt;\/?(?:table|thead|tbody|tfoot|tr|th|td|caption|colgroup|col)\b/i;
|
||||||
|
|
||||||
|
/**
 * Normalizes knowledge-base chunk content into text that can be handed
 * straight to the Markdown component.
 *
 * The input is coerced to a string and trimmed; empty input yields ''.
 * Content that matches ESCAPED_TABLE_HTML_TAG_PATTERN (escaped HTML
 * fragments such as `&lt;table&gt;...&lt;/table&gt;`) is additionally passed
 * through decodeHtmlEntities.
 */
export const resolveMarkdownContent = (content?: string) => {
  const normalized = String(content || '').trim();
  if (normalized === '') {
    return '';
  }

  return ESCAPED_TABLE_HTML_TAG_PATTERN.test(normalized)
    ? decodeHtmlEntities(normalized)
    : normalized;
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Single place that controls whether raw HTML inside Markdown is enabled.
* The values are bound to the Markdown component's `allow-html` and
* `sanitize` props by the views that render chunk content.
|
||||||
|
*/
|
||||||
|
export const markdownRenderProps = {
|
||||||
|
allowHtml: true,
|
||||||
|
sanitize: true,
|
||||||
|
} as const;
|
||||||
|
|
||||||
|
/** Named entities that affect HTML structure; all other names are left untouched. */
const STRUCTURAL_HTML_ENTITIES: ReadonlyMap<string, string> = new Map([
  ['amp', '&'],
  ['apos', "'"],
  ['gt', '>'],
  ['lt', '<'],
  ['quot', '"'],
]);

/**
 * Decodes HTML entities that the backend may return so that the Markdown
 * component can process the resulting native tags.
 *
 * The decoding is purely textual and single-pass:
 * - genuine markup already present in `content` (raw tags, autolinks such as
 *   `<https://…>`) is preserved instead of being stripped by an HTML parser;
 * - `&amp;lt;` becomes `&lt;`, never `<` (one level of unescaping only);
 * - it works identically with or without a DOM (browser, SSR, tests).
 *
 * Only structural named entities and numeric references are decoded; any
 * other entity is returned unchanged.
 *
 * NOTE: the decoded text can contain live HTML, so it should only be rendered
 * with sanitizing enabled.
 *
 * @param content Text that may contain entity-escaped HTML.
 * @returns The text with structural and numeric entities decoded.
 */
function decodeHtmlEntities(content: string): string {
  return content.replace(
    /&(?:#(\d+)|#[xX]([\da-fA-F]+)|([a-zA-Z][a-zA-Z\d]*));/g,
    (entity: string, decimal?: string, hex?: string, name?: string): string => {
      if (name !== undefined) {
        return STRUCTURAL_HTML_ENTITIES.get(name) ?? entity;
      }

      const codePoint =
        decimal !== undefined
          ? Number.parseInt(decimal, 10)
          : Number.parseInt(hex ?? '', 16);
      // Reject NUL, out-of-range values and lone surrogates.
      const isScalarValue =
        codePoint > 0 &&
        codePoint <= 0x10ffff &&
        (codePoint < 0xd800 || codePoint > 0xdfff);
      return isScalarValue ? String.fromCodePoint(codePoint) : entity;
    },
  );
}
|
||||||
Reference in New Issue
Block a user