feat: 支持知识库导入 PPTX 与 XLSX 文档

- 打通 Office 文档桥接解析、解析进度承接与图片引用改写

- 落地 PPTX 按页分块、XLSX 行窗口分块以及预览与检索渲染闭环
This commit is contained in:
2026-04-18 13:01:17 +08:00
parent ad67ba85ad
commit 4130381658
28 changed files with 2876 additions and 120 deletions

View File

@@ -407,6 +407,8 @@ public class DocumentCollectionController extends BaseCurdController<DocumentCol
KnowledgeSearchResultItem item = new KnowledgeSearchResultItem(); KnowledgeSearchResultItem item = new KnowledgeSearchResultItem();
item.setSorting(index + 1); item.setSorting(index + 1);
item.setContent(document.getContent()); item.setContent(document.getContent());
item.setRenderMarkdown(readMetadataAsString(document, "renderMarkdown"));
item.setSourceFileName(readMetadataAsString(document, "sourceFileName"));
item.setScore(roundScore(document.getScore())); item.setScore(roundScore(document.getScore()));
item.setHitSource(readMetadataAsString(document, RagRetrievalMetadataKeys.HIT_SOURCE)); item.setHitSource(readMetadataAsString(document, RagRetrievalMetadataKeys.HIT_SOURCE));
item.setVectorScore(roundScore(readMetadataAsDouble(document, RagRetrievalMetadataKeys.VECTOR_SCORE))); item.setVectorScore(roundScore(readMetadataAsDouble(document, RagRetrievalMetadataKeys.VECTOR_SCORE)));

View File

@@ -932,6 +932,10 @@ public class ShareKnowledgeController {
KnowledgeSearchResultItem item = new KnowledgeSearchResultItem(); KnowledgeSearchResultItem item = new KnowledgeSearchResultItem();
item.setSorting(index + 1); item.setSorting(index + 1);
item.setContent(document.getContent()); item.setContent(document.getContent());
Object renderMarkdown = document.getMetadata("renderMarkdown");
item.setRenderMarkdown(renderMarkdown == null ? null : String.valueOf(renderMarkdown));
Object sourceFileName = document.getMetadata("sourceFileName");
item.setSourceFileName(sourceFileName == null ? null : String.valueOf(sourceFileName));
item.setScore(document.getScore() == null ? null : document.getScore().doubleValue()); item.setScore(document.getScore() == null ? null : document.getScore().doubleValue());
Object hitSource = document.getMetadata("hitSource"); Object hitSource = document.getMetadata("hitSource");
item.setHitSource(hitSource == null ? null : String.valueOf(hitSource)); item.setHitSource(hitSource == null ? null : String.valueOf(hitSource));

View File

@@ -638,6 +638,10 @@ public class PublicKnowledgeShareController {
for (com.easyagents.core.document.Document document : documents) { for (com.easyagents.core.document.Document document : documents) {
KnowledgeSearchResultItem item = new KnowledgeSearchResultItem(); KnowledgeSearchResultItem item = new KnowledgeSearchResultItem();
item.setContent(document.getContent()); item.setContent(document.getContent());
Object renderMarkdown = document.getMetadata("renderMarkdown");
item.setRenderMarkdown(renderMarkdown == null ? null : String.valueOf(renderMarkdown));
Object sourceFileName = document.getMetadata("sourceFileName");
item.setSourceFileName(sourceFileName == null ? null : String.valueOf(sourceFileName));
item.setScore(document.getScore()); item.setScore(document.getScore());
Object hitSource = document.getMetadata("hitSource"); Object hitSource = document.getMetadata("hitSource");
item.setHitSource(hitSource == null ? null : String.valueOf(hitSource)); item.setHitSource(hitSource == null ? null : String.valueOf(hitSource));

View File

@@ -112,7 +112,6 @@
<groupId>com.easyagents</groupId> <groupId>com.easyagents</groupId>
<artifactId>easy-agents-mcp</artifactId> <artifactId>easy-agents-mcp</artifactId>
</dependency> </dependency>
<dependency> <dependency>
<groupId>junit</groupId> <groupId>junit</groupId>
<artifactId>junit</artifactId> <artifactId>junit</artifactId>

View File

@@ -35,10 +35,14 @@ public class DocumentParseBridgeException extends RuntimeException {
public static DocumentParseBridgeException serviceNotEnabled() { public static DocumentParseBridgeException serviceNotEnabled() {
return new DocumentParseBridgeException( return new DocumentParseBridgeException(
"service_not_enabled", "service_not_enabled",
"统一文档解析服务未启用,请先配置 easy-agents.document.pdf.provider" "统一文档解析服务未启用,请先配置 easy-agents.document.ocr.provider=mineru"
); );
} }
/**
 * Creates a "service not enabled" bridge exception carrying a caller-supplied message.
 *
 * @param message human-readable description of which service is not enabled
 * @return a new exception whose error code is {@code service_not_enabled}
 */
public static DocumentParseBridgeException serviceNotEnabled(String message) {
    final String errorCode = "service_not_enabled";
    return new DocumentParseBridgeException(errorCode, message);
}
public static DocumentParseBridgeException unsupportedSource(String message) { public static DocumentParseBridgeException unsupportedSource(String message) {
return new DocumentParseBridgeException("unsupported_source", message); return new DocumentParseBridgeException("unsupported_source", message);
} }

View File

@@ -22,6 +22,11 @@ public class DocumentParseTaskStatus {
private String statusUrl; private String statusUrl;
private String resultUrl; private String resultUrl;
private Integer queuedAhead; private Integer queuedAhead;
// Progress details of a parse task. All of them are nullable wrapper/reference types.
// NOTE(review): units and ranges (e.g. whether progressPercent is 0-100) are not visible here — confirm with the provider.
private Integer progressPercent;
// Name of the stage the task is currently in, as reported upstream.
private String currentStage;
// Processed / total item counters; presumably pages, slides or rows depending on the file type — verify against the provider.
private Integer processedItems;
private Integer totalItems;
// Free-form status text accompanying the task state.
private String statusMessage;
public String getTaskId() { public String getTaskId() {
return taskId; return taskId;
@@ -110,4 +115,44 @@ public class DocumentParseTaskStatus {
public void setQueuedAhead(Integer queuedAhead) { public void setQueuedAhead(Integer queuedAhead) {
this.queuedAhead = queuedAhead; this.queuedAhead = queuedAhead;
} }
/**
 * @return the stored progress percent, or {@code null} when none was set
 */
public Integer getProgressPercent() {
return progressPercent;
}
/**
 * @param progressPercent progress percent to store; may be {@code null}
 */
public void setProgressPercent(Integer progressPercent) {
this.progressPercent = progressPercent;
}
/**
 * @return the stored current-stage name, or {@code null} when none was set
 */
public String getCurrentStage() {
return currentStage;
}
/**
 * @param currentStage current-stage name to store; may be {@code null}
 */
public void setCurrentStage(String currentStage) {
this.currentStage = currentStage;
}
/**
 * @return the stored processed-item count, or {@code null} when none was set
 */
public Integer getProcessedItems() {
return processedItems;
}
/**
 * @param processedItems processed-item count to store; may be {@code null}
 */
public void setProcessedItems(Integer processedItems) {
this.processedItems = processedItems;
}
/**
 * @return the stored total-item count, or {@code null} when none was set
 */
public Integer getTotalItems() {
return totalItems;
}
/**
 * @param totalItems total-item count to store; may be {@code null}
 */
public void setTotalItems(Integer totalItems) {
this.totalItems = totalItems;
}
/**
 * @return the stored status message, or {@code null} when none was set
 */
public String getStatusMessage() {
return statusMessage;
}
/**
 * @param statusMessage status message to store; may be {@code null}
 */
public void setStatusMessage(String statusMessage) {
this.statusMessage = statusMessage;
}
} }

View File

@@ -5,6 +5,10 @@ import com.easyagents.document.core.entity.ParseResponse;
import com.easyagents.document.core.entity.ParseResult; import com.easyagents.document.core.entity.ParseResult;
import com.easyagents.document.core.entity.ParseTaskInfo; import com.easyagents.document.core.entity.ParseTaskInfo;
import com.easyagents.document.core.entity.ParseTaskStatus; import com.easyagents.document.core.entity.ParseTaskStatus;
import com.easyagents.document.pdf.PdfDocumentParseService;
import com.easyagents.document.pptx.PptxDocumentParseService;
import com.easyagents.document.xlsx.XlsxDocumentParseService;
import org.springframework.beans.factory.annotation.Qualifier;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import org.springframework.lang.Nullable; import org.springframework.lang.Nullable;
@@ -20,8 +24,13 @@ import tech.easyflow.ai.document.service.DocumentParseBridgeService;
import tech.easyflow.ai.document.support.DocumentSourceLoader; import tech.easyflow.ai.document.support.DocumentSourceLoader;
import tech.easyflow.ai.document.support.DocumentParseRequestFactory; import tech.easyflow.ai.document.support.DocumentParseRequestFactory;
import tech.easyflow.ai.document.support.DocumentParseResultMapper; import tech.easyflow.ai.document.support.DocumentParseResultMapper;
import tech.easyflow.ai.document.support.DocumentParseSourceType;
import tech.easyflow.ai.document.support.LoadedDocumentSource; import tech.easyflow.ai.document.support.LoadedDocumentSource;
import tech.easyflow.ai.utils.DocUtil;
import java.util.ArrayList;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.function.Function;
/** /**
* 统一文档解析桥接门面默认实现。 * 统一文档解析桥接门面默认实现。
@@ -33,18 +42,33 @@ import tech.easyflow.ai.utils.DocUtil;
public class DocumentParseBridgeServiceImpl implements DocumentParseBridgeService { public class DocumentParseBridgeServiceImpl implements DocumentParseBridgeService {
private static final Logger LOG = LoggerFactory.getLogger(DocumentParseBridgeServiceImpl.class); private static final Logger LOG = LoggerFactory.getLogger(DocumentParseBridgeServiceImpl.class);
private static final String DEFAULT_DOCUMENT_PARSE_SERVICE_BEAN_NAME = "documentParseService";
@Nullable @Nullable
private final DocumentParseService documentParseService; private final DocumentParseService defaultDocumentParseService;
@Nullable
private final PdfDocumentParseService pdfDocumentParseService;
@Nullable
private final PptxDocumentParseService pptxDocumentParseService;
@Nullable
private final XlsxDocumentParseService xlsxDocumentParseService;
private final DocumentSourceLoader documentSourceLoader; private final DocumentSourceLoader documentSourceLoader;
private final DocumentParseRequestFactory parseRequestFactory; private final DocumentParseRequestFactory parseRequestFactory;
private final DocumentParseResultMapper parseResultMapper; private final DocumentParseResultMapper parseResultMapper;
public DocumentParseBridgeServiceImpl(@Nullable DocumentParseService documentParseService, public DocumentParseBridgeServiceImpl(@Nullable
@Qualifier(DEFAULT_DOCUMENT_PARSE_SERVICE_BEAN_NAME)
DocumentParseService defaultDocumentParseService,
@Nullable PdfDocumentParseService pdfDocumentParseService,
@Nullable PptxDocumentParseService pptxDocumentParseService,
@Nullable XlsxDocumentParseService xlsxDocumentParseService,
DocumentSourceLoader documentSourceLoader, DocumentSourceLoader documentSourceLoader,
DocumentParseRequestFactory parseRequestFactory, DocumentParseRequestFactory parseRequestFactory,
DocumentParseResultMapper parseResultMapper) { DocumentParseResultMapper parseResultMapper) {
this.documentParseService = documentParseService; this.defaultDocumentParseService = defaultDocumentParseService;
this.pdfDocumentParseService = pdfDocumentParseService;
this.pptxDocumentParseService = pptxDocumentParseService;
this.xlsxDocumentParseService = xlsxDocumentParseService;
this.documentSourceLoader = documentSourceLoader; this.documentSourceLoader = documentSourceLoader;
this.parseRequestFactory = parseRequestFactory; this.parseRequestFactory = parseRequestFactory;
this.parseResultMapper = parseResultMapper; this.parseResultMapper = parseResultMapper;
@@ -59,7 +83,8 @@ public class DocumentParseBridgeServiceImpl implements DocumentParseBridgeServic
LoadedDocumentSource loadedSource = prepareSupportedSource(source); LoadedDocumentSource loadedSource = prepareSupportedSource(source);
LOG.info("桥接服务开始同步解析文档: fileName={}, contentType={}, scenario={}", LOG.info("桥接服务开始同步解析文档: fileName={}, contentType={}, scenario={}",
loadedSource.getFileName(), loadedSource.getContentType(), scenario); loadedSource.getFileName(), loadedSource.getContentType(), scenario);
ParseResponse response = requireService().parse(parseRequestFactory.build(loadedSource, scenario)); DocumentParseService parseService = resolveService(loadedSource);
ParseResponse response = parseService.parse(parseRequestFactory.build(loadedSource, scenario));
DocumentParsedResult result = parseResultMapper.map(extractSingleResult(response, false)); DocumentParsedResult result = parseResultMapper.map(extractSingleResult(response, false));
LOG.info("桥接服务同步解析完成: fileName={}, scenario={}, preferredTextLength={}", LOG.info("桥接服务同步解析完成: fileName={}, scenario={}, preferredTextLength={}",
loadedSource.getFileName(), scenario, resolveTextLength(result)); loadedSource.getFileName(), scenario, resolveTextLength(result));
@@ -84,7 +109,8 @@ public class DocumentParseBridgeServiceImpl implements DocumentParseBridgeServic
LoadedDocumentSource loadedSource = prepareSupportedSource(source); LoadedDocumentSource loadedSource = prepareSupportedSource(source);
LOG.info("桥接服务开始提交异步解析任务: fileName={}, contentType={}, scenario={}", LOG.info("桥接服务开始提交异步解析任务: fileName={}, contentType={}, scenario={}",
loadedSource.getFileName(), loadedSource.getContentType(), scenario); loadedSource.getFileName(), loadedSource.getContentType(), scenario);
ParseTaskStatus taskStatus = requireService().submit(parseRequestFactory.build(loadedSource, scenario)); DocumentParseService parseService = resolveService(loadedSource);
ParseTaskStatus taskStatus = parseService.submit(parseRequestFactory.build(loadedSource, scenario));
DocumentParseTaskStatus mappedStatus = parseResultMapper.map(taskStatus); DocumentParseTaskStatus mappedStatus = parseResultMapper.map(taskStatus);
LOG.info("桥接服务异步解析任务提交完成: fileName={}, scenario={}, providerTaskId={}, status={}", LOG.info("桥接服务异步解析任务提交完成: fileName={}, scenario={}, providerTaskId={}, status={}",
loadedSource.getFileName(), scenario, mappedStatus.getTaskId(), mappedStatus.getStatus()); loadedSource.getFileName(), scenario, mappedStatus.getTaskId(), mappedStatus.getStatus());
@@ -109,7 +135,8 @@ public class DocumentParseBridgeServiceImpl implements DocumentParseBridgeServic
throw DocumentParseBridgeException.taskFailed("taskId 不能为空"); throw DocumentParseBridgeException.taskFailed("taskId 不能为空");
} }
try { try {
return parseResultMapper.map(requireService().queryTask(taskId)); ParseTaskStatus taskStatus = executeAgainstTaskService(taskId, service -> service.queryTask(taskId));
return parseResultMapper.map(taskStatus);
} catch (DocumentParseBridgeException e) { } catch (DocumentParseBridgeException e) {
throw e; throw e;
} catch (Exception e) { } catch (Exception e) {
@@ -127,7 +154,7 @@ public class DocumentParseBridgeServiceImpl implements DocumentParseBridgeServic
} }
try { try {
LOG.info("桥接服务开始获取异步解析结果: providerTaskId={}", taskId); LOG.info("桥接服务开始获取异步解析结果: providerTaskId={}", taskId);
ParseResponse response = requireService().queryResult(taskId); ParseResponse response = executeAgainstTaskService(taskId, service -> service.queryResult(taskId));
DocumentParsedResult result = parseResultMapper.map(extractSingleResult(response, true)); DocumentParsedResult result = parseResultMapper.map(extractSingleResult(response, true));
LOG.info("桥接服务获取异步解析结果完成: providerTaskId={}, preferredTextLength={}", LOG.info("桥接服务获取异步解析结果完成: providerTaskId={}, preferredTextLength={}",
taskId, resolveTextLength(result)); taskId, resolveTextLength(result));
@@ -150,7 +177,7 @@ public class DocumentParseBridgeServiceImpl implements DocumentParseBridgeServic
throw DocumentParseBridgeException.taskFailed("taskId 不能为空"); throw DocumentParseBridgeException.taskFailed("taskId 不能为空");
} }
try { try {
ParseTaskInfo taskInfo = requireService().queryTaskInfo(taskId); ParseTaskInfo taskInfo = executeAgainstTaskService(taskId, service -> service.queryTaskInfo(taskId));
DocumentParseTaskInfo mappedTaskInfo = parseResultMapper.map(taskInfo); DocumentParseTaskInfo mappedTaskInfo = parseResultMapper.map(taskInfo);
LOG.info("桥接服务查询异步解析任务状态: providerTaskId={}, status={}, hasResult={}", LOG.info("桥接服务查询异步解析任务状态: providerTaskId={}, status={}, hasResult={}",
taskId, taskId,
@@ -177,39 +204,84 @@ public class DocumentParseBridgeServiceImpl implements DocumentParseBridgeServic
return text == null ? 0 : text.length(); return text == null ? 0 : text.length();
} }
private DocumentParseService requireService() {
if (documentParseService == null) {
throw DocumentParseBridgeException.serviceNotEnabled();
}
return documentParseService;
}
private LoadedDocumentSource prepareSupportedSource(DocumentSourceRef source) { private LoadedDocumentSource prepareSupportedSource(DocumentSourceRef source) {
LoadedDocumentSource loadedSource = documentSourceLoader.load(source); LoadedDocumentSource loadedSource = documentSourceLoader.load(source);
if (!isSupportedByBridge(loadedSource)) { if (!isSupportedByBridge(loadedSource)) {
throw DocumentParseBridgeException.unsupportedSource("统一文档解析桥接当前仅支持 PDF、DOCX 文件"); throw DocumentParseBridgeException.unsupportedSource("统一文档解析桥接当前仅支持 PDF、DOCX、PPTX、XLSX 文件");
} }
return loadedSource; return loadedSource;
} }
private boolean isSupportedByBridge(LoadedDocumentSource loadedSource) { private boolean isSupportedByBridge(LoadedDocumentSource loadedSource) {
String contentType = loadedSource.getContentType(); return DocumentParseSourceType.resolve(loadedSource.getFileName(), loadedSource.getContentType()) != DocumentParseSourceType.UNSUPPORTED;
if (StringUtils.hasText(contentType)) { }
String normalizedContentType = contentType.toLowerCase();
if (normalizedContentType.contains("pdf") private DocumentParseService resolveService(LoadedDocumentSource loadedSource) {
|| normalizedContentType.contains("wordprocessingml.document")) { DocumentParseSourceType sourceType = DocumentParseSourceType.resolve(loadedSource.getFileName(), loadedSource.getContentType());
return true; switch (sourceType) {
case PDF:
return requireSpecificService(pdfDocumentParseService, defaultDocumentParseService, "PDF");
case DOCX:
return requireSpecificService(defaultDocumentParseService, pdfDocumentParseService, "DOCX");
case PPTX:
return requireSpecificService(pptxDocumentParseService, null, "PPTX");
case XLSX:
return requireSpecificService(xlsxDocumentParseService, null, "XLSX");
default:
throw DocumentParseBridgeException.unsupportedSource("当前文件类型暂不支持桥接解析");
} }
} }
String fileName = loadedSource.getFileName();
if (!StringUtils.hasText(fileName) || !fileName.contains(".")) { private DocumentParseService requireSpecificService(@Nullable DocumentParseService primaryService,
return false; @Nullable DocumentParseService fallbackService,
String sourceType) {
if (primaryService != null) {
return primaryService;
} }
String suffix = DocUtil.normalizeSuffix(DocUtil.getSuffix(fileName)); if (fallbackService != null) {
if ("pdf".equals(suffix) || "docx".equals(suffix)) { return fallbackService;
return true;
} }
return false; throw DocumentParseBridgeException.serviceNotEnabled("未启用 " + sourceType + " 文档解析服务");
}
/**
 * Runs a task-scoped action against each available parse service in turn and returns the
 * first result that is produced without an exception.
 *
 * <p>A task id alone does not tell which service created the task, so every service returned by
 * {@code availableServices()} is tried in order. If all of them fail, the exception of the last
 * attempted service is thrown (unchanged from before), and the failures of the earlier services
 * are attached to it as suppressed exceptions so they are not lost behind a DEBUG log line.
 *
 * @param taskId provider task id, used for logging only
 * @param action the call to perform against a candidate service
 * @param <T>    result type of the action
 * @return the result of the first service for which the action completes normally
 * @throws DocumentParseBridgeException when no parse service is available at all
 * @throws RuntimeException             the last service's failure when every service failed
 */
private <T> T executeAgainstTaskService(String taskId, Function<DocumentParseService, T> action) {
    List<DocumentParseService> services = availableServices();
    if (services.isEmpty()) {
        throw DocumentParseBridgeException.serviceNotEnabled();
    }
    List<Exception> failures = new ArrayList<Exception>(services.size());
    for (DocumentParseService service : services) {
        try {
            return action.apply(service);
        } catch (Exception exception) {
            failures.add(exception);
            LOG.debug("桥接服务任务查询尝试失败,准备切换下一个解析服务: taskId={}, service={}",
                    taskId,
                    service.getClass().getSimpleName(),
                    exception);
        }
    }
    // Every iteration either returned or recorded a failure, so the list is non-empty here.
    Exception lastException = failures.get(failures.size() - 1);
    for (int index = 0; index < failures.size() - 1; index++) {
        Exception earlier = failures.get(index);
        // addSuppressed rejects self-suppression; guard against a service re-throwing a shared instance.
        if (earlier != lastException) {
            lastException.addSuppressed(earlier);
        }
    }
    if (lastException instanceof RuntimeException) {
        throw (RuntimeException) lastException;
    }
    throw DocumentParseBridgeException.taskFailed("未找到可处理当前任务ID的文档解析服务", lastException);
}
private List<DocumentParseService> availableServices() {
LinkedHashSet<DocumentParseService> services = new LinkedHashSet<DocumentParseService>();
if (pptxDocumentParseService != null) {
services.add(pptxDocumentParseService);
}
if (xlsxDocumentParseService != null) {
services.add(xlsxDocumentParseService);
}
if (pdfDocumentParseService != null) {
services.add(pdfDocumentParseService);
}
if (defaultDocumentParseService != null) {
services.add(defaultDocumentParseService);
}
return new ArrayList<DocumentParseService>(services);
} }
private ParseResult extractSingleResult(ParseResponse response, boolean resultFetchPhase) { private ParseResult extractSingleResult(ParseResponse response, boolean resultFetchPhase) {

View File

@@ -2,6 +2,9 @@ package tech.easyflow.ai.document.support;
import com.easyagents.document.core.entity.ParseFile; import com.easyagents.document.core.entity.ParseFile;
import com.easyagents.document.core.entity.ParseRequest; import com.easyagents.document.core.entity.ParseRequest;
import com.easyagents.document.core.entity.PdfParseRequest;
import com.easyagents.document.core.entity.PptxParseRequest;
import com.easyagents.document.core.entity.XlsxParseRequest;
import org.springframework.stereotype.Component; import org.springframework.stereotype.Component;
import tech.easyflow.ai.document.exception.DocumentParseBridgeException; import tech.easyflow.ai.document.exception.DocumentParseBridgeException;
import tech.easyflow.ai.document.model.DocumentParseScenario; import tech.easyflow.ai.document.model.DocumentParseScenario;
@@ -31,12 +34,28 @@ public class DocumentParseRequestFactory {
if (scenario == null) { if (scenario == null) {
throw DocumentParseBridgeException.requestBuildFailed("解析场景不能为空"); throw DocumentParseBridgeException.requestBuildFailed("解析场景不能为空");
} }
ParseRequest request = new ParseRequest(); ParseRequest request = createTypedRequest(source);
request.addFile(ParseFile.of(source.getFileName(), source.getContentBytes(), source.getContentType())); request.addFile(ParseFile.of(source.getFileName(), source.getContentBytes(), source.getContentType()));
applyScenario(request, scenario); applyScenario(request, scenario);
return request; return request;
} }
/**
 * Instantiates the request class that corresponds to the detected source type.
 *
 * <p>PDF, PPTX and XLSX get their dedicated request subclasses; DOCX uses the plain
 * {@link ParseRequest}. Any other type is rejected.
 *
 * @param source the loaded document whose file name and content type drive the detection
 * @return a fresh, empty request of the matching type
 * @throws DocumentParseBridgeException when the source type is not supported by the bridge
 */
private ParseRequest createTypedRequest(LoadedDocumentSource source) {
    final String fileName = source.getFileName();
    final String contentType = source.getContentType();
    final ParseRequest typedRequest;
    switch (DocumentParseSourceType.resolve(fileName, contentType)) {
        case PDF:
            typedRequest = new PdfParseRequest();
            break;
        case PPTX:
            typedRequest = new PptxParseRequest();
            break;
        case XLSX:
            typedRequest = new XlsxParseRequest();
            break;
        case DOCX:
            typedRequest = new ParseRequest();
            break;
        default:
            throw DocumentParseBridgeException.requestBuildFailed("当前文件类型暂不支持桥接解析");
    }
    return typedRequest;
}
private void applyScenario(ParseRequest request, DocumentParseScenario scenario) { private void applyScenario(ParseRequest request, DocumentParseScenario scenario) {
switch (scenario) { switch (scenario) {
case WORKFLOW_TEXT: case WORKFLOW_TEXT:

View File

@@ -69,6 +69,11 @@ public class DocumentParseResultMapper {
status.setStatusUrl(taskStatus.getStatusUrl()); status.setStatusUrl(taskStatus.getStatusUrl());
status.setResultUrl(taskStatus.getResultUrl()); status.setResultUrl(taskStatus.getResultUrl());
status.setQueuedAhead(taskStatus.getQueuedAhead()); status.setQueuedAhead(taskStatus.getQueuedAhead());
status.setProgressPercent(taskStatus.getProgressPercent());
status.setCurrentStage(taskStatus.getCurrentStage());
status.setProcessedItems(taskStatus.getProcessedItems());
status.setTotalItems(taskStatus.getTotalItems());
status.setStatusMessage(taskStatus.getStatusMessage());
return status; return status;
} }
@@ -104,6 +109,11 @@ public class DocumentParseResultMapper {
status.setStatusUrl(taskStatus.getStatusUrl()); status.setStatusUrl(taskStatus.getStatusUrl());
status.setResultUrl(taskStatus.getResultUrl()); status.setResultUrl(taskStatus.getResultUrl());
status.setQueuedAhead(taskStatus.getQueuedAhead()); status.setQueuedAhead(taskStatus.getQueuedAhead());
status.setProgressPercent(taskStatus.getProgressPercent());
status.setCurrentStage(taskStatus.getCurrentStage());
status.setProcessedItems(taskStatus.getProcessedItems());
status.setTotalItems(taskStatus.getTotalItems());
status.setStatusMessage(taskStatus.getStatusMessage());
} }
private String resolvePreferredText(ParseResult parseResult) { private String resolvePreferredText(ParseResult parseResult) {

View File

@@ -0,0 +1,70 @@
package tech.easyflow.ai.document.support;
import org.springframework.util.StringUtils;
import tech.easyflow.ai.utils.DocUtil;
/**
 * Source file types supported by the unified document parse bridge.
 *
 * @author Codex
 * @since 2026-04-17
 */
public enum DocumentParseSourceType {
    PDF,
    DOCX,
    PPTX,
    XLSX,
    UNSUPPORTED;

    /**
     * Infers the document type from the MIME type and the file name.
     *
     * <p>The content type is checked first using substring matching. Only when it is blank or
     * matches none of the known types does the method fall back to the file-name suffix.
     *
     * @param fileName    file name; may be {@code null} or lack a suffix
     * @param contentType MIME type; may be {@code null} or blank
     * @return the detected type, or {@link #UNSUPPORTED} when nothing matches; never {@code null}
     */
    public static DocumentParseSourceType resolve(String fileName, String contentType) {
        if (StringUtils.hasText(contentType)) {
            // Lower-case with Locale.ROOT: the default-locale variant maps 'I' to a dotless 'ı'
            // under Turkish/Azeri locales, which would break matching of upper-case MIME types.
            String normalizedContentType = contentType.toLowerCase(java.util.Locale.ROOT);
            if (normalizedContentType.contains("pdf")) {
                return PDF;
            }
            if (normalizedContentType.contains("wordprocessingml.document")) {
                return DOCX;
            }
            if (normalizedContentType.contains("presentationml.presentation")) {
                return PPTX;
            }
            if (normalizedContentType.contains("spreadsheetml.sheet")) {
                return XLSX;
            }
        }
        // Content type was missing or unrecognized: fall back to the file-name suffix.
        if (!StringUtils.hasText(fileName) || !fileName.contains(".")) {
            return UNSUPPORTED;
        }
        String suffix = DocUtil.normalizeSuffix(DocUtil.getSuffix(fileName));
        if ("pdf".equals(suffix)) {
            return PDF;
        }
        if ("docx".equals(suffix)) {
            return DOCX;
        }
        if ("pptx".equals(suffix)) {
            return PPTX;
        }
        if ("xlsx".equals(suffix)) {
            return XLSX;
        }
        return UNSUPPORTED;
    }

    /**
     * Tells whether this type is one of the Office types onboarded in the first release.
     *
     * @return {@code true} only for {@link #PPTX} and {@link #XLSX}; DOCX is not included
     */
    public boolean isOffice() {
        return this == PPTX || this == XLSX;
    }
}

View File

@@ -286,6 +286,7 @@ public final class DocumentImportDtos {
private String chunkId; private String chunkId;
private String chunkType; private String chunkType;
private String content; private String content;
private String renderMarkdown;
private List<String> headingPath = new ArrayList<String>(); private List<String> headingPath = new ArrayList<String>();
private Integer partNo; private Integer partNo;
private Integer partTotal; private Integer partTotal;
@@ -335,6 +336,14 @@ public final class DocumentImportDtos {
this.content = content; this.content = content;
} }
/**
 * @return the stored render markdown, or {@code null} when none was set
 */
public String getRenderMarkdown() {
return renderMarkdown;
}
/**
 * @param renderMarkdown render markdown to store; may be {@code null}
 */
public void setRenderMarkdown(String renderMarkdown) {
this.renderMarkdown = renderMarkdown;
}
public List<String> getHeadingPath() { public List<String> getHeadingPath() {
return headingPath; return headingPath;
} }

View File

@@ -22,4 +22,19 @@ public final class DocumentImportKeys {
public static final String KEY_DOCUMENT_PARSE_METADATA = "parse.metadata"; public static final String KEY_DOCUMENT_PARSE_METADATA = "parse.metadata";
public static final String KEY_DOCUMENT_PARSE_WARNINGS = "parse.warnings"; public static final String KEY_DOCUMENT_PARSE_WARNINGS = "parse.warnings";
public static final String KEY_DOCUMENT_PROVIDER_TASK_ID = "parse.providerTaskId"; public static final String KEY_DOCUMENT_PROVIDER_TASK_ID = "parse.providerTaskId";
// Keys under the "parse." prefix describing images produced by parsing.
// NOTE(review): presumably stored in the document's options alongside the other parse.* keys — confirm with the writer side.
public static final String KEY_DOCUMENT_PARSE_IMAGE_URLS = "parse.imageUrls";
public static final String KEY_DOCUMENT_PARSE_IMAGE_COUNT = "parse.imageCount";
public static final String KEY_DOCUMENT_PARSE_IMAGE_STORAGE_PREFIX = "parse.imageStoragePrefix";
// Keys under the "parse." prefix describing parse progress.
// The current-stage and status-message keys are read from the document's options when the task-status stream payload is built.
public static final String KEY_DOCUMENT_PARSE_PROGRESS_PERCENT = "parse.progressPercent";
public static final String KEY_DOCUMENT_PARSE_CURRENT_STAGE = "parse.currentStage";
public static final String KEY_DOCUMENT_PARSE_PROCESSED_ITEMS = "parse.processedItems";
public static final String KEY_DOCUMENT_PARSE_TOTAL_ITEMS = "parse.totalItems";
public static final String KEY_DOCUMENT_PARSE_STATUS_MESSAGE = "parse.statusMessage";
// Un-prefixed keys. "renderMarkdown" is read from a chunk's options during knowledge search.
// NOTE(review): the remaining keys look like chunk-level location metadata (slide page, sheet name, row window, image refs) — confirm.
public static final String KEY_DOCUMENT_RENDER_MARKDOWN = "renderMarkdown";
public static final String KEY_DOCUMENT_PAGE_INDEX = "pageIndex";
public static final String KEY_DOCUMENT_SHEET_NAME = "sheetName";
public static final String KEY_DOCUMENT_ROW_START = "rowStart";
public static final String KEY_DOCUMENT_ROW_END = "rowEnd";
public static final String KEY_DOCUMENT_IMAGE_REFS = "imageRefs";
public static final String KEY_DOCUMENT_PARSE_ARTIFACT_SUMMARY = "parseArtifactSummary";
} }

View File

@@ -6,6 +6,7 @@ import org.springframework.stereotype.Service;
import org.springframework.transaction.support.TransactionSynchronization; import org.springframework.transaction.support.TransactionSynchronization;
import org.springframework.transaction.support.TransactionSynchronizationManager; import org.springframework.transaction.support.TransactionSynchronizationManager;
import org.springframework.web.servlet.mvc.method.annotation.SseEmitter; import org.springframework.web.servlet.mvc.method.annotation.SseEmitter;
import tech.easyflow.ai.documentimport.DocumentImportKeys;
import tech.easyflow.ai.entity.Document; import tech.easyflow.ai.entity.Document;
import tech.easyflow.ai.mapper.DocumentMapper; import tech.easyflow.ai.mapper.DocumentMapper;
import tech.easyflow.common.web.exceptions.BusinessException; import tech.easyflow.common.web.exceptions.BusinessException;
@@ -116,11 +117,21 @@ public class DocumentImportTaskStatusStreamService {
payload.put("totalChunks", document.getTotalChunks()); payload.put("totalChunks", document.getTotalChunks());
payload.put("completedChunks", document.getCompletedChunks()); payload.put("completedChunks", document.getCompletedChunks());
payload.put("failedChunks", document.getFailedChunks()); payload.put("failedChunks", document.getFailedChunks());
payload.put("parseCurrentStage", readOptionAsString(document, DocumentImportKeys.KEY_DOCUMENT_PARSE_CURRENT_STAGE));
payload.put("parseStatusMessage", readOptionAsString(document, DocumentImportKeys.KEY_DOCUMENT_PARSE_STATUS_MESSAGE));
payload.put("lastTaskError", document.getLastTaskError()); payload.put("lastTaskError", document.getLastTaskError());
payload.put("taskModifiedAt", document.getTaskModifiedAt()); payload.put("taskModifiedAt", document.getTaskModifiedAt());
return payload; return payload;
} }
/**
 * Looks up an entry in the document's options and renders it as a string.
 *
 * @param document document whose options are consulted; may be {@code null}
 * @param key      option key; may be {@code null}
 * @return {@code String.valueOf} of the stored value, or {@code null} when the document,
 *         its options, the key or the stored value is {@code null}
 */
private String readOptionAsString(Document document, String key) {
    boolean lookupPossible = document != null && document.getOptions() != null && key != null;
    if (!lookupPossible) {
        return null;
    }
    Object rawValue = document.getOptions().get(key);
    if (rawValue == null) {
        return null;
    }
    return String.valueOf(rawValue);
}
private void sendAsync(String topicKey, SseEmitter emitter, String eventName, Map<String, Object> payload) { private void sendAsync(String topicKey, SseEmitter emitter, String eventName, Map<String, Object> payload) {
sseThreadPool.execute(() -> { sseThreadPool.execute(() -> {
try { try {

View File

@@ -4,6 +4,8 @@ public class KnowledgeSearchResultItem {
private Integer sorting; private Integer sorting;
private String content; private String content;
private String renderMarkdown;
private String sourceFileName;
private Double score; private Double score;
private String hitSource; private String hitSource;
private Double vectorScore; private Double vectorScore;
@@ -25,6 +27,22 @@ public class KnowledgeSearchResultItem {
this.content = content; this.content = content;
} }
/**
 * @return the stored render markdown, or {@code null} when none was set
 */
public String getRenderMarkdown() {
return renderMarkdown;
}
/**
 * @param renderMarkdown render markdown to store; may be {@code null}
 */
public void setRenderMarkdown(String renderMarkdown) {
this.renderMarkdown = renderMarkdown;
}
/**
 * @return the stored source file name, or {@code null} when none was set
 */
public String getSourceFileName() {
return sourceFileName;
}
/**
 * @param sourceFileName source file name to store; may be {@code null}
 */
public void setSourceFileName(String sourceFileName) {
this.sourceFileName = sourceFileName;
}
public Double getScore() { public Double getScore() {
return score; return score;
} }

View File

@@ -32,6 +32,7 @@ import tech.easyflow.ai.entity.FaqItem;
import tech.easyflow.ai.entity.Model; import tech.easyflow.ai.entity.Model;
import tech.easyflow.ai.enums.DocumentProcessStatus; import tech.easyflow.ai.enums.DocumentProcessStatus;
import tech.easyflow.ai.enums.PublishStatus; import tech.easyflow.ai.enums.PublishStatus;
import tech.easyflow.ai.documentimport.DocumentImportKeys;
import tech.easyflow.ai.mapper.DocumentChunkMapper; import tech.easyflow.ai.mapper.DocumentChunkMapper;
import tech.easyflow.ai.mapper.DocumentCollectionMapper; import tech.easyflow.ai.mapper.DocumentCollectionMapper;
import tech.easyflow.ai.mapper.DocumentMapper; import tech.easyflow.ai.mapper.DocumentMapper;
@@ -406,6 +407,14 @@ public class DocumentCollectionServiceImpl extends ServiceImpl<DocumentCollectio
return false; return false;
} }
item.setContent(content); item.setContent(content);
String renderMarkdown = hitSnapshot.findChunkRenderMarkdown(item.getId());
if (StringUtil.hasText(renderMarkdown)) {
item.addMetadata("renderMarkdown", renderMarkdown);
}
String sourceFileName = hitSnapshot.findSourceFileName(item.getId());
if (StringUtil.hasText(sourceFileName)) {
item.addMetadata("sourceFileName", sourceFileName);
}
return true; return true;
}) })
.collect(Collectors.toList()); .collect(Collectors.toList());
@@ -596,6 +605,30 @@ public class DocumentCollectionServiceImpl extends ServiceImpl<DocumentCollectio
} }
return StringUtil.noText(documentChunk.getContent()) ? null : documentChunk.getContent(); return StringUtil.noText(documentChunk.getContent()) ? null : documentChunk.getContent();
} }
/**
 * Looks up the render Markdown stored in the options of the chunk with the given id.
 *
 * @param chunkId chunk identifier; converted with {@link String#valueOf(Object)} before the
 *                lookup in {@code chunkMap}
 * @return the string form of the option stored under
 *         {@code DocumentImportKeys.KEY_DOCUMENT_RENDER_MARKDOWN}, or {@code null} when the
 *         chunk is unknown, has no document id or options, its document is not present in
 *         {@code documentMap}, or the option is absent
 */
private String findChunkRenderMarkdown(Object chunkId) {
    DocumentChunk chunk = chunkMap.get(String.valueOf(chunkId));
    boolean usable = chunk != null && chunk.getDocumentId() != null && chunk.getOptions() != null;
    if (!usable) {
        return null;
    }
    // Only chunks whose owning document is part of this snapshot may contribute Markdown.
    String documentKey = String.valueOf(chunk.getDocumentId());
    if (!documentMap.containsKey(documentKey)) {
        return null;
    }
    Object stored = chunk.getOptions().get(DocumentImportKeys.KEY_DOCUMENT_RENDER_MARKDOWN);
    if (stored == null) {
        return null;
    }
    return String.valueOf(stored);
}
/**
 * Resolves the title of the document that owns the chunk with the given id.
 *
 * @param chunkId chunk identifier; converted with {@link String#valueOf(Object)} before the
 *                lookup in {@code chunkMap}
 * @return the owning document's title, or {@code null} when the chunk is unknown, has no
 *         document id, its document is not present in {@code documentMap}, or the title has
 *         no text according to {@code StringUtil.noText}
 */
private String findSourceFileName(Object chunkId) {
    DocumentChunk chunk = chunkMap.get(String.valueOf(chunkId));
    if (chunk == null || chunk.getDocumentId() == null) {
        return null;
    }
    tech.easyflow.ai.entity.Document owner = documentMap.get(String.valueOf(chunk.getDocumentId()));
    boolean hasTitle = owner != null && !StringUtil.noText(owner.getTitle());
    return hasTitle ? owner.getTitle() : null;
}
} }
private String buildFaqPromptContent(FaqItem faqItem, List<Map<String, String>> images) { private String buildFaqPromptContent(FaqItem faqItem, List<Map<String, String>> images) {

View File

@@ -6,6 +6,9 @@ import com.easyagents.document.core.entity.ParseResponse;
import com.easyagents.document.core.entity.ParseResult; import com.easyagents.document.core.entity.ParseResult;
import com.easyagents.document.core.entity.ParseTaskInfo; import com.easyagents.document.core.entity.ParseTaskInfo;
import com.easyagents.document.core.entity.ParseTaskStatus; import com.easyagents.document.core.entity.ParseTaskStatus;
import com.easyagents.document.pdf.PdfDocumentParseService;
import com.easyagents.document.pptx.PptxDocumentParseService;
import com.easyagents.document.xlsx.XlsxDocumentParseService;
import org.junit.Assert; import org.junit.Assert;
import org.junit.Test; import org.junit.Test;
import tech.easyflow.ai.document.exception.DocumentParseBridgeException; import tech.easyflow.ai.document.exception.DocumentParseBridgeException;
@@ -37,8 +40,8 @@ public class DocumentParseBridgeServiceImplTest {
*/ */
@Test @Test
public void shouldParseSuccessfully() { public void shouldParseSuccessfully() {
FakeDocumentParseService parseService = new FakeDocumentParseService(); FakePdfDocumentParseService parseService = new FakePdfDocumentParseService();
DocumentParseBridgeServiceImpl bridgeService = buildBridgeService(parseService); DocumentParseBridgeServiceImpl bridgeService = buildBridgeService(parseService, null, null, parseService);
DocumentParsedResult document = bridgeService.parse(buildSource(), DocumentParseScenario.WORKFLOW_TEXT); DocumentParsedResult document = bridgeService.parse(buildSource(), DocumentParseScenario.WORKFLOW_TEXT);
@@ -52,8 +55,8 @@ public class DocumentParseBridgeServiceImplTest {
*/ */
@Test @Test
public void shouldSupportAsyncFlow() { public void shouldSupportAsyncFlow() {
FakeDocumentParseService parseService = new FakeDocumentParseService(); FakePdfDocumentParseService parseService = new FakePdfDocumentParseService();
DocumentParseBridgeServiceImpl bridgeService = buildBridgeService(parseService); DocumentParseBridgeServiceImpl bridgeService = buildBridgeService(parseService, null, null, parseService);
DocumentParseTaskStatus taskStatus = bridgeService.submit(buildSource(), DocumentParseScenario.KNOWLEDGE_IMPORT); DocumentParseTaskStatus taskStatus = bridgeService.submit(buildSource(), DocumentParseScenario.KNOWLEDGE_IMPORT);
DocumentParseTaskStatus queriedStatus = bridgeService.queryTask("task-1"); DocumentParseTaskStatus queriedStatus = bridgeService.queryTask("task-1");
@@ -69,9 +72,9 @@ public class DocumentParseBridgeServiceImplTest {
*/ */
@Test @Test
public void shouldQueryTaskInfoSuccessfully() { public void shouldQueryTaskInfoSuccessfully() {
FakeDocumentParseService parseService = new FakeDocumentParseService(); FakePdfDocumentParseService parseService = new FakePdfDocumentParseService();
parseService.taskStatusValue = "completed"; parseService.taskStatusValue = "completed";
DocumentParseBridgeServiceImpl bridgeService = buildBridgeService(parseService); DocumentParseBridgeServiceImpl bridgeService = buildBridgeService(parseService, null, null, parseService);
DocumentParseTaskInfo taskInfo = bridgeService.queryTaskInfo("task-1"); DocumentParseTaskInfo taskInfo = bridgeService.queryTaskInfo("task-1");
@@ -85,7 +88,7 @@ public class DocumentParseBridgeServiceImplTest {
*/ */
@Test @Test
public void shouldThrowWhenServiceDisabled() { public void shouldThrowWhenServiceDisabled() {
DocumentParseBridgeServiceImpl bridgeService = buildBridgeService(null); DocumentParseBridgeServiceImpl bridgeService = buildBridgeService(null, null, null, null);
try { try {
bridgeService.parse(buildSource(), DocumentParseScenario.WORKFLOW_TEXT); bridgeService.parse(buildSource(), DocumentParseScenario.WORKFLOW_TEXT);
@@ -95,9 +98,29 @@ public class DocumentParseBridgeServiceImplTest {
} }
} }
private DocumentParseBridgeServiceImpl buildBridgeService(DocumentParseService parseService) { @Test
public void shouldRoutePptxToDedicatedService() {
    // Only the PPTX slot and the generic fallback are populated, so the assertions below
    // can tell which of the two services handled the request.
    FakePdfDocumentParseService fallback = new FakePdfDocumentParseService();
    FakePptxDocumentParseService dedicated = new FakePptxDocumentParseService();
    DocumentParseBridgeServiceImpl bridge = buildBridgeService(null, dedicated, null, fallback);

    String pptxContentType = "application/vnd.openxmlformats-officedocument.presentationml.presentation";
    DocumentSourceRef source = buildSource("slides.pptx", pptxContentType);
    DocumentParsedResult parsed = bridge.parse(source, DocumentParseScenario.KNOWLEDGE_IMPORT);

    // "# pptx" is the Markdown returned by the fake PPTX service.
    Assert.assertEquals("# pptx", parsed.getPreferredText());
    Assert.assertEquals(1, dedicated.parseCallCount);
    Assert.assertEquals(0, fallback.parseCallCount);
}
private DocumentParseBridgeServiceImpl buildBridgeService(PdfDocumentParseService pdfDocumentParseService,
PptxDocumentParseService pptxDocumentParseService,
XlsxDocumentParseService xlsxDocumentParseService,
DocumentParseService parseService) {
return new DocumentParseBridgeServiceImpl( return new DocumentParseBridgeServiceImpl(
parseService, parseService,
pdfDocumentParseService,
pptxDocumentParseService,
xlsxDocumentParseService,
new DocumentSourceLoader(new InMemoryFileStorageService()), new DocumentSourceLoader(new InMemoryFileStorageService()),
new DocumentParseRequestFactory(), new DocumentParseRequestFactory(),
new DocumentParseResultMapper() new DocumentParseResultMapper()
@@ -105,8 +128,12 @@ public class DocumentParseBridgeServiceImplTest {
} }
private DocumentSourceRef buildSource() { private DocumentSourceRef buildSource() {
DocumentSourceRef sourceRef = DocumentSourceRef.ofBytes("demo.pdf", "pdf-data".getBytes(StandardCharsets.UTF_8)); return buildSource("demo.pdf", "application/pdf");
sourceRef.setContentType("application/pdf"); }
private DocumentSourceRef buildSource(String fileName, String contentType) {
DocumentSourceRef sourceRef = DocumentSourceRef.ofBytes(fileName, "pdf-data".getBytes(StandardCharsets.UTF_8));
sourceRef.setContentType(contentType);
sourceRef.setSize(8L); sourceRef.setSize(8L);
return sourceRef; return sourceRef;
} }
@@ -133,13 +160,15 @@ public class DocumentParseBridgeServiceImplTest {
} }
} }
private static class FakeDocumentParseService implements DocumentParseService { private static class FakePdfDocumentParseService implements PdfDocumentParseService {
private ParseRequest lastParseRequest; private ParseRequest lastParseRequest;
private String taskStatusValue = "running"; private String taskStatusValue = "running";
private int parseCallCount;
@Override @Override
public ParseResponse parse(ParseRequest request) { public ParseResponse parse(ParseRequest request) {
parseCallCount++;
this.lastParseRequest = request; this.lastParseRequest = request;
return buildResponse(); return buildResponse();
} }
@@ -187,4 +216,36 @@ public class DocumentParseBridgeServiceImplTest {
return response; return response;
} }
} }
/**
 * Test double for {@link PptxDocumentParseService}: counts synchronous parse calls and
 * answers each with a fixed single-result response. The asynchronous operations are
 * deliberately unsupported.
 */
private static class FakePptxDocumentParseService implements PptxDocumentParseService {
    /** Number of times {@link #parse(ParseRequest)} has been invoked. */
    private int parseCallCount;

    /**
     * Records the call and returns a canned response with one result.
     *
     * @param request ignored
     * @return response whose only result has Markdown {@code "# pptx"}
     */
    @Override
    public ParseResponse parse(ParseRequest request) {
        parseCallCount += 1;

        ParseResult cannedResult = new ParseResult();
        cannedResult.setPlainText("pptx");
        cannedResult.setMarkdown("# pptx");
        cannedResult.setFileName("slides.pptx");

        ParseResponse cannedResponse = new ParseResponse();
        cannedResponse.setResults(Collections.singletonList(cannedResult));
        return cannedResponse;
    }

    /** Not supported by this fake. */
    @Override
    public ParseTaskStatus submit(ParseRequest request) {
        throw new UnsupportedOperationException();
    }

    /** Not supported by this fake. */
    @Override
    public ParseTaskStatus queryTask(String taskId) {
        throw new UnsupportedOperationException();
    }

    /** Not supported by this fake. */
    @Override
    public ParseResponse queryResult(String taskId) {
        throw new UnsupportedOperationException();
    }
}
} }

View File

@@ -1,6 +1,9 @@
package tech.easyflow.ai.document.support; package tech.easyflow.ai.document.support;
import com.easyagents.document.core.entity.ParseRequest; import com.easyagents.document.core.entity.ParseRequest;
import com.easyagents.document.core.entity.PdfParseRequest;
import com.easyagents.document.core.entity.PptxParseRequest;
import com.easyagents.document.core.entity.XlsxParseRequest;
import org.junit.Assert; import org.junit.Assert;
import org.junit.Test; import org.junit.Test;
import tech.easyflow.ai.document.model.DocumentParseScenario; import tech.easyflow.ai.document.model.DocumentParseScenario;
@@ -26,6 +29,7 @@ public class DocumentParseRequestFactoryTest {
Assert.assertFalse(request.getReturnMiddleJson()); Assert.assertFalse(request.getReturnMiddleJson());
Assert.assertFalse(request.getReturnContentList()); Assert.assertFalse(request.getReturnContentList());
Assert.assertFalse(request.getReturnImages()); Assert.assertFalse(request.getReturnImages());
Assert.assertTrue(request instanceof PdfParseRequest);
} }
/** /**
@@ -41,12 +45,33 @@ public class DocumentParseRequestFactoryTest {
Assert.assertTrue(request.getReturnMiddleJson()); Assert.assertTrue(request.getReturnMiddleJson());
Assert.assertTrue(request.getReturnContentList()); Assert.assertTrue(request.getReturnContentList());
Assert.assertTrue(request.getReturnImages()); Assert.assertTrue(request.getReturnImages());
Assert.assertTrue(request instanceof PdfParseRequest);
}
/**
* 验证 PPTX / XLSX 会构建对应的强类型请求。
*/
@Test
public void shouldBuildOfficeTypedRequests() {
DocumentParseRequestFactory factory = new DocumentParseRequestFactory();
ParseRequest pptxRequest = factory.build(buildSource("slides.pptx",
"application/vnd.openxmlformats-officedocument.presentationml.presentation"), DocumentParseScenario.KNOWLEDGE_IMPORT);
ParseRequest xlsxRequest = factory.build(buildSource("table.xlsx",
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"), DocumentParseScenario.KNOWLEDGE_IMPORT);
Assert.assertTrue(pptxRequest instanceof PptxParseRequest);
Assert.assertTrue(xlsxRequest instanceof XlsxParseRequest);
} }
private LoadedDocumentSource buildSource() { private LoadedDocumentSource buildSource() {
return buildSource("demo.pdf", "application/pdf");
}
private LoadedDocumentSource buildSource(String fileName, String contentType) {
LoadedDocumentSource source = new LoadedDocumentSource(); LoadedDocumentSource source = new LoadedDocumentSource();
source.setFileName("demo.pdf"); source.setFileName(fileName);
source.setContentType("application/pdf"); source.setContentType(contentType);
source.setContentBytes("pdf-data".getBytes()); source.setContentBytes("pdf-data".getBytes());
source.setSize(8L); source.setSize(8L);
return source; return source;

View File

@@ -4,6 +4,7 @@ import com.easyagents.document.core.entity.ParseArtifacts;
import com.easyagents.document.core.entity.ParseResult; import com.easyagents.document.core.entity.ParseResult;
import com.easyagents.document.core.entity.ParseResponse; import com.easyagents.document.core.entity.ParseResponse;
import com.easyagents.document.core.entity.ParseTaskInfo; import com.easyagents.document.core.entity.ParseTaskInfo;
import com.easyagents.document.core.entity.ParseTaskStatus;
import org.junit.Assert; import org.junit.Assert;
import org.junit.Test; import org.junit.Test;
import tech.easyflow.ai.document.model.DocumentParseTaskInfo; import tech.easyflow.ai.document.model.DocumentParseTaskInfo;
@@ -65,6 +66,8 @@ public class DocumentParseResultMapperTest {
ParseTaskInfo taskInfo = new ParseTaskInfo(); ParseTaskInfo taskInfo = new ParseTaskInfo();
taskInfo.setTaskId("task-1"); taskInfo.setTaskId("task-1");
taskInfo.setStatus("completed"); taskInfo.setStatus("completed");
taskInfo.setProgressPercent(100);
taskInfo.setCurrentStage("completed");
ParseResult result = new ParseResult(); ParseResult result = new ParseResult();
result.setFileName("demo.pdf"); result.setFileName("demo.pdf");
@@ -76,7 +79,33 @@ public class DocumentParseResultMapperTest {
DocumentParseTaskInfo mapped = mapper.map(taskInfo); DocumentParseTaskInfo mapped = mapper.map(taskInfo);
Assert.assertEquals("task-1", mapped.getTaskId()); Assert.assertEquals("task-1", mapped.getTaskId());
Assert.assertEquals(Integer.valueOf(100), mapped.getProgressPercent());
Assert.assertEquals("completed", mapped.getCurrentStage());
Assert.assertNotNull(mapped.getResult()); Assert.assertNotNull(mapped.getResult());
Assert.assertEquals("# title", mapped.getResult().getPreferredText()); Assert.assertEquals("# title", mapped.getResult().getPreferredText());
} }
/**
 * Verifies that every asynchronous progress field is passed through by the mapper.
 */
@Test
public void shouldMapTaskStatusProgressFields() {
    // Source status populated with one distinct value per progress field.
    ParseTaskStatus source = new ParseTaskStatus();
    source.setTaskId("task-2");
    source.setStatus("running");
    source.setProgressPercent(45);
    source.setCurrentStage("ocr");
    source.setProcessedItems(9);
    source.setTotalItems(20);
    source.setStatusMessage("正在识别图片");

    tech.easyflow.ai.document.model.DocumentParseTaskStatus target =
            new DocumentParseResultMapper().map(source);

    Assert.assertEquals(Integer.valueOf(45), target.getProgressPercent());
    Assert.assertEquals("ocr", target.getCurrentStage());
    Assert.assertEquals(Integer.valueOf(9), target.getProcessedItems());
    Assert.assertEquals(Integer.valueOf(20), target.getTotalItems());
    Assert.assertEquals("正在识别图片", target.getStatusMessage());
}
} }

View File

@@ -1,17 +1,33 @@
package tech.easyflow.ai.documentimport.task; package tech.easyflow.ai.documentimport.task;
import com.easyagents.document.core.entity.DocumentBlock;
import com.easyagents.document.core.entity.DocumentImage;
import com.easyagents.document.core.entity.DocumentTable;
import com.easyagents.rag.ingestion.model.StrategyConfig;
import org.junit.Assert; import org.junit.Assert;
import org.junit.Test; import org.junit.Test;
import org.springframework.web.multipart.MultipartFile;
import tech.easyflow.ai.document.model.DocumentParseArtifacts;
import tech.easyflow.ai.document.model.DocumentParsedResult;
import tech.easyflow.ai.documentimport.DocumentImportKeys;
import tech.easyflow.ai.entity.DocumentChunk;
import tech.easyflow.ai.entity.DocumentImportTask; import tech.easyflow.ai.entity.DocumentImportTask;
import tech.easyflow.ai.enums.DocumentImportTaskStatus; import tech.easyflow.ai.enums.DocumentImportTaskStatus;
import tech.easyflow.ai.enums.DocumentProcessStatus; import tech.easyflow.ai.enums.DocumentProcessStatus;
import tech.easyflow.ai.mapper.DocumentMapper; import tech.easyflow.ai.mapper.DocumentMapper;
import tech.easyflow.ai.service.DocumentImportTaskService; import tech.easyflow.ai.service.DocumentImportTaskService;
import tech.easyflow.common.filestorage.FileStorageService;
import java.lang.reflect.Field; import java.lang.reflect.Field;
import java.lang.reflect.Method; import java.lang.reflect.Method;
import java.lang.reflect.Proxy; import java.lang.reflect.Proxy;
import java.math.BigInteger; import java.math.BigInteger;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Base64;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicReference; import java.util.concurrent.atomic.AtomicReference;
/** /**
@@ -84,6 +100,258 @@ public class KnowledgeDocumentImportTaskAppServiceTest {
Assert.assertEquals("新错误", updatedTask.getErrorSummary()); Assert.assertEquals("新错误", updatedTask.getErrorSummary());
} }
/**
 * Verifies that knowledge-base import uploads parsed images to object storage and rewrites
 * the Markdown and the structured references to the uploaded URL.
 *
 * @throws Exception if reflective access or invocation fails
 */
@Test
public void normalizeParsedImagesForKnowledgeImportShouldUploadAndRewriteReferences() throws Exception {
    KnowledgeDocumentImportTaskAppService service = new KnowledgeDocumentImportTaskAppService();
    AtomicReference<String> savedPrePathRef = new AtomicReference<String>();
    AtomicReference<String> savedFilenameRef = new AtomicReference<String>();
    setField(service, "storageService", mockFileStorageService(savedPrePathRef, savedFilenameRef));

    tech.easyflow.ai.entity.Document document = new tech.easyflow.ai.entity.Document();
    document.setId(BigInteger.valueOf(88));
    document.setTitle("产品说明书(终版).pdf");

    DocumentParsedResult parsedResult = new DocumentParsedResult();
    parsedResult.setMarkdown("图例如下:\n![](images/sample-image.png)");
    parsedResult.setPreferredText(parsedResult.getMarkdown());
    parsedResult.setPlainText(parsedResult.getMarkdown());

    // The same relative image path is referenced from every structure below so that each
    // rewrite location can be asserted against the single uploaded URL.
    DocumentImage image = new DocumentImage();
    image.setName("sample-image.png");
    image.setSourcePath("images/sample-image.png");
    image.setMimeType("image/png");
    image.setDataUrl("data:image/png;base64," + Base64.getEncoder().encodeToString("demo".getBytes(StandardCharsets.UTF_8)));
    parsedResult.setImages(new ArrayList<DocumentImage>(List.of(image)));

    DocumentBlock block = new DocumentBlock();
    block.setImagePath("images/sample-image.png");
    parsedResult.setBlocks(new ArrayList<DocumentBlock>(List.of(block)));

    DocumentTable table = new DocumentTable();
    table.setImagePath("images/sample-image.png");
    parsedResult.setTables(new ArrayList<DocumentTable>(List.of(table)));

    DocumentParseArtifacts artifacts = new DocumentParseArtifacts();
    List<Map<String, Object>> contentList = new ArrayList<Map<String, Object>>();
    Map<String, Object> contentItem = new LinkedHashMap<String, Object>();
    contentItem.put("img_path", "images/sample-image.png");
    contentList.add(contentItem);
    artifacts.setContentList(contentList);

    // Plain maps populated with put(): avoids anonymous LinkedHashMap subclasses that would
    // capture the enclosing test instance.
    Map<String, Object> xlsxArtifact = new LinkedHashMap<String, Object>();
    List<Map<String, Object>> sheetImages = new ArrayList<Map<String, Object>>();
    LinkedHashMap<String, Object> sheetImage = new LinkedHashMap<String, Object>();
    sheetImage.put("sheetName", "Sheet1");
    sheetImage.put("sourcePaths", new ArrayList<String>(List.of("images/sample-image.png")));
    sheetImages.add(sheetImage);
    xlsxArtifact.put("sheetImages", sheetImages);
    LinkedHashMap<String, Object> extraJsonArtifacts = new LinkedHashMap<String, Object>();
    extraJsonArtifacts.put("xlsx", xlsxArtifact);
    artifacts.setExtraJsonArtifacts(extraJsonArtifacts);
    parsedResult.setArtifacts(artifacts);

    // The method under test is not accessible from here, so it is invoked reflectively.
    Method method = KnowledgeDocumentImportTaskAppService.class.getDeclaredMethod(
            "normalizeParsedImagesForKnowledgeImport",
            tech.easyflow.ai.entity.Document.class,
            DocumentParsedResult.class
    );
    method.setAccessible(true);
    DocumentParsedResult normalized = (DocumentParsedResult) method.invoke(service, document, parsedResult);

    Assert.assertNotNull(normalized);
    Assert.assertEquals("knowledge-parse/88_产品说明书_终版/images", savedPrePathRef.get());
    Assert.assertEquals("sample-image.png", savedFilenameRef.get());
    String expectedUrl = "http://localhost:39000/easyflow/attachment/knowledge-parse/88_产品说明书_终版/images/sample-image.png";
    Assert.assertTrue(normalized.getMarkdown().contains(expectedUrl));
    Assert.assertEquals(expectedUrl, normalized.getBlocks().get(0).getImagePath());
    Assert.assertEquals(expectedUrl, normalized.getTables().get(0).getImagePath());
    Assert.assertEquals(expectedUrl, normalized.getImages().get(0).getSourcePath());
    Assert.assertNull(normalized.getImages().get(0).getDataUrl());

    Object rewrittenContentList = normalized.getArtifacts().getContentList();
    Assert.assertTrue(rewrittenContentList instanceof List<?>);
    Assert.assertEquals(expectedUrl, ((Map<?, ?>) ((List<?>) rewrittenContentList).get(0)).get("img_path"));

    Object rewrittenSheetImages = ((Map<?, ?>) normalized.getArtifacts().getExtraJsonArtifacts().get("xlsx")).get("sheetImages");
    Assert.assertTrue(rewrittenSheetImages instanceof List<?>);
    Object sourcePaths = ((Map<?, ?>) ((List<?>) rewrittenSheetImages).get(0)).get("sourcePaths");
    Assert.assertEquals(expectedUrl, ((List<?>) sourcePaths).get(0));
}
/**
 * Verifies that a PPTX document is chunked from its per-slide artifacts: two slides yield
 * two chunks, and the first chunk carries the slide text, image reference, page index and
 * render Markdown.
 *
 * @throws Exception if reflective access or invocation fails
 */
@Test
public void buildOfficeDocumentChunksShouldSplitPptxBySlide() throws Exception {
    KnowledgeDocumentImportTaskAppService service = new KnowledgeDocumentImportTaskAppService();
    tech.easyflow.ai.entity.Document document = new tech.easyflow.ai.entity.Document();
    document.setId(BigInteger.valueOf(101));
    document.setCollectionId(BigInteger.valueOf(201));
    document.setTitle("季度汇报.pptx");

    // Plain maps populated with put(): avoids anonymous LinkedHashMap subclasses that would
    // capture the enclosing test instance.
    Map<String, Object> firstSlide = new LinkedHashMap<String, Object>();
    firstSlide.put("slideIndex", 0);
    firstSlide.put("title", "封面");
    firstSlide.put("ocrMarkdown", "本页介绍季度目标。");
    firstSlide.put("imagePath", "https://example.com/slides/slide-001.png");
    firstSlide.put("imageName", "slide-001-page");

    Map<String, Object> secondSlide = new LinkedHashMap<String, Object>();
    secondSlide.put("slideIndex", 1);
    secondSlide.put("title", "经营分析");
    secondSlide.put("ocrMarkdown", "收入同比增长 18%。");
    secondSlide.put("imagePath", "https://example.com/slides/slide-002.png");
    secondSlide.put("imageName", "slide-002-page");

    List<Map<String, Object>> slides = new ArrayList<Map<String, Object>>();
    slides.add(firstSlide);
    slides.add(secondSlide);
    Map<String, Object> parseArtifactSummary = new LinkedHashMap<String, Object>();
    parseArtifactSummary.put("slides", slides);

    // The method under test is not accessible from here, so it is invoked reflectively.
    Method method = KnowledgeDocumentImportTaskAppService.class.getDeclaredMethod(
            "buildOfficeDocumentChunks",
            tech.easyflow.ai.entity.Document.class,
            String.class,
            StrategyConfig.class,
            Map.class
    );
    method.setAccessible(true);
    @SuppressWarnings("unchecked")
    List<DocumentChunk> chunks = (List<DocumentChunk>) method.invoke(
            service,
            document,
            "pptx",
            null,
            parseArtifactSummary
    );

    Assert.assertEquals(2, chunks.size());
    DocumentChunk firstChunk = chunks.get(0);
    Assert.assertTrue(firstChunk.getContent().contains("Slide 1"));
    Assert.assertTrue(firstChunk.getContent().contains("本页介绍季度目标"));
    Assert.assertEquals("https://example.com/slides/slide-001.png",
            ((List<?>) firstChunk.getOptions().get(DocumentImportKeys.KEY_DOCUMENT_IMAGE_REFS)).get(0));
    // slideIndex 0 in the artifact is expected to surface as page index 1.
    Assert.assertEquals(1, firstChunk.getOptions().get(DocumentImportKeys.KEY_DOCUMENT_PAGE_INDEX));
    Assert.assertTrue(String.valueOf(firstChunk.getOptions().get(DocumentImportKeys.KEY_DOCUMENT_RENDER_MARKDOWN))
            .contains("slide-001.png"));
}
/**
 * Verifies that an XLSX sheet containing only images still yields one chunk with
 * non-empty content and stable image references.
 *
 * @throws Exception if reflective access or invocation fails
 */
@Test
public void buildOfficeDocumentChunksShouldKeepImageOnlyXlsxSheetReferences() throws Exception {
    KnowledgeDocumentImportTaskAppService service = new KnowledgeDocumentImportTaskAppService();
    tech.easyflow.ai.entity.Document document = new tech.easyflow.ai.entity.Document();
    document.setId(BigInteger.valueOf(102));
    document.setCollectionId(BigInteger.valueOf(202));
    document.setTitle("巡检记录.xlsx");

    // Plain maps populated with put(): avoids anonymous LinkedHashMap subclasses that would
    // capture the enclosing test instance.
    Map<String, Object> imageSheet = new LinkedHashMap<String, Object>();
    imageSheet.put("sheetName", "图片页");
    imageSheet.put("sheetIndex", 0);
    imageSheet.put("rows", new ArrayList<Map<String, Object>>());
    List<Map<String, Object>> sheets = new ArrayList<Map<String, Object>>();
    sheets.add(imageSheet);

    Map<String, Object> cellImage = new LinkedHashMap<String, Object>();
    cellImage.put("sheetName", "图片页");
    cellImage.put("referenceKey", "image-sheet-r2c2-001");
    cellImage.put("sourcePath", "https://example.com/xlsx/sheet/image-001.jpeg");
    cellImage.put("anchorCell", "B2");
    cellImage.put("ocrText", "设备状态正常");
    cellImage.put("fromRow", 1);
    List<Map<String, Object>> cellImages = new ArrayList<Map<String, Object>>();
    cellImages.add(cellImage);

    Map<String, Object> parseArtifactSummary = new LinkedHashMap<String, Object>();
    parseArtifactSummary.put("sheets", sheets);
    parseArtifactSummary.put("cellImages", cellImages);

    StrategyConfig strategyConfig = new StrategyConfig();
    strategyConfig.setRowsPerChunk(10);

    // The method under test is not accessible from here, so it is invoked reflectively.
    Method method = KnowledgeDocumentImportTaskAppService.class.getDeclaredMethod(
            "buildOfficeDocumentChunks",
            tech.easyflow.ai.entity.Document.class,
            String.class,
            StrategyConfig.class,
            Map.class
    );
    method.setAccessible(true);
    @SuppressWarnings("unchecked")
    List<DocumentChunk> chunks = (List<DocumentChunk>) method.invoke(
            service,
            document,
            "xlsx",
            strategyConfig,
            parseArtifactSummary
    );

    Assert.assertEquals(1, chunks.size());
    DocumentChunk onlyChunk = chunks.get(0);
    Assert.assertTrue(onlyChunk.getContent().contains("图片 OCR"));
    Assert.assertTrue(onlyChunk.getContent().contains("设备状态正常"));
    Assert.assertEquals("图片页", onlyChunk.getOptions().get(DocumentImportKeys.KEY_DOCUMENT_SHEET_NAME));
    Assert.assertEquals("https://example.com/xlsx/sheet/image-001.jpeg",
            ((List<?>) onlyChunk.getOptions().get(DocumentImportKeys.KEY_DOCUMENT_IMAGE_REFS)).get(0));
    String renderMarkdown = String.valueOf(onlyChunk.getOptions().get(DocumentImportKeys.KEY_DOCUMENT_RENDER_MARKDOWN));
    Assert.assertTrue(renderMarkdown.contains("[IMG:image-sheet-r2c2-001]"));
    Assert.assertTrue(renderMarkdown.contains("![image-sheet-r2c2-001](https://example.com/xlsx/sheet/image-001.jpeg)"));
}
/**
 * Verifies that a blank XLSX sheet with no rows and no images produces no chunks, i.e.
 * it is not mistaken for an image-only sheet.
 *
 * @throws Exception if reflective access or invocation fails
 */
@Test
public void buildOfficeDocumentChunksShouldSkipBlankXlsxSheetWithoutImages() throws Exception {
    KnowledgeDocumentImportTaskAppService service = new KnowledgeDocumentImportTaskAppService();
    tech.easyflow.ai.entity.Document document = new tech.easyflow.ai.entity.Document();
    document.setId(BigInteger.valueOf(103));
    document.setCollectionId(BigInteger.valueOf(203));
    document.setTitle("空白工作簿.xlsx");

    // Plain map populated with put(): avoids an anonymous LinkedHashMap subclass that would
    // capture the enclosing test instance.
    Map<String, Object> blankSheet = new LinkedHashMap<String, Object>();
    blankSheet.put("sheetName", "空白页");
    blankSheet.put("sheetIndex", 0);
    blankSheet.put("rows", new ArrayList<Map<String, Object>>());
    List<Map<String, Object>> sheets = new ArrayList<Map<String, Object>>();
    sheets.add(blankSheet);

    Map<String, Object> parseArtifactSummary = new LinkedHashMap<String, Object>();
    parseArtifactSummary.put("sheets", sheets);
    parseArtifactSummary.put("cellImages", new ArrayList<Map<String, Object>>());

    StrategyConfig strategyConfig = new StrategyConfig();
    strategyConfig.setRowsPerChunk(10);

    // The method under test is not accessible from here, so it is invoked reflectively.
    Method method = KnowledgeDocumentImportTaskAppService.class.getDeclaredMethod(
            "buildOfficeDocumentChunks",
            tech.easyflow.ai.entity.Document.class,
            String.class,
            StrategyConfig.class,
            Map.class
    );
    method.setAccessible(true);
    @SuppressWarnings("unchecked")
    List<DocumentChunk> chunks = (List<DocumentChunk>) method.invoke(
            service,
            document,
            "xlsx",
            strategyConfig,
            parseArtifactSummary
    );

    Assert.assertTrue(chunks.isEmpty());
}
private static DocumentMapper mockDocumentMapper(tech.easyflow.ai.entity.Document persistedDocument, private static DocumentMapper mockDocumentMapper(tech.easyflow.ai.entity.Document persistedDocument,
AtomicReference<tech.easyflow.ai.entity.Document> updatedDocumentRef) { AtomicReference<tech.easyflow.ai.entity.Document> updatedDocumentRef) {
return (DocumentMapper) Proxy.newProxyInstance( return (DocumentMapper) Proxy.newProxyInstance(
@@ -116,6 +384,22 @@ public class KnowledgeDocumentImportTaskAppServiceTest {
); );
} }
/**
 * Builds a dynamic-proxy {@link FileStorageService} that records two-argument
 * {@code save} calls whose first argument is a {@link MultipartFile}.
 *
 * @param savedPrePathRef  receives the second {@code save} argument (cast to String)
 * @param savedFilenameRef receives the uploaded file's original filename
 * @return a proxy that answers such {@code save} calls with a fixed-prefix URL built from
 *         the path and filename, and every other call with {@code defaultValue} of the
 *         method's return type
 */
private static FileStorageService mockFileStorageService(AtomicReference<String> savedPrePathRef,
                                                         AtomicReference<String> savedFilenameRef) {
    return (FileStorageService) Proxy.newProxyInstance(
            FileStorageService.class.getClassLoader(),
            new Class<?>[]{FileStorageService.class},
            (proxy, method, args) -> {
                boolean twoArgSave = "save".equals(method.getName()) && args != null && args.length == 2;
                if (twoArgSave && args[0] instanceof MultipartFile upload) {
                    String prePath = (String) args[1];
                    String originalName = upload.getOriginalFilename();
                    savedPrePathRef.set(prePath);
                    savedFilenameRef.set(originalName);
                    return "http://localhost:39000/easyflow/attachment/" + prePath + "/" + originalName;
                }
                // Anything else on the interface is irrelevant to these tests.
                return defaultValue(method.getReturnType());
            }
    );
}
private static void setField(Object target, String fieldName, Object value) throws Exception { private static void setField(Object target, String fieldName, Object value) throws Exception {
Field field = KnowledgeDocumentImportTaskAppService.class.getDeclaredField(fieldName); Field field = KnowledgeDocumentImportTaskAppService.class.getDeclaredField(fieldName);
field.setAccessible(true); field.setAccessible(true);

View File

@@ -166,7 +166,7 @@ dromara:
# easy-agents 文档解析统一配置 # easy-agents 文档解析统一配置
easy-agents: easy-agents:
document: document:
pdf: ocr:
provider: mineru provider: mineru
mineru: mineru:
# 统一文档解析桥接层直接复用 easy-agents 的 provider 配置,不在 easyflow 再复制一套配置体系 # 统一文档解析桥接层直接复用 easy-agents 的 provider 配置,不在 easyflow 再复制一套配置体系

View File

@@ -1,10 +1,11 @@
<script setup lang="ts"> <script setup lang="ts">
import { ref } from 'vue'; import { ref } from 'vue';
import ElXMarkdown from 'vue-element-plus-x/es/XMarkdown/index.js';
import { EasyFlowFormModal } from '@easyflow/common-ui'; import { EasyFlowFormModal } from '@easyflow/common-ui';
import { $t } from '@easyflow/locales'; import { $t } from '@easyflow/locales';
import { Delete, MoreFilled } from '@element-plus/icons-vue'; import { Delete, EditPen, MoreFilled } from '@element-plus/icons-vue';
import { import {
ElButton, ElButton,
ElDropdown, ElDropdown,
@@ -12,6 +13,7 @@ import {
ElDropdownMenu, ElDropdownMenu,
ElForm, ElForm,
ElFormItem, ElFormItem,
ElIcon,
ElInput, ElInput,
ElMessage, ElMessage,
ElMessageBox, ElMessageBox,
@@ -21,6 +23,10 @@ import {
import { api } from '#/api/request'; import { api } from '#/api/request';
import PageData from '#/components/page/PageData.vue'; import PageData from '#/components/page/PageData.vue';
import {
markdownRenderProps,
resolveMarkdownContent,
} from '#/views/ai/documentCollection/markdown-content';
import { buildKnowledgePath } from '#/views/ai/documentCollection/share-path'; import { buildKnowledgePath } from '#/views/ai/documentCollection/share-path';
const props = defineProps({ const props = defineProps({
@@ -122,6 +128,85 @@ const form = ref({
id: '', id: '',
content: '', content: '',
}); });
/**
 * Returns the `options` object of a chunk row.
 *
 * @param row chunk row; may be null or undefined
 * @returns `row.options` when it is truthy, otherwise a fresh empty object
 */
const getChunkOptions = (row: any) => {
  const options = row?.options;
  return options || {};
};
/**
 * Picks the Markdown source for a chunk row and hands it to `resolveMarkdownContent`.
 *
 * Prefers `options.renderMarkdown`; falls back to `row.content` when that is missing or
 * falsy.
 *
 * @param row chunk row; may be null or undefined
 * @returns whatever `resolveMarkdownContent` returns for the chosen value
 */
const getMarkdown = (row: any) => {
  const preferred = getChunkOptions(row)?.renderMarkdown;
  return resolveMarkdownContent(preferred || row?.content);
};
/**
 * Tells whether a chunk row looks like it came from an XLSX document.
 *
 * A row qualifies when its source extension (`options.sourceFileExt`, or
 * `options['splitter.sourceFileExt']` as a fallback) equals `xlsx` case-insensitively, or
 * when any of `options.sheetName`, `options.rowStart`, `options.rowEnd` is truthy.
 *
 * @param row chunk row; may be null or undefined
 * @returns `true` when any of the conditions above holds
 */
const isExcelChunk = (row: any) => {
  const chunkOptions = getChunkOptions(row);
  const rawExt =
    chunkOptions?.sourceFileExt || chunkOptions?.['splitter.sourceFileExt'] || '';
  if (String(rawExt).toLowerCase() === 'xlsx') {
    return true;
  }
  return !!(
    chunkOptions?.sheetName ||
    chunkOptions?.rowStart ||
    chunkOptions?.rowEnd
  );
};
/**
 * Decides whether the page should be rendered with the card layout instead of the table.
 *
 * @param pageList rows of the current page; defaults to an empty array when omitted
 * @returns `true` only when the page is non-empty and every row passes `isExcelChunk`
 */
const shouldUseExcelChunkCards = (pageList: any[] = []) => {
  const hasRows = pageList.length > 0;
  return hasRows ? pageList.every((chunkRow) => isExcelChunk(chunkRow)) : false;
};
/**
 * Reads the sheet name recorded in a chunk's options.
 *
 * @param row chunk row; may be null or undefined
 * @returns `options.sheetName` converted to a string, or `''` when it is missing or falsy
 */
const getSheetName = (row: any) => {
  const sheetName = getChunkOptions(row)?.sheetName;
  return sheetName ? String(sheetName) : '';
};
/**
 * Reads `options.rowStart` as a number clamped to be at least 0.
 *
 * @param row chunk row; may be null or undefined
 * @returns the numeric row start, `0` when missing, falsy or negative; `NaN` when the
 *          stored value is truthy but not numeric
 */
const getRowStart = (row: any) =>
  Math.max(Number(getChunkOptions(row)?.rowStart || 0), 0);
/**
 * Reads `options.rowEnd` as a number clamped to be at least 0.
 *
 * @param row chunk row; may be null or undefined
 * @returns the numeric row end, `0` when missing, falsy or negative; `NaN` when the
 *          stored value is truthy but not numeric
 */
const getRowEnd = (row: any) =>
  Math.max(Number(getChunkOptions(row)?.rowEnd || 0), 0);
/**
 * Formats the row window of a chunk for display.
 *
 * @param row chunk row; may be null or undefined
 * @returns `"start"` when start and end are equal, `"start-end"` when both are positive
 *          and differ, `"start 行起"` when only the start is positive, and `''` otherwise
 */
const getRowRangeLabel = (row: any) => {
  const firstRow = getRowStart(row);
  const lastRow = getRowEnd(row);
  // Without a positive start there is nothing meaningful to show.
  if (!(firstRow > 0)) {
    return '';
  }
  if (!(lastRow > 0)) {
    return `${firstRow} 行起`;
  }
  return firstRow === lastRow ? `${firstRow}` : `${firstRow}-${lastRow}`;
};
/**
 * Builds the display title of a chunk.
 *
 * Precedence: `options.sourceLabel` (as a string), then `"sheet · rows"` when both the
 * sheet name and the row range label are available, then the sheet name alone, then
 * `row.id`, and finally `'-'`.
 *
 * @param row chunk row; may be null or undefined
 * @returns the first available title according to the precedence above
 */
const getChunkTitle = (row: any) => {
  const sourceLabel = getChunkOptions(row)?.sourceLabel;
  if (sourceLabel) {
    return String(sourceLabel);
  }

  const sheet = getSheetName(row);
  const rowRange = getRowRangeLabel(row);
  return sheet && rowRange ? `${sheet} · ${rowRange}` : sheet || row?.id || '-';
};
/**
 * Formats a chunk's `sorting` value as a zero-padded, at-least-two-character index.
 *
 * @param row chunk row; may be null or undefined
 * @returns e.g. `'03'` for `3` and `'12'` for `12`; `''` when `sorting` is missing, not a
 *          finite number, or not positive
 */
const getChunkIndexLabel = (row: any) => {
  const sorting = Number(row?.sorting || 0);
  // Non-numeric input converts to NaN, which would slip past a plain `<= 0` check and be
  // rendered as the literal text "NaN"; treat every non-finite value as "no index".
  if (!Number.isFinite(sorting) || sorting <= 0) {
    return '';
  }
  return String(sorting).padStart(2, '0');
};
/**
 * Builds the header text shown on a chunk card.
 *
 * @param row chunk row; may be null or undefined
 * @returns `"分块 <index> · <title>"` when an index label exists, otherwise the title alone
 */
const getChunkHeaderLabel = (row: any) => {
  const title = getChunkTitle(row);
  const indexLabel = getChunkIndexLabel(row);
  return indexLabel ? `分块 ${indexLabel} · ${title}` : title;
};
</script> </script>
<template> <template>
@@ -136,12 +221,76 @@ const form = ref({
:extra-query-params="queryParams" :extra-query-params="queryParams"
> >
<template #default="{ pageList }"> <template #default="{ pageList }">
<ElTable :data="pageList" style="width: 100%" size="large"> <div v-if="shouldUseExcelChunkCards(pageList)" class="chunk-board">
<article v-for="row in pageList" :key="row.id" class="chunk-card">
<div v-if="props.manageable" class="chunk-card__toolbar">
<ElButton
circle
text
type="primary"
class="chunk-card__action"
@click="handleEdit(row)"
>
<ElIcon><EditPen /></ElIcon>
</ElButton>
<ElDropdown>
<ElButton
circle
text
class="chunk-card__action chunk-card__action--ghost"
>
<ElIcon><MoreFilled /></ElIcon>
</ElButton>
<template #dropdown>
<ElDropdownMenu>
<ElDropdownItem @click="handleDelete(row)">
<ElButton link type="danger" :icon="Delete">
{{ $t('button.delete') }}
</ElButton>
</ElDropdownItem>
</ElDropdownMenu>
</template>
</ElDropdown>
</div>
<div class="chunk-card__header">
<div class="chunk-card__eyebrow">
<span class="chunk-card__eyebrow-dot"></span>
<span>{{ getChunkHeaderLabel(row) }}</span>
</div>
</div>
<div v-if="getMarkdown(row)" class="chunk-card__content">
<div class="chunk-rich-content chunk-rich-content--card">
<ElXMarkdown
:markdown="getMarkdown(row)"
:allow-html="markdownRenderProps.allowHtml"
:sanitize="markdownRenderProps.sanitize"
/>
</div>
</div>
<span v-else class="chunk-table__empty">-</span>
</article>
</div>
<ElTable v-else :data="pageList" style="width: 100%" size="large">
<ElTableColumn <ElTableColumn
prop="content" prop="content"
:label="$t('documentCollection.content')" :label="$t('documentCollection.content')"
min-width="240" min-width="240"
>
<template #default="{ row }">
<div v-if="getMarkdown(row)" class="chunk-rich-content">
<ElXMarkdown
:markdown="getMarkdown(row)"
:allow-html="markdownRenderProps.allowHtml"
:sanitize="markdownRenderProps.sanitize"
/> />
</div>
<span v-else class="chunk-table__empty">-</span>
</template>
</ElTableColumn>
<ElTableColumn <ElTableColumn
v-if="props.manageable" v-if="props.manageable"
:label="$t('common.handle')" :label="$t('common.handle')"
@@ -198,4 +347,251 @@ const form = ref({
</div> </div>
</template> </template>
<style scoped></style> <style scoped>
.chunk-board {
display: grid;
gap: 14px;
}
.chunk-card {
position: relative;
padding: 20px 20px 18px;
overflow: hidden;
background: linear-gradient(
135deg,
color-mix(in srgb, var(--el-color-primary-light-9) 80%, white) 0%,
var(--el-fill-color-blank) 38%
);
border: 1px solid color-mix(in srgb, var(--el-border-color-light) 78%, white);
border-radius: 18px;
box-shadow: 0 18px 40px rgb(15 23 42 / 6%);
}
/* Decorative 4px-wide vertical gradient strip pinned to the card's left edge
   (inset leaves only the right side unconstrained). */
.chunk-card::before {
position: absolute;
inset: 0 auto 0 0;
width: 4px;
content: '';
background: linear-gradient(
180deg,
var(--el-color-primary),
color-mix(in srgb, var(--el-color-primary) 44%, white)
);
}
.chunk-card__toolbar {
position: absolute;
top: 14px;
right: 14px;
z-index: 1;
display: flex;
gap: 6px;
align-items: center;
}
.chunk-card__action {
width: 34px;
height: 34px;
color: var(--el-color-primary);
background: color-mix(in srgb, var(--el-color-primary-light-9) 66%, white);
border: 1px solid
color-mix(in srgb, var(--el-color-primary-light-8) 72%, white);
box-shadow: 0 8px 18px rgb(37 99 235 / 10%);
}
.chunk-card__action--ghost {
color: var(--el-text-color-secondary);
background: rgb(255 255 255 / 86%);
border-color: color-mix(in srgb, var(--el-border-color-light) 86%, white);
box-shadow: none;
}
/* Header column of a chunk card. The 92px right padding presumably keeps the
   label clear of the absolutely positioned toolbar in the top-right corner;
   it is reset to 0 at narrow widths where the toolbar becomes static. */
.chunk-card__header {
display: flex;
flex-direction: column;
min-width: 0;
padding-right: 92px;
}
.chunk-card__eyebrow {
display: inline-flex;
gap: 8px;
align-items: center;
min-height: 28px;
font-size: 12px;
font-weight: 600;
line-height: 1.5;
color: var(--el-text-color-secondary);
}
.chunk-card__eyebrow-dot {
width: 8px;
height: 8px;
border-radius: 999px;
background: var(--el-color-primary);
box-shadow: 0 0 0 4px
color-mix(in srgb, var(--el-color-primary-light-8) 50%, transparent);
}
.chunk-card__content {
padding-top: 14px;
}
.chunk-rich-content {
min-width: 0;
padding: 4px 0;
font-size: 14px;
line-height: 1.72;
color: var(--el-text-color-regular);
}
.chunk-rich-content--card {
padding: 14px 16px;
overflow-x: auto;
background: rgb(255 255 255 / 82%);
border: 1px solid rgb(15 23 42 / 6%);
border-radius: 16px;
}
.chunk-rich-content :deep(.markdown-body) {
font-size: inherit;
line-height: inherit;
color: inherit;
background: transparent;
}
.chunk-rich-content :deep(.markdown-body > :first-child) {
margin-top: 0;
}
.chunk-rich-content :deep(.markdown-body > :last-child) {
margin-bottom: 0;
}
/* Applied to every rendered descendant: allow breaks at any point so long
   unbroken strings wrap instead of overflowing the container. */
.chunk-rich-content :deep(*) {
overflow-wrap: anywhere;
}
.chunk-rich-content :deep(p) {
margin: 0 0 10px;
}
.chunk-rich-content :deep(p:last-child) {
margin-bottom: 0;
}
.chunk-rich-content :deep(h1),
.chunk-rich-content :deep(h2),
.chunk-rich-content :deep(h3),
.chunk-rich-content :deep(h4),
.chunk-rich-content :deep(h5),
.chunk-rich-content :deep(h6) {
margin: 14px 0 10px;
font-weight: 600;
line-height: 1.45;
color: var(--el-text-color-primary);
}
.chunk-rich-content :deep(h1:first-child),
.chunk-rich-content :deep(h2:first-child),
.chunk-rich-content :deep(h3:first-child),
.chunk-rich-content :deep(h4:first-child),
.chunk-rich-content :deep(h5:first-child),
.chunk-rich-content :deep(h6:first-child) {
margin-top: 0;
}
.chunk-rich-content :deep(ul),
.chunk-rich-content :deep(ol) {
padding-left: 20px;
margin: 0 0 12px;
}
.chunk-rich-content :deep(li + li) {
margin-top: 4px;
}
.chunk-rich-content :deep(a) {
color: var(--el-color-primary);
text-decoration: underline;
text-underline-offset: 2px;
}
.chunk-rich-content :deep(img) {
display: block;
max-width: min(100%, 560px);
height: auto;
margin: 12px 0;
border: 1px solid rgb(15 23 42 / 8%);
border-radius: 12px;
box-shadow: 0 10px 24px rgb(15 23 42 / 8%);
}
.chunk-rich-content :deep(table) {
width: 100%;
margin: 12px 0;
overflow: hidden;
border-collapse: collapse;
background: rgb(255 255 255 / 92%);
border: 1px solid rgb(15 23 42 / 8%);
border-radius: 12px;
}
.chunk-rich-content :deep(th),
.chunk-rich-content :deep(td) {
padding: 10px 12px;
text-align: left;
vertical-align: top;
border: 1px solid rgb(15 23 42 / 8%);
}
.chunk-rich-content :deep(th) {
font-weight: 600;
color: var(--el-text-color-primary);
background: rgb(37 99 235 / 4%);
}
.chunk-rich-content :deep(pre) {
max-width: 100%;
padding: 12px 14px;
overflow: auto;
background: rgb(15 23 42 / 4%);
border: 1px solid rgb(15 23 42 / 6%);
border-radius: 12px;
}
.chunk-rich-content :deep(blockquote) {
padding-left: 12px;
margin: 12px 0;
color: var(--el-text-color-secondary);
border-left: 3px solid rgb(37 99 235 / 24%);
}
.chunk-table__empty {
color: var(--el-text-color-placeholder);
}
/* Narrow layout (<= 960px): tighter card padding, and the toolbar moves from
   its absolute top-right position back into normal flow above the header. */
@media (max-width: 960px) {
.chunk-card {
padding: 18px 16px 16px;
}
.chunk-card__toolbar {
position: static;
justify-content: flex-end;
padding-bottom: 12px;
}
/* The toolbar no longer overlaps the header, so drop the reserved padding. */
.chunk-card__header {
padding-right: 0;
}
.chunk-rich-content--card {
padding: 12px;
}
/* NOTE(review): the base .chunk-card__eyebrow rule already sets 12px, so this
   override looks redundant — confirm whether a different size was intended. */
.chunk-card__eyebrow {
font-size: 12px;
}
}
</style>

View File

@@ -40,6 +40,8 @@ interface DocumentStatusPayload {
failedChunks?: number; failedChunks?: number;
knowledgeId?: number | string; knowledgeId?: number | string;
lastTaskError?: string; lastTaskError?: string;
parseCurrentStage?: string;
parseStatusMessage?: string;
processStatus?: string; processStatus?: string;
progressPercent?: number; progressPercent?: number;
taskModifiedAt?: string; taskModifiedAt?: string;
@@ -154,11 +156,24 @@ const statusMetaMap: Record<
}, },
}; };
// Fallback icon/tone metadata, taken from the UPLOADED entry of statusMetaMap;
// getStatusMeta returns it when a status has no entry of its own.
// NOTE(review): the non-null assertion presumes statusMetaMap always defines
// UPLOADED — confirm against the map declaration.
const defaultStatusMeta: {
icon: Component;
toneClass: string;
} = statusMetaMap.UPLOADED!;
const getStatusLabel = (status?: string) => const getStatusLabel = (status?: string) =>
$t(`documentCollection.taskStatus.${status || 'UPLOADED'}`); $t(`documentCollection.taskStatus.${status || 'UPLOADED'}`);
const getStatusMeta = (status?: string) => const getStatusMeta = (
statusMetaMap[status || 'UPLOADED'] || statusMetaMap.UPLOADED; status?: string,
): {
icon: Component;
toneClass: string;
} => statusMetaMap[status || 'UPLOADED'] ?? defaultStatusMeta;
/** Returns the tone CSS class of the metadata resolved for `status`. */
const getStatusToneClass = (status?: string) => {
  const { toneClass } = getStatusMeta(status);
  return toneClass;
};
/** Returns the icon component of the metadata resolved for `status`. */
const getStatusIcon = (status?: string) => {
  const { icon } = getStatusMeta(status);
  return icon;
};
const getChunkCount = (row: any) => { const getChunkCount = (row: any) => {
const totalChunks = Number(row.totalChunks || 0); const totalChunks = Number(row.totalChunks || 0);
@@ -171,12 +186,28 @@ const getChunkCount = (row: any) => {
const getProgressText = (row: any) => { const getProgressText = (row: any) => {
const completed = Number(row.completedChunks || 0); const completed = Number(row.completedChunks || 0);
const total = Number(row.totalChunks || 0); const total = Number(row.totalChunks || 0);
if (row.processStatus === 'PARSING') {
return `${Number(row.progressPercent || 0)}%`;
}
if (total <= 0) { if (total <= 0) {
return `${Number(row.progressPercent || 0)}%`; return `${Number(row.progressPercent || 0)}%`;
} }
return `${Number(row.progressPercent || 0)}% · ${completed}/${total}`; return `${Number(row.progressPercent || 0)}% · ${completed}/${total}`;
}; };
// Display labels for parse stages, looked up by a row's `parseCurrentStage`
// value in getProcessingHint.
// NOTE(review): these labels are hard-coded Chinese strings, while other labels
// in this file go through $t(...) — consider moving them into the locale files.
const parseStageLabels: Record<string, string> = {
assembling: '汇总中',
extracting: '提取中',
ocr: 'OCR 中',
preparing: '准备中',
queued: '排队中',
};
/**
 * Picks the hint text for a row: the row's own `parseStatusMessage` if it
 * has one, otherwise the label mapped from `parseCurrentStage`, otherwise
 * an empty string.
 */
const getProcessingHint = (row: any) => {
  if (row.parseStatusMessage) {
    return row.parseStatusMessage;
  }
  const stageLabel = parseStageLabels[row.parseCurrentStage || ''];
  return stageLabel || '';
};
const clearReconnectTimer = () => { const clearReconnectTimer = () => {
if (!reconnectTimer) { if (!reconnectTimer) {
return; return;
@@ -211,6 +242,8 @@ const patchDocumentRow = (payload: DocumentStatusPayload) => {
completedChunks: payload.completedChunks, completedChunks: payload.completedChunks,
failedChunks: payload.failedChunks, failedChunks: payload.failedChunks,
lastTaskError: payload.lastTaskError, lastTaskError: payload.lastTaskError,
parseCurrentStage: payload.parseCurrentStage,
parseStatusMessage: payload.parseStatusMessage,
processStatus: payload.processStatus, processStatus: payload.processStatus,
progressPercent: payload.progressPercent, progressPercent: payload.progressPercent,
taskModifiedAt: payload.taskModifiedAt, taskModifiedAt: payload.taskModifiedAt,
@@ -529,7 +562,7 @@ watch(
<div class="status-cell"> <div class="status-cell">
<div <div
class="status-pill" class="status-pill"
:class="getStatusMeta(row.processStatus).toneClass" :class="getStatusToneClass(row.processStatus)"
> >
<span class="status-pill__icon-shell"> <span class="status-pill__icon-shell">
<ElIcon <ElIcon
@@ -540,7 +573,7 @@ watch(
: '' : ''
" "
> >
<component :is="getStatusMeta(row.processStatus).icon" /> <component :is="getStatusIcon(row.processStatus)" />
</ElIcon> </ElIcon>
</span> </span>
<span class="status-pill__label"> <span class="status-pill__label">
@@ -548,7 +581,10 @@ watch(
</span> </span>
</div> </div>
<div <div
v-if="row.processStatus === 'INDEXING'" v-if="
row.processStatus === 'INDEXING' ||
row.processStatus === 'PARSING'
"
class="status-progress" class="status-progress"
> >
<ElProgress <ElProgress
@@ -558,6 +594,12 @@ watch(
<span class="status-progress__text"> <span class="status-progress__text">
{{ getProgressText(row) }} {{ getProgressText(row) }}
</span> </span>
<span
v-if="row.processStatus === 'PARSING' && getProcessingHint(row)"
class="status-progress__hint"
>
{{ getProcessingHint(row) }}
</span>
</div> </div>
<div <div
v-else-if="row.lastTaskError" v-else-if="row.lastTaskError"
@@ -663,6 +705,12 @@ watch(
text-align: left; text-align: left;
} }
.status-progress__hint {
font-size: 12px;
color: var(--el-text-color-secondary);
text-align: left;
}
.status-error { .status-error {
max-width: 176px; max-width: 176px;
font-size: 12px; font-size: 12px;

View File

@@ -21,6 +21,8 @@ type RetrievalMode = 'HYBRID' | 'KEYWORD' | 'VECTOR';
interface SearchResultItem { interface SearchResultItem {
sorting: number; sorting: number;
content: string; content: string;
renderMarkdown?: string;
sourceFileName?: string;
score?: number; score?: number;
hitSource?: 'BOTH' | 'KEYWORD' | 'VECTOR'; hitSource?: 'BOTH' | 'KEYWORD' | 'VECTOR';
vectorScore?: number; vectorScore?: number;

View File

@@ -1,17 +1,27 @@
<script setup lang="ts"> <script setup lang="ts">
import type { PropType } from 'vue';
import { ref } from 'vue'; import { ref } from 'vue';
import ElXMarkdown from 'vue-element-plus-x/es/XMarkdown/index.js';
import { $t } from '@easyflow/locales'; import { $t } from '@easyflow/locales';
import { Document } from '@element-plus/icons-vue'; import { Document } from '@element-plus/icons-vue';
import { ElButton, ElEmpty, ElIcon, ElTag } from 'element-plus'; import { ElButton, ElEmpty, ElIcon, ElTag } from 'element-plus';
import {
markdownRenderProps,
resolveMarkdownContent,
} from '#/views/ai/documentCollection/markdown-content';
type RetrievalMode = 'HYBRID' | 'KEYWORD' | 'VECTOR'; type RetrievalMode = 'HYBRID' | 'KEYWORD' | 'VECTOR';
type HitSource = 'BOTH' | 'KEYWORD' | 'VECTOR'; type HitSource = 'BOTH' | 'KEYWORD' | 'VECTOR';
interface PreviewItem { interface PreviewItem {
sorting: number | string; sorting: number | string;
content: string; content: string;
renderMarkdown?: string;
sourceFileName?: string;
score?: number | string; score?: number | string;
hitSource?: HitSource; hitSource?: HitSource;
} }
@@ -42,12 +52,12 @@ const props = defineProps({
default: false, default: false,
}, },
onCancel: { onCancel: {
type: Function, type: Function as PropType<() => void>,
default: () => {}, default: () => undefined,
}, },
onConfirm: { onConfirm: {
type: Function, type: Function as PropType<() => void>,
default: () => {}, default: () => undefined,
}, },
isSearching: { isSearching: {
type: Boolean, type: Boolean,
@@ -100,18 +110,31 @@ const resolveHitSourceType = (hitSource?: HitSource) => {
return 'info'; return 'info';
}; };
const normalizePreviewContent = (content?: string) => { const resolvePreviewMarkdown = (item: PreviewItem) =>
if (!content) { resolveMarkdownContent(item.renderMarkdown || item.content);
return '';
const resolveScoreLine = (item: PreviewItem) => {
const pieces: string[] = [];
if (
!props.hideScore &&
item.score !== undefined &&
item.score !== null &&
`${item.score}` !== ''
) {
pieces.push(`${$t('documentCollection.similarityScore')}: ${item.score}`);
} }
if (typeof window !== 'undefined' && typeof DOMParser !== 'undefined') { if (item.sourceFileName) {
const doc = new DOMParser().parseFromString(content, 'text/html'); pieces.push(`来源: ${item.sourceFileName}`);
return (doc.body.textContent || '').replaceAll(/\n\s*\n/g, '\n').trim();
} }
return content return pieces.join(' · ');
.replaceAll(/<[^>]+>/g, ' ') };
.replaceAll(/\s+/g, ' ')
.trim(); const handleCancel = () => {
props.onCancel?.();
};
const handleConfirm = () => {
props.onConfirm?.();
}; };
defineExpose({ defineExpose({
@@ -149,8 +172,8 @@ defineExpose({
<div class="segment-badge"> <div class="segment-badge">
{{ item.sorting ?? index + 1 }} {{ item.sorting ?? index + 1 }}
</div> </div>
<div v-if="!hideScore" class="score-text"> <div v-if="resolveScoreLine(item)" class="score-text">
{{ $t('documentCollection.similarityScore') }}: {{ item.score }} {{ resolveScoreLine(item) }}
</div> </div>
</div> </div>
<div <div
@@ -174,8 +197,18 @@ defineExpose({
</ElTag> </ElTag>
</div> </div>
</div> </div>
<div class="content-desc"> <div
{{ normalizePreviewContent(item.content) }} v-if="resolvePreviewMarkdown(item)"
class="content-desc content-desc--markdown"
>
<ElXMarkdown
:markdown="resolvePreviewMarkdown(item)"
:allow-html="markdownRenderProps.allowHtml"
:sanitize="markdownRenderProps.sanitize"
/>
</div>
<div v-else class="content-desc">
{{ item.content }}
</div> </div>
</div> </div>
</div> </div>
@@ -193,17 +226,17 @@ defineExpose({
<div class="action-buttons"> <div class="action-buttons">
<ElButton <ElButton
:style="{ minWidth: '100px', height: '36px' }" :style="{ minWidth: '100px', height: '36px' }"
@click="onCancel" @click="handleCancel"
> >
{{ $t('documentCollection.actions.confirmImport') }} {{ $t('documentCollection.actions.cancelImport') }}
</ElButton> </ElButton>
<ElButton <ElButton
type="primary" type="primary"
:style="{ minWidth: '100px', height: '36px' }" :style="{ minWidth: '100px', height: '36px' }"
:loading="disabledConfirm" :loading="disabledConfirm"
@click="onConfirm" @click="handleConfirm"
> >
{{ $t('documentCollection.actions.cancelImport') }} {{ $t('documentCollection.actions.confirmImport') }}
</ElButton> </ElButton>
</div> </div>
</div> </div>
@@ -276,12 +309,26 @@ defineExpose({
padding: 14px 16px; padding: 14px 16px;
font-size: 14px; font-size: 14px;
line-height: 1.6; line-height: 1.6;
white-space: pre-wrap;
word-break: break-word; word-break: break-word;
background: rgb(248 250 252 / 90%); background: rgb(248 250 252 / 90%);
border-radius: 14px; border-radius: 14px;
} }
.content-desc--markdown :deep(.markdown-body) {
font-size: inherit;
line-height: inherit;
color: inherit;
background: transparent;
}
.content-desc--markdown :deep(.markdown-body > :first-child) {
margin-top: 0;
}
.content-desc--markdown :deep(.markdown-body > :last-child) {
margin-bottom: 0;
}
.score-text { .score-text {
font-size: 13px; font-size: 13px;
font-weight: 500; font-weight: 500;

View File

@@ -1,5 +1,5 @@
<script setup lang="ts"> <script setup lang="ts">
import { reactive, ref, watch } from 'vue'; import { computed, reactive, ref, watch } from 'vue';
import { $t } from '@easyflow/locales'; import { $t } from '@easyflow/locales';
@@ -8,6 +8,7 @@ import {
ElForm, ElForm,
ElFormItem, ElFormItem,
ElInput, ElInput,
ElInputNumber,
ElMessage, ElMessage,
ElOption, ElOption,
ElSelect, ElSelect,
@@ -85,7 +86,7 @@ const createDefaultFormState = () => ({
mdSplitterLevel: 2, mdSplitterLevel: 2,
overlapSize: 128, overlapSize: 128,
regex: '', regex: '',
rowsPerChunk: 1, rowsPerChunk: 10,
strategyCode: 'AUTO', strategyCode: 'AUTO',
}); });
@@ -126,6 +127,17 @@ const strategyOptions = [
}, },
]; ];
/**
 * Lower-cased file extension derived from `props.documentTitle`: the text
 * after the last dot. Empty when the title is missing, has no dot, or ends
 * with a dot.
 */
const fileExt = computed(() => {
  const title = String(props.documentTitle || '');
  const dotIndex = title.lastIndexOf('.');
  // Without a dot there is no extension. Splitting on '.' would return the
  // whole title, so a document titled just "xlsx" would be misclassified.
  if (dotIndex === -1) {
    return '';
  }
  return title.slice(dotIndex + 1).toLowerCase();
});
/** True when the derived file extension is `pptx`. */
const isPptx = computed(() => {
  return fileExt.value === 'pptx';
});
/** True when the derived file extension is `xlsx`. */
const isXlsx = computed(() => {
  return fileExt.value === 'xlsx';
});
/** The strategy selector is shown only when the file is neither PPTX nor XLSX. */
const showStrategySelector = computed(() => {
  return !(isPptx.value || isXlsx.value);
});
const mdLevels = [1, 2, 3, 4, 5, 6]; const mdLevels = [1, 2, 3, 4, 5, 6];
const showLengthSettings = (strategyCode?: string) => const showLengthSettings = (strategyCode?: string) =>
@@ -150,9 +162,22 @@ const resetPreviewState = () => {
previewError.value = ''; previewError.value = '';
}; };
const buildStrategyConfig = () => ({ const buildStrategyConfig = () => {
if (isPptx.value) {
return {
strategyCode: 'OFFICE_PPTX_PAGE',
};
}
if (isXlsx.value) {
return {
rowsPerChunk: formState.rowsPerChunk,
strategyCode: 'OFFICE_XLSX_ROW_WINDOW',
};
}
return {
...formState, ...formState,
}); };
};
const normalizeSourceRanges = (ranges?: SourceRange[]) => const normalizeSourceRanges = (ranges?: SourceRange[]) =>
Array.isArray(ranges) Array.isArray(ranges)
@@ -292,6 +317,13 @@ watch(
previewSequence += 1; previewSequence += 1;
clearPreviewTimer(); clearPreviewTimer();
Object.assign(formState, createDefaultFormState()); Object.assign(formState, createDefaultFormState());
if (isPptx.value) {
formState.strategyCode = 'OFFICE_PPTX_PAGE';
}
if (isXlsx.value) {
formState.strategyCode = 'OFFICE_XLSX_ROW_WINDOW';
formState.rowsPerChunk = 10;
}
resetPreviewState(); resetPreviewState();
if (activeDocumentId.value) { if (activeDocumentId.value) {
schedulePreviewGeneration(); schedulePreviewGeneration();
@@ -317,6 +349,7 @@ watch(
<ElForm :model="formState" label-position="top" class="workbench__form"> <ElForm :model="formState" label-position="top" class="workbench__form">
<div class="workbench__form-grid"> <div class="workbench__form-grid">
<ElFormItem <ElFormItem
v-if="showStrategySelector"
:label="$t('documentCollection.importDoc.strategySelection')" :label="$t('documentCollection.importDoc.strategySelection')"
class="workbench__form-full" class="workbench__form-full"
> >
@@ -330,6 +363,20 @@ watch(
</ElSelect> </ElSelect>
</ElFormItem> </ElFormItem>
<ElFormItem
v-if="isXlsx"
label="每多少行分一块"
class="workbench__form-full"
>
<ElInputNumber
v-model="formState.rowsPerChunk"
:min="1"
:max="200"
:step="1"
class="workbench__rows-input"
/>
</ElFormItem>
<ElFormItem <ElFormItem
v-if="showLengthSettings(formState.strategyCode)" v-if="showLengthSettings(formState.strategyCode)"
:label="$t('documentCollection.splitterDoc.chunkSize')" :label="$t('documentCollection.splitterDoc.chunkSize')"
@@ -478,6 +525,10 @@ watch(
box-shadow: 0 0 0 1px rgb(15 23 42 / 7%) inset; box-shadow: 0 0 0 1px rgb(15 23 42 / 7%) inset;
} }
.workbench__rows-input {
width: 100%;
}
:deep(.workbench__form .el-slider__runway) { :deep(.workbench__form .el-slider__runway) {
background: rgb(15 23 42 / 8%); background: rgb(15 23 42 / 8%);
} }

View File

@@ -1,10 +1,16 @@
<script setup lang="ts"> <script setup lang="ts">
import { computed, ref, watch } from 'vue'; import { computed, ref, watch } from 'vue';
import ElXMarkdown from 'vue-element-plus-x/es/XMarkdown/index.js';
import { $t } from '@easyflow/locales'; import { $t } from '@easyflow/locales';
import { ElEmpty, ElSkeleton, ElTag } from 'element-plus'; import { ElEmpty, ElSkeleton, ElTag } from 'element-plus';
import {
markdownRenderProps,
resolveMarkdownContent,
} from '#/views/ai/documentCollection/markdown-content';
interface SourceRange { interface SourceRange {
end: number; end: number;
start: number; start: number;
@@ -21,6 +27,7 @@ interface ChunkItem {
partNo?: number; partNo?: number;
partTotal?: number; partTotal?: number;
question?: string; question?: string;
renderMarkdown?: string;
sourceLabel?: string; sourceLabel?: string;
sourceRanges?: SourceRange[]; sourceRanges?: SourceRange[];
tokenEstimate?: number; tokenEstimate?: number;
@@ -180,15 +187,38 @@ const isActiveChunk = (chunk: ChunkItem) =>
<div v-if="chunk.chunkType === 'qa_pair'" class="qa-block"> <div v-if="chunk.chunkType === 'qa_pair'" class="qa-block">
<div class="qa-block__item"> <div class="qa-block__item">
<span class="qa-block__label">Q</span> <span class="qa-block__label">Q</span>
<span class="qa-block__text">{{ chunk.question }}</span> <div class="qa-block__text">
<ElXMarkdown
:markdown="resolveMarkdownContent(chunk.question)"
:allow-html="markdownRenderProps.allowHtml"
:sanitize="markdownRenderProps.sanitize"
/>
</div>
</div> </div>
<div class="qa-block__item"> <div class="qa-block__item">
<span class="qa-block__label">A</span> <span class="qa-block__label">A</span>
<span class="qa-block__text">{{ chunk.answer }}</span> <div class="qa-block__text">
<ElXMarkdown
:markdown="resolveMarkdownContent(chunk.answer)"
:allow-html="markdownRenderProps.allowHtml"
:sanitize="markdownRenderProps.sanitize"
/>
</div>
</div> </div>
</div> </div>
<pre class="chunk-card__content">{{ chunk.content }}</pre> <div
v-if="resolveMarkdownContent(chunk.renderMarkdown || chunk.content)"
class="chunk-card__content chunk-card__content--markdown"
>
<ElXMarkdown
:markdown="
resolveMarkdownContent(chunk.renderMarkdown || chunk.content)
"
:allow-html="markdownRenderProps.allowHtml"
:sanitize="markdownRenderProps.sanitize"
/>
</div>
<div <div
v-if="chunk.warnings && chunk.warnings.length > 0" v-if="chunk.warnings && chunk.warnings.length > 0"
@@ -342,14 +372,129 @@ const isActiveChunk = (chunk: ChunkItem) =>
.chunk-card__content { .chunk-card__content {
margin: 14px 0 0; margin: 14px 0 0;
font-family: inherit;
font-size: 13px; font-size: 13px;
line-height: 1.75; line-height: 1.75;
color: var(--el-text-color-regular); color: var(--el-text-color-regular);
white-space: pre-wrap; }
.chunk-card__content--markdown {
min-width: 0;
}
.chunk-card__content--markdown :deep(.markdown-body) {
font-size: inherit;
line-height: inherit;
color: inherit;
background: transparent;
}
.chunk-card__content--markdown :deep(.markdown-body > :first-child) {
margin-top: 0;
}
.chunk-card__content--markdown :deep(.markdown-body > :last-child) {
margin-bottom: 0;
}
/* Applied to every rendered descendant: allow breaks at any point so long
   unbroken strings wrap instead of overflowing the card. */
.chunk-card__content--markdown :deep(*) {
overflow-wrap: anywhere; overflow-wrap: anywhere;
}
.chunk-card__content--markdown :deep(p) {
margin: 0 0 10px;
}
.chunk-card__content--markdown :deep(p:last-child) {
margin-bottom: 0;
}
.chunk-card__content--markdown :deep(h1),
.chunk-card__content--markdown :deep(h2),
.chunk-card__content--markdown :deep(h3),
.chunk-card__content--markdown :deep(h4),
.chunk-card__content--markdown :deep(h5),
.chunk-card__content--markdown :deep(h6) {
margin: 14px 0 10px;
font-weight: 600;
line-height: 1.45;
color: var(--el-text-color-primary);
}
.chunk-card__content--markdown :deep(h1:first-child),
.chunk-card__content--markdown :deep(h2:first-child),
.chunk-card__content--markdown :deep(h3:first-child),
.chunk-card__content--markdown :deep(h4:first-child),
.chunk-card__content--markdown :deep(h5:first-child),
.chunk-card__content--markdown :deep(h6:first-child) {
margin-top: 0;
}
.chunk-card__content--markdown :deep(ul),
.chunk-card__content--markdown :deep(ol) {
padding-left: 20px;
margin: 0 0 12px;
}
.chunk-card__content--markdown :deep(li + li) {
margin-top: 4px;
}
.chunk-card__content--markdown :deep(a) {
color: var(--el-color-primary);
text-decoration: underline;
text-underline-offset: 2px;
}
.chunk-card__content--markdown :deep(img) {
display: block;
max-width: min(100%, 520px);
height: auto;
margin: 12px 0;
border: 1px solid rgb(15 23 42 / 8%);
border-radius: 12px;
box-shadow: 0 10px 24px rgb(15 23 42 / 8%);
}
.chunk-card__content--markdown :deep(table) {
width: 100%;
margin: 12px 0;
overflow: hidden;
border-collapse: collapse;
background: rgb(255 255 255 / 92%);
border: 1px solid rgb(15 23 42 / 8%);
border-radius: 12px;
}
.chunk-card__content--markdown :deep(th),
.chunk-card__content--markdown :deep(td) {
padding: 10px 12px;
text-align: left;
vertical-align: top;
border: 1px solid rgb(15 23 42 / 8%);
}
.chunk-card__content--markdown :deep(th) {
font-weight: 600;
color: var(--el-text-color-primary);
background: rgb(37 99 235 / 4%);
}
.chunk-card__content--markdown :deep(pre) {
max-width: 100%;
padding: 12px 14px;
overflow: auto;
background: rgb(15 23 42 / 4%);
border: 1px solid rgb(15 23 42 / 6%);
border-radius: 12px;
}
.chunk-card__content--markdown :deep(blockquote) {
padding-left: 12px;
margin: 12px 0;
color: var(--el-text-color-secondary);
border-left: 3px solid rgb(37 99 235 / 24%);
}
.chunk-card__warnings { .chunk-card__warnings {
display: flex; display: flex;
flex-wrap: wrap; flex-wrap: wrap;
@@ -387,10 +532,32 @@ const isActiveChunk = (chunk: ChunkItem) =>
} }
.qa-block__text { .qa-block__text {
min-width: 0;
font-size: 13px; font-size: 13px;
line-height: 1.7; line-height: 1.7;
color: var(--el-text-color-regular); color: var(--el-text-color-regular);
white-space: pre-wrap; }
.qa-block__text :deep(.markdown-body) {
font-size: inherit;
line-height: inherit;
background: transparent;
}
.qa-block__text :deep(.markdown-body > :first-child) {
margin-top: 0;
}
.qa-block__text :deep(.markdown-body > :last-child) {
margin-bottom: 0;
}
.qa-block__text :deep(img) {
display: block;
max-width: min(100%, 420px);
height: auto;
margin: 10px 0;
border-radius: 10px;
} }
@media (max-width: 960px) { @media (max-width: 960px) {

View File

@@ -0,0 +1,40 @@
// Matches an entity-escaped opening or closing table-related tag, i.e. `&lt;`
// with an optional `/` followed by one of the listed tag names (e.g.
// `&lt;table`, `&lt;/td`). Case-insensitive; no `g` flag, so `.test()` keeps no
// state between calls.
const ESCAPED_TABLE_HTML_TAG_PATTERN =
/&lt;\/?(?:table|thead|tbody|tfoot|tr|th|td|caption|colgroup|col)\b/i;
/**
 * Normalizes knowledge-base chunk content into text that can be handed
 * straight to the Markdown component.
 * HTML fragments that arrive entity-escaped, such as
 * `&lt;table&gt;...&lt;/table&gt;`, are decoded first; blank input yields ''.
 */
export const resolveMarkdownContent = (content?: string) => {
  const trimmed = String(content || '').trim();
  if (trimmed === '') {
    return '';
  }
  return ESCAPED_TABLE_HTML_TAG_PATTERN.test(trimmed)
    ? decodeHtmlEntities(trimmed)
    : trimmed;
};
/**
 * Single place that controls whether raw HTML inside Markdown is enabled.
 * Both flags are currently on: HTML is allowed and sanitizing is requested.
 * NOTE(review): how the Markdown component applies `sanitize` is not visible
 * here — confirm it covers HTML produced by decoding escaped entities.
 */
export const markdownRenderProps = {
allowHtml: true,
sanitize: true,
} as const;
/** Matches one numeric (`&#60;`, `&#x3C;`) or named (`&lt;`) character reference. */
const HTML_ENTITY_PATTERN = /&(#x[\da-f]+|#\d+|[a-z][\da-z]*);/gi;

/**
 * Named references that are decoded. A Map is used so that names such as
 * `constructor` cannot resolve to inherited object properties.
 */
const NAMED_HTML_ENTITIES: ReadonlyMap<string, string> = new Map([
  ['AMP', '&'],
  ['GT', '>'],
  ['LT', '<'],
  ['QUOT', '"'],
  ['amp', '&'],
  ['apos', "'"],
  ['gt', '>'],
  ['lt', '<'],
  ['nbsp', '\u00A0'],
  ['quot', '"'],
]);

/**
 * Decodes HTML entities the backend may return, so the Markdown component can
 * go on to process the native tags.
 *
 * The decode is a single textual pass rather than an HTML parse. Parsing the
 * content as a document and reading its text would drop any real markup that
 * is already in the chunk and can swallow text after a stray `<`. A single
 * pass also means `&amp;lt;` becomes `&lt;`, not `<`, and the result is the
 * same with or without a DOM.
 *
 * @param content Text that may contain character references.
 * @returns The text with numeric references and the named references listed
 *   in NAMED_HTML_ENTITIES decoded; anything else is left untouched.
 */
function decodeHtmlEntities(content: string) {
  return content.replace(HTML_ENTITY_PATTERN, (match, body: string) => {
    if (body.startsWith('#')) {
      const isHex = body[1] === 'x' || body[1] === 'X';
      const codePoint = Number.parseInt(
        body.slice(isHex ? 2 : 1),
        isHex ? 16 : 10,
      );
      const isSurrogate = codePoint >= 0xd800 && codePoint <= 0xdfff;
      // Leave NUL, surrogates and out-of-range values as written instead of
      // throwing from String.fromCodePoint or emitting broken characters.
      if (
        !Number.isInteger(codePoint) ||
        codePoint <= 0 ||
        codePoint > 0x10ffff ||
        isSurrogate
      ) {
        return match;
      }
      return String.fromCodePoint(codePoint);
    }
    return NAMED_HTML_ENTITIES.get(body) ?? match;
  });
}