feat: 增加知识库新类型FAQ知识库

This commit is contained in:
2026-02-24 13:07:14 +08:00
parent e27ba8d457
commit a9060ed2b9
27 changed files with 1701 additions and 58 deletions

View File

@@ -64,6 +64,10 @@
<groupId>cn.hutool</groupId>
<artifactId>hutool-core</artifactId>
</dependency>
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
</dependency>
<dependency>
<groupId>tech.easyflow</groupId>
<artifactId>easyflow-module-system</artifactId>

View File

@@ -4,7 +4,6 @@ import com.easyagents.engine.es.ElasticSearcher;
import com.easyagents.search.engine.lucene.LuceneSearcher;
import com.easyagents.search.engine.service.DocumentSearcher;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
@@ -29,6 +28,9 @@ public class SearcherFactory {
public DocumentSearcher getSearcher(String defaultSearcherType) {
if (defaultSearcherType == null) {
defaultSearcherType = "lucene";
}
switch (defaultSearcherType) {
case "elasticSearch":
return new ElasticSearcher(aiEsConfig);
@@ -37,4 +39,4 @@ public class SearcherFactory {
return new LuceneSearcher(luceneConfig);
}
}
}
}

View File

@@ -34,6 +34,9 @@ import java.util.Map;
@Table("tb_document_collection")
public class DocumentCollection extends DocumentCollectionBase {
/** Collection type value identifying a document knowledge base. */
public static final String TYPE_DOCUMENT = "DOCUMENT";
/** Collection type value identifying an FAQ knowledge base; compared case-insensitively by {@code isFaqCollection()}. */
public static final String TYPE_FAQ = "FAQ";
/**
* 文档块问题集合配置key
*/
@@ -98,6 +101,14 @@ public class DocumentCollection extends DocumentCollectionBase {
return this.getVectorStoreEnable() != null && this.getVectorStoreEnable();
}
/**
 * Tells whether this collection is an FAQ knowledge base.
 *
 * @return {@code true} when the collection type equals {@link #TYPE_FAQ}, ignoring case;
 *         {@code false} otherwise, including when the type is {@code null}
 */
public boolean isFaqCollection() {
    String type = getCollectionType();
    return TYPE_FAQ.equalsIgnoreCase(type);
}
/**
 * Tells whether this collection is a document knowledge base.
 * Every collection that is not an FAQ collection counts as a document collection,
 * so a {@code null} or unrecognised type also yields {@code true}.
 *
 * @return the negation of {@link #isFaqCollection()}
 */
public boolean isDocumentCollection() {
    boolean faq = isFaqCollection();
    return !faq;
}
/**
 * Tells whether the search engine is switched on for this collection.
 *
 * @return {@code true} only when the search-engine flag is set and true;
 *         a {@code null} flag is treated as disabled
 */
public boolean isSearchEngineEnabled() {
    return Boolean.TRUE.equals(getSearchEngineEnable());
}

View File

@@ -0,0 +1,8 @@
package tech.easyflow.ai.entity;
import com.mybatisflex.annotation.Table;
import tech.easyflow.ai.entity.base.FaqItemBase;
/**
 * FAQ entry entity mapped to the {@code tb_faq_item} table.
 * Declares no members of its own; all fields and accessors come from {@link FaqItemBase}.
 */
@Table("tb_faq_item")
public class FaqItem extends FaqItemBase {
}

View File

@@ -4,11 +4,12 @@ import com.mybatisflex.annotation.Column;
import com.mybatisflex.annotation.Id;
import com.mybatisflex.annotation.KeyType;
import com.mybatisflex.core.handler.FastjsonTypeHandler;
import tech.easyflow.common.entity.DateEntity;
import java.io.Serializable;
import java.math.BigInteger;
import java.util.Date;
import java.util.Map;
import tech.easyflow.common.entity.DateEntity;
public class DocumentCollectionBase extends DateEntity implements Serializable {
@@ -21,6 +22,12 @@ public class DocumentCollectionBase extends DateEntity implements Serializable {
@Id(keyType = KeyType.Generator, value = "snowFlakeId", comment = "Id")
private BigInteger id;
/**
 * Knowledge base type: DOCUMENT/FAQ
 */
@Column(comment = "知识库类型: DOCUMENT/FAQ")
private String collectionType;
/**
* 别名
*/
@@ -161,6 +168,14 @@ public class DocumentCollectionBase extends DateEntity implements Serializable {
this.id = id;
}
/**
 * Returns the knowledge base type (DOCUMENT/FAQ).
 *
 * @return the stored type, possibly {@code null}
 */
public String getCollectionType() {
    return this.collectionType;
}
/**
 * Sets the knowledge base type (DOCUMENT/FAQ).
 *
 * @param collectionType the type value to store; no validation is performed here
 */
public void setCollectionType(String collectionType) {
this.collectionType = collectionType;
}
/**
 * Returns the alias of this knowledge base.
 *
 * @return the stored alias, possibly {@code null}
 */
public String getAlias() {
    return this.alias;
}

View File

@@ -0,0 +1,137 @@
package tech.easyflow.ai.entity.base;
import com.mybatisflex.annotation.Column;
import com.mybatisflex.annotation.Id;
import com.mybatisflex.annotation.KeyType;
import com.mybatisflex.core.handler.FastjsonTypeHandler;
import java.io.Serializable;
import java.math.BigInteger;
import java.util.Date;
import java.util.Map;
/**
 * Base class holding the persistent fields of an FAQ entry together with plain
 * getters and setters. The concrete entity {@code FaqItem} extends this class.
 */
public class FaqItemBase implements Serializable {
private static final long serialVersionUID = 1L;
/** Primary key, produced by the "snowFlakeId" key generator. */
@Id(keyType = KeyType.Generator, value = "snowFlakeId")
private BigInteger id;
/** Id of the knowledge base this entry belongs to. */
@Column(comment = "知识库ID")
private BigInteger collectionId;
/** Question text. */
@Column(comment = "问题")
private String question;
/** Answer as HTML. */
@Column(comment = "答案HTML")
private String answerHtml;
/** Answer as plain text. */
@Column(comment = "答案纯文本")
private String answerText;
/** Sort order. */
@Column(comment = "排序")
private Integer orderNo;
/** Extension options, (de)serialized through {@link FastjsonTypeHandler}. */
@Column(typeHandler = FastjsonTypeHandler.class, comment = "扩展项")
private Map<String, Object> options;
/** Creation time. */
@Column(comment = "创建时间")
private Date created;
/** Id of the creating user. */
@Column(comment = "创建人")
private BigInteger createdBy;
/** Last modification time. */
@Column(comment = "更新时间")
private Date modified;
/** Id of the last modifying user. */
@Column(comment = "更新人")
private BigInteger modifiedBy;
// ---- Plain accessors; none of them validates or transforms its value. ----
public BigInteger getId() {
return id;
}
public void setId(BigInteger id) {
this.id = id;
}
public BigInteger getCollectionId() {
return collectionId;
}
public void setCollectionId(BigInteger collectionId) {
this.collectionId = collectionId;
}
public String getQuestion() {
return question;
}
public void setQuestion(String question) {
this.question = question;
}
public String getAnswerHtml() {
return answerHtml;
}
public void setAnswerHtml(String answerHtml) {
this.answerHtml = answerHtml;
}
public String getAnswerText() {
return answerText;
}
public void setAnswerText(String answerText) {
this.answerText = answerText;
}
public Integer getOrderNo() {
return orderNo;
}
public void setOrderNo(Integer orderNo) {
this.orderNo = orderNo;
}
public Map<String, Object> getOptions() {
return options;
}
public void setOptions(Map<String, Object> options) {
this.options = options;
}
public Date getCreated() {
return created;
}
public void setCreated(Date created) {
this.created = created;
}
public BigInteger getCreatedBy() {
return createdBy;
}
public void setCreatedBy(BigInteger createdBy) {
this.createdBy = createdBy;
}
public Date getModified() {
return modified;
}
public void setModified(Date modified) {
this.modified = modified;
}
public BigInteger getModifiedBy() {
return modifiedBy;
}
public void setModifiedBy(BigInteger modifiedBy) {
this.modifiedBy = modifiedBy;
}
}

View File

@@ -0,0 +1,7 @@
package tech.easyflow.ai.mapper;
import com.mybatisflex.core.BaseMapper;
import tech.easyflow.ai.entity.FaqItem;
/**
 * MyBatis-Flex mapper for {@link FaqItem}; declares no methods beyond those
 * inherited from {@link BaseMapper}.
 */
public interface FaqItemMapper extends BaseMapper<FaqItem> {
}

View File

@@ -0,0 +1,15 @@
package tech.easyflow.ai.service;
import com.mybatisflex.core.service.IService;
import tech.easyflow.ai.entity.FaqItem;
import java.math.BigInteger;
/**
 * Service contract for FAQ entries, adding FAQ-specific create, update and
 * remove operations on top of the generic {@link IService} methods.
 */
public interface FaqItemService extends IService<FaqItem> {
/**
 * Creates a new FAQ entry.
 *
 * @param entity the entry to create
 * @return {@code true} when the entry was saved
 */
boolean saveFaqItem(FaqItem entity);
/**
 * Updates an existing FAQ entry.
 *
 * @param entity the entry carrying the id of the record to update and the new values
 * @return {@code true} when the entry was updated
 */
boolean updateFaqItem(FaqItem entity);
/**
 * Removes an FAQ entry.
 *
 * @param id id of the entry to remove
 * @return {@code true} when the entry was removed
 */
boolean removeFaqItem(BigInteger id);
}

View File

@@ -17,9 +17,11 @@ import org.springframework.stereotype.Service;
import tech.easyflow.ai.config.SearcherFactory;
import tech.easyflow.ai.entity.DocumentChunk;
import tech.easyflow.ai.entity.DocumentCollection;
import tech.easyflow.ai.entity.FaqItem;
import tech.easyflow.ai.entity.Model;
import tech.easyflow.ai.mapper.DocumentChunkMapper;
import tech.easyflow.ai.mapper.DocumentCollectionMapper;
import tech.easyflow.ai.mapper.FaqItemMapper;
import tech.easyflow.ai.service.DocumentChunkService;
import tech.easyflow.ai.service.DocumentCollectionService;
import tech.easyflow.ai.service.ModelService;
@@ -63,6 +65,9 @@ public class DocumentCollectionServiceImpl extends ServiceImpl<DocumentCollectio
@Autowired
private DocumentChunkMapper documentChunkMapper;
@Autowired
private FaqItemMapper faqItemMapper;
@Override
public List<Document> search(BigInteger id, String keyword) {
@@ -123,13 +128,7 @@ public class DocumentCollectionServiceImpl extends ServiceImpl<DocumentCollectio
Map<String, Document> uniqueDocs = combinedFuture.get(); // 阻塞等待所有查询完成
List<Document> searchDocuments = new ArrayList<>(uniqueDocs.values());
searchDocuments.sort((doc1, doc2) -> Double.compare(doc2.getScore(), doc1.getScore()));
searchDocuments.forEach(item ->{
DocumentChunk documentChunk = documentChunkMapper.selectOneById((Serializable) item.getId());
if (documentChunk != null && !StringUtil.noText(documentChunk.getContent())){
item.setContent(documentChunk.getContent());
}
});
fillSearchContent(documentCollection, searchDocuments);
if (searchDocuments.isEmpty()) {
return Collections.emptyList();
}
@@ -243,4 +242,33 @@ public class DocumentCollectionServiceImpl extends ServiceImpl<DocumentCollectio
.limit(maxResults)
.collect(Collectors.toList());
}
/**
 * Replaces the content of each search hit with the text stored in the database.
 * <p>
 * All hit ids are looked up with a single batch query. For an FAQ collection the
 * hits are resolved against FAQ entries and the content is rebuilt as
 * "question / answer" text; otherwise the hits are resolved against document
 * chunks and the chunk content is used when it is not blank. Hits without a
 * matching row keep their current content.
 *
 * @param documentCollection the collection that was searched; decides which table is consulted
 * @param searchDocuments    the hits to enrich in place; {@code null} or empty is a no-op
 */
private void fillSearchContent(DocumentCollection documentCollection, List<Document> searchDocuments) {
    if (searchDocuments == null || searchDocuments.isEmpty()) {
        return;
    }
    List<Serializable> ids = searchDocuments.stream()
            .map(item -> (Serializable) item.getId())
            .collect(Collectors.toList());
    if (documentCollection.isFaqCollection()) {
        // Keyed by the id's string form because the hit id and the entity id may differ in type.
        Map<String, FaqItem> faqItemMap = faqItemMapper.selectListByIds(ids).stream()
                .collect(Collectors.toMap(item -> item.getId().toString(), item -> item, (a, b) -> a));
        searchDocuments.forEach(item -> {
            FaqItem faqItem = faqItemMap.get(String.valueOf(item.getId()));
            if (faqItem != null) {
                // The answer label carries the same full-width colon as the question label,
                // so the answer text is no longer glued directly onto the label.
                item.setContent("问题：" + faqItem.getQuestion() + "\n答案：" + faqItem.getAnswerText());
            }
        });
        return;
    }
    Map<String, DocumentChunk> chunkMap = documentChunkMapper.selectListByIds(ids).stream()
            .collect(Collectors.toMap(item -> item.getId().toString(), item -> item, (a, b) -> a));
    searchDocuments.forEach(item -> {
        DocumentChunk documentChunk = chunkMap.get(String.valueOf(item.getId()));
        if (documentChunk != null && !StringUtil.noText(documentChunk.getContent())) {
            item.setContent(documentChunk.getContent());
        }
    });
}
}

View File

@@ -160,6 +160,13 @@ public class DocumentServiceImpl extends ServiceImpl<DocumentMapper, Document> i
@Transactional
public Result<?> textSplit(DocumentCollectionSplitParams documentCollectionSplitParams) {
try {
DocumentCollection knowledge = knowledgeService.getById(documentCollectionSplitParams.getKnowledgeId());
if (knowledge == null) {
throw new BusinessException("知识库不存在");
}
if (knowledge.isFaqCollection()) {
throw new BusinessException("FAQ知识库不支持文档上传");
}
String filePath = documentCollectionSplitParams.getFilePath();
String fileOriginName = documentCollectionSplitParams.getFileOriginName();
InputStream inputStream = storageService.readStream(filePath);
@@ -264,6 +271,9 @@ public class DocumentServiceImpl extends ServiceImpl<DocumentMapper, Document> i
if (knowledge == null) {
throw new BusinessException("知识库不存在");
}
if (knowledge.isFaqCollection()) {
throw new BusinessException("FAQ知识库不支持文档上传");
}
DocumentStore documentStore = null;
try {
documentStore = knowledge.toDocumentStore();

View File

@@ -0,0 +1,285 @@
package tech.easyflow.ai.service.impl;
import cn.dev33.satoken.stp.StpUtil;
import com.easyagents.core.model.embedding.EmbeddingModel;
import com.easyagents.core.model.embedding.EmbeddingOptions;
import com.easyagents.core.store.DocumentStore;
import com.easyagents.core.store.StoreOptions;
import com.easyagents.core.store.StoreResult;
import com.easyagents.search.engine.service.DocumentSearcher;
import com.mybatisflex.core.query.QueryWrapper;
import com.mybatisflex.spring.service.impl.ServiceImpl;
import org.jsoup.Jsoup;
import org.jsoup.safety.Safelist;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
import tech.easyflow.ai.config.SearcherFactory;
import tech.easyflow.ai.entity.DocumentCollection;
import tech.easyflow.ai.entity.FaqItem;
import tech.easyflow.ai.entity.Model;
import tech.easyflow.ai.mapper.FaqItemMapper;
import tech.easyflow.ai.service.DocumentCollectionService;
import tech.easyflow.ai.service.FaqItemService;
import tech.easyflow.ai.service.ModelService;
import tech.easyflow.common.util.StringUtil;
import tech.easyflow.common.web.exceptions.BusinessException;
import javax.annotation.Resource;
import java.math.BigInteger;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;
import static tech.easyflow.ai.entity.DocumentCollection.KEY_CAN_UPDATE_EMBEDDING_MODEL;
import static tech.easyflow.ai.entity.DocumentCollection.KEY_SEARCH_ENGINE_TYPE;
@Service
public class FaqItemServiceImpl extends ServiceImpl<FaqItemMapper, FaqItem> implements FaqItemService {
private static final Safelist ANSWER_SAFE_LIST = Safelist.basic()
.addTags("h1", "h2", "h3", "h4", "h5", "h6");
@Resource
private DocumentCollectionService documentCollectionService;
@Resource
private ModelService modelService;
@Autowired
private SearcherFactory searcherFactory;
@Override
@Transactional
public boolean saveFaqItem(FaqItem entity) {
checkAndNormalize(entity, true);
Date now = new Date();
BigInteger userId = getCurrentUserId();
entity.setCreated(now);
entity.setModified(now);
entity.setCreatedBy(userId);
entity.setModifiedBy(userId);
if (entity.getOrderNo() == null) {
entity.setOrderNo(nextOrderNo(entity.getCollectionId()));
}
boolean success = save(entity);
if (!success) {
return false;
}
DocumentCollection collection = getFaqCollection(entity.getCollectionId());
storeToVector(collection, entity, false);
return true;
}
@Override
@Transactional
public boolean updateFaqItem(FaqItem entity) {
if (entity.getId() == null) {
throw new BusinessException("FAQ条目ID不能为空");
}
FaqItem old = getById(entity.getId());
if (old == null) {
throw new BusinessException("FAQ条目不存在");
}
if (entity.getCollectionId() == null) {
entity.setCollectionId(old.getCollectionId());
}
checkAndNormalize(entity, false);
old.setQuestion(entity.getQuestion());
old.setAnswerHtml(entity.getAnswerHtml());
old.setAnswerText(entity.getAnswerText());
if (entity.getOrderNo() != null) {
old.setOrderNo(entity.getOrderNo());
}
old.setModified(new Date());
old.setModifiedBy(getCurrentUserId());
old.setOptions(entity.getOptions());
boolean success = updateById(old);
if (!success) {
return false;
}
DocumentCollection collection = getFaqCollection(old.getCollectionId());
storeToVector(collection, old, true);
return true;
}
@Override
@Transactional
public boolean removeFaqItem(BigInteger id) {
FaqItem old = getById(id);
if (old == null) {
throw new BusinessException("FAQ条目不存在");
}
DocumentCollection collection = getFaqCollection(old.getCollectionId());
removeFromVector(collection, old);
return removeById(id);
}
private void checkAndNormalize(FaqItem entity, boolean isSave) {
if (entity == null) {
throw new BusinessException("FAQ条目不能为空");
}
if (entity.getCollectionId() == null) {
throw new BusinessException("知识库ID不能为空");
}
if (StringUtil.noText(entity.getQuestion())) {
throw new BusinessException("问题不能为空");
}
if (StringUtil.noText(entity.getAnswerHtml())) {
throw new BusinessException("答案不能为空");
}
if (isSave && entity.getId() != null) {
throw new BusinessException("新增FAQ条目不允许传入ID");
}
entity.setQuestion(entity.getQuestion().trim());
String cleanHtml = Jsoup.clean(entity.getAnswerHtml(), ANSWER_SAFE_LIST);
String answerText = Jsoup.parse(cleanHtml).text();
if (StringUtil.noText(answerText)) {
throw new BusinessException("答案不能为空");
}
entity.setAnswerHtml(cleanHtml);
entity.setAnswerText(answerText);
}
private DocumentCollection getFaqCollection(BigInteger collectionId) {
DocumentCollection collection = documentCollectionService.getById(collectionId);
if (collection == null) {
throw new BusinessException("知识库不存在");
}
if (!collection.isFaqCollection()) {
throw new BusinessException("当前知识库不是FAQ类型");
}
return collection;
}
private void storeToVector(DocumentCollection collection, FaqItem entity, boolean isUpdate) {
PreparedStore preparedStore = prepareStore(collection);
com.easyagents.core.document.Document doc = toSearchDocument(entity);
StoreResult result = isUpdate
? preparedStore.documentStore.update(doc, preparedStore.storeOptions)
: preparedStore.documentStore.store(Collections.singletonList(doc), preparedStore.storeOptions);
if (result == null || !result.isSuccess()) {
throw new BusinessException("FAQ向量化失败");
}
if (collection.isSearchEngineEnabled()) {
DocumentSearcher searcher = searcherFactory.getSearcher((String) collection.getOptionsByKey(KEY_SEARCH_ENGINE_TYPE));
if (searcher != null) {
if (isUpdate) {
searcher.deleteDocument(entity.getId());
}
searcher.addDocument(doc);
}
}
markCollectionEmbedded(collection, preparedStore.embeddingModel);
}
private void removeFromVector(DocumentCollection collection, FaqItem entity) {
PreparedStore preparedStore = prepareStore(collection);
preparedStore.documentStore.delete(Collections.singletonList(entity.getId()), preparedStore.storeOptions);
if (collection.isSearchEngineEnabled()) {
DocumentSearcher searcher = searcherFactory.getSearcher((String) collection.getOptionsByKey(KEY_SEARCH_ENGINE_TYPE));
if (searcher != null) {
searcher.deleteDocument(entity.getId());
}
}
}
private PreparedStore prepareStore(DocumentCollection collection) {
DocumentStore documentStore;
try {
documentStore = collection.toDocumentStore();
} catch (Exception e) {
throw new BusinessException("向量数据库配置错误");
}
if (documentStore == null) {
throw new BusinessException("向量数据库配置错误");
}
Model model = modelService.getModelInstance(collection.getVectorEmbedModelId());
if (model == null) {
throw new BusinessException("该知识库未配置向量模型");
}
EmbeddingModel embeddingModel = model.toEmbeddingModel();
documentStore.setEmbeddingModel(embeddingModel);
StoreOptions options = StoreOptions.ofCollectionName(collection.getVectorStoreCollection());
EmbeddingOptions embeddingOptions = new EmbeddingOptions();
embeddingOptions.setModel(model.getModelName());
embeddingOptions.setDimensions(collection.getDimensionOfVectorModel());
options.setEmbeddingOptions(embeddingOptions);
options.setIndexName(options.getCollectionName());
return new PreparedStore(documentStore, options, embeddingModel);
}
private com.easyagents.core.document.Document toSearchDocument(FaqItem entity) {
String content = buildSearchContent(entity);
com.easyagents.core.document.Document doc = com.easyagents.core.document.Document.of(content);
doc.setId(entity.getId());
Map<String, Object> metadata = new HashMap<>();
metadata.put("question", entity.getQuestion());
metadata.put("answerText", entity.getAnswerText());
doc.setMetadataMap(metadata);
return doc;
}
private String buildSearchContent(FaqItem entity) {
return "问题:" + entity.getQuestion() + "\n答案" + entity.getAnswerText();
}
private void markCollectionEmbedded(DocumentCollection collection, EmbeddingModel embeddingModel) {
Map<String, Object> options = collection.getOptions() == null
? new HashMap<>()
: new HashMap<>(collection.getOptions());
if (!Boolean.FALSE.equals(options.get(KEY_CAN_UPDATE_EMBEDDING_MODEL))) {
options.put(KEY_CAN_UPDATE_EMBEDDING_MODEL, false);
DocumentCollection updateCollection = new DocumentCollection();
updateCollection.setId(collection.getId());
updateCollection.setOptions(options);
documentCollectionService.updateById(updateCollection);
}
if (collection.getDimensionOfVectorModel() == null) {
int dimension = Model.getEmbeddingDimension(embeddingModel);
DocumentCollection updateCollection = new DocumentCollection();
updateCollection.setId(collection.getId());
updateCollection.setDimensionOfVectorModel(dimension);
documentCollectionService.updateById(updateCollection);
}
}
private Integer nextOrderNo(BigInteger collectionId) {
java.util.List<FaqItem> list = list(QueryWrapper.create()
.eq(FaqItem::getCollectionId, collectionId)
.orderBy("order_no desc"));
if (list == null || list.isEmpty() || list.get(0).getOrderNo() == null) {
return 0;
}
return list.get(0).getOrderNo() + 1;
}
private BigInteger getCurrentUserId() {
if (!StpUtil.isLogin()) {
return null;
}
return BigInteger.valueOf(StpUtil.getLoginIdAsLong());
}
private static class PreparedStore {
private final DocumentStore documentStore;
private final StoreOptions storeOptions;
private final EmbeddingModel embeddingModel;
private PreparedStore(DocumentStore documentStore, StoreOptions storeOptions, EmbeddingModel embeddingModel) {
this.documentStore = documentStore;
this.storeOptions = storeOptions;
this.embeddingModel = embeddingModel;
}
}
}