From bc0066f82c15c3fb701787faee762159b3a49767 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=99=88=E8=B1=AA=E6=9D=B0?= <297467301@qq.com> Date: Thu, 6 Mar 2025 14:34:10 +0800 Subject: [PATCH] =?UTF-8?q?=E6=96=87=E6=A1=A3=E5=90=91=E9=87=8F=E5=8C=96?= =?UTF-8?q?=E5=8A=9F=E8=83=BD=E6=96=B0=E5=A2=9E?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ui/src/views/docmanager/embedding/index.vue | 188 ++++++++++++++++++ ui/src/views/docmanager/upload/index.vue | 22 ++ .../birdnest/domain/DocumentEmbedding.java | 26 +++ .../domain/vo/DocumentEmbeddingVO.java | 24 +++ .../service/IDocumentEmbeddingService.java | 31 +++ .../impl/DocumentEmbeddingServiceImpl.java | 88 ++++++++ .../birdnest/DocumentController.java | 49 ++++- .../birdnest/DocumentEmbeddingController.java | 63 ++++++ .../src/main/resources/application.yml | 14 ++ web/ruoyi-framework/pom.xml | 25 +++ .../config/OpenAiEmbeddingModelConfig.java | 33 +++ .../config/PgVectorEmbeddingStoreConfig.java | 42 ++++ 12 files changed, 601 insertions(+), 4 deletions(-) create mode 100644 ui/src/views/docmanager/embedding/index.vue create mode 100644 web/ruoyi-admin/src/main/java/com/ruoyi/birdnest/domain/DocumentEmbedding.java create mode 100644 web/ruoyi-admin/src/main/java/com/ruoyi/birdnest/domain/vo/DocumentEmbeddingVO.java create mode 100644 web/ruoyi-admin/src/main/java/com/ruoyi/birdnest/service/IDocumentEmbeddingService.java create mode 100644 web/ruoyi-admin/src/main/java/com/ruoyi/birdnest/service/impl/DocumentEmbeddingServiceImpl.java create mode 100644 web/ruoyi-admin/src/main/java/com/ruoyi/web/controller/birdnest/DocumentEmbeddingController.java create mode 100644 web/ruoyi-framework/src/main/java/com/ruoyi/framework/config/OpenAiEmbeddingModelConfig.java create mode 100644 web/ruoyi-framework/src/main/java/com/ruoyi/framework/config/PgVectorEmbeddingStoreConfig.java diff --git a/ui/src/views/docmanager/embedding/index.vue b/ui/src/views/docmanager/embedding/index.vue 
new file mode 100644
index 0000000..19603bb
--- /dev/null
+++ b/ui/src/views/docmanager/embedding/index.vue
@@ -0,0 +1,188 @@
+ + + + + + +
diff --git a/ui/src/views/docmanager/upload/index.vue b/ui/src/views/docmanager/upload/index.vue
index 1bd7a6d..92871e8 100644
--- a/ui/src/views/docmanager/upload/index.vue
+++ b/ui/src/views/docmanager/upload/index.vue
@@ -62,7 +62,15 @@ > 下载 + + 向量化 + +
@@ -234,6 +242,21 @@
           this.$message.error("下载失败");
         });
     },
+    // Ask the backend to embed the given uploaded document.
+    documentEmbedding(fileName) {
+      docRequest({
+        // Encode the name so characters such as "#", "?" or "%" cannot break the URL path.
+        url: `/documents/${encodeURIComponent(fileName)}/embedding`,
+        method: "POST",
+      })
+        .then(() => {
+          this.$message.success("向量化成功");
+        })
+        .catch((error) => {
+          console.error("There was an error embedding the file!", error);
+          this.$message.error("向量化失败");
+        });
+    },
     fetchDocuments() {
       docRequest
         .get("/documents/list", {
diff --git a/web/ruoyi-admin/src/main/java/com/ruoyi/birdnest/domain/DocumentEmbedding.java b/web/ruoyi-admin/src/main/java/com/ruoyi/birdnest/domain/DocumentEmbedding.java
new file mode 100644
index 0000000..c1ad29d
--- /dev/null
+++ b/web/ruoyi-admin/src/main/java/com/ruoyi/birdnest/domain/DocumentEmbedding.java
@@ -0,0 +1,32 @@
+package com.ruoyi.birdnest.domain;
+
+import lombok.AllArgsConstructor;
+import lombok.Data;
+import lombok.NoArgsConstructor;
+import java.io.Serializable;
+
+/**
+ * A document embedding entry; also bound from request parameters as the search
+ * criteria of the embedding list endpoint.
+ *
+ * @author chenhaojie
+ * @since 2025-03-05 09:34:42
+ */
+@Data
+@NoArgsConstructor
+@AllArgsConstructor
+public class DocumentEmbedding {
+
+    /** Identifier of the embedding. */
+    private String embeddingId;
+
+    /** Embedding vector. */
+    private float[] embedding;
+
+    /** Text of the embedded segment; used as the query text when searching. */
+    private String text;
+
+    /** Metadata stored with the embedding. */
+    private String metadata;
+
+}
diff --git a/web/ruoyi-admin/src/main/java/com/ruoyi/birdnest/domain/vo/DocumentEmbeddingVO.java b/web/ruoyi-admin/src/main/java/com/ruoyi/birdnest/domain/vo/DocumentEmbeddingVO.java
new file mode 100644
index 0000000..bd97a76
--- /dev/null
+++ b/web/ruoyi-admin/src/main/java/com/ruoyi/birdnest/domain/vo/DocumentEmbeddingVO.java
@@ -0,0 +1,24 @@
+package
com.ruoyi.birdnest.domain.vo;
+
+import lombok.AllArgsConstructor;
+import lombok.Data;
+import lombok.NoArgsConstructor;
+import java.io.Serializable;
+
+@Data
+@NoArgsConstructor
+@AllArgsConstructor
+public class DocumentEmbeddingVO {
+
+
+    private String embeddingId;
+
+    private float[] embedding;
+
+    private String text;
+
+    private String fileName;
+
+    private String index;
+
+}
\ No newline at end of file
diff --git a/web/ruoyi-admin/src/main/java/com/ruoyi/birdnest/service/IDocumentEmbeddingService.java b/web/ruoyi-admin/src/main/java/com/ruoyi/birdnest/service/IDocumentEmbeddingService.java
new file mode 100644
index 0000000..bd5d65d
--- /dev/null
+++ b/web/ruoyi-admin/src/main/java/com/ruoyi/birdnest/service/IDocumentEmbeddingService.java
@@ -0,0 +1,32 @@
+package com.ruoyi.birdnest.service;
+
+import com.ruoyi.birdnest.domain.DocumentEmbedding;
+import com.ruoyi.birdnest.domain.vo.DocumentEmbeddingVO;
+
+import java.util.List;
+
+/**
+ * Query and maintenance operations on the document embeddings kept in the vector store.
+ *
+ * @author chenhaojie
+ * @since 2025-03-05 09:12:17
+ */
+public interface IDocumentEmbeddingService
+{
+
+    /**
+     * Searches the vector store for the embeddings closest to the query text.
+     *
+     * @param documentEmbedding query object whose {@code text} drives the search
+     * @return matching embeddings, never {@code null}
+     */
+    public List<DocumentEmbeddingVO> selectDocumentEmbeddingList(DocumentEmbedding documentEmbedding);
+
+    /**
+     * Deletes the embeddings with the given ids from the vector store.
+     *
+     * @param embeddingIds ids of the embeddings to delete
+     */
+    public void deleteDocumentEmbeddingByEmbeddingIds(String[] embeddingIds);
+
+}
diff --git a/web/ruoyi-admin/src/main/java/com/ruoyi/birdnest/service/impl/DocumentEmbeddingServiceImpl.java b/web/ruoyi-admin/src/main/java/com/ruoyi/birdnest/service/impl/DocumentEmbeddingServiceImpl.java
new file mode 100644
index 0000000..43ce65a
--- /dev/null
+++ b/web/ruoyi-admin/src/main/java/com/ruoyi/birdnest/service/impl/DocumentEmbeddingServiceImpl.java
@@ -0,0 +1,114 @@
+package com.ruoyi.birdnest.service.impl;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import com.ruoyi.birdnest.domain.vo.DocumentEmbeddingVO;
+import com.ruoyi.common.utils.StringUtils;
+import dev.langchain4j.data.embedding.Embedding;
+import dev.langchain4j.data.segment.TextSegment;
+import dev.langchain4j.model.openai.OpenAiEmbeddingModel;
+import dev.langchain4j.store.embedding.EmbeddingMatch;
+import dev.langchain4j.store.embedding.EmbeddingSearchRequest;
+import dev.langchain4j.store.embedding.EmbeddingSearchResult;
+import dev.langchain4j.store.embedding.pgvector.PgVectorEmbeddingStore;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.beans.factory.annotation.Value;
+import org.springframework.stereotype.Service;
+import com.ruoyi.birdnest.domain.DocumentEmbedding;
+import com.ruoyi.birdnest.service.IDocumentEmbeddingService;
+
+/**
+ * Vector-store backed implementation of {@link IDocumentEmbeddingService}.
+ *
+ * @author chenhaojie
+ * @since 2025-03-05 09:12:17
+ */
+@Service
+public class DocumentEmbeddingServiceImpl implements IDocumentEmbeddingService
+{
+    /** Upper bound on the number of matches returned by one search. */
+    private static final int MAX_RESULTS = 10;
+
+    /** Component value of the placeholder query vector used when no text is given. */
+    private static final float DEFAULT_VECTOR_VALUE = 0.1f;
+
+    @Autowired
+    private PgVectorEmbeddingStore pgVectorEmbeddingStore;
+    @Autowired
+    private OpenAiEmbeddingModel openAiEmbeddingModel;
+
+    /** Dimension the vector store is configured with; query vectors must have the same length. */
+    @Value("${embedding.store.dimension}")
+    private Integer dimension;
+
+    /**
+     * Searches the vector store for the embeddings closest to the query text.
+     * <p>
+     * Without query text a constant placeholder vector is used, so that a
+     * listing is still returned.
+     *
+     * @param documentEmbedding query object; only its {@code text} is read
+     * @return at most {@value #MAX_RESULTS} matches, never {@code null}
+     */
+    @Override
+    public List<DocumentEmbeddingVO> selectDocumentEmbeddingList(DocumentEmbedding documentEmbedding)
+    {
+        String text = documentEmbedding == null ? null : documentEmbedding.getText();
+
+        EmbeddingSearchRequest request = EmbeddingSearchRequest.builder()
+                .queryEmbedding(toQueryEmbedding(text))
+                .maxResults(MAX_RESULTS)
+                .build();
+        EmbeddingSearchResult<TextSegment> searchResult = pgVectorEmbeddingStore.search(request);
+
+        List<DocumentEmbeddingVO> documentEmbeddings = new ArrayList<>();
+        for (EmbeddingMatch<TextSegment> match : searchResult.matches()) {
+            // Entries stored without a text segment cannot be shown; skip them instead of failing.
+            if (match == null || match.embedded() == null) {
+                continue;
+            }
+            TextSegment segment = match.embedded();
+            documentEmbeddings.add(new DocumentEmbeddingVO(
+                    match.embeddingId(),
+                    match.embedding().vector(),
+                    segment.text(),
+                    segment.metadata().getString("fileName"),
+                    segment.metadata().getString("index")));
+        }
+        return documentEmbeddings;
+    }
+
+    /**
+     * Builds the vector to search with.
+     *
+     * @param text query text, may be {@code null} or empty
+     * @return embedding of the text, or the placeholder vector when there is no text
+     */
+    private Embedding toQueryEmbedding(String text)
+    {
+        if (StringUtils.isNotEmpty(text)) {
+            return openAiEmbeddingModel.embed(TextSegment.from(text)).content();
+        }
+        // Sized from the configured store dimension: a hard-coded length breaks the query
+        // as soon as the store uses a different dimension.
+        float[] placeholder = new float[dimension];
+        Arrays.fill(placeholder, DEFAULT_VECTOR_VALUE);
+        return new Embedding(placeholder);
+    }
+
+    /**
+     * Deletes the embeddings with the given ids from the vector store.
+     *
+     * @param embeddingIds ids to delete; {@code null} or empty means nothing to do
+     */
+    @Override
+    public void deleteDocumentEmbeddingByEmbeddingIds(String[] embeddingIds)
+    {
+        if (embeddingIds == null || embeddingIds.length == 0) {
+            return;
+        }
+        pgVectorEmbeddingStore.removeAll(List.of(embeddingIds));
+    }
+}
diff --git a/web/ruoyi-admin/src/main/java/com/ruoyi/web/controller/birdnest/DocumentController.java b/web/ruoyi-admin/src/main/java/com/ruoyi/web/controller/birdnest/DocumentController.java
index d372742..69093c9 100644
--- a/web/ruoyi-admin/src/main/java/com/ruoyi/web/controller/birdnest/DocumentController.java
+++ b/web/ruoyi-admin/src/main/java/com/ruoyi/web/controller/birdnest/DocumentController.java
@@ -4,6 +4,13 @@ import com.mongodb.client.gridfs.model.GridFSFile;
 import com.ruoyi.birdnest.domain.DocumentInfo;
 import com.ruoyi.birdnest.domain.PageResponse;
 import com.ruoyi.birdnest.service.DocumentService;
+import dev.langchain4j.data.document.Document;
+import dev.langchain4j.data.document.DocumentParser;
+import dev.langchain4j.data.document.parser.TextDocumentParser;
+import dev.langchain4j.data.embedding.Embedding;
+import dev.langchain4j.data.segment.TextSegment;
+import dev.langchain4j.model.openai.OpenAiEmbeddingModel;
+import dev.langchain4j.store.embedding.pgvector.PgVectorEmbeddingStore;
 import org.springframework.beans.factory.annotation.Autowired;
 import org.springframework.data.domain.Page;
 import org.springframework.data.domain.PageImpl;
@@ -29,21 +36,23 @@ import java.nio.file.Path; import java.nio.file.Paths; import java.util.ArrayList; import java.util.List; +import dev.langchain4j.data.document.splitter.DocumentSplitters; + @RestController @RequestMapping("/documents") public class DocumentController { - @Autowired private GridFsTemplate gridFsTemplate; - @Autowired private MongoTemplate mongoTemplate; - - @Autowired private DocumentService documentService; + @Autowired + private OpenAiEmbeddingModel embeddingModel; + @Autowired + private PgVectorEmbeddingStore vectorStore; // 分片上传逻辑 @PostMapping("/upload/chunk")
@@ -147,4 +156,47 @@ public class DocumentController {
         PageResponse response = documentService.findFilesByName(fileName, page, size);
         return ResponseEntity.ok(response);
     }
+
+    /**
+     * Splits a stored document into text segments, embeds them and saves the
+     * vectors together with their segments in the vector store.
+     * <p>
+     * NOTE(review): calling this twice for the same file appears to store its
+     * segments twice; nothing here removes earlier vectors. Confirm intended.
+     *
+     * @param fileName GridFS file name of the document to embed
+     * @return 200 with {@code true} once all segments are stored, 404 when no file has that name
+     * @throws IOException if the file content cannot be read
+     */
+    @PostMapping("/{fileName}/embedding")
+    public ResponseEntity<Boolean> documentEmbedding(@PathVariable String fileName) throws IOException {
+        Query query = new Query(Criteria.where("filename").is(fileName));
+        GridFSFile gridFSFile = gridFsTemplate.findOne(query);
+        if (gridFSFile == null) {
+            return ResponseEntity.status(HttpStatus.NOT_FOUND).build();
+        }
+
+        GridFsResource resource = gridFsTemplate.getResource(gridFSFile);
+        DocumentParser parser = new TextDocumentParser();
+        Document document;
+        // Close the GridFS download stream once parsing is done.
+        try (java.io.InputStream content = resource.getInputStream()) {
+            document = parser.parse(content);
+        }
+        document.metadata().put("fileName", fileName);
+
+        // Recursive splitter: segments of at most 1024 characters with up to 256 characters of overlap.
+        List<TextSegment> segments = DocumentSplitters.recursive(1024, 256).split(document);
+
+        // Embed and store in batches of 16 segments.
+        int batchSize = 16;
+        for (int start = 0; start < segments.size(); start += batchSize) {
+            int end = Math.min(start + batchSize, segments.size());
+            List<TextSegment> batch = segments.subList(start, end);
+            List<Embedding> embeddings = embeddingModel.embedAll(batch).content();
+            vectorStore.addAll(embeddings, batch);
+        }
+
+        return ResponseEntity.ok(true);
+    }
 }
\ No newline at end of file
diff --git a/web/ruoyi-admin/src/main/java/com/ruoyi/web/controller/birdnest/DocumentEmbeddingController.java
b/web/ruoyi-admin/src/main/java/com/ruoyi/web/controller/birdnest/DocumentEmbeddingController.java
new file mode 100644
index 0000000..5f948fb
--- /dev/null
+++ b/web/ruoyi-admin/src/main/java/com/ruoyi/web/controller/birdnest/DocumentEmbeddingController.java
@@ -0,0 +1,74 @@
+package com.ruoyi.web.controller.birdnest;
+
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.ruoyi.birdnest.domain.DocumentEmbedding;
+import com.ruoyi.birdnest.domain.vo.DocumentEmbeddingVO;
+import com.ruoyi.birdnest.service.IDocumentEmbeddingService;
+import com.ruoyi.common.core.page.TableDataInfo;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.data.domain.Page;
+import org.springframework.http.ResponseEntity;
+import org.springframework.web.bind.annotation.GetMapping;
+import org.springframework.web.bind.annotation.RequestMapping;
+import org.springframework.web.bind.annotation.RequestParam;
+import org.springframework.web.bind.annotation.RestController;
+
+import java.util.List;
+
+import javax.servlet.http.HttpServletResponse;
+import org.springframework.security.access.prepost.PreAuthorize;
+import org.springframework.web.bind.annotation.PostMapping;
+import org.springframework.web.bind.annotation.PutMapping;
+import org.springframework.web.bind.annotation.DeleteMapping;
+import org.springframework.web.bind.annotation.PathVariable;
+import org.springframework.web.bind.annotation.RequestBody;
+import com.ruoyi.common.annotation.Log;
+import com.ruoyi.common.core.controller.BaseController;
+import com.ruoyi.common.core.domain.AjaxResult;
+import com.ruoyi.common.enums.BusinessType;
+import com.ruoyi.common.utils.poi.ExcelUtil;
+
+/**
+ * REST endpoints for searching and deleting document embeddings in the vector store.
+ *
+ * @author chenhaojie
+ * @since 2025-03-05 09:12:17
+ */
+@RestController
+@RequestMapping("/documents/embeddings")
+public class DocumentEmbeddingController extends BaseController{
+    @Autowired
+    private IDocumentEmbeddingService documentEmbeddingService;
+
+    /**
+     * Searches the vector store for embeddings similar to the given query text.
+     * <p>
+     * {@code startPage()} is deliberately not called: it arms PageHelper for the next
+     * MyBatis query on this thread, and this lookup never runs one, so the paging
+     * state would leak into an unrelated later query.
+     *
+     * @param documentEmbedding search criteria bound from the request parameters
+     * @return table data wrapping the matching embeddings
+     */
+    @PreAuthorize("@ss.hasPermi('system:embedding:list')")
+    @GetMapping("/list")
+    public TableDataInfo list(DocumentEmbedding documentEmbedding) {
+        List<DocumentEmbeddingVO> list = documentEmbeddingService.selectDocumentEmbeddingList(documentEmbedding);
+        return getDataTable(list);
+    }
+
+    /**
+     * Deletes the embeddings with the given ids.
+     *
+     * @param embeddingIds ids taken from the request path
+     * @return operation result
+     */
+    @PreAuthorize("@ss.hasPermi('system:embedding:remove')")
+    @Log(title = "文档向量", businessType = BusinessType.DELETE)
+    @DeleteMapping("/{embeddingIds}")
+    public AjaxResult remove(@PathVariable String[] embeddingIds)
+    {
+        documentEmbeddingService.deleteDocumentEmbeddingByEmbeddingIds(embeddingIds);
+        return toAjax(true);
+    }
+}
diff --git a/web/ruoyi-admin/src/main/resources/application.yml b/web/ruoyi-admin/src/main/resources/application.yml
index c7c162d..ca9148f 100644
--- a/web/ruoyi-admin/src/main/resources/application.yml
+++ b/web/ruoyi-admin/src/main/resources/application.yml
@@ -135,3 +135,19 @@ data:
   mongodb:
     uri: mongodb://localhost:27017/birdnest
     database: birdnest
+
+# Settings read by OpenAiEmbeddingModelConfig and PgVectorEmbeddingStoreConfig.
+# Every key is injected with @Value and has no default value.
+embedding:
+  model:
+    apiKey:
+    baseUrl:
+    modelName:
+  store:
+    host:
+    port:
+    user:
+    password:
+    database:
+    table:
+    dimension:
\ No newline at end of file
diff --git a/web/ruoyi-framework/pom.xml b/web/ruoyi-framework/pom.xml
index 882c17f..bc03d27 100644
--- a/web/ruoyi-framework/pom.xml
+++ b/web/ruoyi-framework/pom.xml
@@ -63,6 +63,31 @@ spring-data-mongodb + + + dev.langchain4j + langchain4j-open-ai + 1.0.0-beta1 + + + + dev.langchain4j + langchain4j-document-loader-amazon-s3 + 1.0.0-beta1 + + + + dev.langchain4j + langchain4j + 1.0.0-beta1 + + + dev.langchain4j + langchain4j-pgvector + 1.0.0-beta1 + + +
\ No newline at end of file
diff --git a/web/ruoyi-framework/src/main/java/com/ruoyi/framework/config/OpenAiEmbeddingModelConfig.java b/web/ruoyi-framework/src/main/java/com/ruoyi/framework/config/OpenAiEmbeddingModelConfig.java
new file mode 100644
index 0000000..b15242b
--- /dev/null
+++
b/web/ruoyi-framework/src/main/java/com/ruoyi/framework/config/OpenAiEmbeddingModelConfig.java
@@ -0,0 +1,42 @@
+package com.ruoyi.framework.config;
+
+import dev.langchain4j.model.openai.OpenAiEmbeddingModel;
+import org.springframework.beans.factory.annotation.Value;
+import org.springframework.context.annotation.Bean;
+import org.springframework.context.annotation.Configuration;
+
+/**
+ * Spring configuration that exposes an {@link OpenAiEmbeddingModel} bean built
+ * from the {@code embedding.model.*} properties.
+ *
+ * @author chenhaojie
+ * @since 2025-03-05 16:08:42
+ */
+@Configuration
+public class OpenAiEmbeddingModelConfig {
+    /** Value of {@code embedding.model.apiKey}, passed to the model builder. */
+    @Value("${embedding.model.apiKey}")
+    private String apiKey;
+
+    /** Value of {@code embedding.model.baseUrl}, passed to the model builder. */
+    @Value("${embedding.model.baseUrl}")
+    private String baseUrl;
+
+    /** Value of {@code embedding.model.modelName}, passed to the model builder. */
+    @Value("${embedding.model.modelName}")
+    private String modelName;
+
+    /**
+     * Creates the embedding model from the injected settings.
+     *
+     * @return the configured {@link OpenAiEmbeddingModel}
+     */
+    @Bean
+    public OpenAiEmbeddingModel initOpenAiEmbeddingModel() {
+        return OpenAiEmbeddingModel.builder()
+                .apiKey(apiKey)
+                .baseUrl(baseUrl)
+                .modelName(modelName)
+                .build();
+    }
+}
diff --git a/web/ruoyi-framework/src/main/java/com/ruoyi/framework/config/PgVectorEmbeddingStoreConfig.java b/web/ruoyi-framework/src/main/java/com/ruoyi/framework/config/PgVectorEmbeddingStoreConfig.java
new file mode 100644
index 0000000..75965b9
--- /dev/null
+++ b/web/ruoyi-framework/src/main/java/com/ruoyi/framework/config/PgVectorEmbeddingStoreConfig.java
@@ -0,0 +1,49 @@
+package com.ruoyi.framework.config;
+
+import dev.langchain4j.store.embedding.pgvector.PgVectorEmbeddingStore;
+import org.springframework.beans.factory.annotation.Value;
+import org.springframework.context.annotation.Bean;
+import org.springframework.context.annotation.Configuration;
+
+/**
+ * Spring configuration that exposes a {@link PgVectorEmbeddingStore} bean built
+ * from the {@code embedding.store.*} properties.
+ *
+ * @author chenhaojie
+ * @since 2025-03-05 15:56:43
+ */
+@Configuration
+public class PgVectorEmbeddingStoreConfig {
+    @Value("${embedding.store.host}")
+    private String host;
+    @Value("${embedding.store.port}")
+    private Integer port;
+    @Value("${embedding.store.user}")
+    private String user;
+    @Value("${embedding.store.password}")
+    private String password;
+    @Value("${embedding.store.database}")
+    private String database;
+    @Value("${embedding.store.table}")
+    private String table;
+    @Value("${embedding.store.dimension}")
+    private Integer dimension;
+
+    /**
+     * Creates the vector store from the injected connection and table settings.
+     *
+     * @return the configured {@link PgVectorEmbeddingStore}
+     */
+    @Bean
+    public PgVectorEmbeddingStore initPgVectorEmbeddingStore() {
+        return PgVectorEmbeddingStore.builder()
+                .host(host)
+                .port(port)
+                .user(user)
+                .password(password)
+                .database(database)
+                .table(table)
+                .dimension(dimension)
+                .build();
+    }
+}
-- Gitee