diff --git a/content/docs-lite/zh/docs/DataVec/DataVec-integrations.md b/content/docs-lite/zh/docs/DataVec/DataVec-integrations.md index 9345bc5a4ece15fe1654136a2d4ac8334822cfeb..dc98a9e63988bf59cbab1634bcfb488b8d0b4d48 100644 --- a/content/docs-lite/zh/docs/DataVec/DataVec-integrations.md +++ b/content/docs-lite/zh/docs/DataVec/DataVec-integrations.md @@ -13,7 +13,7 @@ openGauss DataVec提供多种第三方组件的集成教程,并通过多语言 ## API Reference - [Python](https://gitee.com/opengauss/openGauss-connector-python-psycopg2) -- [Java](https://gitee.com/opengauss/openGauss-connector-jdbc) +- [Java](integrationJava.md) - [Node.js](https://gitee.com/opengauss/openGauss-connector-nodejs) - [Go](https://gitee.com/opengauss/openGauss-connector-go-pq) diff --git a/content/docs-lite/zh/docs/DataVec/DataVec-tutorials.md b/content/docs-lite/zh/docs/DataVec/DataVec-tutorials.md index d5852bed20621a699e3f0434bdc729a8ab8ac819..fdf3cc46fc0a8a80687b324ef900b27759ab7af1 100644 --- a/content/docs-lite/zh/docs/DataVec/DataVec-tutorials.md +++ b/content/docs-lite/zh/docs/DataVec/DataVec-tutorials.md @@ -4,4 +4,4 @@ - [打破AI黑盒,拥抱开源力量:基于openGauss+DeepSeek的本地知识库,打造你的专属AI助手!](openGauss-RAG实践.md) - [openGauss DataVec + Dify, 快速搭建你的智能助手平台](openGauss-Dify.md) -- SpringBoot 集成 openGauss DataVec, 智能问答一站式开发 +- [SpringBoot 集成 openGauss DataVec, 实现向量化检索](openGauss-Springboot.md) diff --git a/content/docs-lite/zh/docs/DataVec/figures/opgs-springboot.png b/content/docs-lite/zh/docs/DataVec/figures/opgs-springboot.png new file mode 100644 index 0000000000000000000000000000000000000000..8c6ea80b3f041c84bcaf9a8f6b727a9e2169d369 Binary files /dev/null and b/content/docs-lite/zh/docs/DataVec/figures/opgs-springboot.png differ diff --git a/content/docs-lite/zh/docs/DataVec/integrationJava.md b/content/docs-lite/zh/docs/DataVec/integrationJava.md new file mode 100644 index 0000000000000000000000000000000000000000..d653097d3619e2da48c002ea43de6135667d3538 --- /dev/null +++ b/content/docs-lite/zh/docs/DataVec/integrationJava.md @@ 
-0,0 +1,134 @@ +# Java SDK对接向量数据库 +本文介绍如何使用Java语言调用openGauss向量数据库 + +## 要求 +- 安装java1.8及以上版本 +- Apache Maven + +## 安装SDK +开发者可以直接从maven中央仓库中获取jar包[maven中央仓库下载](https://central.sonatype.com/artifact/org.opengauss/opengauss-jdbc),也可以在openGauss官网下载[社区官网下载](https://opengauss.org/zh/download/),运行以下命令安装Java SDK +```xml + + org.opengauss + opengauss-jdbc + your version + +``` +## 基本操作 +### 1.连接数据库 +```java +public Connection getConnection(String username, String passwd) +{ + String driver = "org.opengauss.Driver"; + String sourceURL = "jdbc:opengauss://localhost:port/database_name"; + Connection conn = null; + + try { + Class.forName(driver).getDeclaredConstructor().newInstance(); + } catch(Exception e) { + e.printStackTrace(); + return null; + } + try { + conn = DriverManager.getConnection(sourceURL, username, passwd); + System.out.println("Connection succeed!"); + } catch(Exception e) { + e.printStackTrace(); + return null; + } + return conn; +} +``` +### 2.创建表 +```java +// 执行普通SQL语句 +public void ExecuteSQL(Connection conn, String sql) +{ + Statement stmt = null; + try { + stmt = conn.createStatement(); + int rc = stmt.executeUpdate(sql); + stmt.close(); + } catch (SQLException e) { + if (stmt != null) { + try { + stmt.close(); + } catch (SQLException e1) { + e1.printStackTrace(); + } + } + e.printStackTrace(); + } +} + +public void CreateTable(int dim) +{ + String sql = String.format("CREATE TABLE IF NOT EXISTS demotable(id INTEGER, content TEXT, embedding vector(%d));", dim); + ExecuteSQL(sql); +} +``` +### 3.创建索引 +```java +// 用L2距离创建HNSW类型的向量索引 +public void CreateIndex() +{ + String sql = String.format("CREATE INDEX ON demotable USING hnsw (embedding vector_l2_ops);"); + ExecuteSQL(sql); +} +``` +### 4.插入/删除/更新 +- 插入 + ```java +public void InsertDataSingle(int id, String content, String vector) +{ + String sql = String.format("INSERT INTO demotable VALUES(%d, '%s', '%s');", id, content, vector); + ExecuteSQL(sql); +} +``` +- 删除 +```java +public void DeleteData() +{ + 
String sql = String.format("DELETE FROM demotable where id > 10;"); + ExecuteSQL(sql); +} +``` +- 更新 +```java +public void UpdateData(String vector) +{ + String sql = String.format("UPDATE demotable set embedding = '%s' where id = 10;", vector); + ExecuteSQL(sql); +} +``` +### 5.查询 +```java +public String findNearestVectors(Connection conn, int efsearch, String vector, int topK) +{ + Statement statement = null; + ResultSet resultSet = null; + String res = ""; + // 设置查询参数 + String paramsql = String.format("set hnsw_ef_search = %d;", efsearch); + ExecuteSQL(conn, paramsql); + String querysql = String.format("SELECT * FROM demotable ORDER BY embedding <-> '%s' LIMIT %d;", vector, topK); + try { + statement = conn.createStatement(); + resultSet = statement.executeQuery(querysql); + while (resultSet.next()) { + int id = resultSet.getInt("id"); + String content = resultSet.getString("content"); + Object embed = resultSet.getObject("embedding"); + // 替换成你希望的结果 + res += "id: " + id + ", content: " + content + ",embedding: " + embed + "\n"; + } + } catch (Exception e) { + e.printStackTrace(); + } finally { + try { if (resultSet != null) resultSet.close(); } catch(Exception e) {} + try { if (statement != null) statement.close(); } catch(Exception e) {} + } + return res; +} +``` +[更多操作示例参考](https://gitee.com/opengauss/openGauss-connector-jdbc) \ No newline at end of file diff --git a/content/docs-lite/zh/docs/DataVec/openGauss-Springboot.md b/content/docs-lite/zh/docs/DataVec/openGauss-Springboot.md new file mode 100644 index 0000000000000000000000000000000000000000..62aa6029285b660e64ff0e2ca14668472b299482 --- /dev/null +++ b/content/docs-lite/zh/docs/DataVec/openGauss-Springboot.md @@ -0,0 +1,200 @@ +# Spring Boot集成openGauss DataVec实现向量化检索 +本文将介绍如何在 Spring Boot 框架中集成 openGauss DataVec 向量数据库,并调用 Ollama 服务提供的 embedding 服务,以此高效实现数据的向量化存储与检索,为 RAG(检索增强生成)提供助力。 +## 要求 +- 安装java17及以上版本 +- Spring Boot 3.X及以上版本 +- Ollama服务安装部署 [部署参考](https://github.com/ollama/ollama) +- openGauss数据库安装部署 
[容器镜像安装](../InstallationGuide/容器镜像安装.md) + +# 添加Maven依赖 +在pom.xml中添加openGauss jdbc和ollama sdk依赖 +```xml +<dependency> + <groupId>org.opengauss</groupId> + <artifactId>opengauss-jdbc</artifactId> + <version>6.0.1</version> +</dependency> +<dependency> + <groupId>org.springframework.ai</groupId> + <artifactId>spring-ai-ollama-spring-boot-starter</artifactId> + <version>1.0.0.M2</version> +</dependency> +``` +# 配置properties文件 +在application.properties文件中配置相应的数据信息 +``` +server.port=8088 +spring.application.name=your_project_name + +spring.datasource.url=jdbc:opengauss://localhost:port/database_name +spring.datasource.username=username +spring.datasource.password=password +spring.datasource.driver-class-name=org.opengauss.Driver + +ollama.model=nomic-embed-text:latest // 你选择的embedding模型 +ollama.modelDim=768 // 向量化后数据的维度 +ollama.embeddingURL=ip:port // ollama服务运行的服务器ip和port +``` +![流程图](figures/opgs-springboot.png) +# 向量数据库配置和操作类 +- 向量数据库的配置类,获取服务地址、用户名、密码等,并[建立连接](integrationJava.md) +```java +@Configuration +public class opgsConfig { + @Value("${spring.datasource.url}") + private String url; + + @Value("${spring.datasource.username}") + private String username; + + @Value("${spring.datasource.password}") + private String password; + + @Value("${spring.datasource.driver-class-name}") + private String driver; + + public Connection getConnection() { + // 连接数据库 + } +} +``` +- 向量数据库的操作类,与数据库交互,完成增删改查,表、向量索引的创建等 [示例参考](integrationJava.md) +```java +@Repository +public class Repository { + private Connection conn; + + public void CreateTable(int dim) + { + ... + } + + public void CreateIndex() + { + ... + } + + public void InsertDataSingle(int id, String content, String vector) + { + ... + } + + public String findNearestVectors(int efsearch, String vector, int topK) + { + ... + } + ... 
+} +``` +# Service层 +调用Ollama服务将Controller层传递过来的原始数据embedding,并调用操作类中封装的API实现数据访问 +```java +@Service +public class Service { + private final Repository repository; + + @Value("${ollama.modelDim}") + private int vectorDim; + + @Value("${ollama.embeddingURL}") + private String embeddingURL; + + @Value("${ollama.model}") + private String ollamaModel; + + + // 调取Ollama服务embedding + public float[] getEmbedding(String message) + { + OllamaApi ollamaApi = new OllamaApi(embeddingURL); + OllamaOptions options = OllamaOptions.builder().withModel(ollamaModel).build(); + OllamaEmbeddingModel embeddingModel = new OllamaEmbeddingModel(ollamaApi, options); + EmbeddingResponse embeddingResponse = embeddingModel.call(new EmbeddingRequest(List.of(message), options)); + return embeddingResponse.getResult().getOutput(); + } + + // 调取Repository类与向量数据库交互的API + public void CreateTxtTable() + { + repository.CreateTable(vectorDim); + } + + public void InsertTuples(int id, String message) + { + float[] res = getEmbedding(message); + repository.InsertDataSingle(id, message, Arrays.toString(res)); + } + + public void IndexTxt() + { + repository.CreateIndex(); + } + + public String QueryContent(int efsearch, String query, int topK) + { + float[] res = getEmbedding(query); + return repository.findNearestVectors(efsearch, Arrays.toString(res), topK); + } + ... +} +``` + +# Controller层 +```java +@RestController +public class Controller { + @Autowired + private Service service; + + @GetMapping("/index") + public String IndexDoc() + { + service.CreateTxtTable(); + service.InsertTuples(0, "大规模预训练语言模型 高效并行训练 支持多种NLP任务"); + service.InsertTuples(1, "多模态融合模型 结合文本、图像和音频输入 提供全面的数据理解能力"); + service.InsertTuples(2, "分布式深度学习框架 易于扩展 支持大规模数据处理"); + service.InsertTuples(3, "视频理解与生成模型 先进的时间序列分析技术 适用于监控和娱乐"); + service.InsertTuples(4, "超高分辨率图像生成模型 GAN架构 强大的细节捕捉能力"); + service.IndexTxt(); + return "embedding and index succeed!"; 
+ + } + + @GetMapping("/queryVector") + public String queryVector() + { + String query = "适合高效并行训练的大语言模型有哪些"; + int topK = 3; + String res = service.QueryContent(2, query, topK); + System.out.println(res); + return res; + } + +} +``` + +# 结果展示 +- 网页中输入localhost:8088/index完成文本的embedding和索引创建 + +页面会返回如下结果,用户可根据结果自定义前端页面 +``` +embedding and index succeed! +``` +- 网页中输入localhost:8088/queryVector得到json格式的查询结果 + +``` +[ + { + "id": 0, + "content": "大规模预训练语言模型 高效并行训练 支持多种NLP任务" + }, + { + "id": 4, + "content": "超高分辨率图像生成模型 GAN架构 强大的细节捕捉能力" + }, + { + "id": 2, + "content": "分布式深度学习框架 易于扩展 支持大规模数据处理" + }, +] +``` \ No newline at end of file diff --git a/content/zh/docs/DataVec/DataVec-integrations.md b/content/zh/docs/DataVec/DataVec-integrations.md index 9345bc5a4ece15fe1654136a2d4ac8334822cfeb..dc98a9e63988bf59cbab1634bcfb488b8d0b4d48 100644 --- a/content/zh/docs/DataVec/DataVec-integrations.md +++ b/content/zh/docs/DataVec/DataVec-integrations.md @@ -13,7 +13,7 @@ openGauss DataVec提供多种第三方组件的集成教程,并通过多语言 ## API Reference - [Python](https://gitee.com/opengauss/openGauss-connector-python-psycopg2) -- [Java](https://gitee.com/opengauss/openGauss-connector-jdbc) +- [Java](integrationJava.md) - [Node.js](https://gitee.com/opengauss/openGauss-connector-nodejs) - [Go](https://gitee.com/opengauss/openGauss-connector-go-pq) diff --git a/content/zh/docs/DataVec/DataVec-tutorials.md b/content/zh/docs/DataVec/DataVec-tutorials.md index d5852bed20621a699e3f0434bdc729a8ab8ac819..fdf3cc46fc0a8a80687b324ef900b27759ab7af1 100644 --- a/content/zh/docs/DataVec/DataVec-tutorials.md +++ b/content/zh/docs/DataVec/DataVec-tutorials.md @@ -4,4 +4,4 @@ - [打破AI黑盒,拥抱开源力量:基于openGauss+DeepSeek的本地知识库,打造你的专属AI助手!](openGauss-RAG实践.md) - [openGauss DataVec + Dify, 快速搭建你的智能助手平台](openGauss-Dify.md) -- SpringBoot 集成 openGauss DataVec, 智能问答一站式开发 +- [SpringBoot 集成 openGauss DataVec, 实现向量化检索](openGauss-Springboot.md) diff --git a/content/zh/docs/DataVec/figures/opgs-springboot.png 
b/content/zh/docs/DataVec/figures/opgs-springboot.png new file mode 100644 index 0000000000000000000000000000000000000000..8c6ea80b3f041c84bcaf9a8f6b727a9e2169d369 Binary files /dev/null and b/content/zh/docs/DataVec/figures/opgs-springboot.png differ diff --git a/content/zh/docs/DataVec/integrationJava.md b/content/zh/docs/DataVec/integrationJava.md new file mode 100644 index 0000000000000000000000000000000000000000..d653097d3619e2da48c002ea43de6135667d3538 --- /dev/null +++ b/content/zh/docs/DataVec/integrationJava.md @@ -0,0 +1,134 @@ +# Java SDK对接向量数据库 +本文介绍如何使用Java语言调用openGauss向量数据库 + +## 要求 +- 安装java1.8及以上版本 +- Apache Maven + +## 安装SDK +开发者可以直接从maven中央仓库中获取jar包[maven中央仓库下载](https://central.sonatype.com/artifact/org.opengauss/opengauss-jdbc),也可以在openGauss官网下载[社区官网下载](https://opengauss.org/zh/download/),运行以下命令安装Java SDK +```xml + + org.opengauss + opengauss-jdbc + your version + +``` +## 基本操作 +### 1.连接数据库 +```java +public Connection getConnection(String username, String passwd) +{ + String driver = "org.opengauss.Driver"; + String sourceURL = "jdbc:opengauss://localhost:port/database_name"; + Connection conn = null; + + try { + Class.forName(driver).getDeclaredConstructor().newInstance(); + } catch(Exception e) { + e.printStackTrace(); + return null; + } + try { + conn = DriverManager.getConnection(sourceURL, username, passwd); + System.out.println("Connection succeed!"); + } catch(Exception e) { + e.printStackTrace(); + return null; + } + return conn; +} +``` +### 2.创建表 +```java +// 执行普通SQL语句 +public void ExecuteSQL(Connection conn, String sql) +{ + Statement stmt = null; + try { + stmt = conn.createStatement(); + int rc = stmt.executeUpdate(sql); + stmt.close(); + } catch (SQLException e) { + if (stmt != null) { + try { + stmt.close(); + } catch (SQLException e1) { + e1.printStackTrace(); + } + } + e.printStackTrace(); + } +} + +public void CreateTable(int dim) +{ + String sql = String.format("CREATE TABLE IF NOT EXISTS demotable(id INTEGER, content TEXT, 
embedding vector(%d));", dim); + ExecuteSQL(sql); +} +``` +### 3.创建索引 +```java +// 用L2距离创建HNSW类型的向量索引 +public void CreateIndex() +{ + String sql = String.format("CREATE INDEX ON demotable USING hnsw (embedding vector_l2_ops);"); + ExecuteSQL(sql); +} +``` +### 4.插入/删除/更新 +- 插入 + ```java +public void InsertDataSingle(int id, String content, String vector) +{ + String sql = String.format("INSERT INTO demotable VALUES(%d, '%s', '%s');", id, content, vector); + ExecuteSQL(sql); +} +``` +- 删除 +```java +public void DeleteData() +{ + String sql = String.format("DELETE FROM demotable where id > 10;"); + ExecuteSQL(sql); +} +``` +- 更新 +```java +public void UpdateData(String vector) +{ + String sql = String.format("UPDATE demotable set embedding = '%s' where id = 10;", vector); + ExecuteSQL(sql); +} +``` +### 5.查询 +```java +public String findNearestVectors(Connection conn, int efsearch, String vector, int topK) +{ + Statement statement = null; + ResultSet resultSet = null; + String res = ""; + // 设置查询参数 + String paramsql = String.format("set hnsw_ef_search = %d;", efsearch); + ExecuteSQL(conn, paramsql); + String querysql = String.format("SELECT * FROM demotable ORDER BY embedding <-> '%s' LIMIT %d;", vector, topK); + try { + statement = conn.createStatement(); + resultSet = statement.executeQuery(querysql); + while (resultSet.next()) { + int id = resultSet.getInt("id"); + String content = resultSet.getString("content"); + Object embed = resultSet.getObject("embedding"); + // 替换成你希望的结果 + res += "id: " + id + ", content: " + content + ",embedding: " + embed + "\n"; + } + } catch (Exception e) { + e.printStackTrace(); + } finally { + try { if (resultSet != null) resultSet.close(); } catch(Exception e) {} + try { if (statement != null) statement.close(); } catch(Exception e) {} + } + return res; +} +``` +[更多操作示例参考](https://gitee.com/opengauss/openGauss-connector-jdbc) \ No newline at end of file diff --git a/content/zh/docs/DataVec/openGauss-Springboot.md 
b/content/zh/docs/DataVec/openGauss-Springboot.md new file mode 100644 index 0000000000000000000000000000000000000000..62aa6029285b660e64ff0e2ca14668472b299482 --- /dev/null +++ b/content/zh/docs/DataVec/openGauss-Springboot.md @@ -0,0 +1,200 @@ +# Spring Boot集成openGauss DataVec实现向量化检索 +本文将介绍如何在 Spring Boot 框架中集成 openGauss DataVec 向量数据库,并调用 Ollama 服务提供的 embedding 服务,以此高效实现数据的向量化存储与检索,为 RAG(检索增强生成)提供助力。 +## 要求 +- 安装java17及以上版本 +- Spring Boot 3.X及以上版本 +- Ollama服务安装部署 [部署参考](https://github.com/ollama/ollama) +- openGauss数据库安装部署 [容器镜像安装](../InstallationGuide/容器镜像安装.md) + +# 添加Maven依赖 +在pom.xml中添加openGauss jdbc和ollama sdk依赖 +```xml +<dependency> + <groupId>org.opengauss</groupId> + <artifactId>opengauss-jdbc</artifactId> + <version>6.0.1</version> +</dependency> +<dependency> + <groupId>org.springframework.ai</groupId> + <artifactId>spring-ai-ollama-spring-boot-starter</artifactId> + <version>1.0.0.M2</version> +</dependency> +``` +# 配置properties文件 +在application.properties文件中配置相应的数据信息 +``` +server.port=8088 +spring.application.name=your_project_name + +spring.datasource.url=jdbc:opengauss://localhost:port/database_name +spring.datasource.username=username +spring.datasource.password=password +spring.datasource.driver-class-name=org.opengauss.Driver + +ollama.model=nomic-embed-text:latest // 你选择的embedding模型 +ollama.modelDim=768 // 向量化后数据的维度 +ollama.embeddingURL=ip:port // ollama服务运行的服务器ip和port +``` +![流程图](figures/opgs-springboot.png) +# 向量数据库配置和操作类 +- 向量数据库的配置类,获取服务地址、用户名、密码等,并[建立连接](integrationJava.md) +```java +@Configuration +public class opgsConfig { + @Value("${spring.datasource.url}") + private String url; + + @Value("${spring.datasource.username}") + private String username; + + @Value("${spring.datasource.password}") + private String password; + + @Value("${spring.datasource.driver-class-name}") + private String driver; + + public Connection getConnection() { + // 连接数据库 + } +} +``` +- 向量数据库的操作类,与数据库交互,完成增删改查,表、向量索引的创建等 [示例参考](integrationJava.md) +```java +@Repository 
+public class Repository { + private Connection conn; + + public void CreateTable(int dim) + { + ... + } + + public void CreateIndex() + { + ... 
+ } + + public void InsertDataSingle(int id, String content, String vector) + { + ... + } + + public String findNearestVectors(int efsearch, String vector, int topK) + { + ... + } + ... +} +``` +# Service层 +调用Ollama服务将Controller层传递过来的原始数据embedding,并调用操作类中封装的API实现数据访问 +```java +@Service +public class Service { + private final Repository repository; + + @Value("${ollama.modelDim}") + private int vectorDim; + + @Value("${ollama.embeddingURL}") + private String embeddingURL; + + @Value("${ollama.model}") + private String ollamaModel; + + + // 调取Ollama服务embedding + public float[] getEmbedding(String message) + { + OllamaApi ollamaApi = new OllamaApi(embeddingURL); + OllamaOptions options = OllamaOptions.builder().withModel(ollamaModel).build(); + OllamaEmbeddingModel embeddingModel = new OllamaEmbeddingModel(ollamaApi, options); + EmbeddingResponse embeddingResponse = embeddingModel.call(new EmbeddingRequest(List.of(message), options)); + return embeddingResponse.getResult().getOutput(); + } + + // 调取Repository类与向量数据库交互的API + public void CreateTxtTable() + { + repository.CreateTable(vectorDim); + } + + public void InsertTuples(int id, String message) + { + float[] res = getEmbedding(message); + repository.InsertDataSingle(id, message, Arrays.toString(res)); + } + + public void IndexTxt() + { + repository.CreateIndex(); + } + + public String QueryContent(int efsearch, String query, int topK) + { + float[] res = getEmbedding(query); + return repository.findNearestVectors(efsearch, Arrays.toString(res), topK); + } + ... 
+} +``` + +# Controller层 +```java +@RestController +public class Controller { + @Autowired + private Service service; + + @GetMapping("/index") + public String IndexDoc() + { + service.CreateTxtTable(); + service.InsertTuples(0, "大规模预训练语言模型 高效并行训练 支持多种NLP任务"); + service.InsertTuples(1, "多模态融合模型 结合文本、图像和音频输入 提供全面的数据理解能力"); + service.InsertTuples(2, "分布式深度学习框架 易于扩展 支持大规模数据处理"); + service.InsertTuples(3, "视频理解与生成模型 先进的时间序列分析技术 适用于监控和娱乐"); + service.InsertTuples(4, "超高分辨率图像生成模型 GAN架构 强大的细节捕捉能力"); + service.IndexTxt(); + return "embedding and index succeed!"; + + } + + @GetMapping("/queryVector") + public String queryVector() + { + String query = "适合高效并行训练的大语言模型有哪些"; + int topK = 3; + String res = service.QueryContent(2, query, topK); + System.out.println(res); + return res; + } + +} +``` + +# 结果展示 +- 网页中输入localhost:8088/index完成文本的embedding和索引创建 + +页面会返回如下结果,用户可根据结果自定义前端页面 +``` +embedding and index succeed! +``` +- 网页中输入localhost:8088/queryVector得到json格式的查询结果 + +``` +[ + { + "id": 0, + "content": "大规模预训练语言模型 高效并行训练 支持多种NLP任务" + }, + { + "id": 4, + "content": "超高分辨率图像生成模型 GAN架构 强大的细节捕捉能力" + }, + { + "id": 2, + "content": "分布式深度学习框架 易于扩展 支持大规模数据处理" + }, +] +``` \ No newline at end of file