diff --git a/doc/dev_2.0.0.sql b/doc/dev_2.0.0.sql new file mode 100644 index 0000000..b995af9 --- /dev/null +++ b/doc/dev_2.0.0.sql @@ -0,0 +1 @@ +-- 新增RAG搜索的SQL \ No newline at end of file diff --git a/know_sub_business/src/main/resources/application.yml b/know_sub_business/src/main/resources/application.yml index 99b2b45..71ab810 100644 --- a/know_sub_business/src/main/resources/application.yml +++ b/know_sub_business/src/main/resources/application.yml @@ -12,8 +12,6 @@ server: # 是否分配的直接内存 direct-buffers: true spring: - elasticsearch: - uris: http://192.168.10.137:9200 main: allow-bean-definition-overriding: true servlet: @@ -77,11 +75,4 @@ knife4j: user: # 默认密码 default: - password: 123456 - -#spring: -# elasticsearch: -# uris: http://192.168.10.137:9200 - -embedding: - url: http://192.168.10.137:8711/embeddings/ \ No newline at end of file + password: 123456 \ No newline at end of file diff --git a/know_sub_etl/pom.xml b/know_sub_etl/pom.xml new file mode 100644 index 0000000..59b50ec --- /dev/null +++ b/know_sub_etl/pom.xml @@ -0,0 +1,154 @@ + + + 4.0.0 + + com.supervision + know_sub + 1.0.0 + + + know_sub_etl + + + 17 + 17 + UTF-8 + + + + + + org.springframework.ai + spring-ai-tika-document-reader + + + + org.apache.httpcomponents + httpclient + 4.5.13 + + + + + + + + + org.springframework.ai + spring-ai-elasticsearch-store + + + + org.springframework.boot + spring-boot-starter-data-elasticsearch + + + + org.springframework.boot + spring-boot-configuration-processor + + + + org.springframework.boot + spring-boot-starter + + + + org.springframework.boot + spring-boot-starter-test + test + + + + com.supervision + know_sub_common + 1.0.0 + + + + com.supervision + know_sub_model + 1.0.0 + + + + org.projectlombok + lombok + provided + + + + + + + + central + aliyun central repo + https://maven.aliyun.com/nexus/content/repositories/central/ + default + + true + daily + + + false + never + + + + spring-snapshots + Spring Snapshots + https://repo.spring.io/snapshot + 
+ false + + + + spring-milestones + Spring Milestones + https://repo.spring.io/milestone + + false + + + + + + + + + + + org.springframework.boot + spring-boot-maven-plugin + 3.3.0 + + + + repackage + + + + + + + org.projectlombok + lombok + + + + + + org.apache.maven.plugins + maven-compiler-plugin + 3.11.0 + + true + + + + + + \ No newline at end of file diff --git a/know_sub_etl/src/main/java/som/supervision/knowsub/KnowSubEtlApplication.java b/know_sub_etl/src/main/java/som/supervision/knowsub/KnowSubEtlApplication.java new file mode 100644 index 0000000..766e5ee --- /dev/null +++ b/know_sub_etl/src/main/java/som/supervision/knowsub/KnowSubEtlApplication.java @@ -0,0 +1,18 @@ +package som.supervision.knowsub; + +import org.mybatis.spring.annotation.MapperScan; +import org.springframework.boot.SpringApplication; +import org.springframework.boot.autoconfigure.SpringBootApplication; +import org.springframework.boot.context.properties.EnableConfigurationProperties; +import org.springframework.scheduling.annotation.EnableScheduling; + +@SpringBootApplication +@EnableScheduling +@MapperScan(basePackages = {"com.supervision.**.mapper"}) +@EnableConfigurationProperties +public class KnowSubEtlApplication { + + public static void main(String[] args) { + SpringApplication.run(KnowSubEtlApplication.class, args); + } +} diff --git a/know_sub_etl/src/main/java/som/supervision/knowsub/config/ElasticsearchVectorStoreConfig.java b/know_sub_etl/src/main/java/som/supervision/knowsub/config/ElasticsearchVectorStoreConfig.java new file mode 100644 index 0000000..8e62217 --- /dev/null +++ b/know_sub_etl/src/main/java/som/supervision/knowsub/config/ElasticsearchVectorStoreConfig.java @@ -0,0 +1,33 @@ +package som.supervision.knowsub.config; + +import org.elasticsearch.client.RestClient; +import org.springframework.ai.embedding.EmbeddingModel; +import org.springframework.ai.vectorstore.ElasticsearchVectorStore; +import 
org.springframework.ai.vectorstore.ElasticsearchVectorStoreOptions; +import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty; +import org.springframework.boot.context.properties.EnableConfigurationProperties; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; +import org.springframework.util.Assert; + +@Configuration +@EnableConfigurationProperties(EmbeddingProperties.class) +public class ElasticsearchVectorStoreConfig { + + @Bean + @ConditionalOnProperty(prefix = "embedding", name = "url") + public EmbeddingModel embeddingModel(EmbeddingProperties embeddingProperties) { + Assert.notNull(embeddingProperties.getUrl(), "配置文件embedding:url未找到"); + return new VectorEmbeddingModel(embeddingProperties.getUrl()); + } + + @Bean + @ConditionalOnProperty(prefix = "embedding", name = "url") + public ElasticsearchVectorStore vectorStore(EmbeddingModel embeddingModel, RestClient restClient) { + ElasticsearchVectorStoreOptions options = new ElasticsearchVectorStoreOptions(); + options.setIndexName("know-sub-rag-store"); + options.setDimensions(1024); + return new ElasticsearchVectorStore(options, restClient, embeddingModel, true); + } + +} diff --git a/know_sub_etl/src/main/java/som/supervision/knowsub/config/EmbeddingProperties.java b/know_sub_etl/src/main/java/som/supervision/knowsub/config/EmbeddingProperties.java new file mode 100644 index 0000000..f9a5839 --- /dev/null +++ b/know_sub_etl/src/main/java/som/supervision/knowsub/config/EmbeddingProperties.java @@ -0,0 +1,12 @@ +package som.supervision.knowsub.config; + +import lombok.Data; +import org.springframework.boot.context.properties.ConfigurationProperties; + +@Data +@ConfigurationProperties(prefix = "embedding") +public class EmbeddingProperties { + + private String url; + +} diff --git a/know_sub_etl/src/main/java/som/supervision/knowsub/config/VectorEmbeddingModel.java 
b/know_sub_etl/src/main/java/som/supervision/knowsub/config/VectorEmbeddingModel.java
new file mode 100644
index 0000000..cc7d578
--- /dev/null
+++ b/know_sub_etl/src/main/java/som/supervision/knowsub/config/VectorEmbeddingModel.java
@@ -0,0 +1,87 @@
+package som.supervision.knowsub.config;
+
+import cn.hutool.http.HttpUtil;
+import cn.hutool.json.JSONUtil;
+import lombok.Data;
+import lombok.extern.slf4j.Slf4j;
+import org.springframework.ai.document.Document;
+import org.springframework.ai.embedding.*;
+import org.springframework.util.Assert;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * {@link EmbeddingModel} that obtains vectors from an external HTTP embedding service.
+ *
+ * <p>Every input text is POSTed to the configured URL as the JSON object {@code {"text": ...}};
+ * the JSON reply is mapped onto {@link EmbeddingData}, whose {@code embeddings} field is used as
+ * the vector for that text.
+ */
+@Slf4j
+public class VectorEmbeddingModel implements EmbeddingModel {
+
+    private final String embeddingUrl;
+
+    /**
+     * @param embeddingUrl URL of the embedding endpoint that texts are POSTed to
+     */
+    public VectorEmbeddingModel(String embeddingUrl) {
+        this.embeddingUrl = embeddingUrl;
+    }
+
+    /**
+     * Embeds the content of a single document.
+     *
+     * @param document document whose content is sent to the embedding service
+     * @return the vector returned for the document content
+     */
+    @Override
+    public List<Double> embed(Document document) {
+        EmbeddingRequest request = new EmbeddingRequest(List.of(document.getContent()), EmbeddingOptions.EMPTY);
+        // call() yields one result per input text, so a single-text request has exactly one result
+        return this.call(request).getResults().get(0).getOutput();
+    }
+
+    /**
+     * Embeds every text of the request, one HTTP call per text, keeping the request order.
+     *
+     * @param request request carrying at least one text
+     * @return response whose i-th embedding belongs to the i-th text of the request
+     * @throws IllegalArgumentException if the request carries no text
+     * @throws IllegalStateException    if the service reply contains no embedding for a text
+     */
+    @Override
+    public EmbeddingResponse call(EmbeddingRequest request) {
+        Assert.notEmpty(request.getInstructions(), "At least one text is required!");
+        List<Embedding> embeddings = new ArrayList<>(request.getInstructions().size());
+        for (String inputContent : request.getInstructions()) {
+            // the embedding index is the position of its text within the request
+            embeddings.add(new Embedding(requestEmbedding(inputContent), embeddings.size()));
+        }
+        return new EmbeddingResponse(embeddings);
+    }
+
+    /**
+     * Asks the embedding service for the vector of one text and validates the reply.
+     */
+    private List<Double> requestEmbedding(String inputContent) {
+        String responseBody = HttpUtil.post(embeddingUrl, JSONUtil.toJsonStr(Map.of("text", inputContent)));
+        EmbeddingData data = JSONUtil.toBean(responseBody, EmbeddingData.class);
+        // fail here with context instead of handing a null vector to the vector store
+        if (data == null || data.embeddings == null || data.embeddings.isEmpty()) {
+            throw new IllegalStateException(
+                    "Embedding service " + embeddingUrl + " returned no embeddings, response: " + responseBody);
+        }
+        return data.embeddings;
+    }
+
+    /**
+     * JSON shape of the embedding service reply.
+     */
+    @Data
+    private static class EmbeddingData {
+        private List<Double> embeddings;
+    }
+}
diff --git 
a/know_sub_etl/src/main/java/som/supervision/knowsub/controller/KnowledgeEtlController.java b/know_sub_etl/src/main/java/som/supervision/knowsub/controller/KnowledgeEtlController.java new file mode 100644 index 0000000..23ed8a4 --- /dev/null +++ b/know_sub_etl/src/main/java/som/supervision/knowsub/controller/KnowledgeEtlController.java @@ -0,0 +1,27 @@ +package som.supervision.knowsub.controller; + +import io.swagger.v3.oas.annotations.Operation; +import io.swagger.v3.oas.annotations.tags.Tag; +import lombok.RequiredArgsConstructor; +import org.apache.ibatis.annotations.Param; +import org.springframework.web.bind.annotation.*; +import org.springframework.web.multipart.MultipartFile; +import som.supervision.knowsub.service.KnowledgeEtlService; + +import java.io.IOException; + +@Tag(name = "知识ETL类") +@RestController +@RequestMapping("etl") +@RequiredArgsConstructor +public class KnowledgeEtlController { + + private final KnowledgeEtlService knowledgeEtlService; + + @Operation(summary = "对知识进行ETL") + @PostMapping("knowledgeEtl") + public void knowledgeEtl(@RequestParam("files") MultipartFile[] files) { + knowledgeEtlService.knowledgeEtl(files); + } + +} diff --git a/know_sub_etl/src/main/java/som/supervision/knowsub/service/KnowledgeEtlService.java b/know_sub_etl/src/main/java/som/supervision/knowsub/service/KnowledgeEtlService.java new file mode 100644 index 0000000..96d3ab3 --- /dev/null +++ b/know_sub_etl/src/main/java/som/supervision/knowsub/service/KnowledgeEtlService.java @@ -0,0 +1,12 @@ +package som.supervision.knowsub.service; + +import org.springframework.web.bind.annotation.RequestParam; +import org.springframework.web.multipart.MultipartFile; + +import java.io.IOException; + +public interface KnowledgeEtlService { + + void knowledgeEtl(MultipartFile[] files); + +} diff --git a/know_sub_etl/src/main/java/som/supervision/knowsub/service/impl/KnowledgeEtlServiceImpl.java 
b/know_sub_etl/src/main/java/som/supervision/knowsub/service/impl/KnowledgeEtlServiceImpl.java
new file mode 100644
index 0000000..a5824f5
--- /dev/null
+++ b/know_sub_etl/src/main/java/som/supervision/knowsub/service/impl/KnowledgeEtlServiceImpl.java
@@ -0,0 +1,74 @@
+package som.supervision.knowsub.service.impl;
+
+import lombok.RequiredArgsConstructor;
+import lombok.extern.slf4j.Slf4j;
+import org.springframework.ai.document.Document;
+import org.springframework.ai.reader.tika.TikaDocumentReader;
+import org.springframework.ai.transformer.splitter.TokenTextSplitter;
+import org.springframework.ai.vectorstore.ElasticsearchVectorStore;
+import org.springframework.core.io.InputStreamResource;
+import org.springframework.stereotype.Service;
+import org.springframework.web.multipart.MultipartFile;
+import som.supervision.knowsub.service.KnowledgeEtlService;
+
+import java.io.InputStream;
+import java.util.List;
+
+/**
+ * Loads uploaded files into the Elasticsearch vector store: text extraction, chunking, storage.
+ */
+@Slf4j
+@Service
+@RequiredArgsConstructor
+public class KnowledgeEtlServiceImpl implements KnowledgeEtlService {
+
+    private final ElasticsearchVectorStore elasticsearchVectorStore;
+
+    /**
+     * Extracts the text of one file with Tika, splits it into chunks and stores the chunks in
+     * the vector store. Every chunk gets the file name in its {@code fileName} metadata entry.
+     *
+     * @param inputStream content of the file; the caller remains responsible for closing it
+     * @param fileName    name recorded in the chunk metadata and used in the log messages
+     */
+    private void loadFile(InputStream inputStream, String fileName) {
+        // extract the text of the file with Tika
+        log.info("{} 进行内容切分", fileName);
+        TikaDocumentReader tikaDocumentReader = new TikaDocumentReader(new InputStreamResource(inputStream));
+        List<Document> documents = tikaDocumentReader.read();
+        log.info("{} 切分完成,开始进行chunk分割", fileName);
+        // split the extracted text into chunks
+        TokenTextSplitter tokenTextSplitter = new TokenTextSplitter(500, 250, 10, 1000, true);
+        List<Document> chunks = tokenTextSplitter.apply(documents);
+        for (Document chunk : chunks) {
+            chunk.getMetadata().put("fileName", fileName);
+        }
+        log.info("{} 切分完成,开始进行保存到向量库中", fileName);
+        // store the chunks in the vector store
+        elasticsearchVectorStore.accept(chunks);
+        log.info("{} 保存完成", fileName);
+    }
+
+    /**
+     * Runs the ETL for every uploaded file. A file that fails is logged and skipped so that the
+     * remaining files are still processed.
+     *
+     * @param files uploaded files
+     */
+    @Override
+    public void knowledgeEtl(MultipartFile[] files) {
+        for (int i = 0; i < files.length; i++) {
+            MultipartFile file = files[i];
+            // the stream is opened here, so it is also closed here, whether or not loading succeeds
+            try (InputStream inputStream = file.getInputStream()) {
+                loadFile(inputStream, file.getOriginalFilename());
+            } catch (Exception e) {
+                log.error("{}文件处理失败", file.getOriginalFilename(), e);
+            }
+            // file i + 1 has been handled, so files.length - (i + 1) files are left
+            log.info("处理第{}个文件,剩余:{}个", i + 1, files.length - i - 1);
+        }
+        log.info("文件处理结束");
+    }
+
+}
diff --git a/know_sub_etl/src/main/resources/application.yml b/know_sub_etl/src/main/resources/application.yml new file mode 100644 index 0000000..87349f4 --- /dev/null +++ b/know_sub_etl/src/main/resources/application.yml @@ -0,0 +1,94 @@ +#服务器端口 +server: + port: 9202 + servlet: + context-path: /know-sub-etl + undertow: + # HTTP post内容的最大大小。当值为-1时,默认值为大小是无限的 + max-http-post-size: -1 + # 以下的配置会影响buffer,这些buffer会用于服务器连接的IO操作,有点类似netty的池化内存管理 + # 每块buffer的空间大小,越小的空间被利用越充分 + buffer-size: 512 + # 是否分配的直接内存 + direct-buffers: true +spring: + elasticsearch: + uris: http://192.168.10.137:9200 + + main: + allow-bean-definition-overriding: true + servlet: + multipart: + max-file-size: 100MB + max-request-size: 100MB + ## 数据源配置 + datasource: + type: com.alibaba.druid.pool.DruidDataSource + druid: + driver-class-name: com.mysql.cj.jdbc.Driver + url: jdbc:mysql://192.168.10.137:3306/know_sub?useUnicode=true&characterEncoding=utf-8&useSSL=true&nullCatalogMeansCurrent=true&serverTimezone=GMT%2B8 + username: root + password: '123456' + initial-size: 5 # 初始化大小 + min-idle: 10 # 最小连接数 + max-active: 20 # 最大连接数 + max-wait: 60000 # 获取连接时的最大等待时间 + min-evictable-idle-time-millis: 300000 # 一个连接在池中最小生存的时间,单位是毫秒 + time-between-eviction-runs-millis: 60000 # 多久才进行一次检测需要关闭的空闲连接,单位是毫秒 + filters: stat,wall # 配置扩展插件:stat-监控统计,log4j-日志,wall-防火墙(防止SQL注入),去掉后,监控界面的sql无法统计 + validation-query: SELECT 1 # 检测连接是否有效的 SQL语句,为空时以下三个配置均无效 + test-on-borrow: true # 申请连接时执行validationQuery检测连接是否有效,默认true,开启后会降低性能 + test-on-return: true # 归还连接时执行validationQuery检测连接是否有效,默认false,开启后会降低性能 + test-while-idle: 
true # 申请连接时如果空闲时间大于timeBetweenEvictionRunsMillis,执行validationQuery检测连接是否有效,默认false,建议开启,不影响性能 + stat-view-servlet: + enabled: true # 是否开启 StatViewServlet + loginUsername: admin + loginPassword: 123456 + filter: + stat: + enabled: true # 是否开启 FilterStat,默认true + log-slow-sql: true # 是否开启 慢SQL 记录,默认false + slow-sql-millis: 5000 # 慢 SQL 的标准,默认 3000,单位:毫秒 + merge-sql: false # 合并多个连接池的监控数据,默认false + +mybatis-plus: + mapper-locations: classpath*:mapper/**/*.xml + configuration: + log-impl: org.apache.ibatis.logging.stdout.StdOutImpl + +# springdoc-openapi项目配置 +springdoc: + # 默认是false,需要设置为true 解决GET请求时,用自定义对象接受现实错误的问题 + default-flat-param-object: true + swagger-ui: + path: /swagger-ui.html + tags-sorter: alpha + operations-sorter: alpha + api-docs: + path: /v3/api-docs + group-configs: + - group: 'default' + paths-to-match: '/**' + packages-to-scan: com.supervision +# knife4j的增强配置,不需要增强可以不配 +knife4j: + enable: true + setting: + language: zh_cn +user: + # 默认密码 + default: + password: 123456 + +#spring: +# elasticsearch: +# uris: http://192.168.10.137:9200 + +embedding: + url: http://192.168.10.137:8711/embeddings/ + +vector: + redis: + uri: redis://:123456@192.168.10.137:6380 + indexName: 'know-sub-rag-store' + prefix: 'know-sub-rag-store:' diff --git a/know_sub_rag/pom.xml b/know_sub_rag/pom.xml index 6a704b5..97a296e 100644 --- a/know_sub_rag/pom.xml +++ b/know_sub_rag/pom.xml @@ -19,16 +19,36 @@ - - io.springboot.ai + org.springframework.ai + spring-ai-tika-document-reader + + + + org.apache.httpcomponents + httpclient + 4.5.13 + + + + org.apache.poi + poi-ooxml + + + + + + + + + org.springframework.ai spring-ai-ollama-spring-boot-starter + - io.springboot.ai + org.springframework.ai spring-ai-elasticsearch-store - 1.0.3 @@ -73,6 +93,41 @@ + + + central + aliyun central repo + https://maven.aliyun.com/nexus/content/repositories/central/ + default + + true + daily + + + false + never + + + + spring-snapshots + Spring Snapshots + https://repo.spring.io/snapshot + + false 
+ + + + spring-milestones + Spring Milestones + https://repo.spring.io/milestone + + false + + + + + + diff --git a/know_sub_rag/src/main/java/com/supervision/knowsub/KnowSubRagApplication.java b/know_sub_rag/src/main/java/com/supervision/knowsub/KnowSubRagApplication.java new file mode 100644 index 0000000..e8882a6 --- /dev/null +++ b/know_sub_rag/src/main/java/com/supervision/knowsub/KnowSubRagApplication.java @@ -0,0 +1,20 @@ +package com.supervision.knowsub; + +import org.mybatis.spring.annotation.MapperScan; +import org.springframework.boot.SpringApplication; +import org.springframework.boot.autoconfigure.SpringBootApplication; +import org.springframework.boot.context.properties.EnableConfigurationProperties; +import org.springframework.scheduling.annotation.EnableScheduling; + +import javax.swing.*; + +@SpringBootApplication +@EnableScheduling +@MapperScan(basePackages = {"com.supervision.**.mapper"}) +@EnableConfigurationProperties +public class KnowSubRagApplication { + + public static void main(String[] args) { + SpringApplication.run(KnowSubRagApplication.class, args); + } +} diff --git a/know_sub_rag/src/main/java/com/supervision/knowsub/config/ElasticsearchVectorStoreConfig.java b/know_sub_rag/src/main/java/com/supervision/knowsub/config/ElasticsearchVectorStoreConfig.java index ddf5e87..1921040 100644 --- a/know_sub_rag/src/main/java/com/supervision/knowsub/config/ElasticsearchVectorStoreConfig.java +++ b/know_sub_rag/src/main/java/com/supervision/knowsub/config/ElasticsearchVectorStoreConfig.java @@ -1,8 +1,10 @@ package com.supervision.knowsub.config; import org.elasticsearch.client.RestClient; +import org.springframework.ai.autoconfigure.vectorstore.elasticsearch.ElasticsearchVectorStoreProperties; +import org.springframework.ai.embedding.EmbeddingModel; import org.springframework.ai.vectorstore.ElasticsearchVectorStore; -import org.springframework.beans.factory.annotation.Value; +import 
org.springframework.ai.vectorstore.ElasticsearchVectorStoreOptions; import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty; import org.springframework.boot.context.properties.EnableConfigurationProperties; import org.springframework.context.annotation.Bean; @@ -13,18 +15,20 @@ import org.springframework.util.Assert; @EnableConfigurationProperties(EmbeddingProperties.class) public class ElasticsearchVectorStoreConfig { - @Value("${embedding.url}") - private String url; @Bean -// @ConditionalOnProperty(prefix = "embedding", name = "url") - public VectorEmbeddingClient vectorEmbeddingClient(EmbeddingProperties embeddingProperties) { + @ConditionalOnProperty(prefix = "embedding", name = "url") + public EmbeddingModel embeddingModel(EmbeddingProperties embeddingProperties) { Assert.notNull(embeddingProperties.getUrl(), "配置文件embedding:url未找到"); - return new VectorEmbeddingClient(embeddingProperties.getUrl()); + return new VectorEmbeddingModel(embeddingProperties.getUrl()); } @Bean - public ElasticsearchVectorStore vectorStore(VectorEmbeddingClient embeddingModel, RestClient restClient) { - return new ElasticsearchVectorStore(restClient, embeddingModel); + @ConditionalOnProperty(prefix = "embedding", name = "url") + public ElasticsearchVectorStore vectorStore(ElasticsearchVectorStoreProperties properties,EmbeddingModel embeddingModel, RestClient restClient) { + ElasticsearchVectorStoreOptions options = new ElasticsearchVectorStoreOptions(); + options.setIndexName(properties.getIndexName()); + options.setDimensions(1024); + return new ElasticsearchVectorStore(options, restClient, embeddingModel, true); } } diff --git a/know_sub_rag/src/main/java/com/supervision/knowsub/config/EmbeddingProperties.java b/know_sub_rag/src/main/java/com/supervision/knowsub/config/EmbeddingProperties.java index f6b6706..1e73dc5 100644 --- a/know_sub_rag/src/main/java/com/supervision/knowsub/config/EmbeddingProperties.java +++ 
b/know_sub_rag/src/main/java/com/supervision/knowsub/config/EmbeddingProperties.java @@ -2,7 +2,6 @@ package com.supervision.knowsub.config; import lombok.Data; import org.springframework.boot.context.properties.ConfigurationProperties; -import org.springframework.boot.context.properties.EnableConfigurationProperties; @Data @ConfigurationProperties(prefix = "embedding") diff --git a/know_sub_rag/src/main/java/com/supervision/knowsub/config/VectorEmbeddingClient.java b/know_sub_rag/src/main/java/com/supervision/knowsub/config/VectorEmbeddingModel.java similarity index 93% rename from know_sub_rag/src/main/java/com/supervision/knowsub/config/VectorEmbeddingClient.java rename to know_sub_rag/src/main/java/com/supervision/knowsub/config/VectorEmbeddingModel.java index 481761e..60b3487 100644 --- a/know_sub_rag/src/main/java/com/supervision/knowsub/config/VectorEmbeddingClient.java +++ b/know_sub_rag/src/main/java/com/supervision/knowsub/config/VectorEmbeddingModel.java @@ -14,11 +14,11 @@ import java.util.Map; import java.util.concurrent.atomic.AtomicInteger; @Slf4j -public class VectorEmbeddingClient extends AbstractEmbeddingClient { +public class VectorEmbeddingModel implements EmbeddingModel { private final String embeddingUrl; - public VectorEmbeddingClient(String embeddingUrl) { + public VectorEmbeddingModel(String embeddingUrl) { this.embeddingUrl = embeddingUrl; } diff --git a/know_sub_rag/src/main/java/com/supervision/knowsub/controller/EsTestController.java b/know_sub_rag/src/main/java/com/supervision/knowsub/controller/EsTestController.java new file mode 100644 index 0000000..93320c5 --- /dev/null +++ b/know_sub_rag/src/main/java/com/supervision/knowsub/controller/EsTestController.java @@ -0,0 +1,33 @@ +package com.supervision.knowsub.controller; + +import cn.hutool.json.JSONUtil; +import org.springframework.ai.document.Document; +import org.springframework.ai.vectorstore.ElasticsearchVectorStore; +import 
org.springframework.beans.factory.annotation.Autowired; +import org.springframework.web.bind.annotation.GetMapping; +import org.springframework.web.bind.annotation.RequestMapping; +import org.springframework.web.bind.annotation.RestController; + +import java.util.List; + +@RestController +@RequestMapping("esTest") +public class EsTestController { + + @Autowired + private ElasticsearchVectorStore elasticsearchVectorStore; + + @GetMapping("add") + public void add() { + Document document = new Document("测试测试"); + elasticsearchVectorStore.add(List.of(document)); + } + + @GetMapping("search") + public void search(String question) { + List result = elasticsearchVectorStore.similaritySearch(question); + for (Document document : result) { + System.out.println(document.getContent()); + } + } +} diff --git a/know_sub_rag/src/main/java/com/supervision/knowsub/controller/RagController.java b/know_sub_rag/src/main/java/com/supervision/knowsub/controller/RagController.java new file mode 100644 index 0000000..c041fcf --- /dev/null +++ b/know_sub_rag/src/main/java/com/supervision/knowsub/controller/RagController.java @@ -0,0 +1,26 @@ +package com.supervision.knowsub.controller; + +import com.supervision.knowsub.service.RagService; +import com.supervision.knowsub.vo.RagResVO; +import io.swagger.v3.oas.annotations.Operation; +import io.swagger.v3.oas.annotations.tags.Tag; +import lombok.RequiredArgsConstructor; +import org.springframework.web.bind.annotation.GetMapping; +import org.springframework.web.bind.annotation.RequestMapping; +import org.springframework.web.bind.annotation.RestController; + +@Tag(name = "RAG问答") +@RestController +@RequestMapping("rag") +@RequiredArgsConstructor +public class RagController { + + private final RagService ragService; + + @Operation(summary = "问答") + @GetMapping("esAsk") + public RagResVO esAsk(String question) { + return ragService.esAsk(question); + } + +} diff --git 
a/know_sub_rag/src/main/java/com/supervision/knowsub/controller/TestController.java b/know_sub_rag/src/main/java/com/supervision/knowsub/controller/TestController.java new file mode 100644 index 0000000..a0a72ea --- /dev/null +++ b/know_sub_rag/src/main/java/com/supervision/knowsub/controller/TestController.java @@ -0,0 +1,41 @@ +package com.supervision.knowsub.controller; + +import cn.hutool.json.JSONUtil; +import cn.hutool.poi.excel.ExcelReader; +import cn.hutool.poi.excel.ExcelUtil; +import cn.hutool.poi.excel.ExcelWriter; +import com.supervision.knowsub.service.RagService; +import com.supervision.knowsub.vo.RagResVO; +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; +import org.springframework.web.bind.annotation.GetMapping; +import org.springframework.web.bind.annotation.RequestMapping; +import org.springframework.web.bind.annotation.RestController; + +import java.util.List; + +@RestController +@RequestMapping("test") +@Slf4j +@RequiredArgsConstructor +public class TestController { + + private final RagService ragService; + + @GetMapping("esTest") + public void esTest(){ + ExcelReader reader = ExcelUtil.getReader("/Users/flevance/Desktop/深圳人社POC/question.xlsx","Sheet2"); + List objects = reader.readColumn(3, 1); + ExcelWriter writer = reader.getWriter(); + + for (int i = 0; i < objects.size(); i++) { + RagResVO ask = ragService.esAsk(objects.get(i).toString()); + + writer.writeCellValue(5, i + 1, ask.getAnswer()); + writer.writeCellValue(6, i + 1, JSONUtil.toJsonStr(ask.getFileName())); + log.info("第{}条数据写入成功,剩余{}条", i + 1, objects.size() - i - 1); + } + writer.flush(); + } + +} diff --git a/know_sub_rag/src/main/java/com/supervision/knowsub/service/RagService.java b/know_sub_rag/src/main/java/com/supervision/knowsub/service/RagService.java new file mode 100644 index 0000000..ce76c90 --- /dev/null +++ b/know_sub_rag/src/main/java/com/supervision/knowsub/service/RagService.java @@ -0,0 +1,9 @@ +package com.supervision.knowsub.service; + 
+import com.supervision.knowsub.vo.RagResVO; + +public interface RagService { + + RagResVO esAsk(String question); + +} diff --git a/know_sub_rag/src/main/java/com/supervision/knowsub/service/impl/RagServiceImpl.java b/know_sub_rag/src/main/java/com/supervision/knowsub/service/impl/RagServiceImpl.java new file mode 100644 index 0000000..bdcdbbf --- /dev/null +++ b/know_sub_rag/src/main/java/com/supervision/knowsub/service/impl/RagServiceImpl.java @@ -0,0 +1,116 @@ +package com.supervision.knowsub.service.impl; + +import cn.hutool.core.util.StrUtil; +import com.supervision.knowsub.service.RagService; +import com.supervision.knowsub.vo.RagResVO; +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; +import org.springframework.ai.chat.messages.Message; +import org.springframework.ai.chat.messages.UserMessage; +import org.springframework.ai.chat.model.ChatResponse; +import org.springframework.ai.chat.prompt.Prompt; +import org.springframework.ai.chat.prompt.SystemPromptTemplate; +import org.springframework.ai.document.Document; +import org.springframework.ai.ollama.OllamaChatModel; +import org.springframework.ai.vectorstore.ElasticsearchVectorStore; +import org.springframework.ai.vectorstore.SearchRequest; +import org.springframework.stereotype.Service; + +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; + +@Slf4j +@Service +@RequiredArgsConstructor +public class RagServiceImpl implements RagService { + + private final ElasticsearchVectorStore elasticsearchVectorStore; + +// private final OllamaChatClient chatClient ; + + private final OllamaChatModel ollamaChatModel; + + + private static final String springDemoSystemPrompt = """ + 您正在协助解答有关卡琳娜所提供服务的问题。 + 卡琳娜是一个政府政策解决中心,专注于政策问题解答。 + 卡琳娜的使命是根据政策文档,尽可能回答用户关于政策上的疑问。 + + 利用来自“上下文”部分的信息提供准确的答案。 + 您只提供重新表述的查询,且别无其他!不要在查询前后添加任何内容! 
+ 如果不确定,只需说明您不知道。 + + 政策内容: + {context} + """; + + private static final String systemPrompt = """ + 使用下面的语料来回答问题。如果你不知道问题的答案,直接回答"我不知道",禁止随意编造答案。 + 保证答案尽可能简洁,你的回答中不可以带有星号。请注意!在每次回答结束之后,你都必须接上"感谢你的提问"作为结束语. + 您只提供重新表述的查询,且别无其他!不要在查询前后添加任何内容! + 以下是一对问题和答案的样例:请问:秦始皇的原名是什么?答:秦始皇原名赢政。感谢你的提问。 + 以下是语料: + {context} + """; + + public static final String systemPrompt1 = """ + 您是政务知识领域的权威专家,能够根据给定的内容,为用户从给定内容中提炼出问题的准确、详尽、合规且具有权威性的回答。 + 若给定内容中信息不足或不明确,应向用户说明情况,并告知无法给出确切答案的原因。 + + 回答内容中,不要出现:"请注意,具体的政策和流程可能会有所变化,因此建议您咨询当地的人力资源和社会保障部门或访问官方网站以获取最新信息。"! + 回答内容中也不要出现"根据您提供的信息"字样! + + 以下是参考资料,请围绕参考资料回答: + {context} + """; + + public static final String langChainChatPrompt = """ + 根据已知信息,简洁和专业的来回答问题。 + 如果无法从中得到答案,请说 "根据已知信息无法回答该问题"。 + 不允许在答案中添加编造成分!!! + 答案请使用中文! + <已知信息>{context} + <问题>{question} + """; + + @Override + public RagResVO esAsk(String question) { + log.info("检索相关文档"); + List similarDocuments = elasticsearchVectorStore.similaritySearch(SearchRequest.query(question).withTopK(10)); + Set fileNameList = new HashSet<>(); + for (Document similarDocument : similarDocuments) { + fileNameList.add(String.valueOf(similarDocument.getMetadata().get("fileName"))); + } + log.info("找到:{}条相关文档", similarDocuments.size()); + // 构建系统消息 + String relevantDocument = similarDocuments.stream().map(Document::getContent).collect(Collectors.joining("\n")); + SystemPromptTemplate systemPromptTemplate = new SystemPromptTemplate(systemPrompt1); + Message systemMessage = systemPromptTemplate.createMessage(Map.of("context", relevantDocument)); + // 构建用户消息 + UserMessage userMessage = new UserMessage(question); + Prompt prompt = new Prompt(List.of(systemMessage, userMessage)); + + // 构建系统消息 +// String relevantDocument = similarDocuments.stream().map(Document::getContent).collect(Collectors.joining("\n")); +// String format = StrUtil.format(langChainChatPrompt, Map.of("context", relevantDocument, "question", question)); +// +// Prompt prompt = new Prompt(new UserMessage(format)); + + log.info("开始询问GPT问题"); 
+ ChatResponse call = ollamaChatModel.call(prompt); + log.info("AI responded."); + RagResVO ragResVO = new RagResVO(); + ragResVO.setAnswer(call.getResult().getOutput().getContent()); + ragResVO.setFileName(fileNameList); + + + return ragResVO; + } + + +} + + diff --git a/know_sub_rag/src/main/java/com/supervision/knowsub/util/AiChatUtil.java b/know_sub_rag/src/main/java/com/supervision/knowsub/util/AiChatUtil.java new file mode 100644 index 0000000..69f68a6 --- /dev/null +++ b/know_sub_rag/src/main/java/com/supervision/knowsub/util/AiChatUtil.java @@ -0,0 +1,94 @@ +package com.supervision.knowsub.util; + +import lombok.extern.slf4j.Slf4j; + +@Slf4j +public class AiChatUtil { + +// private static final ExecutorService chatExecutor = ThreadUtil.newFixedExecutor(5, 5, "chat", new ThreadPoolExecutor.CallerRunsPolicy()); +// +// private static final OllamaChatClient chatClient = SpringBeanUtil.getBean(OllamaChatClient.class); +// +// /** +// * 单轮对话 +// * +// * @param chat 对话的内容 +// * @return jsonObject +// */ +// public static Optional chat(String chat) { +// Prompt prompt = new Prompt(List.of(new UserMessage(chat))); +// Future submit = chatExecutor.submit(new ChatTask(chatClient, prompt)); +// try { +// return Optional.of(JSONUtil.parseObj(submit.get())); +// } catch (ExecutionException | InterruptedException e) { +// log.error("调用大模型生成失败"); +// } +// return Optional.empty(); +// } +// +// /** +// * 支持多轮对话,自定义消息 +// * +// * @param messageList 消息列表 +// * @return jsonObject +// */ +// public static Optional chat(List messageList) { +// Prompt prompt = new Prompt(messageList); +// Future submit = chatExecutor.submit(new ChatTask(chatClient, prompt)); +// try { +// return Optional.of(JSONUtil.parseObj(submit.get())); +// } catch (ExecutionException | InterruptedException e) { +// log.error("调用大模型生成失败"); +// } +// return Optional.empty(); +// } +// +// /** +// * 支持序列化的方式 +// * +// * @param messageList 消息列表 +// * @param clazz 需要序列化的对象 +// * @param 需要序列化的对象的泛型 +// * @return 
对应对象类型, 不支持列表类型 +// */ +// public static Optional chat(List messageList, Class clazz) { +// Prompt prompt = new Prompt(messageList); +// Future submit = chatExecutor.submit(new ChatTask(chatClient, prompt)); +// try { +// String s = submit.get(); +// return Optional.ofNullable(JSONUtil.toBean(s, clazz)); +// } catch (ExecutionException | InterruptedException e) { +// log.error("调用大模型生成失败", e); +// } +// return Optional.empty(); +// } +// +// /** +// * 支持序列化的方式的对话 +// * +// * @param chat 对话的消息 +// * @param clazz 需要序列化的对象 +// * @param 需要序列化的对象的泛型 +// * @return 对应对象类型, 不支持列表类型 +// */ +// public static Optional chat(String chat, Class clazz) { +// Prompt prompt = new Prompt(List.of(new UserMessage(chat))); +// Future submit = chatExecutor.submit(new ChatTask(chatClient, prompt)); +// try { +// String s = submit.get(); +// return Optional.ofNullable(JSONUtil.toBean(s, clazz)); +// } catch (ExecutionException | InterruptedException e) { +// log.error("调用大模型生成失败"); +// } +// return Optional.empty(); +// } +// +// private record ChatTask(OllamaChatClient chatClient, Prompt prompt) implements Callable { +// @Override +// public String call() { +// ChatResponse call = chatClient.call(prompt); +// return call.getResult().getOutput().getContent(); +// } +// } + +} diff --git a/know_sub_rag/src/main/java/com/supervision/knowsub/vo/RagResVO.java b/know_sub_rag/src/main/java/com/supervision/knowsub/vo/RagResVO.java new file mode 100644 index 0000000..b888cfd --- /dev/null +++ b/know_sub_rag/src/main/java/com/supervision/knowsub/vo/RagResVO.java @@ -0,0 +1,14 @@ +package com.supervision.knowsub.vo; + +import lombok.Data; + +import java.util.List; +import java.util.Set; + +@Data +public class RagResVO { + + private String answer; + + private Set fileName; +} diff --git a/know_sub_rag/src/main/resources/application-rag.yml b/know_sub_rag/src/main/resources/application-rag.yml index 44f7a01..586ae52 100644 --- a/know_sub_rag/src/main/resources/application-rag.yml +++ 
b/know_sub_rag/src/main/resources/application-rag.yml @@ -1,6 +1,107 @@ +#服务器端口 +server: + port: 9203 + servlet: + context-path: /know-sub-rag + undertow: + # Maximum size of the HTTP POST content; when the value is -1 (the default), the size is unlimited + max-http-post-size: -1 + # 以下的配置会影响buffer,这些buffer会用于服务器连接的IO操作,有点类似netty的池化内存管理 + # 每块buffer的空间大小,越小的空间被利用越充分 + buffer-size: 512 + # Whether to allocate the buffers in direct memory + direct-buffers: true spring: elasticsearch: uris: http://192.168.10.137:9200 + ai: + ollama: + base-url: http://192.168.10.70:11434 + chat: + enabled: true + options: + model: llama3-chinese:8b + keep-alive: 1000m + temperature: 0.1 + + vectorstore: + elasticsearch: + index-name: know-sub-rag-store + + main: + allow-bean-definition-overriding: true + servlet: + multipart: + max-file-size: 100MB + max-request-size: 100MB + ## 数据源配置 + datasource: + type: com.alibaba.druid.pool.DruidDataSource + druid: + driver-class-name: com.mysql.cj.jdbc.Driver + url: jdbc:mysql://192.168.10.137:3306/know_sub?useUnicode=true&characterEncoding=utf-8&useSSL=true&nullCatalogMeansCurrent=true&serverTimezone=GMT%2B8 + username: root + password: '123456' + initial-size: 5 # 初始化大小 + min-idle: 10 # 最小连接数 + max-active: 20 # 最大连接数 + max-wait: 60000 # 获取连接时的最大等待时间 + min-evictable-idle-time-millis: 300000 # 一个连接在池中最小生存的时间,单位是毫秒 + time-between-eviction-runs-millis: 60000 # 多久才进行一次检测需要关闭的空闲连接,单位是毫秒 + filters: stat,wall # 配置扩展插件:stat-监控统计,log4j-日志,wall-防火墙(防止SQL注入),去掉后,监控界面的sql无法统计 + validation-query: SELECT 1 # 检测连接是否有效的 SQL语句,为空时以下三个配置均无效 + test-on-borrow: true # 申请连接时执行validationQuery检测连接是否有效,默认true,开启后会降低性能 + test-on-return: true # 归还连接时执行validationQuery检测连接是否有效,默认false,开启后会降低性能 + test-while-idle: true # 申请连接时如果空闲时间大于timeBetweenEvictionRunsMillis,执行validationQuery检测连接是否有效,默认false,建议开启,不影响性能 + stat-view-servlet: + enabled: true # 是否开启 StatViewServlet + loginUsername: admin + loginPassword: 123456 + filter: + stat: + enabled: true # 是否开启 FilterStat,默认true + log-slow-sql: true # 是否开启 慢SQL 记录,默认false + slow-sql-millis: 5000 # 慢 SQL 的标准,默认 3000,单位:毫秒 +
merge-sql: false # Whether to merge the statistics of SQL statements that differ only in literal values; default false + +mybatis-plus: + mapper-locations: classpath*:mapper/**/*.xml + configuration: + log-impl: org.apache.ibatis.logging.stdout.StdOutImpl + +# springdoc-openapi项目配置 +springdoc: + # Defaults to false; set to true to fix GET requests being displayed incorrectly when their parameters are received as a custom object + default-flat-param-object: true + swagger-ui: + path: /swagger-ui.html + tags-sorter: alpha + operations-sorter: alpha + api-docs: + path: /v3/api-docs + group-configs: + - group: 'default' + paths-to-match: '/**' + packages-to-scan: com.supervision +# knife4j的增强配置,不需要增强可以不配 +knife4j: + enable: true + setting: + language: zh_cn +user: + # 默认密码 + default: + password: 123456 + +#spring: +# elasticsearch: +# uris: http://192.168.10.137:9200 embedding: - url: http://192.168.10.137:8711/embeddings/ \ No newline at end of file + url: http://192.168.10.137:8711/embeddings/ + +vector: + redis: + uri: redis://:123456@192.168.10.137:6380 + indexName: 'know-sub-rag-store' + prefix: 'know-sub-rag-store:' \ No newline at end of file diff --git a/pom.xml b/pom.xml index 351a4e1..c29f70e 100644 --- a/pom.xml +++ b/pom.xml @@ -19,7 +19,8 @@ know_sub_common know_sub_business know_sub_model - + know_sub_rag + know_sub_etl @@ -44,6 +45,7 @@ + io.springboot.ai spring-ai-bom @@ -52,6 +54,14 @@ import + + org.springframework.ai + spring-ai-bom + 1.0.0-M1 + pom + import + + mysql mysql-connector-java @@ -112,4 +122,37 @@ + + + central + aliyun central repo + https://maven.aliyun.com/nexus/content/repositories/central/ + default + + true + daily + + + false + never + + + + spring-snapshots + Spring Snapshots + https://repo.spring.io/snapshot + + false + + + + spring-milestones + Spring Milestones + https://repo.spring.io/milestone + + false + + + +