Merge remote-tracking branch 'origin/dev_1.0.0' into dev_1.0.0

# Conflicts:
#	know_sub_business/pom.xml
#	know_sub_rag/src/main/java/com/supervision/knowsub/config/ElasticsearchVectorStoreConfig.java
#	pom.xml
dev_1.0.0
xueqingkun 4 months ago
commit a9348831f7

@ -0,0 +1 @@
-- 新增RAG搜索的SQL

@ -12,8 +12,6 @@ server:
# 是否分配的直接内存
direct-buffers: true
spring:
elasticsearch:
uris: http://192.168.10.137:9200
main:
allow-bean-definition-overriding: true
servlet:
@ -77,11 +75,4 @@ knife4j:
user:
# 默认密码
default:
password: 123456
#spring:
# elasticsearch:
# uris: http://192.168.10.137:9200
embedding:
url: http://192.168.10.137:8711/embeddings/
password: 123456

@ -0,0 +1,154 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>com.supervision</groupId>
<artifactId>know_sub</artifactId>
<version>1.0.0</version>
</parent>
<artifactId>know_sub_etl</artifactId>
<properties>
<maven.compiler.source>17</maven.compiler.source>
<maven.compiler.target>17</maven.compiler.target>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
</properties>
<dependencies>
<dependency>
<groupId>org.springframework.ai</groupId>
<artifactId>spring-ai-tika-document-reader</artifactId>
</dependency>
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
<version>4.5.13</version>
</dependency>
<!-- <dependency>-->
<!-- <groupId>io.springboot.ai</groupId>-->
<!-- <artifactId>spring-ai-ollama-spring-boot-starter</artifactId>-->
<!-- </dependency>-->
<dependency>
<groupId>org.springframework.ai</groupId>
<artifactId>spring-ai-elasticsearch-store</artifactId>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-data-elasticsearch</artifactId>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-configuration-processor</artifactId>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter</artifactId>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-test</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>com.supervision</groupId>
<artifactId>know_sub_common</artifactId>
<version>1.0.0</version>
</dependency>
<dependency>
<groupId>com.supervision</groupId>
<artifactId>know_sub_model</artifactId>
<version>1.0.0</version>
</dependency>
<dependency>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
<scope>provided</scope>
</dependency>
</dependencies>
<repositories>
<repository>
<id>central</id>
<name>aliyun central repo</name>
<url>https://maven.aliyun.com/nexus/content/repositories/central/</url>
<layout>default</layout>
<releases>
<enabled>true</enabled>
<updatePolicy>daily</updatePolicy>
</releases>
<snapshots>
<enabled>false</enabled>
<updatePolicy>never</updatePolicy>
</snapshots>
</repository>
<repository>
<id>spring-snapshots</id>
<name>Spring Snapshots</name>
<url>https://repo.spring.io/snapshot</url>
<snapshots>
<enabled>false</enabled>
</snapshots>
</repository>
<repository>
<id>spring-milestones</id>
<name>Spring Milestones</name>
<url>https://repo.spring.io/milestone</url>
<snapshots>
<enabled>false</enabled>
</snapshots>
</repository>
</repositories>
<build>
<plugins>
<plugin>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-maven-plugin</artifactId>
<version>3.3.0</version>
<executions>
<execution>
<goals>
<goal>repackage</goal>
</goals>
</execution>
</executions>
<configuration>
<excludes>
<exclude>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
</exclude>
</excludes>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.11.0</version>
<configuration>
<parameters>true</parameters>
</configuration>
</plugin>
</plugins>
</build>
</project>

@ -0,0 +1,18 @@
package som.supervision.knowsub;
import org.mybatis.spring.annotation.MapperScan;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
import org.springframework.boot.context.properties.EnableConfigurationProperties;
import org.springframework.scheduling.annotation.EnableScheduling;
/**
 * Spring Boot entry point of the knowledge ETL module.
 *
 * <p>Enables scheduling and configuration-properties support, and registers MyBatis mappers
 * found under {@code com.supervision.**.mapper}.
 *
 * <p>NOTE(review): this file declares package {@code som.supervision.knowsub} while the mapper
 * scan targets {@code com.supervision} - confirm the {@code som} prefix is intentional.
 */
@SpringBootApplication
@EnableScheduling
@MapperScan(basePackages = {"com.supervision.**.mapper"})
@EnableConfigurationProperties
public class KnowSubEtlApplication {
/**
 * Starts the application.
 *
 * @param args command-line arguments forwarded to {@link SpringApplication#run}
 */
public static void main(String[] args) {
SpringApplication.run(KnowSubEtlApplication.class, args);
}
}

@ -0,0 +1,33 @@
package som.supervision.knowsub.config;
import org.elasticsearch.client.RestClient;
import org.springframework.ai.embedding.EmbeddingModel;
import org.springframework.ai.vectorstore.ElasticsearchVectorStore;
import org.springframework.ai.vectorstore.ElasticsearchVectorStoreOptions;
import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
import org.springframework.boot.context.properties.EnableConfigurationProperties;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.util.Assert;
/**
 * Wires the embedding model and the Elasticsearch-backed vector store used by the ETL module.
 *
 * <p>Both beans are only registered when the {@code embedding.url} property is present.
 */
@Configuration
@EnableConfigurationProperties(EmbeddingProperties.class)
public class ElasticsearchVectorStoreConfig {

    /** Name of the Elasticsearch index the document chunks are stored in. */
    private static final String INDEX_NAME = "know-sub-rag-store";

    /**
     * Vector length configured on the index.
     * NOTE(review): presumably has to match the output size of the embedding service - confirm.
     */
    private static final int EMBEDDING_DIMENSIONS = 1024;

    /**
     * Creates the embedding model that calls the external embedding service.
     *
     * @param embeddingProperties bound {@code embedding.*} properties
     * @return embedding model targeting {@code embedding.url}
     * @throws IllegalArgumentException if {@code embedding.url} is null, empty or blank
     */
    @Bean
    @ConditionalOnProperty(prefix = "embedding", name = "url")
    public EmbeddingModel embeddingModel(EmbeddingProperties embeddingProperties) {
        // hasText (instead of notNull) also rejects an empty or whitespace-only value, which
        // would otherwise only surface later as a failing HTTP call.
        Assert.hasText(embeddingProperties.getUrl(), "配置文件embedding:url未找到");
        return new VectorEmbeddingModel(embeddingProperties.getUrl());
    }

    /**
     * Creates the vector store on top of the given Elasticsearch REST client.
     *
     * @param embeddingModel model used to embed documents and queries
     * @param restClient low-level Elasticsearch client
     * @return vector store bound to {@link #INDEX_NAME}
     */
    @Bean
    @ConditionalOnProperty(prefix = "embedding", name = "url")
    public ElasticsearchVectorStore vectorStore(EmbeddingModel embeddingModel, RestClient restClient) {
        ElasticsearchVectorStoreOptions options = new ElasticsearchVectorStoreOptions();
        options.setIndexName(INDEX_NAME);
        options.setDimensions(EMBEDDING_DIMENSIONS);
        // NOTE(review): the trailing `true` is presumably the initialize-schema flag - confirm
        // against the Spring AI version in use.
        return new ElasticsearchVectorStore(options, restClient, embeddingModel, true);
    }
}

@ -0,0 +1,12 @@
package som.supervision.knowsub.config;
import lombok.Data;
import org.springframework.boot.context.properties.ConfigurationProperties;
/**
 * Configuration properties bound from the {@code embedding.*} prefix.
 */
@Data
@ConfigurationProperties(prefix = "embedding")
public class EmbeddingProperties {
/** Value of {@code embedding.url}; passed to {@code VectorEmbeddingModel} as the endpoint to POST to. */
private String url;
}

@ -0,0 +1,57 @@
package som.supervision.knowsub.config;
import cn.hutool.http.HttpUtil;
import cn.hutool.json.JSONUtil;
import lombok.Data;
import lombok.extern.slf4j.Slf4j;
import org.springframework.ai.document.Document;
import org.springframework.ai.embedding.*;
import org.springframework.util.Assert;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicInteger;
/**
 * {@link EmbeddingModel} implementation that delegates to an external HTTP embedding service.
 *
 * <p>Each text is POSTed as a JSON object with a single {@code text} field to the configured URL;
 * the response JSON is read into {@link EmbeddingData}, i.e. the vector is taken from its
 * {@code embeddings} field.
 */
@Slf4j
public class VectorEmbeddingModel implements EmbeddingModel {

    private final String embeddingUrl;

    /**
     * @param embeddingUrl URL of the embedding service endpoint
     */
    public VectorEmbeddingModel(String embeddingUrl) {
        this.embeddingUrl = embeddingUrl;
    }

    /**
     * Embeds the content of a single document.
     *
     * @param document document whose content is embedded
     * @return the embedding vector of the document content
     */
    @Override
    public List<Double> embed(Document document) {
        EmbeddingRequest request =
                new EmbeddingRequest(List.of(document.getContent()), EmbeddingOptions.EMPTY);
        // One text in, one embedding out: call() adds exactly one result per instruction.
        return call(request).getResults().get(0).getOutput();
    }

    /**
     * Embeds every instruction of the request, one HTTP call per text, preserving order.
     *
     * @param request request carrying at least one text
     * @return response whose i-th embedding belongs to the i-th instruction
     * @throws IllegalArgumentException if the request carries no instructions
     * @throws IllegalStateException if the service response contains no embedding vector
     */
    @Override
    public EmbeddingResponse call(EmbeddingRequest request) {
        Assert.notEmpty(request.getInstructions(), "At least one text is required!");
        List<String> inputs = request.getInstructions();
        List<Embedding> embeddings = new ArrayList<>(inputs.size());
        for (int index = 0; index < inputs.size(); index++) {
            embeddings.add(new Embedding(fetchEmbedding(inputs.get(index)), index));
        }
        // Hand out an unmodifiable snapshot, as the previous stream-based version did.
        return new EmbeddingResponse(List.copyOf(embeddings));
    }

    /** Converts one text to its vector via the embedding service, failing fast on an unusable reply. */
    private List<Double> fetchEmbedding(String text) {
        String responseBody = HttpUtil.post(embeddingUrl, JSONUtil.toJsonStr(Map.of("text", text)));
        EmbeddingData data = JSONUtil.toBean(responseBody, EmbeddingData.class);
        // Previously a reply without "embeddings" silently produced an Embedding with a null
        // vector, which only failed much later inside the vector store.
        if (data == null || data.embeddings == null || data.embeddings.isEmpty()) {
            throw new IllegalStateException(
                    "Embedding service at " + embeddingUrl + " returned no embeddings");
        }
        return data.embeddings;
    }

    /** Shape of the embedding service response as consumed by this class. */
    @Data
    private static class EmbeddingData {
        private List<Double> embeddings;
    }
}

@ -0,0 +1,27 @@
package som.supervision.knowsub.controller;
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.tags.Tag;
import lombok.RequiredArgsConstructor;
import org.apache.ibatis.annotations.Param;
import org.springframework.web.bind.annotation.*;
import org.springframework.web.multipart.MultipartFile;
import som.supervision.knowsub.service.KnowledgeEtlService;
import java.io.IOException;
/**
 * REST endpoints of the knowledge ETL module, mapped under {@code etl}.
 */
@Tag(name = "知识ETL类")
@RestController
@RequestMapping("etl")
@RequiredArgsConstructor
public class KnowledgeEtlController {
private final KnowledgeEtlService knowledgeEtlService;
/**
 * Accepts uploaded files and hands them to {@link KnowledgeEtlService#knowledgeEtl}.
 *
 * @param files multipart files sent under the request parameter {@code files}
 */
@Operation(summary = "对知识进行ETL")
@PostMapping("knowledgeEtl")
public void knowledgeEtl(@RequestParam("files") MultipartFile[] files) {
knowledgeEtlService.knowledgeEtl(files);
}
}

@ -0,0 +1,12 @@
package som.supervision.knowsub.service;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.multipart.MultipartFile;
import java.io.IOException;
/**
 * Service contract for running ETL over uploaded knowledge files.
 */
public interface KnowledgeEtlService {
/**
 * Processes the given uploaded files.
 *
 * @param files uploaded files to process
 */
void knowledgeEtl(MultipartFile[] files);
}

@ -0,0 +1,66 @@
package som.supervision.knowsub.service.impl;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.ai.document.Document;
import org.springframework.ai.reader.tika.TikaDocumentReader;
import org.springframework.ai.transformer.splitter.TokenTextSplitter;
import org.springframework.ai.vectorstore.ElasticsearchVectorStore;
import org.springframework.core.io.InputStreamResource;
import org.springframework.stereotype.Service;
import org.springframework.web.multipart.MultipartFile;
import som.supervision.knowsub.service.KnowledgeEtlService;
import java.io.InputStream;
import java.util.List;
import java.util.concurrent.atomic.AtomicInteger;
/**
 * {@link KnowledgeEtlService} implementation: parses each uploaded file with Tika, splits the
 * text into chunks and stores the chunks in the Elasticsearch vector store.
 */
@Slf4j
@Service
@RequiredArgsConstructor
public class KnowledgeEtlServiceImpl implements KnowledgeEtlService {

    private final ElasticsearchVectorStore elasticsearchVectorStore;

    /**
     * Reads one file, splits it into chunks tagged with the file name and saves them to the
     * vector store. The caller owns {@code inputStream} and is responsible for closing it.
     *
     * @param inputStream content of the file
     * @param fileName name stored in each chunk's {@code fileName} metadata entry
     * @see <a href="https://zhuanlan.zhihu.com/p/703705663">reference article</a>
     */
    private void loadFile(InputStream inputStream, String fileName) {
        // Parse the file with Tika first.
        log.info("{} 进行内容切分", fileName);
        TikaDocumentReader tikaDocumentReader = new TikaDocumentReader(new InputStreamResource(inputStream));
        List<Document> documents = tikaDocumentReader.read();
        log.info("{} 切分完成,开始进行chunk分割", fileName);
        // Then split the parsed text into chunks.
        // NOTE(review): the arguments are presumably chunk size, min chunk chars, min length to
        // embed, max chunk count and keep-separator - confirm against the Spring AI version.
        TokenTextSplitter tokenTextSplitter = new TokenTextSplitter(500, 250, 10, 1000, true);
        List<Document> chunks = tokenTextSplitter.apply(documents);
        for (Document chunk : chunks) {
            chunk.getMetadata().put("fileName", fileName);
        }
        log.info("{} 切分完成,开始进行保存到向量库中", fileName);
        // Save the chunks to the vector store.
        elasticsearchVectorStore.accept(chunks);
        log.info("{} 保存完成", fileName);
    }

    /**
     * Processes every file in turn. A failure on one file is logged and does not stop the
     * remaining files from being processed.
     *
     * @param files uploaded files; {@code null} is treated like an empty array
     */
    @Override
    public void knowledgeEtl(MultipartFile[] files) {
        if (files == null) {
            log.info("文件处理结束");
            return;
        }
        int processed = 0;
        for (MultipartFile file : files) {
            // try-with-resources: the upload stream used to be left open after processing.
            try (InputStream inputStream = file.getInputStream()) {
                loadFile(inputStream, file.getOriginalFilename());
            } catch (Exception e) {
                log.error("{}文件处理失败", file.getOriginalFilename(), e);
            }
            processed++;
            // Logged after the file is done, so the remainder excludes it (was off by one).
            log.info("处理第{}个文件,剩余:{}个", processed, files.length - processed);
        }
        log.info("文件处理结束");
    }
}

@ -0,0 +1,94 @@
#服务器端口
server:
port: 9202
servlet:
context-path: /know-sub-etl
undertow:
# Maximum size of an HTTP POST body; a value of -1 means unlimited
max-http-post-size: -1
# 以下的配置会影响buffer,这些buffer会用于服务器连接的IO操作,有点类似netty的池化内存管理
# 每块buffer的空间大小,越小的空间被利用越充分
buffer-size: 512
# 是否分配的直接内存
direct-buffers: true
spring:
elasticsearch:
uris: http://192.168.10.137:9200
main:
allow-bean-definition-overriding: true
servlet:
multipart:
max-file-size: 100MB
max-request-size: 100MB
## 数据源配置
datasource:
type: com.alibaba.druid.pool.DruidDataSource
druid:
driver-class-name: com.mysql.cj.jdbc.Driver
url: jdbc:mysql://192.168.10.137:3306/know_sub?useUnicode=true&characterEncoding=utf-8&useSSL=true&nullCatalogMeansCurrent=true&serverTimezone=GMT%2B8
username: root
password: '123456'
initial-size: 5 # 初始化大小
min-idle: 10 # 最小连接数
max-active: 20 # 最大连接数
max-wait: 60000 # 获取连接时的最大等待时间
min-evictable-idle-time-millis: 300000 # 一个连接在池中最小生存的时间,单位是毫秒
time-between-eviction-runs-millis: 60000 # 多久才进行一次检测需要关闭的空闲连接,单位是毫秒
filters: stat,wall # 配置扩展插件stat-监控统计log4j-日志wall-防火墙防止SQL注入去掉后监控界面的sql无法统计
validation-query: SELECT 1 # 检测连接是否有效的 SQL语句为空时以下三个配置均无效
test-on-borrow: true # 申请连接时执行validationQuery检测连接是否有效默认true开启后会降低性能
test-on-return: true # 归还连接时执行validationQuery检测连接是否有效默认false开启后会降低性能
test-while-idle: true # 申请连接时如果空闲时间大于timeBetweenEvictionRunsMillis执行validationQuery检测连接是否有效默认false建议开启不影响性能
stat-view-servlet:
enabled: true # 是否开启 StatViewServlet
loginUsername: admin
loginPassword: 123456
filter:
stat:
enabled: true # 是否开启 FilterStat默认true
log-slow-sql: true # 是否开启 慢SQL 记录默认false
slow-sql-millis: 5000 # 慢 SQL 的标准,默认 3000单位毫秒
merge-sql: false # 合并多个连接池的监控数据默认false
mybatis-plus:
mapper-locations: classpath*:mapper/**/*.xml
configuration:
log-impl: org.apache.ibatis.logging.stdout.StdOutImpl
# springdoc-openapi项目配置
springdoc:
# Defaults to false; set to true so that GET parameters bound to a custom object are displayed correctly
default-flat-param-object: true
swagger-ui:
path: /swagger-ui.html
tags-sorter: alpha
operations-sorter: alpha
api-docs:
path: /v3/api-docs
group-configs:
- group: 'default'
paths-to-match: '/**'
packages-to-scan: com.supervision
# knife4j的增强配置不需要增强可以不配
knife4j:
enable: true
setting:
language: zh_cn
user:
# 默认密码
default:
password: 123456
#spring:
# elasticsearch:
# uris: http://192.168.10.137:9200
embedding:
url: http://192.168.10.137:8711/embeddings/
vector:
redis:
uri: redis://:123456@192.168.10.137:6380
indexName: 'know-sub-rag-store'
prefix: 'know-sub-rag-store:'

@ -19,16 +19,36 @@
<dependencies>
<!-- 引入ollama的依赖.版本号来自于 dependencyManagement中 spring-ai-bom中的版本号.-->
<dependency>
<groupId>io.springboot.ai</groupId>
<groupId>org.springframework.ai</groupId>
<artifactId>spring-ai-tika-document-reader</artifactId>
</dependency>
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
<version>4.5.13</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
</dependency>
<!-- <dependency>-->
<!-- <groupId>io.springboot.ai</groupId>-->
<!-- <artifactId>spring-ai-ollama-spring-boot-starter</artifactId>-->
<!-- </dependency>-->
<dependency>
<groupId>org.springframework.ai</groupId>
<artifactId>spring-ai-ollama-spring-boot-starter</artifactId>
</dependency>
<dependency>
<groupId>io.springboot.ai</groupId>
<groupId>org.springframework.ai</groupId>
<artifactId>spring-ai-elasticsearch-store</artifactId>
<version>1.0.3</version>
</dependency>
<dependency>
@ -73,6 +93,41 @@
</dependencies>
<repositories>
<repository>
<id>central</id>
<name>aliyun central repo</name>
<url>https://maven.aliyun.com/nexus/content/repositories/central/</url>
<layout>default</layout>
<releases>
<enabled>true</enabled>
<updatePolicy>daily</updatePolicy>
</releases>
<snapshots>
<enabled>false</enabled>
<updatePolicy>never</updatePolicy>
</snapshots>
</repository>
<repository>
<id>spring-snapshots</id>
<name>Spring Snapshots</name>
<url>https://repo.spring.io/snapshot</url>
<snapshots>
<enabled>false</enabled>
</snapshots>
</repository>
<repository>
<id>spring-milestones</id>
<name>Spring Milestones</name>
<url>https://repo.spring.io/milestone</url>
<snapshots>
<enabled>false</enabled>
</snapshots>
</repository>
</repositories>
<build>
<plugins>

@ -0,0 +1,20 @@
package com.supervision.knowsub;
import org.mybatis.spring.annotation.MapperScan;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
import org.springframework.boot.context.properties.EnableConfigurationProperties;
import org.springframework.scheduling.annotation.EnableScheduling;
import javax.swing.*;
/**
 * Spring Boot entry point of the RAG module.
 *
 * <p>Enables scheduling and configuration-properties support, and registers MyBatis mappers
 * found under {@code com.supervision.**.mapper}.
 */
@SpringBootApplication
@EnableScheduling
@MapperScan(basePackages = {"com.supervision.**.mapper"})
@EnableConfigurationProperties
public class KnowSubRagApplication {
/**
 * Starts the application.
 *
 * @param args command-line arguments forwarded to {@link SpringApplication#run}
 */
public static void main(String[] args) {
SpringApplication.run(KnowSubRagApplication.class, args);
}
}

@ -1,8 +1,10 @@
package com.supervision.knowsub.config;
import org.elasticsearch.client.RestClient;
import org.springframework.ai.autoconfigure.vectorstore.elasticsearch.ElasticsearchVectorStoreProperties;
import org.springframework.ai.embedding.EmbeddingModel;
import org.springframework.ai.vectorstore.ElasticsearchVectorStore;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.ai.vectorstore.ElasticsearchVectorStoreOptions;
import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
import org.springframework.boot.context.properties.EnableConfigurationProperties;
import org.springframework.context.annotation.Bean;
@ -13,18 +15,20 @@ import org.springframework.util.Assert;
@EnableConfigurationProperties(EmbeddingProperties.class)
public class ElasticsearchVectorStoreConfig {
@Value("${embedding.url}")
private String url;
@Bean
// @ConditionalOnProperty(prefix = "embedding", name = "url")
public VectorEmbeddingClient vectorEmbeddingClient(EmbeddingProperties embeddingProperties) {
@ConditionalOnProperty(prefix = "embedding", name = "url")
public EmbeddingModel embeddingModel(EmbeddingProperties embeddingProperties) {
Assert.notNull(embeddingProperties.getUrl(), "配置文件embedding:url未找到");
return new VectorEmbeddingClient(embeddingProperties.getUrl());
return new VectorEmbeddingModel(embeddingProperties.getUrl());
}
@Bean
public ElasticsearchVectorStore vectorStore(VectorEmbeddingClient embeddingModel, RestClient restClient) {
return new ElasticsearchVectorStore(restClient, embeddingModel);
@ConditionalOnProperty(prefix = "embedding", name = "url")
public ElasticsearchVectorStore vectorStore(ElasticsearchVectorStoreProperties properties,EmbeddingModel embeddingModel, RestClient restClient) {
ElasticsearchVectorStoreOptions options = new ElasticsearchVectorStoreOptions();
options.setIndexName(properties.getIndexName());
options.setDimensions(1024);
return new ElasticsearchVectorStore(options, restClient, embeddingModel, true);
}
}

@ -2,7 +2,6 @@ package com.supervision.knowsub.config;
import lombok.Data;
import org.springframework.boot.context.properties.ConfigurationProperties;
import org.springframework.boot.context.properties.EnableConfigurationProperties;
@Data
@ConfigurationProperties(prefix = "embedding")

@ -14,11 +14,11 @@ import java.util.Map;
import java.util.concurrent.atomic.AtomicInteger;
@Slf4j
public class VectorEmbeddingClient extends AbstractEmbeddingClient {
public class VectorEmbeddingModel implements EmbeddingModel {
private final String embeddingUrl;
public VectorEmbeddingClient(String embeddingUrl) {
public VectorEmbeddingModel(String embeddingUrl) {
this.embeddingUrl = embeddingUrl;
}

@ -0,0 +1,33 @@
package com.supervision.knowsub.controller;
import cn.hutool.json.JSONUtil;
import org.springframework.ai.document.Document;
import org.springframework.ai.vectorstore.ElasticsearchVectorStore;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;
import java.util.List;
/**
 * Ad-hoc endpoints, mapped under {@code esTest}, for manually exercising the Elasticsearch
 * vector store.
 */
@RestController
@RequestMapping("esTest")
public class EsTestController {

    @Autowired
    private ElasticsearchVectorStore elasticsearchVectorStore;

    /** Adds one fixed sample document to the vector store. */
    @GetMapping("add")
    public void add() {
        List<Document> sample = List.of(new Document("测试测试"));
        elasticsearchVectorStore.add(sample);
    }

    /**
     * Runs a similarity search and prints the content of every hit to standard output.
     *
     * @param question query text
     */
    @GetMapping("search")
    public void search(String question) {
        elasticsearchVectorStore.similaritySearch(question).stream()
                .map(Document::getContent)
                .forEach(System.out::println);
    }
}

@ -0,0 +1,26 @@
package com.supervision.knowsub.controller;
import com.supervision.knowsub.service.RagService;
import com.supervision.knowsub.vo.RagResVO;
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.tags.Tag;
import lombok.RequiredArgsConstructor;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;
/**
 * REST endpoints for RAG question answering, mapped under {@code rag}.
 */
@Tag(name = "RAG问答")
@RestController
@RequestMapping("rag")
@RequiredArgsConstructor
public class RagController {
private final RagService ragService;
/**
 * Answers a question by delegating to {@link RagService#esAsk}.
 *
 * @param question the question text
 * @return the result produced by the service
 */
@Operation(summary = "问答")
@GetMapping("esAsk")
public RagResVO esAsk(String question) {
return ragService.esAsk(question);
}
}

@ -0,0 +1,41 @@
package com.supervision.knowsub.controller;
import cn.hutool.json.JSONUtil;
import cn.hutool.poi.excel.ExcelReader;
import cn.hutool.poi.excel.ExcelUtil;
import cn.hutool.poi.excel.ExcelWriter;
import com.supervision.knowsub.service.RagService;
import com.supervision.knowsub.vo.RagResVO;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;
import java.util.List;
/**
 * Batch test endpoint, mapped under {@code test}: reads questions from an Excel sheet, asks each
 * one through {@link RagService} and writes the answers back into the workbook.
 */
@RestController
@RequestMapping("test")
@Slf4j
@RequiredArgsConstructor
public class TestController {

    private final RagService ragService;

    /**
     * Reads the questions from column index 3 (starting at row index 1) of sheet {@code Sheet2},
     * then writes each answer to column index 5 and the JSON-encoded source file names to column
     * index 6 of the matching row.
     *
     * <p>NOTE(review): the workbook path is an absolute path on a developer machine, and neither
     * the reader nor the writer is closed here - confirm this endpoint is for local use only.
     */
    @GetMapping("esTest")
    public void esTest() {
        ExcelReader reader = ExcelUtil.getReader("/Users/flevance/Desktop/深圳人社POC/question.xlsx", "Sheet2");
        List<Object> questions = reader.readColumn(3, 1);
        ExcelWriter writer = reader.getWriter();

        int row = 0;
        for (Object question : questions) {
            row++;
            RagResVO reply = ragService.esAsk(question.toString());
            writer.writeCellValue(5, row, reply.getAnswer());
            writer.writeCellValue(6, row, JSONUtil.toJsonStr(reply.getFileName()));
            log.info("第{}条数据写入成功,剩余{}条", row, questions.size() - row);
        }
        writer.flush();
    }
}

@ -0,0 +1,9 @@
package com.supervision.knowsub.service;
import com.supervision.knowsub.vo.RagResVO;
/**
 * Service contract for RAG question answering.
 */
public interface RagService {
/**
 * Answers the given question.
 *
 * @param question the question text
 * @return the answer together with the associated file names
 */
RagResVO esAsk(String question);
}

@ -0,0 +1,116 @@
package com.supervision.knowsub.service.impl;
import cn.hutool.core.util.StrUtil;
import com.supervision.knowsub.service.RagService;
import com.supervision.knowsub.vo.RagResVO;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.ai.chat.messages.Message;
import org.springframework.ai.chat.messages.UserMessage;
import org.springframework.ai.chat.model.ChatResponse;
import org.springframework.ai.chat.prompt.Prompt;
import org.springframework.ai.chat.prompt.SystemPromptTemplate;
import org.springframework.ai.document.Document;
import org.springframework.ai.ollama.OllamaChatModel;
import org.springframework.ai.vectorstore.ElasticsearchVectorStore;
import org.springframework.ai.vectorstore.SearchRequest;
import org.springframework.stereotype.Service;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
/**
 * {@link RagService} implementation: retrieves similar document chunks from the Elasticsearch
 * vector store and passes them, as system-prompt context, to the Ollama chat model.
 */
@Slf4j
@Service
@RequiredArgsConstructor
public class RagServiceImpl implements RagService {
private final ElasticsearchVectorStore elasticsearchVectorStore;
// private final OllamaChatClient chatClient ;
private final OllamaChatModel ollamaChatModel;
// Alternative system prompt; not referenced by the active code in this class.
private static final String springDemoSystemPrompt = """
使
:
<context>{context}</context>
""";
// Alternative system prompt; not referenced by the active code in this class.
private static final String systemPrompt = """
使"我不知道"
!"感谢你的提问".
::?:
:
<context>{context}</context>
""";
// System prompt template used by esAsk; {context} is filled with the retrieved chunks.
public static final String systemPrompt1 = """
,:"请注意,具体的政策和流程可能会有所变化,因此建议您咨询当地的人力资源和社会保障部门或访问官方网站以获取最新信息。"!
"根据您提供的信息"!
,:
<context>{context}</context>
""";
// Single-message prompt with {context} and {question}; only used by the commented-out code in esAsk.
public static final String langChainChatPrompt = """
"根据已知信息无法回答该问题"
!!!
使!
<>{context}</>
<>{question}</>
""";
/**
 * Answers a question using the 10 most similar chunks from the vector store as context.
 *
 * @param question the user's question
 * @return the model's answer plus the distinct {@code fileName} metadata values of the
 *         retrieved chunks (a chunk without that entry contributes the string "null")
 */
@Override
public RagResVO esAsk(String question) {
log.info("检索相关文档");
List<Document> similarDocuments = elasticsearchVectorStore.similaritySearch(SearchRequest.query(question).withTopK(10));
// A Set is used, so each source file name appears once.
Set<String> fileNameList = new HashSet<>();
for (Document similarDocument : similarDocuments) {
fileNameList.add(String.valueOf(similarDocument.getMetadata().get("fileName")));
}
log.info("找到:{}条相关文档", similarDocuments.size());
// Build the system message: the chunk contents are joined with newlines into {context}.
String relevantDocument = similarDocuments.stream().map(Document::getContent).collect(Collectors.joining("\n"));
SystemPromptTemplate systemPromptTemplate = new SystemPromptTemplate(systemPrompt1);
Message systemMessage = systemPromptTemplate.createMessage(Map.of("context", relevantDocument));
// Build the user message
UserMessage userMessage = new UserMessage(question);
Prompt prompt = new Prompt(List.of(systemMessage, userMessage));
// Build the system message (alternative single-message variant, currently disabled)
// String relevantDocument = similarDocuments.stream().map(Document::getContent).collect(Collectors.joining("\n"));
// String format = StrUtil.format(langChainChatPrompt, Map.of("context", relevantDocument, "question", question));
//
// Prompt prompt = new Prompt(new UserMessage(format));
log.info("开始询问GPT问题");
ChatResponse call = ollamaChatModel.call(prompt);
log.info("AI responded.");
RagResVO ragResVO = new RagResVO();
ragResVO.setAnswer(call.getResult().getOutput().getContent());
ragResVO.setFileName(fileNameList);
return ragResVO;
}
}

@ -0,0 +1,94 @@
package com.supervision.knowsub.util;
import lombok.extern.slf4j.Slf4j;
/**
 * Chat helper utility. The class currently has no active members: every helper below is
 * commented out.
 *
 * <p>NOTE(review): the disabled code refers to {@code OllamaChatClient} - presumably it was
 * switched off during the move to {@code OllamaChatModel}; confirm whether it should be ported
 * or deleted.
 */
@Slf4j
public class AiChatUtil {
// private static final ExecutorService chatExecutor = ThreadUtil.newFixedExecutor(5, 5, "chat", new ThreadPoolExecutor.CallerRunsPolicy());
//
// private static final OllamaChatClient chatClient = SpringBeanUtil.getBean(OllamaChatClient.class);
//
// /**
// * Single-turn chat.
// *
// * @param chat the chat content
// * @return jsonObject
// */
// public static Optional<JSONObject> chat(String chat) {
// Prompt prompt = new Prompt(List.of(new UserMessage(chat)));
// Future<String> submit = chatExecutor.submit(new ChatTask(chatClient, prompt));
// try {
// return Optional.of(JSONUtil.parseObj(submit.get()));
// } catch (ExecutionException | InterruptedException e) {
// log.error("调用大模型生成失败");
// }
// return Optional.empty();
// }
//
// /**
// * Multi-turn chat with caller-supplied messages.
// *
// * @param messageList the message list
// * @return jsonObject
// */
// public static Optional<JSONObject> chat(List<Message> messageList) {
// Prompt prompt = new Prompt(messageList);
// Future<String> submit = chatExecutor.submit(new ChatTask(chatClient, prompt));
// try {
// return Optional.of(JSONUtil.parseObj(submit.get()));
// } catch (ExecutionException | InterruptedException e) {
// log.error("调用大模型生成失败");
// }
// return Optional.empty();
// }
//
// /**
// * Variant that converts the reply into an object.
// *
// * @param messageList the message list
// * @param clazz the target class to convert the reply into
// * @param <T> the generic type of the target class
// * @return an instance of the target type; list types are not supported
// */
// public static <T> Optional<T> chat(List<Message> messageList, Class<T> clazz) {
// Prompt prompt = new Prompt(messageList);
// Future<String> submit = chatExecutor.submit(new ChatTask(chatClient, prompt));
// try {
// String s = submit.get();
// return Optional.ofNullable(JSONUtil.toBean(s, clazz));
// } catch (ExecutionException | InterruptedException e) {
// log.error("调用大模型生成失败", e);
// }
// return Optional.empty();
// }
//
// /**
// * Single-message chat that converts the reply into an object.
// *
// * @param chat the chat message
// * @param clazz the target class to convert the reply into
// * @param <T> the generic type of the target class
// * @return an instance of the target type; list types are not supported
// */
// public static <T> Optional<T> chat(String chat, Class<T> clazz) {
// Prompt prompt = new Prompt(List.of(new UserMessage(chat)));
// Future<String> submit = chatExecutor.submit(new ChatTask(chatClient, prompt));
// try {
// String s = submit.get();
// return Optional.ofNullable(JSONUtil.toBean(s, clazz));
// } catch (ExecutionException | InterruptedException e) {
// log.error("调用大模型生成失败");
// }
// return Optional.empty();
// }
//
// private record ChatTask(OllamaChatClient chatClient, Prompt prompt) implements Callable<String> {
// @Override
// public String call() {
// ChatResponse call = chatClient.call(prompt);
// return call.getResult().getOutput().getContent();
// }
// }
}

@ -0,0 +1,14 @@
package com.supervision.knowsub.vo;
import lombok.Data;
import java.util.List;
import java.util.Set;
/**
 * Result of a RAG question: the generated answer and the names of the files it drew on.
 */
@Data
public class RagResVO {
/** Answer text. */
private String answer;
/** Distinct file names associated with the answer. */
private Set<String> fileName;
}

@ -1,6 +1,107 @@
#服务器端口
server:
port: 9203
servlet:
context-path: /know-sub-rag
undertow:
# Maximum size of an HTTP POST body; a value of -1 means unlimited
max-http-post-size: -1
# 以下的配置会影响buffer,这些buffer会用于服务器连接的IO操作,有点类似netty的池化内存管理
# 每块buffer的空间大小,越小的空间被利用越充分
buffer-size: 512
# 是否分配的直接内存
direct-buffers: true
spring:
elasticsearch:
uris: http://192.168.10.137:9200
ai:
ollama:
base-url: http://192.168.10.70:11434
chat:
enabled: true
options:
model: llama3-chinese:8b
keep-alive: 1000m
temperature: 0.1
vectorstore:
elasticsearch:
index-name: know-sub-rag-store
main:
allow-bean-definition-overriding: true
servlet:
multipart:
max-file-size: 100MB
max-request-size: 100MB
## 数据源配置
datasource:
type: com.alibaba.druid.pool.DruidDataSource
druid:
driver-class-name: com.mysql.cj.jdbc.Driver
url: jdbc:mysql://192.168.10.137:3306/know_sub?useUnicode=true&characterEncoding=utf-8&useSSL=true&nullCatalogMeansCurrent=true&serverTimezone=GMT%2B8
username: root
password: '123456'
initial-size: 5 # 初始化大小
min-idle: 10 # 最小连接数
max-active: 20 # 最大连接数
max-wait: 60000 # 获取连接时的最大等待时间
min-evictable-idle-time-millis: 300000 # 一个连接在池中最小生存的时间,单位是毫秒
time-between-eviction-runs-millis: 60000 # 多久才进行一次检测需要关闭的空闲连接,单位是毫秒
filters: stat,wall # 配置扩展插件stat-监控统计log4j-日志wall-防火墙防止SQL注入去掉后监控界面的sql无法统计
validation-query: SELECT 1 # 检测连接是否有效的 SQL语句为空时以下三个配置均无效
test-on-borrow: true # 申请连接时执行validationQuery检测连接是否有效默认true开启后会降低性能
test-on-return: true # 归还连接时执行validationQuery检测连接是否有效默认false开启后会降低性能
test-while-idle: true # 申请连接时如果空闲时间大于timeBetweenEvictionRunsMillis执行validationQuery检测连接是否有效默认false建议开启不影响性能
stat-view-servlet:
enabled: true # 是否开启 StatViewServlet
loginUsername: admin
loginPassword: 123456
filter:
stat:
enabled: true # 是否开启 FilterStat默认true
log-slow-sql: true # 是否开启 慢SQL 记录默认false
slow-sql-millis: 5000 # 慢 SQL 的标准,默认 3000单位毫秒
merge-sql: false # 合并多个连接池的监控数据默认false
mybatis-plus:
mapper-locations: classpath*:mapper/**/*.xml
configuration:
log-impl: org.apache.ibatis.logging.stdout.StdOutImpl
# springdoc-openapi项目配置
springdoc:
# Defaults to false; set to true so that GET parameters bound to a custom object are displayed correctly
default-flat-param-object: true
swagger-ui:
path: /swagger-ui.html
tags-sorter: alpha
operations-sorter: alpha
api-docs:
path: /v3/api-docs
group-configs:
- group: 'default'
paths-to-match: '/**'
packages-to-scan: com.supervision
# knife4j的增强配置不需要增强可以不配
knife4j:
enable: true
setting:
language: zh_cn
user:
# 默认密码
default:
password: 123456
#spring:
# elasticsearch:
# uris: http://192.168.10.137:9200
embedding:
url: http://192.168.10.137:8711/embeddings/
url: http://192.168.10.137:8711/embeddings/
vector:
redis:
uri: redis://:123456@192.168.10.137:6380
indexName: 'know-sub-rag-store'
prefix: 'know-sub-rag-store:'

@ -19,7 +19,8 @@
<module>know_sub_common</module>
<module>know_sub_business</module>
<module>know_sub_model</module>
<!-- <module>know_sub_rag</module>-->
<module>know_sub_rag</module>
<module>know_sub_etl</module>
</modules>
@ -44,6 +45,7 @@
<dependencyManagement>
<dependencies>
<dependency>
<groupId>io.springboot.ai</groupId>
<artifactId>spring-ai-bom</artifactId>
@ -52,6 +54,14 @@
<scope>import</scope>
</dependency>
<dependency>
<groupId>org.springframework.ai</groupId>
<artifactId>spring-ai-bom</artifactId>
<version>1.0.0-M1</version>
<type>pom</type>
<scope>import</scope>
</dependency>
<dependency>
<groupId>mysql</groupId>
<artifactId>mysql-connector-java</artifactId>
@ -112,4 +122,37 @@
</dependencies>
</dependencyManagement>
<repositories>
<repository>
<id>central</id>
<name>aliyun central repo</name>
<url>https://maven.aliyun.com/nexus/content/repositories/central/</url>
<layout>default</layout>
<releases>
<enabled>true</enabled>
<updatePolicy>daily</updatePolicy>
</releases>
<snapshots>
<enabled>false</enabled>
<updatePolicy>never</updatePolicy>
</snapshots>
</repository>
<repository>
<id>spring-snapshots</id>
<name>Spring Snapshots</name>
<url>https://repo.spring.io/snapshot</url>
<snapshots>
<enabled>false</enabled>
</snapshots>
</repository>
<repository>
<id>spring-milestones</id>
<name>Spring Milestones</name>
<url>https://repo.spring.io/milestone</url>
<snapshots>
<enabled>false</enabled>
</snapshots>
</repository>
</repositories>
</project>

Loading…
Cancel
Save