From da52d5ced414a6f83ce4e1534cb765813dfd42e4 Mon Sep 17 00:00:00 2001 From: xueqingkun Date: Fri, 24 May 2024 15:01:32 +0800 Subject: [PATCH] =?UTF-8?q?=E6=B7=BB=E5=8A=A0=E6=B5=8B=E8=AF=95=E7=B1=BB?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../test/java/com/supervision/VecTest.java | 340 +++++++++++++++++- 1 file changed, 334 insertions(+), 6 deletions(-) diff --git a/virtual-patient-web/src/test/java/com/supervision/VecTest.java b/virtual-patient-web/src/test/java/com/supervision/VecTest.java index 6a5ca53e..57b8079b 100644 --- a/virtual-patient-web/src/test/java/com/supervision/VecTest.java +++ b/virtual-patient-web/src/test/java/com/supervision/VecTest.java @@ -13,6 +13,7 @@ import cn.hutool.json.JSONObject; import cn.hutool.json.JSONUtil; import cn.hutool.poi.excel.ExcelReader; import cn.hutool.poi.excel.ExcelUtil; +import cn.hutool.poi.excel.ExcelWriter; import com.supervision.model.AskTemplateQuestionLibrary; import com.supervision.model.CommonDic; import com.supervision.service.AskTemplateQuestionLibraryService; @@ -24,9 +25,7 @@ import org.springframework.beans.factory.annotation.Autowired; import org.springframework.boot.test.context.SpringBootTest; import org.springframework.test.context.junit4.SpringJUnit4ClassRunner; -import java.util.HashMap; -import java.util.List; -import java.util.Map; +import java.util.*; import java.util.stream.Collectors; @Slf4j @@ -42,9 +41,9 @@ public class VecTest { private CommonDicService commonDicService; - private static final String BASE_URL = "http://192.168.10.137:8711"; + private static final String BASE_URL = "http://192.168.10.137:8719"; @Test - public void mainTest() throws Exception { + public void questionCompareGenerateTest() { String filePath = "F:\\tmp\\1\\副本医生临床问诊问题收集v1.xlsx"; ExcelReader reader = ExcelUtil.getReader(filePath,1); @@ -94,11 +93,340 @@ public class VecTest { } log.info("开始把结果数据写入表格"); - ExcelUtil.getWriter(filePath,"结果对比").write(dataList).flush() ; + ExcelUtil.getWriter(filePath,"结果对比-2").write(dataList).flush() ; log.info("结束把结果数据写入表格"); } + + @Test + public void questionEqualGenerateTest() { + + List libraries = askTemplateQuestionLibraryService.list() + .stream().peek(library -> library.getQuestion().forEach(StrUtil::trim)) + .collect(Collectors.toList()); + + List commonDics = commonDicService.lambdaQuery().eq(CommonDic::getGroupCode, "AQT").list(); + Map dicMap = commonDics.stream().collect(Collectors.toMap(CommonDic::getId, v -> v)); + + // 构建问题map id -->> question --->> library + Map> questionMap = new HashMap<>(); + for (AskTemplateQuestionLibrary library : libraries) { + Map map = library.getQuestion().stream().collect(Collectors.toMap(v -> v, v -> library)); + questionMap.put(library.getId(), map); + } + + // 循环处理数据 + List> result = new ArrayList<>(); + for (AskTemplateQuestionLibrary library : libraries) { + List questionList = library.getQuestion(); + for (String sourceQuestion : questionList) { + //确认同组数据中是否存在相同的问题 + long count = questionList.stream().filter(v -> StrUtil.equals(sourceQuestion, v)).count(); + if (count > 1 ){ + Map map = new HashMap<>(); + map.put("sourceId",library.getId()); + map.put("sourceDesc",library.getDescription()); + map.put("sourceQuestion",sourceQuestion); + map.put("sourceQuestionCode",library.getCode()); + map.put("sourceDicPath",dicMap.get(library.getDictId()).getNameZhPath()); + map.put("targetId",library.getId()); + map.put("targetDesc",library.getDescription()); + map.put("targetQuestion",sourceQuestion); + map.put("sameSource","是"); + map.put("targetDicPath",dicMap.get(library.getDictId()).getNameZhPath()); + result.add(map); + } + + // 确认是否存在重复的问题数据 + for (Map.Entry> firstEntry : questionMap.entrySet()) { + if (StrUtil.equals(library.getId(), firstEntry.getKey())){ + continue; + } + Map questionLibraryMap = firstEntry.getValue(); + for (Map.Entry secondEntry : questionLibraryMap.entrySet()) { + String targetQuestion = secondEntry.getKey(); + AskTemplateQuestionLibrary targetLibrary = secondEntry.getValue(); + if (StrUtil.equals(sourceQuestion, targetQuestion)){ + HashMap map = new HashMap<>(); + map.put("sourceId",library.getId()); + map.put("sourceDesc",library.getDescription()); + map.put("sourceQuestion",sourceQuestion); + map.put("sourceDicPath",dicMap.get(library.getDictId()).getNameZhPath()); + map.put("targetId",targetLibrary.getId()); + map.put("targetDesc",targetLibrary.getDescription()); + map.put("targetQuestion",targetQuestion); + // 如果id相等则是同源数据 + map.put("sameSource","否"); + map.put("targetDicPath",dicMap.get(targetLibrary.getDictId()).getNameZhPath()); + result.add(map); + } + } + } + } + } + + // 写出数据 + String filePath = "F:\\tmp\\1\\问题库问题对比.xlsx"; + ExcelWriter writer = ExcelUtil.getWriter(filePath, "完全相等对比"); + writer.addHeaderAlias("sourceId", "源问题库id"); + writer.addHeaderAlias("sourceDesc", "源问题库描述"); + writer.addHeaderAlias("sourceQuestion", "源问题"); + writer.addHeaderAlias("sourceDicPath", "源分类"); + writer.addHeaderAlias("targetId", "目标问题库id"); + writer.addHeaderAlias("targetDesc", "目标问题库描述"); + writer.addHeaderAlias("targetQuestion", "目标问题"); + writer.addHeaderAlias("sameSource", "是否同源"); + writer.addHeaderAlias("targetDicPath", "目标分类"); + writer.write(result).flush(); + + } + + @Test + public void questionSimilarityGenerateTest() { + + List questionLibraries = askTemplateQuestionLibraryService.list(); + List commonDics = commonDicService.lambdaQuery().eq(CommonDic::getGroupCode, "AQT").list(); + Map dicMap = commonDics.stream().collect(Collectors.toMap(CommonDic::getId, v -> v)); + + Map libraryMap = questionLibraries.stream().collect(Collectors.toMap(AskTemplateQuestionLibrary::getId, library -> library)); + // compare + + List> result = new ArrayList<>(); + for (AskTemplateQuestionLibrary questionLibrary : questionLibraries) { + List questionList = questionLibrary.getQuestion(); + for (String question : questionList) { + List> maps = questionMatch(question); + if (CollUtil.isEmpty(maps)){ + log.warn("questionMatch:问题:{}没有匹配到结果", question); + continue; + } + for (Map map : maps) { + String targetId = MapUtil.getStr(map, "matchQuestionCode"); + if (!StrUtil.equals(questionLibrary.getId(),targetId)){ + HashMap map1 = new HashMap<>(); + map1.put("sourceId",questionLibrary.getId()); + map1.put("sourceDesc",questionLibrary.getDescription()); + map1.put("sourceQuestion",question); + CommonDic commonDic = dicMap.get(questionLibrary.getDictId()); + map1.put("sourceDicPath", commonDic.getNameZhPath()); + map1.put("targetId",targetId); + AskTemplateQuestionLibrary targetLibrary = libraryMap.get(targetId); + map1.put("targetDesc",targetLibrary.getDescription()); + map1.put("targetQuestion",MapUtil.getStr(map, "matchQuestion")); + map1.put("matchScore",MapUtil.getStr(map, "matchScore")); + map1.put("targetDicPath",dicMap.get(targetLibrary.getDictId()).getNameZhPath()); + result.add(map1); + } + } + } + } + + String filePath = "F:\\tmp\\1\\问题库问题对比.xlsx"; + ExcelWriter writer = ExcelUtil.getWriter(filePath, "向量相似度对比"); + writer.addHeaderAlias("sourceId", "源问题id"); + writer.addHeaderAlias("sourceDesc", "源问题描述"); + writer.addHeaderAlias("sourceQuestion", "源问题"); + writer.addHeaderAlias("sourceDicPath", "源分类"); + writer.addHeaderAlias("targetId", "目标问题"); + writer.addHeaderAlias("targetDesc", "目标问题描述"); + writer.addHeaderAlias("targetQuestion", "目标问题"); + writer.addHeaderAlias("matchScore", "相似度"); + writer.addHeaderAlias("targetDicPath", "目标分类"); + writer.write(result,true); + writer.close(); + + } + + + @Test + public void questionSimilarityTopGenerateTest() { + List questionLibraries = askTemplateQuestionLibraryService.list(); + List commonDics = commonDicService.lambdaQuery().eq(CommonDic::getGroupCode, "AQT").list(); + Map dicMap = commonDics.stream().collect(Collectors.toMap(CommonDic::getId, v -> v)); + Map libraryMap = questionLibraries.stream().collect(Collectors.toMap(AskTemplateQuestionLibrary::getId, library -> library)); + + List> result = new ArrayList<>(); + for (AskTemplateQuestionLibrary questionLibrary : questionLibraries) { + for (String question : questionLibrary.getQuestion()) { + + List> maps = questionMatch(question); + // 只获取前四条数据 + if (CollUtil.isEmpty(maps)){ + log.warn("questionMatch:问题:{}没有匹配到结果", question); + continue; + } + + if (maps.size() > 3){ + // 截取前四条数据 + maps = maps.subList(0, 4); + } + + int groupRank = 1; + for (Map matchMap : maps) { + Map map = new HashMap<>(); + String targetId = MapUtil.getStr(matchMap, "matchQuestionCode"); + String matchQuestion = MapUtil.getStr(matchMap, "matchQuestion"); + String matchScore = MapUtil.getStr(matchMap, "matchScore"); + map.put("sourceId", questionLibrary.getId()); + map.put("sourceDesc", questionLibrary.getDescription()); + map.put("sourceQuestion",question); + String sourceNamePath = dicMap.get(questionLibrary.getDictId()).getNameZhPath(); + map.put("sourceDicPath", sourceNamePath); + + map.put("targetId",targetId); + AskTemplateQuestionLibrary targetLibrary = libraryMap.get(targetId); + map.put("targetDesc",targetLibrary.getDescription()); + map.put("targetQuestion",matchQuestion); + String targetNamePath = dicMap.get(targetLibrary.getDictId()).getNameZhPath(); + map.put("targetDicPath",targetNamePath); + map.put("matchScore",matchScore); + + // 问题是否相等 + map.put("questionEq",StrUtil.equals(question,matchQuestion) ? "是" : "否"); + // 分类是否相等 + map.put("questionDicEq",StrUtil.equals(sourceNamePath,targetNamePath) ? "是" : "否"); + + map.put("groupRank",groupRank); + groupRank ++; + result.add(map); + } + + } + String filePath = "F:\\tmp\\1\\问题库问题对比.xlsx"; + ExcelWriter writer = ExcelUtil.getWriter(filePath, "多维度向量相似度对比"); + writer.addHeaderAlias("sourceId", "源问题id"); + writer.addHeaderAlias("sourceDesc", "源问题描述"); + writer.addHeaderAlias("sourceQuestion", "源问题"); + writer.addHeaderAlias("sourceDicPath", "源分类"); + writer.addHeaderAlias("targetId", "目标问题"); + writer.addHeaderAlias("targetDesc", "目标问题描述"); + writer.addHeaderAlias("targetQuestion", "目标问题"); + writer.addHeaderAlias("matchScore", "相似度"); + writer.addHeaderAlias("targetDicPath", "目标分类"); + writer.addHeaderAlias("questionEq", "同一语句"); + writer.addHeaderAlias("questionDicEq", "同一分类"); + writer.addHeaderAlias("groupRank", "排序"); + writer.write(result,true); + writer.close(); + } + } + + + @Test + public void questionCaseGenerateTest() { + + + List questionLibraries = askTemplateQuestionLibraryService.list(); + List commonDics = commonDicService.lambdaQuery().eq(CommonDic::getGroupCode, "AQT").list(); + Map dicMap = commonDics.stream().collect(Collectors.toMap(CommonDic::getId, v -> v)); + Map libraryMap = questionLibraries.stream().collect(Collectors.toMap(AskTemplateQuestionLibrary::getId, library -> library)); + + String path = "F:\\tmp\\1\\门诊收集0524.xlsx"; + + ExcelReader reader = ExcelUtil.getReader(path); + List> maps = reader.readAll(); + + for (Map map : maps) { + String question = MapUtil.getStr(map, "日常问诊"); + List> matchMapList = questionMatch(question); + if (CollUtil.isEmpty(matchMapList)) { + log.warn("questionMatch:问题:{}没有匹配到结果", question); + continue; + } + Map first = CollUtil.getFirst(matchMapList); + String matchQuestion = MapUtil.getStr(first, "matchQuestion"); + String score = MapUtil.getStr(first, "matchScore"); + String targetId = MapUtil.getStr(first, "matchQuestionCode"); + map.put("targetId", targetId); + map.put("targetQuestion", matchQuestion); + AskTemplateQuestionLibrary targetLibrary = libraryMap.get(targetId); + map.put("targetDesc", targetLibrary.getDescription()); + Long dictId = targetLibrary.getDictId(); + map.put("targetDicPath", dicMap.get(dictId).getNameZhPath()); + map.put("matchScore", score); + } + + ExcelWriter writer = ExcelUtil.getWriter(path, "问询结果"); + writer.addHeaderAlias("病例", "病例"); + writer.addHeaderAlias("日常问诊", "日常问诊"); + writer.addHeaderAlias("targetId", "目标问题"); + writer.addHeaderAlias("targetDesc", "目标问题描述"); + writer.addHeaderAlias("targetQuestion", "目标问题"); + writer.addHeaderAlias("matchScore", "相似度"); + writer.addHeaderAlias("targetDicPath", "目标分类"); + writer.write(maps,true); + writer.close(); + } + + private List> questionMatch(String question){ + Assert.notEmpty(question, "问题不能为空"); + + Map params = new HashMap<>(); + params.put("question",question); + HttpRequest request = HttpRequest.post(BASE_URL + "/matchQuestion") + .body(JSONUtil.toJsonStr(params)); + log.info("askMatch:question:{}", question); + try (HttpResponse response = request.execute()){ + String body = response.body(); + log.info("askMatch:响应结果:{}", body); + JSONObject entries = JSONUtil.parseObj(body); + if (entries.get("code",Integer.class) != 200){ + log.warn("askMatch:匹配失败"); + return null; + } + JSONArray jsonArray = JSONUtil.parseArray(entries.get("data")); + if (CollUtil.isNotEmpty(jsonArray)){ + ArrayList> maps = new ArrayList<>(); + for (Object o : jsonArray) { + maps.add(JSONUtil.toBean(JSONUtil.toJsonStr(o), Map.class)); + } + return maps; + } + }catch (Exception e){ + log.error("chat:请求失败", e); + } + + log.warn("askMatch:匹配失败"); + return null; + } + + + @Test + public void initVecData() { + List questionLibraries = askTemplateQuestionLibraryService.list(); + + // load vec data + List> vecData = questionLibraries.stream().map(library -> { + Map map = new HashMap<>(); + map.put("questionCode", library.getId()); + map.put("questionList", library.getQuestion()); + return map; + }).collect(Collectors.toList()); + saveVec(vecData); + } + + private void saveVec(List> List){ + + HttpRequest request = HttpRequest.post(BASE_URL + "/updateDatabase") + .body(JSONUtil.toJsonStr(List)); + + log.info("saveVec:请求参数:{}", JSONUtil.toJsonStr(List)); + + try (HttpResponse response = request.execute()){ + String body = response.body(); + log.info("saveVec:响应结果:{}", body); + JSONObject entries = JSONUtil.parseObj(body); + if (!"success".equals(entries.get("status",String.class))){ + log.warn("saveVec:保存失败"); + throw new RuntimeException("saveVec:保存失败"); + } + }catch (Exception e){ + log.error("saveVec:保存失败", e); + throw new RuntimeException("saveVec:保存失败"); + } + } + private String askMatch(String question){ Assert.notEmpty(question, "问题不能为空");