diff --git a/.gitignore b/.gitignore index 5d381cc..d746071 100644 --- a/.gitignore +++ b/.gitignore @@ -160,3 +160,4 @@ cython_debug/ # option (not recommended) you can uncomment the following to ignore the entire idea folder. #.idea/ +/.idea diff --git a/.idea/.gitignore b/.idea/.gitignore deleted file mode 100644 index 35410ca..0000000 --- a/.idea/.gitignore +++ /dev/null @@ -1,8 +0,0 @@ -# 默认忽略的文件 -/shelf/ -/workspace.xml -# 基于编辑器的 HTTP 客户端请求 -/httpRequests/ -# Datasource local storage ignored files -/dataSources/ -/dataSources.local.xml diff --git a/.idea/deployment.xml b/.idea/deployment.xml deleted file mode 100644 index 0fb5507..0000000 --- a/.idea/deployment.xml +++ /dev/null @@ -1,56 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml deleted file mode 100644 index 3f552f9..0000000 --- a/.idea/inspectionProfiles/Project_Default.xml +++ /dev/null @@ -1,28 +0,0 @@ - - - - \ No newline at end of file diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml deleted file mode 100644 index 105ce2d..0000000 --- a/.idea/inspectionProfiles/profiles_settings.xml +++ /dev/null @@ -1,6 +0,0 @@ - - - - \ No newline at end of file diff --git a/.idea/interro_robot_tool.iml b/.idea/interro_robot_tool.iml deleted file mode 100644 index 0071ccf..0000000 --- a/.idea/interro_robot_tool.iml +++ /dev/null @@ -1,12 +0,0 @@ - - - - - - - - - - \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml deleted file mode 100644 index f3bddf6..0000000 --- a/.idea/misc.xml +++ /dev/null @@ -1,4 +0,0 @@ - - - - \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml deleted file mode 100644 index d8eea58..0000000 --- a/.idea/modules.xml +++ /dev/null @@ -1,8 +0,0 @@ - - - - - - - - \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml deleted file mode 100644 index 35eb1dd..0000000 --- a/.idea/vcs.xml +++ /dev/null @@ -1,6 +0,0 @@ - - - - - - \ No newline at end of file diff --git a/config/knowledge_base_names.txt b/config/knowledge_base_names.txt index 0f144fb..45c521e 100644 --- a/config/knowledge_base_names.txt +++ b/config/knowledge_base_names.txt @@ -1,2 +1,2 @@ -a0f9848b-2d65-4b37-85ca-6712061f01c0 -38de6667-4f5d-4f0a-8165-992ab76c1424 \ No newline at end of file +f5361731-865c-4c36-90a5-70499c207562 +2d5cdfb8-b1ec-4e29-9e0d-45bfd48afedf \ No newline at end of file diff --git a/faiss_cache.py b/faiss_cache.py index a62bf3b..7e61488 100644 --- a/faiss_cache.py +++ b/faiss_cache.py @@ -101,7 +101,7 @@ class KBFaissPool(_FaissPool): if os.path.isfile(os.path.join(vs_path, "index.faiss")): # load the embedding model embeddings = self.load_kb_embeddings(local_model_path=embed_local_model_path, embed_device=embed_device) - vector_store = FAISS.load_local(vs_path, embeddings, normalize_L2=True,distance_strategy="METRIC_INNER_PRODUCT") + vector_store = FAISS.load_local(vs_path, embeddings, normalize_L2=True,distance_strategy="METRIC_INNER_PRODUCT", allow_dangerous_deserialization=True) elif create: # create an empty vector store diff --git a/fast_api.py b/fast_api.py index d0949e1..5c7224c 100644 --- a/fast_api.py +++ b/fast_api.py @@ -1,53 +1,58 @@ -from fastapi import FastAPI, HTTPException, BackgroundTasks -from qa_Ask import QAService, match_query, store_data -from pydantic import BaseModel -from collections import deque -import requests +# coding=gbk +import yaml +import sys import os import time import uuid import json import shutil -import yaml import logging +from collections import deque +from pydantic import BaseModel +from fastapi import BackgroundTasks +from fastapi import FastAPI, HTTPException +from qa_Ask import QAService, match_query, store_data app = FastAPI() -import sys - -# 配置日志记录到文件和终端 +# ־¼ļն logging.basicConfig( level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s', handlers=[ logging.FileHandler('log/app.log'), - logging.StreamHandler(sys.stdout) # 添加控制台处理程序 + logging.StreamHandler(sys.stdout) # ӿ̨ ] ) logger = logging.getLogger(__name__) + class QuestionRequest(BaseModel): question: str scoreThreshold: float + class QuestionResponse(BaseModel): code: int msg: str data: list + class QuestionItem(BaseModel): questionId: str questionList: list[str] + class InputText(BaseModel): inputText: str + class ExtractedInfo(BaseModel): name: str cardNumber: str - idNnumber: str + idNumber: str + -# 读取配置文件 with open('config/config.yaml', 'r') as config_file: config_data = yaml.safe_load(config_file) @@ -56,21 +61,24 @@ api_url = config_data['api']['url'] path = config_data['output_file_path'] max_knowledge_bases = config_data['max_knowledge_bases'] + def load_knowledge_bases(): - """加载知识库名称列表""" + """֪ʶб""" if os.path.exists(knowledge_base_file): with open(knowledge_base_file, "r") as file: return file.read().splitlines() else: return [] + def save_knowledge_bases(names): - """保存知识库名称列表到文件""" + """֪ʶбļ""" with open(knowledge_base_file, "w") as file: file.write("\n".join(names)) + def update_kb(kb_name, qa_service, path, max_knowledge_bases): - """更新知识库""" + """֪ʶ""" store_data(qa_service, path) if len(recent_knowledge_bases) == max_knowledge_bases: @@ -82,19 +90,21 @@ def update_kb(kb_name, qa_service, path, max_knowledge_bases): os.remove(path) logger.info(f"Knowledge base updated: {kb_name}\n" - f"Please wait while the database is being updated···") + f"Please wait while the database is being updated") + recent_knowledge_bases = deque(load_knowledge_bases(), maxlen=max_knowledge_bases) + def text_to_number(text_id): - chinese_nums = {'零': '0', '一': '1', '二': '2', '三': '3', '四': '4', '五': '5', '六': '6', '七': '7', '八': '8', '九': '9'} - for chinese_num, arabic_num in chinese_nums.items(): - text_id = text_id.replace(chinese_num, arabic_num) - return text_id + chinese_nums = {'': '0', 'һ': '1', '': '2', '': '3', '': '4', '': '5', '': '6', '': '7', '': '8', '': '9'} + translation_table = str.maketrans(chinese_nums) + return text_id.translate(translation_table) + @app.post("/updateDatabase") async def save_to_json(question_items: list[QuestionItem], background_tasks: BackgroundTasks): - """接收问题数据并异步保存为JSON文件,触发后台更新任务""" + """ݲ첽ΪJSONļ̨""" try: json_data = json.dumps([item.dict() for item in question_items], ensure_ascii=False, indent=2) path = "output.json" @@ -111,16 +121,17 @@ async def save_to_json(question_items: list[QuestionItem], background_tasks: Bac update_kb, kb_name, qa_service, path, max_knowledge_bases ) - return {"status": "success", "message": "Please wait while the database is being updated···"} + return {"status": "success", "message": "Please wait while the database is being updated"} except Exception as e: logger.error(f"Error saving data to file or scheduling knowledge base update task: {e}") # raise HTTPException(status_code=500, detail=f"Internal Server Error: {str(e)}") - return {"status": "error", "message": "update task error···"} + return {"status": "error", "message": "update task error"} + @app.post("/matchQuestion") def match_question(request: QuestionRequest): - """匹配问题的端点""" + """ƥĶ˵""" try: logger.info(f"match_question:Request: {request}") start_time = time.time() @@ -147,33 +158,40 @@ def match_question(request: QuestionRequest): logger.error(f"Error matching question: {e}") return QuestionResponse(code=500, msg="success", data=[]) + +from paddlenlp import Taskflow +corrector = Taskflow("text_correction") +schema = ["", '', '永Ա', "֤", "֤", "", "׿", "п", ] + +name = Taskflow('information_extraction', schema=schema[:2], model='uie-base') +identity = Taskflow('information_extraction', schema=schema[3:5], model='uie-base') +card = Taskflow('information_extraction', schema=schema[5:8], model='uie-base') + + @app.post("/extractInformation") async def extract_information(input_data: InputText): - """提取信息的端点""" + """ȡϢĶ˵""" try: - inputText = input_data.inputText - from paddlenlp import Taskflow - - corrector = Taskflow("text_correction") - data = corrector(inputText) + input_text = input_data.inputText + data = corrector(input_text) target_value = data[0]['target'] + converted_id = text_to_number(target_value + '') - converted_id = text_to_number(target_value) - - schema = ["姓名", '嫌疑人', '涉案人员', "身份证号", "交易证件号", "卡号", "交易卡号", "银行卡号", ] - ie = Taskflow('information_extraction', schema=schema, model='uie-base') - extracted_info = ie(converted_id) + extracted_info = {} + for model_name, model in zip(["name", "identity", "card"], [name, identity, card]): + extracted_info[model_name] = model(converted_id) result = {} - for item in extracted_info: - for key, value in item.items(): - result[key.lower()] = value[0]['text'] + for model_name, info_list in extracted_info.items(): + for item in info_list: + for key, value in item.items(): + result[key.lower()] = value[0]['text'] extracted_result = ExtractedInfo( - name=result.get('姓名', '') or result.get('嫌疑人', '') or result.get('涉案人员', ''), - cardNumber=result.get('卡号', '') or result.get('交易卡号', '') or result.get('银行卡号', ''), - idNnumber=result.get('身份证号', '') or result.get('交易证件号', '') or result.get('交易证件号', '') + name=result.get('', '') or result.get('', '') or result.get('永Ա', ''), + cardNumber=result.get('', '') or result.get('׿', '') or result.get('п', ''), + idNumber=result.get('֤', '') or result.get('֤', '') or result.get('֤', '') ) return extracted_result @@ -182,7 +200,9 @@ async def extract_information(input_data: InputText): logger.error(f"Error extracting information: {e}") raise HTTPException(status_code=500, detail="Internal Server Error") + if __name__ == "__main__": import uvicorn - uvicorn.run(app, host="0.0.0.0", port=8000) + uvicorn.run(app, host="0.0.0.0", port=8001) + diff --git a/knowledge_base/38de6667-4f5d-4f0a-8165-992ab76c1424/vector_store/FAISS/index.faiss b/knowledge_base/38de6667-4f5d-4f0a-8165-992ab76c1424/vector_store/FAISS/index.faiss deleted file mode 100644 index 11f3caf..0000000 Binary files a/knowledge_base/38de6667-4f5d-4f0a-8165-992ab76c1424/vector_store/FAISS/index.faiss and /dev/null differ diff --git a/knowledge_base/38de6667-4f5d-4f0a-8165-992ab76c1424/vector_store/FAISS/index.pkl b/knowledge_base/38de6667-4f5d-4f0a-8165-992ab76c1424/vector_store/FAISS/index.pkl deleted file mode 100644 index 0747df7..0000000 Binary files a/knowledge_base/38de6667-4f5d-4f0a-8165-992ab76c1424/vector_store/FAISS/index.pkl and /dev/null differ diff --git a/knowledge_base/3e6eef9a-7bea-4a35-8312-088a921cebac/vector_store/FAISS/index.faiss b/knowledge_base/3e6eef9a-7bea-4a35-8312-088a921cebac/vector_store/FAISS/index.faiss deleted file mode 100644 index f16c24d..0000000 Binary files a/knowledge_base/3e6eef9a-7bea-4a35-8312-088a921cebac/vector_store/FAISS/index.faiss and /dev/null differ diff --git a/knowledge_base/3e6eef9a-7bea-4a35-8312-088a921cebac/vector_store/FAISS/index.pkl b/knowledge_base/3e6eef9a-7bea-4a35-8312-088a921cebac/vector_store/FAISS/index.pkl deleted file mode 100644 index 0799a5e..0000000 Binary files a/knowledge_base/3e6eef9a-7bea-4a35-8312-088a921cebac/vector_store/FAISS/index.pkl and /dev/null differ diff --git a/knowledge_base/74dffff8-590f-4f1e-8440-7a45e7aad169/vector_store/FAISS/index.faiss b/knowledge_base/74dffff8-590f-4f1e-8440-7a45e7aad169/vector_store/FAISS/index.faiss deleted file mode 100644 index b32ecde..0000000 Binary files a/knowledge_base/74dffff8-590f-4f1e-8440-7a45e7aad169/vector_store/FAISS/index.faiss and /dev/null differ diff --git a/knowledge_base/74dffff8-590f-4f1e-8440-7a45e7aad169/vector_store/FAISS/index.pkl b/knowledge_base/74dffff8-590f-4f1e-8440-7a45e7aad169/vector_store/FAISS/index.pkl deleted file mode 100644 index 377bfaf..0000000 Binary files a/knowledge_base/74dffff8-590f-4f1e-8440-7a45e7aad169/vector_store/FAISS/index.pkl and /dev/null differ diff --git a/knowledge_base/a0f9848b-2d65-4b37-85ca-6712061f01c0/vector_store/FAISS/index.faiss b/knowledge_base/a0f9848b-2d65-4b37-85ca-6712061f01c0/vector_store/FAISS/index.faiss deleted file mode 100644 index 5c2a80b..0000000 Binary files a/knowledge_base/a0f9848b-2d65-4b37-85ca-6712061f01c0/vector_store/FAISS/index.faiss and /dev/null differ diff --git a/knowledge_base/a0f9848b-2d65-4b37-85ca-6712061f01c0/vector_store/FAISS/index.pkl b/knowledge_base/a0f9848b-2d65-4b37-85ca-6712061f01c0/vector_store/FAISS/index.pkl deleted file mode 100644 index 1e1ba16..0000000 Binary files a/knowledge_base/a0f9848b-2d65-4b37-85ca-6712061f01c0/vector_store/FAISS/index.pkl and /dev/null differ diff --git a/knowledge_base/a9281337-4313-4699-a49c-68da829c4884/vector_store/FAISS/index.faiss b/knowledge_base/a9281337-4313-4699-a49c-68da829c4884/vector_store/FAISS/index.faiss deleted file mode 100644 index b32ecde..0000000 Binary files a/knowledge_base/a9281337-4313-4699-a49c-68da829c4884/vector_store/FAISS/index.faiss and /dev/null differ diff --git a/knowledge_base/a9281337-4313-4699-a49c-68da829c4884/vector_store/FAISS/index.pkl b/knowledge_base/a9281337-4313-4699-a49c-68da829c4884/vector_store/FAISS/index.pkl deleted file mode 100644 index 377bfaf..0000000 Binary files a/knowledge_base/a9281337-4313-4699-a49c-68da829c4884/vector_store/FAISS/index.pkl and /dev/null differ diff --git a/knowledge_base/db5f0e79-263d-46f0-a959-a48e32390d53/vector_store/FAISS/index.faiss b/knowledge_base/db5f0e79-263d-46f0-a959-a48e32390d53/vector_store/FAISS/index.faiss deleted file mode 100644 index 5c2a80b..0000000 Binary files a/knowledge_base/db5f0e79-263d-46f0-a959-a48e32390d53/vector_store/FAISS/index.faiss and /dev/null differ diff --git a/knowledge_base/db5f0e79-263d-46f0-a959-a48e32390d53/vector_store/FAISS/index.pkl b/knowledge_base/db5f0e79-263d-46f0-a959-a48e32390d53/vector_store/FAISS/index.pkl deleted file mode 100644 index 166b184..0000000 Binary files a/knowledge_base/db5f0e79-263d-46f0-a959-a48e32390d53/vector_store/FAISS/index.pkl and /dev/null differ diff --git a/knowledge_base/e9e61b56-69ee-4420-bb03-9841620560fa/vector_store/FAISS/index.faiss b/knowledge_base/e9e61b56-69ee-4420-bb03-9841620560fa/vector_store/FAISS/index.faiss deleted file mode 100644 index 5c2a80b..0000000 Binary files a/knowledge_base/e9e61b56-69ee-4420-bb03-9841620560fa/vector_store/FAISS/index.faiss and /dev/null differ diff --git a/knowledge_base/e9e61b56-69ee-4420-bb03-9841620560fa/vector_store/FAISS/index.pkl b/knowledge_base/e9e61b56-69ee-4420-bb03-9841620560fa/vector_store/FAISS/index.pkl deleted file mode 100644 index 1e1ba16..0000000 Binary files a/knowledge_base/e9e61b56-69ee-4420-bb03-9841620560fa/vector_store/FAISS/index.pkl and /dev/null differ