diff --git a/.gitignore b/.gitignore
index 5d381cc..d746071 100644
--- a/.gitignore
+++ b/.gitignore
@@ -160,3 +160,4 @@ cython_debug/
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
+/.idea
diff --git a/.idea/.gitignore b/.idea/.gitignore
deleted file mode 100644
index 35410ca..0000000
--- a/.idea/.gitignore
+++ /dev/null
@@ -1,8 +0,0 @@
-# 默认忽略的文件
-/shelf/
-/workspace.xml
-# 基于编辑器的 HTTP 客户端请求
-/httpRequests/
-# Datasource local storage ignored files
-/dataSources/
-/dataSources.local.xml
diff --git a/.idea/deployment.xml b/.idea/deployment.xml
deleted file mode 100644
index 0fb5507..0000000
--- a/.idea/deployment.xml
+++ /dev/null
@@ -1,56 +0,0 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
\ No newline at end of file
diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml
deleted file mode 100644
index 3f552f9..0000000
--- a/.idea/inspectionProfiles/Project_Default.xml
+++ /dev/null
@@ -1,28 +0,0 @@
-
-
-
-
-
-
-
-
-
\ No newline at end of file
diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml
deleted file mode 100644
index 105ce2d..0000000
--- a/.idea/inspectionProfiles/profiles_settings.xml
+++ /dev/null
@@ -1,6 +0,0 @@
-
-
-
-
-
-
\ No newline at end of file
diff --git a/.idea/interro_robot_tool.iml b/.idea/interro_robot_tool.iml
deleted file mode 100644
index 0071ccf..0000000
--- a/.idea/interro_robot_tool.iml
+++ /dev/null
@@ -1,12 +0,0 @@
-
-
-
-
-
-
-
-
-
-
-
-
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
deleted file mode 100644
index f3bddf6..0000000
--- a/.idea/misc.xml
+++ /dev/null
@@ -1,4 +0,0 @@
-
-
-
-
\ No newline at end of file
diff --git a/.idea/modules.xml b/.idea/modules.xml
deleted file mode 100644
index d8eea58..0000000
--- a/.idea/modules.xml
+++ /dev/null
@@ -1,8 +0,0 @@
-
-
-
-
-
-
-
-
\ No newline at end of file
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
deleted file mode 100644
index 35eb1dd..0000000
--- a/.idea/vcs.xml
+++ /dev/null
@@ -1,6 +0,0 @@
-
-
-
-
-
-
\ No newline at end of file
diff --git a/config/knowledge_base_names.txt b/config/knowledge_base_names.txt
index 0f144fb..45c521e 100644
--- a/config/knowledge_base_names.txt
+++ b/config/knowledge_base_names.txt
@@ -1,2 +1,2 @@
-a0f9848b-2d65-4b37-85ca-6712061f01c0
-38de6667-4f5d-4f0a-8165-992ab76c1424
\ No newline at end of file
+f5361731-865c-4c36-90a5-70499c207562
+2d5cdfb8-b1ec-4e29-9e0d-45bfd48afedf
\ No newline at end of file
diff --git a/faiss_cache.py b/faiss_cache.py
index a62bf3b..7e61488 100644
--- a/faiss_cache.py
+++ b/faiss_cache.py
@@ -101,7 +101,7 @@ class KBFaissPool(_FaissPool):
if os.path.isfile(os.path.join(vs_path, "index.faiss")):
# load the embedding model
embeddings = self.load_kb_embeddings(local_model_path=embed_local_model_path, embed_device=embed_device)
- vector_store = FAISS.load_local(vs_path, embeddings, normalize_L2=True,distance_strategy="METRIC_INNER_PRODUCT")
+ vector_store = FAISS.load_local(vs_path, embeddings, normalize_L2=True,distance_strategy="METRIC_INNER_PRODUCT", allow_dangerous_deserialization=True)
elif create:
# create an empty vector store
diff --git a/fast_api.py b/fast_api.py
index d0949e1..5c7224c 100644
--- a/fast_api.py
+++ b/fast_api.py
@@ -1,53 +1,58 @@
-from fastapi import FastAPI, HTTPException, BackgroundTasks
-from qa_Ask import QAService, match_query, store_data
-from pydantic import BaseModel
-from collections import deque
-import requests
+# coding=gbk
+import yaml
+import sys
import os
import time
import uuid
import json
import shutil
-import yaml
import logging
+from collections import deque
+from pydantic import BaseModel
+from fastapi import BackgroundTasks
+from fastapi import FastAPI, HTTPException
+from qa_Ask import QAService, match_query, store_data
app = FastAPI()
-import sys
-
-# 配置日志记录到文件和终端
+# Configure logging to both a file and the terminal
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(levelname)s - %(message)s',
handlers=[
logging.FileHandler('log/app.log'),
- logging.StreamHandler(sys.stdout) # 添加控制台处理程序
+ logging.StreamHandler(sys.stdout) # Add a console handler
]
)
logger = logging.getLogger(__name__)
+
class QuestionRequest(BaseModel):
question: str
scoreThreshold: float
+
class QuestionResponse(BaseModel):
code: int
msg: str
data: list
+
class QuestionItem(BaseModel):
questionId: str
questionList: list[str]
+
class InputText(BaseModel):
inputText: str
+
class ExtractedInfo(BaseModel):
name: str
cardNumber: str
- idNnumber: str
+ idNumber: str
+
-# 读取配置文件
with open('config/config.yaml', 'r') as config_file:
config_data = yaml.safe_load(config_file)
@@ -56,21 +61,24 @@ api_url = config_data['api']['url']
path = config_data['output_file_path']
max_knowledge_bases = config_data['max_knowledge_bases']
+
def load_knowledge_bases():
- """加载知识库名称列表"""
+ """Load the list of knowledge base names."""
if os.path.exists(knowledge_base_file):
with open(knowledge_base_file, "r") as file:
return file.read().splitlines()
else:
return []
+
def save_knowledge_bases(names):
- """保存知识库名称列表到文件"""
+ """Save the list of knowledge base names to the file."""
with open(knowledge_base_file, "w") as file:
file.write("\n".join(names))
+
def update_kb(kb_name, qa_service, path, max_knowledge_bases):
- """更新知识库"""
+ """Update the knowledge base."""
store_data(qa_service, path)
if len(recent_knowledge_bases) == max_knowledge_bases:
@@ -82,19 +90,21 @@ def update_kb(kb_name, qa_service, path, max_knowledge_bases):
os.remove(path)
logger.info(f"Knowledge base updated: {kb_name}\n"
- f"Please wait while the database is being updated···")
+ f"Please wait while the database is being updated")
+
recent_knowledge_bases = deque(load_knowledge_bases(), maxlen=max_knowledge_bases)
+
def text_to_number(text_id):
- chinese_nums = {'零': '0', '一': '1', '二': '2', '三': '3', '四': '4', '五': '5', '六': '6', '七': '7', '八': '8', '九': '9'}
- for chinese_num, arabic_num in chinese_nums.items():
- text_id = text_id.replace(chinese_num, arabic_num)
- return text_id
+ chinese_nums = {'零': '0', '一': '1', '二': '2', '三': '3', '四': '4', '五': '5', '六': '6', '七': '7', '八': '8', '九': '9'}
+ translation_table = str.maketrans(chinese_nums)
+ return text_id.translate(translation_table)
+
@app.post("/updateDatabase")
async def save_to_json(question_items: list[QuestionItem], background_tasks: BackgroundTasks):
- """接收问题数据并异步保存为JSON文件,触发后台更新任务"""
+ """Receive question data, save it asynchronously as a JSON file, and trigger the background update task."""
try:
json_data = json.dumps([item.dict() for item in question_items], ensure_ascii=False, indent=2)
path = "output.json"
@@ -111,16 +121,17 @@ async def save_to_json(question_items: list[QuestionItem], background_tasks: Bac
update_kb, kb_name, qa_service, path, max_knowledge_bases
)
- return {"status": "success", "message": "Please wait while the database is being updated···"}
+ return {"status": "success", "message": "Please wait while the database is being updated"}
except Exception as e:
logger.error(f"Error saving data to file or scheduling knowledge base update task: {e}")
# raise HTTPException(status_code=500, detail=f"Internal Server Error: {str(e)}")
- return {"status": "error", "message": "update task error···"}
+ return {"status": "error", "message": "update task error"}
+
@app.post("/matchQuestion")
def match_question(request: QuestionRequest):
- """匹配问题的端点"""
+ """Endpoint for matching questions."""
try:
logger.info(f"match_question:Request: {request}")
start_time = time.time()
@@ -147,33 +158,40 @@ def match_question(request: QuestionRequest):
logger.error(f"Error matching question: {e}")
return QuestionResponse(code=500, msg="success", data=[])
+
+from paddlenlp import Taskflow
+corrector = Taskflow("text_correction")
+schema = ["姓名", '嫌疑人', '涉案人员', "身份证号", "交易证件号", "卡号", "交易卡号", "银行卡号", ]
+
+name = Taskflow('information_extraction', schema=schema[:2], model='uie-base')
+identity = Taskflow('information_extraction', schema=schema[3:5], model='uie-base')
+card = Taskflow('information_extraction', schema=schema[5:8], model='uie-base')
+
+
@app.post("/extractInformation")
async def extract_information(input_data: InputText):
- """提取信息的端点"""
+ """Endpoint for extracting information."""
try:
- inputText = input_data.inputText
- from paddlenlp import Taskflow
-
- corrector = Taskflow("text_correction")
- data = corrector(inputText)
+ input_text = input_data.inputText
+ data = corrector(input_text)
target_value = data[0]['target']
+ converted_id = text_to_number(target_value + '')
- converted_id = text_to_number(target_value)
-
- schema = ["姓名", '嫌疑人', '涉案人员', "身份证号", "交易证件号", "卡号", "交易卡号", "银行卡号", ]
- ie = Taskflow('information_extraction', schema=schema, model='uie-base')
- extracted_info = ie(converted_id)
+ extracted_info = {}
+ for model_name, model in zip(["name", "identity", "card"], [name, identity, card]):
+ extracted_info[model_name] = model(converted_id)
result = {}
- for item in extracted_info:
- for key, value in item.items():
- result[key.lower()] = value[0]['text']
+ for model_name, info_list in extracted_info.items():
+ for item in info_list:
+ for key, value in item.items():
+ result[key.lower()] = value[0]['text']
extracted_result = ExtractedInfo(
- name=result.get('姓名', '') or result.get('嫌疑人', '') or result.get('涉案人员', ''),
- cardNumber=result.get('卡号', '') or result.get('交易卡号', '') or result.get('银行卡号', ''),
- idNnumber=result.get('身份证号', '') or result.get('交易证件号', '') or result.get('交易证件号', '')
+ name=result.get('姓名', '') or result.get('嫌疑人', '') or result.get('涉案人员', ''),
+ cardNumber=result.get('卡号', '') or result.get('交易卡号', '') or result.get('银行卡号', ''),
+ idNumber=result.get('身份证号', '') or result.get('交易证件号', '') or result.get('交易证件号', '')
)
return extracted_result
@@ -182,7 +200,9 @@ async def extract_information(input_data: InputText):
logger.error(f"Error extracting information: {e}")
raise HTTPException(status_code=500, detail="Internal Server Error")
+
if __name__ == "__main__":
import uvicorn
- uvicorn.run(app, host="0.0.0.0", port=8000)
+ uvicorn.run(app, host="0.0.0.0", port=8001)
+
diff --git a/knowledge_base/38de6667-4f5d-4f0a-8165-992ab76c1424/vector_store/FAISS/index.faiss b/knowledge_base/38de6667-4f5d-4f0a-8165-992ab76c1424/vector_store/FAISS/index.faiss
deleted file mode 100644
index 11f3caf..0000000
Binary files a/knowledge_base/38de6667-4f5d-4f0a-8165-992ab76c1424/vector_store/FAISS/index.faiss and /dev/null differ
diff --git a/knowledge_base/38de6667-4f5d-4f0a-8165-992ab76c1424/vector_store/FAISS/index.pkl b/knowledge_base/38de6667-4f5d-4f0a-8165-992ab76c1424/vector_store/FAISS/index.pkl
deleted file mode 100644
index 0747df7..0000000
Binary files a/knowledge_base/38de6667-4f5d-4f0a-8165-992ab76c1424/vector_store/FAISS/index.pkl and /dev/null differ
diff --git a/knowledge_base/3e6eef9a-7bea-4a35-8312-088a921cebac/vector_store/FAISS/index.faiss b/knowledge_base/3e6eef9a-7bea-4a35-8312-088a921cebac/vector_store/FAISS/index.faiss
deleted file mode 100644
index f16c24d..0000000
Binary files a/knowledge_base/3e6eef9a-7bea-4a35-8312-088a921cebac/vector_store/FAISS/index.faiss and /dev/null differ
diff --git a/knowledge_base/3e6eef9a-7bea-4a35-8312-088a921cebac/vector_store/FAISS/index.pkl b/knowledge_base/3e6eef9a-7bea-4a35-8312-088a921cebac/vector_store/FAISS/index.pkl
deleted file mode 100644
index 0799a5e..0000000
Binary files a/knowledge_base/3e6eef9a-7bea-4a35-8312-088a921cebac/vector_store/FAISS/index.pkl and /dev/null differ
diff --git a/knowledge_base/74dffff8-590f-4f1e-8440-7a45e7aad169/vector_store/FAISS/index.faiss b/knowledge_base/74dffff8-590f-4f1e-8440-7a45e7aad169/vector_store/FAISS/index.faiss
deleted file mode 100644
index b32ecde..0000000
Binary files a/knowledge_base/74dffff8-590f-4f1e-8440-7a45e7aad169/vector_store/FAISS/index.faiss and /dev/null differ
diff --git a/knowledge_base/74dffff8-590f-4f1e-8440-7a45e7aad169/vector_store/FAISS/index.pkl b/knowledge_base/74dffff8-590f-4f1e-8440-7a45e7aad169/vector_store/FAISS/index.pkl
deleted file mode 100644
index 377bfaf..0000000
Binary files a/knowledge_base/74dffff8-590f-4f1e-8440-7a45e7aad169/vector_store/FAISS/index.pkl and /dev/null differ
diff --git a/knowledge_base/a0f9848b-2d65-4b37-85ca-6712061f01c0/vector_store/FAISS/index.faiss b/knowledge_base/a0f9848b-2d65-4b37-85ca-6712061f01c0/vector_store/FAISS/index.faiss
deleted file mode 100644
index 5c2a80b..0000000
Binary files a/knowledge_base/a0f9848b-2d65-4b37-85ca-6712061f01c0/vector_store/FAISS/index.faiss and /dev/null differ
diff --git a/knowledge_base/a0f9848b-2d65-4b37-85ca-6712061f01c0/vector_store/FAISS/index.pkl b/knowledge_base/a0f9848b-2d65-4b37-85ca-6712061f01c0/vector_store/FAISS/index.pkl
deleted file mode 100644
index 1e1ba16..0000000
Binary files a/knowledge_base/a0f9848b-2d65-4b37-85ca-6712061f01c0/vector_store/FAISS/index.pkl and /dev/null differ
diff --git a/knowledge_base/a9281337-4313-4699-a49c-68da829c4884/vector_store/FAISS/index.faiss b/knowledge_base/a9281337-4313-4699-a49c-68da829c4884/vector_store/FAISS/index.faiss
deleted file mode 100644
index b32ecde..0000000
Binary files a/knowledge_base/a9281337-4313-4699-a49c-68da829c4884/vector_store/FAISS/index.faiss and /dev/null differ
diff --git a/knowledge_base/a9281337-4313-4699-a49c-68da829c4884/vector_store/FAISS/index.pkl b/knowledge_base/a9281337-4313-4699-a49c-68da829c4884/vector_store/FAISS/index.pkl
deleted file mode 100644
index 377bfaf..0000000
Binary files a/knowledge_base/a9281337-4313-4699-a49c-68da829c4884/vector_store/FAISS/index.pkl and /dev/null differ
diff --git a/knowledge_base/db5f0e79-263d-46f0-a959-a48e32390d53/vector_store/FAISS/index.faiss b/knowledge_base/db5f0e79-263d-46f0-a959-a48e32390d53/vector_store/FAISS/index.faiss
deleted file mode 100644
index 5c2a80b..0000000
Binary files a/knowledge_base/db5f0e79-263d-46f0-a959-a48e32390d53/vector_store/FAISS/index.faiss and /dev/null differ
diff --git a/knowledge_base/db5f0e79-263d-46f0-a959-a48e32390d53/vector_store/FAISS/index.pkl b/knowledge_base/db5f0e79-263d-46f0-a959-a48e32390d53/vector_store/FAISS/index.pkl
deleted file mode 100644
index 166b184..0000000
Binary files a/knowledge_base/db5f0e79-263d-46f0-a959-a48e32390d53/vector_store/FAISS/index.pkl and /dev/null differ
diff --git a/knowledge_base/e9e61b56-69ee-4420-bb03-9841620560fa/vector_store/FAISS/index.faiss b/knowledge_base/e9e61b56-69ee-4420-bb03-9841620560fa/vector_store/FAISS/index.faiss
deleted file mode 100644
index 5c2a80b..0000000
Binary files a/knowledge_base/e9e61b56-69ee-4420-bb03-9841620560fa/vector_store/FAISS/index.faiss and /dev/null differ
diff --git a/knowledge_base/e9e61b56-69ee-4420-bb03-9841620560fa/vector_store/FAISS/index.pkl b/knowledge_base/e9e61b56-69ee-4420-bb03-9841620560fa/vector_store/FAISS/index.pkl
deleted file mode 100644
index 1e1ba16..0000000
Binary files a/knowledge_base/e9e61b56-69ee-4420-bb03-9841620560fa/vector_store/FAISS/index.pkl and /dev/null differ