From a58f199554cba9a116c4d36acf5949cf69384d97 Mon Sep 17 00:00:00 2001
From: fanpt <320622572@qq.com>
Date: Wed, 6 Mar 2024 09:14:11 +0800
Subject: [PATCH] Push project files
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 qa_amend.py => qa_Ask.py |   0
 qa_test.py               | 127 ---------------------------------------
 test.py                  |  82 -------------------------
 3 files changed, 209 deletions(-)
 rename qa_amend.py => qa_Ask.py (100%)
 delete mode 100644 qa_test.py
 delete mode 100644 test.py

diff --git a/qa_amend.py b/qa_Ask.py
similarity index 100%
rename from qa_amend.py
rename to qa_Ask.py
diff --git a/qa_test.py b/qa_test.py
deleted file mode 100644
index a38e1ad..0000000
--- a/qa_test.py
+++ /dev/null
@@ -1,127 +0,0 @@
-
-import sys
-
-from kb_config import logger
-from sentence_transformers import CrossEncoder
-from faiss_kb_service import FaissKBService, DocumentWithVectorStoreId
-from langchain.docstore.document import Document
-from base_kb import KnowledgeFile
-
-class QAService():
-    def __init__(self, kb_name, device) -> None:
-
-        embed_model_path = 'bge-large-zh-v1.5'
-        fkbs = FaissKBService(kb_name, embed_model_path=embed_model_path, device=device)
-        fkbs.do_create_kb()
-        self.fkbs = fkbs
-        self.kb_name = kb_name
-
-    def delete_qa_file(self, qa_file_id):
-        kb_file = KnowledgeFile(qa_file_id, self.kb_name)
-        self.fkbs.do_delete_doc(kb_file, not_refresh_vs_cache=True)
-
-    def update_qa_doc(self, qa_file_id, doc_list, id_list):
-        self.delete_qa_file(qa_file_id)
-
-        doc_infos = self.fkbs.do_add_doc(doc_list, ids=id_list)
-        logger.info('fassi add docs: ' + str(len(doc_infos)))
-
-        self.fkbs.save_vector_store()
-
-
-    def search(self,
-               query,
-               top_k = 3,
-               score_threshold = 0.75,
-               reranked=False):
-
-        docs = self.fkbs.do_search(query, top_k, 1 - score_threshold)
-
-        return docs
-
-
-import json
-
-
-def create_question_id(intent_code, j, test_question):
-    return f"{intent_code}@{j}@{test_question}"
-
-
-
-def load_testing_data(file_path):
-
-    test_data_list = []
-    question_list = []
-    id_list = []
-
-    with open(file_path, encoding='utf-8') as f:
-        data = json.load(f)
-        for i, item in enumerate(data):
-            test_question = item['testQuestion']
-            intent_code = item['expectIntentCode']
-            test_data_list.append((test_question, intent_code))
-
-            q_list = item['expectIntentQuestionExample']
-            for j, q in enumerate(q_list):
-                q_id = create_question_id(intent_code, j, test_question)
-                question_list.append(q)
-                id_list.append(q_id)
-    return test_data_list, question_list, id_list
-
-
-def convert_to_doc_list(question_list, id_list, qa_file_id):
-    doc_list = []
-    for question, id in zip(question_list, id_list):
-
-        metadata = {
-            'source': qa_file_id,
-            'id': id
-        }
-        doc = Document(page_content=question, metadata=metadata)
-        doc_list.append(doc)
-
-    return doc_list
-
-import time
-def work():
-    start_time = time.time()
-    kb_name = 'my_kb_test'
-    device = None
-    qa_service = QAService(kb_name, device)
-
-
-    test_data_list, question_list, id_list = load_testing_data(r'test_data/testing_data.json')
-    print('Loaded data!')
-
-    qa_file_id = 'QA_TEST_2' # the source of the qa, using for data cleaning, make sure to be unique
-
-
-    doc_list = convert_to_doc_list(question_list, id_list, qa_file_id)
-
-    qa_service.update_qa_doc(qa_file_id, doc_list, id_list)
-
-    cnt = 0
-    for query, code in test_data_list:
-        rst = qa_service.search(query)
-        if do_test(query, code, rst):
-            cnt += 1
-
-    print(str(cnt) + '/' + str(len(test_data_list)))
-
-    end_time = time.time()
-    elapsed_time = end_time - start_time
-    print(f"总耗时: {elapsed_time} 秒")
-
-def do_test(query, expected_intent_code, rst):
-    if rst is None or len(rst)==0:
-        print('Empty: ' + query)
-        return False
-    for rst_doc, similarity_score in rst:
-        page_content = rst_doc.page_content
-        intent_code = rst_doc.metadata['id'].split('@')[0]
-        print(
-            f"{query} vs {page_content} : {expected_intent_code} vs {intent_code} - Similarity Score: {1 - similarity_score}")
-        return True
-
-work()
-
diff --git a/test.py b/test.py
deleted file mode 100644
index 7773f78..0000000
--- a/test.py
+++ /dev/null
@@ -1,82 +0,0 @@
-
-
-import json
-
-
-
-def create_answer_id(i):
-    return 'A_' + str(i)
-
-def create_question_id(intent_code, j):
-    return intent_code + '@' + str(j)
-
-
-def load_traing_data(file_path):
-
-    question_list = []
-    id_list = []
-
-    with open(file_path) as f:
-        data = json.load(f)
-        for i, item in enumerate(data):
-            intent_code = item['intentCode']
-            q_list = item['questionExample']
-            for j, q in enumerate(q_list):
-                q_id = create_question_id(intent_code, j)
-                question_list.append(q)
-                id_list.append(q_id)
-
-    return question_list, id_list
-
-
-def load_testing_data(file_path):
-
-    test_data_list = []
-    question_list = []
-    id_list = []
-
-    with open(file_path, encoding='utf-8') as f:
-        data = json.load(f)
-        for i, item in enumerate(data):
-            test_question = item['testQuestion']
-            intent_code = item['expectIntentCode']
-            test_data_list.append((test_question, intent_code))
-
-            q_list = item['expectIntentQuestionExample']
-            for j, q in enumerate(q_list):
-                q_id = create_question_id(intent_code, j)
-                question_list.append(q)
-                id_list.append(q_id)
-
-    return test_data_list, question_list, id_list
-
-
-
-
-def work():
-    # question_list, id_list = load_traing_data('test_data/training_data.json')
-
-    # print(question_list[0])
-    # print(id_list[0])
-
-    # print(question_list[21])
-    # print(id_list[21])
-
-    # intent_conde, idx = id_list[21].split('@')
-    # print(intent_conde)
-    # print(idx)
-
-    test_data_list, question_list, id_list = load_testing_data('test_data/testing_data.json')
-    q_len = len(question_list)
-    print(question_list[0])
-    print(id_list[0])
-    print(question_list[q_len-1])
-    print(id_list[q_len-1])
-    print('#########')
-    print(test_data_list[0][0])
-    print(test_data_list[0][1])
-
-
-work()
-
-