From a58f199554cba9a116c4d36acf5949cf69384d97 Mon Sep 17 00:00:00 2001
From: fanpt <320622572@qq.com>
Date: Wed, 6 Mar 2024 09:14:11 +0800
Subject: [PATCH] Push project files
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 qa_amend.py => qa_Ask.py |   0
 qa_test.py               | 127 ---------------------------------------
 test.py                  |  82 -------------------------
 3 files changed, 209 deletions(-)
 rename qa_amend.py => qa_Ask.py (100%)
 delete mode 100644 qa_test.py
 delete mode 100644 test.py

diff --git a/qa_amend.py b/qa_Ask.py
similarity index 100%
rename from qa_amend.py
rename to qa_Ask.py
diff --git a/qa_test.py b/qa_test.py
deleted file mode 100644
index a38e1ad..0000000
--- a/qa_test.py
+++ /dev/null
@@ -1,127 +0,0 @@
-
-import sys
-
-from kb_config import logger
-from sentence_transformers import CrossEncoder
-from faiss_kb_service import FaissKBService, DocumentWithVectorStoreId
-from langchain.docstore.document import Document
-from base_kb import KnowledgeFile
-
-class QAService():
-    def __init__(self, kb_name, device) -> None:
-
-        embed_model_path = 'bge-large-zh-v1.5'
-        fkbs = FaissKBService(kb_name, embed_model_path=embed_model_path, device=device)
-        fkbs.do_create_kb()
-        self.fkbs = fkbs
-        self.kb_name = kb_name
-
-    def delete_qa_file(self, qa_file_id):
-        kb_file = KnowledgeFile(qa_file_id, self.kb_name)
-        self.fkbs.do_delete_doc(kb_file, not_refresh_vs_cache=True)
-
-    def update_qa_doc(self, qa_file_id, doc_list, id_list):
-        self.delete_qa_file(qa_file_id)
-
-        doc_infos = self.fkbs.do_add_doc(doc_list, ids=id_list)
-        logger.info('fassi add docs: ' + str(len(doc_infos)))
-
-        self.fkbs.save_vector_store()
-
-
-    def search(self,
-               query,
-               top_k = 3,
-               score_threshold = 0.75,
-               reranked=False):
-
-        docs = self.fkbs.do_search(query, top_k, 1 - score_threshold)
-
-        return docs
-
-
-import json
-
-
-def create_question_id(intent_code, j, test_question):
-    return f"{intent_code}@{j}@{test_question}"
-
-
-
-def load_testing_data(file_path):
-
-    test_data_list = []
-    question_list = []
-    id_list = []
-
-    with open(file_path, encoding='utf-8') as f:
-        data = json.load(f)
-        for i, item in enumerate(data):
-            test_question = item['testQuestion']
-            intent_code = item['expectIntentCode']
-            test_data_list.append((test_question, intent_code))
-
-            q_list = item['expectIntentQuestionExample']
-            for j, q in enumerate(q_list):
-                q_id = create_question_id(intent_code, j, test_question)
-                question_list.append(q)
-                id_list.append(q_id)
-    return test_data_list, question_list, id_list
-
-
-def convert_to_doc_list(question_list, id_list, qa_file_id):
-    doc_list = []
-    for question, id in zip(question_list, id_list):
-
-        metadata = {
-            'source': qa_file_id,
-            'id': id
-        }
-        doc = Document(page_content=question, metadata=metadata)
-        doc_list.append(doc)
-
-    return doc_list
-
-import time
-def work():
-    start_time = time.time()
-    kb_name = 'my_kb_test'
-    device = None
-    qa_service = QAService(kb_name, device)
-
-
-    test_data_list, question_list, id_list = load_testing_data(r'test_data/testing_data.json')
-    print('Loaded data!')
-
-    qa_file_id = 'QA_TEST_2' # the source of the qa, using for data cleaning, make sure to be unique
-
-
-    doc_list = convert_to_doc_list(question_list, id_list, qa_file_id)
-
-    qa_service.update_qa_doc(qa_file_id, doc_list, id_list)
-
-    cnt = 0
-    for query, code in test_data_list:
-        rst = qa_service.search(query)
-        if do_test(query, code, rst):
-            cnt += 1
-
-    print(str(cnt) + '/' + str(len(test_data_list)))
-
-    end_time = time.time()
-    elapsed_time = end_time - start_time
-    print(f"总耗时: {elapsed_time} 秒")
-
-def do_test(query, expected_intent_code, rst):
-    if rst is None or len(rst)==0:
-        print('Empty: ' + query)
-        return False
-    for rst_doc, similarity_score in rst:
-        page_content = rst_doc.page_content
-        intent_code = rst_doc.metadata['id'].split('@')[0]
-        print(
-            f"{query} vs {page_content} : {expected_intent_code} vs {intent_code} - Similarity Score: {1 - similarity_score}")
-        return True
-
-work()
-
diff --git a/test.py b/test.py
deleted file mode 100644
index 7773f78..0000000
--- a/test.py
+++ /dev/null
@@ -1,82 +0,0 @@
-
-
-import json
-
-
-
-def create_answer_id(i):
-    return 'A_' + str(i)
-
-def create_question_id(intent_code, j):
-    return intent_code + '@' + str(j)
-
-
-def load_traing_data(file_path):
-
-    question_list = []
-    id_list = []
-
-    with open(file_path) as f:
-        data = json.load(f)
-        for i, item in enumerate(data):
-            intent_code = item['intentCode']
-            q_list = item['questionExample']
-            for j, q in enumerate(q_list):
-                q_id = create_question_id(intent_code, j)
-                question_list.append(q)
-                id_list.append(q_id)
-
-    return question_list, id_list
-
-
-def load_testing_data(file_path):
-
-    test_data_list = []
-    question_list = []
-    id_list = []
-
-    with open(file_path, encoding='utf-8') as f:
-        data = json.load(f)
-        for i, item in enumerate(data):
-            test_question = item['testQuestion']
-            intent_code = item['expectIntentCode']
-            test_data_list.append((test_question, intent_code))
-
-            q_list = item['expectIntentQuestionExample']
-            for j, q in enumerate(q_list):
-                q_id = create_question_id(intent_code, j)
-                question_list.append(q)
-                id_list.append(q_id)
-
-    return test_data_list, question_list, id_list
-
-
-
-
-def work():
-    # question_list, id_list = load_traing_data('test_data/training_data.json')
-
-    # print(question_list[0])
-    # print(id_list[0])
-
-    # print(question_list[21])
-    # print(id_list[21])
-
-    # intent_conde, idx = id_list[21].split('@')
-    # print(intent_conde)
-    # print(idx)
-
-    test_data_list, question_list, id_list = load_testing_data('test_data/testing_data.json')
-    q_len = len(question_list)
-    print(question_list[0])
-    print(id_list[0])
-    print(question_list[q_len-1])
-    print(id_list[q_len-1])
-    print('#########')
-    print(test_data_list[0][0])
-    print(test_data_list[0][1])
-
-
-work()
-
-