diff --git a/001-调研文档/nlp基础概念调研.pdf b/001-调研文档/nlp基础概念调研.pdf new file mode 100644 index 0000000..2429471 Binary files /dev/null and b/001-调研文档/nlp基础概念调研.pdf differ diff --git a/001-调研文档/情感音频分类调研以及实验情况.pdf b/001-调研文档/情感音频分类调研以及实验情况.pdf new file mode 100644 index 0000000..5e5172f Binary files /dev/null and b/001-调研文档/情感音频分类调研以及实验情况.pdf differ diff --git a/001-调研文档/文本匹配调研信息.pdf b/001-调研文档/文本匹配调研信息.pdf new file mode 100644 index 0000000..5c386b0 Binary files /dev/null and b/001-调研文档/文本匹配调研信息.pdf differ diff --git a/001-调研文档/音频算法调研.pdf b/001-调研文档/音频算法调研.pdf new file mode 100644 index 0000000..5ba3bdd Binary files /dev/null and b/001-调研文档/音频算法调研.pdf differ diff --git a/001-调研文档/音频降噪&语音分离.pdf b/001-调研文档/音频降噪&语音分离.pdf new file mode 100644 index 0000000..df3825c Binary files /dev/null and b/001-调研文档/音频降噪&语音分离.pdf differ diff --git a/002-方案和结论文档/拼音匹配效果对比.md b/002-方案和结论文档/拼音匹配效果对比.md new file mode 100644 index 0000000..6d43832 --- /dev/null +++ b/002-方案和结论文档/拼音匹配效果对比.md @@ -0,0 +1,54 @@ +--使用fuzzywuzzy + --choices = ['zhong1 guo2 gong1 shang1 yin2 hang2','zhong1 guo1 nong2 ye4 yin1 hangh ','jian4 she4 yin2 hang2 '] + --k = process.extract("nong2 ye4", choices, limit=2) + --print(k) + + 结果1:[('zhong guo nong ye yin hang ', 90), ('zhong guo gong shang yin hang', 51)] + 结果2:[('zhong1 guo1 nong2 ye4 yin1 hangh ', 90), ('zhong1 guo2 gong1 shang1 yin2 hang2', 40)] + + 使用音调对识别准确有一定的提高 + + --k = fuzz.partial_ratio("ta1 ma1 de4", "zhe2 ta1 bu4 hou3 ma1 de4") + result:73 + --k = fuzz.partial_ratio("ta1 ma1 de4", "zhe2 bu4 hou3 ma1 de4") + result:73 + --k = fuzz.partial_ratio("ta1 ma1", "zhe2 bu4 hou3 ma1 de4") + result:57 + --k = fuzz.partial_ratio("ta1", "zhe2 bu4 hou3 ma1 de4") + result:67 + + --k = fuzz.partial_ratio("tā mā de", "wǒ diū le nǐ mā mèi mèi qí shí jiù diū de shí hòu hǎo") + result:50 + --k = fuzz.partial_ratio("mā de", "wǒ diū le nǐ mā mèi mèi qí shí jiù diū de shí hòu hǎo") + result:60 + --k = fuzz.partial_ratio("ta ma de", "zai jia shang yi ge na ge wo ba ma de bao 
xian san qian duo") + result:88 + --k = fuzz.partial_ratio("ma de", "zai jia shang yi ge na ge wo ba ma de bao xian san qian duo") + result:100 + + --k = fuzz.partial_ratio("ta mā", "nǐ shì gǎo mà hǎo yǒu le") + result:40 + --k = fuzz.partial_ratio("mā", "nǐ shì gǎo mà hǎo yǒu le") + result:50 + --k = fuzz.partial_ratio("ta ma", "ni shi gao ma hao you le") + result:80 + --k = fuzz.partial_ratio("ma", "ni shi gao ma hao you le") + result:100 + +结论:带声调标注的拼音会减少误识别 + + --k = fuzz.partial_ratio("tā mā de", "shuāng fāng huì yǒu yí gè nà tā mā gè lù xiàng jī tōng xiàng běi de zhè biān shì jiāo gěi nǐ chǔ lǐ") + result:75 + --k = fuzz.partial_ratio("tā mā de", "tā mā gè lù xiàng jī chǔ lǐ") + result:75 + +[{'mā de:shuāng fāng huì yǒu yí gè nà gè lù xiàng jī': 40}] +[{'mā de:tuì huí lái le shì ba a': 40}] +[{'cāo:líng líng líng líng bā líng líng bā wǔ liù hào sān bā wǔ yāo': 67}] +[{'tā mā de:bú yào gēn tā nào le': 50}] +[{'tā mā de:wèi hé sǐ zhuā sì diǎn sì': 50}] +[{'mā de:zhè nǚ hào mǎ de fàng chū lái le': 80}] +[{'mā de:tóng yì zài mà nǐ diàn huà': 40}] +[{'tā mā de:gē shì tā mā de': 100}] +[{'tā mā de:tā mā sǐ bī de': 75}] +