# -*- coding: utf-8 -*-
import re

import Levenshtein

# Runs of these characters separate the words of a pattern/candidate string.
_WORD_SEPARATORS = re.compile(r"[ ,:.#$-]+")


def _split_words(text):
    """Split ``text`` on separator runs and drop the empty pieces.

    ``re.split`` yields empty strings when the text starts or ends with a
    separator (or is empty); those are not words and must not take part in
    the scoring.
    """
    return [word for word in _WORD_SEPARATORS.split(text) if word]


def violet_ratio(pattern, candidate):
    """Return a position-weighted fuzzy score of ``candidate`` against ``pattern``.

    Both strings are split into words on runs of `` ,:.#$-``.  The pattern
    words are then visited from the last one to the first one; for each of
    them the still-unmatched candidate word with the highest
    ``Levenshtein.ratio`` is picked (candidate words are scanned from the end,
    and only a strictly higher ratio replaces the current best, so ties go to
    the later word).  Every pick adds ``index * ratio`` to the score, where
    ``index`` is the position of the picked word among the candidate words
    that are still unmatched at that moment, and the picked word is removed
    from further consideration.  Finally the number of candidate words that
    were never picked is subtracted.

    Note that a pick at index 0 adds nothing to the sum, but it still
    consumes the candidate word and therefore lowers the final penalty.

    :param pattern: the text that was searched for.
    :param candidate: the text of a search result to be rated.
    :return: the score as a number (``int`` when nothing matched at all,
        ``float`` otherwise); larger means a better match.
    """
    pattern_words = _split_words(pattern)
    unmatched = _split_words(candidate)

    score = 0

    for pattern_word in reversed(pattern_words):
        best_index = -1
        best_ratio = 0

        # The stop value is -1 (not 0) so that the word at index 0 is
        # compared as well.
        for index in range(len(unmatched) - 1, -1, -1):
            ratio = Levenshtein.ratio(pattern_word, unmatched[index])
            if best_ratio < ratio:
                best_ratio = ratio
                best_index = index

        if best_index < 0:
            # No candidate word has anything in common with this pattern
            # word (or none are left): it contributes nothing.
            continue

        score += best_index * best_ratio
        # A candidate word may be matched by one pattern word only.
        del unmatched[best_index]

    return score - len(unmatched)
+fuzzywuzzy>=0.10.0 lxml>=3.4.0 bottle>=0.12.9 psycopg2>=2.6.1