Start working with strong search
This commit is contained in:
parent
cbeedcc985
commit
2374afe21c
30
aore/miscutils/fysearch.py
Normal file
30
aore/miscutils/fysearch.py
Normal file
@ -0,0 +1,30 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
import re
|
||||
|
||||
import Levenshtein
|
||||
|
||||
|
||||
def violet_ratio(pattern, candidate):
    """Score how well the tokens of ``candidate`` line up with ``pattern``.

    Both strings are split into tokens on runs of the characters
    space, ``,``, ``:``, ``.``, ``#``, ``$`` and ``-``.  Pattern tokens are
    visited from last to first; each one is greedily paired with the
    remaining candidate token that has the highest ``Levenshtein.ratio``
    (on ties the token with the highest index wins).  A pairing adds
    ``index * ratio`` to the score, where ``index`` is the candidate
    token's position at the moment of pairing, and the paired candidate
    token is then removed so it cannot be matched twice.  Candidate tokens
    whose ratio is 0 against a pattern token are never paired with it.

    :param pattern: the query string.
    :param candidate: the string compared against ``pattern``.
    :return: the accumulated score minus the number of candidate tokens
        that were left unpaired.
    """
    delimiters = r"[ ,:.#$-]+"
    arr_pattern = re.split(delimiters, pattern)
    arr_candidate = re.split(delimiters, candidate)

    score = 0

    # Visit every pattern token, last to first.  The previous
    # ``range(len - 1, 0, -1)`` stopped before index 0 and silently ignored
    # the first token.
    for token in reversed(arr_pattern):
        max_j = -1
        max_ratio = 0

        # Same fix on the candidate side: index 0 must be a legal match.
        # The index list is rebuilt on each pass because ``arr_candidate``
        # shrinks whenever a token gets paired.
        for j in range(len(arr_candidate) - 1, -1, -1):
            ratio = Levenshtein.ratio(token, arr_candidate[j])
            if max_ratio < ratio:
                max_ratio = ratio
                max_j = j

        if max_j > -1:
            score += max_j * max_ratio
            # Consume the matched token.  No separate index bookkeeping is
            # needed (and ``range`` has no ``remove`` on Python 3).
            del arr_candidate[max_j]

    return score - len(arr_candidate)
|
@ -6,9 +6,12 @@ import time
|
||||
import Levenshtein
|
||||
import sphinxapi
|
||||
|
||||
from fuzzywuzzy import fuzz
|
||||
|
||||
from aore.config import basic
|
||||
from aore.config import sphinx_conf
|
||||
from aore.miscutils.exceptions import FiasException
|
||||
from aore.miscutils.fysearch import violet_ratio
|
||||
from aore.miscutils.trigram import trigram
|
||||
from wordentry import WordEntry
|
||||
from wordvariation import VariationType
|
||||
@ -172,4 +175,12 @@ class SphinxSearch:
|
||||
ratio=match['attrs']['krank'],
|
||||
cort=i))
|
||||
|
||||
# For strict search we also need to add a fuzzy score and pick the largest value when the deviation is
|
||||
# above the given threshold
|
||||
for result in results:
|
||||
print("{} {}".format(result['text'], fuzz.ratio(text, result['text'])))
|
||||
print("{} {}".format(result['text'], fuzz.partial_ratio(text, result['text'])))
|
||||
print("{} {}".format(result['text'], violet_ratio(text, result['text'].lower())))
|
||||
print("--")
|
||||
|
||||
return results
|
||||
|
@ -1,3 +1,4 @@
|
||||
fuzzywuzzy>=0.10.0
|
||||
lxml>=3.4.0
|
||||
bottle>=0.12.9
|
||||
psycopg2>=2.6.1
|
||||
|
Loading…
x
Reference in New Issue
Block a user