Start working with strong search
This commit is contained in:
parent
cbeedcc985
commit
2374afe21c
30
aore/miscutils/fysearch.py
Normal file
30
aore/miscutils/fysearch.py
Normal file
@ -0,0 +1,30 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
import re
|
||||||
|
|
||||||
|
import Levenshtein
|
||||||
|
|
||||||
|
|
||||||
|
def violet_ratio(pattern, candidate):
    """Score how well the tokens of ``candidate`` match the tokens of ``pattern``.

    Both strings are split into tokens on runs of the characters
    `` ,:.#$-``.  Pattern tokens are visited from last to first; each one is
    paired with the remaining candidate token that has the highest
    ``Levenshtein.ratio``.  The pair contributes ``index * ratio`` to the
    score (``index`` being the candidate token's current position), and the
    matched candidate token is removed so it cannot be paired twice.

    :param pattern: the text being searched for.
    :param candidate: the text being compared against ``pattern``.
    :return: the sum of all pair contributions minus the number of candidate
        tokens that were left unmatched.
    """
    splitter = r"[ ,:.#$-]+"
    pattern_tokens = re.split(splitter, pattern)
    candidate_tokens = re.split(splitter, candidate)

    scores = []

    # NOTE(review): both scans stop before index 0, so the first token of
    # each string never takes part in the matching.  This is kept as in the
    # original implementation -- confirm whether it is intentional.
    for i in range(len(pattern_tokens) - 1, 0, -1):
        best_j = -1
        best_ratio = 0

        # The set of candidate indexes is recomputed on every pass because
        # ``candidate_tokens`` shrinks whenever a match is consumed below.
        for j in range(len(candidate_tokens) - 1, 0, -1):
            ratio = Levenshtein.ratio(pattern_tokens[i], candidate_tokens[j])
            if best_ratio < ratio:
                best_ratio = ratio
                best_j = j

        # With no match, best_j is -1 and best_ratio is 0, contributing 0.
        scores.append(best_j * best_ratio)

        if best_j > -1:
            # Consume the matched token.  The former ``allowed_nums.remove()``
            # call was dropped: it fails on a Python 3 ``range`` object and
            # had no effect on Python 2, where the list was rebuilt anyway.
            del candidate_tokens[best_j]

    return sum(scores) - len(candidate_tokens)
|
@ -6,9 +6,12 @@ import time
|
|||||||
import Levenshtein
|
import Levenshtein
|
||||||
import sphinxapi
|
import sphinxapi
|
||||||
|
|
||||||
|
from fuzzywuzzy import fuzz
|
||||||
|
|
||||||
from aore.config import basic
|
from aore.config import basic
|
||||||
from aore.config import sphinx_conf
|
from aore.config import sphinx_conf
|
||||||
from aore.miscutils.exceptions import FiasException
|
from aore.miscutils.exceptions import FiasException
|
||||||
|
from aore.miscutils.fysearch import violet_ratio
|
||||||
from aore.miscutils.trigram import trigram
|
from aore.miscutils.trigram import trigram
|
||||||
from wordentry import WordEntry
|
from wordentry import WordEntry
|
||||||
from wordvariation import VariationType
|
from wordvariation import VariationType
|
||||||
@ -172,4 +175,12 @@ class SphinxSearch:
|
|||||||
ratio=match['attrs']['krank'],
|
ratio=match['attrs']['krank'],
|
||||||
cort=i))
|
cort=i))
|
||||||
|
|
||||||
|
# При строгом поиске нам надо еще добавить fuzzy и выбрать самое большое значение при отклонении
|
||||||
|
# выше заданного
|
||||||
|
for result in results:
|
||||||
|
print("{} {}".format(result['text'], fuzz.ratio(text, result['text'])))
|
||||||
|
print("{} {}".format(result['text'], fuzz.partial_ratio(text, result['text'])))
|
||||||
|
print("{} {}".format(result['text'], violet_ratio(text, result['text'].lower())))
|
||||||
|
print("--")
|
||||||
|
|
||||||
return results
|
return results
|
||||||
|
@ -1,3 +1,4 @@
|
|||||||
|
fuzzywuzzy>=0.10.0
|
||||||
lxml>=3.4.0
|
lxml>=3.4.0
|
||||||
bottle>=0.12.9
|
bottle>=0.12.9
|
||||||
psycopg2>=2.6.1
|
psycopg2>=2.6.1
|
||||||
|
Loading…
x
Reference in New Issue
Block a user