Start working with strong search

This commit is contained in:
jar3b 2016-03-16 18:09:45 +03:00
parent cbeedcc985
commit 2374afe21c
3 changed files with 42 additions and 0 deletions

View File

@ -0,0 +1,30 @@
# -*- coding: utf-8 -*-
import re
import Levenshtein
def violet_ratio(pattern, candidate):
    """Score how well the tokens of *candidate* match the tokens of *pattern*.

    Both strings are split into tokens on runs of the characters
    space, ``,``, ``:``, ``.``, ``#``, ``$`` and ``-``. Pattern tokens are
    visited from last to first; for each one the candidate token with the
    highest ``Levenshtein.ratio`` is selected, ``index * ratio`` is added to
    the score, and that candidate token is removed so it cannot be matched
    again. Finally the number of candidate tokens left over is subtracted.

    Args:
        pattern: The search string typed by the user.
        candidate: The text to compare against the pattern.

    Returns:
        The accumulated ``index * ratio`` values minus the number of
        candidate tokens that remain unmatched. Higher means a closer match.
    """
    splitter = r"[ ,:.#$-]+"
    arr_pattern = re.split(splitter, pattern)
    arr_candidate = re.split(splitter, candidate)

    result = []

    # NOTE(review): both ranges stop *before* index 0, so the first token of
    # the pattern and of the candidate never take part in the comparison.
    # This may be an off-by-one; kept as-is to preserve existing scores --
    # confirm the intent before changing the bounds.
    for i in range(len(arr_pattern) - 1, 0, -1):
        max_j = -1
        max_ratio = 0

        # The index range is rebuilt on every pass because arr_candidate
        # shrinks whenever a token is consumed below.
        for j in range(len(arr_candidate) - 1, 0, -1):
            ratio = Levenshtein.ratio(arr_pattern[i], arr_candidate[j])
            if max_ratio < ratio:
                max_ratio = ratio
                max_j = j

        # When nothing matched, max_j is -1 and max_ratio is 0, so this
        # contributes 0 to the total.
        result.append(max_j * max_ratio)

        if max_j > -1:
            # Consume the matched token. The former `allowed_nums.remove()`
            # call was dropped: it had no effect on later iterations and
            # raises AttributeError on Python 3, where range() is immutable.
            del arr_candidate[max_j]

    return sum(result) - len(arr_candidate)

View File

@ -6,9 +6,12 @@ import time
import Levenshtein import Levenshtein
import sphinxapi import sphinxapi
from fuzzywuzzy import fuzz
from aore.config import basic from aore.config import basic
from aore.config import sphinx_conf from aore.config import sphinx_conf
from aore.miscutils.exceptions import FiasException from aore.miscutils.exceptions import FiasException
from aore.miscutils.fysearch import violet_ratio
from aore.miscutils.trigram import trigram from aore.miscutils.trigram import trigram
from wordentry import WordEntry from wordentry import WordEntry
from wordvariation import VariationType from wordvariation import VariationType
@ -172,4 +175,12 @@ class SphinxSearch:
ratio=match['attrs']['krank'], ratio=match['attrs']['krank'],
cort=i)) cort=i))
# При строгом поиске нам надо еще добавить fuzzy и выбрать самое большое значение при отклонении
# выше заданного
for result in results:
print("{} {}".format(result['text'], fuzz.ratio(text, result['text'])))
print("{} {}".format(result['text'], fuzz.partial_ratio(text, result['text'])))
print("{} {}".format(result['text'], violet_ratio(text, result['text'].lower())))
print("--")
return results return results

View File

@ -1,3 +1,4 @@
fuzzywuzzy>=0.10.0
lxml>=3.4.0 lxml>=3.4.0
bottle>=0.12.9 bottle>=0.12.9
psycopg2>=2.6.1 psycopg2>=2.6.1