Some corrections in strong search

This commit is contained in:
jar3b 2016-03-16 22:18:36 +03:00
parent 8dff95c692
commit e764b44da4
6 changed files with 49 additions and 31 deletions

View File

@@ -2,31 +2,44 @@
class BasicConfig: class BasicConfig:
logging = False
logfile = ""
def __init__(self): def __init__(self):
self.logging = False pass
self.logfile = ""
class SphinxConfig: class SphinxConfig:
listen = "127.0.0.1:9312"
index_addjobj = "idx_fias_addrobj"
index_sugg = "idx_fias_sugg"
var_dir = None
min_length_to_star = 3
def __init__(self): def __init__(self):
self.listen = "127.0.0.1:9312" pass
self.index_addjobj = "idx_fias_addrobj"
self.index_sugg = "idx_fias_sugg"
self.var_dir = None
self.min_length_to_star = 3
class DatabaseConfig: class DatabaseConfig:
host = None
user = None
password = None
database = None
port = None
def __init__(self): def __init__(self):
self.host = None pass
self.user = None
self.password = None
self.database = None
self.port = None
class UnrarConfig: class UnrarConfig:
path = None
def __init__(self): def __init__(self):
self.path = None pass
class Folders: class Folders:
temp = None
def __init__(self): def __init__(self):
self.temp = None pass

View File

@@ -10,10 +10,10 @@ def violet_ratio(pattern, candidate):
result = list() result = list()
for i in range(len(arr_pattern) - 1, 0, -1): for i in range(len(arr_pattern) - 1, -1, -1):
max_j = -1 max_j = -1
max_ratio = 0 max_ratio = -1
allowed_nums = range(len(arr_candidate) - 1, 0, -1) allowed_nums = range(len(arr_candidate) - 1, -1, -1)
for j in allowed_nums: for j in allowed_nums:
ratio = Levenshtein.ratio(arr_pattern[i], arr_candidate[j]) ratio = Levenshtein.ratio(arr_pattern[i], arr_candidate[j])
@@ -21,7 +21,7 @@ def violet_ratio(pattern, candidate):
max_ratio = ratio max_ratio = ratio
max_j = j max_j = j
result.append(max_j*max_ratio) result.append(max_j*abs(max_ratio))
if max_j > -1: if max_j > -1:
allowed_nums.remove(max_j) allowed_nums.remove(max_j)

View File

@@ -57,7 +57,7 @@ class FiasFactory:
results = self.searcher.find(text, strong) results = self.searcher.find(text, strong)
except Exception, err: except Exception, err:
if basic.logging: if BasicConfig.logging:
logging.error(traceback.format_exc(err)) logging.error(traceback.format_exc(err))
return dict(error=err.args[0]) return dict(error=err.args[0])

View File

@@ -8,8 +8,6 @@ import sphinxapi
from aore.config import BasicConfig from aore.config import BasicConfig
from aore.config import SphinxConfig from aore.config import SphinxConfig
from fuzzywuzzy import fuzz
from aore.miscutils.exceptions import FiasException from aore.miscutils.exceptions import FiasException
from aore.miscutils.fysearch import violet_ratio from aore.miscutils.fysearch import violet_ratio
from aore.miscutils.trigram import trigram from aore.miscutils.trigram import trigram
@@ -35,8 +33,8 @@ class SphinxSearch:
sphinx_port = None sphinx_port = None
# Получаем строку подключения для Sphinx # Получаем строку подключения для Sphinx
if ":" in sphinx_conf.listen and "unix:/" not in sphinx_conf.listen: if ":" in SphinxConfig.listen and "unix:/" not in SphinxConfig.listen:
sphinx_host, sphinx_port = sphinx_conf.listen.split(":") sphinx_host, sphinx_port = SphinxConfig.listen.split(":")
sphinx_port = int(sphinx_port) sphinx_port = int(sphinx_port)
# Настраиваем подключение для подсказок # Настраиваем подключение для подсказок
@@ -154,7 +152,6 @@ class SphinxSearch:
rs = self.client_show.RunQueries() rs = self.client_show.RunQueries()
elapsed_t = time.time() - start_t elapsed_t = time.time() - start_t
if rs is None: if rs is None:
raise FiasException("Cannot find sentence.") raise FiasException("Cannot find sentence.")
@@ -178,10 +175,18 @@ class SphinxSearch:
# При строгом поиске нам надо еще добавить fuzzy и выбрать самое большое значение при отклонении # При строгом поиске нам надо еще добавить fuzzy и выбрать самое большое значение при отклонении
# выше заданного # выше заданного
for result in results: if strong:
print("{} {}".format(result['text'], fuzz.ratio(text, result['text']))) for result in results:
print("{} {}".format(result['text'], fuzz.partial_ratio(text, result['text']))) result['strong_rank'] = violet_ratio(text, result['text'].lower())
print("{} {}".format(result['text'], violet_ratio(text, result['text'].lower())))
print("--") # Сортируем по убыванию признака
results.sort(key=lambda x: x['strong_rank'], reverse=True)
# Если подряд два одинаково релеватных результата - это плохо, на автомат такое отдавать нельзя
if abs(results[0]['strong_rank'] - results[1]['strong_rank']) == 0.0:
raise FiasException("No matches")
else:
print results[0]['text'], results[0]['strong_rank']
return results[0]
return results return results

View File

@@ -9,7 +9,7 @@ except ImportError:
assert "No config" assert "No config"
# Define main app # Define main app
phias_app = phias.App(config.basic.logfile) phias_app = phias.App(config.BasicConfig.logfile)
# Define wsgi app # Define wsgi app
application = phias_app.get_app() application = phias_app.get_app()

View File

@@ -1,4 +1,3 @@
fuzzywuzzy>=0.10.0
lxml>=3.4.0 lxml>=3.4.0
bottle>=0.12.9 bottle>=0.12.9
psycopg2>=2.6.1 psycopg2>=2.6.1
@@ -8,3 +7,4 @@ enum34>=1.0.0
rarfile rarfile
requests>=2.8.1 requests>=2.8.1
soap2py==1.16 soap2py==1.16
sphinxapi