From e764b44da415993730d7f6be4077ff01a1746dbd Mon Sep 17 00:00:00 2001 From: jar3b Date: Wed, 16 Mar 2016 22:18:36 +0300 Subject: [PATCH] Some corrections in strong search --- aore/config/common.py | 41 +++++++++++++++++++++++++------------- aore/miscutils/fysearch.py | 8 ++++---- aore/search/fiasfactory.py | 2 +- aore/search/search.py | 25 +++++++++++++---------- passenger_wsgi.py | 2 +- requirements.txt | 2 +- 6 files changed, 49 insertions(+), 31 deletions(-) diff --git a/aore/config/common.py b/aore/config/common.py index 9553d79..10a304e 100644 --- a/aore/config/common.py +++ b/aore/config/common.py @@ -2,31 +2,44 @@ class BasicConfig: + logging = False + logfile = "" + def __init__(self): - self.logging = False - self.logfile = "" + pass + class SphinxConfig: + listen = "127.0.0.1:9312" + index_addjobj = "idx_fias_addrobj" + index_sugg = "idx_fias_sugg" + var_dir = None + min_length_to_star = 3 + def __init__(self): - self.listen = "127.0.0.1:9312" - self.index_addjobj = "idx_fias_addrobj" - self.index_sugg = "idx_fias_sugg" - self.var_dir = None - self.min_length_to_star = 3 + pass + class DatabaseConfig: + host = None + user = None + password = None + database = None + port = None + def __init__(self): - self.host = None - self.user = None - self.password = None - self.database = None - self.port = None + pass + class UnrarConfig: + path = None + def __init__(self): - self.path = None + pass class Folders: + temp = None + def __init__(self): - self.temp = None + pass diff --git a/aore/miscutils/fysearch.py b/aore/miscutils/fysearch.py index d56a0e3..367cb47 100644 --- a/aore/miscutils/fysearch.py +++ b/aore/miscutils/fysearch.py @@ -10,10 +10,10 @@ def violet_ratio(pattern, candidate): result = list() - for i in range(len(arr_pattern) - 1, 0, -1): + for i in range(len(arr_pattern) - 1, -1, -1): max_j = -1 - max_ratio = 0 - allowed_nums = range(len(arr_candidate) - 1, 0, -1) + max_ratio = -1 + allowed_nums = range(len(arr_candidate) - 1, -1, -1) for j in allowed_nums: ratio = Levenshtein.ratio(arr_pattern[i], arr_candidate[j]) @@ -21,7 +21,7 @@ def violet_ratio(pattern, candidate): max_ratio = ratio max_j = j - result.append(max_j*max_ratio) + result.append(max_j*abs(max_ratio)) if max_j > -1: allowed_nums.remove(max_j) diff --git a/aore/search/fiasfactory.py b/aore/search/fiasfactory.py index 6fbbffc..edee085 100644 --- a/aore/search/fiasfactory.py +++ b/aore/search/fiasfactory.py @@ -57,7 +57,7 @@ class FiasFactory: results = self.searcher.find(text, strong) except Exception, err: - if basic.logging: + if BasicConfig.logging: logging.error(traceback.format_exc(err)) return dict(error=err.args[0]) diff --git a/aore/search/search.py b/aore/search/search.py index 4e3c384..63a61f3 100644 --- a/aore/search/search.py +++ b/aore/search/search.py @@ -8,8 +8,6 @@ import sphinxapi from aore.config import BasicConfig from aore.config import SphinxConfig -from fuzzywuzzy import fuzz - from aore.miscutils.exceptions import FiasException from aore.miscutils.fysearch import violet_ratio from aore.miscutils.trigram import trigram @@ -35,8 +33,8 @@ class SphinxSearch: sphinx_port = None # Получаем строку подключения для Sphinx - if ":" in sphinx_conf.listen and "unix:/" not in sphinx_conf.listen: - sphinx_host, sphinx_port = sphinx_conf.listen.split(":") + if ":" in SphinxConfig.listen and "unix:/" not in SphinxConfig.listen: + sphinx_host, sphinx_port = SphinxConfig.listen.split(":") sphinx_port = int(sphinx_port) # Настраиваем подключение для подсказок @@ -154,7 +152,6 @@ class SphinxSearch: rs = self.client_show.RunQueries() elapsed_t = time.time() - start_t - if rs is None: raise FiasException("Cannot find sentence.") @@ -178,10 +175,18 @@ class SphinxSearch: # При строгом поиске нам надо еще добавить fuzzy и выбрать самое большое значение при отклонении # выше заданного - for result in results: - print("{} {}".format(result['text'], fuzz.ratio(text, result['text']))) - print("{} {}".format(result['text'], fuzz.partial_ratio(text, result['text']))) - print("{} {}".format(result['text'], violet_ratio(text, result['text'].lower()))) - print("--") + if strong: + for result in results: + result['strong_rank'] = violet_ratio(text, result['text'].lower()) + + # Сортируем по убыванию признака + results.sort(key=lambda x: x['strong_rank'], reverse=True) + + # Если подряд два одинаково релеватных результата - это плохо, на автомат такое отдавать нельзя + if abs(results[0]['strong_rank'] - results[1]['strong_rank']) == 0.0: + raise FiasException("No matches") + else: + print results[0]['text'], results[0]['strong_rank'] + return results[0] return results diff --git a/passenger_wsgi.py b/passenger_wsgi.py index 2ddf04b..da2543f 100644 --- a/passenger_wsgi.py +++ b/passenger_wsgi.py @@ -9,7 +9,7 @@ except ImportError: assert "No config" # Define main app -phias_app = phias.App(config.basic.logfile) +phias_app = phias.App(config.BasicConfig.logfile) # Define wsgi app application = phias_app.get_app() diff --git a/requirements.txt b/requirements.txt index 0ffb6ba..669c6a1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,3 @@ -fuzzywuzzy>=0.10.0 lxml>=3.4.0 bottle>=0.12.9 psycopg2>=2.6.1 @@ -8,3 +7,4 @@ enum34>=1.0.0 rarfile requests>=2.8.1 soap2py==1.16 +sphinxapi