From 2374afe21cd6f2859a2cf1c45adaff3a799bb693 Mon Sep 17 00:00:00 2001 From: jar3b Date: Wed, 16 Mar 2016 18:09:45 +0300 Subject: [PATCH 1/4] Start working with strong search --- aore/miscutils/fysearch.py | 30 ++++++++++++++++++++++++++++++ aore/search/search.py | 11 +++++++++++ requirements.txt | 1 + 3 files changed, 42 insertions(+) create mode 100644 aore/miscutils/fysearch.py diff --git a/aore/miscutils/fysearch.py b/aore/miscutils/fysearch.py new file mode 100644 index 0000000..d56a0e3 --- /dev/null +++ b/aore/miscutils/fysearch.py @@ -0,0 +1,30 @@ +# -*- coding: utf-8 -*- +import re + +import Levenshtein + + +def violet_ratio(pattern, candidate): + arr_pattern = re.split(r"[ ,:.#$-]+", pattern) + arr_candidate = re.split(r"[ ,:.#$-]+", candidate) + + result = list() + + for i in range(len(arr_pattern) - 1, 0, -1): + max_j = -1 + max_ratio = 0 + allowed_nums = range(len(arr_candidate) - 1, 0, -1) + + for j in allowed_nums: + ratio = Levenshtein.ratio(arr_pattern[i], arr_candidate[j]) + if max_ratio < ratio: + max_ratio = ratio + max_j = j + + result.append(max_j*max_ratio) + + if max_j > -1: + allowed_nums.remove(max_j) + del arr_candidate[max_j] + + return sum(result) - len(arr_candidate) diff --git a/aore/search/search.py b/aore/search/search.py index d734e64..a1efeac 100644 --- a/aore/search/search.py +++ b/aore/search/search.py @@ -6,9 +6,12 @@ import time import Levenshtein import sphinxapi +from fuzzywuzzy import fuzz + from aore.config import basic from aore.config import sphinx_conf from aore.miscutils.exceptions import FiasException +from aore.miscutils.fysearch import violet_ratio from aore.miscutils.trigram import trigram from wordentry import WordEntry from wordvariation import VariationType @@ -172,4 +175,12 @@ class SphinxSearch: ratio=match['attrs']['krank'], cort=i)) + # При строгом поиске нам надо еще добавить fuzzy и выбрать самое большое значение при отклонении + # выше заданного + for result in results: + print("{} {}".format(result['text'], fuzz.ratio(text, result['text']))) + print("{} {}".format(result['text'], fuzz.partial_ratio(text, result['text']))) + print("{} {}".format(result['text'], violet_ratio(text, result['text'].lower()))) + print("--") + return results diff --git a/requirements.txt b/requirements.txt index 4c5c823..0ffb6ba 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ +fuzzywuzzy>=0.10.0 lxml>=3.4.0 bottle>=0.12.9 psycopg2>=2.6.1 From 8dff95c692bc898f2bf1907b2f084b9cb396a248 Mon Sep 17 00:00:00 2001 From: jar3b Date: Wed, 16 Mar 2016 20:34:08 +0300 Subject: [PATCH 2/4] Config refactoring --- aore/config/common.py | 53 ++++++++++++++++---------------- aore/miscutils/sphinx.py | 58 ++++++++++++++++++------------------ aore/search/fiasfactory.py | 8 ++--- aore/search/search.py | 21 ++++++------- aore/search/wordentry.py | 4 +-- aore/updater/aodataparser.py | 8 ++--- aore/updater/aorar.py | 10 +++---- aore/updater/dbhandler.py | 4 +-- aore/updater/updater.py | 13 ++++---- config.example.py | 29 +++++++++++------- 10 files changed, 110 insertions(+), 98 deletions(-) diff --git a/aore/config/common.py b/aore/config/common.py index 34bee2e..9553d79 100644 --- a/aore/config/common.py +++ b/aore/config/common.py @@ -1,29 +1,32 @@ # -*- coding: utf-8 -*- -class basic: - logging = False - logfile = "" + +class BasicConfig: + def __init__(self): + self.logging = False + self.logfile = "" + +class SphinxConfig: + def __init__(self): + self.listen = "127.0.0.1:9312" + self.index_addjobj = "idx_fias_addrobj" + self.index_sugg = "idx_fias_sugg" + self.var_dir = None + self.min_length_to_star = 3 + +class DatabaseConfig: + def __init__(self): + self.host = None + self.user = None + self.password = None + self.database = None + self.port = None + +class UnrarConfig: + def __init__(self): + self.path = None -class sphinx_conf: - listen = "127.0.0.1:9312" - index_addjobj = "idx_fias_addrobj" - index_sugg = "idx_fias_sugg" - var_dir = None - min_length_to_star = 3 - - -class db_conf: - host = None - user = None - password = None - database = None - port = None - - -class unrar_config: - path = None - - -class folders: - temp = None +class Folders: + def __init__(self): + self.temp = None diff --git a/aore/miscutils/sphinx.py b/aore/miscutils/sphinx.py index 011dd82..22c1542 100644 --- a/aore/miscutils/sphinx.py +++ b/aore/miscutils/sphinx.py @@ -5,7 +5,7 @@ import os from bottle import template -from aore.config import folders, db_conf, sphinx_conf +from aore.config import Folders, DatabaseConfig, SphinxConfig from aore.miscutils.trigram import trigram from aore.updater.aoxmltableentry import AoXmlTableEntry from aore.updater.dbhandler import DbHandler @@ -18,16 +18,16 @@ class SphinxHelper: self.aodp = DbHandler() # Создаем временную папку, если ее нет - if not os.path.exists(folders.temp): - os.makedirs(folders.temp) + if not os.path.exists(Folders.temp): + os.makedirs(Folders.temp) # оздаем 3 папки для Сфинкса - if not os.path.exists(sphinx_conf.var_dir+'/run'): - os.makedirs(sphinx_conf.var_dir+'/run') - if not os.path.exists(sphinx_conf.var_dir+'/log'): - os.makedirs(sphinx_conf.var_dir+'/log') - if not os.path.exists(sphinx_conf.var_dir+'/data'): - os.makedirs(sphinx_conf.var_dir+'/data') + if not os.path.exists(SphinxConfig.var_dir+ '/run'): + os.makedirs(SphinxConfig.var_dir + '/run') + if not os.path.exists(SphinxConfig.var_dir+ '/log'): + os.makedirs(SphinxConfig.var_dir + '/log') + if not os.path.exists(SphinxConfig.var_dir+ '/data'): + os.makedirs(SphinxConfig.var_dir + '/data') def configure_indexer(self, indexer_binary, config_filename): logging.info("Start configuring Sphinx...") @@ -64,15 +64,15 @@ class SphinxHelper: logging.info("Successfully configured. Please restart searchd.") def __create_sugg_index_config(self): - fname = os.path.abspath(folders.temp + "/suggest.conf") + fname = os.path.abspath(Folders.temp + "/suggest.conf") logging.info("Creating config %s", fname) - conf_data = template('aore/templates/sphinx/idx_suggest.conf', db_host=db_conf.host, - db_user=db_conf.user, - db_password=db_conf.password, - db_name=db_conf.database, db_port=db_conf.port, - index_name=sphinx_conf.index_sugg, - sphinx_var_path=sphinx_conf.var_dir) + conf_data = template('aore/templates/sphinx/idx_suggest.conf', db_host=DatabaseConfig.host, + db_user=DatabaseConfig.user, + db_password=DatabaseConfig.password, + db_name=DatabaseConfig.database, db_port=DatabaseConfig.port, + index_name=SphinxConfig.index_sugg, + sphinx_var_path=SphinxConfig.var_dir) f = open(fname, "w") f.write(conf_data) @@ -84,7 +84,7 @@ class SphinxHelper: def __dbexport_sugg_dict(self): logging.info("Place suggestion dict to DB %s...", self.files['dict.txt']) - dict_dat_fname = os.path.abspath(folders.temp + "/suggdict.csv") + dict_dat_fname = os.path.abspath(Folders.temp + "/suggdict.csv") csv_counter = 0 with open(self.files['dict.txt'], "r") as dict_file, open(dict_dat_fname, "w") as exit_file: @@ -115,17 +115,17 @@ class SphinxHelper: logging.info("Done.") def __create_ao_index_config(self): - fname = os.path.abspath(folders.temp + "/addrobj.conf") + fname = os.path.abspath(Folders.temp + "/addrobj.conf") logging.info("Creating config %s", fname) - conf_data = template('aore/templates/sphinx/idx_addrobj.conf', db_host=db_conf.host, - db_user=db_conf.user, - db_password=db_conf.password, - db_name=db_conf.database, db_port=db_conf.port, + conf_data = template('aore/templates/sphinx/idx_addrobj.conf', db_host=DatabaseConfig.host, + db_user=DatabaseConfig.user, + db_password=DatabaseConfig.password, + db_name=DatabaseConfig.database, db_port=DatabaseConfig.port, sql_query=template('aore/templates/postgre/sphinx_query.sql').replace("\n", " \\\n"), - index_name=sphinx_conf.index_addjobj, - sphinx_var_path=sphinx_conf.var_dir, - min_length_to_star=sphinx_conf.min_length_to_star) + index_name=SphinxConfig.index_addjobj, + sphinx_var_path=SphinxConfig.var_dir, + min_length_to_star=SphinxConfig.min_length_to_star) f = open(fname, "w") f.write(conf_data) @@ -136,11 +136,11 @@ class SphinxHelper: return fname def __create_suggestion_dict(self): - fname = os.path.abspath(folders.temp + "/suggdict.txt") + fname = os.path.abspath(Folders.temp + "/suggdict.txt") logging.info("Make suggestion dict (%s)...", fname) run_builddict_cmd = "{} {} -c {} --buildstops {} 200000 --buildfreqs".format(self.index_binary, - sphinx_conf.index_addjobj, + SphinxConfig.index_addjobj, self.files['addrobj.conf'], fname) os.system(run_builddict_cmd) logging.info("Done.") @@ -152,8 +152,8 @@ class SphinxHelper: logging.info("Creating main config %s...", out_filename) conf_data = template('aore/templates/sphinx/sphinx.conf', - sphinx_listen=sphinx_conf.listen.replace("unix://", ''), - sphinx_var_path=sphinx_conf.var_dir) + sphinx_listen=SphinxConfig.listen.replace("unix://", ''), + sphinx_var_path=SphinxConfig.var_dir) f = open(out_filename, "w") for fname, fpath in self.files.iteritems(): diff --git a/aore/search/fiasfactory.py b/aore/search/fiasfactory.py index b8cdae8..6fbbffc 100644 --- a/aore/search/fiasfactory.py +++ b/aore/search/fiasfactory.py @@ -8,14 +8,14 @@ import psycopg2 import traceback from bottle import template -from aore.config import db_conf, basic +from aore.config import DatabaseConfig, BasicConfig from aore.dbutils.dbimpl import DBImpl from search import SphinxSearch class FiasFactory: def __init__(self): - self.db = DBImpl(psycopg2, db_conf) + self.db = DBImpl(psycopg2, DatabaseConfig) self.searcher = SphinxSearch(self.db) self.expand_templ = template('aore/templates/postgre/expand_query.sql', aoid="//aoid") self.normalize_templ = template('aore/templates/postgre/normalize_query.sql', aoid="//aoid") @@ -71,7 +71,7 @@ class FiasFactory: sql_query = self.normalize_templ.replace("//aoid", aoid_guid) rows = self.db.get_rows(sql_query, True) except Exception, err: - if basic.logging: + if BasicConfig.logging: logging.error(traceback.format_exc(err)) return dict(error=err.args[0]) @@ -92,7 +92,7 @@ class FiasFactory: sql_query = self.expand_templ.replace("//aoid", normalized_id) rows = self.db.get_rows(sql_query, True) except Exception, err: - if basic.logging: + if BasicConfig.logging: logging.error(traceback.format_exc(err)) return dict(error=err.args[0]) diff --git a/aore/search/search.py b/aore/search/search.py index a1efeac..4e3c384 100644 --- a/aore/search/search.py +++ b/aore/search/search.py @@ -6,10 +6,10 @@ import time import Levenshtein import sphinxapi +from aore.config import BasicConfig +from aore.config import SphinxConfig from fuzzywuzzy import fuzz -from aore.config import basic -from aore.config import sphinx_conf from aore.miscutils.exceptions import FiasException from aore.miscutils.fysearch import violet_ratio from aore.miscutils.trigram import trigram @@ -31,7 +31,7 @@ class SphinxSearch: def __init__(self, db): self.db = db - sphinx_host = sphinx_conf.listen + sphinx_host = SphinxConfig.listen sphinx_port = None # Получаем строку подключения для Sphinx @@ -53,7 +53,7 @@ class SphinxSearch: def __configure(self, index_name, word_len): self.client_sugg.ResetFilters() - if index_name == sphinx_conf.index_sugg: + if index_name == SphinxConfig.index_sugg: self.client_sugg.SetRankingMode(sphinxapi.SPH_RANK_WORDCOUNT) self.client_sugg.SetFilterRange("len", int(word_len) - self.delta_len, int(word_len) + self.delta_len) self.client_sugg.SetSelect("word, len, @weight+{}-abs(len-{}) AS krank".format(self.delta_len, word_len)) @@ -67,8 +67,8 @@ class SphinxSearch: word_len = str(len(word) / 2) trigrammed_word = '"{}"/1'.format(trigram(word)) - self.__configure(sphinx_conf.index_sugg, word_len) - result = self.client_sugg.Query(trigrammed_word, sphinx_conf.index_sugg) + self.__configure(SphinxConfig.index_sugg, word_len) + result = self.client_sugg.Query(trigrammed_word, SphinxConfig.index_sugg) # Если по данному слову не найдено подсказок (а такое бывает?) # возвращаем [] @@ -138,26 +138,27 @@ class SphinxSearch: good_vars_word_count = len(set([v.parent for v in good_vars])) freq_vars_word_count = len(set([v.parent for v in freq_vars])) - self.__configure(sphinx_conf.index_addjobj, word_count) + self.__configure(SphinxConfig.index_addjobj, word_count) # формируем строки для поиска в Сфинксе for i in range(good_vars_word_count, max(0, good_vars_word_count - 3), -1): first_q = "@fullname \"{}\"/{}".format(" ".join(good_var.text for good_var in good_vars), i) if self.search_freq_words and freq_vars_word_count: second_q = " @sname {}".format(" ".join(freq_var.text for freq_var in freq_vars)) - self.client_show.AddQuery(first_q + second_q, sphinx_conf.index_addjobj) + self.client_show.AddQuery(first_q + second_q, SphinxConfig.index_addjobj) del second_q - self.client_show.AddQuery(first_q, sphinx_conf.index_addjobj) + self.client_show.AddQuery(first_q, SphinxConfig.index_addjobj) del first_q start_t = time.time() rs = self.client_show.RunQueries() elapsed_t = time.time() - start_t + if rs is None: raise FiasException("Cannot find sentence.") - if basic.logging: + if BasicConfig.logging: logging.info("Sphinx time for {} = {}".format(text, elapsed_t)) results = [] diff --git a/aore/search/wordentry.py b/aore/search/wordentry.py index 300008b..99a3b07 100644 --- a/aore/search/wordentry.py +++ b/aore/search/wordentry.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- import re -from aore.config import sphinx_conf +from aore.config import SphinxConfig from aore.search.wordvariation import WordVariation, VariationType @@ -68,7 +68,7 @@ class WordEntry: self.MT_AS_IS = False # Строка слишком котроткая, то по лайку не ищем, сфинкс такого не прожует - if self.MT_LAST_STAR and self.word_len < sphinx_conf.min_length_to_star: + if self.MT_LAST_STAR and self.word_len < SphinxConfig.min_length_to_star: self.MT_LAST_STAR = False self.MT_AS_IS = True diff --git a/aore/updater/aodataparser.py b/aore/updater/aodataparser.py index 32c6ff8..e0e2522 100644 --- a/aore/updater/aodataparser.py +++ b/aore/updater/aodataparser.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- import os -from aore.config import folders +from aore.config import Folders from aore.dbutils.dbschemas import db_shemas from aore.miscutils.exceptions import FiasException from aore.updater.xmlparser import XMLParser @@ -16,8 +16,8 @@ class AoDataParser: self.allowed_fields = db_shemas[self.datasource.table_name].fields # Создаем временную папку, если ее нет - if not os.path.exists(folders.temp): - os.makedirs(folders.temp) + if not os.path.exists(Folders.temp): + os.makedirs(Folders.temp) self.pagesize = pagesize self.currentpage = 0 @@ -56,7 +56,7 @@ class AoDataParser: self.data_bereit_callback = data_callback self.currentpage = 0 self.base_filename = \ - folders.temp + "/fd_" + \ + Folders.temp + "/fd_" + \ str(self.datasource.operation_type) + "_" + \ self.datasource.table_name + ".csv.part{}" self.counter = self.pagesize + 1 diff --git a/aore/updater/aorar.py b/aore/updater/aorar.py index b8b095d..05c1d36 100644 --- a/aore/updater/aorar.py +++ b/aore/updater/aorar.py @@ -7,14 +7,14 @@ from traceback import format_exc import rarfile import requests -from aore.config import folders, unrar_config +from aore.config import Folders, UnrarConfig from aore.miscutils.exceptions import FiasException from aoxmltableentry import AoXmlTableEntry class AoRar: def __init__(self): - rarfile.UNRAR_TOOL = unrar_config.path + rarfile.UNRAR_TOOL = UnrarConfig.path self.fname = None self.mode = None @@ -25,12 +25,12 @@ class AoRar: def download(self, url): logging.info("Downloading %s", url) try: - local_filename = os.path.abspath(folders.temp + "/" + url.split('/')[-1]) + local_filename = os.path.abspath(Folders.temp + "/" + url.split('/')[-1]) if os.path.isfile(local_filename): os.remove(local_filename) else: - if not os.path.exists(folders.temp): - os.makedirs(folders.temp) + if not os.path.exists(Folders.temp): + os.makedirs(Folders.temp) request = requests.get(url, stream=True) with open(local_filename, 'wb') as f: diff --git a/aore/updater/dbhandler.py b/aore/updater/dbhandler.py index 5e99337..96e00f6 100644 --- a/aore/updater/dbhandler.py +++ b/aore/updater/dbhandler.py @@ -5,7 +5,7 @@ import logging import psycopg2 from bottle import template -from aore.config import db_conf +from aore.config import DatabaseConfig from aore.dbutils.dbimpl import DBImpl from aore.dbutils.dbschemas import db_shemas from aore.updater.aoxmltableentry import AoXmlTableEntry @@ -13,7 +13,7 @@ from aore.updater.aoxmltableentry import AoXmlTableEntry class DbHandler: def __init__(self): - self.db = DBImpl(psycopg2, db_conf) + self.db = DBImpl(psycopg2, DatabaseConfig) def bulk_csv(self, operation_type, table_name, processed_count, csv_file_name): sql_query = None diff --git a/aore/updater/updater.py b/aore/updater/updater.py index 0568819..5babd7f 100644 --- a/aore/updater/updater.py +++ b/aore/updater/updater.py @@ -5,7 +5,7 @@ from os import walk, path import psycopg2 -from aore.config import db_conf +from aore.config import DatabaseConfig from aore.dbutils.dbimpl import DBImpl from aore.dbutils.dbschemas import allowed_tables, db_shemas from aore.updater.aodataparser import AoDataParser @@ -36,7 +36,7 @@ class Updater: def get_current_fias_version(cls): db = None try: - db = DBImpl(psycopg2, db_conf) + db = DBImpl(psycopg2, DatabaseConfig) rows = db.get_rows('SELECT version FROM "CONFIG" WHERE id=0', True) assert len(rows) > 0, "Cannot get a version" return rows[0]['version'] @@ -48,7 +48,7 @@ class Updater: @classmethod def __set__update_version(cls, updver=0): - db = DBImpl(psycopg2, db_conf) + db = DBImpl(psycopg2, DatabaseConfig) try: assert isinstance(updver, int), "Update version must be of int type." db.execute('UPDATE "CONFIG" SET version={} WHERE id=0'.format(updver)) @@ -69,9 +69,10 @@ class Updater: return mode def __get_updates_from_folder(self, foldername): - # TODO: Вычислять версию, если берем данные из каталога - yield dict(intver=self.__get_update_version_from_console(), - textver="Unknown", delta_url=foldername, + fias_db_version = self.__get_update_version_from_console() + yield dict(intver=fias_db_version, + textver="Version {}".format(fias_db_version), + delta_url=foldername, complete_url=foldername) @staticmethod diff --git a/config.example.py b/config.example.py index 23c87b6..8c43376 100644 --- a/config.example.py +++ b/config.example.py @@ -2,17 +2,24 @@ from aore import config # Config section -config.sphinx_conf.listen = "127.0.0.1:9312" -config.sphinx_conf.var_dir = "C:\\Sphinx" -config.db_conf.database = "pyfias" -config.db_conf.host = "192.168.0.1" -config.db_conf.port = 5432 -config.db_conf.user = "postgres" -config.db_conf.password = "postgres" +# Address and port where sphinx was listening, +# may be a unix socket like 'unix://tmp/pyphias.sock' +config.SphinxConfig.listen = "127.0.0.1:9312" +# Base sphinx folder +config.SphinxConfig.var_dir = "C:\\Sphinx" -config.unrar_config.path = "C:\\Program Files\\WinRAR\\unrar.exe" -config.folders.temp = "E:\\!TEMP" +# DB config +config.DatabaseConfig.database = "fias_db" +config.DatabaseConfig.host = "192.168.0.1" +config.DatabaseConfig.port = 5432 +config.DatabaseConfig.user = "postgres" +config.DatabaseConfig.password = "postgres" -config.basic.logging = True -config.basic.logfile = "pyphias.log" +# Path to unrar, in Linux may be 'unrar' +config.UnrarConfig.path = "C:\\Program Files\\WinRAR\\unrar.exe" +# Temp folder, in Linux may be '/tmp/myfolder' +config.Folders.temp = "E:\\!TEMP" + +config.BasicConfig.logging = True +config.BasicConfig.logfile = "pyphias.log" \ No newline at end of file From e764b44da415993730d7f6be4077ff01a1746dbd Mon Sep 17 00:00:00 2001 From: jar3b Date: Wed, 16 Mar 2016 22:18:36 +0300 Subject: [PATCH 3/4] Some corrections in strong search --- aore/config/common.py | 41 +++++++++++++++++++++++++------------- aore/miscutils/fysearch.py | 8 ++++---- aore/search/fiasfactory.py | 2 +- aore/search/search.py | 25 +++++++++++++---------- passenger_wsgi.py | 2 +- requirements.txt | 2 +- 6 files changed, 49 insertions(+), 31 deletions(-) diff --git a/aore/config/common.py b/aore/config/common.py index 9553d79..10a304e 100644 --- a/aore/config/common.py +++ b/aore/config/common.py @@ -2,31 +2,44 @@ class BasicConfig: + logging = False + logfile = "" + def __init__(self): - self.logging = False - self.logfile = "" + pass + class SphinxConfig: + listen = "127.0.0.1:9312" + index_addjobj = "idx_fias_addrobj" + index_sugg = "idx_fias_sugg" + var_dir = None + min_length_to_star = 3 + def __init__(self): - self.listen = "127.0.0.1:9312" - self.index_addjobj = "idx_fias_addrobj" - self.index_sugg = "idx_fias_sugg" - self.var_dir = None - self.min_length_to_star = 3 + pass + class DatabaseConfig: + host = None + user = None + password = None + database = None + port = None + def __init__(self): - self.host = None - self.user = None - self.password = None - self.database = None - self.port = None + pass + class UnrarConfig: + path = None + def __init__(self): - self.path = None + pass class Folders: + temp = None + def __init__(self): - self.temp = None + pass diff --git a/aore/miscutils/fysearch.py b/aore/miscutils/fysearch.py index d56a0e3..367cb47 100644 --- a/aore/miscutils/fysearch.py +++ b/aore/miscutils/fysearch.py @@ -10,10 +10,10 @@ def violet_ratio(pattern, candidate): result = list() - for i in range(len(arr_pattern) - 1, 0, -1): + for i in range(len(arr_pattern) - 1, -1, -1): max_j = -1 - max_ratio = 0 - allowed_nums = range(len(arr_candidate) - 1, 0, -1) + max_ratio = -1 + allowed_nums = range(len(arr_candidate) - 1, -1, -1) for j in allowed_nums: ratio = Levenshtein.ratio(arr_pattern[i], arr_candidate[j]) @@ -21,7 +21,7 @@ def violet_ratio(pattern, candidate): max_ratio = ratio max_j = j - result.append(max_j*max_ratio) + result.append(max_j*abs(max_ratio)) if max_j > -1: allowed_nums.remove(max_j) diff --git a/aore/search/fiasfactory.py b/aore/search/fiasfactory.py index 6fbbffc..edee085 100644 --- a/aore/search/fiasfactory.py +++ b/aore/search/fiasfactory.py @@ -57,7 +57,7 @@ class FiasFactory: results = self.searcher.find(text, strong) except Exception, err: - if basic.logging: + if BasicConfig.logging: logging.error(traceback.format_exc(err)) return dict(error=err.args[0]) diff --git a/aore/search/search.py b/aore/search/search.py index 4e3c384..63a61f3 100644 --- a/aore/search/search.py +++ b/aore/search/search.py @@ -8,8 +8,6 @@ import sphinxapi from aore.config import BasicConfig from aore.config import SphinxConfig -from fuzzywuzzy import fuzz - from aore.miscutils.exceptions import FiasException from aore.miscutils.fysearch import violet_ratio from aore.miscutils.trigram import trigram @@ -35,8 +33,8 @@ class SphinxSearch: sphinx_port = None # Получаем строку подключения для Sphinx - if ":" in sphinx_conf.listen and "unix:/" not in sphinx_conf.listen: - sphinx_host, sphinx_port = sphinx_conf.listen.split(":") + if ":" in SphinxConfig.listen and "unix:/" not in SphinxConfig.listen: + sphinx_host, sphinx_port = SphinxConfig.listen.split(":") sphinx_port = int(sphinx_port) # Настраиваем подключение для подсказок @@ -154,7 +152,6 @@ class SphinxSearch: rs = self.client_show.RunQueries() elapsed_t = time.time() - start_t - if rs is None: raise FiasException("Cannot find sentence.") @@ -178,10 +175,18 @@ class SphinxSearch: # При строгом поиске нам надо еще добавить fuzzy и выбрать самое большое значение при отклонении # выше заданного - for result in results: - print("{} {}".format(result['text'], fuzz.ratio(text, result['text']))) - print("{} {}".format(result['text'], fuzz.partial_ratio(text, result['text']))) - print("{} {}".format(result['text'], violet_ratio(text, result['text'].lower()))) - print("--") + if strong: + for result in results: + result['strong_rank'] = violet_ratio(text, result['text'].lower()) + + # Сортируем по убыванию признака + results.sort(key=lambda x: x['strong_rank'], reverse=True) + + # Если подряд два одинаково релеватных результата - это плохо, на автомат такое отдавать нельзя + if abs(results[0]['strong_rank'] - results[1]['strong_rank']) == 0.0: + raise FiasException("No matches") + else: + print results[0]['text'], results[0]['strong_rank'] + return results[0] return results diff --git a/passenger_wsgi.py b/passenger_wsgi.py index 2ddf04b..da2543f 100644 --- a/passenger_wsgi.py +++ b/passenger_wsgi.py @@ -9,7 +9,7 @@ except ImportError: assert "No config" # Define main app -phias_app = phias.App(config.basic.logfile) +phias_app = phias.App(config.BasicConfig.logfile) # Define wsgi app application = phias_app.get_app() diff --git a/requirements.txt b/requirements.txt index 0ffb6ba..669c6a1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,3 @@ -fuzzywuzzy>=0.10.0 lxml>=3.4.0 bottle>=0.12.9 psycopg2>=2.6.1 @@ -8,3 +7,4 @@ enum34>=1.0.0 rarfile requests>=2.8.1 soap2py==1.16 +sphinxapi From d05f2a37fb814260e20a64e1c7d103707d44cb35 Mon Sep 17 00:00:00 2001 From: jar3b Date: Thu, 17 Mar 2016 16:13:17 +0300 Subject: [PATCH 4/4] Remove debug output in search.py --- aore/search/search.py | 1 - 1 file changed, 1 deletion(-) diff --git a/aore/search/search.py b/aore/search/search.py index 63a61f3..0b879ca 100644 --- a/aore/search/search.py +++ b/aore/search/search.py @@ -186,7 +186,6 @@ class SphinxSearch: if abs(results[0]['strong_rank'] - results[1]['strong_rank']) == 0.0: raise FiasException("No matches") else: - print results[0]['text'], results[0]['strong_rank'] return results[0] return results