From 16a144205bfeefc0a551d1817c73bf4c33bab81b Mon Sep 17 00:00:00 2001 From: Jack Stdin Date: Tue, 2 Feb 2016 23:28:20 +0300 Subject: [PATCH] =?UTF-8?q?=D0=9D=D0=B5=D0=BC=D0=BD=D0=BE=D0=B3=D0=BE=20?= =?UTF-8?q?=D1=83=D0=BB=D1=83=D1=87=D1=88=D0=B5=D0=BD=20=D0=BF=D0=BE=D0=B8?= =?UTF-8?q?=D1=81=D0=BA=20(=D1=87=D0=B5=D1=80=D0=B5=D0=B7=20=D0=BD=D0=B5?= =?UTF-8?q?=D1=81=D0=BA=D0=BE=D0=BB=D1=8C=D0=BA=D0=BE=20query)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- aore/fias/search.py | 40 ++++++++++++++++++++++++++-------------- aore/fias/wordentry.py | 6 ++++-- aore/phias.py | 5 +---- passenger_wsgi.py | 2 +- 4 files changed, 32 insertions(+), 21 deletions(-) diff --git a/aore/fias/search.py b/aore/fias/search.py index 2f41b70..9e89761 100644 --- a/aore/fias/search.py +++ b/aore/fias/search.py @@ -22,22 +22,24 @@ class SphinxSearch: self.rating_limit_hard_count = 3 self.default_rating_delta = 2 - self.regression_coef = 0.04 + self.regression_coef = 0.08 + + self.max_result = 10 self.db = db self.client_sugg = sphinxapi.SphinxClient() self.client_sugg.SetServer(sphinx_conf.host_name, sphinx_conf.port) - self.client_sugg.SetLimits(0, 10) + self.client_sugg.SetLimits(0, self.max_result) self.client_sugg.SetConnectTimeout(3.0) self.client_show = sphinxapi.SphinxClient() self.client_show.SetServer(sphinx_conf.host_name, sphinx_conf.port) - self.client_show.SetLimits(0, 10) + self.client_show.SetLimits(0, self.max_result) self.client_show.SetConnectTimeout(3.0) def __configure(self, index_name, wlen=None): if index_name == sphinx_conf.index_sugg and wlen: - self.client_sugg.SetRankingMode(sphinxapi.SPH_RANK_BM25) + self.client_sugg.SetRankingMode(sphinxapi.SPH_RANK_WORDCOUNT) self.client_sugg.SetFilterRange("len", int(wlen) - self.delta_len, int(wlen) + self.delta_len) self.client_sugg.SetSelect("word, len, @weight+{}-abs(len-{}) AS krank".format(self.delta_len, wlen)) self.client_sugg.SetSortMode(sphinxapi.SPH_SORT_EXTENDED, "krank DESC") @@ -98,30 +100,40 @@ class SphinxSearch: word_entry.add_variation_socr() def __get_word_entries(self, words, strong): + we_list = [] for word in words: - if not strong and len(word) < self.word_length_soft: - continue if word != '': we = WordEntry(self.db, word) self.__add_word_variations(we, strong) - assert we.get_variations() != "()", "Cannot process sentence." - yield we + assert we.get_variations() != "", "Cannot process sentence." + we_list.append(we) + return we_list def find(self, text, strong): - logging.info("FIND ") words = self.__split_phrase(text) word_entries = self.__get_word_entries(words, strong) - sentence = "{}".format(" MAYBE ".join(x.get_variations() for x in word_entries)) + word_count = len(word_entries) + for x in range(word_count, max(0, word_count - 3), -1): + self.client_show.AddQuery("\"{}\"/{}".format(" ".join(x.get_variations() for x in word_entries), x), + sphinx_conf.index_addjobj) self.__configure(sphinx_conf.index_addjobj) - logging.info("QUERY " + sentence) - rs = self.client_show.Query(sentence, sphinx_conf.index_addjobj) + logging.info("QUERY ") + rs = self.client_show.RunQueries() logging.info("OK") results = [] - for ma in rs['matches']: - results.append(dict(aoid=ma['attrs']['aoid'], text=ma['attrs']['fullname'], ratio=ma['weight'])) + parsed_ids = [] + + for i in range(0, len(rs)): + for ma in rs[i]['matches']: + if len(results) >= self.max_result: + break + if not ma['attrs']['aoid'] in parsed_ids: + parsed_ids.append(ma['attrs']['aoid']) + results.append( + dict(aoid=ma['attrs']['aoid'], text=ma['attrs']['fullname'], ratio=ma['weight'], cort=i)) if strong: results.sort(key=lambda x: Levenshtein.ratio(text, x['text']), reverse=True) diff --git a/aore/fias/wordentry.py b/aore/fias/wordentry.py index 69c0b68..ce93aab 100644 --- a/aore/fias/wordentry.py +++ b/aore/fias/wordentry.py @@ -71,14 +71,16 @@ class WordEntry: self.variations.append(variation_string) def get_variations(self): - return "({})".format(" | ".join(self.variations)) + if len(self.variations) == 1: + return self.variations[0] + return "{}".format(" ".join(self.variations)) def __get_ranks(self): sql_qry = "SELECT COUNT(*), NULL FROM \"AOTRIG\" WHERE word LIKE '{}%' AND LENGTH(word) > {} " \ "UNION ALL SELECT COUNT(*), NULL FROM \"AOTRIG\" WHERE word='{}' " \ "UNION ALL SELECT COUNT(*), MAX(scname) FROM \"SOCRBASE\" WHERE socrname ILIKE '{}'" \ "UNION ALL SELECT COUNT(*), NULL FROM \"SOCRBASE\" WHERE scname ILIKE '{}';".format( - self.word, self.word_len, self.word, self.word, self.word) + self.word, self.word_len, self.word, self.word, self.word) result = self.db.get_rows(sql_qry) diff --git a/aore/phias.py b/aore/phias.py index bc8a91e..5eea907 100644 --- a/aore/phias.py +++ b/aore/phias.py @@ -26,13 +26,10 @@ def normalize(aoid): @app.route('/find/') @app.route('/find//') def find(text, strong=False): - logging.warning("START") strong = (strong == "strong") response.content_type = 'application/json' - res = json.dumps(fias_factory.find(text, strong)) - logging.warning("END") - return res + return json.dumps(fias_factory.find(text, strong)) @app.error(404) diff --git a/passenger_wsgi.py b/passenger_wsgi.py index 319a0a2..c9ccd80 100644 --- a/passenger_wsgi.py +++ b/passenger_wsgi.py @@ -5,4 +5,4 @@ from aore import phias application = phias.app if __name__ == '__main__': - application.run(host='localhost', port=55001, debug=True) + application.run(host='0.0.0.0', port=55001, debug=True)