From c2fd52c825476a470ecc6a94909f4d3c473fceb9 Mon Sep 17 00:00:00 2001 From: Jack Stdin Date: Mon, 18 Jan 2016 17:57:31 +0300 Subject: [PATCH] =?UTF-8?q?=D0=9D=D0=B5=D0=B1=D0=BE=D0=BB=D1=8C=D1=88?= =?UTF-8?q?=D0=B8=D0=B5=20=D0=BF=D1=80=D0=B0=D0=B2=D0=BA=D0=B8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- aore/fias/fiasfactory.py | 5 +++-- aore/fias/search.py | 18 +++++++++++++----- aore/fias/wordentry.py | 3 ++- aore/templates/postgre/post_create.sql | 3 +++ aore/updater/updater.py | 2 +- manage.py | 6 +++--- 6 files changed, 25 insertions(+), 12 deletions(-) diff --git a/aore/fias/fiasfactory.py b/aore/fias/fiasfactory.py index 7f9791d..ffaca33 100644 --- a/aore/fias/fiasfactory.py +++ b/aore/fias/fiasfactory.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- from aore.fias.search import SphinxSearch +import logging class FiasFactory: @@ -7,11 +8,11 @@ class FiasFactory: self.searcher = SphinxSearch() # text - строка поиска - # strong - строгий поиск или "мягкий" (с допущением ошибок, опечаток) + # strong - строгий поиск (True) или "мягкий" (False) (с допущением ошибок, опечаток) # out_format - "full" or "simple" - полный (подробно для каждого подпункта) или простой (только строка и AOID) def find(self, text, strong=False, out_format="simple"): try: results = self.searcher.find(text, strong) - + print results except: return [] diff --git a/aore/fias/search.py b/aore/fias/search.py index a6171c2..0e261bf 100644 --- a/aore/fias/search.py +++ b/aore/fias/search.py @@ -5,6 +5,7 @@ import re import Levenshtein import psycopg2 import sphinxapi +import logging from aore.config import db as dbparams, sphinx_index_sugg, sphinx_index_addjobj from aore.dbutils.dbimpl import DBImpl @@ -23,14 +24,14 @@ class SphinxSearch: self.db = DBImpl(psycopg2, dbparams) self.client_sugg = sphinxapi.SphinxClient() - self.client_sugg.SetServer("localhost", 9312) + self.client_sugg.SetServer("127.0.0.1", 9312) self.client_sugg.SetLimits(0, 10) - self.client_sugg.SetConnectTimeout(3.0) + self.client_sugg.SetConnectTimeout(7.0) self.client_show = sphinxapi.SphinxClient() - self.client_show.SetServer("localhost", 9312) + self.client_show.SetServer("127.0.0.1", 9312) self.client_show.SetLimits(0, 10) - self.client_show.SetConnectTimeout(3.0) + self.client_show.SetConnectTimeout(7.0) def __configure(self, index_name, wlen=None): if index_name == "idx_fias_sugg": @@ -42,7 +43,7 @@ class SphinxSearch: self.client_sugg.SetSortMode(sphinxapi.SPH_SORT_EXTENDED, "krank DESC") else: self.client_show.SetMatchMode(sphinxapi.SPH_MATCH_EXTENDED2) - self.client_show.SetRankingMode(sphinxapi.SPH_RANK_BM25) + #self.client_show.SetRankingMode(sphinxapi.SPH_RANK_BM25) def __get_suggest(self, word, rating_limit, count): word_len = str(len(word) / 2) @@ -101,6 +102,7 @@ class SphinxSearch: if word != '': we = WordEntry(self.db, word) self.__add_word_variations(we, strong) + if we.get_variations() == "()": raise BaseException("Cannot process sentence.") yield we @@ -111,7 +113,13 @@ class SphinxSearch: sentence = "{}".format(" MAYBE ".join(x.get_variations() for x in word_entries)) self.__configure(sphinx_index_addjobj) + logging.info("QUERY "+sentence) rs = self.client_show.Query(sentence, sphinx_index_addjobj) + logging.info("OK") + + print json.dumps(rs) + + logging.info("OK") results = [] for ma in rs['matches']: diff --git a/aore/fias/wordentry.py b/aore/fias/wordentry.py index db4b62d..f42a50a 100644 --- a/aore/fias/wordentry.py +++ b/aore/fias/wordentry.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- import re +import logging class WordEntry: @@ -65,7 +66,7 @@ class WordEntry: return "({})".format(" | ".join(self.variations)) def __get_ranks(self): - word_len = len(self.word) + word_len = len(unicode(self.word)) sql_qry = "SELECT COUNT(*), NULL FROM \"AOTRIG\" WHERE word LIKE '{}%' AND LENGTH(word) > {} " \ "UNION ALL SELECT COUNT(*), NULL FROM \"AOTRIG\" WHERE word='{}' " \ "UNION ALL SELECT COUNT(*), MAX(scname) FROM \"SOCRBASE\" WHERE socrname ILIKE '{}'" \ diff --git a/aore/templates/postgre/post_create.sql b/aore/templates/postgre/post_create.sql index 0ea6c7f..89d12b2 100644 --- a/aore/templates/postgre/post_create.sql +++ b/aore/templates/postgre/post_create.sql @@ -4,4 +4,7 @@ CREATE INDEX "sphinx_ind_livestatus" ON "ADDROBJ" USING btree ("actstatus", "liv CREATE INDEX "sphinx_ind_aoguid" ON "ADDROBJ" USING btree ("aoguid"); CREATE INDEX "SOCRBASE_scname_idx" ON "SOCRBASE" USING btree ("scname"); CREATE INDEX "SOCRBASE_socrname_idx" ON "SOCRBASE" USING btree ("socrname"); +CREATE INDEX "SOCRBASE_scname_gin_idx" ON "SOCRBASE" USING gin(scname gin_trgm_ops); +CREATE INDEX "SOCRBASE_socrname_gin_idx" ON "SOCRBASE" USING gin(socrname gin_trgm_ops); CREATE INDEX "AOTRIG_word_idx" ON "AOTRIG" USING btree ("word"); +CREATE INDEX "AOTRIG_word_gin_idx" ON "AOTRIG" USING gin(word gin_trgm_ops); diff --git a/aore/updater/updater.py b/aore/updater/updater.py index dc0baab..c119017 100644 --- a/aore/updater/updater.py +++ b/aore/updater/updater.py @@ -72,7 +72,7 @@ class Updater: self.__init_update_entries(updates_generator) self.db_handler.pre_update() - for update_entry in self.updates_generator: + for update_entry in self.updalist_generator: logging.info("Processing update #{}".format(update_entry['intver'])) for table_entry in self.tablelist_generator(update_entry['delta_url']): self.process_single_entry(table_entry.operation_type, table_entry) diff --git a/manage.py b/manage.py index cbe6549..874b953 100644 --- a/manage.py +++ b/manage.py @@ -2,7 +2,7 @@ import optparse -from aore.fias.search import SphinxSearch +from aore.fias.fiasfactory import FiasFactory from aore.miscutils.sphinx import SphinxHelper from aore.updater.updater import Updater from aore.updater.soapreceiver import SoapReceiver @@ -116,8 +116,8 @@ def main(): # 4 Debug purposes.. if options.test: - sph = SphinxSearch() - sph.find('кедровая пасраул') + sph = FiasFactory() + sph.find('ул кемровая пасраул алтай майминский р-н') if __name__ == '__main__':