Небольшие правки
This commit is contained in:
		@@ -37,9 +37,14 @@ SPHINX_VAR_DIRS = dict(
 | 
			
		||||
# config_type = "test"
 | 
			
		||||
 | 
			
		||||
# Main section
 | 
			
		||||
sphinx_index_addjobj="idx_fias_addrobj"
 | 
			
		||||
sphinx_index_sugg="idx_fias_sugg"
 | 
			
		||||
sphinx_var_dir=SPHINX_VAR_DIRS[config_type]
 | 
			
		||||
class _AttrDict(dict):
    """A ``dict`` whose keys can also be read as attributes.

    The Sphinx settings below are read elsewhere as ``sphinx.host``,
    ``sphinx.port``, ``sphinx.index_sugg`` and so on; a plain ``dict``
    raises ``AttributeError`` for that kind of access.
    """

    def __getattr__(self, name):
        # Only reached when normal attribute lookup fails, so regular dict
        # methods such as ``keys`` or ``get`` keep working as usual.
        try:
            return self[name]
        except KeyError:
            raise AttributeError(name)


# Connection and index settings for the Sphinx search daemon.
sphinx = _AttrDict(
    host="localhost",  # consumers read this as ``sphinx.host``
    host_name="localhost",  # original key, kept for backward compatibility
    port=9312,
    index_addjobj="idx_fias_addrobj",
    index_sugg="idx_fias_sugg",
    var_dir=SPHINX_VAR_DIRS[config_type],
)
 | 
			
		||||
 | 
			
		||||
db = DB_INSTANCES[config_type]
 | 
			
		||||
unrar = UNRAR_PATHES[config_type]
 | 
			
		||||
trashfolder = "files/"
 | 
			
		||||
 
 | 
			
		||||
@@ -1,6 +1,7 @@
 | 
			
		||||
# -*- coding: utf-8 -*-
 | 
			
		||||
 | 
			
		||||
from traceback import format_exc
 | 
			
		||||
import psycopg2.extras
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class DBImpl:
 | 
			
		||||
@@ -30,7 +31,10 @@ class DBImpl:
 | 
			
		||||
            self.transaction_rollback()
 | 
			
		||||
            raise BaseException("Error execute sql query. Reason : {}".format(format_exc()))
 | 
			
		||||
 | 
			
		||||
    def get_rows(self, query_string):
 | 
			
		||||
    def get_rows(self, query_string, dict_cursor=False):
 | 
			
		||||
        if dict_cursor:
 | 
			
		||||
            cur = self.connection.cursor(cursor_factory=psycopg2.extras.RealDictCursor)
 | 
			
		||||
        else:
 | 
			
		||||
            cur = self.connection.cursor()
 | 
			
		||||
        cur.execute(query_string)
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -1,11 +1,17 @@
 | 
			
		||||
# -*- coding: utf-8 -*-
 | 
			
		||||
import psycopg2
 | 
			
		||||
from bottle import template
 | 
			
		||||
 | 
			
		||||
from aore.dbutils.dbimpl import DBImpl
 | 
			
		||||
from aore.fias.search import SphinxSearch
 | 
			
		||||
import logging
 | 
			
		||||
from aore.config import db as dbparams
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class FiasFactory:
 | 
			
		||||
    def __init__(self):
 | 
			
		||||
        self.searcher = SphinxSearch()
 | 
			
		||||
        self.db = DBImpl(psycopg2, dbparams)
 | 
			
		||||
        self.searcher = SphinxSearch(self.db)
 | 
			
		||||
        self.expand_templ = template('aore/templates/postgre/expand_query.sql', aoid="//aoid")
 | 
			
		||||
 | 
			
		||||
    # text - the search string
 | 
			
		||||
    # strong - strict search (True) or "soft" search (False), which tolerates mistakes and typos
 | 
			
		||||
@@ -13,6 +19,21 @@ class FiasFactory:
 | 
			
		||||
    def find(self, text, strong=False, out_format="simple"):
        """Search for address objects matching ``text``.

        text - the search string.
        strong - strict search (True) or "soft" search (False); passed
            straight through to ``self.searcher.find``.
        out_format - accepted for interface compatibility; not used here.

        Returns whatever ``self.searcher.find`` returns, or a dict of the
        form ``{"error": <message>}`` if the search raised an exception.
        """
        try:
            return self.searcher.find(text, strong)
        except Exception as err:
            # Report the failure to the caller instead of silently returning
            # an empty list; fall back to str(err) when args is empty.
            return dict(error=err.args[0] if err.args else str(err))
 | 
			
		||||
 | 
			
		||||
    # Normalizes the given AOID or AOGUID into the current (actual) AOID
 | 
			
		||||
    def normalize(self, aoid_guid):
        """Placeholder for AOID / AOGUID normalization.

        Meant to turn the supplied AOID or AOGUID into the current AOID.
        Not implemented yet: the argument is ignored and None is returned.
        """
        return None
 | 
			
		||||
 | 
			
		||||
    # Expands an AOID into its representation (should normalize it first; normalization is not implemented yet)
 | 
			
		||||
    def expand(self, aoid_guid):
        """Expand an AOID into its rows from the expand query.

        aoid_guid - identifier of the address object, given as a UUID string.
            NOTE(review): assumes AOIDs are always UUIDs, as in the visible
            test call; confirm against the ADDROBJ schema.

        Returns the rows produced by ``self.db.get_rows`` (requested with the
        dict cursor), or a dict of the form ``{"error": <message>}`` if the
        identifier is malformed or the query raised an ``Exception``.
        """
        import uuid  # local import keeps this block self-contained

        try:
            # The value is spliced into the SQL text, so it must never be
            # inserted verbatim: parse it as a UUID and substitute only the
            # canonical form, which cannot contain quotes or SQL syntax.
            safe_aoid = str(uuid.UUID(aoid_guid))
            sql_query = self.expand_templ.replace("//aoid", safe_aoid)
            return self.db.get_rows(sql_query, True)
        except Exception as err:
            # Fall back to str(err) when the exception carries no args.
            return dict(error=err.args[0] if err.args else str(err))
 | 
			
		||||
 
 | 
			
		||||
@@ -1,40 +1,42 @@
 | 
			
		||||
# -*- coding: utf-8 -*-
 | 
			
		||||
import json
 | 
			
		||||
import logging
 | 
			
		||||
import re
 | 
			
		||||
 | 
			
		||||
import Levenshtein
 | 
			
		||||
import psycopg2
 | 
			
		||||
import sphinxapi
 | 
			
		||||
import logging
 | 
			
		||||
 | 
			
		||||
from aore.config import db as dbparams, sphinx_index_sugg, sphinx_index_addjobj
 | 
			
		||||
from aore.dbutils.dbimpl import DBImpl
 | 
			
		||||
from aore.config import sphinx
 | 
			
		||||
from aore.fias.wordentry import WordEntry
 | 
			
		||||
from aore.miscutils.trigram import trigram
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class SphinxSearch:
 | 
			
		||||
    def __init__(self):
 | 
			
		||||
    def __init__(self, db):
 | 
			
		||||
        self.delta_len = 2
 | 
			
		||||
 | 
			
		||||
        self.rating_limit_soft = 0.4
 | 
			
		||||
        self.rating_limit_soft_count = 6
 | 
			
		||||
        self.word_length_soft = 3
 | 
			
		||||
 | 
			
		||||
        self.rating_limit_hard = 0.82
 | 
			
		||||
        self.rating_limit_hard_count = 3
 | 
			
		||||
 | 
			
		||||
        self.default_rating_delta = 2
 | 
			
		||||
        self.regression_coef = 0.04
 | 
			
		||||
 | 
			
		||||
        self.db = DBImpl(psycopg2, dbparams)
 | 
			
		||||
 | 
			
		||||
        self.db = db
 | 
			
		||||
        self.client_sugg = sphinxapi.SphinxClient()
 | 
			
		||||
        self.client_sugg.SetServer("127.0.0.1", 9312)
 | 
			
		||||
        self.client_sugg.SetServer(sphinx.host, sphinx.port)
 | 
			
		||||
        self.client_sugg.SetLimits(0, 10)
 | 
			
		||||
        self.client_sugg.SetConnectTimeout(7.0)
 | 
			
		||||
        self.client_sugg.SetConnectTimeout(3.0)
 | 
			
		||||
 | 
			
		||||
        self.client_show = sphinxapi.SphinxClient()
 | 
			
		||||
        self.client_show.SetServer("127.0.0.1", 9312)
 | 
			
		||||
        self.client_show.SetServer(sphinx.host, sphinx.port)
 | 
			
		||||
        self.client_show.SetLimits(0, 10)
 | 
			
		||||
        self.client_show.SetConnectTimeout(7.0)
 | 
			
		||||
        self.client_show.SetConnectTimeout(3.0)
 | 
			
		||||
 | 
			
		||||
    def __configure(self, index_name, wlen=None):
 | 
			
		||||
        if index_name == "idx_fias_sugg":
 | 
			
		||||
        if index_name == sphinx.index_sugg:
 | 
			
		||||
            if wlen:
 | 
			
		||||
                self.client_sugg.SetMatchMode(sphinxapi.SPH_MATCH_EXTENDED2)
 | 
			
		||||
                self.client_sugg.SetRankingMode(sphinxapi.SPH_RANK_WORDCOUNT)
 | 
			
		||||
@@ -43,14 +45,15 @@ class SphinxSearch:
 | 
			
		||||
                self.client_sugg.SetSortMode(sphinxapi.SPH_SORT_EXTENDED, "krank DESC")
 | 
			
		||||
        else:
 | 
			
		||||
            self.client_show.SetMatchMode(sphinxapi.SPH_MATCH_EXTENDED2)
 | 
			
		||||
            #self.client_show.SetRankingMode(sphinxapi.SPH_RANK_BM25)
 | 
			
		||||
            self.client_show.SetRankingMode(sphinxapi.SPH_RANK_BM25)
 | 
			
		||||
            self.client_show.SetSortMode(sphinxapi.SPH_SORT_RELEVANCE)
 | 
			
		||||
 | 
			
		||||
    def __get_suggest(self, word, rating_limit, count):
 | 
			
		||||
        word_len = str(len(word) / 2)
 | 
			
		||||
        trigrammed_word = '"{}"/1'.format(trigram(word))
 | 
			
		||||
 | 
			
		||||
        self.__configure(sphinx_index_sugg, word_len)
 | 
			
		||||
        result = self.client_sugg.Query(trigrammed_word, sphinx_index_sugg)
 | 
			
		||||
        self.__configure(sphinx.index_sugg, word_len)
 | 
			
		||||
        result = self.client_sugg.Query(trigrammed_word, sphinx.index_sugg)
 | 
			
		||||
 | 
			
		||||
        # If no suggestions were found for this word (can that even happen?),
        # return []
 | 
			
		||||
@@ -83,11 +86,11 @@ class SphinxSearch:
 | 
			
		||||
 | 
			
		||||
    def __add_word_variations(self, word_entry, strong):
 | 
			
		||||
        if word_entry.MT_MANY_SUGG and not strong:
 | 
			
		||||
            suggs = self.__get_suggest(word_entry.word, self.rating_limit_soft, 6)
 | 
			
		||||
            suggs = self.__get_suggest(word_entry.word, self.rating_limit_soft, self.rating_limit_soft_count)
 | 
			
		||||
            for suggestion in suggs:
 | 
			
		||||
                word_entry.add_variation(suggestion[0])
 | 
			
		||||
        if word_entry.MT_SOME_SUGG and not strong:
 | 
			
		||||
            suggs = self.__get_suggest(word_entry.word, self.rating_limit_hard, 3)
 | 
			
		||||
            suggs = self.__get_suggest(word_entry.word, self.rating_limit_hard, self.rating_limit_hard_count)
 | 
			
		||||
            for suggestion in suggs:
 | 
			
		||||
                word_entry.add_variation(suggestion[0])
 | 
			
		||||
        if word_entry.MT_LAST_STAR:
 | 
			
		||||
@@ -99,6 +102,8 @@ class SphinxSearch:
 | 
			
		||||
 | 
			
		||||
    def __get_word_entries(self, words, strong):
 | 
			
		||||
        for word in words:
 | 
			
		||||
            if not strong and len(word) < self.word_length_soft:
 | 
			
		||||
                continue
 | 
			
		||||
            if word != '':
 | 
			
		||||
                we = WordEntry(self.db, word)
 | 
			
		||||
                self.__add_word_variations(we, strong)
 | 
			
		||||
@@ -112,16 +117,15 @@ class SphinxSearch:
 | 
			
		||||
        word_entries = self.__get_word_entries(words, strong)
 | 
			
		||||
        sentence = "{}".format(" MAYBE ".join(x.get_variations() for x in word_entries))
 | 
			
		||||
 | 
			
		||||
        self.__configure(sphinx_index_addjobj)
 | 
			
		||||
        self.__configure(sphinx.index_addjobj)
 | 
			
		||||
        logging.info("QUERY " + sentence)
 | 
			
		||||
        rs = self.client_show.Query(sentence, sphinx_index_addjobj)
 | 
			
		||||
        logging.info("OK")
 | 
			
		||||
 | 
			
		||||
        print json.dumps(rs)
 | 
			
		||||
 | 
			
		||||
        rs = self.client_show.Query(sentence, sphinx.index_addjobj)
 | 
			
		||||
        logging.info("OK")
 | 
			
		||||
 | 
			
		||||
        results = []
 | 
			
		||||
        for ma in rs['matches']:
 | 
			
		||||
            results.append([ma['attrs']['aoid'], ma['attrs']['fullname'], ma['weight']])
 | 
			
		||||
            results.append(dict(aoid=ma['attrs']['aoid'], text=ma['attrs']['fullname'], ratio=ma['weight']))
 | 
			
		||||
 | 
			
		||||
        if strong:
 | 
			
		||||
            results.sort(key=lambda x: Levenshtein.ratio(text, x['text']), reverse=True)
 | 
			
		||||
        return results
 | 
			
		||||
 
 | 
			
		||||
@@ -80,7 +80,10 @@ class WordEntry:
 | 
			
		||||
        outmask = ""
 | 
			
		||||
        for ra in result:
 | 
			
		||||
            if ra[0] > 1:
 | 
			
		||||
                if word_len > 2:
 | 
			
		||||
                    outmask += 'x'
 | 
			
		||||
                else:
 | 
			
		||||
                    outmask += '1'
 | 
			
		||||
            else:
 | 
			
		||||
                outmask += str(ra[0])
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -7,12 +7,12 @@ from bottle import template
 | 
			
		||||
 | 
			
		||||
from aore.updater.aoxmltableentry import AoXmlTableEntry
 | 
			
		||||
from aore.updater.dbhandler import DbHandler
 | 
			
		||||
from aore.config import db as dbconfig, sphinx_index_addjobj, sphinx_var_dir, trashfolder, sphinx_index_sugg
 | 
			
		||||
from aore.config import db as dbconfig, sphinx, trashfolder
 | 
			
		||||
from trigram import trigram
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class SphinxHelper:
 | 
			
		||||
    def __init__(self, ):
 | 
			
		||||
    def __init__(self):
 | 
			
		||||
        self.index_binary = None
 | 
			
		||||
        self.files = dict()
 | 
			
		||||
 | 
			
		||||
@@ -58,8 +58,8 @@ class SphinxHelper:
 | 
			
		||||
                             db_user=dbconfig['user'],
 | 
			
		||||
                             db_password=dbconfig['password'],
 | 
			
		||||
                             db_name=dbconfig['database'], db_port=dbconfig['port'],
 | 
			
		||||
                             index_name=sphinx_index_sugg,
 | 
			
		||||
                             sphinx_var_path=sphinx_var_dir)
 | 
			
		||||
                             index_name=sphinx.index_sugg,
 | 
			
		||||
                             sphinx_var_path=sphinx.var_dir)
 | 
			
		||||
 | 
			
		||||
        f = open(fname, "w")
 | 
			
		||||
        f.write(conf_data)
 | 
			
		||||
@@ -112,8 +112,8 @@ class SphinxHelper:
 | 
			
		||||
                             db_password=dbconfig['password'],
 | 
			
		||||
                             db_name=dbconfig['database'], db_port=dbconfig['port'],
 | 
			
		||||
                             sql_query=template('aore/templates/postgre/sphinx_query.sql').replace("\n", " \\\n"),
 | 
			
		||||
                             index_name=sphinx_index_addjobj,
 | 
			
		||||
                             sphinx_var_path=sphinx_var_dir)
 | 
			
		||||
                             index_name=sphinx.index_addjobj,
 | 
			
		||||
                             sphinx_var_path=sphinx.var_dir)
 | 
			
		||||
 | 
			
		||||
        f = open(fname, "w")
 | 
			
		||||
        f.write(conf_data)
 | 
			
		||||
@@ -128,7 +128,7 @@ class SphinxHelper:
 | 
			
		||||
        logging.info("Make suggestion dict ({})...".format(fname))
 | 
			
		||||
 | 
			
		||||
        run_builddict_cmd = "{} {} -c {} --buildstops {} 200000 --buildfreqs".format(self.index_binary,
 | 
			
		||||
                                                                                     sphinx_index_addjobj,
 | 
			
		||||
                                                                                     sphinx.index_addjobj,
 | 
			
		||||
                                                                                     self.files['addrobj.conf'], fname)
 | 
			
		||||
        os.system(run_builddict_cmd)
 | 
			
		||||
        logging.info("Done.")
 | 
			
		||||
@@ -139,7 +139,7 @@ class SphinxHelper:
 | 
			
		||||
        out_filename = os.path.abspath(config_fname)
 | 
			
		||||
        logging.info("Creating main config {}...".format(out_filename))
 | 
			
		||||
 | 
			
		||||
        conf_data = template('aore/templates/sphinx/sphinx.conf', sphinx_var_path=sphinx_var_dir)
 | 
			
		||||
        conf_data = template('aore/templates/sphinx/sphinx.conf', sphinx_var_path=sphinx.var_dir)
 | 
			
		||||
 | 
			
		||||
        f = open(out_filename, "w")
 | 
			
		||||
        for fname, fpath in self.files.iteritems():
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										9
									
								
								aore/templates/postgre/expand_query.sql
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										9
									
								
								aore/templates/postgre/expand_query.sql
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,9 @@
 | 
			
		||||
WITH RECURSIVE child_to_parents AS (
 | 
			
		||||
  SELECT "ADDROBJ".* FROM "ADDROBJ"
 | 
			
		||||
		WHERE aoid = '{{ aoid }}'
 | 
			
		||||
  UNION ALL
 | 
			
		||||
  SELECT "ADDROBJ".* FROM "ADDROBJ", child_to_parents
 | 
			
		||||
			WHERE "ADDROBJ".aoguid = child_to_parents.parentguid
 | 
			
		||||
        AND "ADDROBJ".actstatus = True AND "ADDROBJ".livestatus = True AND "ADDROBJ".nextid IS NULL
 | 
			
		||||
)
 | 
			
		||||
SELECT DISTINCT ON (scname) cs.aoid, cs.aoguid, cs.shortname, cs.formalname, cs.aolevel, s.socrname FROM child_to_parents cs LEFT JOIN "SOCRBASE" s ON s.scname=cs.shortname ORDER BY scname, aolevel;
 | 
			
		||||
@@ -17,16 +17,10 @@ source {{index_name}}
 | 
			
		||||
 | 
			
		||||
index {{ index_name }}
 | 
			
		||||
{
 | 
			
		||||
    docinfo             = extern
 | 
			
		||||
    morphology          = stem_ru
 | 
			
		||||
    min_stemming_len    = 3
 | 
			
		||||
 | 
			
		||||
    stopwords           =
 | 
			
		||||
    min_word_len        = 1
 | 
			
		||||
    charset_type        = utf-8
 | 
			
		||||
    min_prefix_len      = 1
 | 
			
		||||
    min_infix_len       = 0
 | 
			
		||||
    enable_star         = 1
 | 
			
		||||
    ngram_len           = 1
 | 
			
		||||
 | 
			
		||||
    # strip html by default
 | 
			
		||||
    html_strip          = 1
 | 
			
		||||
 
 | 
			
		||||
@@ -1,5 +1,5 @@
 | 
			
		||||
# -*- coding: utf-8 -*-
 | 
			
		||||
 | 
			
		||||
import json
 | 
			
		||||
import optparse
 | 
			
		||||
 | 
			
		||||
from aore.fias.fiasfactory import FiasFactory
 | 
			
		||||
@@ -117,7 +117,9 @@ def main():
 | 
			
		||||
    # For debugging purposes only.
 | 
			
		||||
    if options.test:
 | 
			
		||||
        sph = FiasFactory()
 | 
			
		||||
        sph.find('ул кемровая пасраул алтай майминский р-н')
 | 
			
		||||
        print json.dumps(sph.expand("453091f5-2336-4aea-9b90-c4060dca0b33"))
 | 
			
		||||
        print json.dumps(sph.find('с паспаул ул кедровая', True))
 | 
			
		||||
        print json.dumps(sph.find('с паспаул ул кедровая'))
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
if __name__ == '__main__':
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user