From 1a221d9a93b308b1d557c57b8146f379ee3fc4b9 Mon Sep 17 00:00:00 2001 From: Jack Stdin Date: Sun, 31 Jan 2016 23:23:52 +0300 Subject: [PATCH] =?UTF-8?q?=D0=9F=D0=B5=D1=80=D0=B5=D0=B4=D0=B5=D0=BB?= =?UTF-8?q?=D0=B0=D0=BD=20=D0=BA=D0=BE=D0=BD=D1=84=D0=B8=D0=B3=20(=D0=BF?= =?UTF-8?q?=D0=BE=D0=B4=20dev=20=D0=B8=20production)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- aore/config.py | 50 ---------------------- aore/config/__init__.py | 4 ++ aore/config/common.py | 24 +++++++++++ aore/config/dev.py | 16 +++++++ aore/dbutils/dbimpl.py | 6 ++- aore/dbutils/dbschemas.py | 2 +- aore/fias/fiasfactory.py | 27 +++++++++--- aore/fias/search.py | 19 ++++---- aore/fias/wordentry.py | 1 - aore/miscutils/sphinx.py | 38 ++++++++-------- aore/templates/postgre/normalize_query.sql | 1 + aore/updater/aodataparser.py | 5 +-- aore/updater/aorar.py | 9 ++-- aore/updater/dbhandler.py | 6 +-- aore/updater/updater.py | 3 +- manage.py | 12 +++--- 16 files changed, 117 insertions(+), 106 deletions(-) delete mode 100644 aore/config.py create mode 100644 aore/config/__init__.py create mode 100644 aore/config/common.py create mode 100644 aore/config/dev.py create mode 100644 aore/templates/postgre/normalize_query.sql diff --git a/aore/config.py b/aore/config.py deleted file mode 100644 index da9b7f4..0000000 --- a/aore/config.py +++ /dev/null @@ -1,50 +0,0 @@ -# -*- coding: utf-8 -*- - -from platform import system - -config_type = "production" -if "Windows" in system(): - config_type = "test" - -DB_INSTANCES = dict( - test=dict( - host="localhost", - user="postgres", - password="intercon", - database="postgres", - port=5432 - ), - production=dict( - host="localhost", - user="***", - password="***", - database="***", - port=5432 - ) -) - -UNRAR_PATHES = dict( - test="C:\Program Files (x86)\WinRAR\unrar.exe", - production="unrar" -) - -SPHINX_VAR_DIRS = dict( - test="C:/Sphinx", - production="/var/sphinx" -) - -# Uncomment if you want to specify config_type manually -# config_type = "test" - -# Main section -sphinx = dict( - host_name="localhost", - port=9312, - index_addjobj="idx_fias_addrobj", - index_sugg="idx_fias_sugg", - var_dir=SPHINX_VAR_DIRS[config_type] -) - -db = DB_INSTANCES[config_type] -unrar = UNRAR_PATHES[config_type] -trashfolder = "files/" diff --git a/aore/config/__init__.py b/aore/config/__init__.py new file mode 100644 index 0000000..a19824a --- /dev/null +++ b/aore/config/__init__.py @@ -0,0 +1,4 @@ +# -*- coding: utf-8 -*- + +from __future__ import absolute_import +from .dev import * diff --git a/aore/config/common.py b/aore/config/common.py new file mode 100644 index 0000000..fbadf2c --- /dev/null +++ b/aore/config/common.py @@ -0,0 +1,24 @@ +# -*- coding: utf-8 -*- + +class sphinx_conf: + host_name = "localhost" + port = 9312 + index_addjobj = "idx_fias_addrobj" + index_sugg = "idx_fias_sugg" + var_dir = None + + +class db_conf: + host = None + user = None + password = None + database = None + port = None + + +class unrar_config: + path = None + + +class folders: + temp = None diff --git a/aore/config/dev.py b/aore/config/dev.py new file mode 100644 index 0000000..05fd5a4 --- /dev/null +++ b/aore/config/dev.py @@ -0,0 +1,16 @@ +# -*- coding: utf-8 -*- + +from __future__ import absolute_import + +from .common import * + +sphinx_conf.var_dir = "C:\\Sphinx" + +db_conf.database = "postgres" +db_conf.host = "localhost" +db_conf.port = 5432 +db_conf.user = "postgres" +db_conf.password = "intercon" + +unrar_config.path = "C:\Program Files (x86)\WinRAR\unrar.exe" +folders.temp = "E:\\!TEMP" diff --git a/aore/dbutils/dbimpl.py b/aore/dbutils/dbimpl.py index 09a1a1e..eca3e38 100644 --- a/aore/dbutils/dbimpl.py +++ b/aore/dbutils/dbimpl.py @@ -1,13 +1,15 @@ # -*- coding: utf-8 -*- from traceback import format_exc + import psycopg2.extras class DBImpl: - def __init__(self, engine, params): + def __init__(self, engine, db_config): self.db_engine = engine - self.connection = engine.connect(**params) + self.connection = engine.connect(dbname=db_config.database, user=db_config.user, password=db_config.password, + port=db_config.port, host=db_config.host) def transaction_commit(self): self.connection.commit() diff --git a/aore/dbutils/dbschemas.py b/aore/dbutils/dbschemas.py index 45b5a8f..5de0732 100644 --- a/aore/dbutils/dbschemas.py +++ b/aore/dbutils/dbschemas.py @@ -19,6 +19,6 @@ db_shemas['SOCRBASE'] = DbSchema("SOCRBASE", ["LEVEL", "SOCRNAME", "SCNAME", "KO "AddressObjectType") db_shemas['AOTRIG'] = DbSchema("AOTRIG", ["WORD", "TRIGRAMM", "FREQUENCY"], "word", - None) + None) allowed_tables = ["ADDROBJ", "SOCRBASE"] diff --git a/aore/fias/fiasfactory.py b/aore/fias/fiasfactory.py index 4f8803a..edb9e2b 100644 --- a/aore/fias/fiasfactory.py +++ b/aore/fias/fiasfactory.py @@ -4,19 +4,20 @@ from bottle import template from aore.dbutils.dbimpl import DBImpl from aore.fias.search import SphinxSearch -from aore.config import db as dbparams +from aore.config import db_conf class FiasFactory: def __init__(self): - self.db = DBImpl(psycopg2, dbparams) + self.db = DBImpl(psycopg2, db_conf) self.searcher = SphinxSearch(self.db) self.expand_templ = template('aore/templates/postgre/expand_query.sql', aoid="//aoid") + self.normalize_templ = template('aore/templates/postgre/normalize_query.sql', aoid="//aoid") # text - строка поиска # strong - строгий поиск (True) или "мягкий" (False) (с допущением ошибок, опечаток) - # out_format - "full" or "simple" - полный (подробно для каждого подпункта) или простой (только строка и AOID) - def find(self, text, strong=False, out_format="simple"): + # Строгий используется при импорте из внешних систем (автоматически), где ошибка критична + def find(self, text, strong=False): try: results = self.searcher.find(text, strong) except Exception, err: @@ -26,12 +27,26 @@ class FiasFactory: # Нормализует подаваемый AOID или AOGUID в актуальный AOID def normalize(self, aoid_guid): - pass + try: + sql_query = self.normalize_templ.replace("//aoid", aoid_guid) + rows = self.db.get_rows(sql_query, True) + except Exception, err: + return dict(error=err.args[0]) + + if len(rows) == 0: + return [] + else: + return rows[0] # Разворачивает AOID в представление (перед этим нормализует) def expand(self, aoid_guid): try: - sql_query = self.expand_templ.replace("//aoid", aoid_guid) + normalized_id = self.normalize(aoid_guid) + if 'aoid' not in normalized_id: + raise BaseException("Invalid AOID or AOGUID") + else: + normalized_id = normalized_id['aoid'] + sql_query = self.expand_templ.replace("//aoid", normalized_id) rows = self.db.get_rows(sql_query, True) except Exception, err: return dict(error=err.args[0]) diff --git a/aore/fias/search.py b/aore/fias/search.py index a56653f..5419215 100644 --- a/aore/fias/search.py +++ b/aore/fias/search.py @@ -5,7 +5,7 @@ import re import Levenshtein import sphinxapi -from aore.config import sphinx +from aore.config import sphinx_conf from aore.fias.wordentry import WordEntry from aore.miscutils.trigram import trigram @@ -26,17 +26,17 @@ class SphinxSearch: self.db = db self.client_sugg = sphinxapi.SphinxClient() - self.client_sugg.SetServer(sphinx.host, sphinx.port) + self.client_sugg.SetServer(sphinx_conf.host_name, sphinx_conf.port) self.client_sugg.SetLimits(0, 10) self.client_sugg.SetConnectTimeout(3.0) self.client_show = sphinxapi.SphinxClient() - self.client_show.SetServer(sphinx.host, sphinx.port) + self.client_show.SetServer(sphinx_conf.host_name, sphinx_conf.port) self.client_show.SetLimits(0, 10) self.client_show.SetConnectTimeout(3.0) def __configure(self, index_name, wlen=None): - if index_name == sphinx.index_sugg: + if index_name == sphinx_conf.index_sugg: if wlen: self.client_sugg.SetMatchMode(sphinxapi.SPH_MATCH_EXTENDED2) self.client_sugg.SetRankingMode(sphinxapi.SPH_RANK_WORDCOUNT) @@ -52,8 +52,8 @@ class SphinxSearch: word_len = str(len(word) / 2) trigrammed_word = '"{}"/1'.format(trigram(word)) - self.__configure(sphinx.index_sugg, word_len) - result = self.client_sugg.Query(trigrammed_word, sphinx.index_sugg) + self.__configure(sphinx_conf.index_sugg, word_len) + result = self.client_sugg.Query(trigrammed_word, sphinx_conf.index_sugg) # Если по данному слову не найдено подсказок (а такое бывает?) # возвращаем [] @@ -67,7 +67,7 @@ class SphinxSearch: outlist = list() for match in result['matches']: if len(outlist) >= count: - break; + break if maxrank - match['attrs']['krank'] < self.default_rating_delta: jaro_rating = Levenshtein.jaro(word, match['attrs']['word']) @@ -117,9 +117,9 @@ class SphinxSearch: word_entries = self.__get_word_entries(words, strong) sentence = "{}".format(" MAYBE ".join(x.get_variations() for x in word_entries)) - self.__configure(sphinx.index_addjobj) + self.__configure(sphinx_conf.index_addjobj) logging.info("QUERY " + sentence) - rs = self.client_show.Query(sentence, sphinx.index_addjobj) + rs = self.client_show.Query(sentence, sphinx_conf.index_addjobj) logging.info("OK") results = [] @@ -128,4 +128,5 @@ class SphinxSearch: if strong: results.sort(key=lambda x: Levenshtein.ratio(text, x['text']), reverse=True) + return results diff --git a/aore/fias/wordentry.py b/aore/fias/wordentry.py index 50d4421..71bcb32 100644 --- a/aore/fias/wordentry.py +++ b/aore/fias/wordentry.py @@ -1,6 +1,5 @@ # -*- coding: utf-8 -*- import re -import logging class WordEntry: diff --git a/aore/miscutils/sphinx.py b/aore/miscutils/sphinx.py index 8bed437..da6abf6 100644 --- a/aore/miscutils/sphinx.py +++ b/aore/miscutils/sphinx.py @@ -5,9 +5,9 @@ import os from bottle import template +from aore.config import folders, db_conf, sphinx_conf from aore.updater.aoxmltableentry import AoXmlTableEntry from aore.updater.dbhandler import DbHandler -from aore.config import db as dbconfig, sphinx, trashfolder from trigram import trigram @@ -51,15 +51,15 @@ class SphinxHelper: logging.info("Successfully configured. Please restart searchd.") def __create_sugg_index_config(self): - fname = os.path.abspath(trashfolder + "suggest.conf") + fname = os.path.abspath(folders.temp + "/suggest.conf") logging.info("Creating config {}".format(fname)) - conf_data = template('aore/templates/sphinx/idx_suggest.conf', db_host=dbconfig['host'], - db_user=dbconfig['user'], - db_password=dbconfig['password'], - db_name=dbconfig['database'], db_port=dbconfig['port'], - index_name=sphinx.index_sugg, - sphinx_var_path=sphinx.var_dir) + conf_data = template('aore/templates/sphinx/idx_suggest.conf', db_host=db_conf.host, + db_user=db_conf.user, + db_password=db_conf.password, + db_name=db_conf.database, db_port=db_conf.port, + index_name=sphinx_conf.index_sugg, + sphinx_var_path=sphinx_conf.var_dir) f = open(fname, "w") f.write(conf_data) @@ -71,7 +71,7 @@ class SphinxHelper: def __dbexport_sugg_dict(self): logging.info("Place suggestion dict to DB {}...".format(self.files['dict.txt'])) - dict_dat_fname = os.path.abspath(trashfolder + "suggdict.csv") + dict_dat_fname = os.path.abspath(folders.temp + "/suggdict.csv") csv_counter = 0 with open(self.files['dict.txt'], "r") as dict_file, open(dict_dat_fname, "w") as exit_file: @@ -104,16 +104,16 @@ class SphinxHelper: logging.info("Done.") def __create_ao_index_config(self): - fname = os.path.abspath(trashfolder + "addrobj.conf") + fname = os.path.abspath(folders.temp + "/addrobj.conf") logging.info("Creating config {}".format(fname)) - conf_data = template('aore/templates/sphinx/idx_addrobj.conf', db_host=dbconfig['host'], - db_user=dbconfig['user'], - db_password=dbconfig['password'], - db_name=dbconfig['database'], db_port=dbconfig['port'], + conf_data = template('aore/templates/sphinx/idx_addrobj.conf', db_host=db_conf.host, + db_user=db_conf.user, + db_password=db_conf.password, + db_name=db_conf.database, db_port=db_conf.port, sql_query=template('aore/templates/postgre/sphinx_query.sql').replace("\n", " \\\n"), - index_name=sphinx.index_addjobj, - sphinx_var_path=sphinx.var_dir) + index_name=sphinx_conf.index_addjobj, + sphinx_var_path=sphinx_conf.var_dir) f = open(fname, "w") f.write(conf_data) @@ -124,11 +124,11 @@ class SphinxHelper: return fname def __create_suggestion_dict(self): - fname = os.path.abspath(trashfolder + "suggdict.txt") + fname = os.path.abspath(folders.temp + "/suggdict.txt") logging.info("Make suggestion dict ({})...".format(fname)) run_builddict_cmd = "{} {} -c {} --buildstops {} 200000 --buildfreqs".format(self.index_binary, - sphinx.index_addjobj, + sphinx_conf.index_addjobj, self.files['addrobj.conf'], fname) os.system(run_builddict_cmd) logging.info("Done.") @@ -139,7 +139,7 @@ class SphinxHelper: out_filename = os.path.abspath(config_fname) logging.info("Creating main config {}...".format(out_filename)) - conf_data = template('aore/templates/sphinx/sphinx.conf', sphinx_var_path=sphinx.var_dir) + conf_data = template('aore/templates/sphinx/sphinx.conf', sphinx_var_path=sphinx_conf.var_dir) f = open(out_filename, "w") for fname, fpath in self.files.iteritems(): diff --git a/aore/templates/postgre/normalize_query.sql b/aore/templates/postgre/normalize_query.sql new file mode 100644 index 0000000..615378d --- /dev/null +++ b/aore/templates/postgre/normalize_query.sql @@ -0,0 +1 @@ +SELECT AOID FROM "ADDROBJ" WHERE (AOID='{{ aoid }}' OR AOGUID='{{ aoid }}') AND ACTSTATUS=True AND LIVESTATUS=True AND NEXTID IS NULL LIMIT 1; \ No newline at end of file diff --git a/aore/updater/aodataparser.py b/aore/updater/aodataparser.py index 1126e66..5e2ac4b 100644 --- a/aore/updater/aodataparser.py +++ b/aore/updater/aodataparser.py @@ -1,8 +1,7 @@ # -*- coding: utf-8 -*- import os -from aore.updater.aoxmltableentry import AoXmlTableEntry -from aore.config import trashfolder +from aore.config import folders from aore.dbutils.dbschemas import db_shemas from xmlparser import XMLParser @@ -52,7 +51,7 @@ class AoDataParser: self.data_bereit_callback = data_callback self.currentpage = 0 self.base_filename = \ - trashfolder + "fd_" + \ + folders.temp + "/fd_" + \ str(self.datasource.operation_type) + "_" + \ self.datasource.table_name + ".csv.part{}" self.counter = self.pagesize + 1 diff --git a/aore/updater/aorar.py b/aore/updater/aorar.py index ff136e7..48c9938 100644 --- a/aore/updater/aorar.py +++ b/aore/updater/aorar.py @@ -7,21 +7,22 @@ from traceback import format_exc import rarfile import requests -from aore.config import unrar, trashfolder +from aore.config import folders, unrar_config from aoxmltableentry import AoXmlTableEntry class AoRar: def __init__(self): - rarfile.UNRAR_TOOL = unrar + rarfile.UNRAR_TOOL = unrar_config.path def download(self, url): logging.info("Downloading {}".format(url)) try: - local_filename = os.path.abspath(trashfolder + url.split('/')[-1]) + local_filename = os.path.abspath(folders.temp + "/" + url.split('/')[-1]) if os.path.isfile(local_filename): + # TODO: UNCOMMENT os.remove(local_filename) return local_filename - os.remove(local_filename) + request = requests.get(url, stream=True) with open(local_filename, 'wb') as f: diff --git a/aore/updater/dbhandler.py b/aore/updater/dbhandler.py index 755d020..8214093 100644 --- a/aore/updater/dbhandler.py +++ b/aore/updater/dbhandler.py @@ -5,15 +5,15 @@ import logging import psycopg2 from bottle import template -from aore.updater.aoxmltableentry import AoXmlTableEntry -from aore.config import db as dbparams +from aore.config import db_conf from aore.dbutils.dbimpl import DBImpl from aore.dbutils.dbschemas import db_shemas +from aore.updater.aoxmltableentry import AoXmlTableEntry class DbHandler: def __init__(self): - self.db = DBImpl(psycopg2, dbparams) + self.db = DBImpl(psycopg2, db_conf) def bulk_csv(self, operation_type, table_name, processed_count, csv_file_name): sql_query = None diff --git a/aore/updater/updater.py b/aore/updater/updater.py index c119017..739fe72 100644 --- a/aore/updater/updater.py +++ b/aore/updater/updater.py @@ -3,12 +3,11 @@ import logging from os import walk, path +from aore.dbutils.dbschemas import allowed_tables from aore.updater.aodataparser import AoDataParser from aore.updater.aorar import AoRar from aore.updater.aoxmltableentry import AoXmlTableEntry from aore.updater.dbhandler import DbHandler -from aore.updater.soapreceiver import SoapReceiver -from aore.dbutils.dbschemas import allowed_tables class Updater: diff --git a/manage.py b/manage.py index 5499ecd..0d61134 100644 --- a/manage.py +++ b/manage.py @@ -4,8 +4,8 @@ import optparse from aore.fias.fiasfactory import FiasFactory from aore.miscutils.sphinx import SphinxHelper -from aore.updater.updater import Updater from aore.updater.soapreceiver import SoapReceiver +from aore.updater.updater import Updater def print_fias_versions(): @@ -25,7 +25,7 @@ def parse_update_str(updates_str): if updates_str == "all": return None - upd_list = updates_str.lower().replace(' ','').split(',') + upd_list = updates_str.lower().replace(' ', '').split(',') out_list = [] for u_entry in upd_list: @@ -38,7 +38,7 @@ def parse_update_str(updates_str): return out_list -def get_allowed_updates(updates_str, mode = "create"): +def get_allowed_updates(updates_str, mode="create"): imp = SoapReceiver() current_version = imp.get_current_fias_version() all_versions = [x for x in imp.get_update_list()] @@ -56,7 +56,7 @@ def get_allowed_updates(updates_str, mode = "create"): if uv_ver > current_version and (not user_defined_list or uv_ver in user_defined_list): out_list.append(uv) - out_list.sort(key=lambda x: x['intver']) + out_list.sort(key=lambda item: item['intver']) for ol_entry in out_list: yield ol_entry @@ -117,9 +117,9 @@ def main(): # 4 Debug purposes.. if options.test: sph = FiasFactory() + print json.dumps(sph.normalize("463ce8e4-928b-45cc-be76-46c2494632b6")) print json.dumps(sph.expand("453091f5-2336-4aea-9b90-c4060dca0b33")) - print json.dumps(sph.find('с паспаул ул кедровая', True)) - print json.dumps(sph.find('с паспаул ул кедровая')) + print json.dumps(sph.find('ул кемровая пасраул алтай майминский р-н')) if __name__ == '__main__':