diff --git a/LICENSE.txt b/LICENSE.txt new file mode 100644 index 0000000..3322fca --- /dev/null +++ b/LICENSE.txt @@ -0,0 +1,27 @@ +Copyright (c) 2016, hellotan +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of py-phias nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/aore/dbutils/dbimpl.py b/aore/dbutils/dbimpl.py index eca3e38..3b6ed03 100644 --- a/aore/dbutils/dbimpl.py +++ b/aore/dbutils/dbimpl.py @@ -4,6 +4,8 @@ from traceback import format_exc import psycopg2.extras +from aore.miscutils.exceptions import FiasException + class DBImpl: def __init__(self, engine, db_config): @@ -28,20 +30,26 @@ class DBImpl: try: cur = self.get_cursor() cur.execute(sql_query) + cur.close() self.transaction_commit() except: self.transaction_rollback() - raise BaseException("Error execute sql query. Reason : {}".format(format_exc())) + raise FiasException("Error execute sql query. Reason : {}".format(format_exc())) def get_rows(self, query_string, dict_cursor=False): if dict_cursor: cur = self.connection.cursor(cursor_factory=psycopg2.extras.RealDictCursor) else: cur = self.connection.cursor() - cur.execute(query_string) - rows = cur.fetchall() - if cur: + try: + cur.execute(query_string) + + rows = cur.fetchall() cur.close() + self.transaction_commit() + except: + self.transaction_rollback() + raise FiasException("Error execute sql query. Reason : {}".format(format_exc())) return rows diff --git a/aore/fias/fiasfactory.py b/aore/fias/fiasfactory.py index 23e2656..cc22055 100644 --- a/aore/fias/fiasfactory.py +++ b/aore/fias/fiasfactory.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- import psycopg2 from bottle import template - +import sys from aore.dbutils.dbimpl import DBImpl from aore.fias.search import SphinxSearch from aore.config import db_conf @@ -49,7 +49,7 @@ class FiasFactory: results = self.searcher.find(text, strong) except Exception, err: - return dict(error=err.message) + return dict(error=err.args[0]) return results @@ -61,7 +61,7 @@ class FiasFactory: sql_query = self.normalize_templ.replace("//aoid", aoid_guid) rows = self.db.get_rows(sql_query, True) except Exception, err: - return dict(error=err.message) + return dict(error=err.args[0]) if len(rows) == 0: return [] @@ -80,6 +80,6 @@ class FiasFactory: sql_query = self.expand_templ.replace("//aoid", normalized_id) rows = self.db.get_rows(sql_query, True) except Exception, err: - return dict(error=err.message) + return dict(error=err.args[0]) return rows diff --git a/aore/fias/search.py b/aore/fias/search.py index 3d790a8..2f41b70 100644 --- a/aore/fias/search.py +++ b/aore/fias/search.py @@ -36,15 +36,12 @@ class SphinxSearch: self.client_show.SetConnectTimeout(3.0) def __configure(self, index_name, wlen=None): - if index_name == sphinx_conf.index_sugg: - if wlen: - self.client_sugg.SetMatchMode(sphinxapi.SPH_MATCH_EXTENDED2) - self.client_sugg.SetRankingMode(sphinxapi.SPH_RANK_WORDCOUNT) - self.client_sugg.SetFilterRange("len", int(wlen) - self.delta_len, int(wlen) + self.delta_len) - self.client_sugg.SetSelect("word, len, @weight+{}-abs(len-{}) AS krank".format(self.delta_len, wlen)) - self.client_sugg.SetSortMode(sphinxapi.SPH_SORT_EXTENDED, "krank DESC") + if index_name == sphinx_conf.index_sugg and wlen: + self.client_sugg.SetRankingMode(sphinxapi.SPH_RANK_BM25) + self.client_sugg.SetFilterRange("len", int(wlen) - self.delta_len, int(wlen) + self.delta_len) + self.client_sugg.SetSelect("word, len, @weight+{}-abs(len-{}) AS krank".format(self.delta_len, wlen)) + self.client_sugg.SetSortMode(sphinxapi.SPH_SORT_EXTENDED, "krank DESC") else: - self.client_show.SetMatchMode(sphinxapi.SPH_MATCH_EXTENDED2) self.client_show.SetRankingMode(sphinxapi.SPH_RANK_BM25) self.client_show.SetSortMode(sphinxapi.SPH_SORT_RELEVANCE) @@ -108,11 +105,11 @@ class SphinxSearch: we = WordEntry(self.db, word) self.__add_word_variations(we, strong) - if we.get_variations() == "()": - raise BaseException("Cannot process sentence.") + assert we.get_variations() != "()", "Cannot process sentence." yield we def find(self, text, strong): + logging.info("FIND ") words = self.__split_phrase(text) word_entries = self.__get_word_entries(words, strong) sentence = "{}".format(" MAYBE ".join(x.get_variations() for x in word_entries)) diff --git a/aore/fias/wordentry.py b/aore/fias/wordentry.py index 71bcb32..69c0b68 100644 --- a/aore/fias/wordentry.py +++ b/aore/fias/wordentry.py @@ -16,7 +16,7 @@ class WordEntry: # -1x - одно по лайку и много точных. Быть не может. # x0 - много по лайку и нет точных. Недопечатка. Немного подсказок и *. # x1 - много по лайку и один точный. Чет нашли. Как есть и *. - # xx - много по лайку и много точных. Оставляем как есть и * + # xx - много по лайку и много точных. Оставляем как есть и * TODO В данном случае лайк лучше убрать # # Теперь по сокращениям. Они работюат отдельно (ПОКА ЧТО) # 3rd - кол-во слов по точному совпдению по полному сокращению. @@ -29,7 +29,7 @@ class WordEntry: # 11 - найдено одно полное и одно малое. Бывает (допустим, 'сад'). Добавляем как есть. # -1x - найдено одно полное и куча малых. Ну бред. # x0 - найдено куча полных и ни одного малого. Добавляем малое. - # x1 - Куча полных и 1 малое. TODO Хз, бывает ли. Не обрабатываем. + # x1 - Куча полных и 1 малое. Хз, бывает ли. Не обрабатываем. # xx - Куча полных и куча малых. Не обрабатываем. match_types = dict( MT_MANY_SUGG=['0000'], @@ -39,9 +39,12 @@ class WordEntry: MT_ADD_SOCR=['..10', '..x0'] ) + min_word_len_to_star = 4 + def __init__(self, db, word): self.db = db self.word = str(word) + self.word_len = len(unicode(self.word)) self.variations = [] self.scname = None self.ranks = self.__get_ranks() @@ -51,9 +54,15 @@ class WordEntry: for z in y: self.__dict__[x] = self.__dict__[x] or re.search(z, self.ranks) is not None + # Если ищем по лайку, то точное совпадение не ищем (оно и так будет включено) if self.MT_LAST_STAR: self.MT_AS_IS = False + # Строка слишком котроткая, то по лайку не ищем, будет очень долго + if self.MT_LAST_STAR and self.word_len < self.min_word_len_to_star: + self.MT_LAST_STAR = False + self.MT_AS_IS = True + def add_variation_socr(self): if self.scname: self.add_variation(self.scname) @@ -65,28 +74,32 @@ class WordEntry: return "({})".format(" | ".join(self.variations)) def __get_ranks(self): - word_len = len(unicode(self.word)) sql_qry = "SELECT COUNT(*), NULL FROM \"AOTRIG\" WHERE word LIKE '{}%' AND LENGTH(word) > {} " \ "UNION ALL SELECT COUNT(*), NULL FROM \"AOTRIG\" WHERE word='{}' " \ "UNION ALL SELECT COUNT(*), MAX(scname) FROM \"SOCRBASE\" WHERE socrname ILIKE '{}'" \ - "UNION ALL SELECT COUNT(*), NULL FROM \"SOCRBASE\" WHERE scname ILIKE '{}'".format( - self.word, word_len, self.word, self.word, self.word) + "UNION ALL SELECT COUNT(*), NULL FROM \"SOCRBASE\" WHERE scname ILIKE '{}';".format( + self.word, self.word_len, self.word, self.word, self.word) result = self.db.get_rows(sql_qry) + + # Проставляем "сокращенное" сокращение, если нашли полное if not self.scname: self.scname = result[2][1] - outmask = "" + # Формируем список найденных величин совпадений: + # result[x] + # x = 0, поиск по неполному совпадению (лайк*), и по длине строки больше исходной + # x = 1, поиск по точному совпадению + # x = 2, поиск по базе сокращений (по полному) + # x = 3, то же, но по краткому + out_mask_list = [] for ra in result: if ra[0] > 1: - if word_len > 2: - outmask += 'x' - else: - outmask += '1' + out_mask_list.append('x') else: - outmask += str(ra[0]) + out_mask_list.append(str(ra[0])) - return outmask + return ''.join(out_mask_list) def get_type(self): return ", ".join([x for x in self.match_types if self.__dict__[x]]) diff --git a/manage.py b/aore/manage.py similarity index 100% rename from manage.py rename to aore/manage.py diff --git a/aore/miscutils/exceptions.py b/aore/miscutils/exceptions.py new file mode 100644 index 0000000..ce89dc0 --- /dev/null +++ b/aore/miscutils/exceptions.py @@ -0,0 +1,6 @@ +# -*- coding: utf-8 -*- + + +class FiasException(Exception): + def __str__(self): + return repr(self.args[0]) diff --git a/aore/miscutils/sphinx.py b/aore/miscutils/sphinx.py index da6abf6..7943919 100644 --- a/aore/miscutils/sphinx.py +++ b/aore/miscutils/sphinx.py @@ -85,8 +85,7 @@ class SphinxHelper: splitting_seq = line.split(' ') keyword = splitting_seq[0] freq = splitting_seq[1].rstrip('\n') - if not keyword or not freq: - raise BaseException("Cannot process {}".format(self.files['dict.txt'])) + assert keyword and freq, "Cannot process {}".format(self.files['dict.txt']) nodes.append(keyword) nodes.append(trigram(keyword)) diff --git a/aore/phias.py b/aore/phias.py index 6c752b2..bc8a91e 100644 --- a/aore/phias.py +++ b/aore/phias.py @@ -2,7 +2,7 @@ import json import logging -from bottle import Bottle +from bottle import Bottle, response from aore.fias.fiasfactory import FiasFactory @@ -13,21 +13,29 @@ fias_factory = FiasFactory() @app.route('/expand/') def expand(aoid): + response.content_type = 'application/json' return json.dumps(fias_factory.expand(aoid)) @app.route('/normalize/') def normalize(aoid): + response.content_type = 'application/json' return json.dumps(fias_factory.normalize(aoid)) @app.route('/find/') @app.route('/find//') def find(text, strong=False): + logging.warning("START") strong = (strong == "strong") - return json.dumps(fias_factory.find(text, strong)) + response.content_type = 'application/json' + + res = json.dumps(fias_factory.find(text, strong)) + logging.warning("END") + return res @app.error(404) def error404(error): + response.content_type = 'application/json' return json.dumps(dict(error="Page not found")) diff --git a/aore/updater/aodataparser.py b/aore/updater/aodataparser.py index 5e2ac4b..046aa0d 100644 --- a/aore/updater/aodataparser.py +++ b/aore/updater/aodataparser.py @@ -3,6 +3,7 @@ import os from aore.config import folders from aore.dbutils.dbschemas import db_shemas +from aore.miscutils.exceptions import FiasException from xmlparser import XMLParser @@ -10,7 +11,7 @@ class AoDataParser: def __init__(self, datasource, pagesize): self.datasource = datasource if self.datasource.table_name not in db_shemas: - raise BaseException("Cannot parse {}: Not configured.".format(self.datasource.table_name)) + raise FiasException("Cannot parse {}: Not configured.".format(self.datasource.table_name)) else: self.allowed_fields = db_shemas[self.datasource.table_name].fields diff --git a/aore/updater/aorar.py b/aore/updater/aorar.py index 3407df8..a8c3fb7 100644 --- a/aore/updater/aorar.py +++ b/aore/updater/aorar.py @@ -8,6 +8,7 @@ import rarfile import requests from aore.config import folders, unrar_config +from aore.miscutils.exceptions import FiasException from aoxmltableentry import AoXmlTableEntry @@ -29,7 +30,7 @@ class AoRar: if chunk: f.write(chunk) except: - raise BaseException("Error downloading. Reason : {}".format(format_exc())) + raise FiasException("Error downloading. Reason : {}".format(format_exc())) logging.info("Downloaded {} bytes".format(request.headers['Content-length'])) return local_filename diff --git a/aore/updater/updater.py b/aore/updater/updater.py index 9aeb554..26d8afe 100644 --- a/aore/updater/updater.py +++ b/aore/updater/updater.py @@ -63,7 +63,7 @@ class Updater: def __init_update_entries(self, updates_generator): if self.mode == "http": - assert updates_generator + assert updates_generator, "No generator" self.tablelist_generator = self.__get_updates_from_rar self.updalist_generator = updates_generator else: diff --git a/setup.py b/setup.py index f52b6cb..408ed83 100644 --- a/setup.py +++ b/setup.py @@ -5,16 +5,17 @@ setup( version='0.0.1', packages=['aore', 'aore.fias', 'aore.config', 'aore.dbutils', 'aore.updater', 'aore.miscutils'], url='https://github.com/jar3b/py-phias', - license='', + license='BSD', author='hellotan', author_email='hellotan@live.ru', description='Python application that can operate with FIAS (Russian Address Object DB)', - install_requires=['lxml', - 'psycopg2', - 'bottle', - 'pysimplesoap', - 'python-Levenshtein', - 'enum34', - 'rarfile', - 'requests'] + install_requires= + ['lxml', + 'psycopg2>=2.6.0', + 'bottle>=0.12.0', + 'pysimplesoap', + 'python-Levenshtein', + 'enum34', + 'rarfile', + 'requests'] )