Улучшен поиск, правки кода.
This commit is contained in:
parent
90cae604fa
commit
8088bff07a
27
LICENSE.txt
Normal file
27
LICENSE.txt
Normal file
@ -0,0 +1,27 @@
|
||||
Copyright (c) 2016, hellotan
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice, this
|
||||
list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of py-phias nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
@ -4,6 +4,8 @@ from traceback import format_exc
|
||||
|
||||
import psycopg2.extras
|
||||
|
||||
from aore.miscutils.exceptions import FiasException
|
||||
|
||||
|
||||
class DBImpl:
|
||||
def __init__(self, engine, db_config):
|
||||
@ -28,20 +30,26 @@ class DBImpl:
|
||||
try:
|
||||
cur = self.get_cursor()
|
||||
cur.execute(sql_query)
|
||||
cur.close()
|
||||
self.transaction_commit()
|
||||
except:
|
||||
self.transaction_rollback()
|
||||
raise BaseException("Error execute sql query. Reason : {}".format(format_exc()))
|
||||
raise FiasException("Error execute sql query. Reason : {}".format(format_exc()))
|
||||
|
||||
def get_rows(self, query_string, dict_cursor=False):
    """Run a query and return every row it produces.

    :param query_string: SQL text passed unchanged to ``cursor.execute``.
    :param dict_cursor: when true, the cursor is created with
        ``psycopg2.extras.RealDictCursor``; otherwise the connection's
        default cursor is used.
    :return: whatever ``cursor.fetchall()`` returns for the query.
    :raises FiasException: if executing, fetching, closing or committing
        fails; the transaction is rolled back before the exception is raised.
    """
    if dict_cursor:
        cur = self.connection.cursor(cursor_factory=psycopg2.extras.RealDictCursor)
    else:
        cur = self.connection.cursor()

    try:
        try:
            # The query is executed exactly once.
            cur.execute(query_string)
            rows = cur.fetchall()
        finally:
            # Close the cursor on the failure path as well, so it is not leaked.
            cur.close()
        self.transaction_commit()
    except Exception:
        self.transaction_rollback()
        raise FiasException("Error execute sql query. Reason : {}".format(format_exc()))

    return rows
|
||||
|
@ -1,7 +1,7 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
import psycopg2
|
||||
from bottle import template
|
||||
|
||||
import sys
|
||||
from aore.dbutils.dbimpl import DBImpl
|
||||
from aore.fias.search import SphinxSearch
|
||||
from aore.config import db_conf
|
||||
@ -49,7 +49,7 @@ class FiasFactory:
|
||||
|
||||
results = self.searcher.find(text, strong)
|
||||
except Exception, err:
|
||||
return dict(error=err.message)
|
||||
return dict(error=err.args[0])
|
||||
|
||||
return results
|
||||
|
||||
@ -61,7 +61,7 @@ class FiasFactory:
|
||||
sql_query = self.normalize_templ.replace("//aoid", aoid_guid)
|
||||
rows = self.db.get_rows(sql_query, True)
|
||||
except Exception, err:
|
||||
return dict(error=err.message)
|
||||
return dict(error=err.args[0])
|
||||
|
||||
if len(rows) == 0:
|
||||
return []
|
||||
@ -80,6 +80,6 @@ class FiasFactory:
|
||||
sql_query = self.expand_templ.replace("//aoid", normalized_id)
|
||||
rows = self.db.get_rows(sql_query, True)
|
||||
except Exception, err:
|
||||
return dict(error=err.message)
|
||||
return dict(error=err.args[0])
|
||||
|
||||
return rows
|
||||
|
@ -36,15 +36,12 @@ class SphinxSearch:
|
||||
self.client_show.SetConnectTimeout(3.0)
|
||||
|
||||
def __configure(self, index_name, wlen=None):
|
||||
if index_name == sphinx_conf.index_sugg:
|
||||
if wlen:
|
||||
self.client_sugg.SetMatchMode(sphinxapi.SPH_MATCH_EXTENDED2)
|
||||
self.client_sugg.SetRankingMode(sphinxapi.SPH_RANK_WORDCOUNT)
|
||||
self.client_sugg.SetFilterRange("len", int(wlen) - self.delta_len, int(wlen) + self.delta_len)
|
||||
self.client_sugg.SetSelect("word, len, @weight+{}-abs(len-{}) AS krank".format(self.delta_len, wlen))
|
||||
self.client_sugg.SetSortMode(sphinxapi.SPH_SORT_EXTENDED, "krank DESC")
|
||||
if index_name == sphinx_conf.index_sugg and wlen:
|
||||
self.client_sugg.SetRankingMode(sphinxapi.SPH_RANK_BM25)
|
||||
self.client_sugg.SetFilterRange("len", int(wlen) - self.delta_len, int(wlen) + self.delta_len)
|
||||
self.client_sugg.SetSelect("word, len, @weight+{}-abs(len-{}) AS krank".format(self.delta_len, wlen))
|
||||
self.client_sugg.SetSortMode(sphinxapi.SPH_SORT_EXTENDED, "krank DESC")
|
||||
else:
|
||||
self.client_show.SetMatchMode(sphinxapi.SPH_MATCH_EXTENDED2)
|
||||
self.client_show.SetRankingMode(sphinxapi.SPH_RANK_BM25)
|
||||
self.client_show.SetSortMode(sphinxapi.SPH_SORT_RELEVANCE)
|
||||
|
||||
@ -108,11 +105,11 @@ class SphinxSearch:
|
||||
we = WordEntry(self.db, word)
|
||||
self.__add_word_variations(we, strong)
|
||||
|
||||
if we.get_variations() == "()":
|
||||
raise BaseException("Cannot process sentence.")
|
||||
assert we.get_variations() != "()", "Cannot process sentence."
|
||||
yield we
|
||||
|
||||
def find(self, text, strong):
|
||||
logging.info("FIND ")
|
||||
words = self.__split_phrase(text)
|
||||
word_entries = self.__get_word_entries(words, strong)
|
||||
sentence = "{}".format(" MAYBE ".join(x.get_variations() for x in word_entries))
|
||||
|
@ -16,7 +16,7 @@ class WordEntry:
|
||||
# -1x - одно по лайку и много точных. Быть не может.
|
||||
# x0 - много по лайку и нет точных. Недопечатка. Немного подсказок и *.
|
||||
# x1 - много по лайку и один точный. Чет нашли. Как есть и *.
|
||||
# xx - много по лайку и много точных. Оставляем как есть и *
|
||||
# xx - много по лайку и много точных. Оставляем как есть и * TODO В данном случае лайк лучше убрать
|
||||
#
|
||||
# Теперь по сокращениям. Они работают отдельно (ПОКА ЧТО)
|
||||
# 3rd - кол-во слов по точному совпадению по полному сокращению.
|
||||
@ -29,7 +29,7 @@ class WordEntry:
|
||||
# 11 - найдено одно полное и одно малое. Бывает (допустим, 'сад'). Добавляем как есть.
|
||||
# -1x - найдено одно полное и куча малых. Ну бред.
|
||||
# x0 - найдено куча полных и ни одного малого. Добавляем малое.
|
||||
# x1 - Куча полных и 1 малое. TODO Хз, бывает ли. Не обрабатываем.
|
||||
# x1 - Куча полных и 1 малое. Хз, бывает ли. Не обрабатываем.
|
||||
# xx - Куча полных и куча малых. Не обрабатываем.
|
||||
match_types = dict(
|
||||
MT_MANY_SUGG=['0000'],
|
||||
@ -39,9 +39,12 @@ class WordEntry:
|
||||
MT_ADD_SOCR=['..10', '..x0']
|
||||
)
|
||||
|
||||
min_word_len_to_star = 4
|
||||
|
||||
def __init__(self, db, word):
|
||||
self.db = db
|
||||
self.word = str(word)
|
||||
self.word_len = len(unicode(self.word))
|
||||
self.variations = []
|
||||
self.scname = None
|
||||
self.ranks = self.__get_ranks()
|
||||
@ -51,9 +54,15 @@ class WordEntry:
|
||||
for z in y:
|
||||
self.__dict__[x] = self.__dict__[x] or re.search(z, self.ranks) is not None
|
||||
|
||||
# Если ищем по лайку, то точное совпадение не ищем (оно и так будет включено)
|
||||
if self.MT_LAST_STAR:
|
||||
self.MT_AS_IS = False
|
||||
|
||||
# Если строка слишком короткая, то по лайку не ищем, будет очень долго
|
||||
if self.MT_LAST_STAR and self.word_len < self.min_word_len_to_star:
|
||||
self.MT_LAST_STAR = False
|
||||
self.MT_AS_IS = True
|
||||
|
||||
def add_variation_socr(self):
|
||||
if self.scname:
|
||||
self.add_variation(self.scname)
|
||||
@ -65,28 +74,32 @@ class WordEntry:
|
||||
return "({})".format(" | ".join(self.variations))
|
||||
|
||||
def __get_ranks(self):
    """Build the four-character match mask for ``self.word``.

    One UNION ALL query is sent through ``self.db.get_rows``; it yields four
    ``(count, value)`` rows:

    * row 0 - "AOTRIG" words that start with the word (LIKE 'word%') and
      are longer than ``self.word_len``;
    * row 1 - exact matches of the word in "AOTRIG";
    * row 2 - "SOCRBASE" rows whose ``socrname`` (full abbreviation name)
      matches; the value column carries ``MAX(scname)``;
    * row 3 - "SOCRBASE" rows whose ``scname`` (short form) matches.

    Side effect: when ``self.scname`` is not set yet, it is filled with the
    short abbreviation taken from row 2.

    :return: a string with one character per row - ``'x'`` when the count is
        greater than 1, otherwise the count itself as text.
    """
    # NOTE(review): the word is interpolated straight into the SQL text. If it
    # can contain user-supplied quotes this is injectable; get_rows accepts no
    # bind parameters, so the query is left as it was - confirm the caller
    # sanitises the word.
    sql_qry = "SELECT COUNT(*), NULL FROM \"AOTRIG\" WHERE word LIKE '{}%' AND LENGTH(word) > {} " \
              "UNION ALL SELECT COUNT(*), NULL FROM \"AOTRIG\" WHERE word='{}' " \
              "UNION ALL SELECT COUNT(*), MAX(scname) FROM \"SOCRBASE\" WHERE socrname ILIKE '{}'" \
              "UNION ALL SELECT COUNT(*), NULL FROM \"SOCRBASE\" WHERE scname ILIKE '{}';".format(
        self.word, self.word_len, self.word, self.word, self.word)

    result = self.db.get_rows(sql_qry)

    # Remember the short abbreviation if the full one was found.
    if not self.scname:
        self.scname = result[2][1]

    # Collapse every count above one into 'x'; keep smaller counts verbatim.
    return ''.join('x' if ra[0] > 1 else str(ra[0]) for ra in result)
|
||||
|
||||
def get_type(self):
    """Return the names of the match types that are set on this entry.

    Every key of ``match_types`` is looked up in the instance ``__dict__``;
    the keys whose flag is truthy are joined with ", ".
    """
    active = []
    for name in self.match_types:
        if self.__dict__[name]:
            active.append(name)
    return ", ".join(active)
|
||||
|
6
aore/miscutils/exceptions.py
Normal file
6
aore/miscutils/exceptions.py
Normal file
@ -0,0 +1,6 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
|
||||
class FiasException(Exception):
    """Base error raised by the aore package.

    ``str()`` of the exception is the ``repr`` of its first argument, so the
    message is rendered with quotes and escapes.
    """

    def __str__(self):
        # An exception created without arguments has no first argument to
        # show; fall back to the default rendering instead of raising
        # IndexError from inside __str__.
        if not self.args:
            return Exception.__str__(self)
        return repr(self.args[0])
|
@ -85,8 +85,7 @@ class SphinxHelper:
|
||||
splitting_seq = line.split(' ')
|
||||
keyword = splitting_seq[0]
|
||||
freq = splitting_seq[1].rstrip('\n')
|
||||
if not keyword or not freq:
|
||||
raise BaseException("Cannot process {}".format(self.files['dict.txt']))
|
||||
assert keyword and freq, "Cannot process {}".format(self.files['dict.txt'])
|
||||
|
||||
nodes.append(keyword)
|
||||
nodes.append(trigram(keyword))
|
||||
|
@ -2,7 +2,7 @@
|
||||
import json
|
||||
import logging
|
||||
|
||||
from bottle import Bottle
|
||||
from bottle import Bottle, response
|
||||
|
||||
from aore.fias.fiasfactory import FiasFactory
|
||||
|
||||
@ -13,21 +13,29 @@ fias_factory = FiasFactory()
|
||||
|
||||
@app.route('/expand/<aoid:re:[\w]{8}(-[\w]{4}){3}-[\w]{12}>')
def expand(aoid):
    """Serve ``/expand/<aoid>``: JSON-encode ``fias_factory.expand(aoid)``.

    The route pattern only accepts an 8-4-4-4-12 group layout of word
    characters for ``aoid``. The response is marked as JSON.
    """
    response.content_type = 'application/json'
    expanded = fias_factory.expand(aoid)
    return json.dumps(expanded)
|
||||
|
||||
|
||||
@app.route('/normalize/<aoid:re:[\w]{8}(-[\w]{4}){3}-[\w]{12}>')
def normalize(aoid):
    """Serve ``/normalize/<aoid>``: JSON-encode ``fias_factory.normalize(aoid)``.

    The route pattern only accepts an 8-4-4-4-12 group layout of word
    characters for ``aoid``. The response is marked as JSON.
    """
    response.content_type = 'application/json'
    normalized = fias_factory.normalize(aoid)
    return json.dumps(normalized)
|
||||
|
||||
|
||||
@app.route('/find/<text>')
@app.route('/find/<text>/<strong>')
def find(text, strong=False):
    """Serve ``/find/<text>`` and ``/find/<text>/<strong>``.

    :param text: the search phrase taken from the URL.
    :param strong: second URL segment; strict mode is requested only when it
        is exactly the string "strong".
    :return: the JSON-encoded result of ``fias_factory.find(text, strong)``.
    """
    logging.warning("START")
    strong = (strong == "strong")
    # Set the content type before producing the body. With the stale early
    # return removed, this line and the "END" log are reachable again.
    response.content_type = 'application/json'

    res = json.dumps(fias_factory.find(text, strong))
    logging.warning("END")
    return res
|
||||
|
||||
|
||||
@app.error(404)
def error404(error):
    """Answer 404 errors with a JSON body instead of the default page."""
    response.content_type = 'application/json'
    payload = dict(error="Page not found")
    return json.dumps(payload)
|
||||
|
@ -3,6 +3,7 @@ import os
|
||||
|
||||
from aore.config import folders
|
||||
from aore.dbutils.dbschemas import db_shemas
|
||||
from aore.miscutils.exceptions import FiasException
|
||||
from xmlparser import XMLParser
|
||||
|
||||
|
||||
@ -10,7 +11,7 @@ class AoDataParser:
|
||||
def __init__(self, datasource, pagesize):
|
||||
self.datasource = datasource
|
||||
if self.datasource.table_name not in db_shemas:
|
||||
raise BaseException("Cannot parse {}: Not configured.".format(self.datasource.table_name))
|
||||
raise FiasException("Cannot parse {}: Not configured.".format(self.datasource.table_name))
|
||||
else:
|
||||
self.allowed_fields = db_shemas[self.datasource.table_name].fields
|
||||
|
||||
|
@ -8,6 +8,7 @@ import rarfile
|
||||
import requests
|
||||
|
||||
from aore.config import folders, unrar_config
|
||||
from aore.miscutils.exceptions import FiasException
|
||||
from aoxmltableentry import AoXmlTableEntry
|
||||
|
||||
|
||||
@ -29,7 +30,7 @@ class AoRar:
|
||||
if chunk:
|
||||
f.write(chunk)
|
||||
except:
|
||||
raise BaseException("Error downloading. Reason : {}".format(format_exc()))
|
||||
raise FiasException("Error downloading. Reason : {}".format(format_exc()))
|
||||
|
||||
logging.info("Downloaded {} bytes".format(request.headers['Content-length']))
|
||||
return local_filename
|
||||
|
@ -63,7 +63,7 @@ class Updater:
|
||||
|
||||
def __init_update_entries(self, updates_generator):
|
||||
if self.mode == "http":
|
||||
assert updates_generator
|
||||
assert updates_generator, "No generator"
|
||||
self.tablelist_generator = self.__get_updates_from_rar
|
||||
self.updalist_generator = updates_generator
|
||||
else:
|
||||
|
19
setup.py
19
setup.py
@ -5,16 +5,17 @@ setup(
|
||||
version='0.0.1',
|
||||
packages=['aore', 'aore.fias', 'aore.config', 'aore.dbutils', 'aore.updater', 'aore.miscutils'],
|
||||
url='https://github.com/jar3b/py-phias',
|
||||
license='',
|
||||
license='BSD',
|
||||
author='hellotan',
|
||||
author_email='hellotan@live.ru',
|
||||
description='Python application that can operate with FIAS (Russian Address Object DB)',
|
||||
install_requires=['lxml',
|
||||
'psycopg2',
|
||||
'bottle',
|
||||
'pysimplesoap',
|
||||
'python-Levenshtein',
|
||||
'enum34',
|
||||
'rarfile',
|
||||
'requests']
|
||||
install_requires=
|
||||
['lxml',
|
||||
'psycopg2>=2.6.0',
|
||||
'bottle>=0.12.0',
|
||||
'pysimplesoap',
|
||||
'python-Levenshtein',
|
||||
'enum34',
|
||||
'rarfile',
|
||||
'requests']
|
||||
)
|
||||
|
Loading…
x
Reference in New Issue
Block a user