Big commit for py3
This commit is contained in:
@@ -1,16 +1,16 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
import logging
|
||||
import re
|
||||
import urllib
|
||||
import traceback
|
||||
import urllib.parse
|
||||
from uuid import UUID
|
||||
|
||||
import psycopg2
|
||||
import traceback
|
||||
from bottle import template
|
||||
|
||||
from aore.config import DatabaseConfig, BasicConfig
|
||||
from aore.dbutils.dbimpl import DBImpl
|
||||
from search import SphinxSearch
|
||||
from .search import SphinxSearch
|
||||
|
||||
|
||||
class FiasFactory:
|
||||
@@ -38,10 +38,10 @@ class FiasFactory:
|
||||
if rule == "boolean":
|
||||
assert isinstance(param, bool), "Invalid parameter type"
|
||||
if rule == "uuid":
|
||||
assert (isinstance(param, str) or isinstance(param, unicode)) and self.__check_uuid(
|
||||
assert isinstance(param, str) and self.__check_uuid(
|
||||
param), "Invalid parameter value"
|
||||
if rule == "text":
|
||||
assert isinstance(param, str) or isinstance(param, unicode), "Invalid parameter type"
|
||||
assert isinstance(param, str), "Invalid parameter type"
|
||||
assert len(param) > 3, "Text too short"
|
||||
pattern = re.compile(r"[A-za-zА-Яа-я \-,.#№]+")
|
||||
assert pattern.match(param), "Invalid parameter value"
|
||||
@@ -52,15 +52,17 @@ class FiasFactory:
|
||||
|
||||
def find(self, text, strong=False):
|
||||
try:
|
||||
text = urllib.unquote(text).decode('utf8')
|
||||
text = urllib.parse.unquote(str(text))
|
||||
self.__check_param(text, "text")
|
||||
self.__check_param(strong, "boolean")
|
||||
|
||||
results = self.searcher.find(text, strong)
|
||||
except Exception, err:
|
||||
except Exception as err:
|
||||
if BasicConfig.logging:
|
||||
logging.error(traceback.format_exc(err))
|
||||
return dict(error=err.args[0])
|
||||
logging.error(traceback.format_exc())
|
||||
if BasicConfig.debug_print:
|
||||
traceback.print_exc()
|
||||
return dict(error=str(err))
|
||||
|
||||
return results
|
||||
|
||||
@@ -73,10 +75,12 @@ class FiasFactory:
|
||||
rows = self.db.get_rows(sql_query, True)
|
||||
|
||||
assert len(rows), "Record with this AOID not found in DB"
|
||||
except Exception, err:
|
||||
except Exception as err:
|
||||
if BasicConfig.logging:
|
||||
logging.error(traceback.format_exc(err))
|
||||
return dict(error=err.args[0])
|
||||
logging.error(traceback.format_exc())
|
||||
if BasicConfig.debug_print:
|
||||
traceback.print_exc()
|
||||
return dict(error=str(err))
|
||||
|
||||
if len(rows) == 0:
|
||||
return []
|
||||
@@ -94,10 +98,12 @@ class FiasFactory:
|
||||
|
||||
sql_query = self.expand_templ.replace("//aoid", normalized_id)
|
||||
rows = self.db.get_rows(sql_query, True)
|
||||
except Exception, err:
|
||||
except Exception as err:
|
||||
if BasicConfig.logging:
|
||||
logging.error(traceback.format_exc(err))
|
||||
return dict(error=err.args[0])
|
||||
logging.error(traceback.format_exc())
|
||||
if BasicConfig.debug_print:
|
||||
traceback.print_exc()
|
||||
return dict(error=str(err))
|
||||
|
||||
return rows
|
||||
|
||||
@@ -111,9 +117,11 @@ class FiasFactory:
|
||||
rows = self.db.get_rows(sql_query, True)
|
||||
|
||||
assert len(rows), "Record with this AOID not found in DB"
|
||||
except Exception, err:
|
||||
except Exception as err:
|
||||
if BasicConfig.logging:
|
||||
logging.error(traceback.format_exc(err))
|
||||
return dict(error=err.args[0])
|
||||
logging.error(traceback.format_exc())
|
||||
if BasicConfig.debug_print:
|
||||
traceback.print_exc()
|
||||
return dict(error=str(err))
|
||||
|
||||
return rows
|
||||
|
||||
@@ -11,8 +11,8 @@ from aore.config import SphinxConfig
|
||||
from aore.miscutils.exceptions import FiasException
|
||||
from aore.miscutils.fysearch import violet_ratio
|
||||
from aore.miscutils.trigram import trigram
|
||||
from wordentry import WordEntry
|
||||
from wordvariation import VariationType
|
||||
from .wordentry import WordEntry
|
||||
from .wordvariation import VariationType
|
||||
|
||||
|
||||
class SphinxSearch:
|
||||
@@ -62,7 +62,7 @@ class SphinxSearch:
|
||||
self.client_show.SetSortMode(sphinxapi.SPH_SORT_EXTENDED, "krank DESC")
|
||||
|
||||
def __get_suggest(self, word, rating_limit, count):
|
||||
word_len = str(len(word) / 2)
|
||||
word_len = len(word)
|
||||
trigrammed_word = '"{}"/1'.format(trigram(word))
|
||||
|
||||
self.__configure(SphinxConfig.index_sugg, word_len)
|
||||
@@ -95,7 +95,7 @@ class SphinxSearch:
|
||||
return outlist
|
||||
|
||||
# Получает список объектов (слово)
|
||||
def __get_word_entries(self, words, strong):
|
||||
def __get_word_entries(self, words):
|
||||
we_list = []
|
||||
for word in words:
|
||||
if word != '':
|
||||
@@ -111,14 +111,14 @@ class SphinxSearch:
|
||||
|
||||
def find(self, text, strong):
|
||||
def split_phrase(phrase):
|
||||
phrase = unicode(phrase).lower()
|
||||
phrase = phrase.lower()
|
||||
return re.split(r"[ ,:.#$]+", phrase)
|
||||
|
||||
# сплитим текст на слова
|
||||
words = split_phrase(text)
|
||||
|
||||
# получаем список объектов (слов)
|
||||
word_entries = self.__get_word_entries(words, strong)
|
||||
word_entries = self.__get_word_entries(words)
|
||||
word_count = len(word_entries)
|
||||
|
||||
# проверяем, есть ли вообще что-либо в списке объектов слов (или же все убрали как частое)
|
||||
@@ -169,7 +169,7 @@ class SphinxSearch:
|
||||
parsed_ids.append(match['attrs']['aoid'])
|
||||
results.append(
|
||||
dict(aoid=match['attrs']['aoid'],
|
||||
text=unicode(match['attrs']['fullname']),
|
||||
text=str(match['attrs']['fullname']),
|
||||
ratio=match['attrs']['krank'],
|
||||
cort=i))
|
||||
|
||||
|
||||
@@ -5,6 +5,10 @@ from aore.config import SphinxConfig
|
||||
from aore.search.wordvariation import WordVariation, VariationType
|
||||
|
||||
|
||||
def cleanup_string(word):
|
||||
return word.replace('-', '').replace('@', '').replace('#', '')
|
||||
|
||||
|
||||
class WordEntry:
|
||||
# Варианты распеределния для слов с первыми двумя символами, где:
|
||||
# 0 - не найдено, 1 - найдено одно, x - найдено много (>1)
|
||||
@@ -51,14 +55,14 @@ class WordEntry:
|
||||
|
||||
def __init__(self, db, word):
|
||||
self.db = db
|
||||
self.bare_word = str(word)
|
||||
self.word = self.__cleanify(self.bare_word)
|
||||
self.word_len = len(unicode(self.word))
|
||||
self.bare_word = word
|
||||
self.word = cleanup_string(self.bare_word)
|
||||
self.word_len = len(self.word)
|
||||
self.parameters = dict(IS_FREQ=False, SOCR_WORD=None)
|
||||
self.ranks = self.__init_ranks()
|
||||
|
||||
# Заполняем параметры слова
|
||||
for mt_name, mt_values in self.match_types.iteritems():
|
||||
for mt_name, mt_values in self.match_types.items():
|
||||
self.__dict__[mt_name] = False
|
||||
for mt_value in mt_values:
|
||||
self.__dict__[mt_name] = self.__dict__[mt_name] or re.search(mt_value, self.ranks)
|
||||
@@ -72,9 +76,6 @@ class WordEntry:
|
||||
self.MT_LAST_STAR = False
|
||||
self.MT_AS_IS = True
|
||||
|
||||
def __cleanify(self, word):
|
||||
return word.replace('-', '').replace('@', '')
|
||||
|
||||
def variations_generator(self, strong, suggestion_func):
|
||||
default_var_type = VariationType.normal
|
||||
# Если слово встречается часто, ставим у всех вариантов тип VariationType.freq
|
||||
@@ -115,8 +116,9 @@ class WordEntry:
|
||||
"UNION ALL SELECT COUNT(*), NULL FROM \"AOTRIG\" WHERE word='{}' " \
|
||||
"UNION ALL SELECT COUNT(*), MAX(scname) FROM \"SOCRBASE\" WHERE socrname ILIKE '{}'" \
|
||||
"UNION ALL SELECT COUNT(*), NULL FROM \"SOCRBASE\" WHERE scname ILIKE '{}'" \
|
||||
"UNION ALL SELECT frequency, NULL FROM \"AOTRIG\" WHERE word='{}';".format(
|
||||
self.word, self.word_len, self.word, self.bare_word, self.bare_word, self.word)
|
||||
"UNION ALL SELECT frequency, NULL FROM \"AOTRIG\" WHERE word='{}';".format(self.word, self.word_len,
|
||||
self.word, self.bare_word,
|
||||
self.bare_word, self.word)
|
||||
|
||||
result = self.db.get_rows(sql_qry)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user