Добавлена возможность обновления (создания) конкретной версии
This commit is contained in:
@@ -1,39 +0,0 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from pysimplesoap.client import SoapClient
|
||||
|
||||
|
||||
class Importer:
    """Thin client for the FIAS public download SOAP service.

    Exposes generators describing which FIAS archives should be fetched,
    relative to the version reported by ``get_current_fias_version``.
    """

    def __init__(self):
        # All three endpoints point at the same public DownloadService.asmx.
        self.client = SoapClient(
            location="http://fias.nalog.ru/WebServices/Public/DownloadService.asmx",
            action='http://fias.nalog.ru/WebServices/Public/DownloadService.asmx/',
            namespace="http://fias.nalog.ru/WebServices/Public/DownloadService.asmx",
            soap_ns='soap', trace=False, ns=False)

    def get_current_fias_version(self):
        """Return the FIAS version considered to be installed locally.

        The value is currently a hard-coded placeholder.
        """
        return 224  # TODO FIXIT

    def get_full(self):
        """Yield a single descriptor of the latest complete FIAS archive.

        The yielded dict has the keys ``intver`` (int version id), ``strver``
        (text version) and ``url`` (complete XML archive URL).

        Raises:
            AssertionError: if the service returned an empty response, or if
                the latest remote version is not newer than the current one.
                Because this is a generator, the error surfaces on the first
                iteration step rather than at call time.
        """
        response = self.client.GetLastDownloadFileInfo()

        # Raised explicitly (not via ``assert``) so the check survives ``-O``.
        if not response:
            raise AssertionError("Response is null")

        file_info = response.GetLastDownloadFileInfoResponse.GetLastDownloadFileInfoResult

        # Cast before comparing, exactly as get_updates() does; the DB is
        # up to date when the remote version is NOT greater than ours.
        remote_version = int(file_info.VersionId)
        if remote_version <= self.get_current_fias_version():
            raise AssertionError("DB is already up-to-date")

        yield dict(intver=remote_version, strver=str(file_info.TextVersion),
                   url=str(file_info.FiasCompleteXmlUrl))

    def get_updates(self):
        """Yield descriptors of every delta archive newer than the current version.

        Each yielded dict has the keys ``intver``, ``strver`` and ``url``
        (delta XML archive URL). Entries are yielded in the order the service
        lists them.

        Raises:
            AssertionError: if the service returned an empty response (raised
                on the first iteration step).
        """
        response = self.client.GetAllDownloadFileInfo()

        if not response:
            raise AssertionError("Response is null")

        current_fias_version = self.get_current_fias_version()

        file_infos = response.GetAllDownloadFileInfoResponse.GetAllDownloadFileInfoResult.DownloadFileInfo
        for file_info in file_infos:
            version = int(file_info.VersionId)
            if version > current_fias_version:
                yield dict(intver=version, strver=str(file_info.TextVersion),
                           url=str(file_info.FiasDeltaXmlUrl))
|
||||
17
aore/fias/fiasfactory.py
Normal file
17
aore/fias/fiasfactory.py
Normal file
@@ -0,0 +1,17 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from aore.fias.search import SphinxSearch
|
||||
|
||||
|
||||
class FiasFactory:
    """Facade over the Sphinx-backed address search."""

    def __init__(self):
        self.searcher = SphinxSearch()

    def find(self, text, strong=False, out_format="simple"):
        """Search for addresses matching ``text``.

        Args:
            text: the search string.
            strong: strict search when true; otherwise a "soft" search that
                tolerates mistakes and typos.
            out_format: "full" (details for every sub-item) or "simple"
                (only the string and AOID). Accepted for API compatibility;
                this method does not act on it yet.

        Returns:
            Whatever ``self.searcher.find(text, strong)`` returns, or an empty
            list when the search fails.
        """
        try:
            results = self.searcher.find(text, strong)
        except (KeyboardInterrupt, SystemExit):
            # Never turn an interrupt / interpreter exit into "no results".
            raise
        except BaseException:
            # Deliberately broad best-effort: the search code raises a plain
            # BaseException("Cannot process sentence.") for unparsable input.
            return []

        # The original computed ``results`` but fell off the end of the
        # function, so a successful search returned None.
        return results
|
||||
@@ -4,7 +4,7 @@ import re
|
||||
|
||||
import Levenshtein
|
||||
import psycopg2
|
||||
import aore.sphinxapi as sphinxapi
|
||||
import sphinxapi
|
||||
|
||||
from aore.config import db as dbparams, sphinx_index_sugg, sphinx_index_addjobj
|
||||
from aore.dbutils.dbimpl import DBImpl
|
||||
@@ -80,12 +80,12 @@ class SphinxSearch:
|
||||
phrase = unicode(phrase).replace('-', '').replace('@', '').lower()
|
||||
return re.split(r"[ ,:.#$]+", phrase)
|
||||
|
||||
def __add_word_variations(self, word_entry):
|
||||
if word_entry.MT_MANY_SUGG:
|
||||
def __add_word_variations(self, word_entry, strong):
|
||||
if word_entry.MT_MANY_SUGG and not strong:
|
||||
suggs = self.__get_suggest(word_entry.word, self.rating_limit_soft, 6)
|
||||
for suggestion in suggs:
|
||||
word_entry.add_variation(suggestion[0])
|
||||
if word_entry.MT_SOME_SUGG:
|
||||
if word_entry.MT_SOME_SUGG and not strong:
|
||||
suggs = self.__get_suggest(word_entry.word, self.rating_limit_hard, 3)
|
||||
for suggestion in suggs:
|
||||
word_entry.add_variation(suggestion[0])
|
||||
@@ -96,16 +96,18 @@ class SphinxSearch:
|
||||
if word_entry.MT_ADD_SOCR:
|
||||
word_entry.add_variation_socr()
|
||||
|
||||
def __get_word_entries(self, words):
|
||||
def __get_word_entries(self, words, strong):
|
||||
for word in words:
|
||||
if word != '':
|
||||
we = WordEntry(self.db, word)
|
||||
self.__add_word_variations(we)
|
||||
self.__add_word_variations(we, strong)
|
||||
if we.get_variations() == "()":
|
||||
raise BaseException("Cannot process sentence.")
|
||||
yield we
|
||||
|
||||
def find(self, text):
|
||||
def find(self, text, strong):
|
||||
words = self.__split_phrase(text)
|
||||
word_entries = self.__get_word_entries(words)
|
||||
word_entries = self.__get_word_entries(words, strong)
|
||||
sentence = "{}".format(" MAYBE ".join(x.get_variations() for x in word_entries))
|
||||
|
||||
self.__configure(sphinx_index_addjobj)
|
||||
@@ -114,4 +116,4 @@ class SphinxSearch:
|
||||
results = []
|
||||
for ma in rs['matches']:
|
||||
results.append([ma['attrs']['aoid'], ma['attrs']['fullname'], ma['weight']])
|
||||
print results
|
||||
return results
|
||||
|
||||
@@ -5,9 +5,9 @@ import os
|
||||
|
||||
from bottle import template
|
||||
|
||||
from aore.aoutils.aoxmltableentry import AoXmlTableEntry
|
||||
from aore.updater.aoxmltableentry import AoXmlTableEntry
|
||||
from aore.updater.dbhandler import DbHandler
|
||||
from aore.config import db as dbconfig, sphinx_index_addjobj, sphinx_var_dir, trashfolder, sphinx_index_sugg
|
||||
from aore.dbutils.dbhandler import DbHandler
|
||||
from trigram import trigram
|
||||
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
import os
|
||||
|
||||
from aore.aoutils.aoxmltableentry import AoXmlTableEntry
|
||||
from aore.updater.aoxmltableentry import AoXmlTableEntry
|
||||
from aore.config import trashfolder
|
||||
from aore.dbutils.dbschemas import db_shemas
|
||||
from xmlparser import XMLParser
|
||||
@@ -18,18 +18,12 @@ class AoDataParser:
|
||||
self.pagesize = pagesize
|
||||
self.currentpage = 0
|
||||
self.counter = 0
|
||||
self.addrobj_filter = self.datasource.table_name == 'ADDROBJ' and self.datasource.operation_type == AoXmlTableEntry.OperationType.create
|
||||
|
||||
self.base_filename = ""
|
||||
self.csv_file = None
|
||||
self.data_bereit_callback = None
|
||||
|
||||
def import_update(self, attr):
|
||||
# Addrobj anvanced filter
|
||||
if self.addrobj_filter:
|
||||
if attr['ACTSTATUS'] == '0' or 'NEXTID' in attr:
|
||||
return
|
||||
|
||||
if self.counter > self.pagesize:
|
||||
# Send old file to DB engine
|
||||
if self.csv_file:
|
||||
@@ -5,7 +5,7 @@ import logging
|
||||
import psycopg2
|
||||
from bottle import template
|
||||
|
||||
from aore.aoutils.aoxmltableentry import AoXmlTableEntry
|
||||
from aore.updater.aoxmltableentry import AoXmlTableEntry
|
||||
from aore.config import db as dbparams
|
||||
from aore.dbutils.dbimpl import DBImpl
|
||||
from aore.dbutils.dbschemas import db_shemas
|
||||
26
aore/updater/soapreceiver.py
Normal file
26
aore/updater/soapreceiver.py
Normal file
@@ -0,0 +1,26 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from pysimplesoap.client import SoapClient
|
||||
|
||||
|
||||
class SoapReceiver:
    """Thin client for the FIAS public download SOAP service."""

    def __init__(self):
        # All three endpoints point at the same public DownloadService.asmx.
        self.client = SoapClient(
            location="http://fias.nalog.ru/WebServices/Public/DownloadService.asmx",
            action='http://fias.nalog.ru/WebServices/Public/DownloadService.asmx/',
            namespace="http://fias.nalog.ru/WebServices/Public/DownloadService.asmx",
            soap_ns='soap', trace=False, ns=False)

    def get_current_fias_version(self):
        """Return the FIAS version considered to be installed locally.

        The value is currently a hard-coded placeholder.
        """
        return 224  # TODO FIXIT

    def get_update_list(self):
        """Yield one descriptor per archive version listed by the service.

        Each yielded dict has the keys ``intver`` (int version id), ``strver``
        (text version), ``delta_url`` (delta XML archive URL) and
        ``complete_url`` (complete XML archive URL). No version filtering is
        done here; entries come in the order the service lists them.

        Raises:
            AssertionError: if the service returned an empty response. Because
                this is a generator, the error surfaces on the first iteration
                step rather than at call time.
        """
        response = self.client.GetAllDownloadFileInfo()

        # Raised explicitly (not via ``assert``) so the check survives ``-O``.
        if not response:
            raise AssertionError("Response is null")

        file_infos = response.GetAllDownloadFileInfoResponse.GetAllDownloadFileInfoResult.DownloadFileInfo
        for file_info in file_infos:
            yield dict(intver=int(file_info.VersionId), strver=str(file_info.TextVersion),
                       delta_url=str(file_info.FiasDeltaXmlUrl),
                       complete_url=str(file_info.FiasCompleteXmlUrl))
|
||||
@@ -3,15 +3,15 @@
|
||||
import logging
|
||||
from os import walk, path
|
||||
|
||||
from aore.aoutils.aodataparser import AoDataParser
|
||||
from aore.aoutils.aorar import AoRar
|
||||
from aore.aoutils.aoxmltableentry import AoXmlTableEntry
|
||||
from aore.aoutils.importer import Importer
|
||||
from aore.dbutils.dbhandler import DbHandler
|
||||
from aore.updater.aodataparser import AoDataParser
|
||||
from aore.updater.aorar import AoRar
|
||||
from aore.updater.aoxmltableentry import AoXmlTableEntry
|
||||
from aore.updater.dbhandler import DbHandler
|
||||
from aore.updater.soapreceiver import SoapReceiver
|
||||
from aore.dbutils.dbschemas import allowed_tables
|
||||
|
||||
|
||||
class AoUpdater:
|
||||
class Updater:
|
||||
# Source: "http", directory (as a full path to unpacked xmls)
|
||||
def __init__(self, source="http"):
|
||||
self.db_handler = DbHandler()
|
||||
@@ -31,7 +31,7 @@ class AoUpdater:
|
||||
|
||||
def __get_updates_from_folder(self, foldername):
|
||||
# TODO: Вычислять версию, если берем данные из каталога
|
||||
yield dict(intver=0, textver="Unknown", url=foldername)
|
||||
yield dict(intver=0, textver="Unknown", delta_url=foldername, complete_url=foldername)
|
||||
|
||||
def __get_updates_from_rar(self, url):
|
||||
aorar = AoRar()
|
||||
@@ -39,14 +39,11 @@ class AoUpdater:
|
||||
for table_entry in aorar.get_table_entries(fname, allowed_tables):
|
||||
yield table_entry
|
||||
|
||||
def __init_update_entries(self, full_base):
|
||||
def __init_update_entries(self, updates_generator):
|
||||
if self.mode == "http":
|
||||
assert updates_generator
|
||||
self.tablelist_generator = self.__get_updates_from_rar
|
||||
imp = Importer()
|
||||
if full_base:
|
||||
self.updalist_generator = imp.get_full()
|
||||
else:
|
||||
self.updalist_generator = imp.get_updates()
|
||||
self.updalist_generator = updates_generator
|
||||
else:
|
||||
assert path.isdir(self.mode), "Invalid directory {}".format(self.mode)
|
||||
self.updalist_generator = self.__get_updates_from_folder(self.mode)
|
||||
@@ -56,12 +53,13 @@ class AoUpdater:
|
||||
aoparser = AoDataParser(table_xmlentry, chunck_size)
|
||||
aoparser.parse(lambda x, y: self.db_handler.bulk_csv(operation_type, table_xmlentry.table_name, x, y))
|
||||
|
||||
def create(self):
|
||||
self.__init_update_entries(True)
|
||||
def create(self, updates_generator):
|
||||
self.__init_update_entries(updates_generator)
|
||||
self.db_handler.pre_create()
|
||||
|
||||
for update_entry in self.updalist_generator:
|
||||
for table_entry in self.tablelist_generator(update_entry['url']):
|
||||
logging.info("Processing update #{}".format(update_entry['intver']))
|
||||
for table_entry in self.tablelist_generator(update_entry['complete_url']):
|
||||
if table_entry.operation_type == AoXmlTableEntry.OperationType.update:
|
||||
table_entry.operation_type = AoXmlTableEntry.OperationType.create
|
||||
self.process_single_entry(table_entry.operation_type, table_entry)
|
||||
@@ -70,18 +68,13 @@ class AoUpdater:
|
||||
|
||||
logging.info("Create success")
|
||||
|
||||
def update(self, updates_generator):
    """Apply delta updates to the database.

    Args:
        updates_generator: iterable of update descriptors (dicts read here
            via the ``intver`` and ``delta_url`` keys). It is handed to
            ``__init_update_entries``, which decides the actual source.
    """
    self.__init_update_entries(updates_generator)
    self.db_handler.pre_update()

    # __init_update_entries stores the selected source on
    # ``self.updalist_generator`` (the same attribute ``create`` iterates).
    # The previous ``self.updates_generator`` is not assigned anywhere in the
    # visible code, so iterating it would raise AttributeError.
    for update_entry in self.updalist_generator:
        logging.info("Processing update #{}".format(update_entry['intver']))
        for table_entry in self.tablelist_generator(update_entry['delta_url']):
            self.process_single_entry(table_entry.operation_type, table_entry)

    logging.info("Update success")
|
||||
Reference in New Issue
Block a user