Добавлена возможность обновления (создания) конкретной версии

This commit is contained in:
Jack Stdin
2016-01-17 21:08:01 +03:00
parent 4d565e5808
commit 0bd79b1311
14 changed files with 176 additions and 102 deletions

View File

@@ -1,39 +0,0 @@
# -*- coding: utf-8 -*-
from pysimplesoap.client import SoapClient
class Importer:
    """Client for the FIAS public download SOAP service.

    Both generators yield dictionaries with the keys ``intver`` (version
    number as int), ``strver`` (textual version) and ``url`` (archive URL).
    """

    def __init__(self):
        self.client = SoapClient(
            location="http://fias.nalog.ru/WebServices/Public/DownloadService.asmx",
            action='http://fias.nalog.ru/WebServices/Public/DownloadService.asmx/',
            namespace="http://fias.nalog.ru/WebServices/Public/DownloadService.asmx",
            soap_ns='soap', trace=False, ns=False)

    def get_current_fias_version(self):
        """Return the locally known FIAS version (currently a hard-coded stub)."""
        return 224  # TODO FIXIT

    def get_full(self):
        """Yield one descriptor for the latest complete FIAS archive.

        Raises:
            AssertionError: if the service returned an empty response, or if
                the latest remote version is not newer than the value
                reported by get_current_fias_version().
        """
        response = self.client.GetLastDownloadFileInfo()
        assert response, "Response is null"

        file_info = response.GetLastDownloadFileInfoResponse.GetLastDownloadFileInfoResult

        # Compare as integers (same as get_updates). Only a strictly newer
        # remote version means there is something to download.
        latest_version = int(file_info.VersionId)
        assert latest_version > self.get_current_fias_version(), "DB is already up-to-date"

        yield dict(intver=latest_version,
                   strver=str(file_info.TextVersion),
                   url=str(file_info.FiasCompleteXmlUrl))

    def get_updates(self):
        """Yield a descriptor for every delta archive newer than the current version.

        Entries are yielded in the order the service returns them; ``url``
        points to the delta (incremental) archive.

        Raises:
            AssertionError: if the service returned an empty response.
        """
        response = self.client.GetAllDownloadFileInfo()
        assert response, "Response is null"

        current_fias_version = self.get_current_fias_version()
        file_infos = response.GetAllDownloadFileInfoResponse.GetAllDownloadFileInfoResult.DownloadFileInfo
        for file_info in file_infos:
            version = int(file_info.VersionId)
            if version > current_fias_version:
                yield dict(intver=version,
                           strver=str(file_info.TextVersion),
                           url=str(file_info.FiasDeltaXmlUrl))

17
aore/fias/fiasfactory.py Normal file
View File

@@ -0,0 +1,17 @@
# -*- coding: utf-8 -*-
from aore.fias.search import SphinxSearch
class FiasFactory:
    """Entry point for address search; delegates to a SphinxSearch instance."""

    def __init__(self):
        self.searcher = SphinxSearch()

    def find(self, text, strong=False, out_format="simple"):
        """Search for ``text`` and return whatever the searcher found.

        Args:
            text: search string.
            strong: True for a strict search; False for a "soft" search that
                tolerates mistakes and typos.
            out_format: "full" (detailed, per sub-item) or "simple" (only the
                string and AOID). Accepted but not used by this method yet.

        Returns:
            The value returned by ``self.searcher.find(text, strong)``, or an
            empty list if the search failed.
        """
        try:
            results = self.searcher.find(text, strong)
        except (KeyboardInterrupt, SystemExit):
            # Never swallow interpreter shutdown / Ctrl+C.
            raise
        except BaseException:
            # The searcher may raise a bare BaseException for phrases it
            # cannot process, so catching Exception alone is not enough.
            # The search stays best-effort, but the failure is logged.
            import logging
            logging.exception("Search failed for %r", text)
            return []
        return results

View File

@@ -4,7 +4,7 @@ import re
import Levenshtein
import psycopg2
import aore.sphinxapi as sphinxapi
import sphinxapi
from aore.config import db as dbparams, sphinx_index_sugg, sphinx_index_addjobj
from aore.dbutils.dbimpl import DBImpl
@@ -80,12 +80,12 @@ class SphinxSearch:
phrase = unicode(phrase).replace('-', '').replace('@', '').lower()
return re.split(r"[ ,:.#$]+", phrase)
def __add_word_variations(self, word_entry):
if word_entry.MT_MANY_SUGG:
def __add_word_variations(self, word_entry, strong):
if word_entry.MT_MANY_SUGG and not strong:
suggs = self.__get_suggest(word_entry.word, self.rating_limit_soft, 6)
for suggestion in suggs:
word_entry.add_variation(suggestion[0])
if word_entry.MT_SOME_SUGG:
if word_entry.MT_SOME_SUGG and not strong:
suggs = self.__get_suggest(word_entry.word, self.rating_limit_hard, 3)
for suggestion in suggs:
word_entry.add_variation(suggestion[0])
@@ -96,16 +96,18 @@ class SphinxSearch:
if word_entry.MT_ADD_SOCR:
word_entry.add_variation_socr()
def __get_word_entries(self, words, strong):
    """Yield a WordEntry with its variations for every non-empty word.

    Args:
        words: iterable of already split words; empty strings are skipped.
        strong: forwarded to __add_word_variations (strict search flag).

    Raises:
        ValueError: if a word ends up with no variations at all
            (get_variations() returns "()"), so no query can be built.
    """
    for word in words:
        if word == '':
            continue
        entry = WordEntry(self.db, word)
        self.__add_word_variations(entry, strong)
        if entry.get_variations() == "()":
            # ValueError instead of a bare BaseException: handlers written
            # as "except Exception" can now see it, and handlers that caught
            # BaseException still do.
            raise ValueError("Cannot process sentence.")
        yield entry
def find(self, text):
def find(self, text, strong):
words = self.__split_phrase(text)
word_entries = self.__get_word_entries(words)
word_entries = self.__get_word_entries(words, strong)
sentence = "{}".format(" MAYBE ".join(x.get_variations() for x in word_entries))
self.__configure(sphinx_index_addjobj)
@@ -114,4 +116,4 @@ class SphinxSearch:
results = []
for ma in rs['matches']:
results.append([ma['attrs']['aoid'], ma['attrs']['fullname'], ma['weight']])
print results
return results

View File

@@ -5,9 +5,9 @@ import os
from bottle import template
from aore.aoutils.aoxmltableentry import AoXmlTableEntry
from aore.updater.aoxmltableentry import AoXmlTableEntry
from aore.updater.dbhandler import DbHandler
from aore.config import db as dbconfig, sphinx_index_addjobj, sphinx_var_dir, trashfolder, sphinx_index_sugg
from aore.dbutils.dbhandler import DbHandler
from trigram import trigram

View File

@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
import os
from aore.aoutils.aoxmltableentry import AoXmlTableEntry
from aore.updater.aoxmltableentry import AoXmlTableEntry
from aore.config import trashfolder
from aore.dbutils.dbschemas import db_shemas
from xmlparser import XMLParser
@@ -18,18 +18,12 @@ class AoDataParser:
self.pagesize = pagesize
self.currentpage = 0
self.counter = 0
self.addrobj_filter = self.datasource.table_name == 'ADDROBJ' and self.datasource.operation_type == AoXmlTableEntry.OperationType.create
self.base_filename = ""
self.csv_file = None
self.data_bereit_callback = None
def import_update(self, attr):
# Addrobj anvanced filter
if self.addrobj_filter:
if attr['ACTSTATUS'] == '0' or 'NEXTID' in attr:
return
if self.counter > self.pagesize:
# Send old file to DB engine
if self.csv_file:

View File

@@ -5,7 +5,7 @@ import logging
import psycopg2
from bottle import template
from aore.aoutils.aoxmltableentry import AoXmlTableEntry
from aore.updater.aoxmltableentry import AoXmlTableEntry
from aore.config import db as dbparams
from aore.dbutils.dbimpl import DBImpl
from aore.dbutils.dbschemas import db_shemas

View File

@@ -0,0 +1,26 @@
# -*- coding: utf-8 -*-
from pysimplesoap.client import SoapClient
class SoapReceiver:
    """Client for the FIAS public download SOAP service."""

    # Service endpoint; the same URL is used as the SOAP namespace and,
    # with a trailing slash, as the SOAP action prefix.
    SERVICE_URL = "http://fias.nalog.ru/WebServices/Public/DownloadService.asmx"

    def __init__(self, service_url=None):
        """Create the SOAP client.

        Args:
            service_url: endpoint of the download service; defaults to
                SERVICE_URL when omitted.
        """
        if service_url is None:
            service_url = self.SERVICE_URL
        self.client = SoapClient(
            location=service_url,
            action=service_url + '/',
            namespace=service_url,
            soap_ns='soap', trace=False, ns=False)

    def get_current_fias_version(self):
        """Return the locally known FIAS version (currently a hard-coded stub)."""
        return 224  # TODO FIXIT

    def get_update_list(self):
        """Yield a descriptor for every version published by the service.

        Each descriptor is a dict with the keys ``intver`` (int version id),
        ``strver`` (textual version), ``delta_url`` (incremental archive) and
        ``complete_url`` (full archive). No filtering by version is done here.

        Raises:
            AssertionError: if the service returned an empty response.
        """
        response = self.client.GetAllDownloadFileInfo()
        assert response, "Response is null"

        file_infos = response.GetAllDownloadFileInfoResponse.GetAllDownloadFileInfoResult.DownloadFileInfo
        for file_info in file_infos:
            yield dict(intver=int(file_info.VersionId),
                       strver=str(file_info.TextVersion),
                       delta_url=str(file_info.FiasDeltaXmlUrl),
                       complete_url=str(file_info.FiasCompleteXmlUrl))

View File

@@ -3,15 +3,15 @@
import logging
from os import walk, path
from aore.aoutils.aodataparser import AoDataParser
from aore.aoutils.aorar import AoRar
from aore.aoutils.aoxmltableentry import AoXmlTableEntry
from aore.aoutils.importer import Importer
from aore.dbutils.dbhandler import DbHandler
from aore.updater.aodataparser import AoDataParser
from aore.updater.aorar import AoRar
from aore.updater.aoxmltableentry import AoXmlTableEntry
from aore.updater.dbhandler import DbHandler
from aore.updater.soapreceiver import SoapReceiver
from aore.dbutils.dbschemas import allowed_tables
class AoUpdater:
class Updater:
# Source: "http", directory (as a full path to unpacked xmls)
def __init__(self, source="http"):
self.db_handler = DbHandler()
@@ -31,7 +31,7 @@ class AoUpdater:
def __get_updates_from_folder(self, foldername):
# TODO: Вычислять версию, если берем данные из каталога
yield dict(intver=0, textver="Unknown", url=foldername)
yield dict(intver=0, textver="Unknown", delta_url=foldername, complete_url=foldername)
def __get_updates_from_rar(self, url):
aorar = AoRar()
@@ -39,14 +39,11 @@ class AoUpdater:
for table_entry in aorar.get_table_entries(fname, allowed_tables):
yield table_entry
def __init_update_entries(self, full_base):
def __init_update_entries(self, updates_generator):
if self.mode == "http":
assert updates_generator
self.tablelist_generator = self.__get_updates_from_rar
imp = Importer()
if full_base:
self.updalist_generator = imp.get_full()
else:
self.updalist_generator = imp.get_updates()
self.updalist_generator = updates_generator
else:
assert path.isdir(self.mode), "Invalid directory {}".format(self.mode)
self.updalist_generator = self.__get_updates_from_folder(self.mode)
@@ -56,12 +53,13 @@ class AoUpdater:
aoparser = AoDataParser(table_xmlentry, chunck_size)
aoparser.parse(lambda x, y: self.db_handler.bulk_csv(operation_type, table_xmlentry.table_name, x, y))
def create(self):
self.__init_update_entries(True)
def create(self, updates_generator):
self.__init_update_entries(updates_generator)
self.db_handler.pre_create()
for update_entry in self.updalist_generator:
for table_entry in self.tablelist_generator(update_entry['url']):
logging.info("Processing update #{}".format(update_entry['intver']))
for table_entry in self.tablelist_generator(update_entry['complete_url']):
if table_entry.operation_type == AoXmlTableEntry.OperationType.update:
table_entry.operation_type = AoXmlTableEntry.OperationType.create
self.process_single_entry(table_entry.operation_type, table_entry)
@@ -70,18 +68,13 @@ class AoUpdater:
logging.info("Create success")
def update(self, updates_generator):
    """Apply the delta archive of every update entry to the database.

    Args:
        updates_generator: iterable of update descriptors (dicts providing
            at least 'intver' and 'delta_url'). It is handed to
            __init_update_entries, which stores the source that is actually
            iterated in self.updalist_generator.
    """
    self.__init_update_entries(updates_generator)
    self.db_handler.pre_update()

    # Iterate the attribute that __init_update_entries assigns;
    # self.updates_generator is never set anywhere on the instance.
    for update_entry in self.updalist_generator:
        logging.info("Processing update #{}".format(update_entry['intver']))
        for table_entry in self.tablelist_generator(update_entry['delta_url']):
            self.process_single_entry(table_entry.operation_type, table_entry)

    logging.info("Update success")