Добавлена возможность обновления (создания) конкретной версии
This commit is contained in:
parent
4d565e5808
commit
0bd79b1311
23
README.md
23
README.md
@ -1,11 +1,28 @@
|
||||
# py-fias
|
||||
WSGI application that serves FIAS (the Russian Address Object DB)
|
||||
|
||||
Простое приложение для работы с БД ФИАС, написано для Python 2.7
|
||||
|
||||
## Установка
|
||||
Протестирована работа на следующих ОС: Windows (8.1) и Debian Jessie
|
||||
|
||||
Предполагается, что у Вас уже установлена БД PostgreSql версии 9.5, интерпретатор Python 2.7
|
||||
### Зависимости
|
||||
|
||||
1. Windows
|
||||
1. Установим *уйню...
|
||||
Для работы приложения необходимо достаточное кол-во RAM (1Gb+) и 4.5Gb места на диске
|
||||
(3-3.5Gb для скачивания архива с базой и 300-400Mb для индексов Sphinx). Также необходимы root права
|
||||
(или Администратора, для OS Windows), для работы демона Sphinx и предварительной установки.
|
||||
|
||||
Предварительно нужно установить и настроить:
|
||||
|
||||
1. Python 2.7 [Windows](https://www.python.org/downloads/windows/), [Debian](https://www.python.org/downloads/source/)
|
||||
(`sudo apt-get install python2.7 python2.7-dev`), pip
|
||||
|
||||
2. PostgreSQL 9.5 и выше (из-за _ON CONFLICT_)
|
||||
|
||||
3. Sphinx 2.2.3 и новее (из-за синтаксиса _MAYBE_)
|
||||
|
||||
### Windows
|
||||
1. Установить sphinxapi последней версии:
|
||||
|
||||
`python -m pip install https://github.com/Romamo/sphinxapi/zipball/master`
|
||||
|
@ -1,39 +0,0 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from pysimplesoap.client import SoapClient
|
||||
|
||||
|
||||
class Importer:
    """Client for the FIAS download SOAP service.

    Every archive description produced by this class is a dict with the keys
    ``intver`` (int version id), ``strver`` (text version) and ``url``
    (archive location).
    """

    def __init__(self):
        self.client = SoapClient(
            location="http://fias.nalog.ru/WebServices/Public/DownloadService.asmx",
            action='http://fias.nalog.ru/WebServices/Public/DownloadService.asmx/',
            namespace="http://fias.nalog.ru/WebServices/Public/DownloadService.asmx",
            soap_ns='soap', trace=False, ns=False)

    def get_current_fias_version(self):
        """Return the locally installed FIAS version (hard-coded placeholder)."""
        return 224  # TODO FIXIT

    def get_full(self):
        """Yield the description of the latest complete FIAS archive.

        Raises:
            AssertionError: if the service returned nothing, or the latest
                published version is not newer than the installed one.
        """
        response = self.client.GetLastDownloadFileInfo()

        assert response, "Response is null"
        info = response.GetLastDownloadFileInfoResponse.GetLastDownloadFileInfoResult

        # Compare as integers; the remote version must be strictly newer,
        # otherwise there is nothing to download.
        latest_version = int(info.VersionId)
        assert latest_version > self.get_current_fias_version(), "DB is already up-to-date"

        yield dict(intver=latest_version, strver=str(info.TextVersion),
                   url=str(info.FiasCompleteXmlUrl))

    def get_updates(self):
        """Yield descriptions of every delta archive newer than the installed version.

        Raises:
            AssertionError: if the service returned nothing.
        """
        response = self.client.GetAllDownloadFileInfo()

        assert response, "Response is null"

        current_fias_version = self.get_current_fias_version()

        file_infos = response.GetAllDownloadFileInfoResponse.GetAllDownloadFileInfoResult.DownloadFileInfo
        for info in file_infos:
            version = int(info.VersionId)
            if version > current_fias_version:
                yield dict(intver=version, strver=str(info.TextVersion),
                           url=str(info.FiasDeltaXmlUrl))
|
17
aore/fias/fiasfactory.py
Normal file
17
aore/fias/fiasfactory.py
Normal file
@ -0,0 +1,17 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from aore.fias.search import SphinxSearch
|
||||
|
||||
|
||||
class FiasFactory:
    """Thin facade that runs address searches through ``SphinxSearch``."""

    def __init__(self):
        self.searcher = SphinxSearch()

    def find(self, text, strong=False, out_format="simple"):
        """Search for an address and return the matches.

        Args:
            text: the search string.
            strong: strict search when true; otherwise a "soft" search that
                tolerates mistakes and typos.
            out_format: "full" (details for every sub-item) or "simple"
                (only the string and AOID). Not used by this method yet.

        Returns:
            Whatever ``self.searcher.find`` returns, or an empty list when
            the search fails.
        """
        try:
            return self.searcher.find(text, strong)
        except (KeyboardInterrupt, SystemExit):
            # Never swallow interpreter shutdown / Ctrl-C.
            raise
        except BaseException:
            # SphinxSearch raises a plain BaseException when a phrase cannot
            # be processed, so catching Exception alone would miss it.
            return []
|
@ -4,7 +4,7 @@ import re
|
||||
|
||||
import Levenshtein
|
||||
import psycopg2
|
||||
import aore.sphinxapi as sphinxapi
|
||||
import sphinxapi
|
||||
|
||||
from aore.config import db as dbparams, sphinx_index_sugg, sphinx_index_addjobj
|
||||
from aore.dbutils.dbimpl import DBImpl
|
||||
@ -80,12 +80,12 @@ class SphinxSearch:
|
||||
phrase = unicode(phrase).replace('-', '').replace('@', '').lower()
|
||||
return re.split(r"[ ,:.#$]+", phrase)
|
||||
|
||||
def __add_word_variations(self, word_entry):
|
||||
if word_entry.MT_MANY_SUGG:
|
||||
def __add_word_variations(self, word_entry, strong):
|
||||
if word_entry.MT_MANY_SUGG and not strong:
|
||||
suggs = self.__get_suggest(word_entry.word, self.rating_limit_soft, 6)
|
||||
for suggestion in suggs:
|
||||
word_entry.add_variation(suggestion[0])
|
||||
if word_entry.MT_SOME_SUGG:
|
||||
if word_entry.MT_SOME_SUGG and not strong:
|
||||
suggs = self.__get_suggest(word_entry.word, self.rating_limit_hard, 3)
|
||||
for suggestion in suggs:
|
||||
word_entry.add_variation(suggestion[0])
|
||||
@ -96,16 +96,18 @@ class SphinxSearch:
|
||||
if word_entry.MT_ADD_SOCR:
|
||||
word_entry.add_variation_socr()
|
||||
|
||||
def __get_word_entries(self, words):
|
||||
def __get_word_entries(self, words, strong):
    """Yield a ``WordEntry`` with its variations for every non-empty word.

    ``strong`` is passed through to ``__add_word_variations``. Raises
    ``BaseException`` as soon as a word ends up with the empty variation
    set ``"()"``.
    """
    for token in words:
        # Empty strings produce no entry at all.
        if token == '':
            continue

        entry = WordEntry(self.db, token)
        self.__add_word_variations(entry, strong)

        if entry.get_variations() == "()":
            raise BaseException("Cannot process sentence.")

        yield entry
|
||||
|
||||
def find(self, text):
|
||||
def find(self, text, strong):
|
||||
words = self.__split_phrase(text)
|
||||
word_entries = self.__get_word_entries(words)
|
||||
word_entries = self.__get_word_entries(words, strong)
|
||||
sentence = "{}".format(" MAYBE ".join(x.get_variations() for x in word_entries))
|
||||
|
||||
self.__configure(sphinx_index_addjobj)
|
||||
@ -114,4 +116,4 @@ class SphinxSearch:
|
||||
results = []
|
||||
for ma in rs['matches']:
|
||||
results.append([ma['attrs']['aoid'], ma['attrs']['fullname'], ma['weight']])
|
||||
print results
|
||||
return results
|
||||
|
@ -5,9 +5,9 @@ import os
|
||||
|
||||
from bottle import template
|
||||
|
||||
from aore.aoutils.aoxmltableentry import AoXmlTableEntry
|
||||
from aore.updater.aoxmltableentry import AoXmlTableEntry
|
||||
from aore.updater.dbhandler import DbHandler
|
||||
from aore.config import db as dbconfig, sphinx_index_addjobj, sphinx_var_dir, trashfolder, sphinx_index_sugg
|
||||
from aore.dbutils.dbhandler import DbHandler
|
||||
from trigram import trigram
|
||||
|
||||
|
||||
|
@ -1,7 +1,7 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
import os
|
||||
|
||||
from aore.aoutils.aoxmltableentry import AoXmlTableEntry
|
||||
from aore.updater.aoxmltableentry import AoXmlTableEntry
|
||||
from aore.config import trashfolder
|
||||
from aore.dbutils.dbschemas import db_shemas
|
||||
from xmlparser import XMLParser
|
||||
@ -18,18 +18,12 @@ class AoDataParser:
|
||||
self.pagesize = pagesize
|
||||
self.currentpage = 0
|
||||
self.counter = 0
|
||||
self.addrobj_filter = self.datasource.table_name == 'ADDROBJ' and self.datasource.operation_type == AoXmlTableEntry.OperationType.create
|
||||
|
||||
self.base_filename = ""
|
||||
self.csv_file = None
|
||||
self.data_bereit_callback = None
|
||||
|
||||
def import_update(self, attr):
|
||||
# Addrobj anvanced filter
|
||||
if self.addrobj_filter:
|
||||
if attr['ACTSTATUS'] == '0' or 'NEXTID' in attr:
|
||||
return
|
||||
|
||||
if self.counter > self.pagesize:
|
||||
# Send old file to DB engine
|
||||
if self.csv_file:
|
@ -5,7 +5,7 @@ import logging
|
||||
import psycopg2
|
||||
from bottle import template
|
||||
|
||||
from aore.aoutils.aoxmltableentry import AoXmlTableEntry
|
||||
from aore.updater.aoxmltableentry import AoXmlTableEntry
|
||||
from aore.config import db as dbparams
|
||||
from aore.dbutils.dbimpl import DBImpl
|
||||
from aore.dbutils.dbschemas import db_shemas
|
26
aore/updater/soapreceiver.py
Normal file
26
aore/updater/soapreceiver.py
Normal file
@ -0,0 +1,26 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from pysimplesoap.client import SoapClient
|
||||
|
||||
|
||||
class SoapReceiver:
    """Reads the list of published FIAS versions from the download SOAP service."""

    def __init__(self):
        endpoint = "http://fias.nalog.ru/WebServices/Public/DownloadService.asmx"
        self.client = SoapClient(
            location=endpoint,
            action='http://fias.nalog.ru/WebServices/Public/DownloadService.asmx/',
            namespace=endpoint,
            soap_ns='soap',
            trace=False,
            ns=False)

    def get_current_fias_version(self):
        """Return the installed FIAS version (currently a hard-coded stub)."""
        return 224  # TODO FIXIT

    def get_update_list(self):
        """Yield one dict per version reported by the service.

        Each dict carries ``intver``, ``strver``, ``delta_url`` and
        ``complete_url``. Raises ``AssertionError`` on an empty response.
        """
        response = self.client.GetAllDownloadFileInfo()

        assert response, "Response is null"

        result = response.GetAllDownloadFileInfoResponse.GetAllDownloadFileInfoResult
        for file_info in result.DownloadFileInfo:
            yield {
                'intver': int(file_info.VersionId),
                'strver': str(file_info.TextVersion),
                'delta_url': str(file_info.FiasDeltaXmlUrl),
                'complete_url': str(file_info.FiasCompleteXmlUrl),
            }
|
@ -3,15 +3,15 @@
|
||||
import logging
|
||||
from os import walk, path
|
||||
|
||||
from aore.aoutils.aodataparser import AoDataParser
|
||||
from aore.aoutils.aorar import AoRar
|
||||
from aore.aoutils.aoxmltableentry import AoXmlTableEntry
|
||||
from aore.aoutils.importer import Importer
|
||||
from aore.dbutils.dbhandler import DbHandler
|
||||
from aore.updater.aodataparser import AoDataParser
|
||||
from aore.updater.aorar import AoRar
|
||||
from aore.updater.aoxmltableentry import AoXmlTableEntry
|
||||
from aore.updater.dbhandler import DbHandler
|
||||
from aore.updater.soapreceiver import SoapReceiver
|
||||
from aore.dbutils.dbschemas import allowed_tables
|
||||
|
||||
|
||||
class AoUpdater:
|
||||
class Updater:
|
||||
# Source: "http", directory (as a full path to unpacked xmls)
|
||||
def __init__(self, source="http"):
|
||||
self.db_handler = DbHandler()
|
||||
@ -31,7 +31,7 @@ class AoUpdater:
|
||||
|
||||
def __get_updates_from_folder(self, foldername):
|
||||
# TODO: Вычислять версию, если берем данные из каталога
|
||||
yield dict(intver=0, textver="Unknown", url=foldername)
|
||||
yield dict(intver=0, textver="Unknown", delta_url=foldername, complete_url=foldername)
|
||||
|
||||
def __get_updates_from_rar(self, url):
|
||||
aorar = AoRar()
|
||||
@ -39,14 +39,11 @@ class AoUpdater:
|
||||
for table_entry in aorar.get_table_entries(fname, allowed_tables):
|
||||
yield table_entry
|
||||
|
||||
def __init_update_entries(self, full_base):
|
||||
def __init_update_entries(self, updates_generator):
|
||||
if self.mode == "http":
|
||||
assert updates_generator
|
||||
self.tablelist_generator = self.__get_updates_from_rar
|
||||
imp = Importer()
|
||||
if full_base:
|
||||
self.updalist_generator = imp.get_full()
|
||||
else:
|
||||
self.updalist_generator = imp.get_updates()
|
||||
self.updalist_generator = updates_generator
|
||||
else:
|
||||
assert path.isdir(self.mode), "Invalid directory {}".format(self.mode)
|
||||
self.updalist_generator = self.__get_updates_from_folder(self.mode)
|
||||
@ -56,12 +53,13 @@ class AoUpdater:
|
||||
aoparser = AoDataParser(table_xmlentry, chunck_size)
|
||||
aoparser.parse(lambda x, y: self.db_handler.bulk_csv(operation_type, table_xmlentry.table_name, x, y))
|
||||
|
||||
def create(self):
|
||||
self.__init_update_entries(True)
|
||||
def create(self, updates_generator):
|
||||
self.__init_update_entries(updates_generator)
|
||||
self.db_handler.pre_create()
|
||||
|
||||
for update_entry in self.updalist_generator:
|
||||
for table_entry in self.tablelist_generator(update_entry['url']):
|
||||
logging.info("Processing update #{}".format(update_entry['intver']))
|
||||
for table_entry in self.tablelist_generator(update_entry['complete_url']):
|
||||
if table_entry.operation_type == AoXmlTableEntry.OperationType.update:
|
||||
table_entry.operation_type = AoXmlTableEntry.OperationType.create
|
||||
self.process_single_entry(table_entry.operation_type, table_entry)
|
||||
@ -70,18 +68,13 @@ class AoUpdater:
|
||||
|
||||
logging.info("Create success")
|
||||
|
||||
def update(self, count=1):
|
||||
self.__init_update_entries(False)
|
||||
def update(self, updates_generator):
    """Apply delta updates to an existing database.

    Args:
        updates_generator: iterable of update descriptions (dicts with at
            least ``intver`` and ``delta_url``); handed to
            ``__init_update_entries``.
    """
    self.__init_update_entries(updates_generator)
    self.db_handler.pre_update()

    # __init_update_entries stores the effective list on
    # self.updalist_generator; there is no self.updates_generator attribute.
    for update_entry in self.updalist_generator:
        logging.info("Processing update #{}".format(update_entry['intver']))
        for table_entry in self.tablelist_generator(update_entry['delta_url']):
            self.process_single_entry(table_entry.operation_type, table_entry)

    logging.info("Update success")
|
94
manage.py
94
manage.py
@ -2,30 +2,82 @@
|
||||
|
||||
import optparse
|
||||
|
||||
from aore.aoutils.aoupdater import AoUpdater
|
||||
from aore.miscutils.sphinx import SphinxHelper
|
||||
from aore.fias.search import SphinxSearch
|
||||
from aore.miscutils.sphinx import SphinxHelper
|
||||
from aore.updater.updater import Updater
|
||||
from aore.updater.soapreceiver import SoapReceiver
|
||||
|
||||
|
||||
def update_base(xml_source, updates_count):
|
||||
aoupdater = AoUpdater(xml_source)
|
||||
aoupdater.update(updates_count)
|
||||
def print_fias_versions():
    """Print the installed FIAS version and every version the service reports.

    The row whose number equals the installed version is prefixed with ``*``.
    """
    imp = SoapReceiver()
    current_version = imp.get_current_fias_version()
    all_versions = imp.get_update_list()

    print("Installed version: {}".format(current_version))
    print("Avaliable updates:")
    print("Number\t\tDate")
    for upd in all_versions:
        mark_current = '*' if int(upd['intver']) == current_version else ' '
        # Single-argument print(...) behaves the same as a statement on
        # Python 2 and as a function on Python 3.
        print("{}{}\t\t{}".format(mark_current, upd['intver'], upd['strver']))
|
||||
|
||||
|
||||
def create_base(xml_source):
|
||||
aoupdater = AoUpdater(xml_source)
|
||||
aoupdater.create()
|
||||
def parse_update_str(updates_str):
    """Parse the ``--update-version`` value into a list of version numbers.

    Accepted forms (case and spaces are ignored): ``all``, a single version
    (``111``), an inclusive range (``111-222``), or a comma-separated mix of
    the two (``111,115-117``).

    Returns:
        ``None`` for ``all`` (no restriction), otherwise a list of ints in
        the order they were given.

    Raises:
        ValueError: if an item is not an integer or a range is malformed.
    """
    normalized = updates_str.lower().replace(' ', '')
    if normalized == "all":
        return None

    out_list = []
    for u_entry in normalized.split(','):
        if '-' in u_entry:
            first, last = u_entry.split('-')
            # "111-222" names both ends, so the upper bound is inclusive.
            out_list.extend(range(int(first), int(last) + 1))
        else:
            out_list.append(int(u_entry))

    return out_list
|
||||
|
||||
|
||||
def get_allowed_updates(updates_str, mode="create"):
    """Yield the update descriptions that should be processed.

    Args:
        updates_str: raw ``--update-version`` value, parsed by
            ``parse_update_str``.
        mode: ``"create"`` or ``"update"``.

    Yields:
        Entries from ``SoapReceiver.get_update_list()``. In create mode with
        no explicit version, only the last entry reported by the service.
        Otherwise every entry newer than the installed version (restricted
        to the requested versions, if any), sorted by ``intver``.

    Raises:
        ValueError: in create mode when more than one version is requested.
    """
    imp = SoapReceiver()
    current_version = imp.get_current_fias_version()
    all_versions = list(imp.get_update_list())

    user_defined_list = parse_update_str(updates_str)

    if mode == "create":
        if not user_defined_list:
            # Nothing requested explicitly: take the last reported snapshot
            # and stop, instead of falling through to the filtering below.
            yield all_versions[-1]
            return
        if len(user_defined_list) != 1:
            raise ValueError("Exactly one version must be specified for 'create'")

    # NOTE(review): the "newer than installed" filter is also applied in
    # create mode, as in the original code - confirm this is intended.
    out_list = [
        uv for uv in all_versions
        if uv['intver'] > current_version
        and (not user_defined_list or uv['intver'] in user_defined_list)
    ]

    out_list.sort(key=lambda x: x['intver'])
    for ol_entry in out_list:
        yield ol_entry
|
||||
|
||||
|
||||
def main():
|
||||
# Parse options
|
||||
p = optparse.OptionParser()
|
||||
p.add_option('--database', '-b', action="store", type="string",
|
||||
help="Manage database. Value: create - create new DB, update - update existing DB without loose the data")
|
||||
p.add_option('--update-count', '-u', default=1, type="int",
|
||||
help="Count of updates to process, only for '--database update' option")
|
||||
help="Manage database. Values: "
|
||||
"create - create new DB, "
|
||||
"update - update existing DB without loose the data")
|
||||
p.add_option('--update-version', '-u', default="all", type="string",
|
||||
help="Valid for updating via HTTP. "
|
||||
"Versions of updates to process. Can be 111 or 111-222 or 111,222,333."
|
||||
"For '--database-create' only one value is necessary. If not specified, "
|
||||
"all updates will be processed (for '--database update') or last DB snapshot "
|
||||
"(for '--database create')")
|
||||
p.add_option('--show-versions', '-v', action="store_true", dest="show_versions", default=False,
|
||||
help="Show allowed fias versions")
|
||||
p.add_option('--source', '-s', default="http",
|
||||
help="Create/update DB from source. Value: \"http\" or absolute path to folder")
|
||||
help="Create/update DB from source. Value: 'http' or absolute path to folder containing XMLs")
|
||||
p.add_option('--sphinx-configure', '-c', action="store_true", dest="sphinx", default="False",
|
||||
help="Configure sphinx. Creates sphinx.conf specified in '--output-conf'")
|
||||
p.add_option('--indexer-path', '-i',
|
||||
@ -37,14 +89,25 @@ def main():
|
||||
|
||||
options, arguments = p.parse_args()
|
||||
|
||||
# Show FIAS updates
|
||||
if options.show_versions:
|
||||
print_fias_versions()
|
||||
return
|
||||
|
||||
# Manage DB
|
||||
if options.database:
|
||||
# create new database
|
||||
aoupdater = Updater(options.source)
|
||||
allowed_updates = None
|
||||
if options.source == "http":
|
||||
allowed_updates = get_allowed_updates(options.update_version)
|
||||
|
||||
if options.database == "create":
|
||||
create_base(options.source)
|
||||
aoupdater.create(allowed_updates)
|
||||
|
||||
# update database
|
||||
if options.database == "update":
|
||||
update_base(options.source, int(options.update_count))
|
||||
aoupdater.update(allowed_updates)
|
||||
|
||||
# Manage Sphinx
|
||||
if options.sphinx and options.indexer_path and options.output_conf:
|
||||
@ -54,7 +117,8 @@ def main():
|
||||
# 4 Debug purposes..
|
||||
if options.test:
|
||||
sph = SphinxSearch()
|
||||
sph.find('кридовая паскаул')
|
||||
sph.find('кедровая пасраул')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
|
Loading…
x
Reference in New Issue
Block a user