Добавлена возможность импорта из rar-архива. (Fixes #6)

This commit is contained in:
Jack Stdin 2016-02-14 16:02:23 +03:00
parent a0a3ec83c6
commit cb1da8dd5f
3 changed files with 75 additions and 58 deletions

100
README.md
View File

@ -12,6 +12,7 @@ Python application that can operate with FIAS (Russian Address Object DB)
импорта из внешних систем).
2. Автоматическое развертывание базы ФИАС
- Из директории с файлами XML (like 'AS_ADDROBJ_20160107_xxx.XML').
- Из локального файла архива (.rar).
- Напрямую с HTTP сервера ФНС.
3. Актуализация базы (из XML, HTTP) с возможностью выбора необходимых обновлений.
@ -34,35 +35,35 @@ _Внимание_! Только Python 2.7, только PostgreSQL, тольк
Предварительно обязательно установить и настроить:
1. Python 2.7.x, pip
Для Windows качаем - ставим, для Debian:
```
sudo apt-get install python-setuptools
sudo easy_install pip
sudo pip install --upgrade pip
```
Для Windows качаем - ставим, для Debian:
```
sudo apt-get install python-setuptools
sudo easy_install pip
sudo pip install --upgrade pip
```
2. PostgreSQL 9.5 и выше (из-за синтаксиса _ON CONFLICT ... DO_)
Для Windows, как обычно, [качаем](http://www.enterprisedb.com/products-services-training/pgdownload#windows) - ставим, для Debian:
```
sudo sh -c 'echo deb http://apt.postgresql.org/pub/repos/apt/ trusty-pgdg main 9.5 > /etc/apt/sources.list.d/postgresql.list'
wget --quiet -O - https://www.postgresql.org/media/keys/ACCC4CF8.asc | sudo apt-key add -
sudo apt-get update
sudo apt-get install postgresql-9.5
```
Затем создайте пользователя и базу данных.
Для Windows, как обычно, [качаем](http://www.enterprisedb.com/products-services-training/pgdownload#windows) - ставим, для Debian:
```
sudo sh -c 'echo deb http://apt.postgresql.org/pub/repos/apt/ trusty-pgdg main 9.5 > /etc/apt/sources.list.d/postgresql.list'
wget --quiet -O - https://www.postgresql.org/media/keys/ACCC4CF8.asc | sudo apt-key add -
sudo apt-get update
sudo apt-get install postgresql-9.5
```
Затем создайте пользователя и базу данных.
3. Sphinx 2.2.1 и новее:
[Windows](http://sphinxsearch.com/downloads/release/), Debian:
```
cd /tmp
wget http://sphinxsearch.com/files/sphinx-2.2.10-release.tar.gz
tar xzf sphinx-2.2.10-release.tar.gz
cd sphinx-2.2.10-release
sudo apt-get install postgresql-server-dev-9.5
./configure --without-mysql --with-pgsql
make
sudo make install
```
[Windows](http://sphinxsearch.com/downloads/release/), Debian:
```
cd /tmp
wget http://sphinxsearch.com/files/sphinx-2.2.10-release.tar.gz
tar xzf sphinx-2.2.10-release.tar.gz
cd sphinx-2.2.10-release
sudo apt-get install postgresql-server-dev-9.5
./configure --without-mysql --with-pgsql
make
sudo make install
```
4. Web-сервер с поддержкой WSGI, любой, по Вашему желанию.
@ -70,36 +71,37 @@ sudo make install
1. Установить lxml, через pip не ставится, так что качаем [отсюда](https://pypi.python.org/pypi/lxml/3.5.0).
2. Установить unrar.exe (можно установить WinRar целиком).
3. Установить sphinxapi последней версии (либо взять из директории Sphinx):
```
python -m pip install https://github.com/Romamo/sphinxapi/zipball/master
```
```
python -m pip install https://github.com/Romamo/sphinxapi/zipball/master
```
### Debian Linux
1. Установить libxml
```
sudo apt-get install python-dev libxml2 libxml2-dev libxslt-dev
```
```
sudo apt-get install python-dev libxml2 libxml2-dev libxslt-dev
```
2. Установить unrar (non-free)
```
sudo sh -c 'echo deb ftp://ftp.us.debian.org/debian/ stable main non-free > /etc/apt/sources.list.d/non-free.list'
sudo apt-get update
sudo apt-get install unrar
```
```
sudo sh -c 'echo deb ftp://ftp.us.debian.org/debian/ stable main non-free > /etc/apt/sources.list.d/non-free.list'
sudo apt-get update
sudo apt-get install unrar
```
3. Установить sphinxapi последней версии:
```
pip install https://github.com/Romamo/sphinxapi/zipball/master
```
```
pip install https://github.com/Romamo/sphinxapi/zipball/master
```
4. Установить, собственно, приложение:
- полностью:
```
sudo mkdir -p /var/www/py-phias
sudo chown www-fias: /var/www/py-phias
wget https://github.com/jar3b/py-phias/archive/v0.0.1.tar.gz
sudo -u www-fias tar xzf v0.0.1.tar.gz -C /var/www/py-phias --strip-components=1
sudo pip install -r requirements.txt
```
```
sudo mkdir -p /var/www/py-phias
sudo chown www-fias: /var/www/py-phias
wget https://github.com/jar3b/py-phias/archive/v0.0.1.tar.gz
sudo -u www-fias tar xzf v0.0.1.tar.gz -C /var/www/py-phias --strip-components=1
cd /var/www/py-phias
sudo pip install -r requirements.txt
```
- как библиотеку:
```
python -m pip install ....
```
```
python -m pip install ....
```

View File

@ -18,7 +18,7 @@ class Updater:
# Source: "http", a directory (full path to unpacked XMLs) or a full path to a local .rar archive
def __init__(self, source="http"):
self.db_handler = DbHandler()
self.mode = source
self.source = source
self.updalist_generator = None
self.tablelist_generator = None
self.allowed_tables = None
@ -61,19 +61,33 @@ class Updater:
def __get_updates_from_rar(self, url):
aorar = AoRar()
fname = aorar.download(url)
fname = None
if url.startswith("http://") or url.startswith("https://"):
fname = aorar.download(url)
if url.endswith(".rar") and path.isfile(url):
fname = url
assert fname, "No source was specified"
for table_entry in aorar.get_table_entries(fname, allowed_tables):
yield table_entry
def __init_update_entries(self, updates_generator):
if self.mode == "http":
if self.source == "http":
assert updates_generator, "No generator"
self.tablelist_generator = self.__get_updates_from_rar
self.updalist_generator = updates_generator
else:
assert path.isdir(self.mode), "Invalid directory {}".format(self.mode)
self.updalist_generator = self.__get_updates_from_folder(self.mode)
return
if self.source.endswith(".rar"):
self.tablelist_generator = self.__get_updates_from_rar
self.updalist_generator = self.__get_updates_from_folder(self.source)
return
if path.isdir(self.source):
self.tablelist_generator = self.__get_entries_from_folder
self.updalist_generator = self.__get_updates_from_folder(self.source)
assert self.tablelist_generator, "No valid source."
def process_single_entry(self, operation_type, table_xmlentry, chunck_size=50000):
aoparser = AoDataParser(table_xmlentry, chunck_size)

View File

@ -9,7 +9,7 @@ from aore.miscutils.sphinx import SphinxHelper
from aore.updater.soapreceiver import SoapReceiver
from aore.updater.updater import Updater
logging.basicConfig(format='%(asctime)s %(message)s', level=logging.WARNING)
logging.basicConfig(format='%(asctime)s %(message)s', level=logging.INFO)
def is_root():
@ -92,7 +92,8 @@ def main():
help="Show available fias versions. "
"These version numbers are required for the '--update-version' option")
p.add_option('--source', '-s', default="http",
help="Create/update DB from source. Value: 'http' or absolute path to folder containing XMLs")
help="Create/update DB from source. Value: 'http', absolute path to folder containing XMLs "
"or absolute path to rar file.")
p.add_option('--sphinx-configure', '-c', action="store_true", dest="sphinx", default="False",
help="Configure Sphinx. Creates a sphinx.conf file specified in '--output-conf'")
p.add_option('--indexer-path', '-i',
@ -116,7 +117,7 @@ def main():
if options.database:
if not is_root():
print "This option need to be run with elevated privileges."
return
# return
# create new database
aoupdater = Updater(options.source)