Day 2, full DB import/update/delete from dir with XML or HTTP

This commit is contained in:
Jack Stdin
2016-01-13 17:38:01 +03:00
parent 3aeb00d82a
commit 67f6943dce
14 changed files with 146 additions and 67 deletions

View File

@@ -18,17 +18,23 @@ class AoDataParser:
self.pagesize = pagesize
self.currentpage = 0
self.counter = 0
self.addrobj_filter = self.datasource.table_name == 'ADDROBJ' and self.datasource.operation_type == AoXmlTableEntry.OperationType.create
self.base_filename = ""
self.csv_file = None
self.data_bereit_callback = None
def import_update(self, attr):
# Addrobj advanced filter
if self.addrobj_filter:
if attr['ACTSTATUS'] == '0' or 'NEXTID' in attr:
return
if self.counter > self.pagesize:
# Send old file to DB engine
if self.csv_file:
self.csv_file.close()
self.data_bereit_callback(os.path.abspath(self.csv_file.name))
self.data_bereit_callback(self.counter, os.path.abspath(self.csv_file.name))
os.remove(self.csv_file.name)
# Prepare to next iteration
@@ -49,20 +55,21 @@ class AoDataParser:
# Output - sql query
def parse(self, data_callback):
if self.datasource.operation_type == AoXmlTableEntry.OperationType.update:
self.data_bereit_callback = data_callback
self.currentpage = 0
self.base_filename = trashfolder + "fd_" + str(self.datasource.operation_type) + "_" + \
self.datasource.table_name + ".csv.part{}"
self.counter = self.pagesize + 1
self.data_bereit_callback = data_callback
self.currentpage = 0
self.base_filename = \
trashfolder + "fd_" + \
str(self.datasource.operation_type) + "_" + \
self.datasource.table_name + ".csv.part{}"
self.counter = self.pagesize + 1
xml_parser = XMLParser(self.import_update)
src = self.datasource.open()
xml_parser.parse_buffer(src, db_shemas[self.datasource.table_name].xml_tag)
xml_parser = XMLParser(self.import_update)
src = self.datasource.open()
xml_parser.parse_buffer(src, db_shemas[self.datasource.table_name].xml_tag)
# Send last file to db processor
if self.csv_file:
self.csv_file.close()
self.data_bereit_callback(os.path.abspath(self.csv_file.name))
os.remove(self.csv_file.name)
src.close()
# Send last file to db processor
if self.csv_file:
self.csv_file.close()
self.data_bereit_callback(self.counter, os.path.abspath(self.csv_file.name))
os.remove(self.csv_file.name)
src.close()

View File

@@ -1,5 +1,6 @@
# -*- coding: utf-8 -*-
import logging
import os.path
from traceback import format_exc
@@ -15,7 +16,7 @@ class AoRar:
rarfile.UNRAR_TOOL = unrar
def download(self, url):
print("Downloading {}".format(url))
logging.info("Downloading {}".format(url))
try:
local_filename = os.path.abspath(trashfolder + url.split('/')[-1])
if os.path.isfile(local_filename):
@@ -28,10 +29,9 @@ class AoRar:
if chunk:
f.write(chunk)
except:
print("Error downloading. Reason : {}".format(format_exc()))
return None
raise BaseException("Error downloading. Reason : {}".format(format_exc()))
print("Downloaded {} bytes".format(request.headers['Content-length']))
logging.info("Downloaded {} bytes".format(request.headers['Content-length']))
return local_filename
def get_table_entries(self, file_name, allowed_tables):
@@ -43,7 +43,7 @@ class AoRar:
if xmltable.table_name in allowed_tables:
yield xmltable
else:
print "Done"
# os.remove(file_name) TODO : Uncomment
logging.info("All entries processed")
os.remove(file_name)
else:
print("No file specified or not exists")
logging.error("No file specified or not exists")

View File

@@ -14,7 +14,6 @@ from aore.dbutils.dbschemas import allowed_tables
class AoUpdater:
# Source: "http", directory (as a full path to unpacked xmls)
def __init__(self, source="http"):
logging.basicConfig(format='%(asctime)s %(message)s')
self.db_handler = DbHandler()
self.mode = source
self.updalist_generator = None
@@ -53,9 +52,9 @@ class AoUpdater:
self.updalist_generator = self.__get_updates_from_folder(self.mode)
self.tablelist_generator = self.__get_entries_from_folder
def process_single_entry(self, table_xmlentry, chunck_size=50000):
def process_single_entry(self, operation_type, table_xmlentry, chunck_size=50000):
aoparser = AoDataParser(table_xmlentry, chunck_size)
aoparser.parse(lambda x: self.db_handler.bulk_csv(chunck_size, table_xmlentry.table_name, x))
aoparser.parse(lambda x, y: self.db_handler.bulk_csv(operation_type, table_xmlentry.table_name, x, y))
def create(self):
self.__init_update_entries(True)
@@ -63,9 +62,11 @@ class AoUpdater:
for update_entry in self.updalist_generator:
for table_entry in self.tablelist_generator(update_entry['url']):
self.process_single_entry(table_entry)
if table_entry.operation_type == AoXmlTableEntry.OperationType.update:
table_entry.operation_type = AoXmlTableEntry.OperationType.create
self.process_single_entry(table_entry.operation_type, table_entry)
logging.warning("Create success")
logging.info("Create success")
def update(self, count=1):
self.__init_update_entries(False)
@@ -79,6 +80,6 @@ class AoUpdater:
break
for table_entry in self.tablelist_generator(update_entry['url']):
self.process_single_entry(table_entry)
self.process_single_entry(table_entry.operation_type, table_entry)
logging.warning("Update success")
logging.info("Update success")

View File

@@ -9,6 +9,7 @@ class AoXmlTableEntry:
class OperationType(Enum):
update = 1
delete = 0
create = 2
def __str__(self):
return self._name_