diff --git a/aore/aoutils/aoupdater.py b/aore/aoutils/aoupdater.py index bb36b27..28eabf5 100644 --- a/aore/aoutils/aoupdater.py +++ b/aore/aoutils/aoupdater.py @@ -66,6 +66,8 @@ class AoUpdater: table_entry.operation_type = AoXmlTableEntry.OperationType.create self.process_single_entry(table_entry.operation_type, table_entry) + self.db_handler.post_create() + logging.info("Create success") def update(self, count=1): @@ -76,7 +78,7 @@ class AoUpdater: for update_entry in self.updalist_generator: counter += 1 if counter > count: - logging.warning("Maximum count of updates are processed - exit") + logging.warning("Maximum count of updates ({}) are processed - exit".format(count)) break for table_entry in self.tablelist_generator(update_entry['url']): diff --git a/aore/config.py b/aore/config.py index b354591..d63e107 100644 --- a/aore/config.py +++ b/aore/config.py @@ -12,12 +12,14 @@ DB_INSTANCES = dict( user="postgres", password="intercon", database="postgres", + port=5432 ), production=dict( host="localhost", user="***", password="***", database="***", + port=5432 ) ) @@ -26,10 +28,17 @@ UNRAR_PATHES = dict( production="unrar" ) +SPHINX_VAR_DIRS = dict( + test="C:/Sphinx", + production="/var/sphinx" +) + # Uncomment if you want to specify config_type manually # config_type = "test" # Main section +sphinx_index_name="sph_addrobj" +sphinx_var_dir=SPHINX_VAR_DIRS[config_type] db = DB_INSTANCES[config_type] unrar = UNRAR_PATHES[config_type] trashfolder = "files/" diff --git a/aore/dbutils/dbhandler.py b/aore/dbutils/dbhandler.py index fb57e55..b877cad 100644 --- a/aore/dbutils/dbhandler.py +++ b/aore/dbutils/dbhandler.py @@ -3,6 +3,7 @@ import logging import psycopg2 +from bottle import template from aore.aoutils.aoxmltableentry import AoXmlTableEntry from aore.config import db as dbparams @@ -14,28 +15,13 @@ class DbHandler: def __init__(self): self.db = DBImpl(psycopg2, dbparams) - f = open("aore/templates/postgre/bulk_create.sql") - self.syntax_bulk_create = 
f.read() - f.close() - - f = open("aore/templates/postgre/bulk_update.sql") - self.syntax_bulk_update = f.read() - f.close() - - f = open("aore/templates/postgre/bulk_delete.sql") - self.syntax_bulk_delete = f.read() - f.close() - def bulk_csv(self, operation_type, table_name, processed_count, csv_file_name): sql_query = None # simple add new reocrds if operation_type == AoXmlTableEntry.OperationType.create: - sql_query = self.syntax_bulk_create \ - .replace("%tab%", "\t") \ - .replace("%tablename%", table_name) \ - .replace("%fieldslist%", ", ".join(db_shemas[table_name].fields)) \ - .replace("%csvname%", csv_file_name) + sql_query = template('aore/templates/postgre/bulk_create.sql', delim='\t', tablename=table_name, + fieldslist=", ".join(db_shemas[table_name].fields), csvname=csv_file_name) # update table if operation_type == AoXmlTableEntry.OperationType.update: @@ -45,41 +31,34 @@ class DbHandler: fields_update_list += "{}=EXCLUDED.{}, ".format(field, field) fields_update_list = fields_update_list[:-2] - sql_query = self.syntax_bulk_update \ - .replace("%tab%", "\t") \ - .replace("%tablename%", table_name) \ - .replace("%fieldslist%", ", ".join(db_shemas[table_name].fields)) \ - .replace("%csvname%", csv_file_name) \ - .replace("%uniquekey%", db_shemas[table_name].unique_field) \ - .replace("%updaterule%", fields_update_list) - - if table_name == "ADDROBJ": - sql_query += "DELETE FROM \"%tablename%\" WHERE %filterrule%;" \ - .replace("%tablename%", table_name) \ - .replace("%filterrule%", - "ACTSTATUS = FALSE OR NEXTID IS NOT NULL") + sql_query = template('aore/templates/postgre/bulk_update.sql', delim='\t', tablename=table_name, + fieldslist=", ".join(db_shemas[table_name].fields), csvname=csv_file_name, + uniquekey=db_shemas[table_name].unique_field, updaterule=fields_update_list) # delete records from table if operation_type == AoXmlTableEntry.OperationType.delete: - sql_query = self.syntax_bulk_delete \ - .replace("%tab%", "\t") \ - .replace("%tablename%", 
table_name) \ - .replace("%fieldslist%", ", ".join(db_shemas[table_name].fields)) \ - .replace("%csvname%", csv_file_name) \ - .replace("%uniquekey%", db_shemas[table_name].unique_field) + sql_query = template('aore/templates/postgre/bulk_delete.sql', delim='\t', tablename=table_name, + fieldslist=", ".join(db_shemas[table_name].fields), csvname=csv_file_name, + uniquekey=db_shemas[table_name].unique_field) assert sql_query, "Invalid operation type: {}".format(operation_type) self.db.execute(sql_query) - logging.info("Processed {} queries FROM {}".format(processed_count-1, csv_file_name)) + logging.info("Processed {} queries FROM {}".format(processed_count - 1, csv_file_name)) def pre_create(self): - f = open("aore/templates/postgre/pre_create.sql") - sql_query = f.read() - f.close() + logging.info("Prepare to create DB structure...") + sql_query = template("aore/templates/postgre/pre_create.sql") self.db.execute(sql_query) + def post_create(self): + logging.info("Indexing ADDROBJ...") + sql_query = template("aore/templates/postgre/post_create.sql") + + self.db.execute(sql_query) + logging.info("Indexing done.") + def pre_update(self): # TODO: update actions pass diff --git a/aore/miscutils/__init__.py b/aore/miscutils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/aore/miscutils/sphinx.py b/aore/miscutils/sphinx.py new file mode 100644 index 0000000..1b047ef --- /dev/null +++ b/aore/miscutils/sphinx.py @@ -0,0 +1,38 @@ +# -*- coding: utf-8 -*- + +import logging +import os + +from bottle import template + +from aore.config import db as dbconfig, sphinx_index_name, sphinx_var_dir + + +def produce_sphinx_config(config_name): + logging.info("Creating {}".format(config_name)) + conf_data = template('aore/templates/sphinx/data.conf', db_host=dbconfig['host'], db_user=dbconfig['user'], + db_password=dbconfig['password'], + db_name=dbconfig['database'], db_port=dbconfig['port'], + 
sql_query=template('aore/templates/postgre/sphinx_query.sql').replace("\n"," \\\n"), index_name=sphinx_index_name, + sphinx_var_path=sphinx_var_dir) + + conf_data += "\n" + template('aore/templates/sphinx/sphinx.conf', sphinx_var_path=sphinx_var_dir) + + if os.path.isfile(config_name): + choice = raw_input( + "WARNING! File {} already exists. It will be overwritten, " + "all settings all setting will be lost! Are you sure? [y/n]: ".format( + config_name)) + if choice.lower() != 'y': + logging.warning("Aborted.") + return + + conf_file = open(config_name, "w") + conf_file.write(conf_data) + conf_file.close() + + logging.info("Success! Re-index db: \n" + "\t$indexer -c {} --all --rotate\n" + "and then re/start your Sphinx:\n" + "\t$/etc/init.d/sphinxsearch stop\n" + "\t$/etc/init.d/sphinxsearch start".format(config_name)) diff --git a/aore/templates/postgre/bulk_create.sql b/aore/templates/postgre/bulk_create.sql index a04ecae..23a3af6 100644 --- a/aore/templates/postgre/bulk_create.sql +++ b/aore/templates/postgre/bulk_create.sql @@ -1 +1 @@ -COPY "%tablename%" (%fieldslist%) FROM '%csvname%' DELIMITER '%tab%' NULL 'NULL' \ No newline at end of file +COPY "{{tablename}}" ({{fieldslist}}) FROM '{{csvname}}' DELIMITER '{{delim}}' NULL 'NULL' \ No newline at end of file diff --git a/aore/templates/postgre/bulk_delete.sql b/aore/templates/postgre/bulk_delete.sql index b0e90a5..47e27fa 100644 --- a/aore/templates/postgre/bulk_delete.sql +++ b/aore/templates/postgre/bulk_delete.sql @@ -1,5 +1,5 @@ -DROP TABLE IF EXISTS "%tablename%_TEMP"; -CREATE TEMP TABLE "%tablename%_TEMP" ON COMMIT DROP AS SELECT * - FROM "%tablename%" WITH NO DATA; -COPY "%tablename%_TEMP" (%fieldslist%) FROM '%csvname%' DELIMITER '%tab%' NULL 'NULL'; -DELETE FROM "%tablename%" WHERE %uniquekey% IN (SELECT %uniquekey% FROM "%tablename%_TEMP"); \ No newline at end of file +DROP TABLE IF EXISTS "{{tablename}}_TEMP"; +CREATE TEMP TABLE "{{tablename}}_TEMP" ON COMMIT DROP AS SELECT * + FROM 
"{{tablename}}" WITH NO DATA; +COPY "{{tablename}}_TEMP" ({{fieldslist}}) FROM '{{csvname}}' DELIMITER '{{delim}}' NULL 'NULL'; +DELETE FROM "{{tablename}}" WHERE {{uniquekey}} IN (SELECT {{uniquekey}} FROM "{{tablename}}_TEMP"); \ No newline at end of file diff --git a/aore/templates/postgre/bulk_update.sql b/aore/templates/postgre/bulk_update.sql index 36aa3b8..43a2d3e 100644 --- a/aore/templates/postgre/bulk_update.sql +++ b/aore/templates/postgre/bulk_update.sql @@ -1,7 +1,10 @@ -DROP TABLE IF EXISTS "%tablename%_TEMP"; -CREATE TEMP TABLE "%tablename%_TEMP" ON COMMIT DROP AS SELECT * - FROM "%tablename%" WITH NO DATA; -COPY "%tablename%_TEMP" (%fieldslist%) FROM '%csvname%' DELIMITER '%tab%' NULL 'NULL'; -INSERT INTO "%tablename%" (%fieldslist%) SELECT %fieldslist% +DROP TABLE IF EXISTS "{{tablename}}_TEMP"; +CREATE TEMP TABLE "{{tablename}}_TEMP" ON COMMIT DROP AS SELECT * + FROM "{{tablename}}" WITH NO DATA; +COPY "{{tablename}}_TEMP" ({{fieldslist}}) FROM '{{csvname}}' DELIMITER '{{delim}}' NULL 'NULL'; +INSERT INTO "{{tablename}}" ({{fieldslist}}) SELECT {{fieldslist}} FROM -"%tablename%_TEMP" ON CONFLICT (%uniquekey%) DO UPDATE SET %updaterule%; \ No newline at end of file +"{{tablename}}_TEMP" ON CONFLICT ({{uniquekey}}) DO UPDATE SET {{updaterule}}; +% if tablename=="ADDROBJ": +DELETE FROM "{{tablename}}" WHERE ACTSTATUS = FALSE OR NEXTID IS NOT NULL; +% end \ No newline at end of file diff --git a/aore/templates/postgre/post_create.sql b/aore/templates/postgre/post_create.sql new file mode 100644 index 0000000..49e52fb --- /dev/null +++ b/aore/templates/postgre/post_create.sql @@ -0,0 +1,4 @@ +CREATE INDEX "sphinx_ind_aolevel" ON "ADDROBJ" USING btree ("aolevel"); +CREATE INDEX "sphinx_ind_parentguid" ON "ADDROBJ" USING btree ("parentguid"); +CREATE INDEX "sphinx_ind_livestatus" ON "ADDROBJ" USING btree ("livestatus"); +CREATE INDEX "sphinx_ind_aoguid" ON "ADDROBJ" USING btree ("aoguid"); \ No newline at end of file diff --git 
a/aore/templates/postgre/pre_create.sql b/aore/templates/postgre/pre_create.sql index 942e5c4..aff9d28 100644 --- a/aore/templates/postgre/pre_create.sql +++ b/aore/templates/postgre/pre_create.sql @@ -1,5 +1,5 @@ -DROP TABLE IF EXISTS "public"."ADDROBJ"; -CREATE TABLE "public"."ADDROBJ" ( +DROP TABLE IF EXISTS "ADDROBJ"; +CREATE TABLE "ADDROBJ" ( "id" SERIAL4 NOT NULL, "aoid" UUID NOT NULL, "aoguid" UUID, @@ -15,8 +15,8 @@ CREATE TABLE "public"."ADDROBJ" ( ) WITH (OIDS =FALSE ); -DROP TABLE IF EXISTS "public"."SOCRBASE"; -CREATE TABLE "public"."SOCRBASE" ( +DROP TABLE IF EXISTS "SOCRBASE"; +CREATE TABLE "SOCRBASE" ( "id" SERIAL4 NOT NULL, "level" INT2, "scname" VARCHAR(10), diff --git a/aore/templates/postgre/sphinx_query.sql b/aore/templates/postgre/sphinx_query.sql new file mode 100644 index 0000000..cf22267 --- /dev/null +++ b/aore/templates/postgre/sphinx_query.sql @@ -0,0 +1,14 @@ + + WITH RECURSIVE PATH (cnt, aoid, aoguid, aolevel, fullname) AS ( + SELECT ao.id as cnt, ao.aoid, ao.aoguid, ao.aolevel, + ao.shortname || ' ' || ao.formalname AS fullname + FROM "ADDROBJ" AS ao + WHERE aolevel = 1 AND livestatus = TRUE + UNION + SELECT child.id as cnt, child.aoid, child.aoguid, child.aolevel, + PATH.fullname || ', ' || child.shortname || ' ' || child.formalname AS fullname + FROM "ADDROBJ" AS child + , PATH + WHERE child.parentguid = PATH.aoguid AND livestatus = TRUE + ) + SELECT * FROM PATH WHERE AOLEVEL NOT IN (1,3) \ No newline at end of file diff --git a/aore/templates/sphinx/data.conf b/aore/templates/sphinx/data.conf new file mode 100644 index 0000000..0e5b341 --- /dev/null +++ b/aore/templates/sphinx/data.conf @@ -0,0 +1,41 @@ +source src_{{index_name}} +{ + type = pgsql + sql_host = {{db_host}} + sql_user = {{db_user}} + sql_pass = {{db_password}} + sql_db = {{db_name}} + sql_port = {{db_port}} + + sql_query = {{!sql_query}} + + sql_field_string = fullname + sql_attr_string = aoid + sql_attr_string = aoguid + sql_attr_uint = aolevel +} + +index index_{{ 
index_name }} +{ + docinfo = extern + morphology = stem_ru + min_stemming_len = 2 + + stopwords = + min_word_len = 2 + charset_type = utf-8 + min_prefix_len = 1 + min_infix_len = 0 + enable_star = 1 + + # strip html by default + html_strip = 1 + + ignore_chars = @, - + charset_table = 0..9, A..Z->a..z, _, a..z, \ + U+0401->U+0435, U+0451->U+0435, \ + U+410..U+42F->U+430..U+44F, U+430..U+44F + + source = src_{{index_name}} + path = {{sphinx_var_path}}/data/index_{{index_name}} +} \ No newline at end of file diff --git a/aore/templates/sphinx/sphinx.conf b/aore/templates/sphinx/sphinx.conf new file mode 100644 index 0000000..aa890d1 --- /dev/null +++ b/aore/templates/sphinx/sphinx.conf @@ -0,0 +1,63 @@ +indexer +{ + # memory limit, in bytes, kilobytes (16384K) or megabytes (256M) + # optional, default is 32M, max is 2047M, recommended is 256M to 1024M + mem_limit = 256M + + # maximum IO calls per second (for I/O throttling) + # optional, default is 0 (unlimited) + # + # max_iops = 40 + + # maximum IO call size, bytes (for I/O throttling) + # optional, default is 0 (unlimited) + # + max_iosize = 524288 +} + +searchd +{ + + listen = 127.0.0.1:9312 + + # required by RT-indexes + workers = threads + + # log file, searchd run info is logged here + # optional, default is 'searchd.log' + log = {{sphinx_var_path}}/log/searchd.log + + # query log file, all search queries are logged here + # optional, default is empty (do not log queries) + query_log = {{sphinx_var_path}}/log/query.log + + # client read timeout, seconds + # optional, default is 5 + read_timeout = 5 + + # maximum amount of children to fork (concurrent searches to run) + # optional, default is 0 (unlimited) + max_children = 30 + + # PID file, searchd process ID file name + # mandatory + pid_file = {{sphinx_var_path}}/run/searchd.pid + + # max amount of matches the daemon ever keeps in RAM, per-index + # WARNING, THERE'S ALSO PER-QUERY LIMIT, SEE SetLimits() API CALL + # default is 1000 (just like Google) + 
max_matches = 1000 + + # seamless rotate, prevents rotate stalls if precaching huge datasets + # optional, default is 1 + seamless_rotate = 1 + + # whether to forcibly preopen all indexes on startup + # optional, default is 0 (do not preopen) + preopen_indexes = 0 + + # whether to unlink .old index copies on successful rotation. + # optional, default is 1 (do unlink) + unlink_old = 1 +} + diff --git a/manage.py b/manage.py index 5b1a494..c6bb8ee 100644 --- a/manage.py +++ b/manage.py @@ -3,6 +3,7 @@ import optparse from aore.aoutils.aoupdater import AoUpdater +from aore.miscutils.sphinx import produce_sphinx_config def update_base(xml_source, updates_count): @@ -24,6 +25,8 @@ def main(): help="Count of updates to process, only for '--database update' option") p.add_option('--source', '-s', default="http", help="Create/update DB from source. Value: \"http\" or absolute path to folder") + p.add_option('--sphinx-configure', '-c', action="store", type="string", + help="Get Sphinx config. Value: /path/to/sphinx.conf") options, arguments = p.parse_args() if options.database: @@ -34,6 +37,9 @@ def main(): if options.database == "update": update_base(options.source, int(options.update_count)) + if options.sphinx_configure: + produce_sphinx_config(options.sphinx_configure) + if __name__ == '__main__': main()