Add indexes and sphinx.conf (conf and query stolen from https://github.com/Yuego/django-fias)
parent 67f6943dce
commit 759efc43ee
@@ -66,6 +66,8 @@ class AoUpdater:
             table_entry.operation_type = AoXmlTableEntry.OperationType.create
             self.process_single_entry(table_entry.operation_type, table_entry)
 
+        self.db_handler.post_create()
+
         logging.info("Create success")
 
     def update(self, count=1):
@@ -76,7 +78,7 @@ class AoUpdater:
         for update_entry in self.updalist_generator:
             counter += 1
             if counter > count:
-                logging.warning("Maximum count of updates are processed - exit")
+                logging.warning("Maximum count of updates ({}) are processed - exit".format(count))
                 break
 
             for table_entry in self.tablelist_generator(update_entry['url']):
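Note: create() now finishes with db_handler.post_create(), so the btree indexes from post_create.sql (added below) are built only after the bulk COPY load; building indexes once after a mass load is much cheaper than maintaining them row by row during it.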
@@ -12,12 +12,14 @@ DB_INSTANCES = dict(
         user="postgres",
         password="intercon",
         database="postgres",
+        port=5432
     ),
     production=dict(
         host="localhost",
         user="***",
         password="***",
         database="***",
+        port=5432
     )
 )
 
@@ -26,10 +28,17 @@ UNRAR_PATHES = dict(
     production="unrar"
 )
 
+SPHINX_VAR_DIRS = dict(
+    test="C:/Sphinx",
+    production="/var/sphinx"
+)
+
 # Uncomment if you want to specify config_type manually
 # config_type = "test"
 
 # Main section
+sphinx_index_name="sph_addrobj"
+sphinx_var_dir=SPHINX_VAR_DIRS[config_type]
 db = DB_INSTANCES[config_type]
 unrar = UNRAR_PATHES[config_type]
 trashfolder = "files/"
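Note: these module-level names are what the rest of the commit imports. A minimal sketch of how they are consumed (assuming config_type is resolved earlier in aore/config.py):

    # a minimal sketch, assuming config_type is set above in aore/config.py
    from aore.config import db, sphinx_index_name, sphinx_var_dir

    print(db['host'], db['port'])   # psycopg2 params, also templated into Sphinx's pgsql source
    print(sphinx_index_name)        # "sph_addrobj", used to name the source/index pair
    print(sphinx_var_dir)           # base dir for Sphinx data/, log/ and run/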
@@ -3,6 +3,7 @@
 import logging
 
 import psycopg2
+from bottle import template
 
 from aore.aoutils.aoxmltableentry import AoXmlTableEntry
 from aore.config import db as dbparams
@@ -14,28 +15,13 @@ class DbHandler:
     def __init__(self):
         self.db = DBImpl(psycopg2, dbparams)
 
-        f = open("aore/templates/postgre/bulk_create.sql")
-        self.syntax_bulk_create = f.read()
-        f.close()
-
-        f = open("aore/templates/postgre/bulk_update.sql")
-        self.syntax_bulk_update = f.read()
-        f.close()
-
-        f = open("aore/templates/postgre/bulk_delete.sql")
-        self.syntax_bulk_delete = f.read()
-        f.close()
-
     def bulk_csv(self, operation_type, table_name, processed_count, csv_file_name):
         sql_query = None
 
         # simply add new records
         if operation_type == AoXmlTableEntry.OperationType.create:
-            sql_query = self.syntax_bulk_create \
-                .replace("%tab%", "\t") \
-                .replace("%tablename%", table_name) \
-                .replace("%fieldslist%", ", ".join(db_shemas[table_name].fields)) \
-                .replace("%csvname%", csv_file_name)
+            sql_query = template('aore/templates/postgre/bulk_create.sql', delim='\t', tablename=table_name,
+                                 fieldslist=", ".join(db_shemas[table_name].fields), csvname=csv_file_name)
 
         # update table
         if operation_type == AoXmlTableEntry.OperationType.update:
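Note: the hand-rolled %placeholder% string replacement gives way to bottle's SimpleTemplate renderer (bottle is already a dependency of this project). template() accepts either a template path or an inline source string and substitutes {{name}} variables. A minimal sketch with illustrative values:

    # minimal bottle.template() sketch: inline source, illustrative values
    from bottle import template

    sql = template("COPY \"{{tablename}}\" ({{fieldslist}}) FROM '{{csvname}}'",
                   tablename="ADDROBJ", fieldslist="aoid, aoguid",
                   csvname="/tmp/addrobj.csv")
    print(sql)  # COPY "ADDROBJ" (aoid, aoguid) FROM '/tmp/addrobj.csv'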
@@ -45,41 +31,34 @@ class DbHandler:
                 fields_update_list += "{}=EXCLUDED.{}, ".format(field, field)
             fields_update_list = fields_update_list[:-2]
 
-            sql_query = self.syntax_bulk_update \
-                .replace("%tab%", "\t") \
-                .replace("%tablename%", table_name) \
-                .replace("%fieldslist%", ", ".join(db_shemas[table_name].fields)) \
-                .replace("%csvname%", csv_file_name) \
-                .replace("%uniquekey%", db_shemas[table_name].unique_field) \
-                .replace("%updaterule%", fields_update_list)
-
-            if table_name == "ADDROBJ":
-                sql_query += "DELETE FROM \"%tablename%\" WHERE %filterrule%;" \
-                    .replace("%tablename%", table_name) \
-                    .replace("%filterrule%", "ACTSTATUS = FALSE OR NEXTID IS NOT NULL")
+            sql_query = template('aore/templates/postgre/bulk_update.sql', delim='\t', tablename=table_name,
+                                 fieldslist=", ".join(db_shemas[table_name].fields), csvname=csv_file_name,
+                                 uniquekey=db_shemas[table_name].unique_field, updaterule=fields_update_list)
 
         # delete records from table
         if operation_type == AoXmlTableEntry.OperationType.delete:
-            sql_query = self.syntax_bulk_delete \
-                .replace("%tab%", "\t") \
-                .replace("%tablename%", table_name) \
-                .replace("%fieldslist%", ", ".join(db_shemas[table_name].fields)) \
-                .replace("%csvname%", csv_file_name) \
-                .replace("%uniquekey%", db_shemas[table_name].unique_field)
+            sql_query = template('aore/templates/postgre/bulk_delete.sql', delim='\t', tablename=table_name,
+                                 fieldslist=", ".join(db_shemas[table_name].fields), csvname=csv_file_name,
+                                 uniquekey=db_shemas[table_name].unique_field)
 
         assert sql_query, "Invalid operation type: {}".format(operation_type)
 
         self.db.execute(sql_query)
-        logging.info("Processed {} queries FROM {}".format(processed_count-1, csv_file_name))
+        logging.info("Processed {} queries FROM {}".format(processed_count - 1, csv_file_name))
 
     def pre_create(self):
-        f = open("aore/templates/postgre/pre_create.sql")
-        sql_query = f.read()
-        f.close()
         logging.info("Prepare to create DB structure...")
+        sql_query = template("aore/templates/postgre/pre_create.sql")
 
         self.db.execute(sql_query)
 
+    def post_create(self):
+        logging.info("Indexing ADDROBJ...")
+        sql_query = template("aore/templates/postgre/post_create.sql")
+
+        self.db.execute(sql_query)
+        logging.info("Indexing done.")
+
     def pre_update(self):
         # TODO: update actions
         pass
 0  aore/miscutils/__init__.py  (new file)
38  aore/miscutils/sphinx.py  (new file)
@@ -0,0 +1,38 @@
+# -*- coding: utf-8 -*-
+
+import logging
+import os
+
+from bottle import template
+
+from aore.config import db as dbconfig, sphinx_index_name, sphinx_var_dir
+
+
+def produce_sphinx_config(config_name):
+    logging.info("Creating {}".format(config_name))
+    conf_data = template('aore/templates/sphinx/data.conf', db_host=dbconfig['host'], db_user=dbconfig['user'],
+                         db_password=dbconfig['password'],
+                         db_name=dbconfig['database'], db_port=dbconfig['port'],
+                         sql_query=template('aore/templates/postgre/sphinx_query.sql').replace("\n", " \\\n"), index_name=sphinx_index_name,
+                         sphinx_var_path=sphinx_var_dir)
+
+    conf_data += "\n" + template('aore/templates/sphinx/sphinx.conf', sphinx_var_path=sphinx_var_dir)
+
+    if os.path.isfile(config_name):
+        choice = raw_input(
+            "WARNING! File {} already exists. It will be overwritten, "
+            "all settings will be lost! Are you sure? [y/n]: ".format(
+                config_name))
+        if choice.lower() != 'y':
+            logging.warning("Aborted.")
+            return
+
+    conf_file = open(config_name, "w")
+    conf_file.write(conf_data)
+    conf_file.close()
+
+    logging.info("Success! Re-index db: \n"
+                 "\t$indexer -c {} --all --rotate\n"
+                 "and then re/start your Sphinx:\n"
+                 "\t$/etc/init.d/sphinxsearch stop\n"
+                 "\t$/etc/init.d/sphinxsearch start".format(config_name))
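Note: a hedged usage sketch of the new helper; the target path is illustrative:

    from aore.miscutils.sphinx import produce_sphinx_config

    # renders data.conf + sphinx.conf into one file, prompting before overwrite,
    # then logs the indexer/searchd commands to run next
    produce_sphinx_config("/etc/sphinxsearch/sphinx.conf")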
@@ -1 +1 @@
-COPY "%tablename%" (%fieldslist%) FROM '%csvname%' DELIMITER '%tab%' NULL 'NULL'
+COPY "{{tablename}}" ({{fieldslist}}) FROM '{{csvname}}' DELIMITER '{{delim}}' NULL 'NULL'
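Note: COPY ... FROM 'file' is executed by the Postgres server process, so csvname must be a path readable on the database host, and running it normally requires superuser rights. A hedged sketch of what the rendered statement looks like (table and field names illustrative):

    # hedged sketch: rendering the COPY template with illustrative values
    from bottle import template

    sql = template('aore/templates/postgre/bulk_create.sql',
                   delim='\t', tablename='ADDROBJ',
                   fieldslist='aoid, aoguid, aolevel', csvname='/tmp/addrobj.csv')
    print(sql)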
@@ -1,5 +1,5 @@
-DROP TABLE IF EXISTS "%tablename%_TEMP";
-CREATE TEMP TABLE "%tablename%_TEMP" ON COMMIT DROP AS SELECT *
-FROM "%tablename%" WITH NO DATA;
-COPY "%tablename%_TEMP" (%fieldslist%) FROM '%csvname%' DELIMITER '%tab%' NULL 'NULL';
-DELETE FROM "%tablename%" WHERE %uniquekey% IN (SELECT %uniquekey% FROM "%tablename%_TEMP");
+DROP TABLE IF EXISTS "{{tablename}}_TEMP";
+CREATE TEMP TABLE "{{tablename}}_TEMP" ON COMMIT DROP AS SELECT *
+FROM "{{tablename}}" WITH NO DATA;
+COPY "{{tablename}}_TEMP" ({{fieldslist}}) FROM '{{csvname}}' DELIMITER '{{delim}}' NULL 'NULL';
+DELETE FROM "{{tablename}}" WHERE {{uniquekey}} IN (SELECT {{uniquekey}} FROM "{{tablename}}_TEMP");
@@ -1,7 +1,10 @@
-DROP TABLE IF EXISTS "%tablename%_TEMP";
-CREATE TEMP TABLE "%tablename%_TEMP" ON COMMIT DROP AS SELECT *
-FROM "%tablename%" WITH NO DATA;
-COPY "%tablename%_TEMP" (%fieldslist%) FROM '%csvname%' DELIMITER '%tab%' NULL 'NULL';
-INSERT INTO "%tablename%" (%fieldslist%) SELECT %fieldslist%
+DROP TABLE IF EXISTS "{{tablename}}_TEMP";
+CREATE TEMP TABLE "{{tablename}}_TEMP" ON COMMIT DROP AS SELECT *
+FROM "{{tablename}}" WITH NO DATA;
+COPY "{{tablename}}_TEMP" ({{fieldslist}}) FROM '{{csvname}}' DELIMITER '{{delim}}' NULL 'NULL';
+INSERT INTO "{{tablename}}" ({{fieldslist}}) SELECT {{fieldslist}}
 FROM
-"%tablename%_TEMP" ON CONFLICT (%uniquekey%) DO UPDATE SET %updaterule%;
+"{{tablename}}_TEMP" ON CONFLICT ({{uniquekey}}) DO UPDATE SET {{updaterule}};
+% if tablename=="ADDROBJ":
+DELETE FROM "{{tablename}}" WHERE ACTSTATUS = FALSE OR NEXTID IS NOT NULL;
+% end
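Note: the new % if ... % end block is bottle SimpleTemplate syntax: {{name}} substitutes a value, and lines starting with % are embedded Python, so the ADDROBJ clean-up DELETE is emitted only for that table. A minimal sketch of the mechanism (inline template, illustrative values):

    # minimal SimpleTemplate sketch mirroring the conditional tail above
    from bottle import template

    tpl = ('DELETE FROM "{{tablename}}_TEMP";\n'
           '% if tablename == "ADDROBJ":\n'
           'DELETE FROM "{{tablename}}" WHERE ACTSTATUS = FALSE;\n'
           '% end')
    print(template(tpl, tablename="ADDROBJ"))   # renders both statements
    print(template(tpl, tablename="SOCRBASE"))  # renders only the first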
 4  aore/templates/postgre/post_create.sql  (new file)
@@ -0,0 +1,4 @@
+CREATE INDEX "sphinx_ind_aolevel" ON "ADDROBJ" USING btree ("aolevel");
+CREATE INDEX "sphinx_ind_parentguid" ON "ADDROBJ" USING btree ("parentguid");
+CREATE INDEX "sphinx_ind_livestatus" ON "ADDROBJ" USING btree ("livestatus");
+CREATE INDEX "sphinx_ind_aoguid" ON "ADDROBJ" USING btree ("aoguid");
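Note: each index matches a predicate in sphinx_query.sql below: aolevel and livestatus are filtered in both CTE branches, and parentguid drives the recursive join against aoguid. A hedged psycopg2 sketch to confirm the planner can use them (credentials are the "test" instance from the config diff; host assumed localhost):

    # hedged sketch: check the recursive join can use sphinx_ind_parentguid
    import psycopg2

    conn = psycopg2.connect(host="localhost", user="postgres",
                            password="intercon", dbname="postgres", port=5432)
    cur = conn.cursor()
    cur.execute('EXPLAIN SELECT * FROM "ADDROBJ" WHERE parentguid = %s',
                ('00000000-0000-0000-0000-000000000000',))
    for (line,) in cur.fetchall():
        print(line)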
@@ -1,5 +1,5 @@
-DROP TABLE IF EXISTS "public"."ADDROBJ";
-CREATE TABLE "public"."ADDROBJ" (
+DROP TABLE IF EXISTS "ADDROBJ";
+CREATE TABLE "ADDROBJ" (
     "id" SERIAL4 NOT NULL,
     "aoid" UUID NOT NULL,
     "aoguid" UUID,
@@ -15,8 +15,8 @@ CREATE TABLE "public"."ADDROBJ" (
 )
 WITH (OIDS =FALSE
 );
-DROP TABLE IF EXISTS "public"."SOCRBASE";
-CREATE TABLE "public"."SOCRBASE" (
+DROP TABLE IF EXISTS "SOCRBASE";
+CREATE TABLE "SOCRBASE" (
     "id" SERIAL4 NOT NULL,
     "level" INT2,
     "scname" VARCHAR(10),
14  aore/templates/postgre/sphinx_query.sql  (new file)
@@ -0,0 +1,14 @@
+
+WITH RECURSIVE PATH (cnt, aoid, aoguid, aolevel, fullname) AS (
+    SELECT ao.id as cnt, ao.aoid, ao.aoguid, ao.aolevel,
+           ao.shortname || ' ' || ao.formalname AS fullname
+    FROM "ADDROBJ" AS ao
+    WHERE aolevel = 1 AND livestatus = TRUE
+    UNION
+    SELECT child.id as cnt, child.aoid, child.aoguid, child.aolevel,
+           PATH.fullname || ', ' || child.shortname || ' ' || child.formalname AS fullname
+    FROM "ADDROBJ" AS child
+    , PATH
+    WHERE child.parentguid = PATH.aoguid AND livestatus = TRUE
+)
+SELECT * FROM PATH WHERE AOLEVEL NOT IN (1,3)
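Note: the recursive CTE starts from top-level address objects (aolevel = 1) and walks down via parentguid, concatenating shortname || ' ' || formalname at each step, so every row carries its full human-readable address in fullname; the final SELECT excludes levels 1 and 3 from the index feed. Because Sphinx's sql_query must be a single config value, produce_sphinx_config escapes the newlines:

    # hedged sketch: how the query is flattened for the sphinx config
    from bottle import template

    q = template('aore/templates/postgre/sphinx_query.sql').replace("\n", " \\\n")
    print(q)  # every line now ends with a backslash, valid as one sphinx.conf value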
41  aore/templates/sphinx/data.conf  (new file)
@@ -0,0 +1,41 @@
+source src_{{index_name}}
+{
+    type = pgsql
+    sql_host = {{db_host}}
+    sql_user = {{db_user}}
+    sql_pass = {{db_password}}
+    sql_db = {{db_name}}
+    sql_port = {{db_port}}
+
+    sql_query = {{!sql_query}}
+
+    sql_field_string = fullname
+    sql_attr_string = aoid
+    sql_attr_string = aoguid
+    sql_attr_uint = aolevel
+}
+
+index index_{{ index_name }}
+{
+    docinfo = extern
+    morphology = stem_ru
+    min_stemming_len = 2
+
+    stopwords =
+    min_word_len = 2
+    charset_type = utf-8
+    min_prefix_len = 1
+    min_infix_len = 0
+    enable_star = 1
+
+    # strip html by default
+    html_strip = 1
+
+    ignore_chars = @, -
+    charset_table = 0..9, A..Z->a..z, _, a..z, \
+        U+0401->U+0435, U+0451->U+0435, \
+        U+410..U+42F->U+430..U+44F, U+430..U+44F
+
+    source = src_{{index_name}}
+    path = {{sphinx_var_path}}/data/index_{{index_name}}
+}
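Note: a few things this definition encodes: {{!sql_query}} uses bottle's raw (unescaped) substitution so quotes in the SQL survive; charset_table folds Latin letters to lowercase and maps Ё/ё (U+0401/U+0451) to е while lowercasing Cyrillic (U+410..U+42F -> U+430..U+44F); min_prefix_len = 1 plus enable_star = 1 permits wildcard prefix queries like моск*. A hedged query sketch with the stock sphinxapi client (not part of this repo; assumes searchd is running on the address from sphinx.conf):

    # hedged sketch using the stock sphinxapi client
    import sphinxapi

    cl = sphinxapi.SphinxClient()
    cl.SetServer('127.0.0.1', 9312)             # searchd listen address from sphinx.conf
    res = cl.Query('моск*', 'index_sph_addrobj')
    if res:
        for match in res['matches']:
            print(match['id'], match['attrs'])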
63  aore/templates/sphinx/sphinx.conf  (new file)
@@ -0,0 +1,63 @@
+indexer
+{
+    # memory limit, in bytes, kilobytes (16384K) or megabytes (256M)
+    # optional, default is 32M, max is 2047M, recommended is 256M to 1024M
+    mem_limit = 256M
+
+    # maximum IO calls per second (for I/O throttling)
+    # optional, default is 0 (unlimited)
+    #
+    # max_iops = 40
+
+    # maximum IO call size, bytes (for I/O throttling)
+    # optional, default is 0 (unlimited)
+    #
+    max_iosize = 524288
+}
+
+searchd
+{
+
+    listen = 127.0.0.1:9312
+
+    # required by RT-indexes
+    workers = threads
+
+    # log file, searchd run info is logged here
+    # optional, default is 'searchd.log'
+    log = {{sphinx_var_path}}/log/searchd.log
+
+    # query log file, all search queries are logged here
+    # optional, default is empty (do not log queries)
+    query_log = {{sphinx_var_path}}/log/query.log
+
+    # client read timeout, seconds
+    # optional, default is 5
+    read_timeout = 5
+
+    # maximum amount of children to fork (concurrent searches to run)
+    # optional, default is 0 (unlimited)
+    max_children = 30
+
+    # PID file, searchd process ID file name
+    # mandatory
+    pid_file = {{sphinx_var_path}}/run/searchd.pid
+
+    # max amount of matches the daemon ever keeps in RAM, per-index
+    # WARNING, THERE'S ALSO PER-QUERY LIMIT, SEE SetLimits() API CALL
+    # default is 1000 (just like Google)
+    max_matches = 1000
+
+    # seamless rotate, prevents rotate stalls if precaching huge datasets
+    # optional, default is 1
+    seamless_rotate = 1
+
+    # whether to forcibly preopen all indexes on startup
+    # optional, default is 0 (do not preopen)
+    preopen_indexes = 0
+
+    # whether to unlink .old index copies on successful rotation.
+    # optional, default is 1 (do unlink)
+    unlink_old = 1
+}
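Note: the generated paths assume {{sphinx_var_path}}/data, /log and /run already exist; indexer and searchd will not create them. A hedged helper sketch:

    # hedged sketch: create the var-dirs the generated config points at
    import os

    from aore.config import sphinx_var_dir

    for sub in ("data", "log", "run"):
        path = os.path.join(sphinx_var_dir, sub)
        if not os.path.isdir(path):
            os.makedirs(path)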
@@ -3,6 +3,7 @@
 import optparse
 
 from aore.aoutils.aoupdater import AoUpdater
+from aore.miscutils.sphinx import produce_sphinx_config
 
 
 def update_base(xml_source, updates_count):
@@ -24,6 +25,8 @@ def main():
                  help="Count of updates to process, only for '--database update' option")
     p.add_option('--source', '-s', default="http",
                  help="Create/update DB from source. Value: \"http\" or absolute path to folder")
+    p.add_option('--sphinx-configure', '-c', action="store", type="string",
+                 help="Get Sphinx config. Value: /path/to/sphinx.conf")
     options, arguments = p.parse_args()
 
     if options.database:
@@ -34,6 +37,9 @@ def main():
     if options.database == "update":
         update_base(options.source, int(options.update_count))
 
+    if options.sphinx_configure:
+        produce_sphinx_config(options.sphinx_configure)
+
 
 if __name__ == '__main__':
     main()
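Note: with the new option wired in, generating a config from the entry script looks like `python manage.py --sphinx-configure /etc/sphinxsearch/sphinx.conf` (or `-c` for short); the script name and target path here are illustrative, since the file header for this diff was not preserved.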