Add indexes and sphinx.conf (conf and query stolen from https://github.com/Yuego/django-fias)
This commit is contained in:
parent 67f6943dce · commit 759efc43ee
@@ -66,6 +66,8 @@ class AoUpdater:
                 table_entry.operation_type = AoXmlTableEntry.OperationType.create
                 self.process_single_entry(table_entry.operation_type, table_entry)
 
+        self.db_handler.post_create()
+
         logging.info("Create success")
 
     def update(self, count=1):
@@ -76,7 +78,7 @@ class AoUpdater:
         for update_entry in self.updalist_generator:
             counter += 1
             if counter > count:
-                logging.warning("Maximum count of updates are processed - exit")
+                logging.warning("Maximum count of updates ({}) are processed - exit".format(count))
                 break
 
             for table_entry in self.tablelist_generator(update_entry['url']):
@@ -12,12 +12,14 @@ DB_INSTANCES = dict(
         user="postgres",
         password="intercon",
         database="postgres",
+        port=5432
     ),
     production=dict(
         host="localhost",
         user="***",
         password="***",
         database="***",
+        port=5432
     )
 )
 
@@ -26,10 +28,17 @@ UNRAR_PATHES = dict(
     production="unrar"
 )
 
+SPHINX_VAR_DIRS = dict(
+    test="C:/Sphinx",
+    production="/var/sphinx"
+)
+
 # Uncomment if you want to specify config_type manually
 # config_type = "test"
 
 # Main section
+sphinx_index_name="sph_addrobj"
+sphinx_var_dir=SPHINX_VAR_DIRS[config_type]
 db = DB_INSTANCES[config_type]
 unrar = UNRAR_PATHES[config_type]
 trashfolder = "files/"
@@ -3,6 +3,7 @@
 import logging
 
 import psycopg2
+from bottle import template
 
 from aore.aoutils.aoxmltableentry import AoXmlTableEntry
 from aore.config import db as dbparams
@@ -14,28 +15,13 @@ class DbHandler:
     def __init__(self):
         self.db = DBImpl(psycopg2, dbparams)
 
-        f = open("aore/templates/postgre/bulk_create.sql")
-        self.syntax_bulk_create = f.read()
-        f.close()
-
-        f = open("aore/templates/postgre/bulk_update.sql")
-        self.syntax_bulk_update = f.read()
-        f.close()
-
-        f = open("aore/templates/postgre/bulk_delete.sql")
-        self.syntax_bulk_delete = f.read()
-        f.close()
-
     def bulk_csv(self, operation_type, table_name, processed_count, csv_file_name):
         sql_query = None
 
         # simply add new records
         if operation_type == AoXmlTableEntry.OperationType.create:
-            sql_query = self.syntax_bulk_create \
-                .replace("%tab%", "\t") \
-                .replace("%tablename%", table_name) \
-                .replace("%fieldslist%", ", ".join(db_shemas[table_name].fields)) \
-                .replace("%csvname%", csv_file_name)
+            sql_query = template('aore/templates/postgre/bulk_create.sql', delim='\t', tablename=table_name,
+                                 fieldslist=", ".join(db_shemas[table_name].fields), csvname=csv_file_name)
 
         # update table
         if operation_type == AoXmlTableEntry.OperationType.update:
@@ -45,28 +31,15 @@ class DbHandler:
                 fields_update_list += "{}=EXCLUDED.{}, ".format(field, field)
             fields_update_list = fields_update_list[:-2]
 
-            sql_query = self.syntax_bulk_update \
-                .replace("%tab%", "\t") \
-                .replace("%tablename%", table_name) \
-                .replace("%fieldslist%", ", ".join(db_shemas[table_name].fields)) \
-                .replace("%csvname%", csv_file_name) \
-                .replace("%uniquekey%", db_shemas[table_name].unique_field) \
-                .replace("%updaterule%", fields_update_list)
-
-            if table_name == "ADDROBJ":
-                sql_query += "DELETE FROM \"%tablename%\" WHERE %filterrule%;" \
-                    .replace("%tablename%", table_name) \
-                    .replace("%filterrule%",
-                             "ACTSTATUS = FALSE OR NEXTID IS NOT NULL")
+            sql_query = template('aore/templates/postgre/bulk_update.sql', delim='\t', tablename=table_name,
+                                 fieldslist=", ".join(db_shemas[table_name].fields), csvname=csv_file_name,
+                                 uniquekey=db_shemas[table_name].unique_field, updaterule=fields_update_list)
 
         # delete records from table
         if operation_type == AoXmlTableEntry.OperationType.delete:
-            sql_query = self.syntax_bulk_delete \
-                .replace("%tab%", "\t") \
-                .replace("%tablename%", table_name) \
-                .replace("%fieldslist%", ", ".join(db_shemas[table_name].fields)) \
-                .replace("%csvname%", csv_file_name) \
-                .replace("%uniquekey%", db_shemas[table_name].unique_field)
+            sql_query = template('aore/templates/postgre/bulk_delete.sql', delim='\t', tablename=table_name,
+                                 fieldslist=", ".join(db_shemas[table_name].fields), csvname=csv_file_name,
+                                 uniquekey=db_shemas[table_name].unique_field)
 
         assert sql_query, "Invalid operation type: {}".format(operation_type)
 
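Note on the refactor above: the hand-rolled %placeholder% substitution is replaced by bottle's SimpleTemplate engine, which fills {{name}} tokens from keyword arguments. A minimal sketch of the mechanism (the table and field names below are illustrative, not taken from the FIAS schema):

    # Minimal sketch (not part of the commit): template() renders {{name}}
    # tokens from keyword arguments, replacing chains of str.replace().
    from bottle import template

    # Inline string mirroring aore/templates/postgre/bulk_create.sql
    TPL = ("COPY \"{{tablename}}\" ({{fieldslist}}) FROM '{{csvname}}' "
           "DELIMITER '{{delim}}' NULL 'NULL'")

    sql = template(TPL, tablename="ADDROBJ",
                   fieldslist="aoid, aoguid, aolevel",  # illustrative subset
                   csvname="/tmp/addrobj.csv", delim="\t")
    print(sql)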
@@ -74,12 +47,18 @@ class DbHandler:
         logging.info("Processed {} queries FROM {}".format(processed_count - 1, csv_file_name))
 
     def pre_create(self):
-        f = open("aore/templates/postgre/pre_create.sql")
-        sql_query = f.read()
-        f.close()
+        logging.info("Prepare to create DB structure...")
+        sql_query = template("aore/templates/postgre/pre_create.sql")
 
         self.db.execute(sql_query)
 
+    def post_create(self):
+        logging.info("Indexing ADDROBJ...")
+        sql_query = template("aore/templates/postgre/post_create.sql")
+
+        self.db.execute(sql_query)
+        logging.info("Indexing done.")
+
     def pre_update(self):
         # TODO: update actions
         pass
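Together with the AoUpdater change in the first hunk, the create path now defers indexing until the bulk COPY has finished, which keeps the initial import fast. A rough sketch of the resulting call order (the loop and the names table_entries/csv_path are simplified placeholders, not the commit's actual code):

    # Simplified call order after this commit; the real loop lives in
    # AoUpdater.create().
    db_handler = DbHandler()
    db_handler.pre_create()                # pre_create.sql: DROP/CREATE tables
    for i, entry in enumerate(table_entries, start=1):  # AoXmlTableEntry items
        db_handler.bulk_csv(entry.operation_type, entry.table_name, i, csv_path)
    db_handler.post_create()               # post_create.sql: CREATE INDEX ...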
aore/miscutils/__init__.py (new file, 0 lines)
aore/miscutils/sphinx.py (new file, 38 lines)
@@ -0,0 +1,38 @@
+# -*- coding: utf-8 -*-
+
+import logging
+import os
+
+from bottle import template
+
+from aore.config import db as dbconfig, sphinx_index_name, sphinx_var_dir
+
+
+def produce_sphinx_config(config_name):
+    logging.info("Creating {}".format(config_name))
+    conf_data = template('aore/templates/sphinx/data.conf', db_host=dbconfig['host'], db_user=dbconfig['user'],
+                         db_password=dbconfig['password'],
+                         db_name=dbconfig['database'], db_port=dbconfig['port'],
+                         sql_query=template('aore/templates/postgre/sphinx_query.sql').replace("\n", " \\\n"), index_name=sphinx_index_name,
+                         sphinx_var_path=sphinx_var_dir)
+
+    conf_data += "\n" + template('aore/templates/sphinx/sphinx.conf', sphinx_var_path=sphinx_var_dir)
+
+    if os.path.isfile(config_name):
+        choice = raw_input(
+            "WARNING! File {} already exists. It will be overwritten, "
+            "all settings will be lost! Are you sure? [y/n]: ".format(
+                config_name))
+        if choice.lower() != 'y':
+            logging.warning("Aborted.")
+            return
+
+    conf_file = open(config_name, "w")
+    conf_file.write(conf_data)
+    conf_file.close()
+
+    logging.info("Success! Re-index db: \n"
+                 "\t$indexer -c {} --all --rotate\n"
+                 "and then re/start your Sphinx:\n"
+                 "\t$/etc/init.d/sphinxsearch stop\n"
+                 "\t$/etc/init.d/sphinxsearch start".format(config_name))
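Note that raw_input pins this module to Python 2. A hedged usage sketch (the target path below is an example, not mandated by the code):

    # Illustrative invocation: renders data.conf + sphinx.conf with the DB
    # credentials and the recursive address query, then prints the
    # indexer/searchd commands to run.
    from aore.miscutils.sphinx import produce_sphinx_config

    produce_sphinx_config("/etc/sphinxsearch/sphinx.conf")
    # then, as the success message suggests:
    #   indexer -c /etc/sphinxsearch/sphinx.conf --all --rotate
    #   /etc/init.d/sphinxsearch stop && /etc/init.d/sphinxsearch start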
@@ -1 +1 @@
-COPY "%tablename%" (%fieldslist%) FROM '%csvname%' DELIMITER '%tab%' NULL 'NULL'
+COPY "{{tablename}}" ({{fieldslist}}) FROM '{{csvname}}' DELIMITER '{{delim}}' NULL 'NULL'
@@ -1,5 +1,5 @@
-DROP TABLE IF EXISTS "%tablename%_TEMP";
-CREATE TEMP TABLE "%tablename%_TEMP" ON COMMIT DROP AS SELECT *
-FROM "%tablename%" WITH NO DATA;
-COPY "%tablename%_TEMP" (%fieldslist%) FROM '%csvname%' DELIMITER '%tab%' NULL 'NULL';
-DELETE FROM "%tablename%" WHERE %uniquekey% IN (SELECT %uniquekey% FROM "%tablename%_TEMP");
+DROP TABLE IF EXISTS "{{tablename}}_TEMP";
+CREATE TEMP TABLE "{{tablename}}_TEMP" ON COMMIT DROP AS SELECT *
+FROM "{{tablename}}" WITH NO DATA;
+COPY "{{tablename}}_TEMP" ({{fieldslist}}) FROM '{{csvname}}' DELIMITER '{{delim}}' NULL 'NULL';
+DELETE FROM "{{tablename}}" WHERE {{uniquekey}} IN (SELECT {{uniquekey}} FROM "{{tablename}}_TEMP");
@@ -1,7 +1,10 @@
-DROP TABLE IF EXISTS "%tablename%_TEMP";
-CREATE TEMP TABLE "%tablename%_TEMP" ON COMMIT DROP AS SELECT *
-FROM "%tablename%" WITH NO DATA;
-COPY "%tablename%_TEMP" (%fieldslist%) FROM '%csvname%' DELIMITER '%tab%' NULL 'NULL';
-INSERT INTO "%tablename%" (%fieldslist%) SELECT %fieldslist%
+DROP TABLE IF EXISTS "{{tablename}}_TEMP";
+CREATE TEMP TABLE "{{tablename}}_TEMP" ON COMMIT DROP AS SELECT *
+FROM "{{tablename}}" WITH NO DATA;
+COPY "{{tablename}}_TEMP" ({{fieldslist}}) FROM '{{csvname}}' DELIMITER '{{delim}}' NULL 'NULL';
+INSERT INTO "{{tablename}}" ({{fieldslist}}) SELECT {{fieldslist}}
 FROM
-"%tablename%_TEMP" ON CONFLICT (%uniquekey%) DO UPDATE SET %updaterule%;
+"{{tablename}}_TEMP" ON CONFLICT ({{uniquekey}}) DO UPDATE SET {{updaterule}};
+% if tablename=="ADDROBJ":
+DELETE FROM "{{tablename}}" WHERE ACTSTATUS = FALSE OR NEXTID IS NOT NULL;
+% end
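The trailing % if block relies on SimpleTemplate treating lines that start with % as embedded Python, which is what lets the ADDROBJ-only cleanup move out of DbHandler.bulk_csv() and into the template itself. A minimal demonstration:

    # Minimal sketch: '%'-prefixed lines are executed as Python, so the
    # DELETE is emitted only when rendering for the ADDROBJ table.
    from bottle import template

    TPL = ('SELECT 1;\n'
           '% if tablename == "ADDROBJ":\n'
           'DELETE FROM "{{tablename}}" WHERE ACTSTATUS = FALSE OR NEXTID IS NOT NULL;\n'
           '% end\n')

    print(template(TPL, tablename="ADDROBJ"))   # includes the DELETE
    print(template(TPL, tablename="SOCRBASE"))  # omits it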
aore/templates/postgre/post_create.sql (new file, 4 lines)
@@ -0,0 +1,4 @@
+CREATE INDEX "sphinx_ind_aolevel" ON "ADDROBJ" USING btree ("aolevel");
+CREATE INDEX "sphinx_ind_parentguid" ON "ADDROBJ" USING btree ("parentguid");
+CREATE INDEX "sphinx_ind_livestatus" ON "ADDROBJ" USING btree ("livestatus");
+CREATE INDEX "sphinx_ind_aoguid" ON "ADDROBJ" USING btree ("aoguid");
@@ -1,5 +1,5 @@
-DROP TABLE IF EXISTS "public"."ADDROBJ";
-CREATE TABLE "public"."ADDROBJ" (
+DROP TABLE IF EXISTS "ADDROBJ";
+CREATE TABLE "ADDROBJ" (
   "id" SERIAL4 NOT NULL,
   "aoid" UUID NOT NULL,
   "aoguid" UUID,
@@ -15,8 +15,8 @@ CREATE TABLE "public"."ADDROBJ" (
 )
 WITH (OIDS =FALSE
 );
-DROP TABLE IF EXISTS "public"."SOCRBASE";
-CREATE TABLE "public"."SOCRBASE" (
+DROP TABLE IF EXISTS "SOCRBASE";
+CREATE TABLE "SOCRBASE" (
   "id" SERIAL4 NOT NULL,
   "level" INT2,
   "scname" VARCHAR(10),
aore/templates/postgre/sphinx_query.sql (new file, 14 lines)
@@ -0,0 +1,14 @@
+
+WITH RECURSIVE PATH (cnt, aoid, aoguid, aolevel, fullname) AS (
+    SELECT ao.id as cnt, ao.aoid, ao.aoguid, ao.aolevel,
+           ao.shortname || ' ' || ao.formalname AS fullname
+    FROM "ADDROBJ" AS ao
+    WHERE aolevel = 1 AND livestatus = TRUE
+    UNION
+    SELECT child.id as cnt, child.aoid, child.aoguid, child.aolevel,
+           PATH.fullname || ', ' || child.shortname || ' ' || child.formalname AS fullname
+    FROM "ADDROBJ" AS child
+    , PATH
+    WHERE child.parentguid = PATH.aoguid AND livestatus = TRUE
+)
+SELECT * FROM PATH WHERE AOLEVEL NOT IN (1,3)
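The recursive CTE seeds with top-level regions (aolevel = 1) and then keeps joining children onto already-resolved parents, concatenating shortname/formalname pairs into one full address string per row for Sphinx to index. A rough Python analogue of that recursion (simplified, not part of the commit):

    # Rough analogue of sphinx_query.sql: resolve parents first, then
    # append each child as "parent fullname, shortname formalname".
    def build_fullnames(rows):
        fullname = {}                                  # aoguid -> full name
        for r in rows:                                 # seed: aolevel == 1
            if r["aolevel"] == 1 and r["livestatus"]:
                fullname[r["aoguid"]] = "{} {}".format(r["shortname"], r["formalname"])
        changed = True
        while changed:                                 # the recursive step
            changed = False
            for r in rows:
                parent = fullname.get(r["parentguid"])
                if r["livestatus"] and parent and r["aoguid"] not in fullname:
                    fullname[r["aoguid"]] = "{}, {} {}".format(
                        parent, r["shortname"], r["formalname"])
                    changed = True
        return fullname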
aore/templates/sphinx/data.conf (new file, 41 lines)
@@ -0,0 +1,41 @@
+source src_{{index_name}}
+{
+    type = pgsql
+    sql_host = {{db_host}}
+    sql_user = {{db_user}}
+    sql_pass = {{db_password}}
+    sql_db = {{db_name}}
+    sql_port = {{db_port}}
+
+    sql_query = {{!sql_query}}
+
+    sql_field_string = fullname
+    sql_attr_string = aoid
+    sql_attr_string = aoguid
+    sql_attr_uint = aolevel
+}
+
+index index_{{ index_name }}
+{
+    docinfo = extern
+    morphology = stem_ru
+    min_stemming_len = 2
+
+    stopwords =
+    min_word_len = 2
+    charset_type = utf-8
+    min_prefix_len = 1
+    min_infix_len = 0
+    enable_star = 1
+
+    # strip html by default
+    html_strip = 1
+
+    ignore_chars = @, -
+    charset_table = 0..9, A..Z->a..z, _, a..z, \
+    U+0401->U+0435, U+0451->U+0435, \
+    U+410..U+42F->U+430..U+44F, U+430..U+44F
+
+    source = src_{{index_name}}
+    path = {{sphinx_var_path}}/data/index_{{index_name}}
+}
aore/templates/sphinx/sphinx.conf (new file, 63 lines)
@@ -0,0 +1,63 @@
+indexer
+{
+    # memory limit, in bytes, kilobytes (16384K) or megabytes (256M)
+    # optional, default is 32M, max is 2047M, recommended is 256M to 1024M
+    mem_limit = 256M
+
+    # maximum IO calls per second (for I/O throttling)
+    # optional, default is 0 (unlimited)
+    #
+    # max_iops = 40
+
+    # maximum IO call size, bytes (for I/O throttling)
+    # optional, default is 0 (unlimited)
+    #
+    max_iosize = 524288
+}
+
+searchd
+{
+
+    listen = 127.0.0.1:9312
+
+    # required by RT-indexes
+    workers = threads
+
+    # log file, searchd run info is logged here
+    # optional, default is 'searchd.log'
+    log = {{sphinx_var_path}}/log/searchd.log
+
+    # query log file, all search queries are logged here
+    # optional, default is empty (do not log queries)
+    query_log = {{sphinx_var_path}}/log/query.log
+
+    # client read timeout, seconds
+    # optional, default is 5
+    read_timeout = 5
+
+    # maximum amount of children to fork (concurrent searches to run)
+    # optional, default is 0 (unlimited)
+    max_children = 30
+
+    # PID file, searchd process ID file name
+    # mandatory
+    pid_file = {{sphinx_var_path}}/run/searchd.pid
+
+    # max amount of matches the daemon ever keeps in RAM, per-index
+    # WARNING, THERE'S ALSO PER-QUERY LIMIT, SEE SetLimits() API CALL
+    # default is 1000 (just like Google)
+    max_matches = 1000
+
+    # seamless rotate, prevents rotate stalls if precaching huge datasets
+    # optional, default is 1
+    seamless_rotate = 1
+
+    # whether to forcibly preopen all indexes on startup
+    # optional, default is 0 (do not preopen)
+    preopen_indexes = 0
+
+    # whether to unlink .old index copies on successful rotation.
+    # optional, default is 1 (do unlink)
+    unlink_old = 1
+}
+
@@ -3,6 +3,7 @@
 import optparse
 
 from aore.aoutils.aoupdater import AoUpdater
+from aore.miscutils.sphinx import produce_sphinx_config
 
 
 def update_base(xml_source, updates_count):
@@ -24,6 +25,8 @@ def main():
                  help="Count of updates to process, only for '--database update' option")
     p.add_option('--source', '-s', default="http",
                  help="Create/update DB from source. Value: \"http\" or absolute path to folder")
+    p.add_option('--sphinx-configure', '-c', action="store", type="string",
+                 help="Get Sphinx config. Value: /path/to/sphinx.conf")
     options, arguments = p.parse_args()
 
     if options.database:
@@ -34,6 +37,9 @@ def main():
     if options.database == "update":
         update_base(options.source, int(options.update_count))
 
+    if options.sphinx_configure:
+        produce_sphinx_config(options.sphinx_configure)
+
 
 if __name__ == '__main__':
     main()
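With the option wired in, generating a config becomes a one-liner from the project's entry-point script (its file name is not shown in this diff; manage.py is assumed here):

    python manage.py --sphinx-configure /etc/sphinxsearch/sphinx.conf
    # or, using the short flag:
    python manage.py -c /etc/sphinxsearch/sphinx.conf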