Попытка сделать нормальный поиск

This commit is contained in:
Jack Stdin 2016-01-14 16:51:34 +03:00
parent 759efc43ee
commit 8156fa3d8d
7 changed files with 94 additions and 20 deletions

View File

@ -1,2 +1,11 @@
# py-fias
Simple fias app written in Python 2.7
WSGI application that can serve FIAS (Russian Address Object DB)
Простое приложение для работы с БД ФИАС, написано для Python 2.7
## Установка
Протестирована работа на следующих ОС: Windows (8.1) и Debian Jessie
Предполагается, что у Вас уже установлены БД PostgreSQL версии 9.5 и интерпретатор Python 2.7
1. Windows
1. Установим *уйню...

View File

@ -37,7 +37,8 @@ SPHINX_VAR_DIRS = dict(
# config_type = "test"
# Main section
# NOTE(review): this looks like the pre-change index name shown next to its
# replacements below - confirm which of these assignments the real file keeps.
sphinx_index_name="sph_addrobj"
# Name of the address-object Sphinx index. The Sphinx helper module imports it
# under this exact spelling ("addjobj"), so renaming it here alone would break
# that import.
sphinx_index_addjobj="idx_fias_addrobj"
# Presumably the name of the suggestions index - verify against its users.
sphinx_index_sugg="idx_fias_sugg"
# Environment-specific values: each one is looked up in its table by config_type.
sphinx_var_dir=SPHINX_VAR_DIRS[config_type]
db = DB_INSTANCES[config_type]
unrar = UNRAR_PATHES[config_type]

0
aore/fias/__init__.py Normal file
View File

20
aore/fias/search.py Normal file
View File

@ -0,0 +1,20 @@
# -*- coding: utf-8 -*-
import sphinxapi
import logging
import json
class SphinxSearch:
    """Small wrapper around ``sphinxapi.SphinxClient`` for text queries.

    The client is created once in ``__init__`` and reused by every ``find``
    call.
    """

    def __init__(self, host="localhost", port=9312, limit=10):
        """Create the Sphinx client and configure the connection.

        :param host: searchd host name (defaults to the previously
            hard-coded ``"localhost"``).
        :param port: searchd port (defaults to the previously hard-coded
            ``9312``).
        :param limit: maximum number of matches requested per query
            (defaults to the previously hard-coded ``10``).
        """
        self.client = sphinxapi.SphinxClient()
        self.client.SetServer(host, port)
        self.client.SetLimits(0, limit)

    def find(self, text, index="*"):
        """Run a query and return the raw result produced by the client.

        :param text: query string handed to the client unchanged.
        :param index: index name to search; ``"*"`` keeps the previous
            behaviour of calling ``Query`` without an explicit index.
        :return: whatever ``client.Query`` returned, or ``None`` when the
            client reported a failure (the error text is logged).
        """
        logging.info("Sphinx query %r (index %r)", text, index)
        result = self.client.Query(text, index)

        if result is None:
            # The client signals failure with None; surface the reason
            # instead of dumping "null" and carrying on silently.
            logging.error("Sphinx query failed: %s", self.client.GetLastError())
            return None

        # Kept so existing console usage still shows the JSON dump; written
        # as a call so it parses on both Python 2 and Python 3.
        print(json.dumps(result))
        logging.info("Sphinx query %r finished", text)
        return result

View File

@ -5,18 +5,59 @@ import os
from bottle import template
from aore.config import db as dbconfig, sphinx_index_name, sphinx_var_dir
from aore.config import db as dbconfig, sphinx_index_addjobj, sphinx_var_dir, trashfolder
def produce_sphinx_config(config_name):
logging.info("Creating {}".format(config_name))
conf_data = template('aore/templates/sphinx/data.conf', db_host=dbconfig['host'], db_user=dbconfig['user'],
def configure_sphinx(indexer_binary):
    """Create the ADDROBJ Sphinx config, index it and build the suggestion dict.

    :param indexer_binary: path to the Sphinx ``indexer`` executable.
    """
    # Local import keeps this function self-contained with respect to the
    # module's import block.
    import subprocess

    logging.info("Start configuring Sphinx...")

    # Create ADDROBJ config
    addrobj_cfg_name = get_addrobj_config()

    # Indexing it... The command is passed as an argument list (no shell), so
    # paths containing spaces or shell metacharacters are handed over intact.
    logging.info("Run indexer (indexing ADDROBJ)...")
    try:
        exit_code = subprocess.call([indexer_binary, "-c", addrobj_cfg_name, "--all"])
    except OSError as err:
        # os.system never raised for a missing binary; keep that contract
        # and report the problem instead of crashing.
        logging.error("Cannot run indexer {}: {}".format(indexer_binary, err))
        exit_code = -1

    if exit_code == 0:
        logging.info("{} index was created.".format(sphinx_index_addjobj))
    else:
        # Previously the success message was logged even when indexing failed.
        logging.error("Indexer exited with code {}; {} index may be missing.".format(
            exit_code, sphinx_index_addjobj))

    # Produce dict file
    sugg_dict_name = get_suggestion_dict(indexer_binary, addrobj_cfg_name)
    logging.info("Suggestion dict file: {}".format(sugg_dict_name))
def get_suggestion_dict(indexer_binary, addrobj_cfg_name):
    """Run the indexer in ``--buildstops`` mode to produce a word-frequency file.

    :param indexer_binary: path to the Sphinx ``indexer`` executable.
    :param addrobj_cfg_name: path to the Sphinx config describing the ADDROBJ index.
    :return: absolute path of the dictionary file the indexer was asked to write.
    """
    # Local import keeps this function self-contained with respect to the
    # module's import block.
    import subprocess

    logging.info("Make suggestion dict...")
    dict_file_name = os.path.abspath(trashfolder + "suggdict.txt")

    # Same arguments as the former shell string, passed as a list so that
    # paths with spaces or shell metacharacters are not re-split by a shell.
    builddict_cmd = [
        indexer_binary, sphinx_index_addjobj,
        "-c", addrobj_cfg_name,
        "--buildstops", dict_file_name, "200000",
        "--buildfreqs",
    ]
    try:
        exit_code = subprocess.call(builddict_cmd)
    except OSError as err:
        # os.system never raised for a missing binary; keep that contract.
        logging.error("Cannot run indexer {}: {}".format(indexer_binary, err))
        exit_code = -1

    if exit_code == 0:
        logging.info("Done.")
    else:
        # Previously "Done." was logged regardless of the indexer's exit code.
        logging.error("Building suggestion dict failed with code {}".format(exit_code))

    return dict_file_name
# Render the Sphinx configuration for the ADDROBJ index, write it to
# "addrobj.conf" under trashfolder and return the absolute path of that file.
def get_addrobj_config():
# Target path: trashfolder with "addrobj.conf" appended, made absolute.
config_fname = os.path.abspath(trashfolder + "addrobj.conf")
logging.info("Creating config {}".format(config_fname))
# Fill the index template with the database connection settings from dbconfig.
conf_data = template('aore/templates/sphinx/idx_addrobj.conf', db_host=dbconfig['host'], db_user=dbconfig['user'],
db_password=dbconfig['password'],
db_name=dbconfig['database'], db_port=dbconfig['port'],
# NOTE(review): the next three argument lines pass sql_query twice and
# index_name twice (sphinx_index_name, then sphinx_index_addjobj). This looks
# like a removed line shown next to its replacement - confirm which one the
# real file keeps.
# The rendered SQL gets " \" appended before every newline, i.e. it is turned
# into a backslash-continued multi-line value.
sql_query=template('aore/templates/postgre/sphinx_query.sql').replace("\n"," \\\n"), index_name=sphinx_index_name,
sql_query=template('aore/templates/postgre/sphinx_query.sql').replace("\n", " \\\n"),
index_name=sphinx_index_addjobj,
sphinx_var_path=sphinx_var_dir)
# Appends the rendered common sphinx.conf template after a newline.
# NOTE(review): may also be a removed line in this view - confirm.
conf_data += "\n" + template('aore/templates/sphinx/sphinx.conf', sphinx_var_path=sphinx_var_dir)
# Write the assembled text; the file is opened in "w" mode, so an existing
# file is overwritten.
f = open(config_fname, "w")
f.write(conf_data)
f.close()
logging.info("Done.")
return config_fname
# TRASH
def produce_sphinx_config(config_name):
conf_data = template('aore/templates/sphinx/sphinx.conf', sphinx_var_path=sphinx_var_dir)
if os.path.isfile(config_name):
choice = raw_input(

View File

@ -1,4 +1,4 @@
source src_{{index_name}}
source {{index_name}}
{
type = pgsql
sql_host = {{db_host}}
@ -15,14 +15,14 @@ source src_{{index_name}}
sql_attr_uint = aolevel
}
index index_{{ index_name }}
index {{ index_name }}
{
docinfo = extern
morphology = stem_ru
min_stemming_len = 2
min_stemming_len = 3
stopwords =
min_word_len = 2
min_word_len = 1
charset_type = utf-8
min_prefix_len = 1
min_infix_len = 0
@ -36,6 +36,6 @@ index index_{{ index_name }}
U+0401->U+0435, U+0451->U+0435, \
U+410..U+42F->U+430..U+44F, U+430..U+44F
source = src_{{index_name}}
path = {{sphinx_var_path}}/data/index_{{index_name}}
source = {{index_name}}
path = {{sphinx_var_path}}/data/{{index_name}}
}

View File

@ -3,7 +3,8 @@
import optparse
from aore.aoutils.aoupdater import AoUpdater
from aore.miscutils.sphinx import produce_sphinx_config
from aore.miscutils.sphinx import configure_sphinx
from aore.fias.search import SphinxSearch
def update_base(xml_source, updates_count):
@ -25,8 +26,11 @@ def main():
help="Count of updates to process, only for '--database update' option")
p.add_option('--source', '-s', default="http",
help="Create/update DB from source. Value: \"http\" or absolute path to folder")
p.add_option('--sphinx-configure', '-c', action="store", type="string",
help="Get Sphinx config. Value: /path/to/sphinx.conf")
# The default must be the boolean False: the former string "False" is
# non-empty and therefore truthy, so options.sphinx evaluated as true even
# when the flag was not given.
p.add_option('--sphinx-configure', '-c', action="store_true", dest="sphinx", default=False,
             help="Configure sphinx. Creates sphinx.conf in working direcory")
p.add_option('--indexer-path', '-i',
help="Path to sphinx indexer binary. Must be specified for '--sphinx-configure'")
options, arguments = p.parse_args()
if options.database:
@ -37,9 +41,8 @@ def main():
if options.database == "update":
update_base(options.source, int(options.update_count))
if options.sphinx_configure:
produce_sphinx_config(options.sphinx_configure)
if options.sphinx and options.indexer_path:
configure_sphinx(options.indexer_path)
if __name__ == '__main__':
main()