Начало работы с парсером входных данных (строки адресов)

2016-01-15 17:06:14 +03:00
parent f91fb27150
commit 327a1c994e
2 changed files with 37 additions and 15 deletions
--- a/aore/fias/search.py
+++ b/aore/fias/search.py
@@ -1,7 +1,6 @@
 # -*- coding: utf-8 -*-
-import json
+import re
 import logging
 import Levenshtein
 import psycopg2
@@ -38,8 +37,9 @@ class SphinxSearch:
            SRANK_EXACTLY_TYPING=['01', '11'],  # Точно - слово недопечатано, не надо подсказок, только word*
            SRANK_PROBABLY_TYPING=['0*'],  # Возможно - слово недопечатано, немного подсказок и word*
            SRANK_PROBABLY_FOUND=['10'],  # Возможно - слово введено точно, немного подсказок, без word*
-            SRANK_PROBABLY_COMPLEX=['1*']
+            SRANK_PROBABLY_COMPLEX=['1*'],
            # Возможно, слово сложное, есть и точное совпадние, по маске Нужно немного подсказок и word*
            SRANK_PROBABLY_SOCR=['1!']  # Возможно - сокращение, не трогаем вообще
        )
        def __init__(self, rtype):
@@ -47,8 +47,11 @@ class SphinxSearch:
            for x, y in self.names.iteritems():
                self.__dict__[x] = self.rtype in y
        def __str__(self):
            return ", ".join([x for x in self.names if self.__dict__[x]])
    def __get_strong_and_uncomplete_ranks(self, word):
-        word_len = str(len(word) / 2)
+        word_len = len(word)
        sql_qry = "SELECT COUNT(*) FROM \"AOTRIG\" WHERE word LIKE '{}%' AND LENGTH(word) > {} " \
                  "UNION ALL SELECT COUNT(*) FROM \"AOTRIG\" WHERE word='{}'".format(
            word, word_len, word)
@@ -56,14 +59,18 @@ class SphinxSearch:
        result = self.db.get_rows(sql_qry)
        strong_rank = result[1][0]
        uncomplete_rank = result[0][0]
-        if uncomplete_rank > 1:
+
-            uncomplete_rank = '*'
+        if uncomplete_rank > 1000 and word_len < 4:
            uncomplete_rank = '!'
        else:
            if uncomplete_rank > 1:
                uncomplete_rank = '*'
        return self.SRankType(str(strong_rank) + str(uncomplete_rank))
-    def get_suggest(self, word):
+    def __get_suggest(self, word):
        word_len = str(len(word) / 2)
-        trigrammed_word = '"{}"/2'.format(trigram(word))
+        trigrammed_word = '"{}"/1'.format(trigram(word))
        self.__configure("idx_fias_sugg", word_len)
        result = self.client.Query(trigrammed_word, 'idx_fias_sugg')
@@ -84,9 +91,17 @@ class SphinxSearch:
            print x[0], x[1]
        return outlist
    def __split_phrase(self, phrase):
        phrase = unicode(phrase).replace('-', '').replace('@', '').lower()
        return re.split(r"[ ,:.]+", phrase)
    def __process_word(self, word):
        print word, self.__get_strong_and_uncomplete_ranks(word)
    def find(self, text):
-        # TODO: ADD index
+        words = self.__split_phrase(text)
-        logging.info("12")
+        for word in words:
-        result = self.client.Query(text)
+            self.__process_word(word)
-        print json.dumps(result)
+            # result = self.client.Query(text)
-        logging.info("12")
+            # print json.dumps(result)
            # logging.info("12")
--- a/manage.py
+++ b/manage.py
@@ -32,9 +32,12 @@ def main():
                 help="Path to sphinx indexer binary. Must be specified for '--sphinx-configure'")
    p.add_option('--output-conf', '-o',
                 help="Output config filename. Must be specified for '--sphinx-configure'")
    p.add_option('--test', '-t', action="store_true", dest="test",
                 help="Test")
    options, arguments = p.parse_args()
    # Manage DB
    if options.database:
        # create new database
        if options.database == "create":
@@ -43,11 +46,15 @@ def main():
        if options.database == "update":
            update_base(options.source, int(options.update_count))
    # Manage Sphinx
    if options.sphinx and options.indexer_path and options.output_conf:
        sphinxh = SphinxHelper()
        sphinxh.configure_indexer(options.indexer_path, options.output_conf)
    # 4 Debug purposes..
    if options.test:
        sph = SphinxSearch()
        sph.find('город Гавно д. пидарская, ул Кощеева')
 if __name__ == '__main__':
    #sph = SphinxSearch()
    #sph.get_suggest('апасьево')
    main()