From ce934b9f1bc597ba53c0b82b67cd3bcb3d14a5ac Mon Sep 17 00:00:00 2001
From: Jack Stdin <hellotan@live.ru>
Date: Thu, 18 Feb 2016 13:18:38 +0300
Subject: [PATCH] =?UTF-8?q?=D0=94=D0=BE=D0=B1=D0=B0=D0=B2=D0=BB=D0=B5?=
 =?UTF-8?q?=D0=BD=D0=B0=20=D0=BF=D0=BE=D1=81=D1=82-=D0=BE=D0=B1=D1=80?=
 =?UTF-8?q?=D0=B0=D0=B1=D0=BE=D1=82=D0=BA=D0=B0=20=D1=87=D0=B0=D1=81=D1=82?=
 =?UTF-8?q?=D0=BE=20=D0=B2=D1=81=D1=82=D1=80=D0=B5=D1=87=D0=B0=D1=8E=D1=89?=
 =?UTF-8?q?=D0=B8=D1=85=D1=81=D1=8F=20=D1=81=D0=BB=D0=BE=D0=B2?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 aore/fias/search.py    | 32 +++++++++++++++++++++++++++-----
 aore/fias/wordentry.py |  7 +++++++
 2 files changed, 34 insertions(+), 5 deletions(-)

diff --git a/aore/fias/search.py b/aore/fias/search.py
index 8b80d39..b771b39 100644
--- a/aore/fias/search.py
+++ b/aore/fias/search.py
@@ -1,13 +1,14 @@
 # -*- coding: utf-8 -*-
-import logging
 import re
 
 import Levenshtein
 import sphinxapi
+import time
 
 from aore.config import sphinx_conf
 from aore.fias.wordentry import WordEntry
 from aore.miscutils.trigram import trigram
+from aore.config import basic
 
 
 class SphinxSearch:
@@ -24,6 +25,8 @@ class SphinxSearch:
     regression_coef = 0.08
     max_result = 10
 
+    exclude_freq_words = True
+
     def __init__(self, db):
         self.db = db
         self.client_sugg = sphinxapi.SphinxClient()
@@ -96,15 +99,20 @@ class SphinxSearch:
         if word_entry.MT_ADD_SOCR:
             word_entry.add_variation_socr()
 
+    # Builds the list of word objects, skipping frequently used words
     def __get_word_entries(self, words, strong):
         we_list = []
         for word in words:
             if word != '':
                 we = WordEntry(self.db, word)
-                self.__add_word_variations(we, strong)
+                if self.exclude_freq_words and we.is_freq_word:
+                    pass
+                else:
+                    self.__add_word_variations(we, strong)
+
+                    assert we.get_variations() != "", "Cannot process sentence."
+                    we_list.append(we)
 
-                assert we.get_variations() != "", "Cannot process sentence."
-                we_list.append(we)
         return we_list
 
     def find(self, text, strong):
@@ -112,15 +120,29 @@ class SphinxSearch:
             phrase = unicode(phrase).replace('-', '').replace('@', '').lower()
             return re.split(r"[ ,:.#$]+", phrase)
 
+        # split the text into words
         words = split_phrase(text)
+
+        # build the list of word objects
         word_entries = self.__get_word_entries(words, strong)
         word_count = len(word_entries)
+
+        # make sure the word-object list is not empty (every word may have been dropped as too frequent)
+        assert word_count > 0, "No legal words is specified"
+
+        # build the query strings for the Sphinx search
         for x in range(word_count, max(0, word_count - 3), -1):
             self.client_show.AddQuery("\"{}\"/{}".format(" ".join(x.get_variations() for x in word_entries), x),
                                       sphinx_conf.index_addjobj)
 
         self.__configure(sphinx_conf.index_addjobj)
+
+        start_t = time.time()
         rs = self.client_show.RunQueries()
+        elapsed_t = time.time() - start_t
+
+        if basic.logging:
+            print(elapsed_t)
 
         results = []
         parsed_ids = []
@@ -132,7 +154,7 @@ class SphinxSearch:
                 if not ma['attrs']['aoid'] in parsed_ids:
                     parsed_ids.append(ma['attrs']['aoid'])
                     results.append(
-                        dict(aoid=ma['attrs']['aoid'], text=ma['attrs']['fullname'], ratio=ma['weight'], cort=i))
+                        dict(aoid=ma['attrs']['aoid'], text=unicode(ma['attrs']['fullname']), ratio=ma['weight'], cort=i))
 
         results.sort(key=lambda x: Levenshtein.ratio(text, x['text']), reverse=True)
 
diff --git a/aore/fias/wordentry.py b/aore/fias/wordentry.py
index 03b0b63..2e34047 100644
--- a/aore/fias/wordentry.py
+++ b/aore/fias/wordentry.py
@@ -112,3 +112,10 @@ class WordEntry:
 
     def get_type(self):
         return ", ".join([x for x in self.match_types if self.__dict__[x]])
+
+    def __unicode__(self):
+        return self.word
+
+    def __str__(self):
+        return str(self.word)
+