Немного улучшен поиск (через несколько query)
This commit is contained in:
parent
f2d71efe2e
commit
16a144205b
@ -22,22 +22,24 @@ class SphinxSearch:
|
|||||||
self.rating_limit_hard_count = 3
|
self.rating_limit_hard_count = 3
|
||||||
|
|
||||||
self.default_rating_delta = 2
|
self.default_rating_delta = 2
|
||||||
self.regression_coef = 0.04
|
self.regression_coef = 0.08
|
||||||
|
|
||||||
|
self.max_result = 10
|
||||||
|
|
||||||
self.db = db
|
self.db = db
|
||||||
self.client_sugg = sphinxapi.SphinxClient()
|
self.client_sugg = sphinxapi.SphinxClient()
|
||||||
self.client_sugg.SetServer(sphinx_conf.host_name, sphinx_conf.port)
|
self.client_sugg.SetServer(sphinx_conf.host_name, sphinx_conf.port)
|
||||||
self.client_sugg.SetLimits(0, 10)
|
self.client_sugg.SetLimits(0, self.max_result)
|
||||||
self.client_sugg.SetConnectTimeout(3.0)
|
self.client_sugg.SetConnectTimeout(3.0)
|
||||||
|
|
||||||
self.client_show = sphinxapi.SphinxClient()
|
self.client_show = sphinxapi.SphinxClient()
|
||||||
self.client_show.SetServer(sphinx_conf.host_name, sphinx_conf.port)
|
self.client_show.SetServer(sphinx_conf.host_name, sphinx_conf.port)
|
||||||
self.client_show.SetLimits(0, 10)
|
self.client_show.SetLimits(0, self.max_result)
|
||||||
self.client_show.SetConnectTimeout(3.0)
|
self.client_show.SetConnectTimeout(3.0)
|
||||||
|
|
||||||
def __configure(self, index_name, wlen=None):
|
def __configure(self, index_name, wlen=None):
|
||||||
if index_name == sphinx_conf.index_sugg and wlen:
|
if index_name == sphinx_conf.index_sugg and wlen:
|
||||||
self.client_sugg.SetRankingMode(sphinxapi.SPH_RANK_BM25)
|
self.client_sugg.SetRankingMode(sphinxapi.SPH_RANK_WORDCOUNT)
|
||||||
self.client_sugg.SetFilterRange("len", int(wlen) - self.delta_len, int(wlen) + self.delta_len)
|
self.client_sugg.SetFilterRange("len", int(wlen) - self.delta_len, int(wlen) + self.delta_len)
|
||||||
self.client_sugg.SetSelect("word, len, @weight+{}-abs(len-{}) AS krank".format(self.delta_len, wlen))
|
self.client_sugg.SetSelect("word, len, @weight+{}-abs(len-{}) AS krank".format(self.delta_len, wlen))
|
||||||
self.client_sugg.SetSortMode(sphinxapi.SPH_SORT_EXTENDED, "krank DESC")
|
self.client_sugg.SetSortMode(sphinxapi.SPH_SORT_EXTENDED, "krank DESC")
|
||||||
@ -98,30 +100,40 @@ class SphinxSearch:
|
|||||||
word_entry.add_variation_socr()
|
word_entry.add_variation_socr()
|
||||||
|
|
||||||
def __get_word_entries(self, words, strong):
|
def __get_word_entries(self, words, strong):
|
||||||
|
we_list = []
|
||||||
for word in words:
|
for word in words:
|
||||||
if not strong and len(word) < self.word_length_soft:
|
|
||||||
continue
|
|
||||||
if word != '':
|
if word != '':
|
||||||
we = WordEntry(self.db, word)
|
we = WordEntry(self.db, word)
|
||||||
self.__add_word_variations(we, strong)
|
self.__add_word_variations(we, strong)
|
||||||
|
|
||||||
assert we.get_variations() != "()", "Cannot process sentence."
|
assert we.get_variations() != "", "Cannot process sentence."
|
||||||
yield we
|
we_list.append(we)
|
||||||
|
return we_list
|
||||||
|
|
||||||
def find(self, text, strong):
|
def find(self, text, strong):
|
||||||
logging.info("FIND ")
|
|
||||||
words = self.__split_phrase(text)
|
words = self.__split_phrase(text)
|
||||||
word_entries = self.__get_word_entries(words, strong)
|
word_entries = self.__get_word_entries(words, strong)
|
||||||
sentence = "{}".format(" MAYBE ".join(x.get_variations() for x in word_entries))
|
word_count = len(word_entries)
|
||||||
|
for x in range(word_count, max(0, word_count - 3), -1):
|
||||||
|
self.client_show.AddQuery("\"{}\"/{}".format(" ".join(x.get_variations() for x in word_entries), x),
|
||||||
|
sphinx_conf.index_addjobj)
|
||||||
|
|
||||||
self.__configure(sphinx_conf.index_addjobj)
|
self.__configure(sphinx_conf.index_addjobj)
|
||||||
logging.info("QUERY " + sentence)
|
logging.info("QUERY ")
|
||||||
rs = self.client_show.Query(sentence, sphinx_conf.index_addjobj)
|
rs = self.client_show.RunQueries()
|
||||||
logging.info("OK")
|
logging.info("OK")
|
||||||
|
|
||||||
results = []
|
results = []
|
||||||
for ma in rs['matches']:
|
parsed_ids = []
|
||||||
results.append(dict(aoid=ma['attrs']['aoid'], text=ma['attrs']['fullname'], ratio=ma['weight']))
|
|
||||||
|
for i in range(0, len(rs)):
|
||||||
|
for ma in rs[i]['matches']:
|
||||||
|
if len(results) >= self.max_result:
|
||||||
|
break
|
||||||
|
if not ma['attrs']['aoid'] in parsed_ids:
|
||||||
|
parsed_ids.append(ma['attrs']['aoid'])
|
||||||
|
results.append(
|
||||||
|
dict(aoid=ma['attrs']['aoid'], text=ma['attrs']['fullname'], ratio=ma['weight'], cort=i))
|
||||||
|
|
||||||
if strong:
|
if strong:
|
||||||
results.sort(key=lambda x: Levenshtein.ratio(text, x['text']), reverse=True)
|
results.sort(key=lambda x: Levenshtein.ratio(text, x['text']), reverse=True)
|
||||||
|
@ -71,14 +71,16 @@ class WordEntry:
|
|||||||
self.variations.append(variation_string)
|
self.variations.append(variation_string)
|
||||||
|
|
||||||
def get_variations(self):
|
def get_variations(self):
|
||||||
return "({})".format(" | ".join(self.variations))
|
if len(self.variations) == 1:
|
||||||
|
return self.variations[0]
|
||||||
|
return "{}".format(" ".join(self.variations))
|
||||||
|
|
||||||
def __get_ranks(self):
|
def __get_ranks(self):
|
||||||
sql_qry = "SELECT COUNT(*), NULL FROM \"AOTRIG\" WHERE word LIKE '{}%' AND LENGTH(word) > {} " \
|
sql_qry = "SELECT COUNT(*), NULL FROM \"AOTRIG\" WHERE word LIKE '{}%' AND LENGTH(word) > {} " \
|
||||||
"UNION ALL SELECT COUNT(*), NULL FROM \"AOTRIG\" WHERE word='{}' " \
|
"UNION ALL SELECT COUNT(*), NULL FROM \"AOTRIG\" WHERE word='{}' " \
|
||||||
"UNION ALL SELECT COUNT(*), MAX(scname) FROM \"SOCRBASE\" WHERE socrname ILIKE '{}'" \
|
"UNION ALL SELECT COUNT(*), MAX(scname) FROM \"SOCRBASE\" WHERE socrname ILIKE '{}'" \
|
||||||
"UNION ALL SELECT COUNT(*), NULL FROM \"SOCRBASE\" WHERE scname ILIKE '{}';".format(
|
"UNION ALL SELECT COUNT(*), NULL FROM \"SOCRBASE\" WHERE scname ILIKE '{}';".format(
|
||||||
self.word, self.word_len, self.word, self.word, self.word)
|
self.word, self.word_len, self.word, self.word, self.word)
|
||||||
|
|
||||||
result = self.db.get_rows(sql_qry)
|
result = self.db.get_rows(sql_qry)
|
||||||
|
|
||||||
|
@ -26,13 +26,10 @@ def normalize(aoid):
|
|||||||
@app.route('/find/<text>')
|
@app.route('/find/<text>')
|
||||||
@app.route('/find/<text>/<strong>')
|
@app.route('/find/<text>/<strong>')
|
||||||
def find(text, strong=False):
|
def find(text, strong=False):
|
||||||
logging.warning("START")
|
|
||||||
strong = (strong == "strong")
|
strong = (strong == "strong")
|
||||||
response.content_type = 'application/json'
|
response.content_type = 'application/json'
|
||||||
|
|
||||||
res = json.dumps(fias_factory.find(text, strong))
|
return json.dumps(fias_factory.find(text, strong))
|
||||||
logging.warning("END")
|
|
||||||
return res
|
|
||||||
|
|
||||||
|
|
||||||
@app.error(404)
|
@app.error(404)
|
||||||
|
@ -5,4 +5,4 @@ from aore import phias
|
|||||||
application = phias.app
|
application = phias.app
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
application.run(host='localhost', port=55001, debug=True)
|
application.run(host='0.0.0.0', port=55001, debug=True)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user