Added support for text search and text_score.

This commit is contained in:
Wilson Júnior 2014-07-07 20:24:37 -03:00
parent c6e846e0ae
commit f7ebf8dedd
5 changed files with 449 additions and 197 deletions

View File

@ -12,3 +12,4 @@ User Guide
querying
gridfs
signals
text-indexes

View File

@ -0,0 +1,47 @@
===========
Text Search
===========
Since version 2.4, MongoDB supports searching documents using text indexes.
Defining a Document with text index
===================================
Use the *$* prefix on a field name to declare a text index, as in the following declaration::
class News(Document):
    title = StringField()
    content = StringField()
    is_active = BooleanField()

    # The '$' prefix marks a field as part of the text index.
    # 'weights' (plural) is the MongoDB text-index option name; it sets the
    # relative significance of each indexed field when scoring matches.
    meta = {'indexes': [
        {'fields': ['$title', "$content"],
         'default_language': 'english',
         'weights': {'title': 10, 'content': 2}
        }
    ]}
Querying
========
Saving a document::
News(title="Using mongodb text search",
content="Testing text search").save()
News(title="MongoEngine 0.9 released",
content="Various improvements").save()
Next, start a text search using the :meth:`QuerySet.search_text` method::
document = News.objects.search_text('testing').first()
document.title # may be: "Using mongodb text search"
document = News.objects.search_text('released').first()
document.title # may be: "MongoEngine 0.9 released"

View File

@ -41,6 +41,7 @@ class InvalidCollectionError(Exception):
class EmbeddedDocument(BaseDocument):
"""A :class:`~mongoengine.Document` that isn't stored in its own
collection. :class:`~mongoengine.EmbeddedDocument`\ s should be used as
fields on :class:`~mongoengine.Document`\ s through the
@ -59,7 +60,7 @@ class EmbeddedDocument(BaseDocument):
# The __metaclass__ attribute is removed by 2to3 when running with Python3
# my_metaclass is defined so that metaclass can be queried in Python 2 & 3
my_metaclass = DocumentMetaclass
my_metaclass = DocumentMetaclass
__metaclass__ = DocumentMetaclass
def __init__(self, *args, **kwargs):
@ -77,6 +78,7 @@ class EmbeddedDocument(BaseDocument):
class Document(BaseDocument):
"""The base class used for defining the structure and properties of
collections of documents stored in MongoDB. Inherit from this class, and
add fields as class attributes to define a document's structure.
@ -124,14 +126,15 @@ class Document(BaseDocument):
# The __metaclass__ attribute is removed by 2to3 when running with Python3
# my_metaclass is defined so that metaclass can be queried in Python 2 & 3
my_metaclass = TopLevelDocumentMetaclass
my_metaclass = TopLevelDocumentMetaclass
__metaclass__ = TopLevelDocumentMetaclass
__slots__ = ('__objects' )
__slots__ = ('__objects')
def pk():
"""Primary key alias
"""
def fget(self):
return getattr(self, self._meta['id_field'])
@ -140,6 +143,13 @@ class Document(BaseDocument):
return property(fget, fset)
pk = pk()
@property
def text_score(self):
    """Score of this document in a text search.

    Returns the value stored under ``'text_score'`` in the document's
    data, or ``None`` when no such value is present.
    """
    # NOTE(review): presumably only populated when the queryset was built
    # with search_text(..., include_text_scores=True) -- confirm.
    score = self._data.get('text_score')
    return score
@classmethod
def _get_db(cls):
"""Some Model using other db_alias"""
@ -165,7 +175,7 @@ class Document(BaseDocument):
if options.get('max') != max_documents or \
options.get('size') != max_size:
msg = (('Cannot create collection "%s" as a capped '
'collection as it already exists')
'collection as it already exists')
% cls._collection)
raise InvalidCollectionError(msg)
else:
@ -282,9 +292,9 @@ class Document(BaseDocument):
upsert=upsert, **write_concern)
created = is_new_object(last_error)
if cascade is None:
cascade = self._meta.get('cascade', False) or cascade_kwargs is not None
cascade = self._meta.get(
'cascade', False) or cascade_kwargs is not None
if cascade:
kwargs = {
@ -377,7 +387,8 @@ class Document(BaseDocument):
del(query["_cls"])
return self._qs.filter(**query).update_one(**kwargs)
else:
raise OperationError('attempt to update a document not yet saved')
raise OperationError(
'attempt to update a document not yet saved')
# Need to add shard key to query, or you get an error
return self._qs.filter(**self._object_key).update_one(**kwargs)
@ -396,7 +407,8 @@ class Document(BaseDocument):
signals.pre_delete.send(self.__class__, document=self)
try:
self._qs.filter(**self._object_key).delete(write_concern=write_concern, _from_doc_delete=True)
self._qs.filter(
**self._object_key).delete(write_concern=write_concern, _from_doc_delete=True)
except pymongo.errors.OperationFailure, err:
message = u'Could not delete document (%s)' % err.message
raise OperationError(message)
@ -483,8 +495,8 @@ class Document(BaseDocument):
if not self.pk:
raise self.DoesNotExist("Document does not exist")
obj = self._qs.read_preference(ReadPreference.PRIMARY).filter(
**self._object_key).only(*fields).limit(1
).select_related(max_depth=max_depth)
**self._object_key).only(*fields).limit(1
).select_related(max_depth=max_depth)
if obj:
obj = obj[0]
@ -528,8 +540,8 @@ class Document(BaseDocument):
object.
"""
classes = [get_document(class_name)
for class_name in cls._subclasses
if class_name != cls.__name__] + [cls]
for class_name in cls._subclasses
if class_name != cls.__name__] + [cls]
documents = [get_document(class_name)
for class_name in document_cls._subclasses
if class_name != document_cls.__name__] + [document_cls]
@ -551,7 +563,7 @@ class Document(BaseDocument):
@classmethod
def ensure_index(cls, key_or_list, drop_dups=False, background=False,
**kwargs):
**kwargs):
"""Ensure that the given indexes are in place.
:param key_or_list: a single index key or a list of index keys (to
@ -606,7 +618,7 @@ class Document(BaseDocument):
# If _cls is being used (for polymorphism), it needs an index,
# only if another index doesn't begin with _cls
if (index_cls and not cls_indexed and
cls._meta.get('allow_inheritance', ALLOW_INHERITANCE) is True):
cls._meta.get('allow_inheritance', ALLOW_INHERITANCE) is True):
collection.ensure_index('_cls', background=background,
**index_opts)
@ -621,24 +633,25 @@ class Document(BaseDocument):
# get all the base classes, subclasses and sieblings
classes = []
def get_classes(cls):
if (cls not in classes and
isinstance(cls, TopLevelDocumentMetaclass)):
isinstance(cls, TopLevelDocumentMetaclass)):
classes.append(cls)
for base_cls in cls.__bases__:
if (isinstance(base_cls, TopLevelDocumentMetaclass) and
base_cls != Document and
not base_cls._meta.get('abstract') and
base_cls._get_collection().full_name == cls._get_collection().full_name and
base_cls not in classes):
base_cls != Document and
not base_cls._meta.get('abstract') and
base_cls._get_collection().full_name == cls._get_collection().full_name and
base_cls not in classes):
classes.append(base_cls)
get_classes(base_cls)
for subclass in cls.__subclasses__():
if (isinstance(base_cls, TopLevelDocumentMetaclass) and
subclass._get_collection().full_name == cls._get_collection().full_name and
subclass not in classes):
subclass._get_collection().full_name == cls._get_collection().full_name and
subclass not in classes):
classes.append(subclass)
get_classes(subclass)
@ -666,8 +679,8 @@ class Document(BaseDocument):
if [(u'_id', 1)] not in indexes:
indexes.append([(u'_id', 1)])
if (cls._meta.get('index_cls', True) and
cls._meta.get('allow_inheritance', ALLOW_INHERITANCE) is True):
indexes.append([(u'_cls', 1)])
cls._meta.get('allow_inheritance', ALLOW_INHERITANCE) is True):
indexes.append([(u'_cls', 1)])
return indexes
@ -678,7 +691,8 @@ class Document(BaseDocument):
"""
required = cls.list_indexes()
existing = [info['key'] for info in cls._get_collection().index_information().values()]
existing = [info['key']
for info in cls._get_collection().index_information().values()]
missing = [index for index in required if index not in existing]
extra = [index for index in existing if index not in required]
@ -696,6 +710,7 @@ class Document(BaseDocument):
class DynamicDocument(Document):
"""A Dynamic Document class allowing flexible, expandable and uncontrolled
schemas. As a :class:`~mongoengine.Document` subclass, acts in the same
way as an ordinary document but has expando style properties. Any data
@ -711,7 +726,7 @@ class DynamicDocument(Document):
# The __metaclass__ attribute is removed by 2to3 when running with Python3
# my_metaclass is defined so that metaclass can be queried in Python 2 & 3
my_metaclass = TopLevelDocumentMetaclass
my_metaclass = TopLevelDocumentMetaclass
__metaclass__ = TopLevelDocumentMetaclass
_dynamic = True
@ -727,6 +742,7 @@ class DynamicDocument(Document):
class DynamicEmbeddedDocument(EmbeddedDocument):
"""A Dynamic Embedded Document class allowing flexible, expandable and
uncontrolled schemas. See :class:`~mongoengine.DynamicDocument` for more
information about dynamic documents.
@ -734,7 +750,7 @@ class DynamicEmbeddedDocument(EmbeddedDocument):
# The __metaclass__ attribute is removed by 2to3 when running with Python3
# my_metaclass is defined so that metaclass can be queried in Python 2 & 3
my_metaclass = DocumentMetaclass
my_metaclass = DocumentMetaclass
__metaclass__ = DocumentMetaclass
_dynamic = True
@ -753,6 +769,7 @@ class DynamicEmbeddedDocument(EmbeddedDocument):
class MapReduceDocument(object):
"""A document returned from a map/reduce query.
:param collection: An instance of :class:`~pymongo.Collection`
@ -783,7 +800,7 @@ class MapReduceDocument(object):
try:
self.key = id_field_type(self.key)
except:
raise Exception("Could not cast key as %s" % \
raise Exception("Could not cast key as %s" %
id_field_type.__name__)
if not hasattr(self, "_key_object"):

View File

@ -39,6 +39,7 @@ RE_TYPE = type(re.compile(''))
class BaseQuerySet(object):
"""A set of results returned from a query. Wraps a MongoDB cursor,
providing :class:`~mongoengine.Document` objects as the results.
"""
@ -64,6 +65,8 @@ class BaseQuerySet(object):
self._none = False
self._as_pymongo = False
self._as_pymongo_coerce = False
self._search_text = None
self._include_text_scores = False
# If inheritance is allowed, only return instances and instances of
# subclasses of the class being used
@ -71,7 +74,8 @@ class BaseQuerySet(object):
if len(self._document._subclasses) == 1:
self._initial_query = {"_cls": self._document._subclasses[0]}
else:
self._initial_query = {"_cls": {"$in": self._document._subclasses}}
self._initial_query = {
"_cls": {"$in": self._document._subclasses}}
self._loaded_fields = QueryFieldList(always_include=['_cls'])
self._cursor_obj = None
self._limit = None
@ -148,6 +152,7 @@ class BaseQuerySet(object):
return queryset._get_scalar(
queryset._document._from_son(queryset._cursor[key],
_auto_dereference=self._auto_dereference))
if queryset._as_pymongo:
return queryset._get_as_pymongo(queryset._cursor[key])
return queryset._document._from_son(queryset._cursor[key],
@ -184,6 +189,35 @@ class BaseQuerySet(object):
"""
return self.__call__(*q_objs, **query)
def search_text(self, text, language=None, include_text_scores=False):
    """Start a text search, using text indexes.

    Works on a clone of this queryset; the receiver is left untouched.

    :param text: The string sent to MongoDB as the ``$search`` value of
        the ``$text`` query operator.
    :param language: The language that determines the list of stop words
        for the search and the rules for the stemmer and tokenizer.
        If not specified, the search uses the default language of the
        index. For supported languages, see `Text Search Languages
        <http://docs.mongodb.org/manual/reference/text-search-languages/#text-search-languages>`_.
    :param include_text_scores: If True, automatically request the
        ``textScore`` metadata as a ``text_score`` field on the results.
    :returns: the cloned queryset with the text clause applied.
    :raises OperationError: if a text search was already started on this
        queryset.
    """
    queryset = self.clone()
    # Compare against None (the initial value) rather than relying on
    # truthiness, so that an earlier search for an empty string still
    # counts as "already searched" and a second $text clause is rejected.
    if queryset._search_text is not None:
        raise OperationError(
            "Is not possible to use search_text two times.")

    query_kwargs = {'$search': text}
    if language:
        query_kwargs['$language'] = language

    queryset._query_obj &= Q(__raw__={'$text': query_kwargs})
    # Drop the stored query and cursor so neither is reused from before
    # the text clause was added (presumably rebuilt lazily -- confirm).
    queryset._mongo_query = None
    queryset._cursor_obj = None
    queryset._search_text = text
    queryset._include_text_scores = include_text_scores

    return queryset
def get(self, *q_objs, **query):
"""Retrieve the the matching object raising
:class:`~mongoengine.queryset.MultipleObjectsReturned` or
@ -322,10 +356,10 @@ class BaseQuerySet(object):
try:
ids = self._collection.insert(raw, **write_concern)
except pymongo.errors.DuplicateKeyError, err:
message = 'Could not save document (%s)';
message = 'Could not save document (%s)'
raise NotUniqueError(message % unicode(err))
except pymongo.errors.OperationFailure, err:
message = 'Could not save document (%s)';
message = 'Could not save document (%s)'
if re.match('^E1100[01] duplicate key', unicode(err)):
# E11000 - duplicate key error index
# E11001 - duplicate key on update
@ -408,7 +442,7 @@ class BaseQuerySet(object):
ref_q = document_cls.objects(**{field_name + '__in': self})
ref_q_count = ref_q.count()
if (doc != document_cls and ref_q_count > 0
or (doc == document_cls and ref_q_count > 0)):
or (doc == document_cls and ref_q_count > 0)):
ref_q.delete(write_concern=write_concern)
elif rule == NULLIFY:
document_cls.objects(**{field_name + '__in': self}).update(
@ -418,7 +452,8 @@ class BaseQuerySet(object):
write_concern=write_concern,
**{'pull_all__%s' % field_name: self})
queryset._collection.remove(queryset._query, write_concern=write_concern)
queryset._collection.remove(
queryset._query, write_concern=write_concern)
def update(self, upsert=False, multi=True, write_concern=None,
full_result=False, **update):
@ -515,7 +550,8 @@ class BaseQuerySet(object):
raise OperationError("Conflicting parameters: remove and new")
if not update and not upsert and not remove:
raise OperationError("No update parameters, must either update or remove")
raise OperationError(
"No update parameters, must either update or remove")
queryset = self.clone()
query = queryset._query
@ -622,13 +658,15 @@ class BaseQuerySet(object):
:class:`~mongoengine.queryset.base.BaseQuerySet` into another child class
"""
if not isinstance(cls, BaseQuerySet):
raise OperationError('%s is not a subclass of BaseQuerySet' % cls.__name__)
raise OperationError(
'%s is not a subclass of BaseQuerySet' % cls.__name__)
copy_props = ('_mongo_query', '_initial_query', '_none', '_query_obj',
'_where_clause', '_loaded_fields', '_ordering', '_snapshot',
'_timeout', '_class_check', '_slave_okay', '_read_preference',
'_iter', '_scalar', '_as_pymongo', '_as_pymongo_coerce',
'_limit', '_skip', '_hint', '_auto_dereference')
'_limit', '_skip', '_hint', '_auto_dereference',
'_search_text', '_include_text_scores')
for prop in copy_props:
val = getattr(self, prop)
@ -714,11 +752,14 @@ class BaseQuerySet(object):
distinct = self._dereference(queryset._cursor.distinct(field), 1,
name=field, instance=self._document)
# We may need to cast to the correct type eg. ListField(EmbeddedDocumentField)
doc_field = getattr(self._document._fields.get(field), "field", None)
# We may need to cast to the correct type eg.
# ListField(EmbeddedDocumentField)
doc_field = getattr(
self._document._fields.get(field), "field", None)
instance = getattr(doc_field, "document_type", False)
EmbeddedDocumentField = _import_class('EmbeddedDocumentField')
GenericEmbeddedDocumentField = _import_class('GenericEmbeddedDocumentField')
GenericEmbeddedDocumentField = _import_class(
'GenericEmbeddedDocumentField')
if instance and isinstance(doc_field, (EmbeddedDocumentField,
GenericEmbeddedDocumentField)):
distinct = [instance(**doc) for doc in distinct]
@ -799,7 +840,8 @@ class BaseQuerySet(object):
for value, group in itertools.groupby(fields, lambda x: x[1]):
fields = [field for field, value in group]
fields = queryset._fields_to_dbfields(fields)
queryset._loaded_fields += QueryFieldList(fields, value=value, _only_called=_only_called)
queryset._loaded_fields += QueryFieldList(
fields, value=value, _only_called=_only_called)
return queryset
@ -1036,7 +1078,6 @@ class BaseQuerySet(object):
ordered_output.append(('db', get_db(db_alias).name))
del remaing_args[0]
for part in remaing_args:
value = output.get(part)
if value:
@ -1292,6 +1333,13 @@ class BaseQuerySet(object):
cursor_args['slave_okay'] = self._slave_okay
if self._loaded_fields:
cursor_args['fields'] = self._loaded_fields.as_dict()
if self._include_text_scores:
if 'fields' not in cursor_args:
cursor_args['fields'] = {}
cursor_args['fields']['text_score'] = {'$meta': "textScore"}
return cursor_args
@property

File diff suppressed because it is too large Load Diff