Added support for text search and text_score.

This commit is contained in:
Wilson Júnior 2014-07-07 20:24:37 -03:00
parent c6e846e0ae
commit f7ebf8dedd
5 changed files with 449 additions and 197 deletions

View File

@ -12,3 +12,4 @@ User Guide
querying querying
gridfs gridfs
signals signals
text-indexes

View File

@ -0,0 +1,47 @@
===========
Text Search
===========
Since version 2.4, MongoDB supports searching documents using text indexes.
Defining a Document with text index
===================================
Use the *$* prefix on a field name to declare a text index. Look at the declaration::
class News(Document):
title = StringField()
content = StringField()
is_active = BooleanField()
meta = {'indexes': [
{'fields': ['$title', "$content"],
'default_language': 'english',
'weights': {'title': 10, 'content': 2}
}
]}
Querying
========
Saving a document::
News(title="Using mongodb text search",
content="Testing text search").save()
News(title="MongoEngine 0.9 released",
content="Various improvements").save()
Next, start a text search using the :meth:`QuerySet.search_text` method::
document = News.objects.search_text('testing').first()
document.title # may be: "Using mongodb text search"
document = News.objects.search_text('released').first()
document.title # may be: "MongoEngine 0.9 released"

View File

@ -41,6 +41,7 @@ class InvalidCollectionError(Exception):
class EmbeddedDocument(BaseDocument): class EmbeddedDocument(BaseDocument):
"""A :class:`~mongoengine.Document` that isn't stored in its own """A :class:`~mongoengine.Document` that isn't stored in its own
collection. :class:`~mongoengine.EmbeddedDocument`\ s should be used as collection. :class:`~mongoengine.EmbeddedDocument`\ s should be used as
fields on :class:`~mongoengine.Document`\ s through the fields on :class:`~mongoengine.Document`\ s through the
@ -59,7 +60,7 @@ class EmbeddedDocument(BaseDocument):
# The __metaclass__ attribute is removed by 2to3 when running with Python3 # The __metaclass__ attribute is removed by 2to3 when running with Python3
# my_metaclass is defined so that metaclass can be queried in Python 2 & 3 # my_metaclass is defined so that metaclass can be queried in Python 2 & 3
my_metaclass = DocumentMetaclass my_metaclass = DocumentMetaclass
__metaclass__ = DocumentMetaclass __metaclass__ = DocumentMetaclass
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
@ -77,6 +78,7 @@ class EmbeddedDocument(BaseDocument):
class Document(BaseDocument): class Document(BaseDocument):
"""The base class used for defining the structure and properties of """The base class used for defining the structure and properties of
collections of documents stored in MongoDB. Inherit from this class, and collections of documents stored in MongoDB. Inherit from this class, and
add fields as class attributes to define a document's structure. add fields as class attributes to define a document's structure.
@ -124,14 +126,15 @@ class Document(BaseDocument):
# The __metaclass__ attribute is removed by 2to3 when running with Python3 # The __metaclass__ attribute is removed by 2to3 when running with Python3
# my_metaclass is defined so that metaclass can be queried in Python 2 & 3 # my_metaclass is defined so that metaclass can be queried in Python 2 & 3
my_metaclass = TopLevelDocumentMetaclass my_metaclass = TopLevelDocumentMetaclass
__metaclass__ = TopLevelDocumentMetaclass __metaclass__ = TopLevelDocumentMetaclass
__slots__ = ('__objects' ) __slots__ = ('__objects')
def pk(): def pk():
"""Primary key alias """Primary key alias
""" """
def fget(self): def fget(self):
return getattr(self, self._meta['id_field']) return getattr(self, self._meta['id_field'])
@ -140,6 +143,13 @@ class Document(BaseDocument):
return property(fget, fset) return property(fget, fset)
pk = pk() pk = pk()
@property
def text_score(self):
"""
Return the value stored under ``'text_score'`` in this document's
``_data``, or ``None`` when that key is absent (assumes ``_data`` is
dict-like).
NOTE(review): presumably only populated for documents loaded through
``QuerySet.search_text(..., include_text_scores=True)`` -- confirm.
"""
return self._data.get('text_score')
@classmethod @classmethod
def _get_db(cls): def _get_db(cls):
"""Some Model using other db_alias""" """Some Model using other db_alias"""
@ -165,7 +175,7 @@ class Document(BaseDocument):
if options.get('max') != max_documents or \ if options.get('max') != max_documents or \
options.get('size') != max_size: options.get('size') != max_size:
msg = (('Cannot create collection "%s" as a capped ' msg = (('Cannot create collection "%s" as a capped '
'collection as it already exists') 'collection as it already exists')
% cls._collection) % cls._collection)
raise InvalidCollectionError(msg) raise InvalidCollectionError(msg)
else: else:
@ -282,9 +292,9 @@ class Document(BaseDocument):
upsert=upsert, **write_concern) upsert=upsert, **write_concern)
created = is_new_object(last_error) created = is_new_object(last_error)
if cascade is None: if cascade is None:
cascade = self._meta.get('cascade', False) or cascade_kwargs is not None cascade = self._meta.get(
'cascade', False) or cascade_kwargs is not None
if cascade: if cascade:
kwargs = { kwargs = {
@ -377,7 +387,8 @@ class Document(BaseDocument):
del(query["_cls"]) del(query["_cls"])
return self._qs.filter(**query).update_one(**kwargs) return self._qs.filter(**query).update_one(**kwargs)
else: else:
raise OperationError('attempt to update a document not yet saved') raise OperationError(
'attempt to update a document not yet saved')
# Need to add shard key to query, or you get an error # Need to add shard key to query, or you get an error
return self._qs.filter(**self._object_key).update_one(**kwargs) return self._qs.filter(**self._object_key).update_one(**kwargs)
@ -396,7 +407,8 @@ class Document(BaseDocument):
signals.pre_delete.send(self.__class__, document=self) signals.pre_delete.send(self.__class__, document=self)
try: try:
self._qs.filter(**self._object_key).delete(write_concern=write_concern, _from_doc_delete=True) self._qs.filter(
**self._object_key).delete(write_concern=write_concern, _from_doc_delete=True)
except pymongo.errors.OperationFailure, err: except pymongo.errors.OperationFailure, err:
message = u'Could not delete document (%s)' % err.message message = u'Could not delete document (%s)' % err.message
raise OperationError(message) raise OperationError(message)
@ -483,8 +495,8 @@ class Document(BaseDocument):
if not self.pk: if not self.pk:
raise self.DoesNotExist("Document does not exist") raise self.DoesNotExist("Document does not exist")
obj = self._qs.read_preference(ReadPreference.PRIMARY).filter( obj = self._qs.read_preference(ReadPreference.PRIMARY).filter(
**self._object_key).only(*fields).limit(1 **self._object_key).only(*fields).limit(1
).select_related(max_depth=max_depth) ).select_related(max_depth=max_depth)
if obj: if obj:
obj = obj[0] obj = obj[0]
@ -528,8 +540,8 @@ class Document(BaseDocument):
object. object.
""" """
classes = [get_document(class_name) classes = [get_document(class_name)
for class_name in cls._subclasses for class_name in cls._subclasses
if class_name != cls.__name__] + [cls] if class_name != cls.__name__] + [cls]
documents = [get_document(class_name) documents = [get_document(class_name)
for class_name in document_cls._subclasses for class_name in document_cls._subclasses
if class_name != document_cls.__name__] + [document_cls] if class_name != document_cls.__name__] + [document_cls]
@ -551,7 +563,7 @@ class Document(BaseDocument):
@classmethod @classmethod
def ensure_index(cls, key_or_list, drop_dups=False, background=False, def ensure_index(cls, key_or_list, drop_dups=False, background=False,
**kwargs): **kwargs):
"""Ensure that the given indexes are in place. """Ensure that the given indexes are in place.
:param key_or_list: a single index key or a list of index keys (to :param key_or_list: a single index key or a list of index keys (to
@ -606,7 +618,7 @@ class Document(BaseDocument):
# If _cls is being used (for polymorphism), it needs an index, # If _cls is being used (for polymorphism), it needs an index,
# only if another index doesn't begin with _cls # only if another index doesn't begin with _cls
if (index_cls and not cls_indexed and if (index_cls and not cls_indexed and
cls._meta.get('allow_inheritance', ALLOW_INHERITANCE) is True): cls._meta.get('allow_inheritance', ALLOW_INHERITANCE) is True):
collection.ensure_index('_cls', background=background, collection.ensure_index('_cls', background=background,
**index_opts) **index_opts)
@ -621,24 +633,25 @@ class Document(BaseDocument):
# get all the base classes, subclasses and sieblings # get all the base classes, subclasses and sieblings
classes = [] classes = []
def get_classes(cls): def get_classes(cls):
if (cls not in classes and if (cls not in classes and
isinstance(cls, TopLevelDocumentMetaclass)): isinstance(cls, TopLevelDocumentMetaclass)):
classes.append(cls) classes.append(cls)
for base_cls in cls.__bases__: for base_cls in cls.__bases__:
if (isinstance(base_cls, TopLevelDocumentMetaclass) and if (isinstance(base_cls, TopLevelDocumentMetaclass) and
base_cls != Document and base_cls != Document and
not base_cls._meta.get('abstract') and not base_cls._meta.get('abstract') and
base_cls._get_collection().full_name == cls._get_collection().full_name and base_cls._get_collection().full_name == cls._get_collection().full_name and
base_cls not in classes): base_cls not in classes):
classes.append(base_cls) classes.append(base_cls)
get_classes(base_cls) get_classes(base_cls)
for subclass in cls.__subclasses__(): for subclass in cls.__subclasses__():
if (isinstance(base_cls, TopLevelDocumentMetaclass) and if (isinstance(base_cls, TopLevelDocumentMetaclass) and
subclass._get_collection().full_name == cls._get_collection().full_name and subclass._get_collection().full_name == cls._get_collection().full_name and
subclass not in classes): subclass not in classes):
classes.append(subclass) classes.append(subclass)
get_classes(subclass) get_classes(subclass)
@ -666,8 +679,8 @@ class Document(BaseDocument):
if [(u'_id', 1)] not in indexes: if [(u'_id', 1)] not in indexes:
indexes.append([(u'_id', 1)]) indexes.append([(u'_id', 1)])
if (cls._meta.get('index_cls', True) and if (cls._meta.get('index_cls', True) and
cls._meta.get('allow_inheritance', ALLOW_INHERITANCE) is True): cls._meta.get('allow_inheritance', ALLOW_INHERITANCE) is True):
indexes.append([(u'_cls', 1)]) indexes.append([(u'_cls', 1)])
return indexes return indexes
@ -678,7 +691,8 @@ class Document(BaseDocument):
""" """
required = cls.list_indexes() required = cls.list_indexes()
existing = [info['key'] for info in cls._get_collection().index_information().values()] existing = [info['key']
for info in cls._get_collection().index_information().values()]
missing = [index for index in required if index not in existing] missing = [index for index in required if index not in existing]
extra = [index for index in existing if index not in required] extra = [index for index in existing if index not in required]
@ -696,6 +710,7 @@ class Document(BaseDocument):
class DynamicDocument(Document): class DynamicDocument(Document):
"""A Dynamic Document class allowing flexible, expandable and uncontrolled """A Dynamic Document class allowing flexible, expandable and uncontrolled
schemas. As a :class:`~mongoengine.Document` subclass, acts in the same schemas. As a :class:`~mongoengine.Document` subclass, acts in the same
way as an ordinary document but has expando style properties. Any data way as an ordinary document but has expando style properties. Any data
@ -711,7 +726,7 @@ class DynamicDocument(Document):
# The __metaclass__ attribute is removed by 2to3 when running with Python3 # The __metaclass__ attribute is removed by 2to3 when running with Python3
# my_metaclass is defined so that metaclass can be queried in Python 2 & 3 # my_metaclass is defined so that metaclass can be queried in Python 2 & 3
my_metaclass = TopLevelDocumentMetaclass my_metaclass = TopLevelDocumentMetaclass
__metaclass__ = TopLevelDocumentMetaclass __metaclass__ = TopLevelDocumentMetaclass
_dynamic = True _dynamic = True
@ -727,6 +742,7 @@ class DynamicDocument(Document):
class DynamicEmbeddedDocument(EmbeddedDocument): class DynamicEmbeddedDocument(EmbeddedDocument):
"""A Dynamic Embedded Document class allowing flexible, expandable and """A Dynamic Embedded Document class allowing flexible, expandable and
uncontrolled schemas. See :class:`~mongoengine.DynamicDocument` for more uncontrolled schemas. See :class:`~mongoengine.DynamicDocument` for more
information about dynamic documents. information about dynamic documents.
@ -734,7 +750,7 @@ class DynamicEmbeddedDocument(EmbeddedDocument):
# The __metaclass__ attribute is removed by 2to3 when running with Python3 # The __metaclass__ attribute is removed by 2to3 when running with Python3
# my_metaclass is defined so that metaclass can be queried in Python 2 & 3 # my_metaclass is defined so that metaclass can be queried in Python 2 & 3
my_metaclass = DocumentMetaclass my_metaclass = DocumentMetaclass
__metaclass__ = DocumentMetaclass __metaclass__ = DocumentMetaclass
_dynamic = True _dynamic = True
@ -753,6 +769,7 @@ class DynamicEmbeddedDocument(EmbeddedDocument):
class MapReduceDocument(object): class MapReduceDocument(object):
"""A document returned from a map/reduce query. """A document returned from a map/reduce query.
:param collection: An instance of :class:`~pymongo.Collection` :param collection: An instance of :class:`~pymongo.Collection`
@ -783,7 +800,7 @@ class MapReduceDocument(object):
try: try:
self.key = id_field_type(self.key) self.key = id_field_type(self.key)
except: except:
raise Exception("Could not cast key as %s" % \ raise Exception("Could not cast key as %s" %
id_field_type.__name__) id_field_type.__name__)
if not hasattr(self, "_key_object"): if not hasattr(self, "_key_object"):

View File

@ -39,6 +39,7 @@ RE_TYPE = type(re.compile(''))
class BaseQuerySet(object): class BaseQuerySet(object):
"""A set of results returned from a query. Wraps a MongoDB cursor, """A set of results returned from a query. Wraps a MongoDB cursor,
providing :class:`~mongoengine.Document` objects as the results. providing :class:`~mongoengine.Document` objects as the results.
""" """
@ -64,6 +65,8 @@ class BaseQuerySet(object):
self._none = False self._none = False
self._as_pymongo = False self._as_pymongo = False
self._as_pymongo_coerce = False self._as_pymongo_coerce = False
self._search_text = None
self._include_text_scores = False
# If inheritance is allowed, only return instances and instances of # If inheritance is allowed, only return instances and instances of
# subclasses of the class being used # subclasses of the class being used
@ -71,7 +74,8 @@ class BaseQuerySet(object):
if len(self._document._subclasses) == 1: if len(self._document._subclasses) == 1:
self._initial_query = {"_cls": self._document._subclasses[0]} self._initial_query = {"_cls": self._document._subclasses[0]}
else: else:
self._initial_query = {"_cls": {"$in": self._document._subclasses}} self._initial_query = {
"_cls": {"$in": self._document._subclasses}}
self._loaded_fields = QueryFieldList(always_include=['_cls']) self._loaded_fields = QueryFieldList(always_include=['_cls'])
self._cursor_obj = None self._cursor_obj = None
self._limit = None self._limit = None
@ -148,6 +152,7 @@ class BaseQuerySet(object):
return queryset._get_scalar( return queryset._get_scalar(
queryset._document._from_son(queryset._cursor[key], queryset._document._from_son(queryset._cursor[key],
_auto_dereference=self._auto_dereference)) _auto_dereference=self._auto_dereference))
if queryset._as_pymongo: if queryset._as_pymongo:
return queryset._get_as_pymongo(queryset._cursor[key]) return queryset._get_as_pymongo(queryset._cursor[key])
return queryset._document._from_son(queryset._cursor[key], return queryset._document._from_son(queryset._cursor[key],
@ -184,6 +189,35 @@ class BaseQuerySet(object):
""" """
return self.__call__(*q_objs, **query) return self.__call__(*q_objs, **query)
def search_text(self, text, language=None, include_text_scores=False):
"""
Start a text search, using text indexes.
Works on ``self.clone()`` and returns that clone with a raw ``$text``
clause AND-ed into its query.
:param text: The string sent to MongoDB as the ``$search`` value.
:param language: The language that determines the list of stop words
for the search and the rules for the stemmer and tokenizer.
If not specified, the search uses the default language of the index.
For supported languages, see `Text Search Languages
<http://docs.mongodb.org/manual/reference/text-search-languages/#text-search-languages>`_.
:param include_text_scores: If True, automatically add a text_score
attribute to Document.
:raises OperationError: if the queryset already carries a truthy
``_search_text``, i.e. ``search_text`` was applied before.
"""
queryset = self.clone()
if queryset._search_text:
raise OperationError(
"Is not possible to use search_text two times.")
query_kwargs = {'$search': text}
if language:
query_kwargs['$language'] = language
queryset._query_obj &= Q(__raw__={'$text': query_kwargs})
# NOTE(review): presumably drops the cached compiled query and cursor so
# they are rebuilt with the new $text clause -- confirm.
queryset._mongo_query = None
queryset._cursor_obj = None
# Remembered so a second search_text() call on the result is rejected above.
queryset._search_text = text
queryset._include_text_scores = include_text_scores
return queryset
def get(self, *q_objs, **query): def get(self, *q_objs, **query):
"""Retrieve the the matching object raising """Retrieve the the matching object raising
:class:`~mongoengine.queryset.MultipleObjectsReturned` or :class:`~mongoengine.queryset.MultipleObjectsReturned` or
@ -322,10 +356,10 @@ class BaseQuerySet(object):
try: try:
ids = self._collection.insert(raw, **write_concern) ids = self._collection.insert(raw, **write_concern)
except pymongo.errors.DuplicateKeyError, err: except pymongo.errors.DuplicateKeyError, err:
message = 'Could not save document (%s)'; message = 'Could not save document (%s)'
raise NotUniqueError(message % unicode(err)) raise NotUniqueError(message % unicode(err))
except pymongo.errors.OperationFailure, err: except pymongo.errors.OperationFailure, err:
message = 'Could not save document (%s)'; message = 'Could not save document (%s)'
if re.match('^E1100[01] duplicate key', unicode(err)): if re.match('^E1100[01] duplicate key', unicode(err)):
# E11000 - duplicate key error index # E11000 - duplicate key error index
# E11001 - duplicate key on update # E11001 - duplicate key on update
@ -408,7 +442,7 @@ class BaseQuerySet(object):
ref_q = document_cls.objects(**{field_name + '__in': self}) ref_q = document_cls.objects(**{field_name + '__in': self})
ref_q_count = ref_q.count() ref_q_count = ref_q.count()
if (doc != document_cls and ref_q_count > 0 if (doc != document_cls and ref_q_count > 0
or (doc == document_cls and ref_q_count > 0)): or (doc == document_cls and ref_q_count > 0)):
ref_q.delete(write_concern=write_concern) ref_q.delete(write_concern=write_concern)
elif rule == NULLIFY: elif rule == NULLIFY:
document_cls.objects(**{field_name + '__in': self}).update( document_cls.objects(**{field_name + '__in': self}).update(
@ -418,7 +452,8 @@ class BaseQuerySet(object):
write_concern=write_concern, write_concern=write_concern,
**{'pull_all__%s' % field_name: self}) **{'pull_all__%s' % field_name: self})
queryset._collection.remove(queryset._query, write_concern=write_concern) queryset._collection.remove(
queryset._query, write_concern=write_concern)
def update(self, upsert=False, multi=True, write_concern=None, def update(self, upsert=False, multi=True, write_concern=None,
full_result=False, **update): full_result=False, **update):
@ -515,7 +550,8 @@ class BaseQuerySet(object):
raise OperationError("Conflicting parameters: remove and new") raise OperationError("Conflicting parameters: remove and new")
if not update and not upsert and not remove: if not update and not upsert and not remove:
raise OperationError("No update parameters, must either update or remove") raise OperationError(
"No update parameters, must either update or remove")
queryset = self.clone() queryset = self.clone()
query = queryset._query query = queryset._query
@ -622,13 +658,15 @@ class BaseQuerySet(object):
:class:`~mongoengine.queryset.base.BaseQuerySet` into another child class :class:`~mongoengine.queryset.base.BaseQuerySet` into another child class
""" """
if not isinstance(cls, BaseQuerySet): if not isinstance(cls, BaseQuerySet):
raise OperationError('%s is not a subclass of BaseQuerySet' % cls.__name__) raise OperationError(
'%s is not a subclass of BaseQuerySet' % cls.__name__)
copy_props = ('_mongo_query', '_initial_query', '_none', '_query_obj', copy_props = ('_mongo_query', '_initial_query', '_none', '_query_obj',
'_where_clause', '_loaded_fields', '_ordering', '_snapshot', '_where_clause', '_loaded_fields', '_ordering', '_snapshot',
'_timeout', '_class_check', '_slave_okay', '_read_preference', '_timeout', '_class_check', '_slave_okay', '_read_preference',
'_iter', '_scalar', '_as_pymongo', '_as_pymongo_coerce', '_iter', '_scalar', '_as_pymongo', '_as_pymongo_coerce',
'_limit', '_skip', '_hint', '_auto_dereference') '_limit', '_skip', '_hint', '_auto_dereference',
'_search_text', '_include_text_scores')
for prop in copy_props: for prop in copy_props:
val = getattr(self, prop) val = getattr(self, prop)
@ -714,11 +752,14 @@ class BaseQuerySet(object):
distinct = self._dereference(queryset._cursor.distinct(field), 1, distinct = self._dereference(queryset._cursor.distinct(field), 1,
name=field, instance=self._document) name=field, instance=self._document)
# We may need to cast to the correct type eg. ListField(EmbeddedDocumentField) # We may need to cast to the correct type eg.
doc_field = getattr(self._document._fields.get(field), "field", None) # ListField(EmbeddedDocumentField)
doc_field = getattr(
self._document._fields.get(field), "field", None)
instance = getattr(doc_field, "document_type", False) instance = getattr(doc_field, "document_type", False)
EmbeddedDocumentField = _import_class('EmbeddedDocumentField') EmbeddedDocumentField = _import_class('EmbeddedDocumentField')
GenericEmbeddedDocumentField = _import_class('GenericEmbeddedDocumentField') GenericEmbeddedDocumentField = _import_class(
'GenericEmbeddedDocumentField')
if instance and isinstance(doc_field, (EmbeddedDocumentField, if instance and isinstance(doc_field, (EmbeddedDocumentField,
GenericEmbeddedDocumentField)): GenericEmbeddedDocumentField)):
distinct = [instance(**doc) for doc in distinct] distinct = [instance(**doc) for doc in distinct]
@ -799,7 +840,8 @@ class BaseQuerySet(object):
for value, group in itertools.groupby(fields, lambda x: x[1]): for value, group in itertools.groupby(fields, lambda x: x[1]):
fields = [field for field, value in group] fields = [field for field, value in group]
fields = queryset._fields_to_dbfields(fields) fields = queryset._fields_to_dbfields(fields)
queryset._loaded_fields += QueryFieldList(fields, value=value, _only_called=_only_called) queryset._loaded_fields += QueryFieldList(
fields, value=value, _only_called=_only_called)
return queryset return queryset
@ -1036,7 +1078,6 @@ class BaseQuerySet(object):
ordered_output.append(('db', get_db(db_alias).name)) ordered_output.append(('db', get_db(db_alias).name))
del remaing_args[0] del remaing_args[0]
for part in remaing_args: for part in remaing_args:
value = output.get(part) value = output.get(part)
if value: if value:
@ -1292,6 +1333,13 @@ class BaseQuerySet(object):
cursor_args['slave_okay'] = self._slave_okay cursor_args['slave_okay'] = self._slave_okay
if self._loaded_fields: if self._loaded_fields:
cursor_args['fields'] = self._loaded_fields.as_dict() cursor_args['fields'] = self._loaded_fields.as_dict()
if self._include_text_scores:
if 'fields' not in cursor_args:
cursor_args['fields'] = {}
cursor_args['fields']['text_score'] = {'$meta': "textScore"}
return cursor_args return cursor_args
@property @property

File diff suppressed because it is too large Load Diff