Added support for text search and text_score.
This commit is contained in:
parent
c6e846e0ae
commit
f7ebf8dedd
@ -12,3 +12,4 @@ User Guide
|
||||
querying
|
||||
gridfs
|
||||
signals
|
||||
text-indexes
|
||||
|
47
docs/guide/text-indexes.rst
Normal file
47
docs/guide/text-indexes.rst
Normal file
@ -0,0 +1,47 @@
|
||||
===========
|
||||
Text Search
|
||||
===========
|
||||
|
||||
MongoDB 2.4 and later support searching documents using text indexes.
|
||||
|
||||
|
||||
Defining a Document with text index
|
||||
===================================
|
||||
Use the *$* prefix on a field name to declare a text index, as in the following declaration::
|
||||
|
||||
class News(Document):
|
||||
title = StringField()
|
||||
content = StringField()
|
||||
is_active = BooleanField()
|
||||
|
||||
meta = {'indexes': [
|
||||
{'fields': ['$title', "$content"],
|
||||
'default_language': 'english',
|
||||
'weight': {'title': 10, 'content': 2}
|
||||
}
|
||||
]}
|
||||
|
||||
|
||||
|
||||
Querying
|
||||
========
|
||||
|
||||
Saving a document::
|
||||
|
||||
News(title="Using mongodb text search",
|
||||
content="Testing text search").save()
|
||||
|
||||
News(title="MongoEngine 0.9 released",
|
||||
content="Various improvements").save()
|
||||
|
||||
Next, start a text search using the :attr:`QuerySet.search_text` method::
|
||||
|
||||
document = News.objects.search_text('testing').first()
|
||||
document.title # may be: "Using mongodb text search"
|
||||
|
||||
document = News.objects.search_text('released').first()
|
||||
document.title # may be: "MongoEngine 0.9 released"
|
||||
|
||||
|
||||
|
||||
|
@ -41,6 +41,7 @@ class InvalidCollectionError(Exception):
|
||||
|
||||
|
||||
class EmbeddedDocument(BaseDocument):
|
||||
|
||||
"""A :class:`~mongoengine.Document` that isn't stored in its own
|
||||
collection. :class:`~mongoengine.EmbeddedDocument`\ s should be used as
|
||||
fields on :class:`~mongoengine.Document`\ s through the
|
||||
@ -59,7 +60,7 @@ class EmbeddedDocument(BaseDocument):
|
||||
|
||||
# The __metaclass__ attribute is removed by 2to3 when running with Python3
|
||||
# my_metaclass is defined so that metaclass can be queried in Python 2 & 3
|
||||
my_metaclass = DocumentMetaclass
|
||||
my_metaclass = DocumentMetaclass
|
||||
__metaclass__ = DocumentMetaclass
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
@ -77,6 +78,7 @@ class EmbeddedDocument(BaseDocument):
|
||||
|
||||
|
||||
class Document(BaseDocument):
|
||||
|
||||
"""The base class used for defining the structure and properties of
|
||||
collections of documents stored in MongoDB. Inherit from this class, and
|
||||
add fields as class attributes to define a document's structure.
|
||||
@ -124,14 +126,15 @@ class Document(BaseDocument):
|
||||
|
||||
# The __metaclass__ attribute is removed by 2to3 when running with Python3
|
||||
# my_metaclass is defined so that metaclass can be queried in Python 2 & 3
|
||||
my_metaclass = TopLevelDocumentMetaclass
|
||||
my_metaclass = TopLevelDocumentMetaclass
|
||||
__metaclass__ = TopLevelDocumentMetaclass
|
||||
|
||||
__slots__ = ('__objects' )
|
||||
__slots__ = ('__objects')
|
||||
|
||||
def pk():
|
||||
"""Primary key alias
|
||||
"""
|
||||
|
||||
def fget(self):
|
||||
return getattr(self, self._meta['id_field'])
|
||||
|
||||
@ -140,6 +143,13 @@ class Document(BaseDocument):
|
||||
return property(fget, fset)
|
||||
pk = pk()
|
||||
|
||||
@property
|
||||
def text_score(self):
|
||||
"""
|
||||
Used for text searches
|
||||
"""
|
||||
return self._data.get('text_score')
|
||||
|
||||
@classmethod
|
||||
def _get_db(cls):
|
||||
"""Some Model using other db_alias"""
|
||||
@ -165,7 +175,7 @@ class Document(BaseDocument):
|
||||
if options.get('max') != max_documents or \
|
||||
options.get('size') != max_size:
|
||||
msg = (('Cannot create collection "%s" as a capped '
|
||||
'collection as it already exists')
|
||||
'collection as it already exists')
|
||||
% cls._collection)
|
||||
raise InvalidCollectionError(msg)
|
||||
else:
|
||||
@ -282,9 +292,9 @@ class Document(BaseDocument):
|
||||
upsert=upsert, **write_concern)
|
||||
created = is_new_object(last_error)
|
||||
|
||||
|
||||
if cascade is None:
|
||||
cascade = self._meta.get('cascade', False) or cascade_kwargs is not None
|
||||
cascade = self._meta.get(
|
||||
'cascade', False) or cascade_kwargs is not None
|
||||
|
||||
if cascade:
|
||||
kwargs = {
|
||||
@ -377,7 +387,8 @@ class Document(BaseDocument):
|
||||
del(query["_cls"])
|
||||
return self._qs.filter(**query).update_one(**kwargs)
|
||||
else:
|
||||
raise OperationError('attempt to update a document not yet saved')
|
||||
raise OperationError(
|
||||
'attempt to update a document not yet saved')
|
||||
|
||||
# Need to add shard key to query, or you get an error
|
||||
return self._qs.filter(**self._object_key).update_one(**kwargs)
|
||||
@ -396,7 +407,8 @@ class Document(BaseDocument):
|
||||
signals.pre_delete.send(self.__class__, document=self)
|
||||
|
||||
try:
|
||||
self._qs.filter(**self._object_key).delete(write_concern=write_concern, _from_doc_delete=True)
|
||||
self._qs.filter(
|
||||
**self._object_key).delete(write_concern=write_concern, _from_doc_delete=True)
|
||||
except pymongo.errors.OperationFailure, err:
|
||||
message = u'Could not delete document (%s)' % err.message
|
||||
raise OperationError(message)
|
||||
@ -483,8 +495,8 @@ class Document(BaseDocument):
|
||||
if not self.pk:
|
||||
raise self.DoesNotExist("Document does not exist")
|
||||
obj = self._qs.read_preference(ReadPreference.PRIMARY).filter(
|
||||
**self._object_key).only(*fields).limit(1
|
||||
).select_related(max_depth=max_depth)
|
||||
**self._object_key).only(*fields).limit(1
|
||||
).select_related(max_depth=max_depth)
|
||||
|
||||
if obj:
|
||||
obj = obj[0]
|
||||
@ -528,8 +540,8 @@ class Document(BaseDocument):
|
||||
object.
|
||||
"""
|
||||
classes = [get_document(class_name)
|
||||
for class_name in cls._subclasses
|
||||
if class_name != cls.__name__] + [cls]
|
||||
for class_name in cls._subclasses
|
||||
if class_name != cls.__name__] + [cls]
|
||||
documents = [get_document(class_name)
|
||||
for class_name in document_cls._subclasses
|
||||
if class_name != document_cls.__name__] + [document_cls]
|
||||
@ -551,7 +563,7 @@ class Document(BaseDocument):
|
||||
|
||||
@classmethod
|
||||
def ensure_index(cls, key_or_list, drop_dups=False, background=False,
|
||||
**kwargs):
|
||||
**kwargs):
|
||||
"""Ensure that the given indexes are in place.
|
||||
|
||||
:param key_or_list: a single index key or a list of index keys (to
|
||||
@ -606,7 +618,7 @@ class Document(BaseDocument):
|
||||
# If _cls is being used (for polymorphism), it needs an index,
|
||||
# only if another index doesn't begin with _cls
|
||||
if (index_cls and not cls_indexed and
|
||||
cls._meta.get('allow_inheritance', ALLOW_INHERITANCE) is True):
|
||||
cls._meta.get('allow_inheritance', ALLOW_INHERITANCE) is True):
|
||||
collection.ensure_index('_cls', background=background,
|
||||
**index_opts)
|
||||
|
||||
@ -621,24 +633,25 @@ class Document(BaseDocument):
|
||||
|
||||
# get all the base classes, subclasses and siblings
|
||||
classes = []
|
||||
|
||||
def get_classes(cls):
|
||||
|
||||
if (cls not in classes and
|
||||
isinstance(cls, TopLevelDocumentMetaclass)):
|
||||
isinstance(cls, TopLevelDocumentMetaclass)):
|
||||
classes.append(cls)
|
||||
|
||||
for base_cls in cls.__bases__:
|
||||
if (isinstance(base_cls, TopLevelDocumentMetaclass) and
|
||||
base_cls != Document and
|
||||
not base_cls._meta.get('abstract') and
|
||||
base_cls._get_collection().full_name == cls._get_collection().full_name and
|
||||
base_cls not in classes):
|
||||
base_cls != Document and
|
||||
not base_cls._meta.get('abstract') and
|
||||
base_cls._get_collection().full_name == cls._get_collection().full_name and
|
||||
base_cls not in classes):
|
||||
classes.append(base_cls)
|
||||
get_classes(base_cls)
|
||||
for subclass in cls.__subclasses__():
|
||||
if (isinstance(base_cls, TopLevelDocumentMetaclass) and
|
||||
subclass._get_collection().full_name == cls._get_collection().full_name and
|
||||
subclass not in classes):
|
||||
subclass._get_collection().full_name == cls._get_collection().full_name and
|
||||
subclass not in classes):
|
||||
classes.append(subclass)
|
||||
get_classes(subclass)
|
||||
|
||||
@ -666,8 +679,8 @@ class Document(BaseDocument):
|
||||
if [(u'_id', 1)] not in indexes:
|
||||
indexes.append([(u'_id', 1)])
|
||||
if (cls._meta.get('index_cls', True) and
|
||||
cls._meta.get('allow_inheritance', ALLOW_INHERITANCE) is True):
|
||||
indexes.append([(u'_cls', 1)])
|
||||
cls._meta.get('allow_inheritance', ALLOW_INHERITANCE) is True):
|
||||
indexes.append([(u'_cls', 1)])
|
||||
|
||||
return indexes
|
||||
|
||||
@ -678,7 +691,8 @@ class Document(BaseDocument):
|
||||
"""
|
||||
|
||||
required = cls.list_indexes()
|
||||
existing = [info['key'] for info in cls._get_collection().index_information().values()]
|
||||
existing = [info['key']
|
||||
for info in cls._get_collection().index_information().values()]
|
||||
missing = [index for index in required if index not in existing]
|
||||
extra = [index for index in existing if index not in required]
|
||||
|
||||
@ -696,6 +710,7 @@ class Document(BaseDocument):
|
||||
|
||||
|
||||
class DynamicDocument(Document):
|
||||
|
||||
"""A Dynamic Document class allowing flexible, expandable and uncontrolled
|
||||
schemas. As a :class:`~mongoengine.Document` subclass, acts in the same
|
||||
way as an ordinary document but has expando style properties. Any data
|
||||
@ -711,7 +726,7 @@ class DynamicDocument(Document):
|
||||
|
||||
# The __metaclass__ attribute is removed by 2to3 when running with Python3
|
||||
# my_metaclass is defined so that metaclass can be queried in Python 2 & 3
|
||||
my_metaclass = TopLevelDocumentMetaclass
|
||||
my_metaclass = TopLevelDocumentMetaclass
|
||||
__metaclass__ = TopLevelDocumentMetaclass
|
||||
|
||||
_dynamic = True
|
||||
@ -727,6 +742,7 @@ class DynamicDocument(Document):
|
||||
|
||||
|
||||
class DynamicEmbeddedDocument(EmbeddedDocument):
|
||||
|
||||
"""A Dynamic Embedded Document class allowing flexible, expandable and
|
||||
uncontrolled schemas. See :class:`~mongoengine.DynamicDocument` for more
|
||||
information about dynamic documents.
|
||||
@ -734,7 +750,7 @@ class DynamicEmbeddedDocument(EmbeddedDocument):
|
||||
|
||||
# The __metaclass__ attribute is removed by 2to3 when running with Python3
|
||||
# my_metaclass is defined so that metaclass can be queried in Python 2 & 3
|
||||
my_metaclass = DocumentMetaclass
|
||||
my_metaclass = DocumentMetaclass
|
||||
__metaclass__ = DocumentMetaclass
|
||||
|
||||
_dynamic = True
|
||||
@ -753,6 +769,7 @@ class DynamicEmbeddedDocument(EmbeddedDocument):
|
||||
|
||||
|
||||
class MapReduceDocument(object):
|
||||
|
||||
"""A document returned from a map/reduce query.
|
||||
|
||||
:param collection: An instance of :class:`~pymongo.Collection`
|
||||
@ -783,7 +800,7 @@ class MapReduceDocument(object):
|
||||
try:
|
||||
self.key = id_field_type(self.key)
|
||||
except:
|
||||
raise Exception("Could not cast key as %s" % \
|
||||
raise Exception("Could not cast key as %s" %
|
||||
id_field_type.__name__)
|
||||
|
||||
if not hasattr(self, "_key_object"):
|
||||
|
@ -39,6 +39,7 @@ RE_TYPE = type(re.compile(''))
|
||||
|
||||
|
||||
class BaseQuerySet(object):
|
||||
|
||||
"""A set of results returned from a query. Wraps a MongoDB cursor,
|
||||
providing :class:`~mongoengine.Document` objects as the results.
|
||||
"""
|
||||
@ -64,6 +65,8 @@ class BaseQuerySet(object):
|
||||
self._none = False
|
||||
self._as_pymongo = False
|
||||
self._as_pymongo_coerce = False
|
||||
self._search_text = None
|
||||
self._include_text_scores = False
|
||||
|
||||
# If inheritance is allowed, only return instances and instances of
|
||||
# subclasses of the class being used
|
||||
@ -71,7 +74,8 @@ class BaseQuerySet(object):
|
||||
if len(self._document._subclasses) == 1:
|
||||
self._initial_query = {"_cls": self._document._subclasses[0]}
|
||||
else:
|
||||
self._initial_query = {"_cls": {"$in": self._document._subclasses}}
|
||||
self._initial_query = {
|
||||
"_cls": {"$in": self._document._subclasses}}
|
||||
self._loaded_fields = QueryFieldList(always_include=['_cls'])
|
||||
self._cursor_obj = None
|
||||
self._limit = None
|
||||
@ -148,6 +152,7 @@ class BaseQuerySet(object):
|
||||
return queryset._get_scalar(
|
||||
queryset._document._from_son(queryset._cursor[key],
|
||||
_auto_dereference=self._auto_dereference))
|
||||
|
||||
if queryset._as_pymongo:
|
||||
return queryset._get_as_pymongo(queryset._cursor[key])
|
||||
return queryset._document._from_son(queryset._cursor[key],
|
||||
@ -184,6 +189,35 @@ class BaseQuerySet(object):
|
||||
"""
|
||||
return self.__call__(*q_objs, **query)
|
||||
|
||||
def search_text(self, text, language=None, include_text_scores=False):
|
||||
"""
|
||||
Start a text search, using text indexes.
|
||||
|
||||
:param language: The language that determines the list of stop words
|
||||
for the search and the rules for the stemmer and tokenizer.
|
||||
If not specified, the search uses the default language of the index.
|
||||
For supported languages, see `Text Search Languages <http://docs.mongodb.org/manual/reference/text-search-languages/#text-search-languages>`_.
|
||||
|
||||
:param include_text_scores: If True, automatically add a text_score attribute to Document.
|
||||
|
||||
"""
|
||||
queryset = self.clone()
|
||||
if queryset._search_text:
|
||||
raise OperationError(
|
||||
"Is not possible to use search_text two times.")
|
||||
|
||||
query_kwargs = {'$search': text}
|
||||
if language:
|
||||
query_kwargs['$language'] = language
|
||||
|
||||
queryset._query_obj &= Q(__raw__={'$text': query_kwargs})
|
||||
queryset._mongo_query = None
|
||||
queryset._cursor_obj = None
|
||||
queryset._search_text = text
|
||||
queryset._include_text_scores = include_text_scores
|
||||
|
||||
return queryset
|
||||
|
||||
def get(self, *q_objs, **query):
|
||||
"""Retrieve the matching object raising
|
||||
:class:`~mongoengine.queryset.MultipleObjectsReturned` or
|
||||
@ -322,10 +356,10 @@ class BaseQuerySet(object):
|
||||
try:
|
||||
ids = self._collection.insert(raw, **write_concern)
|
||||
except pymongo.errors.DuplicateKeyError, err:
|
||||
message = 'Could not save document (%s)';
|
||||
message = 'Could not save document (%s)'
|
||||
raise NotUniqueError(message % unicode(err))
|
||||
except pymongo.errors.OperationFailure, err:
|
||||
message = 'Could not save document (%s)';
|
||||
message = 'Could not save document (%s)'
|
||||
if re.match('^E1100[01] duplicate key', unicode(err)):
|
||||
# E11000 - duplicate key error index
|
||||
# E11001 - duplicate key on update
|
||||
@ -408,7 +442,7 @@ class BaseQuerySet(object):
|
||||
ref_q = document_cls.objects(**{field_name + '__in': self})
|
||||
ref_q_count = ref_q.count()
|
||||
if (doc != document_cls and ref_q_count > 0
|
||||
or (doc == document_cls and ref_q_count > 0)):
|
||||
or (doc == document_cls and ref_q_count > 0)):
|
||||
ref_q.delete(write_concern=write_concern)
|
||||
elif rule == NULLIFY:
|
||||
document_cls.objects(**{field_name + '__in': self}).update(
|
||||
@ -418,7 +452,8 @@ class BaseQuerySet(object):
|
||||
write_concern=write_concern,
|
||||
**{'pull_all__%s' % field_name: self})
|
||||
|
||||
queryset._collection.remove(queryset._query, write_concern=write_concern)
|
||||
queryset._collection.remove(
|
||||
queryset._query, write_concern=write_concern)
|
||||
|
||||
def update(self, upsert=False, multi=True, write_concern=None,
|
||||
full_result=False, **update):
|
||||
@ -515,7 +550,8 @@ class BaseQuerySet(object):
|
||||
raise OperationError("Conflicting parameters: remove and new")
|
||||
|
||||
if not update and not upsert and not remove:
|
||||
raise OperationError("No update parameters, must either update or remove")
|
||||
raise OperationError(
|
||||
"No update parameters, must either update or remove")
|
||||
|
||||
queryset = self.clone()
|
||||
query = queryset._query
|
||||
@ -622,13 +658,15 @@ class BaseQuerySet(object):
|
||||
:class:`~mongoengine.queryset.base.BaseQuerySet` into another child class
|
||||
"""
|
||||
if not isinstance(cls, BaseQuerySet):
|
||||
raise OperationError('%s is not a subclass of BaseQuerySet' % cls.__name__)
|
||||
raise OperationError(
|
||||
'%s is not a subclass of BaseQuerySet' % cls.__name__)
|
||||
|
||||
copy_props = ('_mongo_query', '_initial_query', '_none', '_query_obj',
|
||||
'_where_clause', '_loaded_fields', '_ordering', '_snapshot',
|
||||
'_timeout', '_class_check', '_slave_okay', '_read_preference',
|
||||
'_iter', '_scalar', '_as_pymongo', '_as_pymongo_coerce',
|
||||
'_limit', '_skip', '_hint', '_auto_dereference')
|
||||
'_limit', '_skip', '_hint', '_auto_dereference',
|
||||
'_search_text', '_include_text_scores')
|
||||
|
||||
for prop in copy_props:
|
||||
val = getattr(self, prop)
|
||||
@ -714,11 +752,14 @@ class BaseQuerySet(object):
|
||||
distinct = self._dereference(queryset._cursor.distinct(field), 1,
|
||||
name=field, instance=self._document)
|
||||
|
||||
# We may need to cast to the correct type eg. ListField(EmbeddedDocumentField)
|
||||
doc_field = getattr(self._document._fields.get(field), "field", None)
|
||||
# We may need to cast to the correct type eg.
|
||||
# ListField(EmbeddedDocumentField)
|
||||
doc_field = getattr(
|
||||
self._document._fields.get(field), "field", None)
|
||||
instance = getattr(doc_field, "document_type", False)
|
||||
EmbeddedDocumentField = _import_class('EmbeddedDocumentField')
|
||||
GenericEmbeddedDocumentField = _import_class('GenericEmbeddedDocumentField')
|
||||
GenericEmbeddedDocumentField = _import_class(
|
||||
'GenericEmbeddedDocumentField')
|
||||
if instance and isinstance(doc_field, (EmbeddedDocumentField,
|
||||
GenericEmbeddedDocumentField)):
|
||||
distinct = [instance(**doc) for doc in distinct]
|
||||
@ -799,7 +840,8 @@ class BaseQuerySet(object):
|
||||
for value, group in itertools.groupby(fields, lambda x: x[1]):
|
||||
fields = [field for field, value in group]
|
||||
fields = queryset._fields_to_dbfields(fields)
|
||||
queryset._loaded_fields += QueryFieldList(fields, value=value, _only_called=_only_called)
|
||||
queryset._loaded_fields += QueryFieldList(
|
||||
fields, value=value, _only_called=_only_called)
|
||||
|
||||
return queryset
|
||||
|
||||
@ -1036,7 +1078,6 @@ class BaseQuerySet(object):
|
||||
ordered_output.append(('db', get_db(db_alias).name))
|
||||
del remaing_args[0]
|
||||
|
||||
|
||||
for part in remaing_args:
|
||||
value = output.get(part)
|
||||
if value:
|
||||
@ -1292,6 +1333,13 @@ class BaseQuerySet(object):
|
||||
cursor_args['slave_okay'] = self._slave_okay
|
||||
if self._loaded_fields:
|
||||
cursor_args['fields'] = self._loaded_fields.as_dict()
|
||||
|
||||
if self._include_text_scores:
|
||||
if 'fields' not in cursor_args:
|
||||
cursor_args['fields'] = {}
|
||||
|
||||
cursor_args['fields']['text_score'] = {'$meta': "textScore"}
|
||||
|
||||
return cursor_args
|
||||
|
||||
@property
|
||||
|
File diff suppressed because it is too large
Load Diff
Loading…
x
Reference in New Issue
Block a user