From e6c0280b40f18074c0a6267b29f2a77f21c1c779 Mon Sep 17 00:00:00 2001
From: Emmanuel Leblond
Date: Mon, 30 Oct 2017 18:15:51 +0100
Subject: [PATCH] Add LazyReferenceField

---
 docs/changelog.rst                 |   6 +-
 mongoengine/base/__init__.py       |   2 +-
 mongoengine/base/datastructures.py |  42 ++++-
 mongoengine/fields.py              | 139 ++++++++++++++-
 tests/fields/fields.py             | 276 ++++++++++++++++++++++++++++-
 5 files changed, 456 insertions(+), 9 deletions(-)

diff --git a/docs/changelog.rst b/docs/changelog.rst
index f04ab314..834fbee2 100644
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -2,9 +2,9 @@
 Changelog
 =========
 
-Development
-===========
-- (Fill this out as you fix issues and develop your features).
+Changes in 0.15.0
+=================
+- Add LazyReferenceField to address #1230
 
 Changes in 0.14.1
 =================
diff --git a/mongoengine/base/__init__.py b/mongoengine/base/__init__.py
index da31b922..e069a147 100644
--- a/mongoengine/base/__init__.py
+++ b/mongoengine/base/__init__.py
@@ -15,7 +15,7 @@ __all__ = (
     'UPDATE_OPERATORS', '_document_registry', 'get_document',
 
     # datastructures
-    'BaseDict', 'BaseList', 'EmbeddedDocumentList',
+    'BaseDict', 'BaseList', 'EmbeddedDocumentList', 'LazyReference',
 
     # document
     'BaseDocument',
diff --git a/mongoengine/base/datastructures.py b/mongoengine/base/datastructures.py
index 14fe95e9..043df471 100644
--- a/mongoengine/base/datastructures.py
+++ b/mongoengine/base/datastructures.py
@@ -2,11 +2,12 @@ import itertools
 import weakref
 
 import six
+from bson import DBRef
 
 from mongoengine.common import _import_class
 from mongoengine.errors import DoesNotExist, MultipleObjectsReturned
 
-__all__ = ('BaseDict', 'BaseList', 'EmbeddedDocumentList')
+__all__ = ('BaseDict', 'BaseList', 'EmbeddedDocumentList', 'LazyReference')
 
 
 class BaseDict(dict):
@@ -445,3 +446,42 @@ class StrictDict(object):
 
             cls._classes[allowed_keys] = SpecificStrictDict
         return cls._classes[allowed_keys]
+
+
+class LazyReference(DBRef):
+    __slots__ = ('_cached_doc', 'passthrough', 'document_type')
+
+    def fetch(self, force=False):
+        if not self._cached_doc or force:
+            self._cached_doc = self.document_type.objects.get(pk=self.pk)
+            if not self._cached_doc:
+                raise DoesNotExist('Trying to dereference unknown document %s' % (self))
+        return self._cached_doc
+
+    @property
+    def pk(self):
+        return self.id
+
+    def __init__(self, document_type, pk, cached_doc=None, passthrough=False):
+        self.document_type = document_type
+        self._cached_doc = cached_doc
+        self.passthrough = passthrough
+        super(LazyReference, self).__init__(self.document_type._get_collection_name(), pk)
+
+    def __getitem__(self, name):
+        if not self.passthrough:
+            raise KeyError()
+        document = self.fetch()
+        return document[name]
+
+    def __getattr__(self, name):
+        if not object.__getattribute__(self, 'passthrough'):
+            raise AttributeError()
+        document = self.fetch()
+        try:
+            return document[name]
+        except KeyError:
+            raise AttributeError()
+
+    def __repr__(self):
+        return "<LazyReference(%s, %r)>" % (self.document_type, self.pk)
diff --git a/mongoengine/fields.py b/mongoengine/fields.py
index fffba7ac..73a62bc5 100644
--- a/mongoengine/fields.py
+++ b/mongoengine/fields.py
@@ -26,7 +26,8 @@ except ImportError:
     Int64 = long
 
 from mongoengine.base import (BaseDocument, BaseField, ComplexBaseField,
-                              GeoJsonBaseField, ObjectIdField, get_document)
+                              GeoJsonBaseField, ObjectIdField, get_document,
+                              LazyReference)
 from mongoengine.connection import DEFAULT_CONNECTION_NAME, get_db
 from mongoengine.document import Document, EmbeddedDocument
 from mongoengine.errors import DoesNotExist, InvalidQueryError, ValidationError
@@ -46,6 +47,8 @@ __all__ = (
     'GenericEmbeddedDocumentField', 'DynamicField', 'ListField',
     'SortedListField', 'EmbeddedDocumentListField', 'DictField',
     'MapField', 'ReferenceField', 'CachedReferenceField',
+    'LazyReferenceField',
+    # 'GenericLazyReferenceField',
     'GenericReferenceField', 'BinaryField', 'GridFSError', 'GridFSProxy',
     'FileField', 'ImageGridFsProxy', 'ImproperlyConfigured', 'ImageField',
     'GeoPointField', 'PointField', 'LineStringField', 'PolygonField',
@@ -953,6 +956,15 @@ class ReferenceField(BaseField):
     """A reference to a document that will be automatically dereferenced on
     access (lazily).
 
+    Note this means you will get a database I/O access every time you access
+    this field. This is necessary because the field returns a :class:`~mongoengine.Document`
+    whose precise type can depend on the value of the `_cls` field present in the
+    document in database.
+    In short, using this type of field can lead to poor performance (especially
+    if you access this field only to retrieve its `pk` field which is already
+    known before dereference). To solve this you should consider using the
+    :class:`~mongoengine.fields.LazyReferenceField`.
+
     Use the `reverse_delete_rule` to handle what should happen if the document
     the field is referencing is deleted. EmbeddedDocuments, DictFields and
     MapFields does not support reverse_delete_rule and an `InvalidDocumentError`
@@ -1087,8 +1099,8 @@ class ReferenceField(BaseField):
 
     def validate(self, value):
 
-        if not isinstance(value, (self.document_type, DBRef, ObjectId)):
-            self.error('A ReferenceField only accepts DBRef, ObjectId or documents')
+        if not isinstance(value, (self.document_type, LazyReference, DBRef, ObjectId)):
+            self.error('A ReferenceField only accepts DBRef, LazyReference, ObjectId or documents')
 
         if isinstance(value, Document) and value.id is None:
             self.error('You can only reference documents once they have been '
@@ -2141,3 +2153,124 @@ class MultiPolygonField(GeoJsonBaseField):
     .. versionadded:: 0.9
     """
     _type = 'MultiPolygon'
+
+
+class LazyReferenceField(BaseField):
+    """A really lazy reference to a document.
+    Unlike the :class:`~mongoengine.fields.ReferenceField` it must be manually
+    dereferenced using its ``fetch()`` method.
+    """
+
+    def __init__(self, document_type, passthrough=False, dbref=False,
+                 reverse_delete_rule=DO_NOTHING, **kwargs):
+        """Initialises the Reference Field.
+
+        :param dbref: Store the reference as :class:`~pymongo.dbref.DBRef`
+          or as the :class:`~pymongo.objectid.ObjectId`.id .
+        :param reverse_delete_rule: Determines what to do when the referring
+          object is deleted
+        :param passthrough: When trying to access unknown fields, the
+          :class:`~mongoengine.base.datastructures.LazyReference` instance will
+          automatically call `fetch()` and try to retrieve the field on the fetched
+          document. Note this only works when getting a field (not setting or deleting).
+        """
+        if (
+            not isinstance(document_type, six.string_types) and
+            not issubclass(document_type, Document)
+        ):
+            self.error('Argument to LazyReferenceField constructor must be a '
+                       'document class or a string')
+
+        self.dbref = dbref
+        self.passthrough = passthrough
+        self.document_type_obj = document_type
+        self.reverse_delete_rule = reverse_delete_rule
+        super(LazyReferenceField, self).__init__(**kwargs)
+
+    @property
+    def document_type(self):
+        if isinstance(self.document_type_obj, six.string_types):
+            if self.document_type_obj == RECURSIVE_REFERENCE_CONSTANT:
+                self.document_type_obj = self.owner_document
+            else:
+                self.document_type_obj = get_document(self.document_type_obj)
+        return self.document_type_obj
+
+    def __get__(self, instance, owner):
+        """Descriptor to allow lazy dereferencing."""
+        if instance is None:
+            # Document class being used rather than a document object
+            return self
+
+        value = instance._data.get(self.name)
+        if isinstance(value, LazyReference):
+            if value.passthrough != self.passthrough:
+                instance._data[self.name] = LazyReference(
+                    value.document_type, value.pk, passthrough=self.passthrough)
+        elif value is not None:
+            if isinstance(value, self.document_type):
+                value = LazyReference(self.document_type, value.pk, passthrough=self.passthrough)
+            elif isinstance(value, DBRef):
+                value = LazyReference(self.document_type, value.id, passthrough=self.passthrough)
+            else:
+                # value is the primary key of the referenced document
+                value = LazyReference(self.document_type, value, passthrough=self.passthrough)
+            instance._data[self.name] = value
+
+        return super(LazyReferenceField, self).__get__(instance, owner)
+
+    def to_mongo(self, value):
+        if isinstance(value, LazyReference):
+            pk = value.pk
+        elif isinstance(value, self.document_type):
+            pk = value.pk
+        elif isinstance(value, DBRef):
+            pk = value.id
+        else:
+            # value is the primary key of the referenced document
+            pk = value
+        id_field_name = self.document_type._meta['id_field']
+        id_field = self.document_type._fields[id_field_name]
+        pk = id_field.to_mongo(pk)
+        if self.dbref:
+            return DBRef(self.document_type._get_collection_name(), pk)
+        else:
+            return pk
+
+    def validate(self, value):
+        if isinstance(value, LazyReference):
+            if not issubclass(value.document_type, self.document_type):
+                self.error('Reference must be on a `%s` document.' % self.document_type)
+            pk = value.pk
+        elif isinstance(value, self.document_type):
+            pk = value.pk
+        elif isinstance(value, DBRef):
+            # The DBRef must target the referenced document's collection
+            collection = self.document_type._get_collection_name()
+            if value.collection != collection:
+                self.error("DBRef on bad collection (must be on `%s`)" % collection)
+            pk = value.id
+        else:
+            # value is the primary key of the referenced document
+            id_field_name = self.document_type._meta['id_field']
+            id_field = getattr(self.document_type, id_field_name)
+            pk = value
+            try:
+                id_field.validate(pk)
+            except ValidationError:
+                self.error("value should be `{0}` document, LazyReference or DBRef on `{0}` "
+                           "or `{0}`'s primary key (i.e. `{1}`)".format(
+                               self.document_type.__name__, type(id_field).__name__))
+
+        if pk is None:
+            self.error('You can only reference documents once they have been '
+                       'saved to the database')
+
+    def prepare_query_value(self, op, value):
+        if value is None:
+            return None
+        super(LazyReferenceField, self).prepare_query_value(op, value)
+        return self.to_mongo(value)
+
+    def lookup_member(self, member_name):
+        return self.document_type._fields.get(member_name)
diff --git a/tests/fields/fields.py b/tests/fields/fields.py
index 7a0ccc25..84156622 100644
--- a/tests/fields/fields.py
+++ b/tests/fields/fields.py
@@ -26,7 +26,7 @@ except ImportError:
 from mongoengine import *
 from mongoengine.connection import get_db
 from mongoengine.base import (BaseDict, BaseField, EmbeddedDocumentList,
-                              _document_registry)
+                              _document_registry, LazyReference)
 
 from tests.utils import MongoDBTestCase
 
@@ -931,7 +931,9 @@ class FieldTest(MongoDBTestCase):
             comments = ListField(EmbeddedDocumentField(Comment))
             tags = ListField(StringField())
             authors = ListField(ReferenceField(User))
+            authors_as_lazy = ListField(LazyReferenceField(User))
             generic = ListField(GenericReferenceField())
+            # generic_as_lazy = ListField(LazyGenericReferenceField())
 
         User.drop_collection()
         BlogPost.drop_collection()
@@ -969,6 +971,15 @@ class FieldTest(MongoDBTestCase):
         post.authors = [user]
         post.validate()
 
+        post.authors_as_lazy = [Comment()]
+        self.assertRaises(ValidationError, post.validate)
+
+        post.authors_as_lazy = [User()]
+        self.assertRaises(ValidationError, post.validate)
+
+        post.authors_as_lazy = [user]
+        post.validate()
+
         post.generic = [1, 2]
         self.assertRaises(ValidationError, post.validate)
 
@@ -981,6 +992,18 @@ class FieldTest(MongoDBTestCase):
         post.generic = [user]
         post.validate()
 
+        # post.generic_as_lazy = [1, 2]
+        # self.assertRaises(ValidationError, post.validate)
+
+        # post.generic_as_lazy = [User(), Comment()]
+        # self.assertRaises(ValidationError, post.validate)
+
+        # post.generic_as_lazy = [Comment()]
+        # self.assertRaises(ValidationError, post.validate)
+
+        # post.generic_as_lazy = [user]
+        # post.validate()
+
     def test_sorted_list_sorting(self):
         """Ensure that a sorted list field properly sorts values.
         """
@@ -4598,5 +4621,256 @@ class CachedReferenceFieldTest(MongoDBTestCase):
         self.assertTrue(isinstance(ocorrence.animal, Animal))
 
 
+class LazyReferenceFieldTest(MongoDBTestCase):
+    def test_lazy_reference_config(self):
+        # Make sure LazyReferenceField only accepts a document class or a string
+        # with a document class name.
+        self.assertRaises(ValidationError, LazyReferenceField, EmbeddedDocument)
+
+    def test_lazy_reference_simple(self):
+        class Animal(Document):
+            name = StringField()
+            tag = StringField()
+
+        class Ocurrence(Document):
+            person = StringField()
+            animal = LazyReferenceField(Animal)
+
+        Animal.drop_collection()
+        Ocurrence.drop_collection()
+
+        animal = Animal(name="Leopard", tag="heavy").save()
+        Ocurrence(person="test", animal=animal).save()
+        p = Ocurrence.objects.get()
+        self.assertIsInstance(p.animal, LazyReference)
+        fetched_animal = p.animal.fetch()
+        self.assertEqual(fetched_animal, animal)
+        # `fetch` keeps the referenced document cached by default...
+        animal.tag = "not so heavy"
+        animal.save()
+        double_fetch = p.animal.fetch()
+        self.assertIs(fetched_animal, double_fetch)
+        self.assertEqual(double_fetch.tag, "heavy")
+        # ...unless specified otherwise
+        fetch_force = p.animal.fetch(force=True)
+        self.assertIsNot(fetch_force, fetched_animal)
+        self.assertEqual(fetch_force.tag, "not so heavy")
+
+    def test_lazy_reference_fetch_invalid_ref(self):
+        class Animal(Document):
+            name = StringField()
+            tag = StringField()
+
+        class Ocurrence(Document):
+            person = StringField()
+            animal = LazyReferenceField(Animal)
+
+        Animal.drop_collection()
+        Ocurrence.drop_collection()
+
+        animal = Animal(name="Leopard", tag="heavy").save()
+        Ocurrence(person="test", animal=animal).save()
+        animal.delete()
+        p = Ocurrence.objects.get()
+        self.assertIsInstance(p.animal, LazyReference)
+        with self.assertRaises(DoesNotExist):
+            p.animal.fetch()
+
+    def test_lazy_reference_set(self):
+        class Animal(Document):
+            meta = {'allow_inheritance': True}
+
+            name = StringField()
+            tag = StringField()
+
+        class Ocurrence(Document):
+            person = StringField()
+            animal = LazyReferenceField(Animal)
+
+        Animal.drop_collection()
+        Ocurrence.drop_collection()
+
+        class SubAnimal(Animal):
+            nick = StringField()
+
+        animal = Animal(name="Leopard", tag="heavy").save()
+        sub_animal = SubAnimal(nick='doggo', name='dog').save()
+        for ref in (
+                animal,
+                animal.pk,
+                DBRef(animal._get_collection_name(), animal.pk),
+                LazyReference(Animal, animal.pk),
+
+                sub_animal,
+                sub_animal.pk,
+                DBRef(sub_animal._get_collection_name(), sub_animal.pk),
+                LazyReference(SubAnimal, sub_animal.pk),
+                ):
+            p = Ocurrence(person="test", animal=ref).save()
+            p.reload()
+            self.assertIsInstance(p.animal, LazyReference)
+            p.animal.fetch()
+
+    def test_lazy_reference_bad_set(self):
+        class Animal(Document):
+            name = StringField()
+            tag = StringField()
+
+        class Ocurrence(Document):
+            person = StringField()
+            animal = LazyReferenceField(Animal)
+
+        Animal.drop_collection()
+        Ocurrence.drop_collection()
+
+        class BadDoc(Document):
+            pass
+
+        animal = Animal(name="Leopard", tag="heavy").save()
+        baddoc = BadDoc().save()
+        for bad in (
+                42,
+                'foo',
+                baddoc,
+                DBRef(baddoc._get_collection_name(), animal.pk),
+                LazyReference(BadDoc, animal.pk)
+                ):
+            with self.assertRaises(ValidationError):
+                p = Ocurrence(person="test", animal=bad).save()
+
+    def test_lazy_reference_query_conversion(self):
+        """Ensure that LazyReferenceFields can be queried using objects and values
+        of the type of the primary key of the referenced object.
+        """
+        class Member(Document):
+            user_num = IntField(primary_key=True)
+
+        class BlogPost(Document):
+            title = StringField()
+            author = LazyReferenceField(Member, dbref=False)
+
+        Member.drop_collection()
+        BlogPost.drop_collection()
+
+        m1 = Member(user_num=1)
+        m1.save()
+        m2 = Member(user_num=2)
+        m2.save()
+
+        post1 = BlogPost(title='post 1', author=m1)
+        post1.save()
+
+        post2 = BlogPost(title='post 2', author=m2)
+        post2.save()
+
+        post = BlogPost.objects(author=m1).first()
+        self.assertEqual(post.id, post1.id)
+
+        post = BlogPost.objects(author=m2).first()
+        self.assertEqual(post.id, post2.id)
+
+        # Same thing by passing a LazyReference instance
+        post = BlogPost.objects(author=LazyReference(Member, m2.pk)).first()
+        self.assertEqual(post.id, post2.id)
+
+    def test_lazy_reference_query_conversion_dbref(self):
+        """Ensure that LazyReferenceFields can be queried using objects and values
+        of the type of the primary key of the referenced object.
+        """
+        class Member(Document):
+            user_num = IntField(primary_key=True)
+
+        class BlogPost(Document):
+            title = StringField()
+            author = LazyReferenceField(Member, dbref=True)
+
+        Member.drop_collection()
+        BlogPost.drop_collection()
+
+        m1 = Member(user_num=1)
+        m1.save()
+        m2 = Member(user_num=2)
+        m2.save()
+
+        post1 = BlogPost(title='post 1', author=m1)
+        post1.save()
+
+        post2 = BlogPost(title='post 2', author=m2)
+        post2.save()
+
+        post = BlogPost.objects(author=m1).first()
+        self.assertEqual(post.id, post1.id)
+
+        post = BlogPost.objects(author=m2).first()
+        self.assertEqual(post.id, post2.id)
+
+        # Same thing by passing a LazyReference instance
+        post = BlogPost.objects(author=LazyReference(Member, m2.pk)).first()
+        self.assertEqual(post.id, post2.id)
+
+    def test_lazy_reference_passthrough(self):
+        class Animal(Document):
+            name = StringField()
+            tag = StringField()
+
+        class Ocurrence(Document):
+            animal = LazyReferenceField(Animal, passthrough=False)
+            animal_passthrough = LazyReferenceField(Animal, passthrough=True)
+
+        Animal.drop_collection()
+        Ocurrence.drop_collection()
+
+        animal = Animal(name="Leopard", tag="heavy").save()
+        Ocurrence(animal=animal, animal_passthrough=animal).save()
+        p = Ocurrence.objects.get()
+        self.assertIsInstance(p.animal, LazyReference)
+        with self.assertRaises(KeyError):
+            p.animal['name']
+        with self.assertRaises(AttributeError):
+            p.animal.name
+        self.assertEqual(p.animal.pk, animal.pk)
+
+        self.assertEqual(p.animal_passthrough.name, "Leopard")
+        self.assertEqual(p.animal_passthrough['name'], "Leopard")
+
+        # Should not be able to access referenced document's methods
+        with self.assertRaises(AttributeError):
+            p.animal.save
+        with self.assertRaises(KeyError):
+            p.animal['save']
+
+    def test_lazy_reference_not_set(self):
+        class Animal(Document):
+            name = StringField()
+            tag = StringField()
+
+        class Ocurrence(Document):
+            person = StringField()
+            animal = LazyReferenceField(Animal)
+
+        Animal.drop_collection()
+        Ocurrence.drop_collection()
+
+        Ocurrence(person='foo').save()
+        p = Ocurrence.objects.get()
+        self.assertIs(p.animal, None)
+
+    def test_lazy_reference_equality(self):
+        class Animal(Document):
+            name = StringField()
+            tag = StringField()
+
+        Animal.drop_collection()
+
+        animal = Animal(name="Leopard", tag="heavy").save()
+        animalref = LazyReference(Animal, animal.pk)
+        self.assertEqual(animal, animalref)
+        self.assertEqual(animalref, animal)
+
+        other_animalref = LazyReference(Animal, ObjectId("54495ad94c934721ede76f90"))
+        self.assertNotEqual(animal, other_animalref)
+        self.assertNotEqual(other_animalref, animal)
+
+
 if __name__ == '__main__':
     unittest.main()