Add LazyReferenceField

This commit is contained in:
Emmanuel Leblond
2017-10-30 18:15:51 +01:00
parent a1494c4c93
commit e6c0280b40
5 changed files with 456 additions and 9 deletions

View File

@@ -2,9 +2,9 @@
Changelog Changelog
========= =========
Development Changes in 0.15.0
=========== =================
- (Fill this out as you fix issues and develop your features). - Add LazyReferenceField to address #1230
Changes in 0.14.1 Changes in 0.14.1
================= =================

View File

@@ -15,7 +15,7 @@ __all__ = (
'UPDATE_OPERATORS', '_document_registry', 'get_document', 'UPDATE_OPERATORS', '_document_registry', 'get_document',
# datastructures # datastructures
'BaseDict', 'BaseList', 'EmbeddedDocumentList', 'BaseDict', 'BaseList', 'EmbeddedDocumentList', 'LazyReference',
# document # document
'BaseDocument', 'BaseDocument',

View File

@@ -2,11 +2,12 @@ import itertools
import weakref import weakref
import six import six
from bson import DBRef
from mongoengine.common import _import_class from mongoengine.common import _import_class
from mongoengine.errors import DoesNotExist, MultipleObjectsReturned from mongoengine.errors import DoesNotExist, MultipleObjectsReturned
__all__ = ('BaseDict', 'BaseList', 'EmbeddedDocumentList') __all__ = ('BaseDict', 'BaseList', 'EmbeddedDocumentList', 'LazyReference')
class BaseDict(dict): class BaseDict(dict):
@@ -445,3 +446,42 @@ class StrictDict(object):
cls._classes[allowed_keys] = SpecificStrictDict cls._classes[allowed_keys] = SpecificStrictDict
return cls._classes[allowed_keys] return cls._classes[allowed_keys]
class LazyReference(DBRef):
__slots__ = ('_cached_doc', 'passthrough', 'document_type')
def fetch(self, force=False):
if not self._cached_doc or force:
self._cached_doc = self.document_type.objects.get(pk=self.pk)
if not self._cached_doc:
raise DoesNotExist('Trying to dereference unknown document %s' % (self))
return self._cached_doc
@property
def pk(self):
return self.id
def __init__(self, document_type, pk, cached_doc=None, passthrough=False):
self.document_type = document_type
self._cached_doc = cached_doc
self.passthrough = passthrough
super(LazyReference, self).__init__(self.document_type._get_collection_name(), pk)
def __getitem__(self, name):
if not self.passthrough:
raise KeyError()
document = self.fetch()
return document[name]
def __getattr__(self, name):
if not object.__getattribute__(self, 'passthrough'):
raise AttributeError()
document = self.fetch()
try:
return document[name]
except KeyError:
raise AttributeError()
def __repr__(self):
return "<LazyReference(%s, %r)>" % (self.document_type, self.pk)

View File

@@ -26,7 +26,8 @@ except ImportError:
Int64 = long Int64 = long
from mongoengine.base import (BaseDocument, BaseField, ComplexBaseField, from mongoengine.base import (BaseDocument, BaseField, ComplexBaseField,
GeoJsonBaseField, ObjectIdField, get_document) GeoJsonBaseField, ObjectIdField, get_document,
LazyReference)
from mongoengine.connection import DEFAULT_CONNECTION_NAME, get_db from mongoengine.connection import DEFAULT_CONNECTION_NAME, get_db
from mongoengine.document import Document, EmbeddedDocument from mongoengine.document import Document, EmbeddedDocument
from mongoengine.errors import DoesNotExist, InvalidQueryError, ValidationError from mongoengine.errors import DoesNotExist, InvalidQueryError, ValidationError
@@ -46,6 +47,8 @@ __all__ = (
'GenericEmbeddedDocumentField', 'DynamicField', 'ListField', 'GenericEmbeddedDocumentField', 'DynamicField', 'ListField',
'SortedListField', 'EmbeddedDocumentListField', 'DictField', 'SortedListField', 'EmbeddedDocumentListField', 'DictField',
'MapField', 'ReferenceField', 'CachedReferenceField', 'MapField', 'ReferenceField', 'CachedReferenceField',
'LazyReferenceField',
# 'GenericLazyReferenceField',
'GenericReferenceField', 'BinaryField', 'GridFSError', 'GridFSProxy', 'GenericReferenceField', 'BinaryField', 'GridFSError', 'GridFSProxy',
'FileField', 'ImageGridFsProxy', 'ImproperlyConfigured', 'ImageField', 'FileField', 'ImageGridFsProxy', 'ImproperlyConfigured', 'ImageField',
'GeoPointField', 'PointField', 'LineStringField', 'PolygonField', 'GeoPointField', 'PointField', 'LineStringField', 'PolygonField',
@@ -953,6 +956,15 @@ class ReferenceField(BaseField):
"""A reference to a document that will be automatically dereferenced on """A reference to a document that will be automatically dereferenced on
access (lazily). access (lazily).
Note this means you will get a database I/O access everytime you access
this field. This is necessary because the field returns a :class:`~mongoengine.Document`
which precise type can depend of the value of the `_cls` field present in the
document in database.
In short, using this type of field can lead to poor performances (especially
if you access this field only to retrieve it `pk` field which is already
known before dereference). To solve this you should consider using the
:class:`~mongoengine.fields.LazyReferenceField`.
Use the `reverse_delete_rule` to handle what should happen if the document Use the `reverse_delete_rule` to handle what should happen if the document
the field is referencing is deleted. EmbeddedDocuments, DictFields and the field is referencing is deleted. EmbeddedDocuments, DictFields and
MapFields does not support reverse_delete_rule and an `InvalidDocumentError` MapFields does not support reverse_delete_rule and an `InvalidDocumentError`
@@ -1087,8 +1099,8 @@ class ReferenceField(BaseField):
def validate(self, value): def validate(self, value):
if not isinstance(value, (self.document_type, DBRef, ObjectId)): if not isinstance(value, (self.document_type, LazyReference, DBRef, ObjectId)):
self.error('A ReferenceField only accepts DBRef, ObjectId or documents') self.error('A ReferenceField only accepts DBRef, LazyReference, ObjectId or documents')
if isinstance(value, Document) and value.id is None: if isinstance(value, Document) and value.id is None:
self.error('You can only reference documents once they have been ' self.error('You can only reference documents once they have been '
@@ -2141,3 +2153,124 @@ class MultiPolygonField(GeoJsonBaseField):
.. versionadded:: 0.9 .. versionadded:: 0.9
""" """
_type = 'MultiPolygon' _type = 'MultiPolygon'
class LazyReferenceField(BaseField):
"""A really lazy reference to a document.
Unlike the :class:`~mongoengine.fields.ReferenceField` it must be manually
dereferenced using it ``fetch()`` method.
"""
def __init__(self, document_type, passthrough=False, dbref=False,
reverse_delete_rule=DO_NOTHING, **kwargs):
"""Initialises the Reference Field.
:param dbref: Store the reference as :class:`~pymongo.dbref.DBRef`
or as the :class:`~pymongo.objectid.ObjectId`.id .
:param reverse_delete_rule: Determines what to do when the referring
object is deleted
:param passthrough: When trying to access unknown fields, the
:class:`~mongoengine.base.datastructure.LazyReference` instance will
automatically call `fetch()` and try to retrive the field on the fetched
document. Note this only work getting field (not setting or deleting).
"""
if (
not isinstance(document_type, six.string_types) and
not issubclass(document_type, Document)
):
self.error('Argument to LazyReferenceField constructor must be a '
'document class or a string')
self.dbref = dbref
self.passthrough = passthrough
self.document_type_obj = document_type
self.reverse_delete_rule = reverse_delete_rule
super(LazyReferenceField, self).__init__(**kwargs)
@property
def document_type(self):
if isinstance(self.document_type_obj, six.string_types):
if self.document_type_obj == RECURSIVE_REFERENCE_CONSTANT:
self.document_type_obj = self.owner_document
else:
self.document_type_obj = get_document(self.document_type_obj)
return self.document_type_obj
def __get__(self, instance, owner):
"""Descriptor to allow lazy dereferencing."""
if instance is None:
# Document class being used rather than a document object
return self
value = instance._data.get(self.name)
if isinstance(value, LazyReference):
if value.passthrough != self.passthrough:
instance._data[self.name] = LazyReference(
value.document_type, value.pk, passthrough=self.passthrough)
elif value is not None:
if isinstance(value, self.document_type):
value = LazyReference(self.document_type, value.pk, passthrough=self.passthrough)
elif isinstance(value, DBRef):
value = LazyReference(self.document_type, value.id, passthrough=self.passthrough)
else:
# value is the primary key of the referenced document
value = LazyReference(self.document_type, value, passthrough=self.passthrough)
instance._data[self.name] = value
return super(LazyReferenceField, self).__get__(instance, owner)
def to_mongo(self, value):
if isinstance(value, LazyReference):
pk = value.pk
elif isinstance(value, self.document_type):
pk = value.pk
elif isinstance(value, DBRef):
pk = value.id
else:
# value is the primary key of the referenced document
pk = value
id_field_name = self.document_type._meta['id_field']
id_field = self.document_type._fields[id_field_name]
pk = id_field.to_mongo(pk)
if self.dbref:
return DBRef(self.document_type._get_collection_name(), pk)
else:
return pk
def validate(self, value):
if isinstance(value, LazyReference):
if not issubclass(value.document_type, self.document_type):
self.error('Reference must be on a `%s` document.' % self.document_type)
pk = value.pk
elif isinstance(value, self.document_type):
pk = value.pk
elif isinstance(value, DBRef):
# TODO: check collection ?
collection = self.document_type._get_collection_name()
if value.collection != collection:
self.error("DBRef on bad collection (must be on `%s`)" % collection)
pk = value.id
else:
# value is the primary key of the referenced document
id_field_name = self.document_type._meta['id_field']
id_field = getattr(self.document_type, id_field_name)
pk = value
try:
id_field.validate(pk)
except ValidationError:
self.error("value should be `{0}` document, LazyReference or DBRef on `{0}` "
"or `{0}`'s primary key (i.e. `{1}`)".format(
self.document_type.__name__, type(id_field).__name__))
if pk is None:
self.error('You can only reference documents once they have been '
'saved to the database')
def prepare_query_value(self, op, value):
if value is None:
return None
super(LazyReferenceField, self).prepare_query_value(op, value)
return self.to_mongo(value)
def lookup_member(self, member_name):
return self.document_type._fields.get(member_name)

View File

@@ -26,7 +26,7 @@ except ImportError:
from mongoengine import * from mongoengine import *
from mongoengine.connection import get_db from mongoengine.connection import get_db
from mongoengine.base import (BaseDict, BaseField, EmbeddedDocumentList, from mongoengine.base import (BaseDict, BaseField, EmbeddedDocumentList,
_document_registry) _document_registry, LazyReference)
from tests.utils import MongoDBTestCase from tests.utils import MongoDBTestCase
@@ -931,7 +931,9 @@ class FieldTest(MongoDBTestCase):
comments = ListField(EmbeddedDocumentField(Comment)) comments = ListField(EmbeddedDocumentField(Comment))
tags = ListField(StringField()) tags = ListField(StringField())
authors = ListField(ReferenceField(User)) authors = ListField(ReferenceField(User))
authors_as_lazy = ListField(LazyReferenceField(User))
generic = ListField(GenericReferenceField()) generic = ListField(GenericReferenceField())
# generic_as_lazy = ListField(LazyGenericReferenceField())
User.drop_collection() User.drop_collection()
BlogPost.drop_collection() BlogPost.drop_collection()
@@ -969,6 +971,15 @@ class FieldTest(MongoDBTestCase):
post.authors = [user] post.authors = [user]
post.validate() post.validate()
post.authors_as_lazy = [Comment()]
self.assertRaises(ValidationError, post.validate)
post.authors_as_lazy = [User()]
self.assertRaises(ValidationError, post.validate)
post.authors_as_lazy = [user]
post.validate()
post.generic = [1, 2] post.generic = [1, 2]
self.assertRaises(ValidationError, post.validate) self.assertRaises(ValidationError, post.validate)
@@ -981,6 +992,18 @@ class FieldTest(MongoDBTestCase):
post.generic = [user] post.generic = [user]
post.validate() post.validate()
# post.generic_as_lazy = [1, 2]
# self.assertRaises(ValidationError, post.validate)
# post.generic_as_lazy = [User(), Comment()]
# self.assertRaises(ValidationError, post.validate)
# post.generic_as_lazy = [Comment()]
# self.assertRaises(ValidationError, post.validate)
# post.generic_as_lazy = [user]
# post.validate()
def test_sorted_list_sorting(self): def test_sorted_list_sorting(self):
"""Ensure that a sorted list field properly sorts values. """Ensure that a sorted list field properly sorts values.
""" """
@@ -4598,5 +4621,256 @@ class CachedReferenceFieldTest(MongoDBTestCase):
self.assertTrue(isinstance(ocorrence.animal, Animal)) self.assertTrue(isinstance(ocorrence.animal, Animal))
class LazyReferenceFieldTest(MongoDBTestCase):
def test_lazy_reference_config(self):
# Make sure ReferenceField only accepts a document class or a string
# with a document class name.
self.assertRaises(ValidationError, LazyReferenceField, EmbeddedDocument)
def test_lazy_reference_simple(self):
class Animal(Document):
name = StringField()
tag = StringField()
class Ocurrence(Document):
person = StringField()
animal = LazyReferenceField(Animal)
Animal.drop_collection()
Ocurrence.drop_collection()
animal = Animal(name="Leopard", tag="heavy").save()
Ocurrence(person="test", animal=animal).save()
p = Ocurrence.objects.get()
self.assertIsInstance(p.animal, LazyReference)
fetched_animal = p.animal.fetch()
self.assertEqual(fetched_animal, animal)
# `fetch` keep cache on referenced document by default...
animal.tag = "not so heavy"
animal.save()
double_fetch = p.animal.fetch()
self.assertIs(fetched_animal, double_fetch)
self.assertEqual(double_fetch.tag, "heavy")
# ...unless specified otherwise
fetch_force = p.animal.fetch(force=True)
self.assertIsNot(fetch_force, fetched_animal)
self.assertEqual(fetch_force.tag, "not so heavy")
def test_lazy_reference_fetch_invalid_ref(self):
class Animal(Document):
name = StringField()
tag = StringField()
class Ocurrence(Document):
person = StringField()
animal = LazyReferenceField(Animal)
Animal.drop_collection()
Ocurrence.drop_collection()
animal = Animal(name="Leopard", tag="heavy").save()
Ocurrence(person="test", animal=animal).save()
animal.delete()
p = Ocurrence.objects.get()
self.assertIsInstance(p.animal, LazyReference)
with self.assertRaises(DoesNotExist):
p.animal.fetch()
def test_lazy_reference_set(self):
class Animal(Document):
meta = {'allow_inheritance': True}
name = StringField()
tag = StringField()
class Ocurrence(Document):
person = StringField()
animal = LazyReferenceField(Animal)
Animal.drop_collection()
Ocurrence.drop_collection()
class SubAnimal(Animal):
nick = StringField()
animal = Animal(name="Leopard", tag="heavy").save()
sub_animal = SubAnimal(nick='doggo', name='dog').save()
for ref in (
animal,
animal.pk,
DBRef(animal._get_collection_name(), animal.pk),
LazyReference(Animal, animal.pk),
sub_animal,
sub_animal.pk,
DBRef(sub_animal._get_collection_name(), sub_animal.pk),
LazyReference(SubAnimal, sub_animal.pk),
):
p = Ocurrence(person="test", animal=ref).save()
p.reload()
self.assertIsInstance(p.animal, LazyReference)
p.animal.fetch()
def test_lazy_reference_bad_set(self):
class Animal(Document):
name = StringField()
tag = StringField()
class Ocurrence(Document):
person = StringField()
animal = LazyReferenceField(Animal)
Animal.drop_collection()
Ocurrence.drop_collection()
class BadDoc(Document):
pass
animal = Animal(name="Leopard", tag="heavy").save()
baddoc = BadDoc().save()
for bad in (
42,
'foo',
baddoc,
DBRef(baddoc._get_collection_name(), animal.pk),
LazyReference(BadDoc, animal.pk)
):
with self.assertRaises(ValidationError):
p = Ocurrence(person="test", animal=bad).save()
def test_lazy_reference_query_conversion(self):
"""Ensure that LazyReferenceFields can be queried using objects and values
of the type of the primary key of the referenced object.
"""
class Member(Document):
user_num = IntField(primary_key=True)
class BlogPost(Document):
title = StringField()
author = LazyReferenceField(Member, dbref=False)
Member.drop_collection()
BlogPost.drop_collection()
m1 = Member(user_num=1)
m1.save()
m2 = Member(user_num=2)
m2.save()
post1 = BlogPost(title='post 1', author=m1)
post1.save()
post2 = BlogPost(title='post 2', author=m2)
post2.save()
post = BlogPost.objects(author=m1).first()
self.assertEqual(post.id, post1.id)
post = BlogPost.objects(author=m2).first()
self.assertEqual(post.id, post2.id)
# Same thing by passing a LazyReference instance
post = BlogPost.objects(author=LazyReference(Member, m2.pk)).first()
self.assertEqual(post.id, post2.id)
def test_lazy_reference_query_conversion_dbref(self):
"""Ensure that LazyReferenceFields can be queried using objects and values
of the type of the primary key of the referenced object.
"""
class Member(Document):
user_num = IntField(primary_key=True)
class BlogPost(Document):
title = StringField()
author = LazyReferenceField(Member, dbref=True)
Member.drop_collection()
BlogPost.drop_collection()
m1 = Member(user_num=1)
m1.save()
m2 = Member(user_num=2)
m2.save()
post1 = BlogPost(title='post 1', author=m1)
post1.save()
post2 = BlogPost(title='post 2', author=m2)
post2.save()
post = BlogPost.objects(author=m1).first()
self.assertEqual(post.id, post1.id)
post = BlogPost.objects(author=m2).first()
self.assertEqual(post.id, post2.id)
# Same thing by passing a LazyReference instance
post = BlogPost.objects(author=LazyReference(Member, m2.pk)).first()
self.assertEqual(post.id, post2.id)
def test_lazy_reference_passthrough(self):
class Animal(Document):
name = StringField()
tag = StringField()
class Ocurrence(Document):
animal = LazyReferenceField(Animal, passthrough=False)
animal_passthrough = LazyReferenceField(Animal, passthrough=True)
Animal.drop_collection()
Ocurrence.drop_collection()
animal = Animal(name="Leopard", tag="heavy").save()
Ocurrence(animal=animal, animal_passthrough=animal).save()
p = Ocurrence.objects.get()
self.assertIsInstance(p.animal, LazyReference)
with self.assertRaises(KeyError):
p.animal['name']
with self.assertRaises(AttributeError):
p.animal.name
self.assertEqual(p.animal.pk, animal.pk)
self.assertEqual(p.animal_passthrough.name, "Leopard")
self.assertEqual(p.animal_passthrough['name'], "Leopard")
# Should not be able to access referenced document's methods
with self.assertRaises(AttributeError):
p.animal.save
with self.assertRaises(KeyError):
p.animal['save']
def test_lazy_reference_not_set(self):
class Animal(Document):
name = StringField()
tag = StringField()
class Ocurrence(Document):
person = StringField()
animal = LazyReferenceField(Animal)
Animal.drop_collection()
Ocurrence.drop_collection()
Ocurrence(person='foo').save()
p = Ocurrence.objects.get()
self.assertIs(p.animal, None)
def test_lazy_reference_equality(self):
class Animal(Document):
name = StringField()
tag = StringField()
Animal.drop_collection()
animal = Animal(name="Leopard", tag="heavy").save()
animalref = LazyReference(Animal, animal.pk)
self.assertEqual(animal, animalref)
self.assertEqual(animalref, animal)
other_animalref = LazyReference(Animal, ObjectId("54495ad94c934721ede76f90"))
self.assertNotEqual(animal, other_animalref)
self.assertNotEqual(other_animalref, animal)
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()