Merge pull request #947 from YoApp/optimization_issue888

Major dereferencing optimizations and fix for de-pickling outdated documents [migrated 921]
This commit is contained in:
Omer Katz 2015-04-09 10:06:12 +03:00
commit d600ade40c
6 changed files with 50 additions and 9 deletions

View File

@ -218,3 +218,4 @@ that much better:
* Matthew Ellison (https://github.com/seglberg) * Matthew Ellison (https://github.com/seglberg)
* Jimmy Shen (https://github.com/jimmyshen) * Jimmy Shen (https://github.com/jimmyshen)
* J. Fernando Sánchez (https://github.com/balkian) * J. Fernando Sánchez (https://github.com/balkian)
* Michael Chase (https://github.com/rxsegrxup)

View File

@ -7,6 +7,8 @@ Changes in 0.9.X - DEV
====================== ======================
- ListField of embedded docs doesn't set the _instance attribute when iterating over it #914 - ListField of embedded docs doesn't set the _instance attribute when iterating over it #914
- Support += and *= for ListField #595 - Support += and *= for ListField #595
- Use sets for populating dbrefs to dereference
- Fixed unpickling a document replacing the class-level `_fields_ordered` list shared by all instances. #888
Changes in 0.9.0 Changes in 0.9.0
================ ================

View File

@ -206,7 +206,12 @@ class BaseDocument(object):
if k in data: if k in data:
setattr(self, k, data[k]) setattr(self, k, data[k])
if '_fields_ordered' in data: if '_fields_ordered' in data:
setattr(type(self), '_fields_ordered', data['_fields_ordered']) if self._dynamic:
setattr(self, '_fields_ordered', data['_fields_ordered'])
else:
_super_fields_ordered = type(self)._fields_ordered
setattr(self, '_fields_ordered', _super_fields_ordered)
dynamic_fields = data.get('_dynamic_fields') or SON() dynamic_fields = data.get('_dynamic_fields') or SON()
for k in dynamic_fields.keys(): for k in dynamic_fields.keys():
setattr(self, k, data["_data"].get(k)) setattr(self, k, data["_data"].get(k))

View File

@ -102,24 +102,24 @@ class DeReference(object):
for field_name, field in item._fields.iteritems(): for field_name, field in item._fields.iteritems():
v = item._data.get(field_name, None) v = item._data.get(field_name, None)
if isinstance(v, (DBRef)): if isinstance(v, (DBRef)):
reference_map.setdefault(field.document_type, []).append(v.id) reference_map.setdefault(field.document_type, set()).add(v.id)
elif isinstance(v, (dict, SON)) and '_ref' in v: elif isinstance(v, (dict, SON)) and '_ref' in v:
reference_map.setdefault(get_document(v['_cls']), []).append(v['_ref'].id) reference_map.setdefault(get_document(v['_cls']), set()).add(v['_ref'].id)
elif isinstance(v, (dict, list, tuple)) and depth <= self.max_depth: elif isinstance(v, (dict, list, tuple)) and depth <= self.max_depth:
field_cls = getattr(getattr(field, 'field', None), 'document_type', None) field_cls = getattr(getattr(field, 'field', None), 'document_type', None)
references = self._find_references(v, depth) references = self._find_references(v, depth)
for key, refs in references.iteritems(): for key, refs in references.iteritems():
if isinstance(field_cls, (Document, TopLevelDocumentMetaclass)): if isinstance(field_cls, (Document, TopLevelDocumentMetaclass)):
key = field_cls key = field_cls
reference_map.setdefault(key, []).extend(refs) reference_map.setdefault(key, set()).update(refs)
elif isinstance(item, (DBRef)): elif isinstance(item, (DBRef)):
reference_map.setdefault(item.collection, []).append(item.id) reference_map.setdefault(item.collection, set()).add(item.id)
elif isinstance(item, (dict, SON)) and '_ref' in item: elif isinstance(item, (dict, SON)) and '_ref' in item:
reference_map.setdefault(get_document(item['_cls']), []).append(item['_ref'].id) reference_map.setdefault(get_document(item['_cls']), set()).add(item['_ref'].id)
elif isinstance(item, (dict, list, tuple)) and depth - 1 <= self.max_depth: elif isinstance(item, (dict, list, tuple)) and depth - 1 <= self.max_depth:
references = self._find_references(item, depth - 1) references = self._find_references(item, depth - 1)
for key, refs in references.iteritems(): for key, refs in references.iteritems():
reference_map.setdefault(key, []).extend(refs) reference_map.setdefault(key, set()).update(refs)
return reference_map return reference_map
@ -128,8 +128,8 @@ class DeReference(object):
""" """
object_map = {} object_map = {}
for collection, dbrefs in self.reference_map.iteritems(): for collection, dbrefs in self.reference_map.iteritems():
keys = object_map.keys() refs = [dbref for dbref in dbrefs
refs = list(set([dbref for dbref in dbrefs if unicode(dbref).encode('utf-8') not in keys])) if unicode(dbref).encode('utf-8') not in object_map]
if hasattr(collection, 'objects'): # We have a document class for the refs if hasattr(collection, 'objects'): # We have a document class for the refs
references = collection.objects.in_bulk(refs) references = collection.objects.in_bulk(refs)
for key, doc in references.iteritems(): for key, doc in references.iteritems():

View File

@ -10,6 +10,7 @@ import uuid
from datetime import datetime from datetime import datetime
from bson import DBRef, ObjectId from bson import DBRef, ObjectId
from tests import fixtures
from tests.fixtures import (PickleEmbedded, PickleTest, PickleSignalsTest, from tests.fixtures import (PickleEmbedded, PickleTest, PickleSignalsTest,
PickleDyanmicEmbedded, PickleDynamicTest) PickleDyanmicEmbedded, PickleDynamicTest)
@ -2085,6 +2086,29 @@ class InstanceTest(unittest.TestCase):
self.assertEqual(pickle_doc.string, "Two") self.assertEqual(pickle_doc.string, "Two")
self.assertEqual(pickle_doc.lists, ["1", "2", "3"]) self.assertEqual(pickle_doc.lists, ["1", "2", "3"])
def test_regular_document_pickle(self):
    """Unpickling a regular document must use the current class definition.

    The document is pickled while ``fixtures.PickleTest`` refers to the
    original class; ``fixtures.PickleTest`` is then rebound to
    ``fixtures.NewDocumentPickleTest`` before unpickling, so the resurrected
    instance is expected to pick up the new class and its
    ``_fields_ordered`` rather than the pickled one.
    """
    pickle_doc = PickleTest(number=1, string="One", lists=['1', '2'])
    # Make sure pickling works even before the doc is saved.
    pickle.dumps(pickle_doc)
    pickle_doc.save()
    pickled_doc = pickle.dumps(pickle_doc)

    # Test that when a document's definition changes the new
    # definition is used
    fixtures.PickleTest = fixtures.NewDocumentPickleTest
    try:
        resurrected = pickle.loads(pickled_doc)
        self.assertEqual(resurrected.__class__,
                         fixtures.NewDocumentPickleTest)
        self.assertEqual(resurrected._fields_ordered,
                         fixtures.NewDocumentPickleTest._fields_ordered)
        self.assertNotEqual(resurrected._fields_ordered,
                            pickle_doc._fields_ordered)
    finally:
        # Always restore the fixture module, even when an assertion fails,
        # so the rebinding cannot leak into other tests.
        # The local PickleTest is still a ref to the original.
        fixtures.PickleTest = PickleTest
def test_dynamic_document_pickle(self): def test_dynamic_document_pickle(self):
pickle_doc = PickleDynamicTest( pickle_doc = PickleDynamicTest(

View File

@ -17,6 +17,15 @@ class PickleTest(Document):
photo = FileField() photo = FileField()
# Fixture that stands in for a changed definition of the PickleTest document:
# test_regular_document_pickle rebinds fixtures.PickleTest to this class
# before unpickling a document that was pickled as PickleTest.
# NOTE(review): the fields look like PickleTest's plus `new_field` -- confirm
# against the PickleTest definition.
class NewDocumentPickleTest(Document):
number = IntField()
string = StringField(choices=(('One', '1'), ('Two', '2')))
embedded = EmbeddedDocumentField(PickleEmbedded)
lists = ListField(StringField())
photo = FileField()
# The test expects this class's _fields_ordered to differ from the pickled
# document's; presumably this extra field is what makes it differ.
new_field = StringField()
class PickleDyanmicEmbedded(DynamicEmbeddedDocument): class PickleDyanmicEmbedded(DynamicEmbeddedDocument):
date = DateTimeField(default=datetime.now) date = DateTimeField(default=datetime.now)