Started work on performance

Added an initial benchmark.py
Much more performant than 0.5.2, but there is still work to do.
This commit is contained in:
Ross Lawley 2011-12-16 12:41:42 +00:00
parent 7614b92197
commit 6d9bfff19c
5 changed files with 274 additions and 81 deletions

182
benchmark.py Normal file
View File

@ -0,0 +1,182 @@
#!/usr/bin/env python
import timeit
def cprofile_main():
    """Build and save a single ``Noddy`` document holding 20 dict entries.

    The ``timeit_test`` database is dropped through a raw pymongo
    connection first, then MongoEngine connects to it and the document is
    saved. Presumably meant to be run under cProfile (going by the name) --
    nothing in this file calls it.
    """
    from pymongo import Connection

    # Start from an empty database so earlier runs do not affect this one.
    client = Connection()
    client.drop_database('timeit_test')
    client.disconnect()

    from mongoengine import Document, DictField, connect
    connect("timeit_test")

    class Noddy(Document):
        fields = DictField()

    for _ in xrange(1):
        doc = Noddy()
        for idx in range(20):
            doc.fields["key" + str(idx)] = "value " + str(idx)
        doc.save()
def _run_benchmark(label, stmt, setup):
    """Time a single execution of ``stmt`` and print it under ``label``.

    :param label: heading printed above the timing.
    :param stmt: source code of the statement to time.
    :param setup: source code that timeit executes, untimed, before ``stmt``.
    """
    # Single-argument parenthesised print behaves the same as the Python 2
    # print statement this file otherwise uses.
    print("-" * 100)
    print(label)
    # One run only: every statement already loops 10000 times internally.
    print(timeit.Timer(stmt=stmt, setup=setup).timeit(1))


def main():
    """Time inserting and reading back 10000 documents, pymongo vs MongoEngine.

    Every benchmark drops the ``timeit_test`` database in its setup, saves
    10000 documents that each carry a 20-entry dict, then iterates over all
    stored documents. The MongoEngine benchmark is repeated with different
    ``save()`` keyword arguments. Each timing is printed in seconds as
    returned by ``timeit``.

    NOTE(review): ``Connection()`` is called without arguments, so this
    presumably needs a MongoDB server on pymongo's default host/port --
    confirm before running.

    Previously recorded figures:

    0.4 Performance Figures ...
    ----------------------------------------------------------------------------------------------------
    Creating 10000 dictionaries - Pymongo
    1.1141769886
    ----------------------------------------------------------------------------------------------------
    Creating 10000 dictionaries - MongoEngine
    2.37724113464
    ----------------------------------------------------------------------------------------------------
    Creating 10000 dictionaries - MongoEngine, safe=False, validate=False
    1.92479610443

    0.5.X
    ----------------------------------------------------------------------------------------------------
    Creating 10000 dictionaries - Pymongo
    1.10552310944
    ----------------------------------------------------------------------------------------------------
    Creating 10000 dictionaries - MongoEngine
    16.5169169903
    ----------------------------------------------------------------------------------------------------
    Creating 10000 dictionaries - MongoEngine, safe=False, validate=False
    14.9446101189
    ----------------------------------------------------------------------------------------------------
    Creating 10000 dictionaries - MongoEngine, safe=False, validate=False, cascade=False
    14.912801981
    ----------------------------------------------------------------------------------------------------
    Creating 10000 dictionaries - MongoEngine, force=True
    14.9617750645

    Performance
    ----------------------------------------------------------------------------------------------------
    Creating 10000 dictionaries - Pymongo
    1.10072994232
    ----------------------------------------------------------------------------------------------------
    Creating 10000 dictionaries - MongoEngine
    5.27341103554
    ----------------------------------------------------------------------------------------------------
    Creating 10000 dictionaries - MongoEngine, safe=False, validate=False
    4.49365401268
    ----------------------------------------------------------------------------------------------------
    Creating 10000 dictionaries - MongoEngine, safe=False, validate=False, cascade=False
    4.43459296227
    ----------------------------------------------------------------------------------------------------
    Creating 10000 dictionaries - MongoEngine, force=True
    4.40114378929
    """
    # --- Baseline: raw pymongo -------------------------------------------
    pymongo_setup = """
from pymongo import Connection
connection = Connection()
connection.drop_database('timeit_test')
"""

    pymongo_stmt = """
from pymongo import Connection
connection = Connection()
db = connection.timeit_test
noddy = db.noddy
for i in xrange(10000):
    example = {'fields': {}}
    for j in range(20):
        example['fields']["key"+str(j)] = "value "+str(j)
    noddy.insert(example)
myNoddys = noddy.find()
[n for n in myNoddys] # iterate
"""

    _run_benchmark("Creating 10000 dictionaries - Pymongo",
                   pymongo_stmt, pymongo_setup)

    # --- MongoEngine variants --------------------------------------------
    # timeit runs the setup for every Timer, so each variant starts from a
    # freshly dropped database and a freshly defined Noddy class.
    mongoengine_setup = """
from pymongo import Connection
connection = Connection()
connection.drop_database('timeit_test')
connection.disconnect()
from mongoengine import Document, DictField, connect
connect("timeit_test")
class Noddy(Document):
    fields = DictField()
"""

    # The variants differ only in the keyword arguments given to save();
    # %s is replaced with those arguments.
    mongoengine_stmt = """
for i in xrange(10000):
    noddy = Noddy()
    for j in range(20):
        noddy.fields["key"+str(j)] = "value "+str(j)
    noddy.save(%s)
myNoddys = Noddy.objects()
[n for n in myNoddys] # iterate
"""

    # (printed label, arguments passed to save())
    variants = (
        ("Creating 10000 dictionaries - MongoEngine",
         ""),
        ("Creating 10000 dictionaries - MongoEngine, safe=False, validate=False",
         "safe=False, validate=False"),
        ("Creating 10000 dictionaries - MongoEngine, safe=False, validate=False, cascade=False",
         "safe=False, validate=False, cascade=False"),
        # The label says only "force=True", but the statement also turns
        # off safe, validate and cascade.
        ("Creating 10000 dictionaries - MongoEngine, force=True",
         "force_insert=True, safe=False, validate=False, cascade=False"),
    )
    for label, save_kwargs in variants:
        _run_benchmark(label, mongoengine_stmt % save_kwargs,
                       mongoengine_setup)
# Run the benchmarks only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

View File

@ -153,15 +153,6 @@ class BaseField(object):
if callable(value): if callable(value):
value = value() value = value()
# Convert lists / values so we can watch for any changes on them
if isinstance(value, (list, tuple)) and not isinstance(value, BaseList):
observer = DataObserver(instance, self.name)
value = BaseList(value, observer)
instance._data[self.name] = value
elif isinstance(value, dict) and not isinstance(value, BaseDict):
observer = DataObserver(instance, self.name)
value = BaseDict(value, observer)
instance._data[self.name] = value
return value return value
def __set__(self, instance, value): def __set__(self, instance, value):
@ -231,6 +222,7 @@ class ComplexBaseField(BaseField):
""" """
field = None field = None
_dereference = False
def __get__(self, instance, owner): def __get__(self, instance, owner):
"""Descriptor to automatically dereference references. """Descriptor to automatically dereference references.
@ -239,11 +231,39 @@ class ComplexBaseField(BaseField):
# Document class being used rather than a document object # Document class being used rather than a document object
return self return self
from dereference import dereference if not self._dereference and instance._initialised:
instance._data[self.name] = dereference( from dereference import dereference
instance._data.get(self.name), max_depth=1, instance=instance, name=self.name self._dereference = dereference # Cached
) instance._data[self.name] = self._dereference(
return super(ComplexBaseField, self).__get__(instance, owner) instance._data.get(self.name), max_depth=1, instance=instance,
name=self.name
)
value = super(ComplexBaseField, self).__get__(instance, owner)
# Convert lists / values so we can watch for any changes on them
if isinstance(value, (list, tuple)) and not isinstance(value, BaseList):
value = BaseList(value, instance, self.name)
instance._data[self.name] = value
elif isinstance(value, dict) and not isinstance(value, BaseDict):
value = BaseDict(value, instance, self.name)
instance._data[self.name] = value
if self._dereference and instance._initialised and \
isinstance(value, (BaseList, BaseDict)) and not value._dereferenced:
value = self._dereference(
value, max_depth=1, instance=instance, name=self.name
)
value._dereferenced = True
instance._data[self.name] = value
return value
def __set__(self, instance, value):
"""Descriptor for assigning a value to a field in a document.

Stores ``value`` in ``instance._data`` under this field's name, then
passes that name to ``instance._mark_as_changed``.

:param instance: the object whose ``_data`` mapping receives the value.
:param value: the value being assigned to the field.
"""
instance._data[self.name] = value
instance._mark_as_changed(self.name)
def to_python(self, value): def to_python(self, value):
"""Convert a MongoDB-compatible type to a Python type. """Convert a MongoDB-compatible type to a Python type.
@ -727,12 +747,13 @@ class BaseDocument(object):
_dynamic = False _dynamic = False
_created = True _created = True
_dynamic_lock = True
_initialised = False
def __init__(self, **values): def __init__(self, **values):
signals.pre_init.send(self.__class__, document=self, values=values) signals.pre_init.send(self.__class__, document=self, values=values)
self._data = {} self._data = {}
self._initialised = False
# Assign default values to instance # Assign default values to instance
for attr_name, field in self._fields.items(): for attr_name, field in self._fields.items():
@ -754,18 +775,19 @@ class BaseDocument(object):
# Set any get_fieldname_display methods # Set any get_fieldname_display methods
self.__set_field_display() self.__set_field_display()
# Flag initialised
self._initialised = True
if self._dynamic: if self._dynamic:
self._dynamic_lock = False
for key, value in dynamic_data.items(): for key, value in dynamic_data.items():
setattr(self, key, value) setattr(self, key, value)
# Flag initialised
self._initialised = True
signals.post_init.send(self.__class__, document=self) signals.post_init.send(self.__class__, document=self)
def __setattr__(self, name, value): def __setattr__(self, name, value):
# Handle dynamic data only if an initialised dynamic document # Handle dynamic data only if an initialised dynamic document
if self._dynamic and getattr(self, '_initialised', False): if self._dynamic and not self._dynamic_lock:
field = None field = None
if not hasattr(self, name) and not name.startswith('_'): if not hasattr(self, name) and not name.startswith('_'):
@ -825,11 +847,9 @@ class BaseDocument(object):
# Convert lists / values so we can watch for any changes on them # Convert lists / values so we can watch for any changes on them
if isinstance(value, (list, tuple)) and not isinstance(value, BaseList): if isinstance(value, (list, tuple)) and not isinstance(value, BaseList):
observer = DataObserver(self, name) value = BaseList(value, self, name)
value = BaseList(value, observer)
elif isinstance(value, dict) and not isinstance(value, BaseDict): elif isinstance(value, dict) and not isinstance(value, BaseDict):
observer = DataObserver(self, name) value = BaseDict(value, self, name)
value = BaseDict(value, observer)
return value return value
@ -1147,33 +1167,25 @@ class BaseDocument(object):
return hash(self.pk) return hash(self.pk)
class DataObserver(object):
__slots__ = ["instance", "name"]
def __init__(self, instance, name):
self.instance = instance
self.name = name
def updated(self):
if hasattr(self.instance, '_mark_as_changed'):
self.instance._mark_as_changed(self.name)
class BaseList(list): class BaseList(list):
"""A special list so we can watch any changes """A special list so we can watch any changes
""" """
def __init__(self, list_items, observer): _dereferenced = False
self.observer = observer _instance = None
_name = None
def __init__(self, list_items, instance, name):
self._instance = instance
self._name = name
super(BaseList, self).__init__(list_items) super(BaseList, self).__init__(list_items)
def __setitem__(self, *args, **kwargs): def __setitem__(self, *args, **kwargs):
self._updated() self._mark_as_changed()
super(BaseList, self).__setitem__(*args, **kwargs) super(BaseList, self).__setitem__(*args, **kwargs)
def __delitem__(self, *args, **kwargs): def __delitem__(self, *args, **kwargs):
self._updated() self._mark_as_changed()
super(BaseList, self).__delitem__(*args, **kwargs) super(BaseList, self).__delitem__(*args, **kwargs)
def __getstate__(self): def __getstate__(self):
@ -1182,90 +1194,94 @@ class BaseList(list):
def __setstate__(self, state): def __setstate__(self, state):
self = state self = state
return self
def append(self, *args, **kwargs): def append(self, *args, **kwargs):
self._updated() self._mark_as_changed()
return super(BaseList, self).append(*args, **kwargs) return super(BaseList, self).append(*args, **kwargs)
def extend(self, *args, **kwargs): def extend(self, *args, **kwargs):
self._updated() self._mark_as_changed()
return super(BaseList, self).extend(*args, **kwargs) return super(BaseList, self).extend(*args, **kwargs)
def insert(self, *args, **kwargs): def insert(self, *args, **kwargs):
self._updated() self._mark_as_changed()
return super(BaseList, self).insert(*args, **kwargs) return super(BaseList, self).insert(*args, **kwargs)
def pop(self, *args, **kwargs): def pop(self, *args, **kwargs):
self._updated() self._mark_as_changed()
return super(BaseList, self).pop(*args, **kwargs) return super(BaseList, self).pop(*args, **kwargs)
def remove(self, *args, **kwargs): def remove(self, *args, **kwargs):
self._updated() self._mark_as_changed()
return super(BaseList, self).remove(*args, **kwargs) return super(BaseList, self).remove(*args, **kwargs)
def reverse(self, *args, **kwargs): def reverse(self, *args, **kwargs):
self._updated() self._mark_as_changed()
return super(BaseList, self).reverse(*args, **kwargs) return super(BaseList, self).reverse(*args, **kwargs)
def sort(self, *args, **kwargs): def sort(self, *args, **kwargs):
self._updated() self._mark_as_changed()
return super(BaseList, self).sort(*args, **kwargs) return super(BaseList, self).sort(*args, **kwargs)
def _updated(self): def _mark_as_changed(self):
try: if hasattr(self._instance, '_mark_as_changed'):
self.observer.updated() self._instance._mark_as_changed(self._name)
except AttributeError:
pass
class BaseDict(dict): class BaseDict(dict):
"""A special dict so we can watch any changes """A special dict so we can watch any changes
""" """
def __init__(self, dict_items, observer): _dereferenced = False
self.observer = observer _instance = None
_name = None
def __init__(self, dict_items, instance, name):
self._instance = instance
self._name = name
super(BaseDict, self).__init__(dict_items) super(BaseDict, self).__init__(dict_items)
def __setitem__(self, *args, **kwargs): def __setitem__(self, *args, **kwargs):
self._updated() self._mark_as_changed()
super(BaseDict, self).__setitem__(*args, **kwargs) super(BaseDict, self).__setitem__(*args, **kwargs)
def __delete__(self, *args, **kwargs): def __delete__(self, *args, **kwargs):
self._updated() self._mark_as_changed()
super(BaseDict, self).__delete__(*args, **kwargs) super(BaseDict, self).__delete__(*args, **kwargs)
def __delitem__(self, *args, **kwargs): def __delitem__(self, *args, **kwargs):
self._updated() self._mark_as_changed()
super(BaseDict, self).__delitem__(*args, **kwargs) super(BaseDict, self).__delitem__(*args, **kwargs)
def __delattr__(self, *args, **kwargs): def __delattr__(self, *args, **kwargs):
self._updated() self._mark_as_changed()
super(BaseDict, self).__delattr__(*args, **kwargs) super(BaseDict, self).__delattr__(*args, **kwargs)
def __getstate__(self): def __getstate__(self):
self.observer = None self.instance = None
self._dereferenced = False
return self return self
def __setstate__(self, state): def __setstate__(self, state):
self = state self = state
return self
def clear(self, *args, **kwargs): def clear(self, *args, **kwargs):
self._updated() self._mark_as_changed()
super(BaseDict, self).clear(*args, **kwargs) super(BaseDict, self).clear(*args, **kwargs)
def pop(self, *args, **kwargs): def pop(self, *args, **kwargs):
self._updated() self._mark_as_changed()
super(BaseDict, self).pop(*args, **kwargs) super(BaseDict, self).pop(*args, **kwargs)
def popitem(self, *args, **kwargs): def popitem(self, *args, **kwargs):
self._updated() self._mark_as_changed()
super(BaseDict, self).popitem(*args, **kwargs) super(BaseDict, self).popitem(*args, **kwargs)
def _updated(self): def _mark_as_changed(self):
try: if hasattr(self._instance, '_mark_as_changed'):
self.observer.updated() self._instance._mark_as_changed(self._name)
except AttributeError:
pass
if sys.version_info < (2, 5): if sys.version_info < (2, 5):
# Prior to Python 2.5, Exception was an old-style class # Prior to Python 2.5, Exception was an old-style class

View File

@ -1,7 +1,6 @@
import pymongo import pymongo
from base import (BaseDict, BaseList, DataObserver, from base import (BaseDict, BaseList, TopLevelDocumentMetaclass, get_document)
TopLevelDocumentMetaclass, get_document)
from fields import ReferenceField from fields import ReferenceField
from connection import get_db from connection import get_db
from queryset import QuerySet from queryset import QuerySet
@ -134,11 +133,10 @@ class DeReference(object):
return items return items
if instance: if instance:
observer = DataObserver(instance, name)
if isinstance(items, dict): if isinstance(items, dict):
return BaseDict(items, observer) return BaseDict(items, instance, name)
else: else:
return BaseList(items, observer) return BaseList(items, instance, name)
if isinstance(items, (dict, pymongo.son.SON)): if isinstance(items, (dict, pymongo.son.SON)):
if '_ref' in items: if '_ref' in items:
@ -183,10 +181,9 @@ class DeReference(object):
data[k] = self.object_map.get(v.id, v) data[k] = self.object_map.get(v.id, v)
if instance and name: if instance and name:
observer = DataObserver(instance, name)
if is_list: if is_list:
return BaseList(data, observer) return BaseList(data, instance, name)
return BaseDict(data, observer) return BaseDict(data, instance, name)
depth += 1 depth += 1
return data return data

View File

@ -1,6 +1,6 @@
from mongoengine import signals from mongoengine import signals
from base import (DocumentMetaclass, TopLevelDocumentMetaclass, BaseDocument, from base import (DocumentMetaclass, TopLevelDocumentMetaclass, BaseDocument,
BaseDict, BaseList, DataObserver) BaseDict, BaseList)
from queryset import OperationError from queryset import OperationError
from connection import get_db, DEFAULT_CONNECTION_NAME from connection import get_db, DEFAULT_CONNECTION_NAME
@ -304,12 +304,10 @@ class Document(BaseDocument):
""" """
if isinstance(value, BaseDict): if isinstance(value, BaseDict):
value = [(k, self._reload(k, v)) for k, v in value.items()] value = [(k, self._reload(k, v)) for k, v in value.items()]
observer = DataObserver(self, key) value = BaseDict(value, self, key)
value = BaseDict(value, observer)
elif isinstance(value, BaseList): elif isinstance(value, BaseList):
value = [self._reload(key, v) for v in value] value = [self._reload(key, v) for v in value]
observer = DataObserver(self, key) value = BaseList(value, self, key)
value = BaseList(value, observer)
elif isinstance(value, (EmbeddedDocument, DynamicEmbeddedDocument)): elif isinstance(value, (EmbeddedDocument, DynamicEmbeddedDocument)):
value._changed_fields = [] value._changed_fields = []
return value return value

View File

@ -11,7 +11,7 @@ import uuid
from base import (BaseField, ComplexBaseField, ObjectIdField, from base import (BaseField, ComplexBaseField, ObjectIdField,
ValidationError, get_document) ValidationError, get_document)
from queryset import DO_NOTHING from queryset import DO_NOTHING, QuerySet
from document import Document, EmbeddedDocument from document import Document, EmbeddedDocument
from connection import get_db, DEFAULT_CONNECTION_NAME from connection import get_db, DEFAULT_CONNECTION_NAME
from operator import itemgetter from operator import itemgetter
@ -491,7 +491,7 @@ class ListField(ComplexBaseField):
def validate(self, value): def validate(self, value):
"""Make sure that a list of valid fields is being used. """Make sure that a list of valid fields is being used.
""" """
if (not isinstance(value, (list, tuple)) or if (not isinstance(value, (list, tuple, QuerySet)) or
isinstance(value, basestring)): isinstance(value, basestring)):
self.error('Only lists and tuples may be used in a list field') self.error('Only lists and tuples may be used in a list field')
super(ListField, self).validate(value) super(ListField, self).validate(value)