Started work on performance

Added an initial benchmark.py
Much more performant than 0.5.2, but there is still work to do.
This commit is contained in:
Ross Lawley 2011-12-16 12:41:42 +00:00
parent 7614b92197
commit 6d9bfff19c
5 changed files with 274 additions and 81 deletions

182
benchmark.py Normal file
View File

@ -0,0 +1,182 @@
#!/usr/bin/env python
import timeit
def cprofile_main():
    """Create and save one ``Noddy`` document holding a 20-key dictionary.

    The ``timeit_test`` database is dropped first so the run starts from an
    empty database. Judging by its name, this small workload is presumably
    meant to be run under cProfile -- confirm against how it is invoked.
    """
    from pymongo import Connection
    conn = Connection()
    conn.drop_database('timeit_test')
    conn.disconnect()

    # MongoEngine is imported and connected only after the database was reset.
    from mongoengine import Document, DictField, connect
    connect("timeit_test")

    class Noddy(Document):
        fields = DictField()

    for _ in xrange(1):
        doc = Noddy()
        for idx in range(20):
            # Item assignment (not a bulk update) keeps the same code path
            # through the DictField wrapper as the original.
            doc.fields["key%d" % idx] = "value %d" % idx
        doc.save()
def main():
"""
Benchmark bulk document creation with raw pymongo and with MongoEngine.

Each benchmark is run once through ``timeit`` and its elapsed time is
printed beneath a 100-character rule and a title. Every timed statement
creates 10000 records holding a 20-key dictionary and then iterates over
everything stored. Figures recorded from earlier runs follow for comparison.

0.4 Performance Figures ...
----------------------------------------------------------------------------------------------------
Creating 10000 dictionaries - Pymongo
1.1141769886
----------------------------------------------------------------------------------------------------
Creating 10000 dictionaries - MongoEngine
2.37724113464
----------------------------------------------------------------------------------------------------
Creating 10000 dictionaries - MongoEngine, safe=False, validate=False
1.92479610443
0.5.X
----------------------------------------------------------------------------------------------------
Creating 10000 dictionaries - Pymongo
1.10552310944
----------------------------------------------------------------------------------------------------
Creating 10000 dictionaries - MongoEngine
16.5169169903
----------------------------------------------------------------------------------------------------
Creating 10000 dictionaries - MongoEngine, safe=False, validate=False
14.9446101189
----------------------------------------------------------------------------------------------------
Creating 10000 dictionaries - MongoEngine, safe=False, validate=False, cascade=False
14.912801981
----------------------------------------------------------------------------------------------------
Creating 10000 dictionaries - MongoEngine, force=True
14.9617750645
Performance
----------------------------------------------------------------------------------------------------
Creating 10000 dictionaries - Pymongo
1.10072994232
----------------------------------------------------------------------------------------------------
Creating 10000 dictionaries - MongoEngine
5.27341103554
----------------------------------------------------------------------------------------------------
Creating 10000 dictionaries - MongoEngine, safe=False, validate=False
4.49365401268
----------------------------------------------------------------------------------------------------
Creating 10000 dictionaries - MongoEngine, safe=False, validate=False, cascade=False
4.43459296227
----------------------------------------------------------------------------------------------------
Creating 10000 dictionaries - MongoEngine, force=True
4.40114378929
"""
# Setup for the pymongo benchmark: start from an empty 'timeit_test' database.
setup = """
from pymongo import Connection
connection = Connection()
connection.drop_database('timeit_test')
"""
# Timed statement: insert 10000 dicts through pymongo, then read them all back.
stmt = """
from pymongo import Connection
connection = Connection()
db = connection.timeit_test
noddy = db.noddy
for i in xrange(10000):
example = {'fields': {}}
for j in range(20):
example['fields']["key"+str(j)] = "value "+str(j)
noddy.insert(example)
myNoddys = noddy.find()
[n for n in myNoddys] # iterate
"""
print "-" * 100
print """Creating 10000 dictionaries - Pymongo"""
# timeit(1) executes the setup once and then the statement a single time.
t = timeit.Timer(stmt=stmt, setup=setup)
print t.timeit(1)
# Setup shared by every MongoEngine benchmark below: drop the database,
# connect, and declare the Noddy document with a single DictField. It is
# re-executed by each Timer, so each run starts from an empty database.
setup = """
from pymongo import Connection
connection = Connection()
connection.drop_database('timeit_test')
connection.disconnect()
from mongoengine import Document, DictField, connect
connect("timeit_test")
class Noddy(Document):
fields = DictField()
"""
# Timed statement: save 10000 Noddy documents with default save() options.
stmt = """
for i in xrange(10000):
noddy = Noddy()
for j in range(20):
noddy.fields["key"+str(j)] = "value "+str(j)
noddy.save()
myNoddys = Noddy.objects()
[n for n in myNoddys] # iterate
"""
print "-" * 100
print """Creating 10000 dictionaries - MongoEngine"""
t = timeit.Timer(stmt=stmt, setup=setup)
print t.timeit(1)
# Same workload, saving with safe=False and validate=False.
stmt = """
for i in xrange(10000):
noddy = Noddy()
for j in range(20):
noddy.fields["key"+str(j)] = "value "+str(j)
noddy.save(safe=False, validate=False)
myNoddys = Noddy.objects()
[n for n in myNoddys] # iterate
"""
print "-" * 100
print """Creating 10000 dictionaries - MongoEngine, safe=False, validate=False"""
t = timeit.Timer(stmt=stmt, setup=setup)
print t.timeit(1)
# Same workload, additionally passing cascade=False.
stmt = """
for i in xrange(10000):
noddy = Noddy()
for j in range(20):
noddy.fields["key"+str(j)] = "value "+str(j)
noddy.save(safe=False, validate=False, cascade=False)
myNoddys = Noddy.objects()
[n for n in myNoddys] # iterate
"""
print "-" * 100
print """Creating 10000 dictionaries - MongoEngine, safe=False, validate=False, cascade=False"""
t = timeit.Timer(stmt=stmt, setup=setup)
print t.timeit(1)
# Same workload, additionally passing force_insert=True.
stmt = """
for i in xrange(10000):
noddy = Noddy()
for j in range(20):
noddy.fields["key"+str(j)] = "value "+str(j)
noddy.save(force_insert=True, safe=False, validate=False, cascade=False)
myNoddys = Noddy.objects()
[n for n in myNoddys] # iterate
"""
print "-" * 100
# NOTE: the printed label mentions only force=True; the statement above also
# passes safe=False, validate=False and cascade=False.
print """Creating 10000 dictionaries - MongoEngine, force=True"""
t = timeit.Timer(stmt=stmt, setup=setup)
print t.timeit(1)
# Run the benchmarks only when this file is executed as a script, not on import.
if __name__ == "__main__":
main()

View File

@ -153,15 +153,6 @@ class BaseField(object):
if callable(value):
value = value()
# Convert lists / values so we can watch for any changes on them
if isinstance(value, (list, tuple)) and not isinstance(value, BaseList):
observer = DataObserver(instance, self.name)
value = BaseList(value, observer)
instance._data[self.name] = value
elif isinstance(value, dict) and not isinstance(value, BaseDict):
observer = DataObserver(instance, self.name)
value = BaseDict(value, observer)
instance._data[self.name] = value
return value
def __set__(self, instance, value):
@ -231,6 +222,7 @@ class ComplexBaseField(BaseField):
"""
field = None
_dereference = False
def __get__(self, instance, owner):
"""Descriptor to automatically dereference references.
@ -239,11 +231,39 @@ class ComplexBaseField(BaseField):
# Document class being used rather than a document object
return self
from dereference import dereference
instance._data[self.name] = dereference(
instance._data.get(self.name), max_depth=1, instance=instance, name=self.name
)
return super(ComplexBaseField, self).__get__(instance, owner)
if not self._dereference and instance._initialised:
from dereference import dereference
self._dereference = dereference # Cached
instance._data[self.name] = self._dereference(
instance._data.get(self.name), max_depth=1, instance=instance,
name=self.name
)
value = super(ComplexBaseField, self).__get__(instance, owner)
# Convert lists / values so we can watch for any changes on them
if isinstance(value, (list, tuple)) and not isinstance(value, BaseList):
value = BaseList(value, instance, self.name)
instance._data[self.name] = value
elif isinstance(value, dict) and not isinstance(value, BaseDict):
value = BaseDict(value, instance, self.name)
instance._data[self.name] = value
if self._dereference and instance._initialised and \
isinstance(value, (BaseList, BaseDict)) and not value._dereferenced:
value = self._dereference(
value, max_depth=1, instance=instance, name=self.name
)
value._dereferenced = True
instance._data[self.name] = value
return value
def __set__(self, instance, value):
"""Descriptor for assigning a value to a field in a document.

Stores ``value`` in ``instance._data`` under this field's name and then
flags that field as changed by calling ``instance._mark_as_changed``.
"""
instance._data[self.name] = value
instance._mark_as_changed(self.name)
def to_python(self, value):
"""Convert a MongoDB-compatible type to a Python type.
@ -727,12 +747,13 @@ class BaseDocument(object):
_dynamic = False
_created = True
_dynamic_lock = True
_initialised = False
def __init__(self, **values):
signals.pre_init.send(self.__class__, document=self, values=values)
self._data = {}
self._initialised = False
# Assign default values to instance
for attr_name, field in self._fields.items():
@ -754,18 +775,19 @@ class BaseDocument(object):
# Set any get_fieldname_display methods
self.__set_field_display()
# Flag initialised
self._initialised = True
if self._dynamic:
self._dynamic_lock = False
for key, value in dynamic_data.items():
setattr(self, key, value)
# Flag initialised
self._initialised = True
signals.post_init.send(self.__class__, document=self)
def __setattr__(self, name, value):
# Handle dynamic data only if an initialised dynamic document
if self._dynamic and getattr(self, '_initialised', False):
if self._dynamic and not self._dynamic_lock:
field = None
if not hasattr(self, name) and not name.startswith('_'):
@ -825,11 +847,9 @@ class BaseDocument(object):
# Convert lists / values so we can watch for any changes on them
if isinstance(value, (list, tuple)) and not isinstance(value, BaseList):
observer = DataObserver(self, name)
value = BaseList(value, observer)
value = BaseList(value, self, name)
elif isinstance(value, dict) and not isinstance(value, BaseDict):
observer = DataObserver(self, name)
value = BaseDict(value, observer)
value = BaseDict(value, self, name)
return value
@ -1147,33 +1167,25 @@ class BaseDocument(object):
return hash(self.pk)
class DataObserver(object):
    """Relay change notifications for one named field to its owner.

    Pairs an owning object with a field name so that :meth:`updated` can
    report that field as changed on the owner.
    """

    __slots__ = ["instance", "name"]

    def __init__(self, instance, name):
        self.name = name
        self.instance = instance

    def updated(self):
        """Call ``instance._mark_as_changed(name)`` when the owner supports it."""
        owner = self.instance
        if not hasattr(owner, '_mark_as_changed'):
            # Owners without the hook (e.g. None) are silently ignored.
            return
        owner._mark_as_changed(self.name)
class BaseList(list):
"""A special list so we can watch any changes
"""
def __init__(self, list_items, observer):
self.observer = observer
_dereferenced = False
_instance = None
_name = None
def __init__(self, list_items, instance, name):
self._instance = instance
self._name = name
super(BaseList, self).__init__(list_items)
def __setitem__(self, *args, **kwargs):
self._updated()
self._mark_as_changed()
super(BaseList, self).__setitem__(*args, **kwargs)
def __delitem__(self, *args, **kwargs):
self._updated()
self._mark_as_changed()
super(BaseList, self).__delitem__(*args, **kwargs)
def __getstate__(self):
@ -1182,90 +1194,94 @@ class BaseList(list):
def __setstate__(self, state):
self = state
return self
def append(self, *args, **kwargs):
self._updated()
self._mark_as_changed()
return super(BaseList, self).append(*args, **kwargs)
def extend(self, *args, **kwargs):
self._updated()
self._mark_as_changed()
return super(BaseList, self).extend(*args, **kwargs)
def insert(self, *args, **kwargs):
self._updated()
self._mark_as_changed()
return super(BaseList, self).insert(*args, **kwargs)
def pop(self, *args, **kwargs):
self._updated()
self._mark_as_changed()
return super(BaseList, self).pop(*args, **kwargs)
def remove(self, *args, **kwargs):
self._updated()
self._mark_as_changed()
return super(BaseList, self).remove(*args, **kwargs)
def reverse(self, *args, **kwargs):
self._updated()
self._mark_as_changed()
return super(BaseList, self).reverse(*args, **kwargs)
def sort(self, *args, **kwargs):
self._updated()
self._mark_as_changed()
return super(BaseList, self).sort(*args, **kwargs)
def _updated(self):
try:
self.observer.updated()
except AttributeError:
pass
def _mark_as_changed(self):
if hasattr(self._instance, '_mark_as_changed'):
self._instance._mark_as_changed(self._name)
class BaseDict(dict):
    """A special dict so we can watch any changes

    The dict remembers the object that owns it and the name it is stored
    under. The mutating methods overridden below call
    ``owner._mark_as_changed(name)`` before delegating to ``dict``; owners
    without that method are ignored.

    :param dict_items: initial contents, passed straight to ``dict``
    :param instance: the owning object to notify (may be ``None``)
    :param name: the name reported to ``instance._mark_as_changed``
    """

    _dereferenced = False
    _instance = None
    _name = None

    def __init__(self, dict_items, instance, name):
        self._instance = instance
        self._name = name
        super(BaseDict, self).__init__(dict_items)

    def __setitem__(self, *args, **kwargs):
        self._mark_as_changed()
        super(BaseDict, self).__setitem__(*args, **kwargs)

    def __delete__(self, *args, **kwargs):
        # NOTE(review): ``__delete__`` is the descriptor-protocol hook and
        # ``dict`` does not appear to define it, so the super() call would
        # presumably raise AttributeError if this were ever reached --
        # confirm whether the method is needed at all.
        self._mark_as_changed()
        super(BaseDict, self).__delete__(*args, **kwargs)

    def __delitem__(self, *args, **kwargs):
        self._mark_as_changed()
        super(BaseDict, self).__delitem__(*args, **kwargs)

    def __delattr__(self, *args, **kwargs):
        self._mark_as_changed()
        super(BaseDict, self).__delattr__(*args, **kwargs)

    def __getstate__(self):
        # NOTE(review): this assigns ``instance``, not the ``_instance``
        # attribute set in __init__, so the owner reference stays in place.
        # Left unchanged because clearing ``_instance`` here would stop change
        # tracking on the live object -- confirm which was intended.
        self.instance = None
        self._dereferenced = False
        return self

    def __setstate__(self, state):
        # Rebinding the local name ``self`` does not modify this object.
        self = state
        return self

    def clear(self, *args, **kwargs):
        self._mark_as_changed()
        super(BaseDict, self).clear(*args, **kwargs)

    def pop(self, *args, **kwargs):
        """Remove a key and return its value (or the supplied default)."""
        self._mark_as_changed()
        # Return dict's result; previously it was dropped and callers got None.
        return super(BaseDict, self).pop(*args, **kwargs)

    def popitem(self, *args, **kwargs):
        """Remove and return a ``(key, value)`` pair."""
        self._mark_as_changed()
        # Return dict's result; previously it was dropped and callers got None.
        return super(BaseDict, self).popitem(*args, **kwargs)

    def _mark_as_changed(self):
        """Report this dict's name as changed to the owner, if it supports it."""
        if hasattr(self._instance, '_mark_as_changed'):
            self._instance._mark_as_changed(self._name)
if sys.version_info < (2, 5):
# Prior to Python 2.5, Exception was an old-style class

View File

@ -1,7 +1,6 @@
import pymongo
from base import (BaseDict, BaseList, DataObserver,
TopLevelDocumentMetaclass, get_document)
from base import (BaseDict, BaseList, TopLevelDocumentMetaclass, get_document)
from fields import ReferenceField
from connection import get_db
from queryset import QuerySet
@ -134,11 +133,10 @@ class DeReference(object):
return items
if instance:
observer = DataObserver(instance, name)
if isinstance(items, dict):
return BaseDict(items, observer)
return BaseDict(items, instance, name)
else:
return BaseList(items, observer)
return BaseList(items, instance, name)
if isinstance(items, (dict, pymongo.son.SON)):
if '_ref' in items:
@ -183,10 +181,9 @@ class DeReference(object):
data[k] = self.object_map.get(v.id, v)
if instance and name:
observer = DataObserver(instance, name)
if is_list:
return BaseList(data, observer)
return BaseDict(data, observer)
return BaseList(data, instance, name)
return BaseDict(data, instance, name)
depth += 1
return data

View File

@ -1,6 +1,6 @@
from mongoengine import signals
from base import (DocumentMetaclass, TopLevelDocumentMetaclass, BaseDocument,
BaseDict, BaseList, DataObserver)
BaseDict, BaseList)
from queryset import OperationError
from connection import get_db, DEFAULT_CONNECTION_NAME
@ -304,12 +304,10 @@ class Document(BaseDocument):
"""
if isinstance(value, BaseDict):
value = [(k, self._reload(k, v)) for k, v in value.items()]
observer = DataObserver(self, key)
value = BaseDict(value, observer)
value = BaseDict(value, self, key)
elif isinstance(value, BaseList):
value = [self._reload(key, v) for v in value]
observer = DataObserver(self, key)
value = BaseList(value, observer)
value = BaseList(value, self, key)
elif isinstance(value, (EmbeddedDocument, DynamicEmbeddedDocument)):
value._changed_fields = []
return value

View File

@ -11,7 +11,7 @@ import uuid
from base import (BaseField, ComplexBaseField, ObjectIdField,
ValidationError, get_document)
from queryset import DO_NOTHING
from queryset import DO_NOTHING, QuerySet
from document import Document, EmbeddedDocument
from connection import get_db, DEFAULT_CONNECTION_NAME
from operator import itemgetter
@ -491,7 +491,7 @@ class ListField(ComplexBaseField):
def validate(self, value):
"""Make sure that a list of valid fields is being used.
"""
if (not isinstance(value, (list, tuple)) or
if (not isinstance(value, (list, tuple, QuerySet)) or
isinstance(value, basestring)):
self.error('Only lists and tuples may be used in a list field')
super(ListField, self).validate(value)