Querysets now utilise a local cache

Changed __len__ behaviour in the queryset (#247, #311)
Ross Lawley 2013-05-10 13:57:32 +00:00
parent 9e513e08ae
commit 9251ce312b
6 changed files with 204 additions and 56 deletions

View File

@@ -4,6 +4,8 @@ Changelog
Changes in 0.8.0
================
- Querysets now utilise a local cache
- Changed __len__ behaviour in the queryset (#247, #311)
- Fixed querying string versions of ObjectIds issue with ReferenceField (#307)
- Added $setOnInsert support for upserts (#308)
- Upserts now possible with just query parameters (#309)
@@ -25,7 +27,6 @@ Changes in 0.8.0
- Added SequenceField.set_next_value(value) helper (#159)
- Updated .only() behaviour - now like exclude it is chainable (#202)
- Added with_limit_and_skip support to count() (#235)
- Removed __len__ from queryset (#247)
- Objects queryset manager now inherited (#256)
- Updated connection to use MongoClient (#262, #274)
- Fixed db_alias and inherited Documents (#143)

View File

@@ -235,12 +235,15 @@ update your code like so: ::
mammals = Animal.objects(type="mammal").filter(order="Carnivora") # The final queryset is assigned to mammals
[m for m in mammals] # This will return all carnivores
No more len
-----------
Len iterates the queryset
--------------------------
If you ever did len(queryset) it previously did a count() under the covers, this
caused some unusual issues - so now it has been removed in favour of the
explicit `queryset.count()` to update::
If you ever did `len(queryset)` it previously ran a `count()` under the covers,
which caused some unusual issues. As `len(queryset)` is most often used by
`list(queryset)`, we now cache the queryset results and use that cache for the
length. This isn't as performant as a `count()`, so if you aren't iterating the
queryset you should upgrade to use `count()` explicitly::
# Old code
len(Animal.objects(type="mammal"))
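# Illustrative only, not part of this commit's diff (the hunk is cut off here):
# the explicit replacement the note above points to is count(), assuming the
# same Animal model as in the snippet.
Animal.objects(type="mammal").count()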

View File

@@ -26,6 +26,7 @@ __all__ = ('QuerySet', 'DO_NOTHING', 'NULLIFY', 'CASCADE', 'DENY', 'PULL')
# The maximum number of items to display in a QuerySet.__repr__
REPR_OUTPUT_SIZE = 20
ITER_CHUNK_SIZE = 100
# Delete rules
DO_NOTHING = 0
@@ -63,6 +64,9 @@ class QuerySet(object):
self._none = False
self._as_pymongo = False
self._as_pymongo_coerce = False
self._result_cache = []
self._has_more = True
self._len = None
# If inheritance is allowed, only return instances and instances of
# subclasses of the class being used
@@ -109,13 +113,60 @@ class QuerySet(object):
queryset._class_check = class_check
return queryset
def __len__(self):
"""Since __len__ is called quite frequently (for example, as part of
list(qs) we populate the result cache and cache the length.
"""
if self._len is not None:
return self._len
if self._has_more:
# populate the cache
list(self._iter_results())
self._len = len(self._result_cache)
return self._len
def __iter__(self):
"""Support iterator protocol"""
queryset = self
if queryset._iter:
queryset = self.clone()
queryset.rewind()
return queryset
"""Iteration utilises a results cache which iterates the cursor
in batches of ``ITER_CHUNK_SIZE``.
If ``self._has_more`` the cursor hasn't been exhausted, so iterate and cache
in batches; otherwise iterate the existing ``self._result_cache``.
"""
self._iter = True
if self._has_more:
return self._iter_results()
# iterating over the cache.
return iter(self._result_cache)
def _iter_results(self):
"""A generator for iterating over the result cache.
Also populates the cache if there are more possible results to yield.
Raises StopIteration when there are no more results"""
pos = 0
while True:
upper = len(self._result_cache)
while pos < upper:
yield self._result_cache[pos]
pos = pos + 1
if not self._has_more:
raise StopIteration
if len(self._result_cache) <= pos:
self._populate_cache()
def _populate_cache(self):
"""
Populates the result cache with ``ITER_CHUNK_SIZE`` more entries
(until the cursor is exhausted).
"""
if self._has_more:
try:
for i in xrange(ITER_CHUNK_SIZE):
self._result_cache.append(self.next())
except StopIteration:
self._has_more = False
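``__len__``, ``__iter__``, ``_iter_results`` and ``_populate_cache`` above make
up the new result cache. A minimal usage sketch (illustrative only, not part of
this commit; it assumes a hypothetical ``Page`` model and an open connection)
shows a repeat iteration and ``len()`` being answered from ``_result_cache``
instead of the cursor::

    from mongoengine import Document, StringField, connect

    connect('cache_demo')

    class Page(Document):
        title = StringField()

    Page.drop_collection()
    for i in xrange(5):
        Page(title="Page %s" % i).save()

    pages = Page.objects
    first = [p.title for p in pages]    # fills pages._result_cache in batches
    second = [p.title for p in pages]   # replayed from the cache, no new cursor reads
    assert first == second
    assert len(pages) == 5              # __len__ reuses the populated cache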
def __getitem__(self, key):
"""Support skip and limit using getitem and slicing syntax.
@@ -157,22 +208,15 @@ class QuerySet(object):
def __repr__(self):
"""Provides the string representation of the QuerySet
.. versionchanged:: 0.6.13 Now doesn't modify the cursor
"""
if self._iter:
return '.. queryset mid-iteration ..'
data = []
for i in xrange(REPR_OUTPUT_SIZE + 1):
try:
data.append(self.next())
except StopIteration:
break
self._populate_cache()
data = self._result_cache[:REPR_OUTPUT_SIZE + 1]
if len(data) > REPR_OUTPUT_SIZE:
data[-1] = "...(remaining elements truncated)..."
self.rewind()
return repr(data)
# Core functions
@@ -201,7 +245,7 @@ class QuerySet(object):
result = queryset.next()
except StopIteration:
msg = ("%s matching query does not exist."
% queryset._document._class_name)
% queryset._document._class_name)
raise queryset._document.DoesNotExist(msg)
try:
queryset.next()
@@ -352,7 +396,12 @@ class QuerySet(object):
"""
if self._limit == 0:
return 0
return self._cursor.count(with_limit_and_skip=with_limit_and_skip)
if with_limit_and_skip and self._len is not None:
return self._len
count = self._cursor.count(with_limit_and_skip=with_limit_and_skip)
if with_limit_and_skip:
self._len = count
return count
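For illustration only (not part of the diff; a hypothetical ``Person`` model
and an open connection are assumed): once the queryset has been evaluated and
its length cached, ``count(with_limit_and_skip=True)`` is answered from that
cache with no extra round trip::

    from mongoengine import Document, StringField, connect

    connect('count_demo')

    class Person(Document):
        name = StringField()

    Person.drop_collection()
    for i in xrange(3):
        Person(name="No: %s" % i).save()

    people = Person.objects
    list(people)                            # evaluates the queryset; list() calls
                                            # len(), which stores people._len
    assert people._len == 3
    people.count(with_limit_and_skip=True)  # served from the cached length, no
                                            # extra count command is issued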
def delete(self, write_concern=None):
"""Delete the documents matched by the query.
@@ -910,7 +959,7 @@ class QuerySet(object):
mr_args['out'] = output
results = getattr(queryset._collection, map_reduce_function)(
map_f, reduce_f, **mr_args)
map_f, reduce_f, **mr_args)
if map_reduce_function == 'map_reduce':
results = results.find()
@@ -1084,20 +1133,18 @@ class QuerySet(object):
def next(self):
"""Wrap the result in a :class:`~mongoengine.Document` object.
"""
self._iter = True
try:
if self._limit == 0 or self._none:
raise StopIteration
if self._scalar:
return self._get_scalar(self._document._from_son(
self._cursor.next()))
if self._as_pymongo:
return self._get_as_pymongo(self._cursor.next())
if self._limit == 0 or self._none:
raise StopIteration
return self._document._from_son(self._cursor.next())
except StopIteration, e:
self.rewind()
raise e
raw_doc = self._cursor.next()
if self._as_pymongo:
return self._get_as_pymongo(raw_doc)
doc = self._document._from_son(raw_doc)
if self._scalar:
return self._get_scalar(doc)
return doc
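The reordered ``next()`` above is what iteration ultimately calls for each of
the queryset's output modes. A brief sketch (illustrative only, not part of the
diff; a hypothetical ``Person`` model and an open connection are assumed)::

    from mongoengine import Document, StringField, connect

    connect('modes_demo')

    class Person(Document):
        name = StringField()

    Person.drop_collection()
    Person(name="Ada").save()

    [p for p in Person.objects]                 # Document instances via _from_son()
    [d for d in Person.objects.as_pymongo()]    # raw dicts straight from pymongo
    [n for n in Person.objects.scalar('name')]  # only the requested field values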
def rewind(self):
"""Rewind the cursor to its unevaluated state.

View File

@@ -51,13 +51,13 @@ CLASSIFIERS = [
extra_opts = {}
if sys.version_info[0] == 3:
extra_opts['use_2to3'] = True
extra_opts['tests_require'] = ['nose', 'coverage', 'blinker']
extra_opts['tests_require'] = ['nose', 'coverage', 'blinker', 'jinja2']
extra_opts['packages'] = find_packages(exclude=('tests',))
if "test" in sys.argv or "nosetests" in sys.argv:
extra_opts['packages'].append("tests")
extra_opts['package_data'] = {"tests": ["fields/mongoengine.png", "fields/mongodb_leaf.png"]}
else:
extra_opts['tests_require'] = ['nose', 'coverage', 'blinker', 'django>=1.4.2', 'PIL']
extra_opts['tests_require'] = ['nose', 'coverage', 'blinker', 'django>=1.4.2', 'PIL', 'jinja2']
extra_opts['packages'] = find_packages(exclude=('tests',))
setup(name='mongoengine',

View File

@@ -793,7 +793,7 @@ class QuerySetTest(unittest.TestCase):
p = p.snapshot(True).slave_okay(True).timeout(True)
self.assertEqual(p._cursor_args,
{'snapshot': True, 'slave_okay': True, 'timeout': True})
{'snapshot': True, 'slave_okay': True, 'timeout': True})
def test_repeated_iteration(self):
"""Ensure that QuerySet rewinds itself one iteration finishes.
@@ -835,6 +835,7 @@ class QuerySetTest(unittest.TestCase):
self.assertTrue("Doc: 0" in docs_string)
self.assertEqual(docs.count(), 1000)
self.assertTrue('(remaining elements truncated)' in "%s" % docs)
# Limit and skip
docs = docs[1:4]
@@ -3231,6 +3232,51 @@ class QuerySetTest(unittest.TestCase):
Organization))
self.assertTrue(isinstance(qs.first().organization, Organization))
def test_cached_queryset(self):
class Person(Document):
name = StringField()
Person.drop_collection()
for i in xrange(100):
Person(name="No: %s" % i).save()
with query_counter() as q:
self.assertEqual(q, 0)
people = Person.objects
[x for x in people]
self.assertEqual(100, len(people._result_cache))
self.assertEqual(None, people._len)
self.assertEqual(q, 1)
list(people)
self.assertEqual(100, people._len) # Caused by list calling len
self.assertEqual(q, 1)
people.count() # count is cached
self.assertEqual(q, 1)
def test_cache_not_cloned(self):
class User(Document):
name = StringField()
def __unicode__(self):
return self.name
User.drop_collection()
User(name="Alice").save()
User(name="Bob").save()
users = User.objects.all().order_by('name')
self.assertEqual("%s" % users, "[<User: Alice>, <User: Bob>]")
self.assertEqual(2, len(users._result_cache))
users = users.filter(name="Bob")
self.assertEqual("%s" % users, "[<User: Bob>]")
self.assertEqual(1, len(users._result_cache))
def test_nested_queryset_iterator(self):
# Try iterating the same queryset twice, nested.
names = ['Alice', 'Bob', 'Chuck', 'David', 'Eric', 'Francis', 'George']
@@ -3247,30 +3293,34 @@ class QuerySetTest(unittest.TestCase):
User(name=name).save()
users = User.objects.all().order_by('name')
outer_count = 0
inner_count = 0
inner_total_count = 0
self.assertEqual(users.count(), 7)
with query_counter() as q:
self.assertEqual(q, 0)
for i, outer_user in enumerate(users):
self.assertEqual(outer_user.name, names[i])
outer_count += 1
inner_count = 0
# Calling len might disrupt the inner loop if there are bugs
self.assertEqual(users.count(), 7)
for j, inner_user in enumerate(users):
self.assertEqual(inner_user.name, names[j])
inner_count += 1
inner_total_count += 1
for i, outer_user in enumerate(users):
self.assertEqual(outer_user.name, names[i])
outer_count += 1
inner_count = 0
self.assertEqual(inner_count, 7) # inner loop should always be executed seven times
# Calling len might disrupt the inner loop if there are bugs
self.assertEqual(users.count(), 7)
self.assertEqual(outer_count, 7) # outer loop should be executed seven times total
self.assertEqual(inner_total_count, 7 * 7) # inner loop should be executed forty-nine times total
for j, inner_user in enumerate(users):
self.assertEqual(inner_user.name, names[j])
inner_count += 1
inner_total_count += 1
self.assertEqual(inner_count, 7) # inner loop should always be executed seven times
self.assertEqual(outer_count, 7) # outer loop should be executed seven times total
self.assertEqual(inner_total_count, 7 * 7) # inner loop should be executed forty-nine times total
self.assertEqual(q, 2)
if __name__ == '__main__':
unittest.main()

tests/test_jinja.py (new file, 47 lines added)
View File

@@ -0,0 +1,47 @@
import sys
sys.path[0:0] = [""]
import unittest
from mongoengine import *
import jinja2
class TemplateFilterTest(unittest.TestCase):
def setUp(self):
connect(db='mongoenginetest')
def test_jinja2(self):
env = jinja2.Environment()
class TestData(Document):
title = StringField()
description = StringField()
TestData.drop_collection()
examples = [('A', '1'),
('B', '2'),
('C', '3')]
for title, description in examples:
TestData(title=title, description=description).save()
tmpl = """
{%- for record in content -%}
{%- if loop.first -%}{ {%- endif -%}
"{{ record.title }}": "{{ record.description }}"
{%- if loop.last -%} }{%- else -%},{% endif -%}
{%- endfor -%}
"""
ctx = {'content': TestData.objects}
template = env.from_string(tmpl)
rendered = template.render(**ctx)
self.assertEqual('{"A": "1","B": "2","C": "3"}', rendered)
if __name__ == '__main__':
unittest.main()