From 9251ce312bc0545d3b86224e35a913029a86695e Mon Sep 17 00:00:00 2001 From: Ross Lawley Date: Fri, 10 May 2013 13:57:32 +0000 Subject: [PATCH] Querysets now utilises a local cache Changed __len__ behavour in the queryset (#247, #311) --- docs/changelog.rst | 3 +- docs/upgrade.rst | 13 ++-- mongoengine/queryset/queryset.py | 111 ++++++++++++++++++++++--------- setup.py | 4 +- tests/queryset/queryset.py | 82 ++++++++++++++++++----- tests/test_jinja.py | 47 +++++++++++++ 6 files changed, 204 insertions(+), 56 deletions(-) create mode 100644 tests/test_jinja.py diff --git a/docs/changelog.rst b/docs/changelog.rst index 842bc7d0..3b6813a0 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -4,6 +4,8 @@ Changelog Changes in 0.8.0 ================ +- Querysets now utilises a local cache +- Changed __len__ behavour in the queryset (#247, #311) - Fixed querying string versions of ObjectIds issue with ReferenceField (#307) - Added $setOnInsert support for upserts (#308) - Upserts now possible with just query parameters (#309) @@ -25,7 +27,6 @@ Changes in 0.8.0 - Added SequenceField.set_next_value(value) helper (#159) - Updated .only() behaviour - now like exclude it is chainable (#202) - Added with_limit_and_skip support to count() (#235) -- Removed __len__ from queryset (#247) - Objects queryset manager now inherited (#256) - Updated connection to use MongoClient (#262, #274) - Fixed db_alias and inherited Documents (#143) diff --git a/docs/upgrade.rst b/docs/upgrade.rst index c633c28d..fe9e4fa9 100644 --- a/docs/upgrade.rst +++ b/docs/upgrade.rst @@ -235,12 +235,15 @@ update your code like so: :: mammals = Animal.objects(type="mammal").filter(order="Carnivora") # The final queryset is assgined to mammals [m for m in mammals] # This will return all carnivores -No more len ------------ +Len iterates the queryset +-------------------------- -If you ever did len(queryset) it previously did a count() under the covers, this -caused some unusual issues - so now it has been removed in favour of the -explicit `queryset.count()` to update:: +If you ever did `len(queryset)` it previously did a `count()` under the covers, +this caused some unusual issues. As `len(queryset)` is most often used by +`list(queryset)` we now cache the queryset results and use that for the length. + +This isn't as performant as a `count()` and if you aren't iterating the +queryset you should upgrade to use count:: # Old code len(Animal.objects(type="mammal")) diff --git a/mongoengine/queryset/queryset.py b/mongoengine/queryset/queryset.py index 191afdd2..2d631831 100644 --- a/mongoengine/queryset/queryset.py +++ b/mongoengine/queryset/queryset.py @@ -26,6 +26,7 @@ __all__ = ('QuerySet', 'DO_NOTHING', 'NULLIFY', 'CASCADE', 'DENY', 'PULL') # The maximum number of items to display in a QuerySet.__repr__ REPR_OUTPUT_SIZE = 20 +ITER_CHUNK_SIZE = 100 # Delete rules DO_NOTHING = 0 @@ -63,6 +64,9 @@ class QuerySet(object): self._none = False self._as_pymongo = False self._as_pymongo_coerce = False + self._result_cache = [] + self._has_more = True + self._len = None # If inheritance is allowed, only return instances and instances of # subclasses of the class being used @@ -109,13 +113,60 @@ class QuerySet(object): queryset._class_check = class_check return queryset + def __len__(self): + """Since __len__ is called quite frequently (for example, as part of + list(qs) we populate the result cache and cache the length. + """ + if self._len is not None: + return self._len + if self._has_more: + # populate the cache + list(self._iter_results()) + + self._len = len(self._result_cache) + return self._len + def __iter__(self): - """Support iterator protocol""" - queryset = self - if queryset._iter: - queryset = self.clone() - queryset.rewind() - return queryset + """Iteration utilises a results cache which iterates the cursor + in batches of ``ITER_CHUNK_SIZE``. + + If ``self._has_more`` the cursor hasn't been exhausted so cache then + batch. Otherwise iterate the result_cache. + """ + self._iter = True + if self._has_more: + return self._iter_results() + + # iterating over the cache. + return iter(self._result_cache) + + def _iter_results(self): + """A generator for iterating over the result cache. + + Also populates the cache if there are more possible results to yield. + Raises StopIteration when there are no more results""" + pos = 0 + while True: + upper = len(self._result_cache) + while pos < upper: + yield self._result_cache[pos] + pos = pos + 1 + if not self._has_more: + raise StopIteration + if len(self._result_cache) <= pos: + self._populate_cache() + + def _populate_cache(self): + """ + Populates the result cache with ``ITER_CHUNK_SIZE`` more entries + (until the cursor is exhausted). + """ + if self._has_more: + try: + for i in xrange(ITER_CHUNK_SIZE): + self._result_cache.append(self.next()) + except StopIteration: + self._has_more = False def __getitem__(self, key): """Support skip and limit using getitem and slicing syntax. @@ -157,22 +208,15 @@ class QuerySet(object): def __repr__(self): """Provides the string representation of the QuerySet - - .. versionchanged:: 0.6.13 Now doesnt modify the cursor """ + if self._iter: return '.. queryset mid-iteration ..' - data = [] - for i in xrange(REPR_OUTPUT_SIZE + 1): - try: - data.append(self.next()) - except StopIteration: - break + self._populate_cache() + data = self._result_cache[:REPR_OUTPUT_SIZE + 1] if len(data) > REPR_OUTPUT_SIZE: data[-1] = "...(remaining elements truncated)..." - - self.rewind() return repr(data) # Core functions @@ -201,7 +245,7 @@ class QuerySet(object): result = queryset.next() except StopIteration: msg = ("%s matching query does not exist." - % queryset._document._class_name) + % queryset._document._class_name) raise queryset._document.DoesNotExist(msg) try: queryset.next() @@ -352,7 +396,12 @@ class QuerySet(object): """ if self._limit == 0: return 0 - return self._cursor.count(with_limit_and_skip=with_limit_and_skip) + if with_limit_and_skip and self._len is not None: + return self._len + count = self._cursor.count(with_limit_and_skip=with_limit_and_skip) + if with_limit_and_skip: + self._len = count + return count def delete(self, write_concern=None): """Delete the documents matched by the query. @@ -910,7 +959,7 @@ class QuerySet(object): mr_args['out'] = output results = getattr(queryset._collection, map_reduce_function)( - map_f, reduce_f, **mr_args) + map_f, reduce_f, **mr_args) if map_reduce_function == 'map_reduce': results = results.find() @@ -1084,20 +1133,18 @@ class QuerySet(object): def next(self): """Wrap the result in a :class:`~mongoengine.Document` object. """ - self._iter = True - try: - if self._limit == 0 or self._none: - raise StopIteration - if self._scalar: - return self._get_scalar(self._document._from_son( - self._cursor.next())) - if self._as_pymongo: - return self._get_as_pymongo(self._cursor.next()) + if self._limit == 0 or self._none: + raise StopIteration - return self._document._from_son(self._cursor.next()) - except StopIteration, e: - self.rewind() - raise e + raw_doc = self._cursor.next() + if self._as_pymongo: + return self._get_as_pymongo(raw_doc) + + doc = self._document._from_son(raw_doc) + if self._scalar: + return self._get_scalar(doc) + + return doc def rewind(self): """Rewind the cursor to its unevaluated state. diff --git a/setup.py b/setup.py index 10a6dbcb..594f7f81 100644 --- a/setup.py +++ b/setup.py @@ -51,13 +51,13 @@ CLASSIFIERS = [ extra_opts = {} if sys.version_info[0] == 3: extra_opts['use_2to3'] = True - extra_opts['tests_require'] = ['nose', 'coverage', 'blinker'] + extra_opts['tests_require'] = ['nose', 'coverage', 'blinker', 'jinja2'] extra_opts['packages'] = find_packages(exclude=('tests',)) if "test" in sys.argv or "nosetests" in sys.argv: extra_opts['packages'].append("tests") extra_opts['package_data'] = {"tests": ["fields/mongoengine.png", "fields/mongodb_leaf.png"]} else: - extra_opts['tests_require'] = ['nose', 'coverage', 'blinker', 'django>=1.4.2', 'PIL'] + extra_opts['tests_require'] = ['nose', 'coverage', 'blinker', 'django>=1.4.2', 'PIL', 'jinja2'] extra_opts['packages'] = find_packages(exclude=('tests',)) setup(name='mongoengine', diff --git a/tests/queryset/queryset.py b/tests/queryset/queryset.py index b9db297b..b9c13963 100644 --- a/tests/queryset/queryset.py +++ b/tests/queryset/queryset.py @@ -793,7 +793,7 @@ class QuerySetTest(unittest.TestCase): p = p.snapshot(True).slave_okay(True).timeout(True) self.assertEqual(p._cursor_args, - {'snapshot': True, 'slave_okay': True, 'timeout': True}) + {'snapshot': True, 'slave_okay': True, 'timeout': True}) def test_repeated_iteration(self): """Ensure that QuerySet rewinds itself one iteration finishes. @@ -835,6 +835,7 @@ class QuerySetTest(unittest.TestCase): self.assertTrue("Doc: 0" in docs_string) self.assertEqual(docs.count(), 1000) + self.assertTrue('(remaining elements truncated)' in "%s" % docs) # Limit and skip docs = docs[1:4] @@ -3231,6 +3232,51 @@ class QuerySetTest(unittest.TestCase): Organization)) self.assertTrue(isinstance(qs.first().organization, Organization)) + def test_cached_queryset(self): + class Person(Document): + name = StringField() + + Person.drop_collection() + for i in xrange(100): + Person(name="No: %s" % i).save() + + with query_counter() as q: + self.assertEqual(q, 0) + people = Person.objects + + [x for x in people] + self.assertEqual(100, len(people._result_cache)) + self.assertEqual(None, people._len) + self.assertEqual(q, 1) + + list(people) + self.assertEqual(100, people._len) # Caused by list calling len + self.assertEqual(q, 1) + + people.count() # count is cached + self.assertEqual(q, 1) + + def test_cache_not_cloned(self): + + class User(Document): + name = StringField() + + def __unicode__(self): + return self.name + + User.drop_collection() + + User(name="Alice").save() + User(name="Bob").save() + + users = User.objects.all().order_by('name') + self.assertEqual("%s" % users, "[, ]") + self.assertEqual(2, len(users._result_cache)) + + users = users.filter(name="Bob") + self.assertEqual("%s" % users, "[]") + self.assertEqual(1, len(users._result_cache)) + def test_nested_queryset_iterator(self): # Try iterating the same queryset twice, nested. names = ['Alice', 'Bob', 'Chuck', 'David', 'Eric', 'Francis', 'George'] @@ -3247,30 +3293,34 @@ class QuerySetTest(unittest.TestCase): User(name=name).save() users = User.objects.all().order_by('name') - outer_count = 0 inner_count = 0 inner_total_count = 0 - self.assertEqual(users.count(), 7) + with query_counter() as q: + self.assertEqual(q, 0) - for i, outer_user in enumerate(users): - self.assertEqual(outer_user.name, names[i]) - outer_count += 1 - inner_count = 0 - - # Calling len might disrupt the inner loop if there are bugs self.assertEqual(users.count(), 7) - for j, inner_user in enumerate(users): - self.assertEqual(inner_user.name, names[j]) - inner_count += 1 - inner_total_count += 1 + for i, outer_user in enumerate(users): + self.assertEqual(outer_user.name, names[i]) + outer_count += 1 + inner_count = 0 - self.assertEqual(inner_count, 7) # inner loop should always be executed seven times + # Calling len might disrupt the inner loop if there are bugs + self.assertEqual(users.count(), 7) - self.assertEqual(outer_count, 7) # outer loop should be executed seven times total - self.assertEqual(inner_total_count, 7 * 7) # inner loop should be executed fourtynine times total + for j, inner_user in enumerate(users): + self.assertEqual(inner_user.name, names[j]) + inner_count += 1 + inner_total_count += 1 + + self.assertEqual(inner_count, 7) # inner loop should always be executed seven times + + self.assertEqual(outer_count, 7) # outer loop should be executed seven times total + self.assertEqual(inner_total_count, 7 * 7) # inner loop should be executed fourtynine times total + + self.assertEqual(q, 2) if __name__ == '__main__': unittest.main() diff --git a/tests/test_jinja.py b/tests/test_jinja.py new file mode 100644 index 00000000..0449f868 --- /dev/null +++ b/tests/test_jinja.py @@ -0,0 +1,47 @@ +import sys +sys.path[0:0] = [""] + +import unittest + +from mongoengine import * + +import jinja2 + + +class TemplateFilterTest(unittest.TestCase): + + def setUp(self): + connect(db='mongoenginetest') + + def test_jinja2(self): + env = jinja2.Environment() + + class TestData(Document): + title = StringField() + description = StringField() + + TestData.drop_collection() + + examples = [('A', '1'), + ('B', '2'), + ('C', '3')] + + for title, description in examples: + TestData(title=title, description=description).save() + + tmpl = """ +{%- for record in content -%} + {%- if loop.first -%}{ {%- endif -%} + "{{ record.title }}": "{{ record.description }}" + {%- if loop.last -%} }{%- else -%},{% endif -%} +{%- endfor -%} +""" + ctx = {'content': TestData.objects} + template = env.from_string(tmpl) + rendered = template.render(**ctx) + + self.assertEqual('{"A": "1","B": "2","C": "3"}', rendered) + + +if __name__ == '__main__': + unittest.main()