Querysets now utilise a local cache

Changed __len__ behaviour in the queryset (#247, #311)
Ross Lawley 2013-05-10 13:57:32 +00:00
parent 9e513e08ae
commit 9251ce312b
6 changed files with 204 additions and 56 deletions

View File

@@ -4,6 +4,8 @@ Changelog
Changes in 0.8.0
================
- Querysets now utilise a local cache
- Changed __len__ behaviour in the queryset (#247, #311)
- Fixed querying string versions of ObjectIds issue with ReferenceField (#307)
- Added $setOnInsert support for upserts (#308)
- Upserts now possible with just query parameters (#309)
@@ -25,7 +27,6 @@ Changes in 0.8.0
- Added SequenceField.set_next_value(value) helper (#159)
- Updated .only() behaviour - now like exclude it is chainable (#202)
- Added with_limit_and_skip support to count() (#235)
- Removed __len__ from queryset (#247)
- Objects queryset manager now inherited (#256)
- Updated connection to use MongoClient (#262, #274)
- Fixed db_alias and inherited Documents (#143)

View File

@@ -235,12 +235,15 @@ update your code like so: ::
mammals = Animal.objects(type="mammal").filter(order="Carnivora") # The final queryset is assigned to mammals
[m for m in mammals] # This will return all carnivores
No more len
-----------
Len iterates the queryset
--------------------------
If you ever did len(queryset) it previously did a count() under the covers, this
caused some unusual issues - so now it has been removed in favour of the
explicit `queryset.count()` to update::
If you ever did `len(queryset)` it previously ran a `count()` under the covers,
which caused some unusual issues. As `len(queryset)` is most often used by
`list(queryset)`, we now cache the queryset results and use that cache for the
length. This isn't as performant as a `count()`, so if you aren't iterating the
queryset you should upgrade to use `count()` explicitly::
# Old code
len(Animal.objects(type="mammal"))
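# Illustrative only, not part of this commit's diff (the hunk is cut off here):
# the explicit replacement the note above points to is count(), assuming the
# same Animal model as in the snippet.
Animal.objects(type="mammal").count()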

View File

@@ -26,6 +26,7 @@ __all__ = ('QuerySet', 'DO_NOTHING', 'NULLIFY', 'CASCADE', 'DENY', 'PULL')
# The maximum number of items to display in a QuerySet.__repr__
REPR_OUTPUT_SIZE = 20
ITER_CHUNK_SIZE = 100
# Delete rules
DO_NOTHING = 0
@@ -63,6 +64,9 @@ class QuerySet(object):
self._none = False
self._as_pymongo = False
self._as_pymongo_coerce = False
self._result_cache = []
self._has_more = True
self._len = None
# If inheritance is allowed, only return instances and instances of
# subclasses of the class being used
@@ -109,13 +113,60 @@ class QuerySet(object):
queryset._class_check = class_check
return queryset
def __len__(self):
"""Since __len__ is called quite frequently (for example, as part of
list(qs) we populate the result cache and cache the length.
"""
if self._len is not None:
return self._len
if self._has_more:
# populate the cache
list(self._iter_results())
self._len = len(self._result_cache)
return self._len
def __iter__(self):
"""Support iterator protocol"""
queryset = self
if queryset._iter:
queryset = self.clone()
queryset.rewind()
return queryset
"""Iteration utilises a results cache which iterates the cursor
in batches of ``ITER_CHUNK_SIZE``.
If ``self._has_more`` the cursor hasn't been exhausted, so iterate and cache
in batches; otherwise iterate the existing ``self._result_cache``.
"""
self._iter = True
if self._has_more:
return self._iter_results()
# iterating over the cache.
return iter(self._result_cache)
def _iter_results(self):
"""A generator for iterating over the result cache.
Also populates the cache if there are more possible results to yield.
Raises StopIteration when there are no more results"""
pos = 0
while True:
upper = len(self._result_cache)
while pos < upper:
yield self._result_cache[pos]
pos = pos + 1
if not self._has_more:
raise StopIteration
if len(self._result_cache) <= pos:
self._populate_cache()
def _populate_cache(self):
"""
Populates the result cache with ``ITER_CHUNK_SIZE`` more entries
(until the cursor is exhausted).
"""
if self._has_more:
try:
for i in xrange(ITER_CHUNK_SIZE):
self._result_cache.append(self.next())
except StopIteration:
self._has_more = False
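``__len__``, ``__iter__``, ``_iter_results`` and ``_populate_cache`` above make
up the new result cache. A minimal usage sketch (illustrative only, not part of
this commit; it assumes a hypothetical ``Page`` model and an open connection)
shows a repeat iteration and ``len()`` being answered from ``_result_cache``
instead of the cursor::

    from mongoengine import Document, StringField, connect

    connect('cache_demo')

    class Page(Document):
        title = StringField()

    Page.drop_collection()
    for i in xrange(5):
        Page(title="Page %s" % i).save()

    pages = Page.objects
    first = [p.title for p in pages]    # fills pages._result_cache in batches
    second = [p.title for p in pages]   # replayed from the cache, no new cursor reads
    assert first == second
    assert len(pages) == 5              # __len__ reuses the populated cache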
def __getitem__(self, key):
"""Support skip and limit using getitem and slicing syntax.
@@ -157,22 +208,15 @@ class QuerySet(object):
def __repr__(self):
"""Provides the string representation of the QuerySet
.. versionchanged:: 0.6.13 Now doesn't modify the cursor
"""
if self._iter:
return '.. queryset mid-iteration ..'
data = []
for i in xrange(REPR_OUTPUT_SIZE + 1):
try:
data.append(self.next())
except StopIteration:
break
self._populate_cache()
data = self._result_cache[:REPR_OUTPUT_SIZE + 1]
if len(data) > REPR_OUTPUT_SIZE:
data[-1] = "...(remaining elements truncated)..."
self.rewind()
return repr(data)
# Core functions
@@ -201,7 +245,7 @@ class QuerySet(object):
result = queryset.next()
except StopIteration:
msg = ("%s matching query does not exist."
% queryset._document._class_name)
% queryset._document._class_name)
raise queryset._document.DoesNotExist(msg)
try:
queryset.next()
@@ -352,7 +396,12 @@ class QuerySet(object):
"""
if self._limit == 0:
return 0
return self._cursor.count(with_limit_and_skip=with_limit_and_skip)
if with_limit_and_skip and self._len is not None:
return self._len
count = self._cursor.count(with_limit_and_skip=with_limit_and_skip)
if with_limit_and_skip:
self._len = count
return count
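For illustration only (not part of the diff; a hypothetical ``Person`` model
and an open connection are assumed): once the queryset has been evaluated and
its length cached, ``count(with_limit_and_skip=True)`` is answered from that
cache with no extra round trip::

    from mongoengine import Document, StringField, connect

    connect('count_demo')

    class Person(Document):
        name = StringField()

    Person.drop_collection()
    for i in xrange(3):
        Person(name="No: %s" % i).save()

    people = Person.objects
    list(people)                            # evaluates the queryset; list() calls
                                            # len(), which stores people._len
    assert people._len == 3
    people.count(with_limit_and_skip=True)  # served from the cached length, no
                                            # extra count command is issued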
def delete(self, write_concern=None):
"""Delete the documents matched by the query.
@@ -910,7 +959,7 @@ class QuerySet(object):
mr_args['out'] = output
results = getattr(queryset._collection, map_reduce_function)(
map_f, reduce_f, **mr_args)
map_f, reduce_f, **mr_args)
if map_reduce_function == 'map_reduce':
results = results.find()
@@ -1084,20 +1133,18 @@ class QuerySet(object):
def next(self):
"""Wrap the result in a :class:`~mongoengine.Document` object.
"""
self._iter = True
try:
if self._limit == 0 or self._none:
raise StopIteration
if self._scalar:
return self._get_scalar(self._document._from_son(
self._cursor.next()))
if self._as_pymongo:
return self._get_as_pymongo(self._cursor.next())
if self._limit == 0 or self._none:
raise StopIteration
return self._document._from_son(self._cursor.next())
except StopIteration, e:
self.rewind()
raise e
raw_doc = self._cursor.next()
if self._as_pymongo:
return self._get_as_pymongo(raw_doc)
doc = self._document._from_son(raw_doc)
if self._scalar:
return self._get_scalar(doc)
return doc
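The reordered ``next()`` above is what iteration ultimately calls for each of
the queryset's output modes. A brief sketch (illustrative only, not part of the
diff; a hypothetical ``Person`` model and an open connection are assumed)::

    from mongoengine import Document, StringField, connect

    connect('modes_demo')

    class Person(Document):
        name = StringField()

    Person.drop_collection()
    Person(name="Ada").save()

    [p for p in Person.objects]                 # Document instances via _from_son()
    [d for d in Person.objects.as_pymongo()]    # raw dicts straight from pymongo
    [n for n in Person.objects.scalar('name')]  # only the requested field values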
def rewind(self):
"""Rewind the cursor to its unevaluated state.

View File

@@ -51,13 +51,13 @@ CLASSIFIERS = [
extra_opts = {}
if sys.version_info[0] == 3:
extra_opts['use_2to3'] = True
extra_opts['tests_require'] = ['nose', 'coverage', 'blinker']
extra_opts['tests_require'] = ['nose', 'coverage', 'blinker', 'jinja2']
extra_opts['packages'] = find_packages(exclude=('tests',))
if "test" in sys.argv or "nosetests" in sys.argv:
extra_opts['packages'].append("tests")
extra_opts['package_data'] = {"tests": ["fields/mongoengine.png", "fields/mongodb_leaf.png"]}
else:
extra_opts['tests_require'] = ['nose', 'coverage', 'blinker', 'django>=1.4.2', 'PIL']
extra_opts['tests_require'] = ['nose', 'coverage', 'blinker', 'django>=1.4.2', 'PIL', 'jinja2']
extra_opts['packages'] = find_packages(exclude=('tests',))
setup(name='mongoengine',

View File

@@ -793,7 +793,7 @@ class QuerySetTest(unittest.TestCase):
p = p.snapshot(True).slave_okay(True).timeout(True)
self.assertEqual(p._cursor_args,
{'snapshot': True, 'slave_okay': True, 'timeout': True})
{'snapshot': True, 'slave_okay': True, 'timeout': True})
def test_repeated_iteration(self):
"""Ensure that QuerySet rewinds itself one iteration finishes.
@@ -835,6 +835,7 @@ class QuerySetTest(unittest.TestCase):
self.assertTrue("Doc: 0" in docs_string)
self.assertEqual(docs.count(), 1000)
self.assertTrue('(remaining elements truncated)' in "%s" % docs)
# Limit and skip
docs = docs[1:4]
@@ -3231,6 +3232,51 @@ class QuerySetTest(unittest.TestCase):
Organization))
self.assertTrue(isinstance(qs.first().organization, Organization))
def test_cached_queryset(self):
class Person(Document):
name = StringField()
Person.drop_collection()
for i in xrange(100):
Person(name="No: %s" % i).save()
with query_counter() as q:
self.assertEqual(q, 0)
people = Person.objects
[x for x in people]
self.assertEqual(100, len(people._result_cache))
self.assertEqual(None, people._len)
self.assertEqual(q, 1)
list(people)
self.assertEqual(100, people._len) # Caused by list calling len
self.assertEqual(q, 1)
people.count() # count is cached
self.assertEqual(q, 1)
def test_cache_not_cloned(self):
class User(Document):
name = StringField()
def __unicode__(self):
return self.name
User.drop_collection()
User(name="Alice").save()
User(name="Bob").save()
users = User.objects.all().order_by('name')
self.assertEqual("%s" % users, "[<User: Alice>, <User: Bob>]")
self.assertEqual(2, len(users._result_cache))
users = users.filter(name="Bob")
self.assertEqual("%s" % users, "[<User: Bob>]")
self.assertEqual(1, len(users._result_cache))
def test_nested_queryset_iterator(self):
# Try iterating the same queryset twice, nested.
names = ['Alice', 'Bob', 'Chuck', 'David', 'Eric', 'Francis', 'George']
@@ -3247,30 +3293,34 @@ class QuerySetTest(unittest.TestCase):
User(name=name).save()
users = User.objects.all().order_by('name')
outer_count = 0
inner_count = 0
inner_total_count = 0
self.assertEqual(users.count(), 7)
with query_counter() as q:
self.assertEqual(q, 0)
for i, outer_user in enumerate(users):
self.assertEqual(outer_user.name, names[i])
outer_count += 1
inner_count = 0
# Calling len might disrupt the inner loop if there are bugs
self.assertEqual(users.count(), 7)
for j, inner_user in enumerate(users):
self.assertEqual(inner_user.name, names[j])
inner_count += 1
inner_total_count += 1
for i, outer_user in enumerate(users):
self.assertEqual(outer_user.name, names[i])
outer_count += 1
inner_count = 0
self.assertEqual(inner_count, 7) # inner loop should always be executed seven times
# Calling len might disrupt the inner loop if there are bugs
self.assertEqual(users.count(), 7)
self.assertEqual(outer_count, 7) # outer loop should be executed seven times total
self.assertEqual(inner_total_count, 7 * 7) # inner loop should be executed forty-nine times total
for j, inner_user in enumerate(users):
self.assertEqual(inner_user.name, names[j])
inner_count += 1
inner_total_count += 1
self.assertEqual(inner_count, 7) # inner loop should always be executed seven times
self.assertEqual(outer_count, 7) # outer loop should be executed seven times total
self.assertEqual(inner_total_count, 7 * 7) # inner loop should be executed forty-nine times total
self.assertEqual(q, 2)
if __name__ == '__main__':
unittest.main()

tests/test_jinja.py (new file, 47 lines added)
View File

@@ -0,0 +1,47 @@
import sys
sys.path[0:0] = [""]
import unittest
from mongoengine import *
import jinja2
class TemplateFilterTest(unittest.TestCase):
def setUp(self):
connect(db='mongoenginetest')
def test_jinja2(self):
env = jinja2.Environment()
class TestData(Document):
title = StringField()
description = StringField()
TestData.drop_collection()
examples = [('A', '1'),
('B', '2'),
('C', '3')]
for title, description in examples:
TestData(title=title, description=description).save()
tmpl = """
{%- for record in content -%}
{%- if loop.first -%}{ {%- endif -%}
"{{ record.title }}": "{{ record.description }}"
{%- if loop.last -%} }{%- else -%},{% endif -%}
{%- endfor -%}
"""
ctx = {'content': TestData.objects}
template = env.from_string(tmpl)
rendered = template.render(**ctx)
self.assertEqual('{"A": "1","B": "2","C": "3"}', rendered)
if __name__ == '__main__':
unittest.main()