Compare commits

...

2 Commits

Author         SHA1        Message                         Date
Stefan Wojcik  894678da39  minor comments                  2016-12-05 00:03:19 -05:00
Stefan Wojcik  0a66a4b8a9  Fix iteration within iteration  2016-12-04 23:55:25 -05:00
3 changed files with 92 additions and 12 deletions


@@ -275,6 +275,8 @@ class BaseQuerySet(object):
except StopIteration:
return result
# If we were able to retrieve the 2nd doc, rewind the cursor and
# raise the MultipleObjectsReturned exception.
queryset.rewind()
message = u'%d items returned, instead of 1' % queryset.count()
raise queryset._document.MultipleObjectsReturned(message)
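To make the guarded behavior concrete, here is a minimal, hypothetical usage sketch; the `Person` model, the `demo_db` connection, and the running local mongod are assumptions for illustration, not part of the diff:

# Hedged sketch: assumes a local mongod and the hypothetical model below.
from mongoengine import Document, StringField, connect

connect('demo_db')

class Person(Document):
    name = StringField()

Person(name='Ann').save()
Person(name='Ann').save()

try:
    Person.objects(name='Ann').get()
except Person.MultipleObjectsReturned as exc:
    # get() rewound the cursor before raising, so the queryset stays usable.
    print(exc)  # e.g. "2 items returned, instead of 1"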


@@ -27,9 +27,10 @@ class QuerySet(BaseQuerySet):
in batches of ``ITER_CHUNK_SIZE``.
If ``self._has_more`` the cursor hasn't been exhausted, so cache the
next batch. Otherwise, iterate over the result_cache.
"""
self._iter = True
if self._has_more:
return self._iter_results()
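Per the docstring, the first pass streams from the cursor while filling the result cache, and later passes (once ``self._has_more`` is False) iterate the cache instead. A short hypothetical sketch, reusing the `Person` model from the sketch above:

qs = Person.objects
first = [p.name for p in qs]   # streams from the cursor, filling the cache
second = [p.name for p in qs]  # _has_more is now False; served from the cache
assert first == second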
@@ -42,10 +43,12 @@ class QuerySet(BaseQuerySet):
"""
if self._len is not None:
return self._len
# Populate the result cache with *all* of the docs in the cursor
if self._has_more:
# populate the cache
list(self._iter_results())
# Cache the length of the complete result cache and return it
self._len = len(self._result_cache)
return self._len
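A consequence worth noting: ``len(qs)`` on a fresh queryset is not free, since it drains the whole cursor into the result cache before measuring it. A hedged sketch (``_result_cache`` is internal and referenced only to show where the docs end up):

qs = Person.objects   # hypothetical model again
n = len(qs)           # pulls *all* remaining docs into the result cache
assert n == len(qs._result_cache)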
@@ -64,18 +67,33 @@ class QuerySet(BaseQuerySet):
def _iter_results(self):
"""A generator for iterating over the result cache.
Also populates the cache if there are more possible results to
yield. Raises StopIteration when there are no more results.
"""
if self._result_cache is None:
self._result_cache = []
pos = 0
while True:
# For all positions lower than the length of the current result
# cache, serve the docs straight from the cache w/o hitting the
# database.
# XXX it's VERY important to compute the len within the `while`
# condition because the result cache might expand mid-iteration
# (e.g. if we call len(qs) inside a loop that iterates over the
# queryset). Fortunately len(list) is O(1) in Python, so this
# doesn't cause performance issues.
while pos < len(self._result_cache):
yield self._result_cache[pos]
pos += 1
# Raise StopIteration if we already established there were no more
# docs in the db cursor.
if not self._has_more:
raise StopIteration
# Otherwise, populate more of the cache and repeat.
if len(self._result_cache) <= pos:
self._populate_cache()
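Stripped of the MongoEngine specifics, the fix boils down to giving each iterator its own position over a shared cache that can grow mid-iteration. A self-contained sketch of that pattern; all names here are illustrative, not from the diff:

class CachedIterable(object):
    """Toy version of the shared-cache iteration pattern above."""

    def __init__(self, source, chunk_size=2):
        self._source = iter(source)
        self._cache = []
        self._has_more = True
        self._chunk_size = chunk_size

    def __iter__(self):
        pos = 0  # each iterator tracks its own position
        while True:
            # Re-check the length on every step: another iterator (or a
            # len() call) may have grown the cache since we last looked.
            while pos < len(self._cache):
                yield self._cache[pos]
                pos += 1
            if not self._has_more:
                return
            self._populate()

    def _populate(self):
        # Pull in one chunk from the underlying source.
        try:
            for _ in range(self._chunk_size):
                self._cache.append(next(self._source))
        except StopIteration:
            self._has_more = False

data = CachedIterable(range(5))
pairs = [(a, b) for a in data for b in data]
assert len(pairs) == 25  # nested iteration sees every element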
@@ -86,12 +104,22 @@ class QuerySet(BaseQuerySet):
"""
if self._result_cache is None:
self._result_cache = []
# Skip populating the cache if we already established there are no
# more docs to pull from the database.
if not self._has_more:
return
# Pull in ITER_CHUNK_SIZE docs from the database and store them in
# the result cache.
try:
for i in xrange(ITER_CHUNK_SIZE):
self._result_cache.append(self.next())
except StopIteration:
# Getting this exception means there are no more docs in the
# db cursor. Set _has_more to False so that we can use that
# information in other places.
self._has_more = False
def count(self, with_limit_and_skip=False):
"""Count the selected elements in the query.


@@ -4890,6 +4890,56 @@ class QuerySetTest(unittest.TestCase):
self.assertEqual(1, Doc.objects(item__type__="axe").count())
def test_len_during_iteration(self):
"""Tests that calling len on a queyset during iteration doesn't
stop paging.
"""
class Data(Document):
pass
for i in xrange(300):
Data().save()
records = Data.objects.limit(250)
# This should pull all 250 docs from mongo and populate the result
# cache
len(records)
# Assert that iterating over documents in the qs touches every
# document even if we call len(qs) midway through the iteration.
for i, r in enumerate(records):
if i == 58:
len(records)
self.assertEqual(i, 249)
# Assert the same behavior is true even if we didn't pre-populate the
# result cache.
records = Data.objects.limit(250)
for i, r in enumerate(records):
if i == 58:
len(records)
self.assertEqual(i, 249)
def test_iteration_within_iteration(self):
"""You should be able to reliably iterate over all the documents
in a given queryset even if there are multiple iterations of it
happening at the same time.
"""
class Data(Document):
pass
for i in xrange(300):
Data().save()
qs = Data.objects.limit(250)
for i, doc in enumerate(qs):
for j, doc2 in enumerate(qs):
pass
self.assertEqual(i, 249)
self.assertEqual(j, 249)
if __name__ == '__main__':
unittest.main()