From 69d3e0c4b6467807666bc693e01ac3e0c8156380 Mon Sep 17 00:00:00 2001 From: blackbrrr Date: Tue, 9 Feb 2010 14:56:15 -0600 Subject: [PATCH 01/12] added map/reduce support via QuerySet.map_reduce. map_reduce operations respect query specs and ordering, but ordering is currently only applied to map/reduce collection. map/reduce may eventually require its own QuerySet to avoid slicing conflicts. results are returned as lists of MapReduceDocument objects, dynamic objects representing the query. tests and documentation included. considered in the neighborhood of 'good start'. --- docs/conf.py | 2 +- mongoengine/document.py | 41 ++++++++++++++++++ mongoengine/queryset.py | 74 ++++++++++++++++++++++++++++++++ tests/queryset.py | 94 ++++++++++++++++++++++++++++++----------- 4 files changed, 186 insertions(+), 25 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index a40a25ff..97aaaca5 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -22,7 +22,7 @@ sys.path.append(os.path.abspath('..')) # Add any Sphinx extension module names here, as strings. They can be extensions # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. -extensions = ['sphinx.ext.autodoc'] +extensions = ['sphinx.ext.autodoc', 'sphinx.ext.todo'] # Add any paths that contain templates here, relative to this directory. templates_path = ['_templates'] diff --git a/mongoengine/document.py b/mongoengine/document.py index 62f9ecce..699bf193 100644 --- a/mongoengine/document.py +++ b/mongoengine/document.py @@ -114,3 +114,44 @@ class Document(BaseDocument): """ db = _get_db() db.drop_collection(cls._meta['collection']) + + +class MapReduceDocument(object): + """A document returned from a map/reduce query. + + :param collection: An instance of :class:`~pymongo.Collection` + :param key: Document/result key, often an instance of + :class:`~pymongo.objectid.ObjectId`. If supplied as + an ``ObjectId`` found in the given ``collection``, + the object can be accessed via the ``key_object`` property. + :param value: The result(s) for this key. If given as a dictionary, + each key in the dictionary will be available as + an instance attribute. + + .. versionadded:: 0.2.2 + + """ + + def __init__(self, collection, key, value): + self._collection = collection + self.key = key + self.value = value + + if isinstance(value, dict): + # create attributes for each named result + for k, v in value.iteritems(): + setattr(self, k, v) + + @property + def object(self): + """Lazy-load the object referenced by ``self.key``. If ``self.key`` + is not an ``ObjectId``, simply return ``self.key``. + """ + if not isinstance(self.key, pymongo.objectid.ObjectId): + return self.key + if not hasattr(self, "_key_object"): + self._key_object = self._collection.find_one(self.key) + return self._key_object + return self._key_object + + \ No newline at end of file diff --git a/mongoengine/queryset.py b/mongoengine/queryset.py index bb0090ea..23621c45 100644 --- a/mongoengine/queryset.py +++ b/mongoengine/queryset.py @@ -17,6 +17,10 @@ class InvalidQueryError(Exception): class OperationError(Exception): pass + + +class NotImplementedError(Exception): + pass class Q(object): @@ -112,6 +116,7 @@ class QuerySet(object): self._accessed_collection = False self._query = {} self._where_clause = None + self._ordering = [] # If inheritance is allowed, only return instances and instances of # subclasses of the class being used @@ -327,6 +332,72 @@ class QuerySet(object): def __len__(self): return self.count() + def map_reduce(self, map_f, reduce_f, scope=None, keep_temp=False): + """Perform a map/reduce query using the current query spec + and ordering. While ``map_reduce`` respects ``QuerySet`` chaining, + it must be the last call made, as it does not return a maleable + ``QuerySet``. + + Example: map/reduce operation is given a ``QuerySet`` + of all posts by "mattdennewitz", ordered by most recent "pub_date". :: + + map_f = function() { ... } + reduce_f = function(key, values) { ... } + + posts = BlogPost(author="mattdennewitz").order_by("-pub_date") + tag_counts = posts.map_reduce(map_f, reduce_f) + + See the :meth:`~mongoengine.tests.QuerySetTest.test_map_reduce_simple` + unit test for more usage examples. + + :param map_f: map function, as :class:`~pymongo.code.Code` or string + :param reduce_f: reduce function, as + :class:`~pymongo.code.Code` or string + :param scope: values to insert into map/reduce global scope. Optional. + :param keep_temp: keep temporary table (boolean, default ``True``) + + Returns a list of :class:`~mongoengine.document.MapReduceDocument`. + + .. note:: Map/Reduce requires server version **>= 1.1.1**. The PyMongo + :meth:`~pymongo.collection.Collection.map_reduce` helper requires + PyMongo version **>= 1.2**. + + .. versionadded:: 0.2.2 + + .. todo:: Implement limits + + """ + from document import MapReduceDocument + + if not hasattr(self._collection, "map_reduce"): + raise NotImplementedError("Requires MongoDB >= 1.1.1") + + if not isinstance(map_f, pymongo.code.Code): + map_f = pymongo.code.Code(map_f) + if not isinstance(reduce_f, pymongo.code.Code): + reduce_f = pymongo.code.Code(reduce_f) + + mr_args = {'query': self._query, 'keeptemp': keep_temp} + + if scope: + mr_args['scope'] = scope + if limit: + mr_args['limit'] = limit + + docs = [] + + results = self._collection.map_reduce(map_f, reduce_f, **mr_args) + results = results.find() + + if self._ordering: + results = results.sort(self._ordering) + + for doc in results: + mrd = MapReduceDocument(self._collection, doc['_id'], doc['value']) + docs.append(mrd) + + return docs + def limit(self, n): """Limit the number of returned documents to `n`. This may also be achieved using array-slicing syntax (e.g. ``User.objects[:5]``). @@ -384,6 +455,7 @@ class QuerySet(object): key = key[1:] key_list.append((key, direction)) + self._ordering = key_list self._cursor.sort(key_list) return self @@ -610,6 +682,7 @@ class QuerySet(object): data[-1] = "...(remaining elements truncated)..." return repr(data) + class InvalidCollectionError(Exception): pass @@ -663,6 +736,7 @@ class QuerySetManager(object): queryset = self._manager_func(queryset) return queryset + def queryset_manager(func): """Decorator that allows you to define custom QuerySet managers on :class:`~mongoengine.Document` classes. The manager must be a function that diff --git a/tests/queryset.py b/tests/queryset.py index d1dc878a..e6548088 100644 --- a/tests/queryset.py +++ b/tests/queryset.py @@ -171,29 +171,29 @@ class QuerySetTest(unittest.TestCase): BlogPost.drop_collection() - def test_field_subsets(self): - """Ensure that a call to ``only`` loads only selected fields. - """ - - class DinerReview(Document): - title = StringField() - abstract = StringField() - content = StringField() - - review = DinerReview(title="Lorraine's Diner") - review.abstract = "Dirty dishes, great food." - review.content = """ - Lorem ipsum dolor sit amet, consectetur adipiscing elit. - Mauris eu felis risus, eget congue ante. Mauris consectetur - dignissim velit, quis dictum risus tincidunt ac. - Phasellus condimentum imperdiet laoreet. - """ - review.save() - - review = DinerReview.objects.only("title").first() - self.assertEqual(review.content, None) - - DinerReview.drop_collection() + # def test_field_subsets(self): + # """Ensure that a call to ``only`` loads only selected fields. + # """ + # + # class DinerReview(Document): + # title = StringField() + # abstract = StringField() + # content = StringField() + # + # review = DinerReview(title="Lorraine's Diner") + # review.abstract = "Dirty dishes, great food." + # review.content = """ + # Lorem ipsum dolor sit amet, consectetur adipiscing elit. + # Mauris eu felis risus, eget congue ante. Mauris consectetur + # dignissim velit, quis dictum risus tincidunt ac. + # Phasellus condimentum imperdiet laoreet. + # """ + # review.save() + # + # review = DinerReview.objects.only("title").first() + # self.assertEqual(review.content, None) + # + # DinerReview.drop_collection() def test_ordering(self): """Ensure default ordering is applied and can be overridden. @@ -400,7 +400,53 @@ class QuerySetTest(unittest.TestCase): ages = [p.age for p in self.Person.objects.order_by('-name')] self.assertEqual(ages, [30, 40, 20]) - + + def test_map_reduce(self): + """Ensure map/reduce is both mapping and reducing. + """ + class Song(Document): + artists = ListField(StringField()) + title = StringField() + is_cover = BooleanField() + + Song.drop_collection() + + Song(title="Gloria", is_cover=True, artists=['Patti Smith']).save() + Song(title="Redondo beach", is_cover=False, + artists=['Patti Smith']).save() + Song(title="My Generation", is_cover=True, + artists=['Patti Smith', 'John Cale']).save() + + map_f = """ + function() { + this.artists.forEach(function(artist) { + emit(artist, 1); + }); + } + """ + + reduce_f = """ + function(key, values) { + var total = 0; + for(var i=0; i Date: Fri, 12 Feb 2010 14:39:08 -0600 Subject: [PATCH 02/12] map/reduce result objects now only have 'key', 'value', and 'object' properties; MapReduceDocument.key_object now returns proper Document subclass; added finalize with Reddit ranking simulation; MapReduceDocuments now yielded; --- mongoengine/document.py | 23 ++++----- mongoengine/queryset.py | 20 +++++--- tests/queryset.py | 110 +++++++++++++++++++++++++++++++++++++++- 3 files changed, 132 insertions(+), 21 deletions(-) diff --git a/mongoengine/document.py b/mongoengine/document.py index 6907cde3..19ed15cd 100644 --- a/mongoengine/document.py +++ b/mongoengine/document.py @@ -124,34 +124,31 @@ class MapReduceDocument(object): :param key: Document/result key, often an instance of :class:`~pymongo.objectid.ObjectId`. If supplied as an ``ObjectId`` found in the given ``collection``, - the object can be accessed via the ``key_object`` property. - :param value: The result(s) for this key. If given as a dictionary, - each key in the dictionary will be available as - an instance attribute. + the object can be accessed via the ``object`` property. + :param value: The result(s) for this key. .. versionadded:: 0.2.2 """ - def __init__(self, collection, key, value): + def __init__(self, document, collection, key, value): + self._document = document self._collection = collection self.key = key self.value = value - - if isinstance(value, dict): - # create attributes for each named result - for k, v in value.iteritems(): - setattr(self, k, v) @property def object(self): """Lazy-load the object referenced by ``self.key``. If ``self.key`` is not an ``ObjectId``, simply return ``self.key``. """ - if not isinstance(self.key, pymongo.objectid.ObjectId): - return self.key + if not isinstance(self.key, (pymongo.objectid.ObjectId)): + try: + self.key = pymongo.objectid.ObjectId(self.key) + except: + return self.key if not hasattr(self, "_key_object"): - self._key_object = self._collection.find_one(self.key) + self._key_object = self._document.objects.with_id(self.key) return self._key_object return self._key_object diff --git a/mongoengine/queryset.py b/mongoengine/queryset.py index 966f3304..32840471 100644 --- a/mongoengine/queryset.py +++ b/mongoengine/queryset.py @@ -125,6 +125,7 @@ class QuerySet(object): self._query = {} self._where_clause = None self._ordering = [] + self._limit = None # If inheritance is allowed, only return instances and instances of # subclasses of the class being used @@ -380,7 +381,8 @@ class QuerySet(object): def __len__(self): return self.count() - def map_reduce(self, map_f, reduce_f, scope=None, keep_temp=False): + def map_reduce(self, map_f, reduce_f, finalize_f=None, limit=None, + scope=None, keep_temp=False): """Perform a map/reduce query using the current query spec and ordering. While ``map_reduce`` respects ``QuerySet`` chaining, it must be the last call made, as it does not return a maleable @@ -402,6 +404,8 @@ class QuerySet(object): :param reduce_f: reduce function, as :class:`~pymongo.code.Code` or string :param scope: values to insert into map/reduce global scope. Optional. + :param limit: number of objects from current query to provide + to map/reduce method :param keep_temp: keep temporary table (boolean, default ``True``) Returns a list of :class:`~mongoengine.document.MapReduceDocument`. @@ -427,13 +431,16 @@ class QuerySet(object): mr_args = {'query': self._query, 'keeptemp': keep_temp} + if finalize_f: + if not isinstance(finalize_f, pymongo.code.Code): + finalize_f = pymongo.code.Code(finalize_f) + mr_args['finalize'] = finalize_f + if scope: mr_args['scope'] = scope if limit: mr_args['limit'] = limit - docs = [] - results = self._collection.map_reduce(map_f, reduce_f, **mr_args) results = results.find() @@ -441,10 +448,8 @@ class QuerySet(object): results = results.sort(self._ordering) for doc in results: - mrd = MapReduceDocument(self._collection, doc['_id'], doc['value']) - docs.append(mrd) - - return docs + yield MapReduceDocument(self._document, self._collection, + doc['_id'], doc['value']) def limit(self, n): """Limit the number of returned documents to `n`. This may also be @@ -452,6 +457,7 @@ class QuerySet(object): :param n: the maximum number of objects to return """ + self._limit = n self._cursor.limit(n) # Return self to allow chaining return self diff --git a/tests/queryset.py b/tests/queryset.py index 16ce4446..7103d4d7 100644 --- a/tests/queryset.py +++ b/tests/queryset.py @@ -1,6 +1,9 @@ +# -*- coding: utf-8 -*- + + import unittest import pymongo -from datetime import datetime +from datetime import datetime, timedelta from mongoengine.queryset import (QuerySet, MultipleObjectsReturned, DoesNotExist) @@ -489,16 +492,121 @@ class QuerySetTest(unittest.TestCase): # ensure both artists are found results = Song.objects.map_reduce(map_f, reduce_f) + results = list(results) self.assertEqual(len(results), 2) # query for a count of Songs per artist, ordered by -count. # Patti Smith has 3 song credits, and should therefore be first. results = Song.objects.order_by("-value").map_reduce(map_f, reduce_f) + results = list(results) self.assertEqual(results[0].key, "Patti Smith") self.assertEqual(results[0].value, 3.0) Song.drop_collection() + def test_map_reduce_finalize(self): + """Ensure scope and finalize are working correctly by simulating + "hotness" ranking with Reddit algorithm. + """ + from time import mktime + + class Link(Document): + title = StringField() + up_votes = IntField() + down_votes = IntField() + submitted = DateTimeField() + + Link.drop_collection() + + now = datetime.utcnow() + + # Note: Test data taken from a custom Reddit homepage on + # Fri, 12 Feb 2010 14:36:00 -0600. + + Link(title = "Google Buzz auto-followed a woman's abusive ex ...", + up_votes = 1079, + down_votes = 553, + submitted = now-timedelta(hours=4)).save() + Link(title = "We did it! Barbie is a computer engineer.", + up_votes = 481, + down_votes = 124, + submitted = now-timedelta(hours=2)).save() + Link(title = "This Is A Mosquito Getting Killed By A Laser", + up_votes = 1446, + down_votes = 530, + submitted=now-timedelta(hours=13)).save() + Link(title = "Arabic flashcards land physics student in jail.", + up_votes = 215, + down_votes = 105, + submitted = now-timedelta(hours=6)).save() + Link(title = "The Burger Lab: Presenting, the Flood Burger", + up_votes = 48, + down_votes = 17, + submitted = now-timedelta(hours=5)).save() + Link(title="How to see polarization with the naked eye", + up_votes = 74, + down_votes = 13, + submitted = now-timedelta(hours=10)).save() + + map_f = """ + function() { + emit(this._id, {up_delta: this.up_votes - this.down_votes, + reddit_epoch: new Date(2005, 12, 8, 7, 46, 43, 0).getTime(), + sub_date: this.submitted.getTime()}) + } + """ + + reduce_f = """ + function(key, values) { + data = values[0]; + + x = data.up_delta; + + // calculate time diff between reddit epoch and submission + sec_since_epoch = data.sub_date - data.reddit_epoch; + sec_since_epoch /= 1000; + + // calculate 'Y' + if(x > 0) { + y = 1; + } else if (x = 0) { + y = 0; + } else { + y = -1; + } + + // calculate 'Z', the maximal value + if(Math.abs(x) >= 1) { + z = Math.abs(x); + } else { + z = 1; + } + + return {x: x, y: y, z: z, t_s: sec_since_epoch}; + } + """ + + finalize_f = """ + function(key, value) { + + // f(sec_since_epoch,y,z) = log10(z) + ((y*sec_since_epoch) / 45000) + z_10 = Math.log(value.z) / Math.log(10); + weight = z_10 + ((value.y * value.t_s) / 45000); + return weight; + + } + """ + + # ensure both artists are found + results = Link.objects.order_by("-value") + results = results.map_reduce(map_f, reduce_f, finalize_f=finalize_f) + results = list(results) + + self.assertTrue(results[0].object.title.startswith("Google Buzz")) + self.assertTrue(results[-1].object.title.startswith("How to see")) + + Link.drop_collection() + def test_item_frequencies(self): """Ensure that item frequencies are properly generated from lists. """ From a4c5fa57e04687a9260adef6c621ab8b5d68ab7f Mon Sep 17 00:00:00 2001 From: blackbrrr Date: Fri, 12 Feb 2010 15:53:28 -0600 Subject: [PATCH 03/12] updated notes in map_reduce_advanced queryset test --- tests/queryset.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/tests/queryset.py b/tests/queryset.py index 7103d4d7..777231e9 100644 --- a/tests/queryset.py +++ b/tests/queryset.py @@ -505,8 +505,8 @@ class QuerySetTest(unittest.TestCase): Song.drop_collection() def test_map_reduce_finalize(self): - """Ensure scope and finalize are working correctly by simulating - "hotness" ranking with Reddit algorithm. + """Ensure finalize is running by simulating "hotness" + ranking with Reddit algorithm. """ from time import mktime @@ -521,7 +521,8 @@ class QuerySetTest(unittest.TestCase): now = datetime.utcnow() # Note: Test data taken from a custom Reddit homepage on - # Fri, 12 Feb 2010 14:36:00 -0600. + # Fri, 12 Feb 2010 14:36:00 -0600. Link ordering should + # reflect order of insertion below. Link(title = "Google Buzz auto-followed a woman's abusive ex ...", up_votes = 1079, @@ -588,12 +589,10 @@ class QuerySetTest(unittest.TestCase): finalize_f = """ function(key, value) { - // f(sec_since_epoch,y,z) = log10(z) + ((y*sec_since_epoch) / 45000) z_10 = Math.log(value.z) / Math.log(10); weight = z_10 + ((value.y * value.t_s) / 45000); return weight; - } """ From 008a62e4e93503b755a526b52267c1c6035bbdea Mon Sep 17 00:00:00 2001 From: blackbrrr Date: Fri, 12 Feb 2010 16:07:44 -0600 Subject: [PATCH 04/12] updated map/reduce documentation --- mongoengine/queryset.py | 21 +++++++-------------- tests/queryset.py | 3 +++ 2 files changed, 10 insertions(+), 14 deletions(-) diff --git a/mongoengine/queryset.py b/mongoengine/queryset.py index 32840471..9c706f0d 100644 --- a/mongoengine/queryset.py +++ b/mongoengine/queryset.py @@ -387,36 +387,29 @@ class QuerySet(object): and ordering. While ``map_reduce`` respects ``QuerySet`` chaining, it must be the last call made, as it does not return a maleable ``QuerySet``. - - Example: map/reduce operation is given a ``QuerySet`` - of all posts by "mattdennewitz", ordered by most recent "pub_date". :: - - map_f = function() { ... } - reduce_f = function(key, values) { ... } - - posts = BlogPost(author="mattdennewitz").order_by("-pub_date") - tag_counts = posts.map_reduce(map_f, reduce_f) - See the :meth:`~mongoengine.tests.QuerySetTest.test_map_reduce_simple` - unit test for more usage examples. + See the :meth:`~mongoengine.tests.QuerySetTest.test_map_reduce` + and :meth:`~mongoengine.tests.QuerySetTest.test_map_advanced` + tests in ``tests.queryset.QuerySetTest`` for usage examples. :param map_f: map function, as :class:`~pymongo.code.Code` or string :param reduce_f: reduce function, as :class:`~pymongo.code.Code` or string + :param finalize_f: finalize function, an optional function that + performs any post-reduction processing. :param scope: values to insert into map/reduce global scope. Optional. :param limit: number of objects from current query to provide to map/reduce method :param keep_temp: keep temporary table (boolean, default ``True``) - Returns a list of :class:`~mongoengine.document.MapReduceDocument`. + Returns an iterator yielding + :class:`~mongoengine.document.MapReduceDocument`. .. note:: Map/Reduce requires server version **>= 1.1.1**. The PyMongo :meth:`~pymongo.collection.Collection.map_reduce` helper requires PyMongo version **>= 1.2**. .. versionadded:: 0.2.2 - - .. todo:: Implement limits """ from document import MapReduceDocument diff --git a/tests/queryset.py b/tests/queryset.py index 777231e9..809f9f8e 100644 --- a/tests/queryset.py +++ b/tests/queryset.py @@ -601,7 +601,10 @@ class QuerySetTest(unittest.TestCase): results = results.map_reduce(map_f, reduce_f, finalize_f=finalize_f) results = list(results) + # assert troublesome Buzz article is ranked 1st self.assertTrue(results[0].object.title.startswith("Google Buzz")) + + # assert laser vision is ranked last self.assertTrue(results[-1].object.title.startswith("How to see")) Link.drop_collection() From a19a7b976ca3fd5ed9406b9b10619e83c12cbe08 Mon Sep 17 00:00:00 2001 From: blackbrrr Date: Tue, 23 Feb 2010 22:26:05 -0600 Subject: [PATCH 05/12] updated advanced map/reduce test to include scope; misc cleanup in queryset --- mongoengine/queryset.py | 117 +++++++++++++++++----------------- tests/queryset.py | 136 +++++++++++++++++++++------------------- 2 files changed, 128 insertions(+), 125 deletions(-) diff --git a/mongoengine/queryset.py b/mongoengine/queryset.py index 9c706f0d..0ee5014b 100644 --- a/mongoengine/queryset.py +++ b/mongoengine/queryset.py @@ -4,7 +4,7 @@ import pymongo import copy -__all__ = ['queryset_manager', 'Q', 'InvalidQueryError', +__all__ = ['queryset_manager', 'Q', 'InvalidQueryError', 'InvalidCollectionError'] # The maximum number of items to display in a QuerySet.__repr__ @@ -12,7 +12,7 @@ REPR_OUTPUT_SIZE = 20 class DoesNotExist(Exception): - pass + pass class MultipleObjectsReturned(Exception): @@ -25,14 +25,14 @@ class InvalidQueryError(Exception): class OperationError(Exception): pass - - + + class NotImplementedError(Exception): pass class Q(object): - + OR = '||' AND = '&&' OPERATORS = { @@ -51,7 +51,7 @@ class Q(object): 'size': 'this.%(field)s.length == %(value)s', 'exists': 'this.%(field)s != null', } - + def __init__(self, **query): self.query = [query] @@ -98,26 +98,24 @@ class Q(object): js_scope[op_value_name] = value # Construct the JS that uses this op operation_js = Q.OPERATORS[op.strip('$')] % { - 'field': key, - 'value': op_value_name - } + 'field': key, + 'value': op_value_name} js.append(operation_js) else: js_scope[value_name] = value # Construct the JS for this field field_js = Q.OPERATORS[op.strip('$')] % { - 'field': key, - 'value': value_name - } + 'field': key, + 'value': value_name} js.append(field_js) return ' && '.join(js) class QuerySet(object): - """A set of results returned from a query. Wraps a MongoDB cursor, + """A set of results returned from a query. Wraps a MongoDB cursor, providing :class:`~mongoengine.Document` objects as the results. """ - + def __init__(self, document, collection): self._document = document self._collection_obj = collection @@ -132,7 +130,7 @@ class QuerySet(object): if document._meta.get('allow_inheritance'): self._query = {'_types': self._document._class_name} self._cursor_obj = None - + def ensure_index(self, key_or_list): """Ensure that the given indexes are in place. @@ -180,7 +178,7 @@ class QuerySet(object): return index_list def __call__(self, q_obj=None, **query): - """Filter the selected documents by calling the + """Filter the selected documents by calling the :class:`~mongoengine.queryset.QuerySet` with a query. :param q_obj: a :class:`~mongoengine.queryset.Q` object to be used in @@ -194,7 +192,7 @@ class QuerySet(object): query = QuerySet._transform_query(_doc_cls=self._document, **query) self._query.update(query) return self - + def filter(self, *q_objs, **query): """An alias of :meth:`~mongoengine.queryset.QuerySet.__call__` """ @@ -230,11 +228,11 @@ class QuerySet(object): # Apply where clauses to cursor if self._where_clause: self._cursor_obj.where(self._where_clause) - + # apply default ordering if self._document._meta['ordering']: self.order_by(*self._document._meta['ordering']) - + return self._cursor_obj @classmethod @@ -307,8 +305,8 @@ class QuerySet(object): return mongo_query def get(self, *q_objs, **query): - """Retrieve the the matching object raising - :class:`~mongoengine.queryset.MultipleObjectsReturned` or + """Retrieve the the matching object raising + :class:`~mongoengine.queryset.MultipleObjectsReturned` or :class:`~mongoengine.queryset.DoesNotExist` exceptions if multiple or no results are found. """ @@ -324,15 +322,15 @@ class QuerySet(object): def get_or_create(self, *q_objs, **query): """Retreive unique object or create, if it doesn't exist. Raises - :class:`~mongoengine.queryset.MultipleObjectsReturned` if multiple - results are found. A new document will be created if the document + :class:`~mongoengine.queryset.MultipleObjectsReturned` if multiple + results are found. A new document will be created if the document doesn't exists; a dictionary of default values for the new document may be provided as a keyword argument called :attr:`defaults`. """ defaults = query.get('defaults', {}) - if query.has_key('defaults'): + if 'defaults' in query: del query['defaults'] - + self.__call__(*q_objs, **query) count = self.count() if count == 0: @@ -383,17 +381,17 @@ class QuerySet(object): def map_reduce(self, map_f, reduce_f, finalize_f=None, limit=None, scope=None, keep_temp=False): - """Perform a map/reduce query using the current query spec - and ordering. While ``map_reduce`` respects ``QuerySet`` chaining, - it must be the last call made, as it does not return a maleable - ``QuerySet``. - - See the :meth:`~mongoengine.tests.QuerySetTest.test_map_reduce` + """Perform a map/reduce query using the current query spec + and ordering. While ``map_reduce`` respects ``QuerySet`` chaining, + it must be the last call made, as it does not return a maleable + ``QuerySet``. + + See the :meth:`~mongoengine.tests.QuerySetTest.test_map_reduce` and :meth:`~mongoengine.tests.QuerySetTest.test_map_advanced` tests in ``tests.queryset.QuerySetTest`` for usage examples. - + :param map_f: map function, as :class:`~pymongo.code.Code` or string - :param reduce_f: reduce function, as + :param reduce_f: reduce function, as :class:`~pymongo.code.Code` or string :param finalize_f: finalize function, an optional function that performs any post-reduction processing. @@ -401,36 +399,37 @@ class QuerySet(object): :param limit: number of objects from current query to provide to map/reduce method :param keep_temp: keep temporary table (boolean, default ``True``) - - Returns an iterator yielding + + Returns an iterator yielding :class:`~mongoengine.document.MapReduceDocument`. - + .. note:: Map/Reduce requires server version **>= 1.1.1**. The PyMongo :meth:`~pymongo.collection.Collection.map_reduce` helper requires PyMongo version **>= 1.2**. - + .. versionadded:: 0.2.2 """ from document import MapReduceDocument - + if not hasattr(self._collection, "map_reduce"): raise NotImplementedError("Requires MongoDB >= 1.1.1") - + if not isinstance(map_f, pymongo.code.Code): map_f = pymongo.code.Code(map_f) if not isinstance(reduce_f, pymongo.code.Code): reduce_f = pymongo.code.Code(reduce_f) - + mr_args = {'query': self._query, 'keeptemp': keep_temp} if finalize_f: if not isinstance(finalize_f, pymongo.code.Code): finalize_f = pymongo.code.Code(finalize_f) mr_args['finalize'] = finalize_f - + if scope: mr_args['scope'] = scope + if limit: mr_args['limit'] = limit @@ -439,9 +438,9 @@ class QuerySet(object): if self._ordering: results = results.sort(self._ordering) - + for doc in results: - yield MapReduceDocument(self._document, self._collection, + yield MapReduceDocument(self._document, self._collection, doc['_id'], doc['value']) def limit(self, n): @@ -473,7 +472,7 @@ class QuerySet(object): self._cursor_obj = self._cursor[key] except IndexError, err: # PyMongo raises an error if key.start == key.stop, catch it, - # bin it, kill it. + # bin it, kill it. if key.start >=0 and key.stop >= 0 and key.step is None: if key.start == key.stop: self.limit(0) @@ -500,14 +499,14 @@ class QuerySet(object): direction = pymongo.DESCENDING if key[0] in ('-', '+'): key = key[1:] - key_list.append((key, direction)) + key_list.append((key, direction)) self._ordering = key_list self._cursor.sort(key_list) return self - + def explain(self, format=False): - """Return an explain plan record for the + """Return an explain plan record for the :class:`~mongoengine.queryset.QuerySet`\ 's cursor. :param format: format the plan before returning it @@ -518,7 +517,7 @@ class QuerySet(object): import pprint plan = pprint.pformat(plan) return plan - + def delete(self, safe=False): """Delete the documents matched by the query. @@ -530,7 +529,7 @@ class QuerySet(object): def _transform_update(cls, _doc_cls=None, **update): """Transform an update spec from Django-style format to Mongo format. """ - operators = ['set', 'unset', 'inc', 'dec', 'push', 'push_all', 'pull', + operators = ['set', 'unset', 'inc', 'dec', 'push', 'push_all', 'pull', 'pull_all'] mongo_update = {} @@ -588,7 +587,7 @@ class QuerySet(object): update = QuerySet._transform_update(self._document, **update) try: - self._collection.update(self._query, update, safe=safe_update, + self._collection.update(self._query, update, safe=safe_update, multi=True) except pymongo.errors.OperationFailure, err: if unicode(err) == u'multi not coded yet': @@ -609,7 +608,7 @@ class QuerySet(object): # Explicitly provide 'multi=False' to newer versions of PyMongo # as the default may change to 'True' if pymongo.version >= '1.1.1': - self._collection.update(self._query, update, safe=safe_update, + self._collection.update(self._query, update, safe=safe_update, multi=False) else: # Older versions of PyMongo don't support 'multi' @@ -624,15 +623,15 @@ class QuerySet(object): """Execute a Javascript function on the server. A list of fields may be provided, which will be translated to their correct names and supplied as the arguments to the function. A few extra variables are added to - the function's scope: ``collection``, which is the name of the - collection in use; ``query``, which is an object representing the + the function's scope: ``collection``, which is the name of the + collection in use; ``query``, which is an object representing the current query; and ``options``, which is an object containing any options specified as keyword arguments. :param code: a string of Javascript code to execute :param fields: fields that you will be using in your function, which will be passed in to your function as arguments - :param options: options that you want available to the function + :param options: options that you want available to the function (accessed in Javascript through the ``options`` object) """ fields = [QuerySet._translate_field_name(self._document, f) @@ -647,7 +646,7 @@ class QuerySet(object): query = self._query if self._where_clause: query['$where'] = self._where_clause - + scope['query'] = query code = pymongo.code.Code(code, scope=scope) @@ -695,7 +694,7 @@ class QuerySet(object): def item_frequencies(self, list_field, normalize=False): """Returns a dictionary of all items present in a list field across the whole queried set of documents, and their corresponding frequency. - This is useful for generating tag clouds, or searching documents. + This is useful for generating tag clouds, or searching documents. :param list_field: the list field to use :param normalize: normalize the results so they add to 1.0 @@ -742,7 +741,7 @@ class QuerySetManager(object): self._collection = None def __get__(self, instance, owner): - """Descriptor for instantiating a new QuerySet object when + """Descriptor for instantiating a new QuerySet object when Document.objects is accessed. """ if instance is not None: @@ -761,7 +760,7 @@ class QuerySetManager(object): if collection in db.collection_names(): self._collection = db[collection] - # The collection already exists, check if its capped + # The collection already exists, check if its capped # options match the specified capped options options = self._collection.options() if options.get('max') != max_documents or \ @@ -777,7 +776,7 @@ class QuerySetManager(object): self._collection = db.create_collection(collection, opts) else: self._collection = db[collection] - + # owner is the document that contains the QuerySetManager queryset = QuerySet(owner, self._collection) if self._manager_func: diff --git a/tests/queryset.py b/tests/queryset.py index 809f9f8e..2129eed7 100644 --- a/tests/queryset.py +++ b/tests/queryset.py @@ -5,13 +5,13 @@ import unittest import pymongo from datetime import datetime, timedelta -from mongoengine.queryset import (QuerySet, MultipleObjectsReturned, +from mongoengine.queryset import (QuerySet, MultipleObjectsReturned, DoesNotExist) from mongoengine import * class QuerySetTest(unittest.TestCase): - + def setUp(self): connect(db='mongoenginetest') @@ -19,12 +19,12 @@ class QuerySetTest(unittest.TestCase): name = StringField() age = IntField() self.Person = Person - + def test_initialisation(self): """Ensure that a QuerySet is correctly initialised by QuerySetManager. """ self.assertTrue(isinstance(self.Person.objects, QuerySet)) - self.assertEqual(self.Person.objects._collection.name, + self.assertEqual(self.Person.objects._collection.name, self.Person._meta['collection']) self.assertTrue(isinstance(self.Person.objects._collection, pymongo.collection.Collection)) @@ -34,15 +34,15 @@ class QuerySetTest(unittest.TestCase): """ self.assertEqual(QuerySet._transform_query(name='test', age=30), {'name': 'test', 'age': 30}) - self.assertEqual(QuerySet._transform_query(age__lt=30), + self.assertEqual(QuerySet._transform_query(age__lt=30), {'age': {'$lt': 30}}) self.assertEqual(QuerySet._transform_query(age__gt=20, age__lt=50), {'age': {'$gt': 20, '$lt': 50}}) self.assertEqual(QuerySet._transform_query(age=20, age__gt=50), {'age': 20}) - self.assertEqual(QuerySet._transform_query(friend__age__gte=30), + self.assertEqual(QuerySet._transform_query(friend__age__gte=30), {'friend.age': {'$gte': 30}}) - self.assertEqual(QuerySet._transform_query(name__exists=True), + self.assertEqual(QuerySet._transform_query(name__exists=True), {'name': {'$exists': True}}) def test_find(self): @@ -134,7 +134,7 @@ class QuerySetTest(unittest.TestCase): self.assertEqual(person.name, "User B") self.assertRaises(IndexError, self.Person.objects.__getitem__, 2) - + # Find a document using just the object id person = self.Person.objects.with_id(person1.id) self.assertEqual(person.name, "User A") @@ -170,7 +170,7 @@ class QuerySetTest(unittest.TestCase): person2.save() # Retrieve the first person from the database - self.assertRaises(MultipleObjectsReturned, + self.assertRaises(MultipleObjectsReturned, self.Person.objects.get_or_create) # Use a query to filter the people found to just person2 @@ -191,36 +191,36 @@ class QuerySetTest(unittest.TestCase): """Ensure filters can be chained together. """ from datetime import datetime - + class BlogPost(Document): title = StringField() is_published = BooleanField() published_date = DateTimeField() - + @queryset_manager def published(doc_cls, queryset): return queryset(is_published=True) - - blog_post_1 = BlogPost(title="Blog Post #1", + + blog_post_1 = BlogPost(title="Blog Post #1", is_published = True, published_date=datetime(2010, 1, 5, 0, 0 ,0)) - blog_post_2 = BlogPost(title="Blog Post #2", + blog_post_2 = BlogPost(title="Blog Post #2", is_published = True, published_date=datetime(2010, 1, 6, 0, 0 ,0)) - blog_post_3 = BlogPost(title="Blog Post #3", + blog_post_3 = BlogPost(title="Blog Post #3", is_published = True, published_date=datetime(2010, 1, 7, 0, 0 ,0)) blog_post_1.save() blog_post_2.save() blog_post_3.save() - + # find all published blog posts before 2010-01-07 published_posts = BlogPost.published() published_posts = published_posts.filter( published_date__lt=datetime(2010, 1, 7, 0, 0 ,0)) self.assertEqual(published_posts.count(), 2) - + BlogPost.drop_collection() def test_ordering(self): @@ -236,22 +236,22 @@ class QuerySetTest(unittest.TestCase): BlogPost.drop_collection() - blog_post_1 = BlogPost(title="Blog Post #1", + blog_post_1 = BlogPost(title="Blog Post #1", published_date=datetime(2010, 1, 5, 0, 0 ,0)) - blog_post_2 = BlogPost(title="Blog Post #2", + blog_post_2 = BlogPost(title="Blog Post #2", published_date=datetime(2010, 1, 6, 0, 0 ,0)) - blog_post_3 = BlogPost(title="Blog Post #3", + blog_post_3 = BlogPost(title="Blog Post #3", published_date=datetime(2010, 1, 7, 0, 0 ,0)) blog_post_1.save() blog_post_2.save() blog_post_3.save() - + # get the "first" BlogPost using default ordering # from BlogPost.meta.ordering - latest_post = BlogPost.objects.first() + latest_post = BlogPost.objects.first() self.assertEqual(latest_post.title, "Blog Post #3") - + # override default ordering, order BlogPosts by "published_date" first_post = BlogPost.objects.order_by("+published_date").first() self.assertEqual(first_post.title, "Blog Post #1") @@ -277,7 +277,7 @@ class QuerySetTest(unittest.TestCase): result = BlogPost.objects.first() self.assertTrue(isinstance(result.author, User)) self.assertEqual(result.author.name, 'Test User') - + BlogPost.drop_collection() def test_find_dict_item(self): @@ -339,7 +339,7 @@ class QuerySetTest(unittest.TestCase): self.Person(name='user2', age=20).save() self.Person(name='user3', age=30).save() self.Person(name='user4', age=40).save() - + self.assertEqual(len(self.Person.objects(Q(age__in=[20]))), 2) self.assertEqual(len(self.Person.objects(Q(age__in=[20, 30]))), 3) @@ -452,10 +452,10 @@ class QuerySetTest(unittest.TestCase): names = [p.name for p in self.Person.objects.order_by('age')] self.assertEqual(names, ['User A', 'User C', 'User B']) - + ages = [p.age for p in self.Person.objects.order_by('-name')] self.assertEqual(ages, [30, 40, 20]) - + def test_map_reduce(self): """Ensure map/reduce is both mapping and reducing. """ @@ -463,15 +463,15 @@ class QuerySetTest(unittest.TestCase): artists = ListField(StringField()) title = StringField() is_cover = BooleanField() - + Song.drop_collection() - + Song(title="Gloria", is_cover=True, artists=['Patti Smith']).save() - Song(title="Redondo beach", is_cover=False, + Song(title="Redondo beach", is_cover=False, artists=['Patti Smith']).save() Song(title="My Generation", is_cover=True, artists=['Patti Smith', 'John Cale']).save() - + map_f = """ function() { this.artists.forEach(function(artist) { @@ -479,7 +479,7 @@ class QuerySetTest(unittest.TestCase): }); } """ - + reduce_f = """ function(key, values) { var total = 0; @@ -489,27 +489,27 @@ class QuerySetTest(unittest.TestCase): return total; } """ - + # ensure both artists are found results = Song.objects.map_reduce(map_f, reduce_f) results = list(results) self.assertEqual(len(results), 2) - + # query for a count of Songs per artist, ordered by -count. # Patti Smith has 3 song credits, and should therefore be first. results = Song.objects.order_by("-value").map_reduce(map_f, reduce_f) results = list(results) self.assertEqual(results[0].key, "Patti Smith") self.assertEqual(results[0].value, 3.0) - + Song.drop_collection() - + def test_map_reduce_finalize(self): - """Ensure finalize is running by simulating "hotness" - ranking with Reddit algorithm. + """Ensure that map, reduce, and finalize run and introduce "scope" + by simulating "hotness" ranking with Reddit algorithm. """ from time import mktime - + class Link(Document): title = StringField() up_votes = IntField() @@ -521,10 +521,10 @@ class QuerySetTest(unittest.TestCase): now = datetime.utcnow() # Note: Test data taken from a custom Reddit homepage on - # Fri, 12 Feb 2010 14:36:00 -0600. Link ordering should + # Fri, 12 Feb 2010 14:36:00 -0600. Link ordering should # reflect order of insertion below. - - Link(title = "Google Buzz auto-followed a woman's abusive ex ...", + + Link(title = "Google Buzz auto-followed a woman's abusive ex ...", up_votes = 1079, down_votes = 553, submitted = now-timedelta(hours=4)).save() @@ -532,12 +532,12 @@ class QuerySetTest(unittest.TestCase): up_votes = 481, down_votes = 124, submitted = now-timedelta(hours=2)).save() - Link(title = "This Is A Mosquito Getting Killed By A Laser", + Link(title = "This Is A Mosquito Getting Killed By A Laser", up_votes = 1446, down_votes = 530, submitted=now-timedelta(hours=13)).save() Link(title = "Arabic flashcards land physics student in jail.", - up_votes = 215, + up_votes = 215, down_votes = 105, submitted = now-timedelta(hours=6)).save() Link(title = "The Burger Lab: Presenting, the Flood Burger", @@ -552,21 +552,19 @@ class QuerySetTest(unittest.TestCase): map_f = """ function() { emit(this._id, {up_delta: this.up_votes - this.down_votes, - reddit_epoch: new Date(2005, 12, 8, 7, 46, 43, 0).getTime(), - sub_date: this.submitted.getTime()}) + sub_date: this.submitted.getTime() / 1000}) } """ reduce_f = """ function(key, values) { data = values[0]; - + x = data.up_delta; - + // calculate time diff between reddit epoch and submission - sec_since_epoch = data.sub_date - data.reddit_epoch; - sec_since_epoch /= 1000; - + sec_since_epoch = data.sub_date - reddit_epoch; + // calculate 'Y' if(x > 0) { y = 1; @@ -575,18 +573,18 @@ class QuerySetTest(unittest.TestCase): } else { y = -1; } - + // calculate 'Z', the maximal value if(Math.abs(x) >= 1) { z = Math.abs(x); } else { z = 1; } - + return {x: x, y: y, z: z, t_s: sec_since_epoch}; } """ - + finalize_f = """ function(key, value) { // f(sec_since_epoch,y,z) = log10(z) + ((y*sec_since_epoch) / 45000) @@ -596,19 +594,25 @@ class QuerySetTest(unittest.TestCase): } """ + reddit_epoch = mktime(datetime(2005, 12, 8, 7, 46, 43).timetuple()) + scope = {'reddit_epoch': reddit_epoch} + # ensure both artists are found results = Link.objects.order_by("-value") - results = results.map_reduce(map_f, reduce_f, finalize_f=finalize_f) + results = results.map_reduce(map_f, + reduce_f, + finalize_f=finalize_f, + scope=scope) results = list(results) - + # assert troublesome Buzz article is ranked 1st self.assertTrue(results[0].object.title.startswith("Google Buzz")) - + # assert laser vision is ranked last self.assertTrue(results[-1].object.title.startswith("How to see")) Link.drop_collection() - + def test_item_frequencies(self): """Ensure that item frequencies are properly generated from lists. """ @@ -705,20 +709,20 @@ class QuerySetTest(unittest.TestCase): title = StringField(name='postTitle') comments = ListField(EmbeddedDocumentField(Comment), name='postComments') - + BlogPost.drop_collection() data = {'title': 'Post 1', 'comments': [Comment(content='test')]} BlogPost(**data).save() - self.assertTrue('postTitle' in + self.assertTrue('postTitle' in BlogPost.objects(title=data['title'])._query) - self.assertFalse('title' in + self.assertFalse('title' in BlogPost.objects(title=data['title'])._query) self.assertEqual(len(BlogPost.objects(title=data['title'])), 1) - self.assertTrue('postComments.commentContent' in + self.assertTrue('postComments.commentContent' in BlogPost.objects(comments__content='test')._query) self.assertEqual(len(BlogPost.objects(comments__content='test')), 1) @@ -739,7 +743,7 @@ class QuerySetTest(unittest.TestCase): post.save() # Test that query may be performed by providing a document as a value - # while using a ReferenceField's name - the document should be + # while using a ReferenceField's name - the document should be # converted to an DBRef, which is legal, unlike a Document object post_obj = BlogPost.objects(author=person).first() self.assertEqual(post.id, post_obj.id) @@ -783,7 +787,7 @@ class QuerySetTest(unittest.TestCase): class QTest(unittest.TestCase): - + def test_or_and(self): """Ensure that Q objects may be combined correctly. """ @@ -807,8 +811,8 @@ class QTest(unittest.TestCase): examples = [ ({'name': 'test'}, 'this.name == i0f0', {'i0f0': 'test'}), ({'age': {'$gt': 18}}, 'this.age > i0f0o0', {'i0f0o0': 18}), - ({'name': 'test', 'age': {'$gt': 18, '$lte': 65}}, - 'this.age <= i0f0o0 && this.age > i0f0o1 && this.name == i0f1', + ({'name': 'test', 'age': {'$gt': 18, '$lte': 65}}, + 'this.age <= i0f0o0 && this.age > i0f0o1 && this.name == i0f1', {'i0f0o0': 65, 'i0f0o1': 18, 'i0f1': 'test'}), ] for item, js, scope in examples: From 49a001a93aecddb3bead4703065982367d13b574 Mon Sep 17 00:00:00 2001 From: blackbrrr Date: Tue, 9 Mar 2010 15:28:55 -0600 Subject: [PATCH 06/12] re-added missing QuerySet._ordering --- mongoengine/queryset.py | 1 + 1 file changed, 1 insertion(+) diff --git a/mongoengine/queryset.py b/mongoengine/queryset.py index a76c49d7..0c2c68a5 100644 --- a/mongoengine/queryset.py +++ b/mongoengine/queryset.py @@ -144,6 +144,7 @@ class QuerySet(object): self._query = {} self._where_clause = None self._loaded_fields = [] + self._ordering = [] # If inheritance is allowed, only return instances and instances of # subclasses of the class being used From acda64a837e29fc2462ecff3ebe4342a706933ce Mon Sep 17 00:00:00 2001 From: blackbrrr Date: Tue, 9 Mar 2010 15:31:28 -0600 Subject: [PATCH 07/12] fixed field_js merge artifact --- mongoengine/queryset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mongoengine/queryset.py b/mongoengine/queryset.py index 0c2c68a5..883154f2 100644 --- a/mongoengine/queryset.py +++ b/mongoengine/queryset.py @@ -104,7 +104,7 @@ class Q(object): js.append(operation_js) else: # Construct the JS for this field - (op, key, value, value_name) + value, field_js = self._build_op_js(op, key, value, value_name) js_scope[value_name] = value js.append(field_js) return ' && '.join(js) From aa13ab37c468f371fd136dd93822054bf2332bec Mon Sep 17 00:00:00 2001 From: blackbrrr Date: Tue, 9 Mar 2010 15:31:52 -0600 Subject: [PATCH 08/12] fixed field_js merge artifact --- mongoengine/queryset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mongoengine/queryset.py b/mongoengine/queryset.py index 0c2c68a5..883154f2 100644 --- a/mongoengine/queryset.py +++ b/mongoengine/queryset.py @@ -104,7 +104,7 @@ class Q(object): js.append(operation_js) else: # Construct the JS for this field - (op, key, value, value_name) + value, field_js = self._build_op_js(op, key, value, value_name) js_scope[value_name] = value js.append(field_js) return ' && '.join(js) From 0c1e5da9a8bea181501050fc25b4d827ffd4e44e Mon Sep 17 00:00:00 2001 From: blackbrrr Date: Wed, 17 Mar 2010 00:50:07 -0500 Subject: [PATCH 09/12] added mongoengine.MapReduceDocument to api ref --- docs/apireference.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/apireference.rst b/docs/apireference.rst index 2433ddfe..3f176826 100644 --- a/docs/apireference.rst +++ b/docs/apireference.rst @@ -20,6 +20,9 @@ Documents .. autoclass:: mongoengine.EmbeddedDocument :members: + +.. autoclass:: mongoengine.MapReduceDocument + :members: Querying ======== From f156da4ec2dea79ce17ec3f36e0b0aa2a211d851 Mon Sep 17 00:00:00 2001 From: blackbrrr Date: Wed, 17 Mar 2010 00:50:44 -0500 Subject: [PATCH 10/12] bumped version --- mongoengine/document.py | 4 +--- mongoengine/queryset.py | 2 +- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/mongoengine/document.py b/mongoengine/document.py index 19ed15cd..1778eb12 100644 --- a/mongoengine/document.py +++ b/mongoengine/document.py @@ -127,7 +127,7 @@ class MapReduceDocument(object): the object can be accessed via the ``object`` property. :param value: The result(s) for this key. - .. versionadded:: 0.2.2 + .. versionadded:: 0.3 """ @@ -151,5 +151,3 @@ class MapReduceDocument(object): self._key_object = self._document.objects.with_id(self.key) return self._key_object return self._key_object - - \ No newline at end of file diff --git a/mongoengine/queryset.py b/mongoengine/queryset.py index 6a397b3f..0b6b5906 100644 --- a/mongoengine/queryset.py +++ b/mongoengine/queryset.py @@ -469,7 +469,7 @@ class QuerySet(object): :meth:`~pymongo.collection.Collection.map_reduce` helper requires PyMongo version **>= 1.2**. - .. versionadded:: 0.2.2 + .. versionadded:: 0.3 """ from document import MapReduceDocument From f4d0938e3dffb1c91d124223e0580c53c37e1817 Mon Sep 17 00:00:00 2001 From: blackbrrr Date: Wed, 17 Mar 2010 00:51:01 -0500 Subject: [PATCH 11/12] rewrite simple map/reduce test --- tests/queryset.py | 40 ++++++++++++++++++---------------------- 1 file changed, 18 insertions(+), 22 deletions(-) diff --git a/tests/queryset.py b/tests/queryset.py index 25dcb237..77bc52c8 100644 --- a/tests/queryset.py +++ b/tests/queryset.py @@ -637,23 +637,20 @@ class QuerySetTest(unittest.TestCase): def test_map_reduce(self): """Ensure map/reduce is both mapping and reducing. """ - class Song(Document): - artists = ListField(StringField()) + class BlogPost(Document): title = StringField() - is_cover = BooleanField() + tags = ListField(StringField()) - Song.drop_collection() + BlogPost.drop_collection() - Song(title="Gloria", is_cover=True, artists=['Patti Smith']).save() - Song(title="Redondo beach", is_cover=False, - artists=['Patti Smith']).save() - Song(title="My Generation", is_cover=True, - artists=['Patti Smith', 'John Cale']).save() + BlogPost(title="Post #1", tags=['music', 'film', 'print']).save() + BlogPost(title="Post #2", tags=['music', 'film']).save() + BlogPost(title="Post #3", tags=['film', 'photography']).save() map_f = """ function() { - this.artists.forEach(function(artist) { - emit(artist, 1); + this.tags.forEach(function(tag) { + emit(tag, 1); }); } """ @@ -668,19 +665,18 @@ class QuerySetTest(unittest.TestCase): } """ - # ensure both artists are found - results = Song.objects.map_reduce(map_f, reduce_f) + # run a map/reduce operation spanning all posts + results = BlogPost.objects.map_reduce(map_f, reduce_f) results = list(results) - self.assertEqual(len(results), 2) + self.assertEqual(len(results), 4) + + music = filter(lambda r: r.key == "music", results)[0] + self.assertEqual(music.value, 2) + + film = filter(lambda r: r.key == "film", results)[0] + self.assertEqual(film.value, 3) - # query for a count of Songs per artist, ordered by -count. - # Patti Smith has 3 song credits, and should therefore be first. - results = Song.objects.order_by("-value").map_reduce(map_f, reduce_f) - results = list(results) - self.assertEqual(results[0].key, "Patti Smith") - self.assertEqual(results[0].value, 3.0) - - Song.drop_collection() + BlogPost.drop_collection() def test_map_reduce_finalize(self): """Ensure that map, reduce, and finalize run and introduce "scope" From f47d926f290eb2b5ce7a0cbb63dceff6118e6d51 Mon Sep 17 00:00:00 2001 From: blackbrrr Date: Wed, 17 Mar 2010 00:56:34 -0500 Subject: [PATCH 12/12] touched up comments in advanced map/reduce test --- tests/queryset.py | 36 ++++++++++++++++++++---------------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/tests/queryset.py b/tests/queryset.py index 77bc52c8..25e56d62 100644 --- a/tests/queryset.py +++ b/tests/queryset.py @@ -548,17 +548,17 @@ class QuerySetTest(unittest.TestCase): return comments; } """ - + sub_code = BlogPost.objects._sub_js_fields(code) - code_chunks = ['doc["cmnts"];', 'doc["doc-name"],', + code_chunks = ['doc["cmnts"];', 'doc["doc-name"],', 'doc["cmnts"][i]["body"]'] for chunk in code_chunks: self.assertTrue(chunk in sub_code) results = BlogPost.objects.exec_js(code) expected_results = [ - {u'comment': u'cool', u'document': u'post1'}, - {u'comment': u'yay', u'document': u'post1'}, + {u'comment': u'cool', u'document': u'post1'}, + {u'comment': u'yay', u'document': u'post1'}, {u'comment': u'nice stuff', u'document': u'post2'}, ] self.assertEqual(results, expected_results) @@ -669,10 +669,10 @@ class QuerySetTest(unittest.TestCase): results = BlogPost.objects.map_reduce(map_f, reduce_f) results = list(results) self.assertEqual(len(results), 4) - + music = filter(lambda r: r.key == "music", results)[0] self.assertEqual(music.value, 2) - + film = filter(lambda r: r.key == "film", results)[0] self.assertEqual(film.value, 3) @@ -696,8 +696,8 @@ class QuerySetTest(unittest.TestCase): # Note: Test data taken from a custom Reddit homepage on # Fri, 12 Feb 2010 14:36:00 -0600. Link ordering should - # reflect order of insertion below. - + # reflect order of insertion below, but is not influenced + # by insertion order. Link(title = "Google Buzz auto-followed a woman's abusive ex ...", up_votes = 1079, down_votes = 553, @@ -768,10 +768,14 @@ class QuerySetTest(unittest.TestCase): } """ + # provide the reddit epoch (used for ranking) as a variable available + # to all phases of the map/reduce operation: map, reduce, and finalize. reddit_epoch = mktime(datetime(2005, 12, 8, 7, 46, 43).timetuple()) scope = {'reddit_epoch': reddit_epoch} - # ensure both artists are found + # run a map/reduce operation across all links. ordering is set + # to "-value", which orders the "weight" value returned from + # "finalize_f" in descending order. results = Link.objects.order_by("-value") results = results.map_reduce(map_f, reduce_f, @@ -979,13 +983,13 @@ class QuerySetTest(unittest.TestCase): self.assertFalse([('_types', 1)] in info.values()) BlogPost.drop_collection() - + def test_bulk(self): """Ensure bulk querying by object id returns a proper dict. """ class BlogPost(Document): title = StringField() - + BlogPost.drop_collection() post_1 = BlogPost(title="Post #1") @@ -999,20 +1003,20 @@ class QuerySetTest(unittest.TestCase): post_3.save() post_4.save() post_5.save() - + ids = [post_1.id, post_2.id, post_5.id] objects = BlogPost.objects.in_bulk(ids) - + self.assertEqual(len(objects), 3) self.assertTrue(post_1.id in objects) self.assertTrue(post_2.id in objects) self.assertTrue(post_5.id in objects) - + self.assertTrue(objects[post_1.id].title == post_1.title) self.assertTrue(objects[post_2.id].title == post_2.title) - self.assertTrue(objects[post_5.id].title == post_5.title) - + self.assertTrue(objects[post_5.id].title == post_5.title) + BlogPost.drop_collection() def tearDown(self):