diff --git a/mongoengine/document.py b/mongoengine/document.py index 6907cde3..19ed15cd 100644 --- a/mongoengine/document.py +++ b/mongoengine/document.py @@ -124,34 +124,31 @@ class MapReduceDocument(object): :param key: Document/result key, often an instance of :class:`~pymongo.objectid.ObjectId`. If supplied as an ``ObjectId`` found in the given ``collection``, - the object can be accessed via the ``key_object`` property. - :param value: The result(s) for this key. If given as a dictionary, - each key in the dictionary will be available as - an instance attribute. + the object can be accessed via the ``object`` property. + :param value: The result(s) for this key. .. versionadded:: 0.2.2 """ - def __init__(self, collection, key, value): + def __init__(self, document, collection, key, value): + self._document = document self._collection = collection self.key = key self.value = value - - if isinstance(value, dict): - # create attributes for each named result - for k, v in value.iteritems(): - setattr(self, k, v) @property def object(self): """Lazy-load the object referenced by ``self.key``. If ``self.key`` is not an ``ObjectId`` and cannot be converted to one, simply return ``self.key``. 
""" - if not isinstance(self.key, pymongo.objectid.ObjectId): - return self.key + if not isinstance(self.key, (pymongo.objectid.ObjectId)): + try: + self.key = pymongo.objectid.ObjectId(self.key) + except: + return self.key if not hasattr(self, "_key_object"): - self._key_object = self._collection.find_one(self.key) + self._key_object = self._document.objects.with_id(self.key) return self._key_object return self._key_object diff --git a/mongoengine/queryset.py b/mongoengine/queryset.py index 966f3304..32840471 100644 --- a/mongoengine/queryset.py +++ b/mongoengine/queryset.py @@ -125,6 +125,7 @@ class QuerySet(object): self._query = {} self._where_clause = None self._ordering = [] + self._limit = None # If inheritance is allowed, only return instances and instances of # subclasses of the class being used @@ -380,7 +381,8 @@ class QuerySet(object): def __len__(self): return self.count() - def map_reduce(self, map_f, reduce_f, scope=None, keep_temp=False): + def map_reduce(self, map_f, reduce_f, finalize_f=None, limit=None, + scope=None, keep_temp=False): """Perform a map/reduce query using the current query spec and ordering. While ``map_reduce`` respects ``QuerySet`` chaining, it must be the last call made, as it does not return a malleable @@ -402,6 +404,8 @@ class QuerySet(object): :param reduce_f: reduce function, as :class:`~pymongo.code.Code` or string :param scope: values to insert into map/reduce global scope. Optional. + :param limit: number of objects from current query to provide + to map/reduce method :param keep_temp: keep temporary table (boolean, default ``False``) Returns an iterator of :class:`~mongoengine.document.MapReduceDocument`. 
@@ -427,13 +431,16 @@ class QuerySet(object): mr_args = {'query': self._query, 'keeptemp': keep_temp} + if finalize_f: + if not isinstance(finalize_f, pymongo.code.Code): + finalize_f = pymongo.code.Code(finalize_f) + mr_args['finalize'] = finalize_f + if scope: mr_args['scope'] = scope if limit: mr_args['limit'] = limit - docs = [] - results = self._collection.map_reduce(map_f, reduce_f, **mr_args) results = results.find() @@ -441,10 +448,8 @@ class QuerySet(object): results = results.sort(self._ordering) for doc in results: - mrd = MapReduceDocument(self._collection, doc['_id'], doc['value']) - docs.append(mrd) - - return docs + yield MapReduceDocument(self._document, self._collection, + doc['_id'], doc['value']) def limit(self, n): """Limit the number of returned documents to `n`. This may also be @@ -452,6 +457,7 @@ class QuerySet(object): :param n: the maximum number of objects to return """ + self._limit = n self._cursor.limit(n) # Return self to allow chaining return self diff --git a/tests/queryset.py b/tests/queryset.py index 16ce4446..7103d4d7 100644 --- a/tests/queryset.py +++ b/tests/queryset.py @@ -1,6 +1,9 @@ +# -*- coding: utf-8 -*- + + import unittest import pymongo -from datetime import datetime +from datetime import datetime, timedelta from mongoengine.queryset import (QuerySet, MultipleObjectsReturned, DoesNotExist) @@ -489,16 +492,121 @@ class QuerySetTest(unittest.TestCase): # ensure both artists are found results = Song.objects.map_reduce(map_f, reduce_f) + results = list(results) self.assertEqual(len(results), 2) # query for a count of Songs per artist, ordered by -count. # Patti Smith has 3 song credits, and should therefore be first. 
results = Song.objects.order_by("-value").map_reduce(map_f, reduce_f) + results = list(results) self.assertEqual(results[0].key, "Patti Smith") self.assertEqual(results[0].value, 3.0) Song.drop_collection() + def test_map_reduce_finalize(self): + """Ensure scope and finalize are working correctly by simulating + "hotness" ranking with Reddit algorithm. + """ + from time import mktime + + class Link(Document): + title = StringField() + up_votes = IntField() + down_votes = IntField() + submitted = DateTimeField() + + Link.drop_collection() + + now = datetime.utcnow() + + # Note: Test data taken from a custom Reddit homepage on + # Fri, 12 Feb 2010 14:36:00 -0600. + + Link(title = "Google Buzz auto-followed a woman's abusive ex ...", + up_votes = 1079, + down_votes = 553, + submitted = now-timedelta(hours=4)).save() + Link(title = "We did it! Barbie is a computer engineer.", + up_votes = 481, + down_votes = 124, + submitted = now-timedelta(hours=2)).save() + Link(title = "This Is A Mosquito Getting Killed By A Laser", + up_votes = 1446, + down_votes = 530, + submitted=now-timedelta(hours=13)).save() + Link(title = "Arabic flashcards land physics student in jail.", + up_votes = 215, + down_votes = 105, + submitted = now-timedelta(hours=6)).save() + Link(title = "The Burger Lab: Presenting, the Flood Burger", + up_votes = 48, + down_votes = 17, + submitted = now-timedelta(hours=5)).save() + Link(title="How to see polarization with the naked eye", + up_votes = 74, + down_votes = 13, + submitted = now-timedelta(hours=10)).save() + + map_f = """ + function() { + emit(this._id, {up_delta: this.up_votes - this.down_votes, + reddit_epoch: new Date(2005, 12, 8, 7, 46, 43, 0).getTime(), + sub_date: this.submitted.getTime()}) + } + """ + + reduce_f = """ + function(key, values) { + data = values[0]; + + x = data.up_delta; + + // calculate time diff between reddit epoch and submission + sec_since_epoch = data.sub_date - data.reddit_epoch; + sec_since_epoch /= 1000; + + // 
calculate 'Y' + if(x > 0) { + y = 1; + } else if (x = 0) { + y = 0; + } else { + y = -1; + } + + // calculate 'Z', the maximal value + if(Math.abs(x) >= 1) { + z = Math.abs(x); + } else { + z = 1; + } + + return {x: x, y: y, z: z, t_s: sec_since_epoch}; + } + """ + + finalize_f = """ + function(key, value) { + + // f(sec_since_epoch,y,z) = log10(z) + ((y*sec_since_epoch) / 45000) + z_10 = Math.log(value.z) / Math.log(10); + weight = z_10 + ((value.y * value.t_s) / 45000); + return weight; + + } + """ + + # ensure links come back ordered by descending hotness score + results = Link.objects.order_by("-value") + results = results.map_reduce(map_f, reduce_f, finalize_f=finalize_f) + results = list(results) + + self.assertTrue(results[0].object.title.startswith("Google Buzz")) + self.assertTrue(results[-1].object.title.startswith("How to see")) + + Link.drop_collection() + def test_item_frequencies(self): """Ensure that item frequencies are properly generated from lists. """