added map/reduce support via QuerySet.map_reduce. map_reduce operations respect query specs and ordering, but ordering is currently only applied to map/reduce collection. map/reduce may eventually require its own QuerySet to avoid slicing conflicts. results are returned as lists of MapReduceDocument objects, dynamic objects representing the query. tests and documentation included. considered in the neighborhood of 'good start'.

This commit is contained in:
blackbrrr 2010-02-09 14:56:15 -06:00
parent 3fb6307596
commit 69d3e0c4b6
4 changed files with 186 additions and 25 deletions

View File

@ -22,7 +22,7 @@ sys.path.append(os.path.abspath('..'))
# Add any Sphinx extension module names here, as strings. They can be extensions
# coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
extensions = ['sphinx.ext.autodoc']
extensions = ['sphinx.ext.autodoc', 'sphinx.ext.todo']
# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']

View File

@ -114,3 +114,44 @@ class Document(BaseDocument):
"""
db = _get_db()
db.drop_collection(cls._meta['collection'])
class MapReduceDocument(object):
"""A document returned from a map/reduce query.
:param collection: An instance of :class:`~pymongo.Collection`
:param key: Document/result key, often an instance of
:class:`~pymongo.objectid.ObjectId`. If supplied as
an ``ObjectId`` found in the given ``collection``,
the object can be accessed via the ``key_object`` property.
:param value: The result(s) for this key. If given as a dictionary,
each key in the dictionary will be available as
an instance attribute.
.. versionadded:: 0.2.2
"""
def __init__(self, collection, key, value):
self._collection = collection
self.key = key
self.value = value
if isinstance(value, dict):
# create attributes for each named result
for k, v in value.iteritems():
setattr(self, k, v)
@property
def object(self):
"""Lazy-load the object referenced by ``self.key``. If ``self.key``
is not an ``ObjectId``, simply return ``self.key``.
"""
if not isinstance(self.key, pymongo.objectid.ObjectId):
return self.key
if not hasattr(self, "_key_object"):
self._key_object = self._collection.find_one(self.key)
return self._key_object
return self._key_object

View File

@ -19,6 +19,10 @@ class OperationError(Exception):
pass
class NotImplementedError(Exception):
pass
class Q(object):
OR = '||'
@ -112,6 +116,7 @@ class QuerySet(object):
self._accessed_collection = False
self._query = {}
self._where_clause = None
self._ordering = []
# If inheritance is allowed, only return instances and instances of
# subclasses of the class being used
@ -327,6 +332,72 @@ class QuerySet(object):
def __len__(self):
return self.count()
def map_reduce(self, map_f, reduce_f, scope=None, keep_temp=False):
"""Perform a map/reduce query using the current query spec
and ordering. While ``map_reduce`` respects ``QuerySet`` chaining,
it must be the last call made, as it does not return a maleable
``QuerySet``.
Example: map/reduce operation is given a ``QuerySet``
of all posts by "mattdennewitz", ordered by most recent "pub_date". ::
map_f = function() { ... }
reduce_f = function(key, values) { ... }
posts = BlogPost(author="mattdennewitz").order_by("-pub_date")
tag_counts = posts.map_reduce(map_f, reduce_f)
See the :meth:`~mongoengine.tests.QuerySetTest.test_map_reduce_simple`
unit test for more usage examples.
:param map_f: map function, as :class:`~pymongo.code.Code` or string
:param reduce_f: reduce function, as
:class:`~pymongo.code.Code` or string
:param scope: values to insert into map/reduce global scope. Optional.
:param keep_temp: keep temporary table (boolean, default ``True``)
Returns a list of :class:`~mongoengine.document.MapReduceDocument`.
.. note:: Map/Reduce requires server version **>= 1.1.1**. The PyMongo
:meth:`~pymongo.collection.Collection.map_reduce` helper requires
PyMongo version **>= 1.2**.
.. versionadded:: 0.2.2
.. todo:: Implement limits
"""
from document import MapReduceDocument
if not hasattr(self._collection, "map_reduce"):
raise NotImplementedError("Requires MongoDB >= 1.1.1")
if not isinstance(map_f, pymongo.code.Code):
map_f = pymongo.code.Code(map_f)
if not isinstance(reduce_f, pymongo.code.Code):
reduce_f = pymongo.code.Code(reduce_f)
mr_args = {'query': self._query, 'keeptemp': keep_temp}
if scope:
mr_args['scope'] = scope
if limit:
mr_args['limit'] = limit
docs = []
results = self._collection.map_reduce(map_f, reduce_f, **mr_args)
results = results.find()
if self._ordering:
results = results.sort(self._ordering)
for doc in results:
mrd = MapReduceDocument(self._collection, doc['_id'], doc['value'])
docs.append(mrd)
return docs
def limit(self, n):
"""Limit the number of returned documents to `n`. This may also be
achieved using array-slicing syntax (e.g. ``User.objects[:5]``).
@ -384,6 +455,7 @@ class QuerySet(object):
key = key[1:]
key_list.append((key, direction))
self._ordering = key_list
self._cursor.sort(key_list)
return self
@ -610,6 +682,7 @@ class QuerySet(object):
data[-1] = "...(remaining elements truncated)..."
return repr(data)
class InvalidCollectionError(Exception):
pass
@ -663,6 +736,7 @@ class QuerySetManager(object):
queryset = self._manager_func(queryset)
return queryset
def queryset_manager(func):
"""Decorator that allows you to define custom QuerySet managers on
:class:`~mongoengine.Document` classes. The manager must be a function that

View File

@ -171,29 +171,29 @@ class QuerySetTest(unittest.TestCase):
BlogPost.drop_collection()
def test_field_subsets(self):
"""Ensure that a call to ``only`` loads only selected fields.
"""
class DinerReview(Document):
title = StringField()
abstract = StringField()
content = StringField()
review = DinerReview(title="Lorraine's Diner")
review.abstract = "Dirty dishes, great food."
review.content = """
Lorem ipsum dolor sit amet, consectetur adipiscing elit.
Mauris eu felis risus, eget congue ante. Mauris consectetur
dignissim velit, quis dictum risus tincidunt ac.
Phasellus condimentum imperdiet laoreet.
"""
review.save()
review = DinerReview.objects.only("title").first()
self.assertEqual(review.content, None)
DinerReview.drop_collection()
# def test_field_subsets(self):
# """Ensure that a call to ``only`` loads only selected fields.
# """
#
# class DinerReview(Document):
# title = StringField()
# abstract = StringField()
# content = StringField()
#
# review = DinerReview(title="Lorraine's Diner")
# review.abstract = "Dirty dishes, great food."
# review.content = """
# Lorem ipsum dolor sit amet, consectetur adipiscing elit.
# Mauris eu felis risus, eget congue ante. Mauris consectetur
# dignissim velit, quis dictum risus tincidunt ac.
# Phasellus condimentum imperdiet laoreet.
# """
# review.save()
#
# review = DinerReview.objects.only("title").first()
# self.assertEqual(review.content, None)
#
# DinerReview.drop_collection()
def test_ordering(self):
"""Ensure default ordering is applied and can be overridden.
@ -401,6 +401,52 @@ class QuerySetTest(unittest.TestCase):
ages = [p.age for p in self.Person.objects.order_by('-name')]
self.assertEqual(ages, [30, 40, 20])
def test_map_reduce(self):
"""Ensure map/reduce is both mapping and reducing.
"""
class Song(Document):
artists = ListField(StringField())
title = StringField()
is_cover = BooleanField()
Song.drop_collection()
Song(title="Gloria", is_cover=True, artists=['Patti Smith']).save()
Song(title="Redondo beach", is_cover=False,
artists=['Patti Smith']).save()
Song(title="My Generation", is_cover=True,
artists=['Patti Smith', 'John Cale']).save()
map_f = """
function() {
this.artists.forEach(function(artist) {
emit(artist, 1);
});
}
"""
reduce_f = """
function(key, values) {
var total = 0;
for(var i=0; i<values.length; i++) {
total += values[i];
}
return total;
}
"""
# ensure both artists are found
results = Song.objects.map_reduce(map_f, reduce_f)
self.assertEqual(len(results), 2)
# query for a count of Songs per artist, ordered by -count.
# Patti Smith has 3 song credits, and should therefore be first.
results = Song.objects.order_by("-value").map_reduce(map_f, reduce_f)
self.assertEqual(results[0].key, "Patti Smith")
self.assertEqual(results[0].value, 3.0)
Song.drop_collection()
def test_item_frequencies(self):
"""Ensure that item frequencies are properly generated from lists.
"""