updated advanced map/reduce test to include scope; misc cleanup in queryset

This commit is contained in:
blackbrrr 2010-02-23 22:26:05 -06:00
parent 008a62e4e9
commit a19a7b976c
2 changed files with 128 additions and 125 deletions

View File

@ -4,7 +4,7 @@ import pymongo
import copy
__all__ = ['queryset_manager', 'Q', 'InvalidQueryError',
__all__ = ['queryset_manager', 'Q', 'InvalidQueryError',
'InvalidCollectionError']
# The maximum number of items to display in a QuerySet.__repr__
@ -12,7 +12,7 @@ REPR_OUTPUT_SIZE = 20
class DoesNotExist(Exception):
pass
pass
class MultipleObjectsReturned(Exception):
@ -25,14 +25,14 @@ class InvalidQueryError(Exception):
class OperationError(Exception):
pass
class NotImplementedError(Exception):
pass
class Q(object):
OR = '||'
AND = '&&'
OPERATORS = {
@ -51,7 +51,7 @@ class Q(object):
'size': 'this.%(field)s.length == %(value)s',
'exists': 'this.%(field)s != null',
}
def __init__(self, **query):
self.query = [query]
@ -98,26 +98,24 @@ class Q(object):
js_scope[op_value_name] = value
# Construct the JS that uses this op
operation_js = Q.OPERATORS[op.strip('$')] % {
'field': key,
'value': op_value_name
}
'field': key,
'value': op_value_name}
js.append(operation_js)
else:
js_scope[value_name] = value
# Construct the JS for this field
field_js = Q.OPERATORS[op.strip('$')] % {
'field': key,
'value': value_name
}
'field': key,
'value': value_name}
js.append(field_js)
return ' && '.join(js)
class QuerySet(object):
"""A set of results returned from a query. Wraps a MongoDB cursor,
"""A set of results returned from a query. Wraps a MongoDB cursor,
providing :class:`~mongoengine.Document` objects as the results.
"""
def __init__(self, document, collection):
self._document = document
self._collection_obj = collection
@ -132,7 +130,7 @@ class QuerySet(object):
if document._meta.get('allow_inheritance'):
self._query = {'_types': self._document._class_name}
self._cursor_obj = None
def ensure_index(self, key_or_list):
"""Ensure that the given indexes are in place.
@ -180,7 +178,7 @@ class QuerySet(object):
return index_list
def __call__(self, q_obj=None, **query):
"""Filter the selected documents by calling the
"""Filter the selected documents by calling the
:class:`~mongoengine.queryset.QuerySet` with a query.
:param q_obj: a :class:`~mongoengine.queryset.Q` object to be used in
@ -194,7 +192,7 @@ class QuerySet(object):
query = QuerySet._transform_query(_doc_cls=self._document, **query)
self._query.update(query)
return self
def filter(self, *q_objs, **query):
"""An alias of :meth:`~mongoengine.queryset.QuerySet.__call__`
"""
@ -230,11 +228,11 @@ class QuerySet(object):
# Apply where clauses to cursor
if self._where_clause:
self._cursor_obj.where(self._where_clause)
# apply default ordering
if self._document._meta['ordering']:
self.order_by(*self._document._meta['ordering'])
return self._cursor_obj
@classmethod
@ -307,8 +305,8 @@ class QuerySet(object):
return mongo_query
def get(self, *q_objs, **query):
"""Retrieve the the matching object raising
:class:`~mongoengine.queryset.MultipleObjectsReturned` or
"""Retrieve the the matching object raising
:class:`~mongoengine.queryset.MultipleObjectsReturned` or
:class:`~mongoengine.queryset.DoesNotExist` exceptions if multiple or
no results are found.
"""
@ -324,15 +322,15 @@ class QuerySet(object):
def get_or_create(self, *q_objs, **query):
"""Retreive unique object or create, if it doesn't exist. Raises
:class:`~mongoengine.queryset.MultipleObjectsReturned` if multiple
results are found. A new document will be created if the document
:class:`~mongoengine.queryset.MultipleObjectsReturned` if multiple
results are found. A new document will be created if the document
doesn't exists; a dictionary of default values for the new document
may be provided as a keyword argument called :attr:`defaults`.
"""
defaults = query.get('defaults', {})
if query.has_key('defaults'):
if 'defaults' in query:
del query['defaults']
self.__call__(*q_objs, **query)
count = self.count()
if count == 0:
@ -383,17 +381,17 @@ class QuerySet(object):
def map_reduce(self, map_f, reduce_f, finalize_f=None, limit=None,
scope=None, keep_temp=False):
"""Perform a map/reduce query using the current query spec
and ordering. While ``map_reduce`` respects ``QuerySet`` chaining,
it must be the last call made, as it does not return a maleable
``QuerySet``.
See the :meth:`~mongoengine.tests.QuerySetTest.test_map_reduce`
"""Perform a map/reduce query using the current query spec
and ordering. While ``map_reduce`` respects ``QuerySet`` chaining,
it must be the last call made, as it does not return a maleable
``QuerySet``.
See the :meth:`~mongoengine.tests.QuerySetTest.test_map_reduce`
and :meth:`~mongoengine.tests.QuerySetTest.test_map_advanced`
tests in ``tests.queryset.QuerySetTest`` for usage examples.
:param map_f: map function, as :class:`~pymongo.code.Code` or string
:param reduce_f: reduce function, as
:param reduce_f: reduce function, as
:class:`~pymongo.code.Code` or string
:param finalize_f: finalize function, an optional function that
performs any post-reduction processing.
@ -401,36 +399,37 @@ class QuerySet(object):
:param limit: number of objects from current query to provide
to map/reduce method
:param keep_temp: keep temporary table (boolean, default ``True``)
Returns an iterator yielding
Returns an iterator yielding
:class:`~mongoengine.document.MapReduceDocument`.
.. note:: Map/Reduce requires server version **>= 1.1.1**. The PyMongo
:meth:`~pymongo.collection.Collection.map_reduce` helper requires
PyMongo version **>= 1.2**.
.. versionadded:: 0.2.2
"""
from document import MapReduceDocument
if not hasattr(self._collection, "map_reduce"):
raise NotImplementedError("Requires MongoDB >= 1.1.1")
if not isinstance(map_f, pymongo.code.Code):
map_f = pymongo.code.Code(map_f)
if not isinstance(reduce_f, pymongo.code.Code):
reduce_f = pymongo.code.Code(reduce_f)
mr_args = {'query': self._query, 'keeptemp': keep_temp}
if finalize_f:
if not isinstance(finalize_f, pymongo.code.Code):
finalize_f = pymongo.code.Code(finalize_f)
mr_args['finalize'] = finalize_f
if scope:
mr_args['scope'] = scope
if limit:
mr_args['limit'] = limit
@ -439,9 +438,9 @@ class QuerySet(object):
if self._ordering:
results = results.sort(self._ordering)
for doc in results:
yield MapReduceDocument(self._document, self._collection,
yield MapReduceDocument(self._document, self._collection,
doc['_id'], doc['value'])
def limit(self, n):
@ -473,7 +472,7 @@ class QuerySet(object):
self._cursor_obj = self._cursor[key]
except IndexError, err:
# PyMongo raises an error if key.start == key.stop, catch it,
# bin it, kill it.
# bin it, kill it.
if key.start >=0 and key.stop >= 0 and key.step is None:
if key.start == key.stop:
self.limit(0)
@ -500,14 +499,14 @@ class QuerySet(object):
direction = pymongo.DESCENDING
if key[0] in ('-', '+'):
key = key[1:]
key_list.append((key, direction))
key_list.append((key, direction))
self._ordering = key_list
self._cursor.sort(key_list)
return self
def explain(self, format=False):
"""Return an explain plan record for the
"""Return an explain plan record for the
:class:`~mongoengine.queryset.QuerySet`\ 's cursor.
:param format: format the plan before returning it
@ -518,7 +517,7 @@ class QuerySet(object):
import pprint
plan = pprint.pformat(plan)
return plan
def delete(self, safe=False):
"""Delete the documents matched by the query.
@ -530,7 +529,7 @@ class QuerySet(object):
def _transform_update(cls, _doc_cls=None, **update):
"""Transform an update spec from Django-style format to Mongo format.
"""
operators = ['set', 'unset', 'inc', 'dec', 'push', 'push_all', 'pull',
operators = ['set', 'unset', 'inc', 'dec', 'push', 'push_all', 'pull',
'pull_all']
mongo_update = {}
@ -588,7 +587,7 @@ class QuerySet(object):
update = QuerySet._transform_update(self._document, **update)
try:
self._collection.update(self._query, update, safe=safe_update,
self._collection.update(self._query, update, safe=safe_update,
multi=True)
except pymongo.errors.OperationFailure, err:
if unicode(err) == u'multi not coded yet':
@ -609,7 +608,7 @@ class QuerySet(object):
# Explicitly provide 'multi=False' to newer versions of PyMongo
# as the default may change to 'True'
if pymongo.version >= '1.1.1':
self._collection.update(self._query, update, safe=safe_update,
self._collection.update(self._query, update, safe=safe_update,
multi=False)
else:
# Older versions of PyMongo don't support 'multi'
@ -624,15 +623,15 @@ class QuerySet(object):
"""Execute a Javascript function on the server. A list of fields may be
provided, which will be translated to their correct names and supplied
as the arguments to the function. A few extra variables are added to
the function's scope: ``collection``, which is the name of the
collection in use; ``query``, which is an object representing the
the function's scope: ``collection``, which is the name of the
collection in use; ``query``, which is an object representing the
current query; and ``options``, which is an object containing any
options specified as keyword arguments.
:param code: a string of Javascript code to execute
:param fields: fields that you will be using in your function, which
will be passed in to your function as arguments
:param options: options that you want available to the function
:param options: options that you want available to the function
(accessed in Javascript through the ``options`` object)
"""
fields = [QuerySet._translate_field_name(self._document, f)
@ -647,7 +646,7 @@ class QuerySet(object):
query = self._query
if self._where_clause:
query['$where'] = self._where_clause
scope['query'] = query
code = pymongo.code.Code(code, scope=scope)
@ -695,7 +694,7 @@ class QuerySet(object):
def item_frequencies(self, list_field, normalize=False):
"""Returns a dictionary of all items present in a list field across
the whole queried set of documents, and their corresponding frequency.
This is useful for generating tag clouds, or searching documents.
This is useful for generating tag clouds, or searching documents.
:param list_field: the list field to use
:param normalize: normalize the results so they add to 1.0
@ -742,7 +741,7 @@ class QuerySetManager(object):
self._collection = None
def __get__(self, instance, owner):
"""Descriptor for instantiating a new QuerySet object when
"""Descriptor for instantiating a new QuerySet object when
Document.objects is accessed.
"""
if instance is not None:
@ -761,7 +760,7 @@ class QuerySetManager(object):
if collection in db.collection_names():
self._collection = db[collection]
# The collection already exists, check if its capped
# The collection already exists, check if its capped
# options match the specified capped options
options = self._collection.options()
if options.get('max') != max_documents or \
@ -777,7 +776,7 @@ class QuerySetManager(object):
self._collection = db.create_collection(collection, opts)
else:
self._collection = db[collection]
# owner is the document that contains the QuerySetManager
queryset = QuerySet(owner, self._collection)
if self._manager_func:

View File

@ -5,13 +5,13 @@ import unittest
import pymongo
from datetime import datetime, timedelta
from mongoengine.queryset import (QuerySet, MultipleObjectsReturned,
from mongoengine.queryset import (QuerySet, MultipleObjectsReturned,
DoesNotExist)
from mongoengine import *
class QuerySetTest(unittest.TestCase):
def setUp(self):
connect(db='mongoenginetest')
@ -19,12 +19,12 @@ class QuerySetTest(unittest.TestCase):
name = StringField()
age = IntField()
self.Person = Person
def test_initialisation(self):
"""Ensure that a QuerySet is correctly initialised by QuerySetManager.
"""
self.assertTrue(isinstance(self.Person.objects, QuerySet))
self.assertEqual(self.Person.objects._collection.name,
self.assertEqual(self.Person.objects._collection.name,
self.Person._meta['collection'])
self.assertTrue(isinstance(self.Person.objects._collection,
pymongo.collection.Collection))
@ -34,15 +34,15 @@ class QuerySetTest(unittest.TestCase):
"""
self.assertEqual(QuerySet._transform_query(name='test', age=30),
{'name': 'test', 'age': 30})
self.assertEqual(QuerySet._transform_query(age__lt=30),
self.assertEqual(QuerySet._transform_query(age__lt=30),
{'age': {'$lt': 30}})
self.assertEqual(QuerySet._transform_query(age__gt=20, age__lt=50),
{'age': {'$gt': 20, '$lt': 50}})
self.assertEqual(QuerySet._transform_query(age=20, age__gt=50),
{'age': 20})
self.assertEqual(QuerySet._transform_query(friend__age__gte=30),
self.assertEqual(QuerySet._transform_query(friend__age__gte=30),
{'friend.age': {'$gte': 30}})
self.assertEqual(QuerySet._transform_query(name__exists=True),
self.assertEqual(QuerySet._transform_query(name__exists=True),
{'name': {'$exists': True}})
def test_find(self):
@ -134,7 +134,7 @@ class QuerySetTest(unittest.TestCase):
self.assertEqual(person.name, "User B")
self.assertRaises(IndexError, self.Person.objects.__getitem__, 2)
# Find a document using just the object id
person = self.Person.objects.with_id(person1.id)
self.assertEqual(person.name, "User A")
@ -170,7 +170,7 @@ class QuerySetTest(unittest.TestCase):
person2.save()
# Retrieve the first person from the database
self.assertRaises(MultipleObjectsReturned,
self.assertRaises(MultipleObjectsReturned,
self.Person.objects.get_or_create)
# Use a query to filter the people found to just person2
@ -191,36 +191,36 @@ class QuerySetTest(unittest.TestCase):
"""Ensure filters can be chained together.
"""
from datetime import datetime
class BlogPost(Document):
title = StringField()
is_published = BooleanField()
published_date = DateTimeField()
@queryset_manager
def published(doc_cls, queryset):
return queryset(is_published=True)
blog_post_1 = BlogPost(title="Blog Post #1",
blog_post_1 = BlogPost(title="Blog Post #1",
is_published = True,
published_date=datetime(2010, 1, 5, 0, 0 ,0))
blog_post_2 = BlogPost(title="Blog Post #2",
blog_post_2 = BlogPost(title="Blog Post #2",
is_published = True,
published_date=datetime(2010, 1, 6, 0, 0 ,0))
blog_post_3 = BlogPost(title="Blog Post #3",
blog_post_3 = BlogPost(title="Blog Post #3",
is_published = True,
published_date=datetime(2010, 1, 7, 0, 0 ,0))
blog_post_1.save()
blog_post_2.save()
blog_post_3.save()
# find all published blog posts before 2010-01-07
published_posts = BlogPost.published()
published_posts = published_posts.filter(
published_date__lt=datetime(2010, 1, 7, 0, 0 ,0))
self.assertEqual(published_posts.count(), 2)
BlogPost.drop_collection()
def test_ordering(self):
@ -236,22 +236,22 @@ class QuerySetTest(unittest.TestCase):
BlogPost.drop_collection()
blog_post_1 = BlogPost(title="Blog Post #1",
blog_post_1 = BlogPost(title="Blog Post #1",
published_date=datetime(2010, 1, 5, 0, 0 ,0))
blog_post_2 = BlogPost(title="Blog Post #2",
blog_post_2 = BlogPost(title="Blog Post #2",
published_date=datetime(2010, 1, 6, 0, 0 ,0))
blog_post_3 = BlogPost(title="Blog Post #3",
blog_post_3 = BlogPost(title="Blog Post #3",
published_date=datetime(2010, 1, 7, 0, 0 ,0))
blog_post_1.save()
blog_post_2.save()
blog_post_3.save()
# get the "first" BlogPost using default ordering
# from BlogPost.meta.ordering
latest_post = BlogPost.objects.first()
latest_post = BlogPost.objects.first()
self.assertEqual(latest_post.title, "Blog Post #3")
# override default ordering, order BlogPosts by "published_date"
first_post = BlogPost.objects.order_by("+published_date").first()
self.assertEqual(first_post.title, "Blog Post #1")
@ -277,7 +277,7 @@ class QuerySetTest(unittest.TestCase):
result = BlogPost.objects.first()
self.assertTrue(isinstance(result.author, User))
self.assertEqual(result.author.name, 'Test User')
BlogPost.drop_collection()
def test_find_dict_item(self):
@ -339,7 +339,7 @@ class QuerySetTest(unittest.TestCase):
self.Person(name='user2', age=20).save()
self.Person(name='user3', age=30).save()
self.Person(name='user4', age=40).save()
self.assertEqual(len(self.Person.objects(Q(age__in=[20]))), 2)
self.assertEqual(len(self.Person.objects(Q(age__in=[20, 30]))), 3)
@ -452,10 +452,10 @@ class QuerySetTest(unittest.TestCase):
names = [p.name for p in self.Person.objects.order_by('age')]
self.assertEqual(names, ['User A', 'User C', 'User B'])
ages = [p.age for p in self.Person.objects.order_by('-name')]
self.assertEqual(ages, [30, 40, 20])
def test_map_reduce(self):
"""Ensure map/reduce is both mapping and reducing.
"""
@ -463,15 +463,15 @@ class QuerySetTest(unittest.TestCase):
artists = ListField(StringField())
title = StringField()
is_cover = BooleanField()
Song.drop_collection()
Song(title="Gloria", is_cover=True, artists=['Patti Smith']).save()
Song(title="Redondo beach", is_cover=False,
Song(title="Redondo beach", is_cover=False,
artists=['Patti Smith']).save()
Song(title="My Generation", is_cover=True,
artists=['Patti Smith', 'John Cale']).save()
map_f = """
function() {
this.artists.forEach(function(artist) {
@ -479,7 +479,7 @@ class QuerySetTest(unittest.TestCase):
});
}
"""
reduce_f = """
function(key, values) {
var total = 0;
@ -489,27 +489,27 @@ class QuerySetTest(unittest.TestCase):
return total;
}
"""
# ensure both artists are found
results = Song.objects.map_reduce(map_f, reduce_f)
results = list(results)
self.assertEqual(len(results), 2)
# query for a count of Songs per artist, ordered by -count.
# Patti Smith has 3 song credits, and should therefore be first.
results = Song.objects.order_by("-value").map_reduce(map_f, reduce_f)
results = list(results)
self.assertEqual(results[0].key, "Patti Smith")
self.assertEqual(results[0].value, 3.0)
Song.drop_collection()
def test_map_reduce_finalize(self):
"""Ensure finalize is running by simulating "hotness"
ranking with Reddit algorithm.
"""Ensure that map, reduce, and finalize run and introduce "scope"
by simulating "hotness" ranking with Reddit algorithm.
"""
from time import mktime
class Link(Document):
title = StringField()
up_votes = IntField()
@ -521,10 +521,10 @@ class QuerySetTest(unittest.TestCase):
now = datetime.utcnow()
# Note: Test data taken from a custom Reddit homepage on
# Fri, 12 Feb 2010 14:36:00 -0600. Link ordering should
# Fri, 12 Feb 2010 14:36:00 -0600. Link ordering should
# reflect order of insertion below.
Link(title = "Google Buzz auto-followed a woman's abusive ex ...",
Link(title = "Google Buzz auto-followed a woman's abusive ex ...",
up_votes = 1079,
down_votes = 553,
submitted = now-timedelta(hours=4)).save()
@ -532,12 +532,12 @@ class QuerySetTest(unittest.TestCase):
up_votes = 481,
down_votes = 124,
submitted = now-timedelta(hours=2)).save()
Link(title = "This Is A Mosquito Getting Killed By A Laser",
Link(title = "This Is A Mosquito Getting Killed By A Laser",
up_votes = 1446,
down_votes = 530,
submitted=now-timedelta(hours=13)).save()
Link(title = "Arabic flashcards land physics student in jail.",
up_votes = 215,
up_votes = 215,
down_votes = 105,
submitted = now-timedelta(hours=6)).save()
Link(title = "The Burger Lab: Presenting, the Flood Burger",
@ -552,21 +552,19 @@ class QuerySetTest(unittest.TestCase):
map_f = """
function() {
emit(this._id, {up_delta: this.up_votes - this.down_votes,
reddit_epoch: new Date(2005, 12, 8, 7, 46, 43, 0).getTime(),
sub_date: this.submitted.getTime()})
sub_date: this.submitted.getTime() / 1000})
}
"""
reduce_f = """
function(key, values) {
data = values[0];
x = data.up_delta;
// calculate time diff between reddit epoch and submission
sec_since_epoch = data.sub_date - data.reddit_epoch;
sec_since_epoch /= 1000;
sec_since_epoch = data.sub_date - reddit_epoch;
// calculate 'Y'
if(x > 0) {
y = 1;
@ -575,18 +573,18 @@ class QuerySetTest(unittest.TestCase):
} else {
y = -1;
}
// calculate 'Z', the maximal value
if(Math.abs(x) >= 1) {
z = Math.abs(x);
} else {
z = 1;
}
return {x: x, y: y, z: z, t_s: sec_since_epoch};
}
"""
finalize_f = """
function(key, value) {
// f(sec_since_epoch,y,z) = log10(z) + ((y*sec_since_epoch) / 45000)
@ -596,19 +594,25 @@ class QuerySetTest(unittest.TestCase):
}
"""
reddit_epoch = mktime(datetime(2005, 12, 8, 7, 46, 43).timetuple())
scope = {'reddit_epoch': reddit_epoch}
# ensure both artists are found
results = Link.objects.order_by("-value")
results = results.map_reduce(map_f, reduce_f, finalize_f=finalize_f)
results = results.map_reduce(map_f,
reduce_f,
finalize_f=finalize_f,
scope=scope)
results = list(results)
# assert troublesome Buzz article is ranked 1st
self.assertTrue(results[0].object.title.startswith("Google Buzz"))
# assert laser vision is ranked last
self.assertTrue(results[-1].object.title.startswith("How to see"))
Link.drop_collection()
def test_item_frequencies(self):
"""Ensure that item frequencies are properly generated from lists.
"""
@ -705,20 +709,20 @@ class QuerySetTest(unittest.TestCase):
title = StringField(name='postTitle')
comments = ListField(EmbeddedDocumentField(Comment),
name='postComments')
BlogPost.drop_collection()
data = {'title': 'Post 1', 'comments': [Comment(content='test')]}
BlogPost(**data).save()
self.assertTrue('postTitle' in
self.assertTrue('postTitle' in
BlogPost.objects(title=data['title'])._query)
self.assertFalse('title' in
self.assertFalse('title' in
BlogPost.objects(title=data['title'])._query)
self.assertEqual(len(BlogPost.objects(title=data['title'])), 1)
self.assertTrue('postComments.commentContent' in
self.assertTrue('postComments.commentContent' in
BlogPost.objects(comments__content='test')._query)
self.assertEqual(len(BlogPost.objects(comments__content='test')), 1)
@ -739,7 +743,7 @@ class QuerySetTest(unittest.TestCase):
post.save()
# Test that query may be performed by providing a document as a value
# while using a ReferenceField's name - the document should be
# while using a ReferenceField's name - the document should be
# converted to an DBRef, which is legal, unlike a Document object
post_obj = BlogPost.objects(author=person).first()
self.assertEqual(post.id, post_obj.id)
@ -783,7 +787,7 @@ class QuerySetTest(unittest.TestCase):
class QTest(unittest.TestCase):
def test_or_and(self):
"""Ensure that Q objects may be combined correctly.
"""
@ -807,8 +811,8 @@ class QTest(unittest.TestCase):
examples = [
({'name': 'test'}, 'this.name == i0f0', {'i0f0': 'test'}),
({'age': {'$gt': 18}}, 'this.age > i0f0o0', {'i0f0o0': 18}),
({'name': 'test', 'age': {'$gt': 18, '$lte': 65}},
'this.age <= i0f0o0 && this.age > i0f0o1 && this.name == i0f1',
({'name': 'test', 'age': {'$gt': 18, '$lte': 65}},
'this.age <= i0f0o0 && this.age > i0f0o1 && this.name == i0f1',
{'i0f0o0': 65, 'i0f0o1': 18, 'i0f1': 'test'}),
]
for item, js, scope in examples: