Added support for null / zero / false values in item_frequencies

refs /MongoEngine/mongoengine#40
This commit is contained in:
Ross Lawley 2012-07-19 12:08:07 +01:00
parent ae39ed94c9
commit d83e67c121
3 changed files with 93 additions and 38 deletions

View File

@ -4,6 +4,7 @@ Changelog
Changes in 0.6.15 Changes in 0.6.15
================= =================
- Added support for null / zero / false values in item_frequencies
- Fixed cascade save edge case - Fixed cascade save edge case
- Fixed geo index creation through reference fields - Fixed geo index creation through reference fields
- Added support for args / kwargs when using @queryset_manager - Added support for args / kwargs when using @queryset_manager

View File

@ -1718,10 +1718,11 @@ class QuerySet(object):
def _item_frequencies_map_reduce(self, field, normalize=False): def _item_frequencies_map_reduce(self, field, normalize=False):
map_func = """ map_func = """
function() { function() {
path = '{{~%(field)s}}'.split('.'); var path = '{{~%(field)s}}'.split('.');
field = this; var field = this;
for (p in path) { for (p in path) {
if (field) if (typeof field != 'undefined')
field = field[path[p]]; field = field[path[p]];
else else
break; break;
@ -1730,7 +1731,7 @@ class QuerySet(object):
field.forEach(function(item) { field.forEach(function(item) {
emit(item, 1); emit(item, 1);
}); });
} else if (field) { } else if (typeof field != 'undefined') {
emit(field, 1); emit(field, 1);
} else { } else {
emit(null, 1); emit(null, 1);
@ -1754,12 +1755,12 @@ class QuerySet(object):
if isinstance(key, float): if isinstance(key, float):
if int(key) == key: if int(key) == key:
key = int(key) key = int(key)
key = str(key) frequencies[key] = int(f.value)
frequencies[key] = f.value
if normalize: if normalize:
count = sum(frequencies.values()) count = sum(frequencies.values())
frequencies = dict([(k, v / count) for k, v in frequencies.items()]) frequencies = dict([(k, float(v) / count)
for k, v in frequencies.items()])
return frequencies return frequencies
@ -1767,12 +1768,11 @@ class QuerySet(object):
"""Uses exec_js to execute""" """Uses exec_js to execute"""
freq_func = """ freq_func = """
function(path) { function(path) {
path = path.split('.'); var path = path.split('.');
if (options.normalize) {
var total = 0.0; var total = 0.0;
db[collection].find(query).forEach(function(doc) { db[collection].find(query).forEach(function(doc) {
field = doc; var field = doc;
for (p in path) { for (p in path) {
if (field) if (field)
field = field[path[p]]; field = field[path[p]];
@ -1785,13 +1785,11 @@ class QuerySet(object):
total++; total++;
} }
}); });
}
var frequencies = {}; var frequencies = {};
var types = {};
var inc = 1.0; var inc = 1.0;
if (options.normalize) {
inc /= total;
}
db[collection].find(query).forEach(function(doc) { db[collection].find(query).forEach(function(doc) {
field = doc; field = doc;
for (p in path) { for (p in path) {
@ -1806,17 +1804,28 @@ class QuerySet(object):
}); });
} else { } else {
var item = field; var item = field;
types[item] = item;
frequencies[item] = inc + (isNaN(frequencies[item]) ? 0: frequencies[item]); frequencies[item] = inc + (isNaN(frequencies[item]) ? 0: frequencies[item]);
} }
}); });
return frequencies; return [total, frequencies, types];
} }
""" """
data = self.exec_js(freq_func, field, normalize=normalize) total, data, types = self.exec_js(freq_func, field)
if 'undefined' in data: values = dict([(types.get(k), int(v)) for k, v in data.iteritems()])
data[None] = data['undefined']
del(data['undefined']) if normalize:
return data values = dict([(k, float(v) / total) for k, v in values.items()])
frequencies = {}
for k, v in values.iteritems():
if isinstance(k, float):
if int(k) == k:
k = int(k)
frequencies[k] = v
return frequencies
def __repr__(self): def __repr__(self):
"""Provides the string representation of the QuerySet """Provides the string representation of the QuerySet

View File

@ -1994,9 +1994,9 @@ class QuerySetTest(unittest.TestCase):
# Check item_frequencies works for non-list fields # Check item_frequencies works for non-list fields
def test_assertions(f): def test_assertions(f):
self.assertEqual(set(['1', '2']), set(f.keys())) self.assertEqual(set([1, 2]), set(f.keys()))
self.assertEqual(f['1'], 1) self.assertEqual(f[1], 1)
self.assertEqual(f['2'], 2) self.assertEqual(f[2], 2)
exec_js = BlogPost.objects.item_frequencies('hits') exec_js = BlogPost.objects.item_frequencies('hits')
map_reduce = BlogPost.objects.item_frequencies('hits', map_reduce=True) map_reduce = BlogPost.objects.item_frequencies('hits', map_reduce=True)
@ -2096,7 +2096,6 @@ class QuerySetTest(unittest.TestCase):
data = EmbeddedDocumentField(Data, required=True) data = EmbeddedDocumentField(Data, required=True)
extra = EmbeddedDocumentField(Extra) extra = EmbeddedDocumentField(Extra)
Person.drop_collection() Person.drop_collection()
p = Person() p = Person()
@ -2114,6 +2113,52 @@ class QuerySetTest(unittest.TestCase):
ot = Person.objects.item_frequencies('extra.tag', map_reduce=True) ot = Person.objects.item_frequencies('extra.tag', map_reduce=True)
self.assertEquals(ot, {None: 1.0, u'friend': 1.0}) self.assertEquals(ot, {None: 1.0, u'friend': 1.0})
def test_item_frequencies_with_0_values(self):
    """Ensure item_frequencies reports a stored value of 0.

    Saves a single document whose ``val`` is 0 and checks that both the
    ``map_reduce=True`` and ``map_reduce=False`` calls return the
    value under the key 0 with a count of 1.
    """
    class Test(Document):
        val = IntField()

    Test.drop_collection()
    t = Test()
    t.val = 0
    t.save()

    # assertEqual rather than the deprecated assertEquals alias.
    ot = Test.objects.item_frequencies('val', map_reduce=True)
    self.assertEqual(ot, {0: 1})
    ot = Test.objects.item_frequencies('val', map_reduce=False)
    self.assertEqual(ot, {0: 1})
def test_item_frequencies_with_False_values(self):
    """Ensure item_frequencies reports a stored value of False.

    Saves a single document whose ``val`` is False and checks that both
    the ``map_reduce=True`` and ``map_reduce=False`` calls return
    the value under the key False with a count of 1.
    """
    class Test(Document):
        val = BooleanField()

    Test.drop_collection()
    t = Test()
    t.val = False
    t.save()

    # NOTE: in Python ``{False: 1} == {0: 1}``, so these assertions do not
    # distinguish a bool key from an int key; they only prove the falsy
    # value was counted.
    ot = Test.objects.item_frequencies('val', map_reduce=True)
    self.assertEqual(ot, {False: 1})
    ot = Test.objects.item_frequencies('val', map_reduce=False)
    self.assertEqual(ot, {False: 1})
def test_item_frequencies_normalize(self):
    """Ensure normalize=True returns each value's share of the total.

    Saves 50 documents with ``val=1`` and 20 with ``val=2``, then checks
    that both the ``map_reduce=False`` and ``map_reduce=True`` calls
    return 50/70 and 20/70 respectively.
    """
    class Test(Document):
        val = IntField()

    Test.drop_collection()

    for i in xrange(50):
        Test(val=1).save()

    for i in xrange(20):
        Test(val=2).save()

    expected = {1: 50.0/70, 2: 20.0/70}

    # assertEqual rather than the deprecated assertEquals alias.
    freqs = Test.objects.item_frequencies('val', map_reduce=False, normalize=True)
    self.assertEqual(freqs, expected)

    freqs = Test.objects.item_frequencies('val', map_reduce=True, normalize=True)
    self.assertEqual(freqs, expected)
def test_average(self): def test_average(self):
"""Ensure that field can be averaged correctly. """Ensure that field can be averaged correctly.
""" """