Merge branch 'master' of github.com:MongoEngine/mongoengine into fix_baselist_marked_changed_bug

2018-12-15 20:36:42 +01:00
parent 7ddbea697e d3a592e5bf
commit 4492874d08
57 changed files with 2051 additions and 1062 deletions
--- a/mongoengine/base/common.py
+++ b/mongoengine/base/common.py
@@ -3,10 +3,10 @@ from mongoengine.errors import NotRegistered
 __all__ = ('UPDATE_OPERATORS', 'get_document', '_document_registry')


-UPDATE_OPERATORS = set(['set', 'unset', 'inc', 'dec', 'mul',
-                        'pop', 'push', 'push_all', 'pull',
-                        'pull_all', 'add_to_set', 'set_on_insert',
-                        'min', 'max', 'rename'])
+UPDATE_OPERATORS = {'set', 'unset', 'inc', 'dec', 'mul',
+                    'pop', 'push', 'push_all', 'pull',
+                    'pull_all', 'add_to_set', 'set_on_insert',
+                    'min', 'max', 'rename'}


 _document_registry = {}
@@ -19,7 +19,7 @@ def get_document(name):
        # Possible old style name
        single_end = name.split('.')[-1]
        compound_end = '.%s' % single_end
-        possible_match = [k for k in _document_registry.keys()
+        possible_match = [k for k in _document_registry
                          if k.endswith(compound_end) or k == single_end]
        if len(possible_match) == 1:
            doc = _document_registry.get(possible_match.pop(), None)
--- a/mongoengine/base/datastructures.py
+++ b/mongoengine/base/datastructures.py
@@ -35,10 +35,9 @@ class BaseDict(dict):
    _name = None

    def __init__(self, dict_items, instance, name):
-        Document = _import_class('Document')
-        EmbeddedDocument = _import_class('EmbeddedDocument')
+        BaseDocument = _import_class('BaseDocument')

-        if isinstance(instance, (Document, EmbeddedDocument)):
+        if isinstance(instance, BaseDocument):
            self._instance = weakref.proxy(instance)
        self._name = name
        super(BaseDict, self).__init__(dict_items)
@@ -56,11 +55,11 @@ class BaseDict(dict):
        EmbeddedDocument = _import_class('EmbeddedDocument')
        if isinstance(value, EmbeddedDocument) and value._instance is None:
            value._instance = self._instance
-        elif not isinstance(value, BaseDict) and isinstance(value, dict):
+        elif isinstance(value, dict) and not isinstance(value, BaseDict):
            value = BaseDict(value, None, '%s.%s' % (self._name, key))
            super(BaseDict, self).__setitem__(key, value)
            value._instance = self._instance
-        elif not isinstance(value, BaseList) and isinstance(value, list):
+        elif isinstance(value, list) and not isinstance(value, BaseList):
            value = BaseList(value, None, '%s.%s' % (self._name, key))
            super(BaseDict, self).__setitem__(key, value)
            value._instance = self._instance
@@ -100,10 +99,9 @@ class BaseList(list):
    _name = None

    def __init__(self, list_items, instance, name):
-        Document = _import_class('Document')
-        EmbeddedDocument = _import_class('EmbeddedDocument')
+        BaseDocument = _import_class('BaseDocument')

-        if isinstance(instance, (Document, EmbeddedDocument)):
+        if isinstance(instance, BaseDocument):
            self._instance = weakref.proxy(instance)
        self._name = name
        super(BaseList, self).__init__(list_items)
@@ -119,12 +117,12 @@ class BaseList(list):
        EmbeddedDocument = _import_class('EmbeddedDocument')
        if isinstance(value, EmbeddedDocument) and value._instance is None:
            value._instance = self._instance
-        elif not isinstance(value, BaseDict) and isinstance(value, dict):
+        elif isinstance(value, dict) and not isinstance(value, BaseDict):
            # Replace dict by BaseDict
            value = BaseDict(value, None, '%s.%s' % (self._name, key))
            super(BaseList, self).__setitem__(key, value)
            value._instance = self._instance
-        elif not isinstance(value, BaseList) and isinstance(value, list):
+        elif isinstance(value, list) and not isinstance(value, BaseList):
            # Replace list by BaseList
            value = BaseList(value, None, '%s.%s' % (self._name, key))
            super(BaseList, self).__setitem__(key, value)
@@ -218,6 +216,9 @@ class EmbeddedDocumentList(BaseList):
        Filters the list by only including embedded documents with the
        given keyword arguments.

+        This method only supports simple comparisons (e.g. .filter(name='John Doe'))
+        and, unlike queryset.filter, does not support operators such as __gte, __lte or __icontains
+
        :param kwargs: The keyword arguments corresponding to the fields to
         filter on. *Multiple arguments are treated as if they are ANDed
         together.*
@@ -358,7 +359,7 @@ class EmbeddedDocumentList(BaseList):

 class StrictDict(object):
    __slots__ = ()
-    _special_fields = set(['get', 'pop', 'iteritems', 'items', 'keys', 'create'])
+    _special_fields = {'get', 'pop', 'iteritems', 'items', 'keys', 'create'}
    _classes = {}

    def __init__(self, **kwargs):
--- a/mongoengine/base/document.py
+++ b/mongoengine/base/document.py
@@ -1,11 +1,8 @@
 import copy
 import numbers
-from collections import Hashable
 from functools import partial

-from bson import ObjectId, json_util
-from bson.dbref import DBRef
-from bson.son import SON
+from bson import DBRef, ObjectId, SON, json_util
 import pymongo
 import six

@@ -19,6 +16,7 @@ from mongoengine.base.fields import ComplexBaseField
 from mongoengine.common import _import_class
 from mongoengine.errors import (FieldDoesNotExist, InvalidDocumentError,
                                LookUpError, OperationError, ValidationError)
+from mongoengine.python_support import Hashable

 __all__ = ('BaseDocument', 'NON_FIELD_ERRORS')

@@ -302,7 +300,7 @@ class BaseDocument(object):
        data['_cls'] = self._class_name

        # only root fields ['test1.a', 'test2'] => ['test1', 'test2']
-        root_fields = set([f.split('.')[0] for f in fields])
+        root_fields = {f.split('.')[0] for f in fields}

        for field_name in self:
            if root_fields and field_name not in root_fields:
@@ -404,7 +402,15 @@ class BaseDocument(object):

    @classmethod
    def from_json(cls, json_data, created=False):
-        """Converts json data to an unsaved document instance"""
+        """Converts json data to a Document instance
+
+        :param json_data: The json data to load into the Document
+        :param created: If True, the document will be considered as a brand new document
+                        If False and an id is provided, it will consider that the data being
+                        loaded corresponds to what's already in the database (this affects subsequent calls to .save())
+                        If False and no id is provided, it will consider the data as a new document
+                        (default ``False``)
+        """
        return cls._from_son(json_util.loads(json_data), created=created)

    def __expand_dynamic_values(self, name, value):
@@ -495,7 +501,13 @@ class BaseDocument(object):

        self._changed_fields = []

-    def _nestable_types_changed_fields(self, changed_fields, key, data, inspected):
+    def _nestable_types_changed_fields(self, changed_fields, base_key, data):
+        """Inspect nested data for changed fields
+
+        :param changed_fields: Previously collected changed fields
+        :param base_key: The base key that must be used to prepend changes to this data
+        :param data: data to inspect for changes
+        """
        # Loop list / dict fields as they contain documents
        # Determine the iterator to use
        if not hasattr(data, 'items'):
@@ -503,68 +515,60 @@ class BaseDocument(object):
        else:
            iterator = data.iteritems()

-        for index, value in iterator:
-            list_key = '%s%s.' % (key, index)
+        for index_or_key, value in iterator:
+            item_key = '%s%s.' % (base_key, index_or_key)
            # don't check anything lower if this key is already marked
            # as changed.
-            if list_key[:-1] in changed_fields:
+            if item_key[:-1] in changed_fields:
                continue
+
            if hasattr(value, '_get_changed_fields'):
-                changed = value._get_changed_fields(inspected)
-                changed_fields += ['%s%s' % (list_key, k)
-                                   for k in changed if k]
+                changed = value._get_changed_fields()
+                changed_fields += ['%s%s' % (item_key, k) for k in changed if k]
            elif isinstance(value, (list, tuple, dict)):
                self._nestable_types_changed_fields(
-                    changed_fields, list_key, value, inspected)
+                    changed_fields, item_key, value)

-    def _get_changed_fields(self, inspected=None):
+    def _get_changed_fields(self):
        """Return a list of all fields that have explicitly been changed.
        """
        EmbeddedDocument = _import_class('EmbeddedDocument')
-        DynamicEmbeddedDocument = _import_class('DynamicEmbeddedDocument')
        ReferenceField = _import_class('ReferenceField')
+        GenericReferenceField = _import_class('GenericReferenceField')
        SortedListField = _import_class('SortedListField')

        changed_fields = []
        changed_fields += getattr(self, '_changed_fields', [])

-        inspected = inspected or set()
-        if hasattr(self, 'id') and isinstance(self.id, Hashable):
-            if self.id in inspected:
-                return changed_fields
-            inspected.add(self.id)
-
        for field_name in self._fields_ordered:
            db_field_name = self._db_field_map.get(field_name, field_name)
            key = '%s.' % db_field_name
            data = self._data.get(field_name, None)
            field = self._fields.get(field_name)

-            if hasattr(data, 'id'):
-                if data.id in inspected:
-                    continue
-            if isinstance(field, ReferenceField):
+            if db_field_name in changed_fields:
+                # Whole field already marked as changed, no need to go further
                continue
-            elif (
-                isinstance(data, (EmbeddedDocument, DynamicEmbeddedDocument)) and
-                db_field_name not in changed_fields
-            ):
+
+            if isinstance(field, ReferenceField):   # Don't follow referenced documents
+                continue
+
+            if isinstance(data, EmbeddedDocument):
                # Find all embedded fields that have been changed
-                changed = data._get_changed_fields(inspected)
+                changed = data._get_changed_fields()
                changed_fields += ['%s%s' % (key, k) for k in changed if k]
-            elif (isinstance(data, (list, tuple, dict)) and
-                    db_field_name not in changed_fields):
+            elif isinstance(data, (list, tuple, dict)):
                if (hasattr(field, 'field') and
-                        isinstance(field.field, ReferenceField)):
+                        isinstance(field.field, (ReferenceField, GenericReferenceField))):
                    continue
                elif isinstance(field, SortedListField) and field._ordering:
                    # if ordering is affected whole list is changed
-                    if any(map(lambda d: field._ordering in d._changed_fields, data)):
+                    if any(field._ordering in d._changed_fields for d in data):
                        changed_fields.append(db_field_name)
                        continue

                self._nestable_types_changed_fields(
-                    changed_fields, key, data, inspected)
+                    changed_fields, key, data)
        return changed_fields

    def _delta(self):
@@ -576,7 +580,6 @@ class BaseDocument(object):

        set_fields = self._get_changed_fields()
        unset_data = {}
-        parts = []
        if hasattr(self, '_changed_fields'):
            set_data = {}
            # Fetch each set item from its path
@@ -586,15 +589,13 @@ class BaseDocument(object):
                new_path = []
                for p in parts:
                    if isinstance(d, (ObjectId, DBRef)):
+                        # Don't dig in the references
                        break
-                    elif isinstance(d, list) and p.lstrip('-').isdigit():
-                        if p[0] == '-':
-                            p = str(len(d) + int(p))
-                        try:
-                            d = d[int(p)]
-                        except IndexError:
-                            d = None
+                    elif isinstance(d, list) and p.isdigit():
+                        # An item of a list (identified by its index) is updated
+                        d = d[int(p)]
                    elif hasattr(d, 'get'):
+                        # dict-like (dict, embedded document)
                        d = d.get(p)
                    new_path.append(p)
                path = '.'.join(new_path)
@@ -606,26 +607,26 @@ class BaseDocument(object):

        # Determine if any changed items were actually unset.
        for path, value in set_data.items():
-            if value or isinstance(value, (numbers.Number, bool)):
+            if value or isinstance(value, (numbers.Number, bool)):  # Keep 0 and False: falsy, but real values
                continue

-            # If we've set a value that ain't the default value don't unset it.
-            default = None
+            parts = path.split('.')
+
            if (self._dynamic and len(parts) and parts[0] in
                    self._dynamic_fields):
                del set_data[path]
                unset_data[path] = 1
                continue
-            elif path in self._fields:
+
+            # If we've set a value that ain't the default value don't unset it.
+            default = None
+            if path in self._fields:
                default = self._fields[path].default
            else:  # Perform a full lookup for lists / embedded lookups
                d = self
-                parts = path.split('.')
                db_field_name = parts.pop()
                for p in parts:
-                    if isinstance(d, list) and p.lstrip('-').isdigit():
-                        if p[0] == '-':
-                            p = str(len(d) + int(p))
+                    if isinstance(d, list) and p.isdigit():
                        d = d[int(p)]
                    elif (hasattr(d, '__getattribute__') and
                          not isinstance(d, dict)):
@@ -643,10 +644,9 @@ class BaseDocument(object):
                        default = None

            if default is not None:
-                if callable(default):
-                    default = default()
+                default = default() if callable(default) else default

-            if default != value:
+            if value != default:
                continue

            del set_data[path]
@@ -692,7 +692,7 @@ class BaseDocument(object):

        fields = cls._fields
        if not _auto_dereference:
-            fields = copy.copy(fields)
+            fields = copy.deepcopy(fields)

        for field_name, field in fields.iteritems():
            field._auto_dereference = _auto_dereference
@@ -1083,6 +1083,6 @@ class BaseDocument(object):
            sep = getattr(field, 'display_sep', ' ')
            values = value if field.__class__.__name__ in ('ListField', 'SortedListField') else [value]
            return sep.join([
-                dict(field.choices).get(val, val)
+                six.text_type(dict(field.choices).get(val, val))
                for val in values or []])
        return value
--- a/mongoengine/base/fields.py
+++ b/mongoengine/base/fields.py
@@ -55,7 +55,7 @@ class BaseField(object):
            field.  Generally this is deprecated in favour of the
            `FIELD.validate` method
        :param choices: (optional) The valid choices
-        :param null: (optional) Is the field value can be null. If no and there is a default value
+        :param null: (optional) Whether the field value can be null. If not and there is a default value
            then the default value is set
        :param sparse: (optional) `sparse=True` combined with `unique=True` and `required=False`
            means that uniqueness won't be enforced for `None` values
@@ -130,7 +130,6 @@ class BaseField(object):
    def __set__(self, instance, value):
        """Descriptor for assigning a value to a field in a document.
        """
-
        # If setting to None and there is a default
        # Then set the value to the default value
        if value is None:
@@ -267,13 +266,15 @@ class ComplexBaseField(BaseField):
        ReferenceField = _import_class('ReferenceField')
        GenericReferenceField = _import_class('GenericReferenceField')
        EmbeddedDocumentListField = _import_class('EmbeddedDocumentListField')
-        dereference = (self._auto_dereference and
+
+        auto_dereference = instance._fields[self.name]._auto_dereference
+
+        dereference = (auto_dereference and
                       (self.field is None or isinstance(self.field,
                                                         (GenericReferenceField, ReferenceField))))

        _dereference = _import_class('DeReference')()

-        self._auto_dereference = instance._fields[self.name]._auto_dereference
        if instance._initialised and dereference and instance._data.get(self.name):
            instance._data[self.name] = _dereference(
                instance._data.get(self.name), max_depth=1, instance=instance,
@@ -294,7 +295,7 @@ class ComplexBaseField(BaseField):
            value = BaseDict(value, instance, self.name)
            instance._data[self.name] = value

-        if (self._auto_dereference and instance._initialised and
+        if (auto_dereference and instance._initialised and
                isinstance(value, (BaseList, BaseDict)) and
                not value._dereferenced):
            value = _dereference(
@@ -313,11 +314,16 @@ class ComplexBaseField(BaseField):
        if hasattr(value, 'to_python'):
            return value.to_python()

+        BaseDocument = _import_class('BaseDocument')
+        if isinstance(value, BaseDocument):
+            # Something is wrong, return the value as it is
+            return value
+
        is_list = False
        if not hasattr(value, 'items'):
            try:
                is_list = True
-                value = {k: v for k, v in enumerate(value)}
+                value = {idx: v for idx, v in enumerate(value)}
            except TypeError:  # Not iterable return the value
                return value

@@ -502,7 +508,7 @@ class GeoJsonBaseField(BaseField):
    def validate(self, value):
        """Validate the GeoJson object based on its type."""
        if isinstance(value, dict):
-            if set(value.keys()) == set(['type', 'coordinates']):
+            if set(value.keys()) == {'type', 'coordinates'}:
                if value['type'] != self._type:
                    self.error('%s type must be "%s"' %
                               (self._name, self._type))
--- a/mongoengine/base/metaclasses.py
+++ b/mongoengine/base/metaclasses.py
@@ -18,14 +18,14 @@ class DocumentMetaclass(type):
    """Metaclass for all documents."""

    # TODO lower complexity of this method
-    def __new__(cls, name, bases, attrs):
-        flattened_bases = cls._get_bases(bases)
-        super_new = super(DocumentMetaclass, cls).__new__
+    def __new__(mcs, name, bases, attrs):
+        flattened_bases = mcs._get_bases(bases)
+        super_new = super(DocumentMetaclass, mcs).__new__

        # If a base class just call super
        metaclass = attrs.get('my_metaclass')
        if metaclass and issubclass(metaclass, DocumentMetaclass):
-            return super_new(cls, name, bases, attrs)
+            return super_new(mcs, name, bases, attrs)

        attrs['_is_document'] = attrs.get('_is_document', False)
        attrs['_cached_reference_fields'] = []
@@ -121,7 +121,8 @@ class DocumentMetaclass(type):
                # inheritance of classes where inheritance is set to False
                allow_inheritance = base._meta.get('allow_inheritance')
                if not allow_inheritance and not base._meta.get('abstract'):
-                    raise ValueError('Document %s may not be subclassed' %
+                    raise ValueError('Document %s may not be subclassed. '
+                                     'To enable inheritance, use the "allow_inheritance" meta attribute.' %
                                     base.__name__)

        # Get superclasses from last base superclass
@@ -138,7 +139,7 @@ class DocumentMetaclass(type):
        attrs['_types'] = attrs['_subclasses']  # TODO depreciate _types

        # Create the new_class
-        new_class = super_new(cls, name, bases, attrs)
+        new_class = super_new(mcs, name, bases, attrs)

        # Set _subclasses
        for base in document_bases:
@@ -147,7 +148,7 @@ class DocumentMetaclass(type):
            base._types = base._subclasses  # TODO depreciate _types

        (Document, EmbeddedDocument, DictField,
-         CachedReferenceField) = cls._import_classes()
+         CachedReferenceField) = mcs._import_classes()

        if issubclass(new_class, Document):
            new_class._collection = None
@@ -219,29 +220,26 @@ class DocumentMetaclass(type):

        return new_class

-    def add_to_class(self, name, value):
-        setattr(self, name, value)
-
    @classmethod
-    def _get_bases(cls, bases):
+    def _get_bases(mcs, bases):
        if isinstance(bases, BasesTuple):
            return bases
        seen = []
-        bases = cls.__get_bases(bases)
+        bases = mcs.__get_bases(bases)
        unique_bases = (b for b in bases if not (b in seen or seen.append(b)))
        return BasesTuple(unique_bases)

    @classmethod
-    def __get_bases(cls, bases):
+    def __get_bases(mcs, bases):
        for base in bases:
            if base is object:
                continue
            yield base
-            for child_base in cls.__get_bases(base.__bases__):
+            for child_base in mcs.__get_bases(base.__bases__):
                yield child_base

    @classmethod
-    def _import_classes(cls):
+    def _import_classes(mcs):
        Document = _import_class('Document')
        EmbeddedDocument = _import_class('EmbeddedDocument')
        DictField = _import_class('DictField')
@@ -254,9 +252,9 @@ class TopLevelDocumentMetaclass(DocumentMetaclass):
    collection in the database.
    """

-    def __new__(cls, name, bases, attrs):
-        flattened_bases = cls._get_bases(bases)
-        super_new = super(TopLevelDocumentMetaclass, cls).__new__
+    def __new__(mcs, name, bases, attrs):
+        flattened_bases = mcs._get_bases(bases)
+        super_new = super(TopLevelDocumentMetaclass, mcs).__new__

        # Set default _meta data if base class, otherwise get user defined meta
        if attrs.get('my_metaclass') == TopLevelDocumentMetaclass:
@@ -319,7 +317,7 @@ class TopLevelDocumentMetaclass(DocumentMetaclass):
                    not parent_doc_cls._meta.get('abstract', False)):
                msg = 'Abstract document cannot have non-abstract base'
                raise ValueError(msg)
-            return super_new(cls, name, bases, attrs)
+            return super_new(mcs, name, bases, attrs)

        # Merge base class metas.
        # Uses a special MetaDict that handles various merging rules
@@ -360,7 +358,7 @@ class TopLevelDocumentMetaclass(DocumentMetaclass):
        attrs['_meta'] = meta

        # Call super and get the new class
-        new_class = super_new(cls, name, bases, attrs)
+        new_class = super_new(mcs, name, bases, attrs)

        meta = new_class._meta

@@ -394,7 +392,7 @@ class TopLevelDocumentMetaclass(DocumentMetaclass):
                                           '_auto_id_field', False)
        if not new_class._meta.get('id_field'):
            # After 0.10, find not existing names, instead of overwriting
-            id_name, id_db_name = cls.get_auto_id_names(new_class)
+            id_name, id_db_name = mcs.get_auto_id_names(new_class)
            new_class._auto_id_field = True
            new_class._meta['id_field'] = id_name
            new_class._fields[id_name] = ObjectIdField(db_field=id_db_name)
@@ -419,7 +417,7 @@ class TopLevelDocumentMetaclass(DocumentMetaclass):
        return new_class

    @classmethod
-    def get_auto_id_names(cls, new_class):
+    def get_auto_id_names(mcs, new_class):
        id_name, id_db_name = ('id', '_id')
        if id_name not in new_class._fields and \
                id_db_name not in (v.db_field for v in new_class._fields.values()):
--- a/mongoengine/base/utils.py
+++ b/mongoengine/base/utils.py
@@ -0,0 +1,22 @@
+import re
+
+
+class LazyRegexCompiler(object):
+    """Descriptor to allow lazy compilation of regex"""
+
+    def __init__(self, pattern, flags=0):
+        self._pattern = pattern
+        self._flags = flags
+        self._compiled_regex = None
+
+    @property
+    def compiled_regex(self):
+        if self._compiled_regex is None:
+            self._compiled_regex = re.compile(self._pattern, self._flags)
+        return self._compiled_regex
+
+    def __get__(self, instance, owner):
+        return self.compiled_regex
+
+    def __set__(self, instance, value):
+        raise AttributeError("Can not set attribute LazyRegexCompiler")