From 7a877a00d5f43a0da967e4fa457566b64a0b9bcb Mon Sep 17 00:00:00 2001 From: Ross Lawley Date: Mon, 1 Oct 2012 13:59:15 +0000 Subject: [PATCH] Updated URLField handle unicode and custom validator (MongoEngine/mongoengine#136) --- docs/changelog.rst | 1 + mongoengine/base.py | 6 +++--- mongoengine/fields.py | 36 +++++++++++++++++++++++------------- tests/test_fields.py | 3 +++ 4 files changed, 30 insertions(+), 16 deletions(-) diff --git a/docs/changelog.rst b/docs/changelog.rst index 524f1057..996b5ebd 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -4,6 +4,7 @@ Changelog Changes in 0.7.X ================ +- Updated URLField - can handle unicode and custom validator (MongoEngine/mongoengine#136) - Allow Django AuthenticationBackends to work with Django user (hmarr/mongoengine#573) - Fixed reload issue with ReferenceField where dbref=False (MongoEngine/mongoengine#138) diff --git a/mongoengine/base.py b/mongoengine/base.py index 92dcfa93..2041caa8 100644 --- a/mongoengine/base.py +++ b/mongoengine/base.py @@ -52,7 +52,7 @@ class ValidationError(AssertionError): self.field_name = kwargs.get('field_name') self.message = message - def __str__(self): + def __unicode__(self): return self.message def __repr__(self): @@ -1338,13 +1338,13 @@ class BaseDocument(object): u = '[Bad Unicode data]' return '<%s: %s>' % (self.__class__.__name__, u) - def __str__(self): + def __unicode__(self): if hasattr(self, '__unicode__'): if PY3: return self.__unicode__() else: return unicode(self).encode('utf-8') - return '%s object' % self.__class__.__name__ + return unicode('%s object' % self.__class__.__name__) def __eq__(self, other): if isinstance(other, self.__class__) and hasattr(other, 'id'): diff --git a/mongoengine/fields.py b/mongoengine/fields.py index 7729444e..3dc67490 100644 --- a/mongoengine/fields.py +++ b/mongoengine/fields.py @@ -1,10 +1,12 @@ import datetime import decimal +import itertools import re import time +import urllib2 +import urlparse import uuid import warnings -import itertools from operator import itemgetter import gridfs @@ -101,25 +103,33 @@ class URLField(StringField): .. versionadded:: 0.3 """ - URL_REGEX = re.compile( - r'^https?://' - r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+[A-Z]{2,6}\.?|' - r'localhost|' - r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})' - r'(?::\d+)?' - r'(?:/?|[/?]\S+)$', re.IGNORECASE - ) + _URL_REGEX = re.compile( + r'^(?:http|ftp)s?://' # http:// or https:// + r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+(?:[A-Z]{2,6}\.?|[A-Z0-9-]{2,}\.?)|' #domain... + r'localhost|' #localhost... + r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})' # ...or ip + r'(?::\d+)?' # optional port + r'(?:/?|[/?]\S+)$', re.IGNORECASE) - def __init__(self, verify_exists=False, **kwargs): + def __init__(self, verify_exists=False, url_regex=None, **kwargs): self.verify_exists = verify_exists + self.url_regex = url_regex or self._URL_REGEX super(URLField, self).__init__(**kwargs) def validate(self, value): - if not URLField.URL_REGEX.match(value): - self.error('Invalid URL: %s' % value) + if not self.url_regex.match(value): + scheme, netloc, path, query, fragment = urlparse.urlsplit(value) + try: + netloc = netloc.encode('idna') # IDN -> ACE + except UnicodeError: # invalid domain part + self.error('Invalid URL: %s' % value) if self.verify_exists: - import urllib2 + warnings.warn( + "The URLField verify_exists argument has intractable security " + "and performance issues. Accordingly, it has been deprecated.", + DeprecationWarning + ) try: request = urllib2.Request(value) urllib2.urlopen(request) diff --git a/tests/test_fields.py b/tests/test_fields.py index 04c3765c..528f2a16 100644 --- a/tests/test_fields.py +++ b/tests/test_fields.py @@ -199,6 +199,9 @@ class FieldTest(unittest.TestCase): link.url = 'http://www.google.com:8080' link.validate() + link.url = u'http://президент.рф' + self.assertTrue(link.validate()) + def test_int_validation(self): """Ensure that invalid values cannot be assigned to int fields. """