7 years ago · eba482ed44
--- a/docs/source/ref/settings.rst
+++ b/docs/source/ref/settings.rst
@@ -428,51 +428,54 @@ Such files should always be deleted afterwards.
 
				
				 Slug settings
			
 
				
				 =============
			
 
				
				 
			
 
				
				-``OSCAR_SLUG_MAP``
			
 
				
				-------------------
			
 
				
				-
			
 
				
				-Default: ``{}``
			
 
				
				-
			
 
				
				-A dictionary to map strings to more readable versions for including in URL
			
 
				
				-slugs.  This mapping is appled before the slugify function.
			
 
				
				-This is useful when names contain characters which would normally be
			
 
				
				-stripped.  For instance::
			
 
				
				-
			
 
				
				-    OSCAR_SLUG_MAP = {
			
 
				
				-        'c++': 'cpp',
			
 
				
				-        'f#': 'fsharp',
			
 
				
				-    }
			
 
				
				-
			
 
				
				 ``OSCAR_SLUG_FUNCTION``
			
 
				
				 -----------------------
			
 
				
				 
			
 
				
				 Default: ``'oscar.core.utils.default_slugifier'``
			
 
				
				 
			
 
				
				-The slugify function to use.  Note that is used within Oscar's slugify wrapper
			
 
				
				-(in ``oscar.core.utils``) which applies the custom map and blacklist. String
			
 
				
				-notation is recommended, but specifying a callable is supported for
			
 
				
				-backwards-compatibility.
			
 
				
				+A dotted path to the slugify function to use.
			
 
				
				 
			
 
				
				 Example::
			
 
				
				 
			
 
				
				     # in myproject.utils
			
 
				
				-    def some_slugify(value):
			
 
				
				+    def some_slugify(value, allow_unicode=False):
			
 
				
				         return value
			
 
				
				 
			
 
				
				     # in settings.py
			
 
				
				     OSCAR_SLUG_FUNCTION = 'myproject.utils.some_slugify'
			
 
				
				 
			
 
				
				+``OSCAR_SLUG_MAP``
			
 
				
				+------------------
			
 
				
				+
			
 
				
				+Default: ``{}``
			
 
				
				+
			
 
				
				+A dictionary of target:replacement strings that slugify will apply before
			
 
				
				+slugifying the value. This is useful when names contain characters which would
			
 
				
				+normally be stripped. For instance::
			
 
				
				+
			
 
				
				+    OSCAR_SLUG_MAP = {
			
 
				
				+        'c++': 'cpp',
			
 
				
				+        'f#': 'fsharp',
			
 
				
				+    }
			
 
				
				 
			
 
				
				 ``OSCAR_SLUG_BLACKLIST``
			
 
				
				 ------------------------
			
 
				
				 
			
 
				
				 Default: ``[]``
			
 
				
				 
			
 
				
				-A list of words to exclude from slugs.
			
 
				
				+An iterable of words that slugify will try to remove after the value has been
			
 
				
				+slugified. Note, a word will not be removed from the slug if it would
			
 
				
				+result in an empty slug.
			
 
				
				 
			
 
				
				 Example::
			
 
				
				 
			
 
				
				-    OSCAR_SLUG_BLACKLIST = ['the', 'a', 'but']
			
 
				
				+    # With OSCAR_SLUG_BLACKLIST = ['the']
			
 
				
				+    slugify('The cat')
			
 
				
				+    > 'cat'
			
 
				
				+
			
 
				
				+    # With OSCAR_SLUG_BLACKLIST = ['the', 'cat']
			
 
				
				+    slugify('The cat')
			
 
				
				+    > 'cat'
			
 
				
				 
			
 
				
				 ``OSCAR_SLUG_ALLOW_UNICODE``
			
 
				
				 ----------------------------
			
--- a/docs/source/releases/v2.0.rst
+++ b/docs/source/releases/v2.0.rst
@@ -40,6 +40,14 @@ Minor changes
 
				
				 - ``OrderPlacementMixin.get_message_context()`` is now passed a ``code`` argument
			
 
				
				   specifying the communication event type code for a message being sent.
			
 
				
				 
			
 
				
				+- We've dropped the dependency on Unidecode due to license incompatibilities.
			
 
				
				+  ``oscar.core.utils.default_slugifier`` now uses ``oscar.core.utils.cautious_slugify``
			
 
				
				+  to handle unicode characters in slugs when ``OSCAR_SLUG_ALLOW_UNICODE`` is ``False``.
			
 
				
				+
			
 
				
				+- ``OSCAR_SLUG_FUNCTION`` previously accepted a function as its value. It now
			
 
				
				+  only accepts a dotted path to a function as its value. Such functions must
			
 
				
				+  also now take an ``allow_unicode`` kwarg.
			
 
				
				+
			
 
				
				 Dependency changes
			
 
				
				 ------------------
			
 
				
				 
			
--- a/setup.py
+++ b/setup.py
@@ -31,8 +31,6 @@ install_requires = [
 
				
				     'sorl-thumbnail>=12.4.1,<12.5',
			
 
				
				     # Babel is used for currency formatting
			
 
				
				     'Babel>=1.0,<3.0',
			
 
				
				-    # For converting non-ASCII to ASCII when creating slugs
			
 
				
				-    'Unidecode>=1.0,<1.1',
			
 
				
				     # For manipulating search URLs
			
 
				
				     'purl>=0.7',
			
 
				
				     # For phone number field
			
--- a/src/oscar/core/utils.py
+++ b/src/oscar/core/utils.py
@@ -1,5 +1,7 @@
 
				
				 import datetime
			
 
				
				 import logging
			
 
				
				+import re
			
 
				
				+import unicodedata
			
 
				
				 
			
 
				
				 from django.conf import settings
			
 
				
				 from django.shortcuts import redirect, resolve_url
			
@@ -8,43 +10,88 @@ from django.utils.http import is_safe_url
 
				
				 from django.utils.module_loading import import_string
			
 
				
				 from django.utils.text import slugify as django_slugify
			
 
				
				 from django.utils.timezone import get_current_timezone, is_naive, make_aware
			
 
				
				-from unidecode import unidecode
			
 
				
				 
			
 
				
				 
			
 
				
				-def default_slugifier(value):
			
 
				
				+SLUGIFY_RE = re.compile(r'[^\w\s-]', re.UNICODE)
			
 
				
				+
			
 
				
				+
			
 
				
				+def cautious_slugify(value):
			
 
				
				+    """
			
 
				
				+    Convert a string to ASCII exactly as Django's slugify does, with the exception
			
 
				
				+    that any non-ASCII alphanumeric characters (that cannot be ASCIIfied under Unicode
			
 
				
				+    normalisation) are escaped into codes like 'u0421' instead of being deleted entirely.
			
 
				
				+    This ensures that the result of slugifying e.g. Cyrillic text will not be an empty
			
 
				
				+    string, and can thus be safely used as an identifier (albeit not a human-readable one).
			
 
				
				+
			
 
				
				+    cautious_slugify was copied from Wagtail:
			
 
				
				+    <https://github.com/wagtail/wagtail/blob/8b420b9/wagtail/core/utils.py>
			
 
				
				+
			
 
				
				+    Copyright (c) 2014-present Torchbox Ltd and individual contributors.
			
 
				
				+    Released under the BSD 3-clause "New" or "Revised" License
			
 
				
				+    <https://github.com/wagtail/wagtail/blob/8b420b9/LICENSE>
			
 
				
				+
			
 
				
				+    Date: 2018-06-15
			
 
				
				+    """
			
 
				
				+    # Normalize the string to decomposed unicode form. This causes accented Latin
			
 
				
				+    # characters to be split into 'base character' + 'accent modifier'; the latter will
			
 
				
				+    # be stripped out by the regexp, resulting in an ASCII-clean character that doesn't
			
 
				
				+    # need to be escaped
			
 
				
				+    value = unicodedata.normalize('NFKD', value)
			
 
				
				+
			
 
				
				+    # Strip out characters that aren't letterlike, underscores or hyphens,
			
 
				
				+    # using the same regexp that slugify uses. This ensures that non-ASCII non-letters
			
 
				
				+    # (e.g. accent modifiers, fancy punctuation) get stripped rather than escaped
			
 
				
				+    value = SLUGIFY_RE.sub('', value)
			
 
				
				+
			
 
				
				+    # Encode as ASCII, escaping non-ASCII characters with backslashreplace, then convert
			
 
				
				+    # back to a unicode string (which is what slugify expects)
			
 
				
				+    value = value.encode('ascii', 'backslashreplace').decode('ascii')
			
 
				
				+
			
 
				
				+    # Pass to slugify to perform final conversion (whitespace stripping, applying
			
 
				
				+    # mark_safe); this will also strip out the backslashes from the 'backslashreplace'
			
 
				
				+    # conversion
			
 
				
				+    return django_slugify(value)
			
 
				
				+
			
 
				
				+
			
 
				
				+def default_slugifier(value, allow_unicode=False):
			
 
				
				     """
			
 
				
				-    Oscar's default slugifier function. Uses Django's slugify function.
			
 
				
				+    Oscar's default slugifier function. When unicode is allowed
			
 
				
				+    it uses Django's slugify function, otherwise it uses cautious_slugify.
			
 
				
				     """
			
 
				
				-    return django_slugify(value, allow_unicode=settings.OSCAR_SLUG_ALLOW_UNICODE)
			
 
				
				+    if allow_unicode:
			
 
				
				+        return django_slugify(value, allow_unicode=True)
			
 
				
				+    else:
			
 
				
				+        return cautious_slugify(value)
			
 
				
				 
			
 
				
				 
			
 
				
				 def slugify(value):
			
 
				
				     """
			
 
				
				-    Slugify a string (even if it contains non-ASCII chars)
			
 
				
				+    Slugify a string
			
 
				
				+
			
 
				
				+    The OSCAR_SLUG_FUNCTION can be set with a dotted path to the slug
			
 
				
				+    function to use, defaults to 'oscar.core.utils.default_slugifier'.
			
 
				
				+
			
 
				
				+    OSCAR_SLUG_MAP can be set to a dictionary of target:replacement pairs
			
 
				
				+
			
 
				
				+    OSCAR_SLUG_BLACKLIST can be set to an iterable of words to remove after
			
 
				
				+    the slug is generated; though it will not reduce a slug to zero length.
			
 
				
				     """
			
 
				
				+    value = str(value)
			
 
				
				+
			
 
				
				     # Re-map some strings to avoid important characters being stripped.  Eg
			
 
				
				     # remap 'c++' to 'cpp' otherwise it will become 'c'.
			
 
				
				     for k, v in settings.OSCAR_SLUG_MAP.items():
			
 
				
				         value = value.replace(k, v)
			
 
				
				 
			
 
				
				-    # Allow an alternative slugify function to be specified
			
 
				
				-    # Recommended way to specify a function is as a string
			
 
				
				-    slugifier = getattr(settings, 'OSCAR_SLUG_FUNCTION', default_slugifier)
			
 
				
				-    if isinstance(slugifier, str):
			
 
				
				-        slugifier = import_string(slugifier)
			
 
				
				-
			
 
				
				-    # Use unidecode to convert non-ASCII strings to ASCII equivalents where
			
 
				
				-    # possible if unicode is not allowed to contain in slug.
			
 
				
				-    if not settings.OSCAR_SLUG_ALLOW_UNICODE:
			
 
				
				-        value = unidecode(str(value))
			
 
				
				+    slugifier = import_string(settings.OSCAR_SLUG_FUNCTION)
			
 
				
				+    slug = slugifier(value, allow_unicode=settings.OSCAR_SLUG_ALLOW_UNICODE)
			
 
				
				 
			
 
				
				-    value = slugifier(str(value))
			
 
				
				-    # Remove stopwords
			
 
				
				+    # Remove stopwords from slug
			
 
				
				     for word in settings.OSCAR_SLUG_BLACKLIST:
			
 
				
				-        value = value.replace(word + '-', '')
			
 
				
				-        value = value.replace('-' + word, '')
			
 
				
				+        slug = slug.replace(word + '-', '')
			
 
				
				+        slug = slug.replace('-' + word, '')
			
 
				
				 
			
 
				
				-    return value
			
 
				
				+    return slug
			
 
				
				 
			
 
				
				 
			
 
				
				 def format_datetime(dt, format=None):
			
--- a/tests/integration/core/test_utils.py
+++ b/tests/integration/core/test_utils.py
@@ -4,26 +4,36 @@ from django.test.utils import override_settings
 
				
				 
			
 
				
				 from oscar.core import utils
			
 
				
				 
			
 
				
				-sluggish = lambda s: s.upper()
			
 
				
				+
			
 
				
				+def sluggish(value, allow_unicode=False):
			
 
				
				+    return value.upper()
			
 
				
				 
			
 
				
				 
			
 
				
				 class TestSlugify(TestCase):
			
 
				
				 
			
 
				
				+    def test_default_unicode_to_ascii(self):
			
 
				
				+        self.assertEqual('konig-der-straxdfe', utils.slugify('König der Straße'))
			
 
				
				+        self.assertEqual('not-fancy', utils.slugify('Not fancy'))
			
 
				
				+        self.assertEqual('u4e01u4e02-u4e03u4e04u4e05', utils.slugify('丁丂 七丄丅'))
			
 
				
				+
			
 
				
				+    @override_settings(OSCAR_SLUG_ALLOW_UNICODE=True)
			
 
				
				+    def test_allow_unicode(self):
			
 
				
				+        self.assertEqual('könig-der-straße', utils.slugify('König der Straße'))
			
 
				
				+        self.assertEqual('丁丂-七丄丅', utils.slugify('丁丂 七丄丅'))
			
 
				
				+        self.assertEqual('not-fancy', utils.slugify('Not fancy'))
			
 
				
				+
			
 
				
				+    @override_settings(OSCAR_SLUG_FUNCTION='tests.integration.core.test_utils.sluggish')
			
 
				
				+    def test_custom_slugifier(self):
			
 
				
				+        self.assertEqual('HAM AND EGGS', utils.slugify('Ham and eggs'))
			
 
				
				+
			
 
				
				+    @override_settings(OSCAR_SLUG_MAP={'c++': 'cpp'})
			
 
				
				     def test_uses_custom_mappings(self):
			
 
				
				-        mapping = {'c++': 'cpp'}
			
 
				
				-        with override_settings(OSCAR_SLUG_MAP=mapping):
			
 
				
				-            self.assertEqual('cpp', utils.slugify('c++'))
			
 
				
				+        self.assertEqual('cpp', utils.slugify('c++'))
			
 
				
				 
			
 
				
				+    @override_settings(OSCAR_SLUG_BLACKLIST=['the'])
			
 
				
				     def test_uses_blacklist(self):
			
 
				
				-        blacklist = ['the']
			
 
				
				-        with override_settings(OSCAR_SLUG_BLACKLIST=blacklist):
			
 
				
				-            self.assertEqual('bible', utils.slugify('The Bible'))
			
 
				
				-
			
 
				
				-    def test_handles_unicode(self):
			
 
				
				-        self.assertEqual('konig-der-strasse',
			
 
				
				-                         utils.slugify('König der Straße'))
			
 
				
				-
			
 
				
				-    def test_works_with_custom_slugifier(self):
			
 
				
				-        for fn in [sluggish, 'tests.integration.core.test_utils.sluggish']:
			
 
				
				-            with override_settings(OSCAR_SLUG_FUNCTION=fn):
			
 
				
				-                self.assertEqual('HAM AND EGGS', utils.slugify('Ham and eggs'))
			
 
				
				+        self.assertEqual('bible', utils.slugify('The Bible'))
			
 
				
				+
			
 
				
				+    @override_settings(OSCAR_SLUG_BLACKLIST=['the', 'bible'])
			
 
				
				+    def test_uses_blacklist_doesnt_reduce_to_nothing(self):
			
 
				
				+        self.assertEqual('bible', utils.slugify('The Bible'))