Browse Source

Drop unidecode and replace with cautious_slugify from Wagtail

master
Craig Loftus 7 years ago
parent
commit
eba482ed44
5 changed files with 126 additions and 60 deletions
  1. 25
    22
      docs/source/ref/settings.rst
  2. 8
    0
      docs/source/releases/v2.0.rst
  3. 0
    2
      setup.py
  4. 67
    20
      src/oscar/core/utils.py
  5. 26
    16
      tests/integration/core/test_utils.py

+ 25
- 22
docs/source/ref/settings.rst View File

@@ -428,51 +428,54 @@ Such files should always be deleted afterwards.
428 428
 Slug settings
429 429
 =============
430 430
 
431
-``OSCAR_SLUG_MAP``
432
-------------------
433
-
434
-Default: ``{}``
435
-
436
-A dictionary to map strings to more readable versions for including in URL
437
-slugs.  This mapping is appled before the slugify function.
438
-This is useful when names contain characters which would normally be
439
-stripped.  For instance::
440
-
441
-    OSCAR_SLUG_MAP = {
442
-        'c++': 'cpp',
443
-        'f#': 'fsharp',
444
-    }
445
-
446 431
 ``OSCAR_SLUG_FUNCTION``
447 432
 -----------------------
448 433
 
449 434
 Default: ``'oscar.core.utils.default_slugifier'``
450 435
 
451
-The slugify function to use.  Note that is used within Oscar's slugify wrapper
452
-(in ``oscar.core.utils``) which applies the custom map and blacklist. String
453
-notation is recommended, but specifying a callable is supported for
454
-backwards-compatibility.
436
+A dotted path to the slugify function to use.
455 437
 
456 438
 Example::
457 439
 
458 440
     # in myproject.utils
459
-    def some_slugify(value):
441
+    def some_slugify(value, allow_unicode=False):
460 442
         return value
461 443
 
462 444
     # in settings.py
463 445
     OSCAR_SLUG_FUNCTION = 'myproject.utils.some_slugify'
464 446
 
447
+``OSCAR_SLUG_MAP``
448
+------------------
449
+
450
+Default: ``{}``
451
+
452
+A dictionary of target:replacement strings that slugify will apply before
453
+slugifying the value. This is useful when names contain characters which would
454
+normally be stripped. For instance::
455
+
456
+    OSCAR_SLUG_MAP = {
457
+        'c++': 'cpp',
458
+        'f#': 'fsharp',
459
+    }
465 460
 
466 461
 ``OSCAR_SLUG_BLACKLIST``
467 462
 ------------------------
468 463
 
469 464
 Default: ``[]``
470 465
 
471
-A list of words to exclude from slugs.
466
+An iterable of words that slugify will try to remove after the value has been
467
+slugified. Note, a word will not be removed from the slug if it would
468
+result in an empty slug.
472 469
 
473 470
 Example::
474 471
 
475
-    OSCAR_SLUG_BLACKLIST = ['the', 'a', 'but']
472
+    # With OSCAR_SLUG_BLACKLIST = ['the']
473
+    slugify('The cat')
474
+    > 'cat'
475
+
476
+    # With OSCAR_SLUG_BLACKLIST = ['the', 'cat']
477
+    slugify('The cat')
478
+    > 'cat'
476 479
 
477 480
 ``OSCAR_SLUG_ALLOW_UNICODE``
478 481
 ----------------------------

+ 8
- 0
docs/source/releases/v2.0.rst View File

@@ -40,6 +40,14 @@ Minor changes
40 40
 - ``OrderPlacementMixin.get_message_context()`` is now passed a ``code`` argument
41 41
   specifying the communication event type code for a message being sent.
42 42
 
43
+- We've dropped the dependency on Unidecode due to license incompatibilities.
44
+  ``oscar.core.utils.default_slugifier`` now uses ``oscar.core.utils.cautious_slugify``
45
+  to handle unicode characters in slugs when ``OSCAR_SLUG_ALLOW_UNICODE`` is ``False``.
46
+
47
+- ``OSCAR_SLUG_FUNCTION`` previously accepted a function as its value. It now
48
+  only accepts a dotted path to a function as its value. Such functions must
49
+  also now take an ``allow_unicode`` kwarg.
50
+
43 51
 Dependency changes
44 52
 ------------------
45 53
 

+ 0
- 2
setup.py View File

@@ -31,8 +31,6 @@ install_requires = [
31 31
     'sorl-thumbnail>=12.4.1,<12.5',
32 32
     # Babel is used for currency formatting
33 33
     'Babel>=1.0,<3.0',
34
-    # For converting non-ASCII to ASCII when creating slugs
35
-    'Unidecode>=1.0,<1.1',
36 34
     # For manipulating search URLs
37 35
     'purl>=0.7',
38 36
     # For phone number field

+ 67
- 20
src/oscar/core/utils.py View File

@@ -1,5 +1,7 @@
1 1
 import datetime
2 2
 import logging
3
+import re
4
+import unicodedata
3 5
 
4 6
 from django.conf import settings
5 7
 from django.shortcuts import redirect, resolve_url
@@ -8,43 +10,88 @@ from django.utils.http import is_safe_url
8 10
 from django.utils.module_loading import import_string
9 11
 from django.utils.text import slugify as django_slugify
10 12
 from django.utils.timezone import get_current_timezone, is_naive, make_aware
11
-from unidecode import unidecode
12 13
 
13 14
 
14
-def default_slugifier(value):
15
+SLUGIFY_RE = re.compile(r'[^\w\s-]', re.UNICODE)
16
+
17
+
18
+def cautious_slugify(value):
19
+    """
20
+    Convert a string to ASCII exactly as Django's slugify does, with the exception
21
+    that any non-ASCII alphanumeric characters (that cannot be ASCIIfied under Unicode
22
+    normalisation) are escaped into codes like 'u0421' instead of being deleted entirely.
23
+    This ensures that the result of slugifying e.g. Cyrillic text will not be an empty
24
+    string, and can thus be safely used as an identifier (albeit not a human-readable one).
25
+
26
+    cautious_slugify was copied from Wagtail:
27
+    <https://github.com/wagtail/wagtail/blob/8b420b9/wagtail/core/utils.py>
28
+
29
+    Copyright (c) 2014-present Torchbox Ltd and individual contributors.
30
+    Released under the BSD 3-clause "New" or "Revised" License
31
+    <https://github.com/wagtail/wagtail/blob/8b420b9/LICENSE>
32
+
33
+    Date: 2018-06-15
34
+    """
35
+    # Normalize the string to decomposed unicode form. This causes accented Latin
36
+    # characters to be split into 'base character' + 'accent modifier'; the latter will
37
+    # be stripped out by the regexp, resulting in an ASCII-clean character that doesn't
38
+    # need to be escaped
39
+    value = unicodedata.normalize('NFKD', value)
40
+
41
+    # Strip out characters that aren't letterlike, underscores or hyphens,
42
+    # using the same regexp that slugify uses. This ensures that non-ASCII non-letters
43
+    # (e.g. accent modifiers, fancy punctuation) get stripped rather than escaped
44
+    value = SLUGIFY_RE.sub('', value)
45
+
46
+    # Encode as ASCII, escaping non-ASCII characters with backslashreplace, then convert
47
+    # back to a unicode string (which is what slugify expects)
48
+    value = value.encode('ascii', 'backslashreplace').decode('ascii')
49
+
50
+    # Pass to slugify to perform final conversion (whitespace stripping, applying
51
+    # mark_safe); this will also strip out the backslashes from the 'backslashreplace'
52
+    # conversion
53
+    return django_slugify(value)
54
+
55
+
56
+def default_slugifier(value, allow_unicode=False):
15 57
     """
16
-    Oscar's default slugifier function. Uses Django's slugify function.
58
+    Oscar's default slugifier function. When unicode is allowed
59
+    it uses Django's slugify function, otherwise it uses cautious_slugify.
17 60
     """
18
-    return django_slugify(value, allow_unicode=settings.OSCAR_SLUG_ALLOW_UNICODE)
61
+    if allow_unicode:
62
+        return django_slugify(value, allow_unicode=True)
63
+    else:
64
+        return cautious_slugify(value)
19 65
 
20 66
 
21 67
 def slugify(value):
22 68
     """
23
-    Slugify a string (even if it contains non-ASCII chars)
69
+    Slugify a string
70
+
71
+    The OSCAR_SLUG_FUNCTION can be set with a dotted path to the slug
72
+    function to use, defaults to 'oscar.core.utils.default_slugifier'.
73
+
74
+    OSCAR_SLUG_MAP can be set to a dictionary of target:replacement pairs
75
+
76
+    OSCAR_SLUG_BLACKLIST can be set to an iterable of words to remove after
77
+    the slug is generated; though it will not reduce a slug to zero length.
24 78
     """
79
+    value = str(value)
80
+
25 81
     # Re-map some strings to avoid important characters being stripped.  Eg
26 82
     # remap 'c++' to 'cpp' otherwise it will become 'c'.
27 83
     for k, v in settings.OSCAR_SLUG_MAP.items():
28 84
         value = value.replace(k, v)
29 85
 
30
-    # Allow an alternative slugify function to be specified
31
-    # Recommended way to specify a function is as a string
32
-    slugifier = getattr(settings, 'OSCAR_SLUG_FUNCTION', default_slugifier)
33
-    if isinstance(slugifier, str):
34
-        slugifier = import_string(slugifier)
35
-
36
-    # Use unidecode to convert non-ASCII strings to ASCII equivalents where
37
-    # possible if unicode is not allowed to contain in slug.
38
-    if not settings.OSCAR_SLUG_ALLOW_UNICODE:
39
-        value = unidecode(str(value))
86
+    slugifier = import_string(settings.OSCAR_SLUG_FUNCTION)
87
+    slug = slugifier(value, allow_unicode=settings.OSCAR_SLUG_ALLOW_UNICODE)
40 88
 
41
-    value = slugifier(str(value))
42
-    # Remove stopwords
89
+    # Remove stopwords from slug
43 90
     for word in settings.OSCAR_SLUG_BLACKLIST:
44
-        value = value.replace(word + '-', '')
45
-        value = value.replace('-' + word, '')
91
+        slug = slug.replace(word + '-', '')
92
+        slug = slug.replace('-' + word, '')
46 93
 
47
-    return value
94
+    return slug
48 95
 
49 96
 
50 97
 def format_datetime(dt, format=None):

+ 26
- 16
tests/integration/core/test_utils.py View File

@@ -4,26 +4,36 @@ from django.test.utils import override_settings
4 4
 
5 5
 from oscar.core import utils
6 6
 
7
-sluggish = lambda s: s.upper()
7
+
8
+def sluggish(value, allow_unicode=False):
9
+    return value.upper()
8 10
 
9 11
 
10 12
 class TestSlugify(TestCase):
11 13
 
14
+    def test_default_unicode_to_ascii(self):
15
+        self.assertEqual('konig-der-straxdfe', utils.slugify('König der Straße'))
16
+        self.assertEqual('not-fancy', utils.slugify('Not fancy'))
17
+        self.assertEqual('u4e01u4e02-u4e03u4e04u4e05', utils.slugify('丁丂 七丄丅'))
18
+
19
+    @override_settings(OSCAR_SLUG_ALLOW_UNICODE=True)
20
+    def test_allow_unicode(self):
21
+        self.assertEqual('könig-der-straße', utils.slugify('König der Straße'))
22
+        self.assertEqual('丁丂-七丄丅', utils.slugify('丁丂 七丄丅'))
23
+        self.assertEqual('not-fancy', utils.slugify('Not fancy'))
24
+
25
+    @override_settings(OSCAR_SLUG_FUNCTION='tests.integration.core.test_utils.sluggish')
26
+    def test_custom_slugifier(self):
27
+        self.assertEqual('HAM AND EGGS', utils.slugify('Ham and eggs'))
28
+
29
+    @override_settings(OSCAR_SLUG_MAP={'c++': 'cpp'})
12 30
     def test_uses_custom_mappings(self):
13
-        mapping = {'c++': 'cpp'}
14
-        with override_settings(OSCAR_SLUG_MAP=mapping):
15
-            self.assertEqual('cpp', utils.slugify('c++'))
31
+        self.assertEqual('cpp', utils.slugify('c++'))
16 32
 
33
+    @override_settings(OSCAR_SLUG_BLACKLIST=['the'])
17 34
     def test_uses_blacklist(self):
18
-        blacklist = ['the']
19
-        with override_settings(OSCAR_SLUG_BLACKLIST=blacklist):
20
-            self.assertEqual('bible', utils.slugify('The Bible'))
21
-
22
-    def test_handles_unicode(self):
23
-        self.assertEqual('konig-der-strasse',
24
-                         utils.slugify('König der Straße'))
25
-
26
-    def test_works_with_custom_slugifier(self):
27
-        for fn in [sluggish, 'tests.integration.core.test_utils.sluggish']:
28
-            with override_settings(OSCAR_SLUG_FUNCTION=fn):
29
-                self.assertEqual('HAM AND EGGS', utils.slugify('Ham and eggs'))
35
+        self.assertEqual('bible', utils.slugify('The Bible'))
36
+
37
+    @override_settings(OSCAR_SLUG_BLACKLIST=['the', 'bible'])
38
+    def test_uses_blacklist_doesnt_reduce_to_nothing(self):
39
+        self.assertEqual('bible', utils.slugify('The Bible'))

Loading…
Cancel
Save