|
|
@@ -1,5 +1,7 @@
|
|
1
|
1
|
import datetime
|
|
2
|
2
|
import logging
|
|
|
3
|
+import re
|
|
|
4
|
+import unicodedata
|
|
3
|
5
|
|
|
4
|
6
|
from django.conf import settings
|
|
5
|
7
|
from django.shortcuts import redirect, resolve_url
|
|
|
@@ -8,43 +10,88 @@ from django.utils.http import is_safe_url
|
|
8
|
10
|
from django.utils.module_loading import import_string
|
|
9
|
11
|
from django.utils.text import slugify as django_slugify
|
|
10
|
12
|
from django.utils.timezone import get_current_timezone, is_naive, make_aware
|
|
11
|
|
-from unidecode import unidecode
|
|
12
|
13
|
|
|
13
|
14
|
|
|
14
|
|
-def default_slugifier(value):
|
|
|
15
|
+SLUGIFY_RE = re.compile(r'[^\w\s-]', re.UNICODE)
|
|
|
16
|
+
|
|
|
17
|
+
|
|
|
18
|
+def cautious_slugify(value):
|
|
|
19
|
+ """
|
|
|
20
|
+ Convert a string to ASCII exactly as Django's slugify does, with the exception
|
|
|
21
|
+ that any non-ASCII alphanumeric characters (that cannot be ASCIIfied under Unicode
|
|
|
22
|
+ normalisation) are escaped into codes like 'u0421' instead of being deleted entirely.
|
|
|
23
|
+ This ensures that the result of slugifying e.g. Cyrillic text will not be an empty
|
|
|
24
|
+ string, and can thus be safely used as an identifier (albeit not a human-readable one).
|
|
|
25
|
+
|
|
|
26
|
+ cautious_slugify was copied from Wagtail:
|
|
|
27
|
+ <https://github.com/wagtail/wagtail/blob/8b420b9/wagtail/core/utils.py>
|
|
|
28
|
+
|
|
|
29
|
+ Copyright (c) 2014-present Torchbox Ltd and individual contributors.
|
|
|
30
|
+ Released under the BSD 3-clause "New" or "Revised" License
|
|
|
31
|
+ <https://github.com/wagtail/wagtail/blob/8b420b9/LICENSE>
|
|
|
32
|
+
|
|
|
33
|
+ Date: 2018-06-15
|
|
|
34
|
+ """
|
|
|
35
|
+ # Normalize the string to decomposed unicode form. This causes accented Latin
|
|
|
36
|
+ # characters to be split into 'base character' + 'accent modifier'; the latter will
|
|
|
37
|
+ # be stripped out by the regexp, resulting in an ASCII-clean character that doesn't
|
|
|
38
|
+ # need to be escaped
|
|
|
39
|
+ value = unicodedata.normalize('NFKD', value)
|
|
|
40
|
+
|
|
|
41
|
+ # Strip out characters that aren't letterlike, underscores or hyphens,
|
|
|
42
|
+ # using the same regexp that slugify uses. This ensures that non-ASCII non-letters
|
|
|
43
|
+ # (e.g. accent modifiers, fancy punctuation) get stripped rather than escaped
|
|
|
44
|
+ value = SLUGIFY_RE.sub('', value)
|
|
|
45
|
+
|
|
|
46
|
+ # Encode as ASCII, escaping non-ASCII characters with backslashreplace, then convert
|
|
|
47
|
+ # back to a unicode string (which is what slugify expects)
|
|
|
48
|
+ value = value.encode('ascii', 'backslashreplace').decode('ascii')
|
|
|
49
|
+
|
|
|
50
|
+ # Pass to slugify to perform final conversion (whitespace stripping, applying
|
|
|
51
|
+ # mark_safe); this will also strip out the backslashes from the 'backslashreplace'
|
|
|
52
|
+ # conversion
|
|
|
53
|
+ return django_slugify(value)
|
|
|
54
|
+
|
|
|
55
|
+
|
|
|
56
|
+def default_slugifier(value, allow_unicode=False):
|
|
15
|
57
|
"""
|
|
16
|
|
- Oscar's default slugifier function. Uses Django's slugify function.
|
|
|
58
|
+ Oscar's default slugifier function. When unicode is allowed
|
|
|
59
|
+ it uses Django's slugify function, otherwise it uses cautious_slugify.
|
|
17
|
60
|
"""
|
|
18
|
|
- return django_slugify(value, allow_unicode=settings.OSCAR_SLUG_ALLOW_UNICODE)
|
|
|
61
|
+ if allow_unicode:
|
|
|
62
|
+ return django_slugify(value, allow_unicode=True)
|
|
|
63
|
+ else:
|
|
|
64
|
+ return cautious_slugify(value)
|
|
19
|
65
|
|
|
20
|
66
|
|
|
21
|
67
|
def slugify(value):
|
|
22
|
68
|
"""
|
|
23
|
|
- Slugify a string (even if it contains non-ASCII chars)
|
|
|
69
|
+ Slugify a string
|
|
|
70
|
+
|
|
|
71
|
+ The OSCAR_SLUG_FUNCTION can be set with a dotted path to the slug
|
|
|
72
|
+ function to use, defaults to 'oscar.core.utils.default_slugifier'.
|
|
|
73
|
+
|
|
|
74
|
+ OSCAR_SLUG_MAP can be set to a dictionary of target:replacement pairs
|
|
|
75
|
+
|
|
|
76
|
+ OSCAR_SLUG_BLACKLIST can be set to an iterable of words to remove after
|
|
|
77
|
+ the slug is generated; though it will not reduce a slug to zero length.
|
|
24
|
78
|
"""
|
|
|
79
|
+ value = str(value)
|
|
|
80
|
+
|
|
25
|
81
|
# Re-map some strings to avoid important characters being stripped. Eg
|
|
26
|
82
|
# remap 'c++' to 'cpp' otherwise it will become 'c'.
|
|
27
|
83
|
for k, v in settings.OSCAR_SLUG_MAP.items():
|
|
28
|
84
|
value = value.replace(k, v)
|
|
29
|
85
|
|
|
30
|
|
- # Allow an alternative slugify function to be specified
|
|
31
|
|
- # Recommended way to specify a function is as a string
|
|
32
|
|
- slugifier = getattr(settings, 'OSCAR_SLUG_FUNCTION', default_slugifier)
|
|
33
|
|
- if isinstance(slugifier, str):
|
|
34
|
|
- slugifier = import_string(slugifier)
|
|
35
|
|
-
|
|
36
|
|
- # Use unidecode to convert non-ASCII strings to ASCII equivalents where
|
|
37
|
|
- # possible if unicode is not allowed to contain in slug.
|
|
38
|
|
- if not settings.OSCAR_SLUG_ALLOW_UNICODE:
|
|
39
|
|
- value = unidecode(str(value))
|
|
|
86
|
+ slugifier = import_string(settings.OSCAR_SLUG_FUNCTION)
|
|
|
87
|
+ slug = slugifier(value, allow_unicode=settings.OSCAR_SLUG_ALLOW_UNICODE)
|
|
40
|
88
|
|
|
41
|
|
- value = slugifier(str(value))
|
|
42
|
|
- # Remove stopwords
|
|
|
89
|
+ # Remove stopwords from slug
|
|
43
|
90
|
for word in settings.OSCAR_SLUG_BLACKLIST:
|
|
44
|
|
- value = value.replace(word + '-', '')
|
|
45
|
|
- value = value.replace('-' + word, '')
|
|
|
91
|
+ slug = slug.replace(word + '-', '')
|
|
|
92
|
+ slug = slug.replace('-' + word, '')
|
|
46
|
93
|
|
|
47
|
|
- return value
|
|
|
94
|
+ return slug
|
|
48
|
95
|
|
|
49
|
96
|
|
|
50
|
97
|
def format_datetime(dt, format=None):
|