|
19 | 19 |
|
20 | 20 | """ |
21 | 21 |
|
22 | | -import re |
23 | | -import regex |
24 | 22 | import uuid |
25 | 23 |
|
26 | 24 | from textile.tools import sanitizer, imagesize |
|
48 | 46 | from HTMLParser import HTMLParser |
49 | 47 |
|
50 | 48 |
|
| 49 | +try: |
| 50 | + # Use regex module for matching uppercase characters if installed, |
| 51 | + # otherwise fall back to finding all the uppercase chars in a loop. |
| 52 | + import regex as re |
| 53 | + upper_re_s = r'\p{Lu}' |
| 54 | +except ImportError: |
| 55 | + import re |
| 56 | + from sys import maxunicode |
| 57 | + upper_re_s = "".join([unichr(c) for c in xrange(maxunicode) if |
| 58 | + unichr(c).isupper()]) |
| 59 | + |
| 60 | + |
51 | 61 | def _normalize_newlines(string): |
52 | 62 | out = string.strip() |
53 | 63 | out = re.sub(r'\r\n', '\n', out) |
@@ -182,10 +192,10 @@ def __init__(self, restricted=False, lite=False, noimage=False, |
182 | 192 | # plus/minus |
183 | 193 | re.compile(r'[([]\+\/-[])]', re.I | re.U), |
184 | 194 | # 3+ uppercase acronym |
185 | | - regex.compile(r'\b([\p{Lu}][\p{Lu}0-9]{2,})\b(?:[(]([^)]*)[)])'), |
| 195 | + re.compile(r'\b([%s][%s0-9]{2,})\b(?:[(]([^)]*)[)])' % (upper_re_s, upper_re_s)), |
186 | 196 | # 3+ uppercase |
187 | | - regex.compile(r"""(?:(?<=^)|(?<=\s)|(?<=[>\(;-]))([\p{Lu}]{3,})(\w*)(?=\s|%s|$)(?=[^">]*?(<|$))""" % |
188 | | - self.pnct_re_s), |
| 197 | + re.compile(r"""(?:(?<=^)|(?<=\s)|(?<=[>\(;-]))([%s]{3,})(\w*)(?=\s|%s|$)(?=[^">]*?(<|$))""" % |
| 198 | + (upper_re_s, self.pnct_re_s)), |
189 | 199 | ] |
190 | 200 |
|
191 | 201 | # These are the changes that need to be made for characters that occur |
@@ -427,7 +437,7 @@ def hasRawText(self, text): |
427 | 437 | True |
428 | 438 |
|
429 | 439 | """ |
430 | | - r = re.compile(r'<(p|blockquote|div|form|table|ul|ol|dl|pre|h\d)[^>]*?>.*</\1>', |
| 440 | + r = re.compile(r'<(pre|p|blockquote|div|form|table|ul|ol|dl|h[1-6])[^>]*?>.*</\1>', |
431 | 441 | re.S).sub('', text.strip()).strip() |
432 | 442 | r = re.compile(r'<(hr|br)[^>]*?/>').sub('', r) |
433 | 443 | return '' != r |
|
0 commit comments