Ticket #7267: clean_html_unicode_r7601.patch
File clean_html_unicode_r7601.patch, 1.3 KB (added by , 16 years ago) |
---|
-
django/utils/html.py
@@ -13,7 +13,7 @@
 TRAILING_PUNCTUATION = ['.', ',', ')', '>', '\n', '>']
 
 # List of possible strings used for bullets in bulleted lists.
-DOTS = ['·', '*', '\xe2\x80\xa2', '•', '•', '•']
+DOTS = [u'·', u'*', u'\xe2\x80\xa2', u'•', u'•', u'•']
 
 unencoded_ampersands_re = re.compile(r'&(?!(\w+|#\d+);)')
 word_split_re = re.compile(r'(\s+)')
@@ -151,13 +151,13 @@
     text = html_gunk_re.sub('', text)
     # Convert hard-coded bullets into HTML unordered lists.
     def replace_p_tags(match):
-        s = match.group().replace('</p>', '</li>')
+        s = match.group().replace(u'</p>', u'</li>')
         for d in DOTS:
-            s = s.replace('<p>%s' % d, '<li>')
+            s = s.replace(u'<p>%s' % d, u'<li>')
         return u'<ul>\n%s\n</ul>' % s
     text = hard_coded_bullets_re.sub(replace_p_tags, text)
     # Remove stuff like "<p> </p>", but only if it's at the bottom
     # of the text.
-    text = trailing_empty_content_re.sub('', text)
+    text = trailing_empty_content_re.sub(u'', text)
     return text
 clean_html = allow_lazy(clean_html, unicode)