Context Navigation

Back to Ticket #5025

Ticket #5025: 5025.2.diff

File 5025.2.diff, 17.9 KB (added by Chris Beaven, 13 years ago)

django/template/defaultfilters.py

diff --git a/django/template/defaultfilters.py b/django/template/defaultfilters.py
index 60fa59e..3d7129d 100644

                from functools import wraps
 from django.template.base import Variable, Library
 from django.conf import settings
 from django.utils import formats
+from django.utils import text as text_utils
 from django.utils.encoding import force_unicode, iri_to_uri
 from django.utils.html import conditional_escape
 from django.utils.safestring import mark_safe, SafeData
-…
+               def title(value):
 title.is_safe = True
 title = stringfilter(title)
+def truncatechars(value, arg):
+    """
+    Truncates a string so that it is no longer than a certain number of
+    characters.
+    Argument: Maximum number of characters in the result.
+    If the argument cannot be converted to an integer, the value is returned
+    unchanged.
+    """
+    try:
+        length = int(arg)
+    except ValueError: # Invalid literal for int().
+        return value # Fail silently.
+    # The Truncator already holds the text; chars() takes only the length
+    # (plus an optional truncation string), so the value is not passed again.
+    return text_utils.Truncator(value).chars(length)
+truncatechars.is_safe = True
+truncatechars = stringfilter(truncatechars)
 def truncatewords(value, arg):
     """
     Truncates a string after a certain number of words.
-…
+               def truncatewords(value, arg):
     Newlines within the string are removed.
     """
-    from django.utils.text import truncate_words
     try:
         length = int(arg)
     except ValueError: # Invalid literal for int().
         return value # Fail silently.
     return truncate_words(value, length)
+    return text_utils.Truncator(value).words(length, truncate=' ...')
 truncatewords.is_safe = True
 truncatewords = stringfilter(truncatewords)
-…
+               def truncatewords_html(value, arg):
     Newlines in the HTML are preserved.
     """
-    from django.utils.text import truncate_html_words
     try:
         length = int(arg)
     except ValueError: # invalid literal for int()
         return value # Fail silently.
+    return truncate_html_words(value, length)
+    return text_utils.Truncator(value).words(length, html=True,
+                                             truncate=' ...')
 truncatewords_html.is_safe = True
 truncatewords_html = stringfilter(truncatewords_html)

django/utils/text.py

diff --git a/django/utils/text.py b/django/utils/text.py
index 00c999c..80198f7 100644

-              a
 import re
+import unicodedata
 from django.utils.encoding import force_unicode
 from django.utils.functional import allow_lazy
 from django.utils.translation import ugettext_lazy, ugettext as _
+from django.utils.functional import allow_lazy, LazyObject
+from django.utils.translation import ugettext_lazy, ugettext as _, pgettext
 from htmlentitydefs import name2codepoint
 # Capitalizes the first letter of a string.
-…
+               def wrap(text, width):
     return u''.join(_generator())
 wrap = allow_lazy(wrap, unicode)
+def truncate_words(s, num, end_text='...'):
+    """Truncates a string after a certain number of words. Takes an optional
+    argument of what should be used to notify that the string has been
+    truncated, defaulting to ellipsis (...)
+    Newlines in the string will be stripped.
+class Truncator(LazyObject):
+    """
+    An object used to truncate text, either by characters or words.
     """
-    s = force_unicode(s)
-    length = int(num)
-    words = s.split()
-    if len(words) > length:
-        words = words[:length]
-        if not words[-1].endswith(end_text):
-            words.append(end_text)
-    return u' '.join(words)
-truncate_words = allow_lazy(truncate_words, unicode)
+def truncate_html_words(s, num, end_text='...'):
+    """Truncates HTML to a certain number of words (not counting tags and
+    comments). Closes opened tags if they were correctly closed in the given
+    html. Takes an optional argument of what should be used to notify that the
+    string has been truncated, defaulting to ellipsis (...).
+    def __init__(self, text):
+        self.__dict__['text'] = text
+        super(Truncator, self).__init__()
+    Newlines in the HTML are preserved.
+    """
+    s = force_unicode(s)
+    length = int(num)
+    if length <= 0:
+        return u''
+    html4_singlets = ('br', 'col', 'link', 'base', 'img', 'param', 'area', 'hr', 'input')
+    # Set up regular expressions
+    re_words = re.compile(r'&.*?;|<.*?>|(\w[\w-]*)', re.U)
+    re_tag = re.compile(r'<(/)?([^ ]+?)(?: (/)| .*?)?>')
+    # Count non-HTML words and keep note of open tags
+    pos = 0
+    end_text_pos = 0
+    words = 0
+    open_tags = []
+    while words <= length:
+        m = re_words.search(s, pos)
+        if not m:
+            # Checked through whole string
+            break
+        pos = m.end(0)
+        if m.group(1):
+            # It's an actual non-HTML word
+            words += 1
+            if words == length:
+                end_text_pos = pos
+            continue
+        # Check for tag
+        tag = re_tag.match(m.group(0))
+        if not tag or end_text_pos:
+            # Don't worry about non tags or tags after our truncate point
+            continue
+        closing_tag, tagname, self_closing = tag.groups()
+        tagname = tagname.lower()  # Element names are always case-insensitive
+        if self_closing or tagname in html4_singlets:
+            pass
+        elif closing_tag:
+            # Check for match in open tags list
+            try:
+                i = open_tags.index(tagname)
+            except ValueError:
+    def _setup(self):
+        self.text = force_unicode(self.text)
+    def add_truncation_text(self, text, truncate=None):
+        """
+        Returns ``text`` combined with the truncation marker ``truncate``.
+        When ``truncate`` is None, a translatable default of
+        ``%(truncated_text)s...`` is used. If the marker contains the
+        ``%(truncated_text)s`` placeholder, ``text`` is interpolated into it;
+        otherwise the marker is appended, unless ``text`` already ends with it.
+        """
+        if truncate is None:
+            truncate = pgettext('String to return when truncating text',
+                u'%(truncated_text)s...', )
+        truncate = force_unicode(truncate)
+        if '%(truncated_text)s' in truncate:
+            return truncate % {'truncated_text': text}
+        # The truncation text didn't contain the %(truncated_text)s string
+        # replacement argument so just append it to the text.
+        if text.endswith(truncate):
+            # But don't append the truncation text if the current text already
+            # ends in this.
+            return text
+        return '%s%s' % (text, truncate)
+    def chars(self, num, truncate=None):
+        """
+        Returns the text truncated to be no longer than the specified number of
+        characters.
+        Takes an optional argument of what should be used to notify that the
+        string has been truncated, defaulting to a translatable string of an
+        ellipsis (...).
+        The text is NFC-normalised before measuring, and the normalised form
+        is what gets returned. Combining characters do not count towards the
+        length, either in the text or in the truncation string.
+        """
+        length = int(num)
+        text = unicodedata.normalize('NFC', self.text)
+        # Calculate the length to truncate to (max length - end_text length)
+        truncate_len = length
+        # Applying the truncation text to an empty string yields just the
+        # marker itself, whose non-combining characters are counted here.
+        for char in self.add_truncation_text('', truncate):
+            if not unicodedata.combining(char):
+                truncate_len -= 1
+                if truncate_len == 0:
+                    # No room is left for any of the original text.
+                    break
+        s_len = 0
+        end_index = None
+        for i, char in enumerate(text):
+            if unicodedata.combining(char):
+                # Don't consider combining characters as adding to the string
+                # length
+                continue
+            s_len += 1
+            # Remember the first index past the space reserved for the text;
+            # this is where the cut happens if truncation turns out to be
+            # needed.
+            if end_index is None and s_len > truncate_len:
+                end_index = i
+            if s_len > length:
+                # Return the truncated string
+                return self.add_truncation_text(text[:end_index or 0],
+                                                truncate)
+        # Return the original string since no truncation was necessary
+        return text
+    chars = allow_lazy(chars)
+    def words(self, num, truncate=None, html=False):
+        """
+        Truncates a string after a certain number of words. Takes an optional
+        argument of what should be used to notify that the string has been
+        truncated, defaulting to ellipsis (...).
+        ``num`` is converted with int(). When ``html`` is true the work is
+        delegated to ``_html_words``; otherwise ``_text_words`` is used.
+        """
+        length = int(num)
+        if html:
+            return self._html_words(length, truncate)
+        return self._text_words(length, truncate)
+    words = allow_lazy(words)
+    def _text_words(self, length, truncate):
+        """
+        Cuts the text down to at most ``length`` whitespace-separated words.
+        The words are re-joined with single spaces, so newlines and other
+        runs of whitespace in the original text are not preserved. When words
+        are dropped, the result is passed through ``add_truncation_text``.
+        """
+        tokens = self.text.split()
+        if len(tokens) <= length:
+            # Nothing to drop; only the whitespace is normalised.
+            return u' '.join(tokens)
+        shortened = u' '.join(tokens[:length])
+        return self.add_truncation_text(shortened, truncate)
+    def _html_words(self, length, truncate):
+        """
+        Truncates HTML to a certain number of words (not counting tags and
+        comments). Closes opened tags if they were correctly closed in the
+        given HTML.
+        Newlines in the HTML are preserved.
+        """
+        if length <= 0:
+            return u''
+        html4_singlets = ('br', 'col', 'link', 'base', 'img', 'param', 'area',
+            'hr', 'input')
+        # Set up regular expressions
+        re_words = re.compile(r'&.*?;|<.*?>|(\w[\w-]*)', re.U)
+        re_tag = re.compile(r'<(/)?([^ ]+?)(?: (/)| .*?)?>')
+        # Count non-HTML words and keep note of open tags
+        pos = 0
+        end_text_pos = 0
+        words = 0
+        open_tags = []
+        while words <= length:
+            m = re_words.search(self.text, pos)
+            if not m:
+                # Checked through whole string
+                break
+            pos = m.end(0)
+            if m.group(1):
+                # It's an actual non-HTML word
+                words += 1
+                if words == length:
+                    end_text_pos = pos
+                continue
+            # Check for tag
+            tag = re_tag.match(m.group(0))
+            if not tag or end_text_pos:
+                # Don't worry about non tags or tags after our truncate point
+                continue
+            closing_tag, tagname, self_closing = tag.groups()
+            # Element names are always case-insensitive
+            tagname = tagname.lower()
+            if self_closing or tagname in html4_singlets:
                 pass
+            elif closing_tag:
+                # Check for match in open tags list
+                try:
+                    i = open_tags.index(tagname)
+                except ValueError:
+                    pass
+                else:
+                    # SGML: An end tag closes, back to the matching start tag,
+                    # all unclosed intervening start tags with omitted end tags
+                    open_tags = open_tags[i + 1:]
             else:
+                # SGML: An end tag closes, back to the matching start tag, all unclosed intervening start tags with omitted end tags
+                open_tags = open_tags[i+1:]
+        else:
+            # Add it to the start of the open tags list
+            open_tags.insert(0, tagname)
+    if words <= length:
+        # Don't try to close tags if we don't need to truncate
+        return s
+    out = s[:end_text_pos]
+    if end_text:
+        out += ' ' + end_text
+    # Close any tags still open
+    for tag in open_tags:
+        out += '</%s>' % tag
+    # Return string
+    return out
+                # Add it to the start of the open tags list
+                open_tags.insert(0, tagname)
+        if words <= length:
+            # Don't try to close tags if we don't need to truncate
+            return self.text
+        out = self.text[:end_text_pos]
+        truncate_text = self.add_truncation_text('', truncate)
+        if truncate_text:
+            out += truncate_text
+        # Close any tags still open
+        for tag in open_tags:
+            out += '</%s>' % tag
+        # Return string
+        return out
+def truncate_words(s, num, end_text='...'):
+    """
+    Deprecated wrapper around ``Truncator(s).words(num, ...)``.
+    Emits a PendingDeprecationWarning on every call. A non-empty ``end_text``
+    is used as the truncation text, preceded by a single space.
+    """
+    import warnings
+    warnings.warn(
+        'This function has been deprecated. Use the Truncator class '
+        'in django.utils.text instead.',
+        category=PendingDeprecationWarning)
+    if end_text:
+        # Separate the marker from the last kept word with a space.
+        truncate = ' %s' % end_text
+    else:
+        truncate = ''
+    return Truncator(s).words(num, truncate=truncate)
+truncate_words = allow_lazy(truncate_words, unicode)
+def truncate_html_words(s, num, end_text='...'):
+    """
+    Deprecated wrapper around ``Truncator(s).words(num, ..., html=True)``.
+    Emits a PendingDeprecationWarning on every call. A non-empty ``end_text``
+    is used as the truncation text, preceded by a single space.
+    """
+    import warnings
+    warnings.warn(
+        'This function has been deprecated. Use the Truncator class '
+        'in django.utils.text instead.',
+        category=PendingDeprecationWarning)
+    if end_text:
+        # Separate the marker from the last kept word with a space.
+        truncate = ' %s' % end_text
+    else:
+        truncate = ''
+    return Truncator(s).words(num, truncate=truncate, html=True)
 truncate_html_words = allow_lazy(truncate_html_words, unicode)
 def get_valid_filename(s):

docs/ref/templates/builtins.txt

diff --git a/docs/ref/templates/builtins.txt b/docs/ref/templates/builtins.txt
index 29bed25..efddf02 100644

                For example::
 If ``value`` is ``"my first post"``, the output will be ``"My First Post"``.
+.. templatefilter:: truncatechars
+truncatechars
+~~~~~~~~~~~~~
+Truncates a string if it is longer than the specified number of characters.
+Truncated strings will end with a translatable ellipsis sequence ("...").
+**Argument:** Number of characters to truncate to
+For example::
+    {{ value|truncatechars:9 }}
+If ``value`` is ``"Joel is a slug"``, the output will be ``"Joel i..."``.
 .. templatefilter:: truncatewords
 truncatewords

docs/releases/1.4.txt

diff --git a/docs/releases/1.4.txt b/docs/releases/1.4.txt
index 496a4c9..d57cc49 100644

                A new helper function,
 ``template.Library`` to ease the creation of template tags that store some
 data in a specified context variable.
+``truncatechars`` template filter
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Added a filter which truncates a string to be no longer than the specified
+number of characters. Truncated strings end with a translatable ellipsis
+sequence ("...").
 .. _backwards-incompatible-changes-1.4:
 Backwards incompatible changes in 1.4

tests/regressiontests/utils/text.py

diff --git a/tests/regressiontests/utils/text.py b/tests/regressiontests/utils/text.py
index f565d87..921fde1 100644

-              a
+# -*- coding: utf-8 -*-
 import unittest
 from django.utils import text
 class TestUtilsText(unittest.TestCase):
+    def test_truncate_chars(self):
+        # Exercises Truncator.chars(): plain truncation, custom truncation
+        # text, unicode normalisation and combining-character handling.
+        truncator = text.Truncator(
+            u'The quick brown fox jumped over the lazy dog.'
+        )
+        self.assertEqual(u'The quick brown fox jumped over the lazy dog.',
+            truncator.chars(100))
+        self.assertEqual(u'The quick brown fox ...',
+            truncator.chars(23))
+        self.assertEqual(u'The quick brown fo.....',
+            truncator.chars(23, '.....'))
+        # Ensure that we normalize our unicode data first
+        nfc = text.Truncator(u'o\xfco\xfco\xfco\xfc')
+        nfd = text.Truncator(u'ou\u0308ou\u0308ou\u0308ou\u0308')
+        self.assertEqual(u'oüoüoüoü', nfc.chars(8))
+        self.assertEqual(u'oüoüoüoü', nfd.chars(8))
+        self.assertEqual(u'oü...', nfc.chars(5))
+        self.assertEqual(u'oü...', nfd.chars(5))
+        # Ensure the final length is calculated correctly when there are
+        # combining characters with no precomposed form, and that combining
+        # characters are not split up.
+        truncator = text.Truncator(u'-B\u030AB\u030A----8')
+        self.assertEqual(u'-B\u030A...', truncator.chars(5))
+        self.assertEqual(u'-B\u030AB\u030A-...', truncator.chars(7))
+        self.assertEqual(u'-B\u030AB\u030A----8', truncator.chars(8))
+        # Ensure the length of the end text is correctly calculated when it
+        # contains combining characters with no precomposed form.
+        truncator = text.Truncator(u'-----')
+        self.assertEqual(u'---B\u030A', truncator.chars(4, u'B\u030A'))
+        self.assertEqual(u'-----', truncator.chars(5, u'B\u030A'))
+        # Make a best effort to shorten to the desired length, but requesting
+        # a length shorter than the ellipsis shouldn't break
+        self.assertEqual(u'...', text.Truncator(u'asdf').chars(1))
     def test_truncate_words(self):
+        truncator = text.Truncator(u'The quick brown fox jumped over the lazy '
+            'dog.')
+        self.assertEqual(u'The quick brown fox jumped over the lazy dog.',
+            truncator.words(10))
+        self.assertEqual(u'The quick brown fox...', truncator.words(4))
+        self.assertEqual(u'The quick brown fox[snip]',
+            truncator.words(4, '[snip]'))
+    def test_truncate_html_words(self):
+        truncator = text.Truncator('<p><strong><em>The quick brown fox jumped '
+            'over the lazy dog.</em></strong></p>')
+        self.assertEqual(u'<p><strong><em>The quick brown fox jumped over the '
+            'lazy dog.</em></strong></p>', truncator.words(10, html=True))
+        self.assertEqual(u'<p><strong><em>The quick brown fox...</em>'
+            '</strong></p>', truncator.words(4, html=True))
+        self.assertEqual(u'<p><strong><em>The quick brown fox....</em>'
+            '</strong></p>', truncator.words(4, '....', html=True))
+        self.assertEqual(u'<p><strong><em>The quick brown fox</em></strong>'
+            '</p>', truncator.words(4, '', html=True))
+    def test_old_truncate_words(self):
         self.assertEqual(u'The quick brown fox jumped over the lazy dog.',
             text.truncate_words(u'The quick brown fox jumped over the lazy dog.', 10))
         self.assertEqual(u'The quick brown fox ...',
-…
+               class TestUtilsText(unittest.TestCase):
         self.assertEqual(u'The quick brown fox ....',
             text.truncate_words('The quick brown fox jumped over the lazy dog.', 4, '....'))
     def test_truncate_html_words(self):
+    def test_old_truncate_html_words(self):
         self.assertEqual(u'<p><strong><em>The quick brown fox jumped over the lazy dog.</em></strong></p>',
             text.truncate_html_words('<p><strong><em>The quick brown fox jumped over the lazy dog.</em></strong></p>', 10))
         self.assertEqual(u'<p><strong><em>The quick brown fox ...</em></strong></p>',

Download in other formats:

Original Format

Issues

Context Navigation

Ticket #5025: 5025.2.diff

django/template/defaultfilters.py

django/utils/text.py

docs/ref/templates/builtins.txt

docs/releases/1.4.txt

tests/regressiontests/utils/text.py

Download in other formats:

Django Links

Learn More

Get Involved

Get Help

Follow Us

Support Us