Ticket #5418: assert_no_broken_links_with_tests_and_doc.2.diff

File assert_no_broken_links_with_tests_and_doc.2.diff, 19.1 KB (added by Michael Nelson, 17 years ago)

Updated patch that also checks for blank links and internal page links (e.g. href="#content").

  • django/test/testcases.py

     
    186186        self.failIf(template_name in template_names,
    187187            (u"Template '%s' was used unexpectedly in rendering the"
    188188             u" response") % template_name)
     189
     190    def assertNoBrokenLinks(self, response, internal_only=True):
     191        """
     192        Asserts that all the links within the response, when followed, return
     193        a valid page (200), a redirect (301 or 302) or not-modified (304).
     194       
     195        Blank links are also identified (such as <a href="">) as this is helpful
     196        to identify when the url tag in <a href="{% url my-url-name arg1 %}">
     197        fails.
     198       
     199        Internal page links (such as <a href="#content">Skip to content</a>)
     200        are also checked to ensure they are not broken (i.e. that an element
     201        with the id exists on the page).
     202       
     203        Current issues/thoughts:
     204          * Should we follow 302's to verify the page redirects to a 200 result?
     205        """
     206        non_broken_status_codes = (200, 301, 302, 304)
     207
     208        # Create the parser to grab the internal and external links
     209        import HTMLParser
     210
     211        class AnchorParser(HTMLParser.HTMLParser):
     212            external_href_re = re.compile(r'^https?://', re.IGNORECASE)
     213            ignore_href_re = re.compile(r'^(mailto|ftp):', re.IGNORECASE)
     214            internal_id_href_re = re.compile(r'^#(.*)')
     215           
     216            def __init__(self):
     217                self.hrefs_internal = []
     218                self.hrefs_external = []
     219                self.interal_page_link_ids = []
     220                self.element_ids = []
     221                self.reset()
     222       
     223            def handle_starttag(self, tag, attrs):
     224                if tag == "a":                       
     225                    for k, v in attrs:
     226                        if k == "href":
     227                            # For each href that we're not ignoring, save the
     228                            # value and position
     229                            if self.ignore_href_re.match(v):
     230                                pass
     231                            elif self.external_href_re.match(v):
     232                                self.hrefs_external.append((v, self.getpos()))
     233                            elif self.internal_id_href_re.match(v):
     234                                # If this is of the form href="#content" then
     235                                # remember the actual id "content".
     236                                self.interal_page_link_ids.append(
     237                                    (
     238                                        self.internal_id_href_re.match(v).groups()[0],
     239                                        self.getpos()
     240                                    )                             
     241                                )
     242                            else:
     243                                self.hrefs_internal.append((v, self.getpos()))
     244                        elif k == "id":
     245                            # An anchor link can have an id and be linked to
     246                            # via an internal page link too.
     247                            self.element_ids.append(v)
     248                else:
     249                # Go through the attributes of all the other tags so we know all
     250                # the element id's within the page for internal page links.
     251                    for k, v in attrs:
     252                        if k == "id":
     253                            self.element_ids.append(v)
     254
     255        p = AnchorParser()
     256        p.feed(response.content)
     257        p.close()
     258       
     259        # Check the internal links first:
     260        for link, (lineno, offset) in p.hrefs_internal:
     261            self.failIf(
     262                ''==link,
     263                (u"The page contains a link with an empty href on line %(lineno)d.") % {
     264                    'page': 'pagename',
     265                    'lineno': lineno,
     266                    'response': response                                                                   
     267                }
     268            )
     269               
     270            link_response = response.client.get(link)
     271            self.failUnless(
     272                link_response.status_code in non_broken_status_codes,
     273                (u"The link '%(link)s' on line %(lineno)d appears to be broken (status is %(status)s)") % {
     274                    'link': link,
     275                    'lineno': lineno,
     276                    'status': link_response.status_code                                                       
     277                }           
     278            )
     279
     280        # Next, check the internal page links:
     281        for id, (lineno, offset) in p.interal_page_link_ids:
     282            # If the id isn't blank (a bare <a href="#"> is skipped), make sure
     283            # there is an element with the same id somewhere on the page.
     284            if id:
     285                self.failUnless(
     286                    id in p.element_ids,
     287                    (
     288                        u"The internal link to #%(id)s on line %(lineno)d does"
     289                        u" not link to a corresponding element with an "
     290                        u"id=\"%(id)s\"." % {
     291                            'id': id,
     292                            'lineno': lineno                   
     293                        }
     294                    )
     295                )
     296           
     297        # Then check the external links
     298        if not internal_only:
     299            import urllib2
     300            from django.conf import settings
     301
     302            headers = {
     303                "Accept": "text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5",
     304                "Accept-Language": "en-us,en;q=0.5",
     305                "Accept-Charset": "ISO-8859-1,utf-8;q=0.7,*;q=0.7",
     306                "Connection": "close",
     307                "User-Agent": settings.URL_VALIDATOR_USER_AGENT,
     308            }
     309
     310            for link, (lineno, offset) in p.hrefs_external:
     311                try:
     312                    req = urllib2.Request(link, None, headers)
     313                    u = urllib2.urlopen(req)
     314                except ValueError:
     315                    self.fail(
     316                        u"The link '%(link)s' on line %(lineno)d appears to be invalid." % {
     317                            'link': link,
     318                            'lineno': lineno
     319                        }
     320                    )
     321                except: # urllib2.URLError, httplib.InvalidURL, etc.
     322                    self.fail(u"The link '%(link)s' on line %(lineno)d appears to be broken." % {
     323                            'link': link,
     324                            'lineno': lineno
     325                        }
     326                    )
  • tests/regressiontests/test_client_regress/views.py

     
    11from django.contrib.auth.decorators import login_required
    22from django.core.mail import EmailMessage, SMTPConnection
    3 from django.http import HttpResponse, HttpResponseRedirect, HttpResponseServerError
     3from django.http import HttpResponse, HttpResponseRedirect, HttpResponseServerError, HttpResponseNotFound
    44from django.shortcuts import render_to_response
    55
    66def no_template_view(request):
     
    2727def login_protected_redirect_view(request):
    2828    "A view that redirects all requests to the GET view"
    2929    return HttpResponseRedirect('/test_client_regress/get_view/')
    30 login_protected_redirect_view = login_required(login_protected_redirect_view)
    31  No newline at end of file
     30login_protected_redirect_view = login_required(login_protected_redirect_view)
     31
     32def no_broken_links_view(request):
     33    return HttpResponse(
     34    """
     35    Lots of html stuff, including a few local links:
     36      <a href="/test_client_regress/get_view/">here</a>
     37      <a href="/test_client_regress/file_upload">This should redirect to add the /</a>
     38      <a some="attribute" href="/test_client_regress/no_template_view/">
     39        a redirect view. Doesn't matter if closing tag missing.
     40       
     41    Some external links:
     42      <a href="http://djangoproject.com/weblog/">Django blog</a>
     43
     44    Some anchor links should be ignored, such as:
     45      <a href="mailto:me@example.com">Send me an email</a>, or
     46      <a href="ftp://example.com">Download from here</a>
     47   
     48    Including a <a lats of bad="stuff" href="http://djangoproject.com">Missing slash</a>
     49    Whole bunch of other stuff before the page ends.
     50    """                                     
     51    )
     52   
     53def broken_external_link_view(request):
     54    return HttpResponse(
     55    """
     56    Lots of html stuff, including a few local links:
     57      <a href="/test_client_regress/get_view/">here</a>
     58      <a href="/test_client_regress/file_upload">This should redirect to add the /</a>
     59      <a some="attribute" href="/test_client_regress/no_template_view/">
     60        a redirect view. Doesn't matter if closing tag missing.
     61       
     62    Some external links:
     63      <a class="this one's fine" href="http://djangoproject.com/weblog">Django blog</a>
     64    But this one's a
     65      <a href="http://djangoproject.com/badlink.html">Broken link</a>
     66    Whole bunch of other stuff before the page ends.
     67    """                                     
     68    )
     69   
     70def broken_internal_link_view(request):
     71    return HttpResponse(
     72    """
     73    Lots of html stuff, including a few local links:
     74
     75      <a class="test" href="/test_client_regress/broken_view/">A broken view</a>
     76       
     77    """                                     
     78    )
     79
     80def bad_internal_link_view(request):
     81    return HttpResponse(
     82    """
     83    Lots of html stuff, including a few local links:
     84
     85      <a class="test" href="/test_client_regress/bad_view/">A bad view</a>
     86   
     87    """                                     
     88    )
     89   
     90def invalid_external_link_view(request):
     91    return HttpResponse(
     92    """
     93    Lots of html stuff, including a few local links:
     94
     95      <a class="test" href="http://djangoproject&.com">An invalid link</a>
     96       
     97    """                                     
     98    )
     99
     100def blank_link_view(request):
     101    return HttpResponse(
     102    """
     103    If a link uses the url template tag to create the link for the href like
     104    this:
     105     href="{% url my-url-name arg %}"
     106    and fails, it will end up with blank href="", this would be useful to
     107    catch!
     108
     109      <a class="test" href="">A blank link.</a>
     110       
     111    """                                     
     112    )
     113   
     114def internal_page_link_view(request):
     115    return HttpResponse(
     116    """
     117    A link which is just internal to the page, href="#content" needs to
     118    have a matching element with an id="content" on the page.
     119
     120      <a class="test" href="#content">This one should be fine</a>
     121     
     122      <a href="#">This one should be ignored (lots of JS uses)</a>
     123     
     124      <a href="#footer">But this one isn't valid</a> as there's no corresponding
     125      element with the id="footer"
     126     
     127      <div id="content">
     128        Here's the content
     129      </div>
     130    """                                     
     131    )
     132   
     133def broken_view(request):
     134    return HttpResponseServerError()
     135
     136def bad_view(request):
     137    return HttpResponseNotFound()
  • tests/regressiontests/test_client_regress/models.py

     
    233233        except AssertionError, e:
    234234            self.assertEqual(str(e), "The form 'form' in context 0 does not contain the non-field error 'Some error.' (actual errors: )")       
    235235
     236class AssertNoBrokenLinksTests(TestCase):
     237    def test_no_broken_links(self):
     238        "Tests that assertion confirms internal and external non-broken links."
     239       
     240        response = self.client.get('/test_client_regress/no_broken_links_view/')
     241        self.assertEqual(response.status_code, 200)
     242               
     243        self.assertNoBrokenLinks(response, internal_only=False)
     244       
     245    def test_broken_external_link(self):
     246        "Tests that assertion finds broken external links"
     247
     248        response = self.client.get('/test_client_regress/broken_external_link_view/')
     249        self.assertEqual(response.status_code, 200)
     250       
     251        # No internal broken links:
     252        self.assertNoBrokenLinks(response)
     253
     254        # But there is an external broken link:
     255        assertion_raised=False
     256        try:
     257            self.assertNoBrokenLinks(response, internal_only=False)
     258        except AssertionError, e:
     259            assertion_raised = True # Should always get here
     260            self.assertEqual(
     261                str(e),
     262                "The link 'http://djangoproject.com/badlink.html' on line 11"
     263                " appears to be broken."
     264            )
     265           
     266        self.assertTrue(assertion_raised)
     267
     268    def test_invalid_external_link(self):
     269        "Tests that assertion finds invalid external links"
     270
     271        response = self.client.get('/test_client_regress/invalid_external_link_view/')
     272        self.assertEqual(response.status_code, 200)
     273       
     274        assertion_raised=False
     275        try:
     276            self.assertNoBrokenLinks(response, internal_only=False)
     277        except AssertionError, e:
     278            assertion_raised = True # Should always get here
     279            self.assertEqual(
     280                str(e),
     281                "The link 'http://djangoproject&.com' on line 4 appears"
     282                " to be broken."
     283            )
     284           
     285        self.assertTrue(assertion_raised)
     286       
     287    def test_broken_internal_link(self):
     288        "Tests that assertion finds broken internal links"
     289
     290        response = self.client.get('/test_client_regress/broken_internal_link_view/')
     291        self.assertEqual(response.status_code, 200)
     292       
     293        assertion_raised=False
     294        try:
     295            self.assertNoBrokenLinks(response)
     296        except AssertionError, e:
     297            assertion_raised = True # Should always get here
     298            self.assertEqual(
     299                str(e),
     300                "The link '/test_client_regress/broken_view/' on line 4"
     301                " appears to be broken (status is 500)"
     302            )
     303           
     304        self.assertTrue(assertion_raised)
     305       
     306    def test_bad_internal_link(self):
     307        "Tests that assertion finds bad internal links"
     308
     309        response = self.client.get('/test_client_regress/bad_internal_link_view/')
     310        self.assertEqual(response.status_code, 200)
     311       
     312        assertion_raised=False
     313        try:
     314            self.assertNoBrokenLinks(response)
     315        except AssertionError, e:
     316            assertion_raised = True # Should always get here
     317            self.assertEqual(
     318                str(e),
     319                "The link '/test_client_regress/bad_view/' on line 4 appears to"
     320                " be broken (status is 404)"
     321            )
     322           
     323        self.assertTrue(assertion_raised)
     324
     325    def test_blank_link(self):
     326        "Tests that links with blank hrefs are identified appropriately"
     327       
     328        response = self.client.get('/test_client_regress/blank_link_view/')
     329        self.assertEqual(response.status_code, 200)
     330       
     331        assertion_raised=False
     332        try:
     333            self.assertNoBrokenLinks(response)
     334        except AssertionError, e:
     335            assertion_raised = True # Should always get here
     336            self.assertEqual(
     337                str(e),
     338                "The page contains a link with an empty href on line 8."
     339            )
     340           
     341        self.assertTrue(assertion_raised)
     342   
     343    def test_internal_page_link(self):
     344        "Tests that internal page links are valid"
     345       
     346        response = self.client.get('/test_client_regress/internal_page_link_view/')
     347        self.assertEqual(response.status_code, 200)
     348       
     349        assertion_raised=False
     350        try:
     351            self.assertNoBrokenLinks(response)
     352        except AssertionError, e:
     353            assertion_raised = True # Should always get here
     354            self.assertEqual(
     355                str(e),
     356                "The internal link to #footer on line 9 does not link to a"
     357                " corresponding element with an id=\"footer\"."
     358            )
     359           
     360        self.assertTrue(assertion_raised)
     361       
    236362class FileUploadTests(TestCase):
    237363    def test_simple_upload(self):
    238364        fd = open(os.path.join(os.path.dirname(__file__), "views.py"))
  • tests/regressiontests/test_client_regress/urls.py

     
    55    (r'^no_template_view/$', views.no_template_view),
    66    (r'^file_upload/$', views.file_upload_view),
    77    (r'^get_view/$', views.get_view),
    8     (r'^login_protected_redirect_view/$', views.login_protected_redirect_view)
     8    (r'^login_protected_redirect_view/$', views.login_protected_redirect_view),
     9    # All the following urls are for the assertNoBrokenLinks feature:
     10    (r'^no_broken_links_view/$', views.no_broken_links_view),
     11    (r'^broken_external_link_view/$', views.broken_external_link_view),
     12    (r'^broken_internal_link_view/$', views.broken_internal_link_view),
     13    (r'^bad_internal_link_view/$', views.bad_internal_link_view),
     14    (r'^invalid_external_link_view/$', views.invalid_external_link_view),
     15    (r'^blank_link_view/$', views.blank_link_view),
     16    (r'^internal_page_link_view/$', views.internal_page_link_view),
     17    (r'^broken_view/$', views.broken_view),
     18    (r'^bad_view/$', views.bad_view)
    919)
  • AUTHORS

     
    210210    Jason McBrayer <http://www.carcosa.net/jason/>
    211211    mccutchen@gmail.com
    212212    michael.mcewan@gmail.com
     213    Michael Nelson <http://liveandletlearn.net/>
    213214    mikko@sorl.net
    214215    Slawek Mikula <slawek dot mikula at gmail dot com>
    215216    mitakummaa@gmail.com
  • docs/testing.txt

     
    846846
    847847    The name is a string such as ``'admin/index.html'``.
    848848
     849``assertNoBrokenLinks(response, internal_only=True)``
     850    Asserts that all the anchor links within the response are not broken (i.e.
     851    result in a status of 200 or a redirect). By default only links internal to
     852    the site will be checked (i.e. those not beginning with http:// or https://).
     853   
     854    Internal page links such as <a href="#content"> are checked to ensure
     855    that they are not broken (ie. that an element with the id="content" exists
     856    on the page).
     857   
     858    Blank links, such as <a href=""> are also identified, which is helpful
     859    to check when the url tag in <a href="{% url my-url-name arg1 %}"> fails.
     860   
     861    Note: As this assertion effectively clicks on all the links within the
     862    response, care needs to be taken if any link has a side effect (such as
     863    modifying your database).
     864
    849865E-mail services
    850866---------------
    851867
Back to Top