<?php $include_dir="/home/hyper-archives/boost-commit/include"; include("$include_dir/msg-header.inc") ?>
Subject: [Boost-commit] svn:boost r52664 - trunk/tools/inspect
From: daniel_james_at_[hidden]
Date: 2009-04-29 17:12:32
Author: danieljames
Date: 2009-04-29 17:12:32 EDT (Wed, 29 Apr 2009)
New Revision: 52664
URL: http://svn.boost.org/trac/boost/changeset/52664
Log:
Check for external links.
Text files modified: 
   trunk/tools/inspect/link_check.cpp |    32 ++++++++++++++++++++++++--------        
   trunk/tools/inspect/link_check.hpp |     3 ++-                                     
   2 files changed, 26 insertions(+), 9 deletions(-)
Modified: trunk/tools/inspect/link_check.cpp
==============================================================================
--- trunk/tools/inspect/link_check.cpp	(original)
+++ trunk/tools/inspect/link_check.cpp	2009-04-29 17:12:32 EDT (Wed, 29 Apr 2009)
@@ -9,6 +9,7 @@
 #include "link_check.hpp"
 #include "boost/regex.hpp"
 #include "boost/filesystem/operations.hpp"
+#include <boost/algorithm/string/case_conv.hpp>
 #include <cstdlib>
 
 namespace fs = boost::filesystem;
@@ -16,8 +17,8 @@
 namespace
 {
   boost::regex html_url_regex(
-    "<\\s*[^>]*\\s+(?:HREF|SRC)" // HREF or SRC
-    "\\s*=\\s*(['\"])(.*?)\\1",
+    "<([^\\s<>]*)\\s*[^<>]*\\s+(?:HREF|SRC)" // HREF or SRC
+    "\\s*=\\s*(['\"])(.*?)\\2",
     boost::regbase::normal | boost::regbase::icase);
   boost::regex css_url_regex(
     "(\\@import\\s*[\"']|url\\s*\\(\\s*[\"']?)([^\"')]*)",
@@ -130,14 +131,21 @@
       boost::match_results< string::const_iterator > what;
       boost::match_flag_type flags = boost::match_default;
 
-      if(is_css(full_path))
+      if(!is_css(full_path))
       {
         while( boost::regex_search( start, end, what, html_url_regex, flags) )
         {
           // what[0] contains the whole string iterators.
-          // what[2] contains the URL iterators.
-          do_url( string( what[2].first, what[2].second ),
-            library_name, full_path, no_link_errors );
+          // what[1] contains the element type iterators.
+          // what[3] contains the URL iterators.
+
+          string type( what[1].first, what[1].second );
+          boost::algorithm::to_lower(type);
+
+          // TODO: Complain if 'link' tags use external stylesheets.
+          do_url( string( what[3].first, what[3].second ),
+            library_name, full_path, no_link_errors,
+            type == "a" || type == "link" );
 
           start = what[0].second; // update search position
           flags |= boost::match_prev_avail; // update flags
@@ -150,7 +158,7 @@
         // what[0] contains the whole string iterators.
         // what[2] contains the URL iterators.
         do_url( string( what[2].first, what[2].second ),
-          library_name, full_path, no_link_errors );
+          library_name, full_path, no_link_errors, false );
 
         start = what[0].second; // update search position
         flags |= boost::match_prev_avail; // update flags
@@ -161,7 +169,7 @@
 //  do_url  ------------------------------------------------------------------//
 
     void link_check::do_url( const string & url, const string & library_name,
-      const path & source_path, bool no_link_errors )
+      const path & source_path, bool no_link_errors, bool allow_external_links )
         // precondition: source_path.is_complete()
     {
       if(!no_link_errors && url.empty()) {
@@ -200,6 +208,14 @@
         //query(m[7]),
         fragment(m[9]);
 
+      // Check for external links
+      if(!allow_external_links && (authority_matched || scheme_matched)) {
+        if(!no_link_errors) {
+          ++m_invalid_errors;
+          error( library_name, source_path, string(name()) + " invalid external link: " + decoded_url );
+        }
+      }
+
       // Protocol checks
       if(scheme_matched) {
         if(scheme == "http" || scheme == "https") {
Modified: trunk/tools/inspect/link_check.hpp
==============================================================================
--- trunk/tools/inspect/link_check.hpp	(original)
+++ trunk/tools/inspect/link_check.hpp	2009-04-29 17:12:32 EDT (Wed, 29 Apr 2009)
@@ -32,7 +32,8 @@
       m_path_map m_paths; // first() is relative initial_path()
 
       void do_url( const string & url, const string & library_name,
-        const path & full_source_path, bool no_link_errors );
+        const path & full_source_path, bool no_link_errors,
+        bool allow_external_links );
     public:
 
       link_check();