<?php $include_dir="/home/hyper-archives/boost-commit/include"; include("$include_dir/msg-header.inc") ?>
Subject: [Boost-commit] svn:boost r49598 - branches/release/tools/inspect
From: daniel_james_at_[hidden]
Date: 2008-11-05 08:47:38
Author: danieljames
Date: 2008-11-05 08:47:38 EST (Wed, 05 Nov 2008)
New Revision: 49598
URL: http://svn.boost.org/trac/boost/changeset/49598
Log:
Merge inspect from trunk, up to version 49597
Text files modified: 
   branches/release/tools/inspect/inspect.cpp         |    26 +++++--                                 
   branches/release/tools/inspect/link_check.cpp      |   128 ++++++++++++++++++++++++++++----------- 
   branches/release/tools/inspect/link_check.hpp      |     2                                         
   branches/release/tools/inspect/path_name_check.cpp |     1                                         
   4 files changed, 113 insertions(+), 44 deletions(-)
Modified: branches/release/tools/inspect/inspect.cpp
==============================================================================
--- branches/release/tools/inspect/inspect.cpp	(original)
+++ branches/release/tools/inspect/inspect.cpp	2008-11-05 08:47:38 EST (Wed, 05 Nov 2008)
@@ -104,13 +104,7 @@
 
 //  get info (as a string) if inspect_root is svn working copy  --------------//
 
-  string info( const fs::path & inspect_root )
-  {
-    string rev;
-    string repos;
-    fs::path entries( inspect_root / ".svn" / "entries" );
-    fs::ifstream entries_file( entries );
-    if ( entries_file )
+  void extract_info( fs::ifstream & entries_file, string & rev, string & repos )
     {
       std::getline( entries_file, rev );
       std::getline( entries_file, rev );
@@ -118,6 +112,22 @@
       std::getline( entries_file, rev );    // revision number as a string
       std::getline( entries_file, repos );  // repository as a string
     }
+
+  string info( const fs::path & inspect_root )
+  {
+    string rev( "?" );
+    string repos( "unknown" );
+    fs::path entries( inspect_root / ".svn" / "entries" );
+    fs::ifstream entries_file( entries );
+    if ( entries_file )
+      extract_info( entries_file, rev, repos );
+    else
+    {
+      entries = inspect_root / ".." / "svn_info" / ".svn" / "entries";
+      fs::ifstream entries_file( entries );
+      if ( entries_file )
+        extract_info( entries_file, rev, repos );
+    }
     return repos + " at revision " + rev;
   }
 
@@ -146,6 +156,8 @@
       && leaf != ".htaccess"
       // ignore svn files:
       && leaf != ".svn"
+      // ignore OS X directory info files:
+      && leaf != ".DS_Store"
       ;
   }
 
Modified: branches/release/tools/inspect/link_check.cpp
==============================================================================
--- branches/release/tools/inspect/link_check.cpp	(original)
+++ branches/release/tools/inspect/link_check.cpp	2008-11-05 08:47:38 EST (Wed, 05 Nov 2008)
@@ -15,10 +15,13 @@
 
 namespace
 {
-  boost::regex url_regex(
+  boost::regex html_url_regex(
     "<\\s*[^>]*\\s+(?:HREF|SRC)" // HREF or SRC
     "\\s*=\\s*(['\"])(.*?)\\1",
     boost::regbase::normal | boost::regbase::icase);
+  boost::regex css_url_regex(
+    "(\\@import\\s*[\"']|url\\s*\\(\\s*[\"']?)([^\"')]*)",
+    boost::regbase::normal | boost::regbase::icase);
 
   // Regular expression for parsing URLS from:
   // http://tools.ietf.org/html/rfc3986#appendix-B
@@ -26,15 +29,36 @@
     "^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?$",
     boost::regbase::normal);
 
-  // Decode percent encoded characters and html escapsed ampersands,
-  // returns an empty string if there's an error.
-  // The urls should really be fully HTML decoded at the beginning.
-  std::string decode_url(std::string const& url_path) {
+  // Decode html escapsed ampersands, returns an empty string if there's an error.
+  std::string decode_ampersands(std::string const& url_path) {
+    std::string::size_type pos = 0, next;
+    std::string result;
+    result.reserve(url_path.length());
+
+    while((next = url_path.find('&', pos)) != std::string::npos) {
+      result.append(url_path, pos, next - pos);
+      pos = next;
+      if(url_path.substr(pos, 5) == "&amp;") {
+        result += '&'; pos += 5;
+      }
+      else {
+        result += '&'; pos += 1;
+      }
+      break;
+    }
+
+    result.append(url_path, pos, url_path.length());
+
+    return result;
+  }
+
+  // Decode percent encoded characters, returns an empty string if there's an error.
+  std::string decode_percents(std::string const& url_path) {
     std::string::size_type pos = 0, next;
     std::string result;
     result.reserve(url_path.length());
 
-    while((next = url_path.find_first_of("&%", pos)) != std::string::npos) {
+    while((next = url_path.find('%', pos)) != std::string::npos) {
       result.append(url_path, pos, next - pos);
       pos = next;
       switch(url_path[pos]) {
@@ -47,15 +71,6 @@
           pos = next + 3;
           break;
         }
-        case '&': {
-          if(url_path.substr(pos, 5) == "&amp;") {
-            result += '&'; pos += 5;
-          }
-          else {
-            result += '&'; pos += 1;
-          }
-          break;
-        }
       }
     }
 
@@ -64,6 +79,10 @@
     return result;
   }
 
+  bool is_css(const path & p) {
+      return p.extension() == ".css";
+  }
+
 } // unnamed namespace
 
 namespace boost
@@ -77,6 +96,9 @@
      : m_broken_errors(0), m_unlinked_errors(0), m_invalid_errors(0),
        m_bookmark_errors(0)
    {
+       // HTML signatures are already registered by the base class,
+       // 'hypertext_inspector' 
+       register_signature(".css");
    }
 
 //  inspect (all)  -----------------------------------------------------------//
@@ -90,7 +112,7 @@
         m_paths[ relative_to( full_path, fs::initial_path() ) ] |= m_present;
     }
 
-//  inspect ( .htm, .html )  -------------------------------------------------//
+//  inspect ( .htm, .html, .shtml, .css )  -----------------------------------//
 
    void link_check::inspect(
       const string & library_name,
@@ -108,6 +130,9 @@
       boost::match_results< string::const_iterator > what;
       boost::match_flag_type flags = boost::match_default;
 
+      boost::regex const& url_regex =
+          is_css(full_path) ? css_url_regex : html_url_regex;
+
       while( boost::regex_search( start, end, what, url_regex, flags) )
       {
         // what[0] contains the whole string iterators.
@@ -127,11 +152,27 @@
       const path & source_path, bool no_link_errors )
         // precondition: source_path.is_complete()
     {
+      if(!no_link_errors && url.empty()) {
+        ++m_invalid_errors;
+        error( library_name, source_path, string(name()) + " empty URL." );
+        return;
+      }
+
+      // Decode ampersand encoded characters.
+      string decoded_url = is_css(source_path) ? url : decode_ampersands(url);
+      if(decoded_url.empty()) {
+        if(!no_link_errors) {
+          ++m_invalid_errors;
+          error( library_name, source_path, string(name()) + " invalid URL (invalid ampersand encodings): " + url );
+        }
+        return;
+      }
+    
       boost::smatch m;
-      if(!boost::regex_match(url, m, url_decompose_regex)) {
+      if(!boost::regex_match(decoded_url, m, url_decompose_regex)) {
         if(!no_link_errors) {
           ++m_invalid_errors;
-          error( library_name, source_path, string(name()) + " invalid URL: " + url );
+          error( library_name, source_path, string(name()) + " invalid URL: " + decoded_url );
         }
         return;
       }
@@ -156,7 +197,7 @@
           if(!authority_matched) {
             if(!no_link_errors) {
               ++m_invalid_errors;
-              error( library_name, source_path, string(name()) + " no hostname: " + url );
+              error( library_name, source_path, string(name()) + " no hostname: " + decoded_url );
             }
           }
 
@@ -165,13 +206,19 @@
         else if(scheme == "file") {
           if(!no_link_errors) {
             ++m_invalid_errors;
-            error( library_name, source_path, string(name()) + " invalid URL (hardwired file): " + url );
+            error( library_name, source_path, string(name()) + " invalid URL (hardwired file): " + decoded_url );
+          }
+        }
+        else if(scheme == "mailto" || scheme == "ftp" || scheme == "news" || scheme == "javascript") {
+          if ( !no_link_errors && is_css(source_path) ) {
+            ++m_invalid_errors;
+            error( library_name, source_path, string(name()) + " invalid protocol for css: " + decoded_url );
           }
         }
-        else if(!(scheme == "mailto" || scheme == "ftp" || scheme == "news" || scheme == "javascript")) {
+        else {
           if(!no_link_errors) {
             ++m_invalid_errors;
-            error( library_name, source_path, string(name()) + " unknown protocol: " + url );
+            error( library_name, source_path, string(name()) + " unknown protocol: " + decoded_url );
           }
         }
 
@@ -182,16 +229,24 @@
       if(authority_matched) {
         if(!no_link_errors) {
           ++m_invalid_errors;
-          error( library_name, source_path, string(name()) + " invalid URL (hostname without protocol): " + url );
+          error( library_name, source_path, string(name()) + " invalid URL (hostname without protocol): " + decoded_url );
         }
       }
 
       // Check the fragment identifier
-      if(fragment_matched) {
-        if ( !no_link_errors && fragment.find( '#' ) != string::npos )
-        {
-          ++m_bookmark_errors;
-          error( library_name, source_path, string(name()) + " invalid bookmark: " + url );
+      if ( fragment_matched ) {
+        if ( is_css(source_path) ) {
+            if ( !no_link_errors ) {
+              ++m_invalid_errors;
+              error( library_name, source_path, string(name()) + " fragment link in CSS: " + decoded_url );
+            }
+        }
+        else {
+          if ( !no_link_errors && fragment.find( '#' ) != string::npos )
+          {
+            ++m_bookmark_errors;
+            error( library_name, source_path, string(name()) + " invalid bookmark: " + decoded_url );
+          }
         }
 
         // No more to do if it's just a fragment identifier
@@ -199,26 +254,26 @@
       }
 
       // Detect characters banned by RFC2396:
-      if ( !no_link_errors && url.find_first_of( " <>\"{}|\\^[]'" ) != string::npos )
+      if ( !no_link_errors && decoded_url.find_first_of( " <>\"{}|\\^[]'" ) != string::npos )
       {
         ++m_invalid_errors;
-        error( library_name, source_path, string(name()) + " invalid character in URL: " + url );
+        error( library_name, source_path, string(name()) + " invalid character in URL: " + decoded_url );
       }
 
       // Check that we actually have a path.
       if(url_path.empty()) {
         if(!no_link_errors) {
           ++m_invalid_errors;
-          error( library_name, source_path, string(name()) + " invalid URL (empty path in relative url): " + url );
+          error( library_name, source_path, string(name()) + " invalid URL (empty path in relative url): " + decoded_url );
         }
       }
 
       // Decode percent and ampersand encoded characters.
-      string decoded_path = decode_url(url_path);
+      string decoded_path = decode_percents(url_path);
       if(decoded_path.empty()) {
         if(!no_link_errors) {
           ++m_invalid_errors;
-          error( library_name, source_path, string(name()) + " invalid URL (invalid character encodings): " + url );
+          error( library_name, source_path, string(name()) + " invalid URL (invalid character encodings): " + decoded_url );
         }
         return;
       }
@@ -234,7 +289,7 @@
       {
         if(!no_link_errors) {
           ++m_invalid_errors;
-          error( library_name, source_path, string(name()) + " invalid URL (error resolving path): " + url );
+          error( library_name, source_path, string(name()) + " invalid URL (error resolving path): " + decoded_url );
         }
         return;
       }
@@ -256,7 +311,7 @@
       if ( !no_link_errors && (itr->second & m_present) == 0 )
       {
         ++m_broken_errors;
-        error( library_name, source_path, string(name()) + " broken link: " + url );
+        error( library_name, source_path, string(name()) + " broken link: " + decoded_url );
       }
     }
 
@@ -271,7 +326,8 @@
        if ( (itr->second & m_linked_to) != m_linked_to
          && (itr->second & m_nounlinked_errors) != m_nounlinked_errors
          && (itr->first.rfind( ".html" ) == itr->first.size()-5
-          || itr->first.rfind( ".htm" ) == itr->first.size()-4)
+          || itr->first.rfind( ".htm" ) == itr->first.size()-4
+          || itr->first.rfind( ".css" ) == itr->first.size()-4)
          // because they may be redirectors, it is OK if these are unlinked:
          && itr->first.rfind( "index.html" ) == string::npos
          && itr->first.rfind( "index.htm" ) == string::npos )
Modified: branches/release/tools/inspect/link_check.hpp
==============================================================================
--- branches/release/tools/inspect/link_check.hpp	(original)
+++ branches/release/tools/inspect/link_check.hpp	2008-11-05 08:47:38 EST (Wed, 05 Nov 2008)
@@ -36,7 +36,7 @@
     public:
 
       link_check();
-      virtual const char * name() const { return "*HTML*"; }
+      virtual const char * name() const { return "*LINK*"; }
       virtual const char * desc() const { return "invalid bookmarks, invalid urls, broken links, unlinked files"; }
 
       virtual void inspect(
Modified: branches/release/tools/inspect/path_name_check.cpp
==============================================================================
--- branches/release/tools/inspect/path_name_check.cpp	(original)
+++ branches/release/tools/inspect/path_name_check.cpp	2008-11-05 08:47:38 EST (Wed, 05 Nov 2008)
@@ -15,6 +15,7 @@
 #include <string>
 #include <algorithm>
 #include <cctype>
+#include <cstring>
 
 using std::string;