$include_dir="/home/hyper-archives/boost-commit/include"; include("$include_dir/msg-header.inc") ?>
From: daniel_james_at_[hidden]
Date: 2008-03-21 12:56:36
Author: danieljames
Date: 2008-03-21 12:56:35 EDT (Fri, 21 Mar 2008)
New Revision: 43752
URL: http://svn.boost.org/trac/boost/changeset/43752
Log:
Decode percent-encoded characters and '&amp;' when inspecting URLs.
Text files modified: 
   trunk/tools/inspect/link_check.cpp |    52 ++++++++++++++++++++++++++++++++++++++- 
   1 files changed, 50 insertions(+), 2 deletions(-)
Modified: trunk/tools/inspect/link_check.cpp
==============================================================================
--- trunk/tools/inspect/link_check.cpp	(original)
+++ trunk/tools/inspect/link_check.cpp	2008-03-21 12:56:35 EDT (Fri, 21 Mar 2008)
@@ -9,6 +9,7 @@
 #include "link_check.hpp"
 #include "boost/regex.hpp"
 #include "boost/filesystem/operations.hpp"
+#include <cstdlib>
 
 namespace fs = boost::filesystem;
 
@@ -19,6 +20,44 @@
     "\\s*=\\s*(['\"])(.*?)\\1",
     boost::regbase::normal | boost::regbase::icase);
 
// Returns the numeric value (0-15) of the hexadecimal digit `c`,
// or -1 if `c` is not a hexadecimal digit.
int hex_digit_value(char c) {
  if(c >= '0' && c <= '9') return c - '0';
  if(c >= 'a' && c <= 'f') return c - 'a' + 10;
  if(c >= 'A' && c <= 'F') return c - 'A' + 10;
  return -1;
}

// Decode percent encoded characters ("%XX") and HTML escaped ampersands
// ("&amp;") in `path`.
//
// Returns the decoded string, or an empty string if there's an error, i.e.
// a '%' that is not followed by exactly two hexadecimal digits. Note that an
// empty `path` also yields an empty string, so callers that treat "empty" as
// "invalid" will reject empty input as well.
//
// A '&' that does not start "&amp;" is copied through unchanged.
// The urls should really be fully HTML decoded at the beginning.
std::string decode_url(std::string const& path) {
  std::string::size_type pos = 0, next;
  std::string result;
  result.reserve(path.length());

  while((next = path.find_first_of("&%", pos)) != std::string::npos) {
    // Copy the literal run preceding the special character.
    result.append(path, pos, next - pos);
    pos = next;

    if(path[pos] == '%') {
      // Need two more characters after the '%'.
      if(path.length() - pos < 3) return std::string();

      // Validate each digit explicitly: strtol would also accept leading
      // whitespace and a sign (e.g. "%+1"), which are not valid escapes.
      int const high = hex_digit_value(path[pos + 1]);
      int const low = hex_digit_value(path[pos + 2]);
      if(high < 0 || low < 0) return std::string();

      result += static_cast<char>(high * 16 + low);
      pos += 3;
    }
    else {
      // path[pos] == '&': collapse the 5-character entity "&amp;" to a
      // single '&'; any other '&' is kept as is. compare() clamps the
      // length to the end of the string, so a trailing '&' is safe.
      result += '&';
      pos += (path.compare(pos, 5, "&amp;") == 0) ? 5 : 1;
    }
  }

  // Copy whatever follows the last special character.
  result.append(path, pos, std::string::npos);

  return result;
}
+
 } // unnamed namespace
 
 namespace boost
@@ -121,13 +160,22 @@
         }
       }
 
+      string decoded_url = decode_url(plain_url);
+      if(decoded_url.empty()) {
+        if(!no_link_errors) {
+          ++m_invalid_errors;
+          error( library_name, source_path, string(name()) + " invalid URL: " + url );
+        }
+        return;
+      }
+
       // strip url of references to current dir
-      if ( plain_url[0]=='.' && plain_url[1]=='/' ) plain_url.erase( 0, 2 );
+      if ( decoded_url[0]=='.' && decoded_url[1]=='/' ) decoded_url.erase( 0, 2 );
 
       // url is relative source_path.branch()
       // convert to target_path, which is_complete()
       path target_path;
-      try { target_path = source_path.branch_path() /= path( plain_url, fs::no_check ); }
+      try { target_path = source_path.branch_path() /= path( decoded_url, fs::no_check ); }
       catch ( const fs::filesystem_error & )
       {
         if(!no_link_errors) {