Subject: [Boost-commit] svn:boost r53807 - in trunk/libs/spirit: doc doc/lex example/lex
From: hartmut.kaiser_at_[hidden]
Date: 2009-06-11 22:40:09
Author: hkaiser
Date: 2009-06-11 22:40:08 EDT (Thu, 11 Jun 2009)
New Revision: 53807
URL: http://svn.boost.org/trac/boost/changeset/53807
Log:
Spirit: some documentation work
Text files modified: 
   trunk/libs/spirit/doc/karma.qbk                    |     2                                         
   trunk/libs/spirit/doc/lex/lexer_quickstart2.qbk    |    54 ++++++++++++++++++++++++++-----------   
   trunk/libs/spirit/doc/lex/tokenizing.qbk           |    57 ++++++++++++++++++++++++++++++++++++++- 
   trunk/libs/spirit/doc/preface.qbk                  |     2                                         
   trunk/libs/spirit/doc/qi.qbk                       |     2                                         
   trunk/libs/spirit/doc/spirit2.qbk                  |     6 ++-                                     
   trunk/libs/spirit/example/lex/word_count.cpp       |    18 ++++++------                            
   trunk/libs/spirit/example/lex/word_count_lexer.cpp |    47 ++++++++++++++++++--------------        
   8 files changed, 135 insertions(+), 53 deletions(-)
Modified: trunk/libs/spirit/doc/karma.qbk
==============================================================================
--- trunk/libs/spirit/doc/karma.qbk	(original)
+++ trunk/libs/spirit/doc/karma.qbk	2009-06-11 22:40:08 EDT (Thu, 11 Jun 2009)
@@ -6,7 +6,7 @@
     file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 ===============================================================================/]
 
-[section Karma]
+[section:karma Karma]
 
 [/section    Tutorials]
 [/endsect]
Modified: trunk/libs/spirit/doc/lex/lexer_quickstart2.qbk
==============================================================================
--- trunk/libs/spirit/doc/lex/lexer_quickstart2.qbk	(original)
+++ trunk/libs/spirit/doc/lex/lexer_quickstart2.qbk	2009-06-11 22:40:08 EDT (Thu, 11 Jun 2009)
@@ -62,22 +62,35 @@
 __phoenix2__, but it is possible to insert any C++ function or function object 
 as long as it exposes the interface:
 
-    void f (Range r, Idtype id, bool& matched, Context& ctx);
+    void f (Iterator& start, Iterator& end, pass_flag& matched, Idtype& id, Context& ctx);
 
 [variablelist where:
-    [[`Range r`]            [This is a `boost::iterator_range` holding two 
-                             iterators pointing to the matched range in the
-                             underlying input sequence. The type of the
-                             held iterators is the same as specified while
+    [[`Iterator& start`]    [This is the iterator pointing to the beginning of the 
+                             matched range in the underlying input sequence. The 
+                             type of the iterator is the same as specified while
                              defining the type of the `lexertl_lexer<...>` 
-                             (its first template parameter).]]
-    [[`Idtype id`]          [This is the token id of the type `std::size_t` 
-                             for the matched token.]]
-    [[`bool& matched`]      [This boolean value is pre/initialized to `true`.
-                             If the functor sets it to `false` the lexer
-                             stops calling any semantic actions attached to 
-                             this token and behaves as if the token has not
-                             been matched in the first place.]]
+                             (its first template parameter). The semantic action 
+                             is allowed to change the value of this iterator, 
+                             influencing the matched input sequence.]]
+    [[`Iterator& end`]      [This is the iterator pointing to the end of the 
+                             matched range in the underlying input sequence. The 
+                             type of the iterator is the same as specified while
+                             defining the type of the `lexertl_lexer<...>` 
+                             (its first template parameter). The semantic action 
+                             is allowed to change the value of this iterator, 
+                             influencing the matched input sequence.]]
+    [[`pass_flag& matched`] [This value is pre-initialized to `pass_normal`.
+                             If the semantic action sets it to `pass_fail` the 
+                             lexer behaves as if the token had not been matched 
+                             in the first place. If the semantic action sets it
+                             to `pass_ignore` the lexer ignores the current
+                             token and tries to match the next token from the
+                             input.]]
+    [[`Idtype& id`]         [This is the token id of the type `Idtype` (most of 
+                             the time this will be a `std::size_t`) for the 
+                             matched token. The semantic action is allowed to 
+                             change the value of this token id, influencing the 
+                             id of the created token.]]
     [[`Context& ctx`]       [This is a reference to a lexer specific, 
                              unspecified type, providing the context for the
                              current lexer state. It can be used to access
@@ -89,9 +102,10 @@
 When using a C++ function as the semantic action the following prototypes are 
 allowed as well:
 
-    void f (Range r, Idtype id, bool& matched);
-    void f (Range r, Idtype id);
-    void f (Range r);
+    void f (Iterator& start, Iterator& end, pass_flag& matched, Idtype& id);
+    void f (Iterator& start, Iterator& end, pass_flag& matched);
+    void f (Iterator& start, Iterator& end);
+    void f ();
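+
+For instance, assuming the iterator type used by the lexer is `char const*`, a 
+plain function matching the third of these prototypes could look like this 
+(shown here as a sketch only):
+
+    void on_word(char const*& start, char const*& end)
+    {
+        // simply print the matched part of the input
+        std::cout << std::string(start, end) << std::endl;
+    }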
 
 Even though it is possible to write your own function object implementations (e.g. 
 using Boost.Lambda or Boost.Bind), the preferred way of defining lexer semantic 
@@ -127,6 +141,14 @@
 numbers to the token definitions, starting with the constant defined by 
 `boost::spirit::lex::min_token_id`.
 
+[heading Pulling everything together]
+
+In order to execute the code defined above we still need to create an 
+instance of the lexer type, feed it some input, and construct a pair of 
+iterators allowing us to iterate over the token sequence created by the 
+lexer. The following code shows how to achieve these steps:
+
+[wcl_main]
 
 
 [endsect]
Modified: trunk/libs/spirit/doc/lex/tokenizing.qbk
==============================================================================
--- trunk/libs/spirit/doc/lex/tokenizing.qbk	(original)
+++ trunk/libs/spirit/doc/lex/tokenizing.qbk	2009-06-11 22:40:08 EDT (Thu, 11 Jun 2009)
@@ -8,8 +8,61 @@
 
 [section:lexer_tokenizing Tokenizing Input Data]
 
-[heading:tokenize_function The tokenize() function]
+[heading The tokenize function]
 
-[heading:generate_static_function The generate_static() function]
+The `tokenize()` function is a helper function simplifying the use of a lexer
+in a standalone fashion. Suppose you have a standalone lexer where all the 
+required functionality is implemented inside the lexer semantic actions. A good 
+example of this is the [@../../example/lex/word_count_lexer.cpp word_count_lexer]
+described in more detail in the section __sec_lex_quickstart_2__. 
+As a reminder, here is the token definition class:
+
+[wcl_token_definition]
+
+Tokenizing the given input while discarding all generated tokens is a common 
+application of the lexer. For this reason __lex__ exposes the API function 
+`tokenize()`, which minimizes the code required:
+
+    // Read input from the given file
+    std::string str (read_from_file(1 == argc ? "word_count.input" : argv[1]));
+
+    word_count_tokens<lexer_type> word_count_lexer;
+    std::string::iterator first = str.begin();
+
+    // Tokenize all the input, while discarding all generated tokens
+    bool r = tokenize(first, str.end(), word_count_lexer);
+
+This code is completely equivalent to the more verbose version shown in the
+section __sec_lex_quickstart_2__. The function `tokenize()` returns either
+when the end of the input has been reached (in this case the return value is 
+`true`), or when the lexer could not match any of the token definitions in the 
+input (in this case the return value is `false` and the iterator `first`
+points to the first unmatched character in the input sequence).
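+
+The return value and the updated iterator `first` allow for simple error 
+reporting, for instance (a minimal sketch only):
+
+    if (!r) {
+        std::string rest(first, str.end());
+        std::cerr << "Lexical analysis failed\n"
+                  << "stopped at: \"" << rest << "\"\n";
+    }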
+
+The prototype of this function is:
+
+    template <typename Iterator, typename Lexer>
+    bool tokenize(Iterator& first, Iterator last, Lexer const& lex
+      , typename Lexer::char_type const* initial_state = 0);
+
+[variablelist where:
+    [[Iterator& first]      [The beginning of the input sequence to tokenize. The
+                             value of this iterator will be updated by the 
+                             lexer, pointing to the first unmatched
+                             character of the input after the function 
+                             returns.]]
+    [[Iterator last]        [The end of the input sequence to tokenize.]]
+    [[Lexer const& lex]     [The lexer instance to use for tokenization.]]
+    [[Lexer::char_type const* initial_state]
+                            [This optional parameter can be used to specify 
+                             the initial lexer state for the tokenization.]]
+]
+
+A second overload of the `tokenize()` function allows an arbitrary function 
+or function object to be specified, which will be called for each of the 
+generated tokens:
+
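+    // A sketch only: this assumes the second overload takes the function 
+    // object as its fourth parameter and that tokenization stops as soon 
+    // as the function object returns false.
+    struct count_tokens
+    {
+        count_tokens(std::size_t& c) : count(c) {}
+
+        template <typename Token>
+        bool operator()(Token const&) const
+        {
+            ++count;        // count the generated tokens
+            return true;    // returning false would end the tokenization
+        }
+
+        std::size_t& count;
+    };
+
+    // ...
+    std::size_t count = 0;
+    bool r = tokenize(first, str.end(), word_count_lexer, count_tokens(count));
+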
+[heading The generate_static function]
 
 [endsect]
Modified: trunk/libs/spirit/doc/preface.qbk
==============================================================================
--- trunk/libs/spirit/doc/preface.qbk	(original)
+++ trunk/libs/spirit/doc/preface.qbk	2009-06-11 22:40:08 EDT (Thu, 11 Jun 2009)
@@ -160,7 +160,7 @@
 
 [heading How to use this manual]
 
-Each major section (there are two: __sec_qi_and_karma__, and __sec_lex__) is
+Each major section (there are three: __sec_qi__, __sec_karma__, and __sec_lex__) is
 roughly divided into 3 parts:
 
 # Tutorials: A step by step guide with heavily annotated code. These
Modified: trunk/libs/spirit/doc/qi.qbk
==============================================================================
--- trunk/libs/spirit/doc/qi.qbk	(original)
+++ trunk/libs/spirit/doc/qi.qbk	2009-06-11 22:40:08 EDT (Thu, 11 Jun 2009)
@@ -6,7 +6,7 @@
     file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 ===============================================================================/]
 
-[section Qi]
+[section:qi Qi]
 
 [section    Tutorials]
 [include        qi/tutorial_intro.qbk]
Modified: trunk/libs/spirit/doc/spirit2.qbk
==============================================================================
--- trunk/libs/spirit/doc/spirit2.qbk	(original)
+++ trunk/libs/spirit/doc/spirit2.qbk	2009-06-11 22:40:08 EDT (Thu, 11 Jun 2009)
@@ -67,7 +67,8 @@
 
 [/ Sections ]
 
-[def __sec_qi_and_karma__       [link spirit.qi_and_karma Qi and Karma]]
+[def __sec_qi__                 [link spirit.qi Qi]]
+[def __sec_karma__              [link spirit.karma Karma]]
 [def __sec_qi_karma_attributes__  [link spirit.qi_and_karma.abstracts.attributes Attributes]]
 
 [def __sec_lex__                [link spirit.lex Lex]]
@@ -86,7 +87,8 @@
 [/ References to API descriptions ]
 
 [def __api_tokenize_and_parse__ [link spirit.qi_and_karma.abstracts.parsing_and_generating.the_tokenize_and_phrase_parse___function `tokenize_and_parse()`]]
-[def __api_generate_static__    [link spirit.lex.abstracts.tokenizing_input_data.generate_static_function `generate_static()`]]
+[def __api_tokenize__           [link spirit.lex.lexer_tokenizing.the_tokenize_function `tokenize()`]]
+[def __api_generate_static__    [link spirit.lex.lexer_tokenizing.the_generate_static_function `generate_static()`]]
 
 
 [/ References to classes ]
Modified: trunk/libs/spirit/example/lex/word_count.cpp
==============================================================================
--- trunk/libs/spirit/example/lex/word_count.cpp	(original)
+++ trunk/libs/spirit/example/lex/word_count.cpp	2009-06-11 22:40:08 EDT (Thu, 11 Jun 2009)
@@ -126,16 +126,16 @@
 //[wcp_main
 int main(int argc, char* argv[])
 {
-/*< define the token type to be used: `std::string` is available as the 
+/*<  Define the token type to be used: `std::string` is available as the 
      type of the token attribute 
 >*/  typedef lexertl::token<
         char const*, boost::mpl::vector<std::string>
     > token_type;
 
-/*< define the lexer type to use implementing the state machine
+/*<  Define the lexer type to use implementing the state machine
 >*/  typedef lexertl::lexer<token_type> lexer_type;
 
-/*< define the iterator type exposed by the lexer type
+/*<  Define the iterator type exposed by the lexer type
 >*/  typedef word_count_tokens<lexer_type>::iterator_type iterator_type;
 
     // now we use the types defined above to create the lexer and grammar
@@ -147,12 +147,12 @@
     std::string str (read_from_file(1 == argc ? "word_count.input" : argv[1]));
     char const* first = str.c_str();
     char const* last = &first[str.size()];
-    
-    // Parsing is done based on the the token stream, not the character 
-    // stream read from the input. The function `tokenize_and_parse()` wraps
-    // the passed iterator range `[first, last)` by the lexical analyzer and 
-    // uses its exposed iterators to parse the toke stream.
-    bool r = tokenize_and_parse(first, last, word_count, g);
+
+/*<  Parsing is done based on the token stream, not the character 
+     stream read from the input. The function `tokenize_and_parse()` wraps
+     the passed iterator range `[first, last)` with the lexical analyzer and 
+     uses its exposed iterators to parse the token stream.
+>*/  bool r = tokenize_and_parse(first, last, word_count, g);
 
     if (r) {
         std::cout << "lines: " << g.l << ", words: " << g.w 
Modified: trunk/libs/spirit/example/lex/word_count_lexer.cpp
==============================================================================
--- trunk/libs/spirit/example/lex/word_count_lexer.cpp	(original)
+++ trunk/libs/spirit/example/lex/word_count_lexer.cpp	2009-06-11 22:40:08 EDT (Thu, 11 Jun 2009)
@@ -103,34 +103,39 @@
 //[wcl_main
 int main(int argc, char* argv[])
 {
-    // read input from the given file
-    std::string str (read_from_file(1 == argc ? "word_count.input" : argv[1]));
 
-    // Specifying 'omitted' as the token attribute type generates a token class 
-    // notholding any token attribute at all (not even the iterator_range of the 
-    // matched input sequence), therefor optimizing the token, the lexer, and 
-    // possibly the parser implementation as much as possible. 
-    //
-    // Specifying mpl::false_ as the 3rd template parameter generates a token
-    // type and an iterator, both holding no lexer state, allowing for even more 
-    // aggressive optimizations.
-    //
-    // As a result the token instances contain the token ids as the only data 
-    // member.
-    typedef lexertl::token<char const*, omitted, boost::mpl::false_> token_type;
+/*<  Specifying `omitted` as the token attribute type generates a token class 
+     not holding any token attribute at all (not even the iterator range of the 
+     matched input sequence), therefore optimizing the token, the lexer, and 
+     possibly the parser implementation as much as possible. Specifying 
+     `mpl::false_` as the 3rd template parameter generates a token
+     type and an iterator, both holding no lexer state, allowing for even more 
+     aggressive optimizations. As a result the token instances contain the token 
+     ids as the only data member.
+>*/  typedef lexertl::token<char const*, omitted, boost::mpl::false_> token_type;
 
-    // lexer type
-    typedef lexertl::actor_lexer<token_type> lexer_type;
+/*<  This defines the lexer type to use
+>*/  typedef lexertl::actor_lexer<token_type> lexer_type;
 
-    // create the lexer object instance needed to invoke the lexical analysis 
-    word_count_tokens<lexer_type> word_count_lexer;
+/*<  Create the lexer object instance needed to invoke the lexical analysis 
+>*/  word_count_tokens<lexer_type> word_count_lexer;
 
-    // tokenize the given string, all generated tokens are discarded
+/*<  Read input from the given file, tokenizing all of the input while 
+     discarding the generated tokens
+>*/  std::string str (read_from_file(1 == argc ? "word_count.input" : argv[1]));
     char const* first = str.c_str();
     char const* last = &first[str.size()];
-    bool r = tokenize(first, last, word_count_lexer);
 
-    if (r) {
+/*<  Create a pair of iterators returning the sequence of generated tokens
+>*/  lexer_type::iterator_type iter = word_count_lexer.begin(first, last);
+    lexer_type::iterator_type end = word_count_lexer.end();
+
+/*<  Here we simply iterate over all tokens, making sure to break the loop
+     if an invalid token gets returned from the lexer
+>*/  while (iter != end && token_is_valid(*iter))
+        ++iter;
+
+    if (iter == end) {
         std::cout << "lines: " << word_count_lexer.l 
                   << ", words: " << word_count_lexer.w 
                   << ", characters: " << word_count_lexer.c