Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Namespace Members   Compound Members   File Members   Related Pages  

XMLNode.hpp

Go to the documentation of this file.
00001 /*==========================================================================
00002  * Copyright (c) 2004 University of Massachusetts.  All Rights Reserved.
00003  *
00004  * Use of the Lemur Toolkit for Language Modeling and Information Retrieval
00005  * is subject to the terms of the software license set forth in the LICENSE
00006  * file included with this software, and also available at
00007  * http://www.lemurproject.org/license.html
00008  *
00009  *==========================================================================
00010 */
00011 
00012 
00013 //
00014 // XMLNode
00015 //
00016 // 8 October 2003 - tds
00017 //
00018 
00019 #ifndef MONITOR_XMLNODE_H
00020 #define MONITOR_XMLNODE_H
00021 
00022 #include <string>
00023 #include <vector>
00024 #include <map>
00025 #include <sstream>
00026 #include <assert.h>
00027 #include <iomanip>
00028 #include <iostream>
00029 
00030 #include "indri/indri-platform.h"
00031 #include "lemur-compat.hpp"
00034 class XMLNode {  // One node of an XML tree: a name, an attribute map, a text value and child node pointers.
        // Only declarations are visible in this block; the member function
        // definitions live elsewhere, so the notes below describe the declared
        // interface and flag anything that has to be confirmed there.
00035 public:
        // Attribute container: std::string keys mapped to std::string values.
00036   typedef std::map<std::string,std::string> MAttributes;
00037   
00038 private:
        // Element name of this node.
00039   std::string _name;
        // Attributes attached to this node.
00040   MAttributes _attributes;
        // Child nodes, stored as raw pointers.
        // NOTE(review): whether ~XMLNode() deletes these is not visible here;
        // the class declares no copy constructor/assignment, so confirm the
        // ownership rules before copying nodes.
00041   std::vector<XMLNode*> _children;
        // Text value of this node.
00042   std::string _value;
00043 
00044 public:
        // Construct a node from a name only.
00047   XMLNode( const std::string& name );
        // Construct a node from a name and a text value.
00051   XMLNode( const std::string& name, const std::string& value );
        // Construct a node from a name and a set of attributes.
00055   XMLNode( const std::string& name, const MAttributes& attributes );
        // Construct a node from a name, a set of attributes and a text value.
00060   XMLNode( const std::string& name, const MAttributes& attributes, const std::string& value );
        // Destructor (see the ownership note on _children).
00062   ~XMLNode();
        // Adds a child node pointer to this node.
        // NOTE(review): presumably transfers ownership of child -- confirm in the definition.
00065   void addChild( XMLNode* child );
        // Adds an attribute given as a key/value pair.
        // NOTE(review): behavior when the key already exists is not visible here.
00069   void addAttribute( const std::string& key, const std::string& value );
        // Sets the text value of this node.
00072   void setValue( const std::string& value );
        // Returns the node name by const reference.
00074   const std::string& getName() const;
        // Returns the node text value by const reference.
00076   const std::string& getValue() const;
        // Returns the attribute map by const reference.
00078   const MAttributes& getAttributes() const;
        // Returns, by value, the attribute called name.
        // NOTE(review): result for a missing attribute is not visible here
        // (presumably an empty string) -- confirm.
00082   std::string getAttribute( const std::string& name ) const;
        // Returns the vector of child pointers by const reference.
00085   const std::vector<XMLNode*>& getChildren() const;
        // Looks up a child by element name and returns a pointer to it.
        // NOTE(review): presumably the first match, and a null pointer when
        // there is none -- confirm before dereferencing.
00089   const XMLNode* getChild( const std::string& name ) const;  
        // Returns, by value, the text value of the child called name.
        // NOTE(review): result for a missing child is not visible here -- confirm.
00093   std::string getChildValue( const std::string& name ) const; 
00094 };
00095 
/// Converts a 64-bit integer to its base-10 string representation.
///
/// Negative values are written with a leading '-', which is the form that
/// string_to_i64 accepts, so the two functions round-trip.  Earlier
/// revisions asserted value >= 0, which made negative input abort in debug
/// builds while release builds formatted it normally; the behavior is now
/// the same in both.
///
/// @param value the number to format
/// @return the decimal digits of value, preceded by '-' if it is negative
inline std::string i64_to_string( INT64 value ) {
  // The stream formats the full INT64 range directly, so there is no need
  // to split the number into billions and a zero-padded remainder.
  std::stringstream number;
  number << value;
  return number.str();
}
00109 
/// Parses a base-10 integer from a string.
///
/// An optional leading '-' produces a negative result.  The remaining
/// characters are taken to be decimal digits; they are not validated, so
/// the result for any other input is unspecified.  An empty string (or a
/// lone "-") yields 0.
///
/// @param str text of the form [-]digits
/// @return the parsed value
inline INT64 string_to_i64( const std::string& str ) {
  INT64 result = 0;
  INT64 sign = 1;
  std::string::size_type i = 0;

  // an optional leading minus sign is consumed here, not by the digit loop
  if( !str.empty() && str[0] == '-' ) {
    sign = -1;
    i = 1;
  }

  // Continue from i (the loop must not restart at 0, or the '-' would be
  // read as a digit).  The sign is applied to every digit as it is added,
  // so a negative number never passes through its positive magnitude; this
  // lets the most negative INT64 value parse without overflowing.
  for( ; i<str.length(); i++ ) {
    result = result * 10 + sign * (str[i] - '0');
  }

  return result;
}
00127 
00129 inline int string_to_int( const std::string& str ) {
00130   return (int) string_to_i64( str );
00131 }
00132 
/// Encodes a block of bytes as base64 text.
///
/// Every 3 input bytes become 4 output characters drawn from
/// A-Z, a-z, 0-9, '+' and '/'.  A final group of 1 or 2 bytes is padded
/// with '=' so that the result length is always a multiple of 4.
///
/// @param input  pointer to the bytes to encode
/// @param length number of bytes readable at input
/// @return the encoded text; empty if input is null or length <= 0
inline std::string base64_encode( const void* input, int length ) {
  // 6-bit value -> output character
  static const char lookup[] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "abcdefghijklmnopqrstuvwxyz"
    "0123456789+/";

  std::string result;

  // A null buffer or a non-positive length has nothing to encode.  Guarding
  // here also keeps a negative length from being converted to a huge
  // unsigned byte count below.
  if( input == 0 || length <= 0 )
    return result;

  const unsigned char* in = static_cast<const unsigned char*>( input );
  const size_t total = static_cast<size_t>( length );
  const size_t remaining = total % 3;            // 0, 1 or 2 trailing bytes
  const size_t mainLength = total - remaining;   // bytes in whole 3-byte groups

  // exactly 4 output characters per (possibly partial) 3-byte group
  result.reserve( ((total + 2) / 3) * 4 );

  // whole groups: 3 bytes -> one 24-bit value -> 4 six-bit characters
  for( size_t i=0; i<mainLength; i+=3 ) {
    const unsigned int value = (in[i] << 16) | (in[i+1] << 8) | in[i+2];

    result.push_back( lookup[(value >> 18) & 0x3f] );
    result.push_back( lookup[(value >> 12) & 0x3f] );
    result.push_back( lookup[(value >>  6) & 0x3f] );
    result.push_back( lookup[ value        & 0x3f] );
  }

  // trailing partial group: missing bytes count as zero bits, and output
  // positions that carry no input bits are written as '='
  if( remaining != 0 ) {
    unsigned int value = in[mainLength] << 16;
    if( remaining == 2 )
      value |= in[mainLength+1] << 8;

    result.push_back( lookup[(value >> 18) & 0x3f] );
    result.push_back( lookup[(value >> 12) & 0x3f] );
    result.push_back( remaining == 2 ? lookup[(value >> 6) & 0x3f] : '=' );
    result.push_back( '=' );
  }

  return result;
}
00232 
/// Decodes base64 text into a caller-supplied byte buffer.
///
/// The input is consumed in groups of 4 characters, each producing 3 bytes;
/// a group ending in "=" produces 2 bytes and one ending in "==" produces 1.
/// Decoding stops early, returning what has been written so far, when a
/// character outside the base64 alphabet is met or when the next group
/// would not fit in the output buffer.  Nothing is ever written beyond
/// outputLength bytes.
///
/// @param output       buffer that receives the decoded bytes
/// @param outputLength capacity of output, in bytes
/// @param input        base64 text; its length is expected to be a multiple
///                     of 4 (asserted in debug builds; otherwise a trailing
///                     partial group is ignored)
/// @return the number of bytes written to output (never negative)
inline int base64_decode( void* output, int outputLength, const std::string& input ) {
  assert( (input.size() % 4) == 0 );

  // Character -> 6-bit value table for the RFC 1521 alphabet; -1 marks a
  // character that is not part of the alphabet.  '=' maps to 0 so that
  // padding contributes zero bits.  All 256 byte values have an entry.
  static const signed char lookup[256] = {
    -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
    -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
    -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   62,   -1,   -1,   -1,   63,
    52,   53,   54,   55,   56,   57,   58,   59,   60,   61,   -1,   -1,   -1,    0,   -1,   -1,
    -1,    0,    1,    2,    3,    4,    5,    6,    7,    8,    9,   10,   11,   12,   13,   14,
    15,   16,   17,   18,   19,   20,   21,   22,   23,   24,   25,   -1,   -1,   -1,   -1,   -1,
    -1,   26,   27,   28,   29,   30,   31,   32,   33,   34,   35,   36,   37,   38,   39,   40,
    41,   42,   43,   44,   45,   46,   47,   48,   49,   50,   51,   -1,   -1,   -1,   -1,   -1,
    -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
    -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
    -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
    -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
    -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
    -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
    -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
    -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1
  };

  char* out = static_cast<char*>( output );
  int trueOutputLength = 0;

  // only complete 4-character groups are decoded, so the loop never reads
  // past the end of the string even when the assert above is compiled out
  const size_t usable = input.size() - (input.size() % 4);

  for( size_t i=0; i<usable; i+=4 ) {
    const unsigned char first  = input[i];
    const unsigned char second = input[i+1];
    const unsigned char third  = input[i+2];
    const unsigned char fourth = input[i+3];

    const int s0 = lookup[first];
    const int s1 = lookup[second];
    const int s2 = lookup[third];
    const int s3 = lookup[fourth];

    // a character outside the alphabet cannot be decoded; stop rather
    // than fold a negative table entry into the output
    if( s0 < 0 || s1 < 0 || s2 < 0 || s3 < 0 )
      break;

    const unsigned int value = (s0 << 18) | (s1 << 12) | (s2 << 6) | s3;

    // trailing '=' characters shorten the group to 2 or 1 bytes
    int produced = 3;
    if( fourth == '=' )
      produced = (third == '=') ? 1 : 2;

    // check the capacity before writing, not after
    if( produced > outputLength - trueOutputLength ) {
      assert( !"base64_decode: output buffer too small" );
      break;
    }

    out[trueOutputLength++] = static_cast<char>( (value >> 16) & 0xff );
    if( produced >= 2 )
      out[trueOutputLength++] = static_cast<char>( (value >> 8) & 0xff );
    if( produced == 3 )
      out[trueOutputLength++] = static_cast<char>( value & 0xff );
  }

  return trueOutputLength;
}
00302 
00306 inline void base64_decode_string( std::string& out, const std::string& in ) {
00307   char* buf = new char[in.size()+1];
00308   size_t outLength = base64_decode( buf, in.size()+5, in );
00309   buf[outLength] = 0;
00310   out = buf;
00311   delete buf;
00312 }
00313 
00314 #endif // MONITOR_XMLNODE_H
00315 

Generated on Wed Nov 3 12:59:08 2004 for Lemur Toolkit by doxygen 1.2.18