dlrLibs Utility Libraries: stringManipulation.cpp Source File

00001 
00014 #include <ctype.h>
00015 #include <dlrUtilities/stringManipulation.h>
00016 
00017 namespace dlr {
00018 
00019   namespace utilities {
00020 
00021     // This function takes an input string and returns a string in which
00022     // all special shell characters have been escaped.
00023     std::string
00024     cleanString(const std::string& inputString,
00025                 const std::string& specialCharacters,
00026                 char quoteCharacter,
00027                 bool alreadyQuoted)
00028     {
00029       std::string outputString;
00030       std::string::size_type index = 0;
00031       while(index < inputString.size()) {
00032         // Look for the next special character.
00033         std::string::size_type nextIndex =
00034           inputString.find_first_of(specialCharacters, index);
00035 
00036         // Add all of the normal characters before the next special
00037         // character to the output string.
00038         if(nextIndex != index) {
00039           outputString +=
00040             inputString.substr(index, nextIndex - index);
00041         }
00042 
00043         // If we didn't find a special character, we're done.
00044         if(nextIndex == std::string::npos) {
00045           break;
00046         }
00047 
00048         // If we did find a special character, quote it unless it's a
00049         // quote character itself and we've been told not to quote quote
00050         // characters.
00051         if(alreadyQuoted && (inputString[nextIndex] == quoteCharacter)) {
00052           // It's a quote character, and we're not supposed to quote it,
00053           // so pass it on unchanged.
00054           outputString += inputString[nextIndex];
00055 
00056           // And pass the character that it's quoting, too.
00057           ++nextIndex;
00058           if(nextIndex < inputString.size()) {
00059             outputString += inputString[nextIndex];
00060           }
00061         } else {
00062           // We're allowed to quote this character.
00063           outputString += quoteCharacter;
00064           outputString += inputString[nextIndex];
00065         }
00066       
00067         // And start the next search from one past the last character we
00068         // added.
00069         index = nextIndex + 1;
00070       }
00071 
00072       // Return the escaped version of the string.
00073       return outputString;
00074     }
00075 
00076 
00077     // This function returns a single string comprising copies of all of
00078     // the strings in inputStringVector, interposed by the copies of
00079     // separator.
00080     std::string
00081     joinString(const std::vector<std::string>& inputStringVector,
00082                const std::string& separator)
00083     {
00084       std::ostringstream outputStream;
00085       std::vector<std::string>::const_iterator inputIterator =
00086         inputStringVector.begin();
00087 
00088       if(inputIterator != inputStringVector.end()) {
00089         outputStream << *inputIterator;
00090         ++inputIterator;
00091       }
00092     
00093       while(inputIterator != inputStringVector.end()) {
00094         outputStream << separator << *inputIterator;
00095         ++inputIterator;      
00096       }
00097       return outputStream.str();
00098     }
00099 
00100   
00101     // This function returns a copy of the input argument in which every
00102     // uppercase character has been replaced with its lowercase
00103     // equivalent.
00104     std::string
00105     lowerCaseString(const std::string& inputString)
00106     {
00107       std::ostringstream buffer;
00108       for(size_t index0 = 0; index0 < inputString.size(); ++index0) {
00109         std::string::value_type lowercaseChar =
00110           static_cast<std::string::value_type>(
00111             tolower(inputString[index0]));
00112         buffer << lowercaseChar;
00113       }
00114       return buffer.str();
00115     }
00116   
00117   
00118     // Copies inputString, replacing non-overlapping occurrences of
00119     // target with replacement.
00120     std::string
00121     replaceString(const std::string& inputString,
00122                   const std::string& target,
00123                   const std::string& replacement)
00124     {
00125       std::string outputString;
00126       std::string::size_type startIndex = 0;
00127       while(startIndex <= inputString.size()) {
00128         std::string::size_type targetPosition =
00129           inputString.find(target, startIndex);
00130         if(targetPosition == std::string::npos) {
00131           // target not found, so add the rest of inputString & quit.
00132           outputString += inputString.substr(startIndex);
00133           break;
00134         }
00135         // Target found, so add up to it's beginning.
00136         outputString +=
00137           inputString.substr(startIndex, targetPosition - startIndex);
00138         // And then add replacement.
00139         outputString += replacement;
00140         // Get ready to start searching again.
00141         startIndex = targetPosition + target.size();
00142       }
00143       return outputString;
00144     }
00145   
00146     // Divides inputString around instances of delimiter.
00147     std::vector<std::string>
00148     splitString(const std::string& inputString,
00149                 const std::string& delimiter,
00150                 bool includeNullStrings,
00151                 size_t maxSplit)
00152     {
00153       std::vector<std::string> stringParts;
00154       std::string::size_type startIndex = 0;
00155       size_t splitCount = 0;
00156       while(startIndex < inputString.size()) {
00157         std::string::size_type endIndex =
00158           inputString.find(delimiter, startIndex);
00159         if(endIndex == std::string::npos) {
00160           // npos is hard to work with.  This is more convenient.
00161           endIndex = inputString.size();
00162         }
00163         // Terminate early if we've exceeded a non-zero maxSplit.
00164         if(maxSplit != 0 && splitCount >= maxSplit) {
00165           endIndex = inputString.size();
00166         }
00167         // Delimiter not at very beginning of remaining string?
00168         if(includeNullStrings || (endIndex != startIndex)) {
00169           // Push the next section
00170           stringParts.push_back(inputString.substr(startIndex,
00171                                                    endIndex - startIndex));
00172           ++splitCount;
00173         }
00174         startIndex = endIndex + delimiter.size();
00175       }
00176       if(includeNullStrings && (startIndex == inputString.size())) {
00177         stringParts.push_back(std::string(""));
00178       }
00179       return stringParts;
00180     }
00181 
00182     // This function removes whitespace from the beginning and end of
00183     // a string.
00184     std::string
00185     stripString(const std::string& inputString, const std::string& whiteSpace)
00186     {
00187       std::string::size_type startIndex =
00188         inputString.find_first_not_of(whiteSpace);
00189       std::string::size_type endIndex =
00190         inputString.find_last_not_of(whiteSpace);
00191       if((startIndex == std::string::npos) || (endIndex == std::string::npos)) {
00192         return std::string();
00193       }
00194       return inputString.substr(startIndex, endIndex - startIndex + 1);
00195     }
00196 
00197 
00198     // This function returns a copy of the input argument in which every
00199     // lowercase character has been replaced with its uppercase
00200     // equivalent.
00201     std::string
00202     upperCaseString(const std::string& inputString)
00203     {
00204       std::ostringstream buffer;
00205       for(size_t index0 = 0; index0 < inputString.size(); ++index0) {
00206         std::string::value_type uppercaseChar =
00207           static_cast<std::string::value_type>(
00208             toupper(inputString[index0]));
00209         buffer << uppercaseChar;
00210       }
00211       return buffer.str();
00212     }
00213 
00214 
00215     // This function returns a copy of the input argument in which
00216     // end-of-line markers have been inserted to wrap the string at a
00217     // specified line length.
00218     std::string
00219     wrapString(const std::string& inputString,
00220                const std::string& fillPrefix,
00221                size_t width,
00222                const std::string& whitespace,
00223                const std::string& eolString)
00224     {
00225       enum WrapState {
00226         DLR_WS_STARTING_LINE,
00227         DLR_WS_SEARCHING_FOR_EOL,
00228         DLR_WS_SEARCHING_FOR_WHITESPACE0,
00229         DLR_WS_SEARCHING_FOR_WHITESPACE1,
00230         DLR_WS_EATING_BEGINNING_WHITESPACE,
00231         DLR_WS_EATING_ENDING_WHITESPACE,
00232         DLR_WS_FINISHED
00233       };
00234 
00235       // We'll use an ostringstream to format the result.
00236       std::ostringstream outputStream;
00237 
00238       // The position in inputString at which we'll start searching for
00239       // whitespace & eol characters.
00240       std::string::size_type startIndex = 0;
00241 
00242       // The first column of the current line which is available for
00243       // characters from inputString.  This is always zero for the
00244       // first line, and fillPrefix.size() for subsequent lines.
00245       std::string::size_type startColumn = 0;
00246 
00247       // Whenever this variable is greater than startIndex, it will
00248       // indicate the position of the next newline in the inputString.
00249       std::string::size_type eolIndex = 0;
00250 
00251       // Whenever this variable is greater than startIndex, it will
00252       // indicate the position of the next whitespace in the inputString.
00253       std::string::size_type whitespaceIndex0 = 0;
00254 
00255       // Whenever this variable is greater than whitespaceIndex0, it
00256       // will indicate the position of the next subsequent whitespace in
00257       // the inputString.
00258       std::string::size_type whitespaceIndex1 = 0;
00259 
00260       // Whenever this variable is greater than startIndex, it will
00261       // indicate the position of the next non-whitespace character.
00262       std::string::size_type nonWhitespaceIndex = 0;
00263 
00264       // Loop until done processing the entire string.
00265       WrapState currentState = DLR_WS_STARTING_LINE;
00266       while(currentState != DLR_WS_FINISHED) {
00267 
00268         // Sanity check here to make sure we don't run off the end of
00269         // the string.  This simplifies the code below a little.
00270         if(startIndex >= inputString.size()) {
00271           currentState = DLR_WS_FINISHED;
00272           break;
00273         }
00274 
00275         // This switch implements the parsing algorithm.
00276         switch(currentState) {
00277         case DLR_WS_STARTING_LINE:
00278           if(inputString.size() - startIndex <= width - startColumn) {
00279             // Remaining text will all fit on one line.  We're done!
00280             outputStream << inputString.substr(startIndex, std::string::npos);
00281             currentState = DLR_WS_FINISHED;
00282           } else {
00283             // String doesn't fit.  Look for line breaks.
00284             currentState = DLR_WS_SEARCHING_FOR_EOL;
00285           }
00286           break;
00287         case DLR_WS_SEARCHING_FOR_EOL:
00288           // Look for existing line breaks.
00289           if(eolIndex <= startIndex) {
00290             eolIndex = inputString.find(eolString, startIndex);
00291           }
00292           if(eolIndex - startIndex <= width - startColumn) {
00293             // There's a line break early enough in the string that we
00294             // can just break there without exceeding width.
00295             outputStream
00296               << inputString.substr(startIndex, eolIndex - startIndex)
00297               << eolString;
00298             startIndex = eolIndex + eolString.size();
00299 
00300             // We just printed a line break.  Eat whitespace so we don't
00301             // start a line with spaces, and so we're sure there remains
00302             // non-whitespace to print.
00303             currentState = DLR_WS_EATING_BEGINNING_WHITESPACE;
00304           } else {
00305             // No appropriate line break found.  Look for whitespace at
00306             // which to break.
00307             currentState = DLR_WS_SEARCHING_FOR_WHITESPACE0;
00308           }
00309           break;
00310         case DLR_WS_SEARCHING_FOR_WHITESPACE0:
00311           // Search for next whitespace.
00312           if(whitespaceIndex0 <= startIndex) {
00313             whitespaceIndex0 = inputString.find_first_of(whitespace, startIndex);
00314           }
00315           if(whitespaceIndex0 == std::string::npos) {
00316             // No whitespace available.  Nothing we can do.  Simply
00317             // write out the rest of the string.
00318             outputStream << inputString.substr(startIndex, std::string::npos);
00319             currentState = DLR_WS_FINISHED;
00320           } else if(whitespaceIndex0 - startIndex > width - startColumn) {
00321             // Next whitespace is far enough away that we exceed width.
00322             // We're stuck with an overlong line.
00323             outputStream
00324               << inputString.substr(startIndex, whitespaceIndex0 - startIndex);
00325             startIndex = whitespaceIndex0 + 1;
00326 
00327             // Before we print a linebreak, we need to make sure that
00328             // there's more non-whitespace to print.  In other words, we
00329             // need to be sure that we actually need another line of
00330             // output.
00331             currentState = DLR_WS_EATING_ENDING_WHITESPACE;
00332           } else {
00333             // Found whitespace that's early enough to break without
00334             // exceeding width, but is there more whitespace that's even
00335             // better?
00336             currentState = DLR_WS_SEARCHING_FOR_WHITESPACE1;
00337           }
00338           break;
00339         case DLR_WS_SEARCHING_FOR_WHITESPACE1:
00340           // Make sure we won't run off the end of the string by
00341           // searching for more whitespace.
00342           if(whitespaceIndex0 == inputString.size() - 1) {
00343             // Looks like we've reached the end of the string.  Write it out.
00344             outputStream
00345               << inputString.substr(startIndex, whitespaceIndex0 - startIndex);
00346             currentState = DLR_WS_FINISHED;
00347           } else {
00348             // Search for next whitespace without forgetting the current
00349             // whitespace.
00350             whitespaceIndex1 = whitespaceIndex0;
00351             whitespaceIndex0 =
00352               inputString.find_first_of(whitespace, whitespaceIndex1 + 1);
00353             if(whitespaceIndex0 - startIndex <= width - startColumn) {
00354               // Found whitespace that's early enough to break at without
00355               // exceeding width, but is there another whitespace that's
00356               // even better?
00357               currentState = DLR_WS_SEARCHING_FOR_WHITESPACE1;
00358             } else {
00359               // No more whitespace before we exceed width.  Break the
00360               // string at the best position found.
00361               outputStream
00362                 << inputString.substr(startIndex, whitespaceIndex1 - startIndex);
00363               startIndex = whitespaceIndex1 + 1;
00364 
00365               // Before we print a linebreak, we need to make sure that
00366               // there's more non-whitespace to print.  In other words, we
00367               // need to be sure that we actually need another line of
00368               // output.
00369               currentState = DLR_WS_EATING_ENDING_WHITESPACE;
00370             }
00371           }
00372           break;
00373         case DLR_WS_EATING_BEGINNING_WHITESPACE:
00374           // This state happens immediately after we copy a line break
00375           // from the input string to the output.
00376           //
00377           // Find next non-whitespace character.
00378           if(nonWhitespaceIndex <= startIndex) {
00379             nonWhitespaceIndex =
00380               inputString.find_first_not_of(whitespace, startIndex);
00381           }
00382           if(nonWhitespaceIndex == std::string::npos) {
00383             // No more non-whitespace.  We're done!
00384             currentState = DLR_WS_FINISHED;
00385           } else {
00386             // Found non-whitespace.  Prepare for next line.
00387             outputStream << fillPrefix;
00388             startIndex = nonWhitespaceIndex;
00389             startColumn = fillPrefix.size();
00390             currentState = DLR_WS_STARTING_LINE;
00391           }
00392           break;
00393         case DLR_WS_EATING_ENDING_WHITESPACE:
00394           // This state happens immediately before we print a line break
00395           // that wasn't part of the original string.
00396           // 
00397           // Find next non-whitespace character.
00398           if(nonWhitespaceIndex <= startIndex) {
00399             nonWhitespaceIndex =
00400               inputString.find_first_not_of(whitespace, startIndex);
00401           }
00402           if(nonWhitespaceIndex == std::string::npos) {
00403             // No more non-whitespace.  We're done!
00404             currentState = DLR_WS_FINISHED;
00405           } else {
00406             // Found non-whitespace.  We'll need another line.
00407             outputStream << eolString << fillPrefix;
00408             startIndex = nonWhitespaceIndex;
00409             startColumn = fillPrefix.size();
00410             currentState = DLR_WS_STARTING_LINE;
00411           }
00412           break;
00413         default:
00414           // Will never get here.
00415           break;
00416         }
00417       }
00418 
00419       // Done.  Return the string we just formatted.
00420       return outputStream.str();
00421     }
00422 
00423   } // namespace utilities
00424   
00425 } // namespace dlr