Replace the strtok loop with the standard library

I have a problem that strtok solves (splitting a string into substrings), but I'm aware that strtok is unsafe. I want to use some more modern parts of the C++ standard library.

What should I use instead?

static int ParseLine(std::string line, std::string seps, int startIdx, std::vector<CNode>& collection) { if (startIdx > collection.size()) { throw std::invalid_argument("the start index is out of range"); } char buf[2000]; strcpy_s(buf, line.c_str()); auto idx = startIdx; for (auto objectType = strtok(buf, seps.c_str()); objectType != nullptr; idx++) { if (idx == collection.size()) { collection.push_back(CNode(idx)); } collection[idx].SetObjectType(objectType); objectType = strtok(nullptr, seps.c_str()); } return (idx - 1); } 

Here is a complete sample that compiles with _CRT_SECURE_NO_WARNINGS:

 #include <cstddef>
 #include <iostream>
 #include <stdexcept>
 #include <string>
 #include <vector>

 // Holds an object type name; the name is "n/a" until one is assigned.
 class CObject
 {
     std::string _objectType;

 public:
     CObject() : _objectType("n/a") {}

     void SetObjectType(std::string objectType) { _objectType = objectType; }
     std::string GetObjectType() const { return _objectType; }
 };

 // A node with an integer id that contains a CObject and forwards the
 // object-type accessors to it.
 class CNode
 {
     int _id;
     CObject _object;

 public:
     explicit CNode(int id) : _id(id) {}

     void SetObjectType(std::string objectType) { _object.SetObjectType(objectType); }
     std::string GetObjectType() const { return _object.GetObjectType(); }
 };

 // Update the collection of nodes with the object types found in the line
 // specified, beginning at index `startIdx`. Nodes are appended (constructed
 // with their index) once the end of the collection is reached.
 //
 // Tokenization follows strtok() rules: every character in `seps` is a
 // delimiter, and runs of delimiters never produce empty tokens. It is done
 // with std::string searches, so there is no fixed-size scratch buffer and no
 // hidden global state.
 //
 // Returns the index of the last node written (`startIdx - 1` when the line
 // contains no tokens).
 // Throws std::invalid_argument if `startIdx` is not in [0, collection.size()].
 static int ParseLine(std::string line, std::string seps, int startIdx, std::vector<CNode>& collection)
 {
     // A negative index is rejected explicitly instead of relying on the
     // signed -> unsigned conversion in the comparison.
     if (startIdx < 0 || static_cast<std::size_t>(startIdx) > collection.size())
     {
         throw std::invalid_argument("the start index is out of range");
     }

     std::size_t idx = static_cast<std::size_t>(startIdx);

     // Skip leading delimiters to find the first token.
     std::size_t tokenStart = line.find_first_not_of(seps);
     while (tokenStart != std::string::npos)
     {
         // npos here means the token runs to the end of the line; substr()
         // clamps the length accordingly.
         const std::size_t tokenEnd = line.find_first_of(seps, tokenStart);

         if (idx == collection.size())
         {
             collection.push_back(CNode(static_cast<int>(idx)));
         }
         collection[idx].SetObjectType(line.substr(tokenStart, tokenEnd - tokenStart));
         ++idx;

         // Skip the delimiter run that follows the token, if any.
         tokenStart = (tokenEnd == std::string::npos)
                          ? std::string::npos
                          : line.find_first_not_of(seps, tokenEnd);
     }

     return static_cast<int>(idx) - 1;
 }

 int main()
 {
     std::string seps = ".";
     // Target node index for each token:
     //                  2   3   4   5   6   7   8   9
     std::string line = "abc.def.ghi.klm.nop.qrs.tuv.wxyz";
     std::vector<CNode> collection { CNode(0), CNode(1), CNode(2), CNode(3), CNode(4), CNode(5) };
     auto startAt = 2;
     try
     {
         auto lastIdx = ParseLine(line, seps, startAt, collection);
         std::cout << lastIdx << std::endl;
         for (const auto& value : collection)
         {
             std::cout << value.GetObjectType() << std::endl;
         }
     }
     catch (const std::invalid_argument& e)
     {
         std::cout << " out of range exception " << e.what() << std::endl;
     }
     return 0;
 }
+5
source share
3 answers

Here are two examples of delimited string separation.

The first uses std::getline with a string stream, specifying a delimiter instead of using the default newline character. Only single character delimiters can be used with this technique.

The second example uses the <regex> library, which allows you to use delimiters of arbitrary length, and also gives you more control over how the delimiter is recognized. Note that the dot character must be escaped in the regex specification, because in the regex language "." is a wildcard that matches any character.

 #include <iostream>
 #include <regex>
 #include <sstream>
 #include <string>
 #include <vector>

 // Splits `line` on the single character '.' by reading fields from a string
 // stream with std::getline. Consecutive delimiters yield empty fields.
 std::vector<std::string> GetlineSplit(std::string const& line)
 {
     static const char sep = '.';
     std::istringstream liness{line};
     std::vector<std::string> fields;
     for (std::string field; std::getline(liness, field, sep); )
     {
         fields.push_back(field);
     }
     return fields;
 }

 // Splits `line` on matches of a regular expression (here a literal dot).
 // Submatch index -1 makes the token iterator return the text between matches
 // rather than the matches themselves.
 std::vector<std::string> RegexSplit(std::string const& line)
 {
     std::regex seps("\\."); // the dot character needs to be escaped in a regex
     std::sregex_token_iterator rit(line.begin(), line.end(), seps, -1);
     return std::vector<std::string>(rit, std::sregex_token_iterator());
 }

 // Prints the fields produced by each splitting technique for a sample line.
 int main()
 {
     std::string line = "abc.def.ghi.klm.nop.qrs.tuv.wxyz";

     std::cout << "getline split result:\n";
     auto fields_getline = GetlineSplit(line);
     for (const auto& field : fields_getline)
     {
         std::cout << field << '\n';
     }

     std::cout << "\nregex split result:\n";
     auto fields_regex = RegexSplit(line);
     for (const auto& field : fields_regex)
     {
         std::cout << field << '\n';
     }
 }
+3
source

I have a Utility class that has nothing but static methods; its constructor is private, so the class cannot be instantiated. I use it for various string operations: converting, removing spaces, splitting, modifying, etc. Here is the string-splitting function from this class:

Utility.h

 #ifndef UTILITY_H
 #define UTILITY_H

 #include <string>
 #include <vector>

 // Collection of static helper functions; not meant to be instantiated.
 class Utility
 {
 public:
     // Splits strStringToSplit at every occurrence of strDelimiter and returns
     // the pieces. When keepEmpty is false, empty pieces are left out of the
     // result.
     static std::vector<std::string> splitString( const std::string& strStringToSplit,
                                                  const std::string& strDelimiter,
                                                  const bool keepEmpty = true );

 private:
     Utility(); // private so that code outside the class cannot create instances
 };

 #endif // UTILITY_H

Utility.cpp

 #include <algorithm>
 #include <string>
 #include <vector>

 #include "Utility.h"

 // splitString()
 // Splits strStringToSplit at every occurrence of strDelimiter (which may be
 // longer than one character) and returns the pieces in order.
 //
 // - An empty delimiter returns the whole input as the only element.
 // - When keepEmpty is false, empty pieces (from adjacent, leading or trailing
 //   delimiters) are left out of the result.
 std::vector<std::string> Utility::splitString( const std::string& strStringToSplit,
                                                const std::string& strDelimiter,
                                                const bool keepEmpty )
 {
     std::vector<std::string> vResult;

     if ( strDelimiter.empty() )
     {
         vResult.push_back( strStringToSplit );
         return vResult;
     }

     std::string::const_iterator itSubStrStart = strStringToSplit.begin();
     while ( true )
     {
         // Qualified call: does not depend on argument-dependent lookup to
         // find the algorithm.
         const std::string::const_iterator itSubStrEnd =
             std::search( itSubStrStart, strStringToSplit.end(),
                          strDelimiter.begin(), strDelimiter.end() );

         std::string strTemp( itSubStrStart, itSubStrEnd );
         if ( keepEmpty || !strTemp.empty() )
         {
             vResult.push_back( strTemp );
         }

         // No further delimiter: the piece just handled was the last one.
         if ( itSubStrEnd == strStringToSplit.end() )
         {
             break;
         }

         // Continue right after the delimiter that was found.
         itSubStrStart = itSubStrEnd + strDelimiter.size();
     }

     return vResult;
 } // splitString

This method requires the standard headers <vector>, <string> and <algorithm>, which are used in almost all applications.

To use this function, we can do a simple test as follows:

 #include <iostream> #include <string> #include <vector> #include <algorithm> #include "Utility.h" int main() { std::string someLongString2( "Hello World How Are You" ); std::vector<std::string> singleWords; singleWords = Utility::splitString( someLongString, " " ); // Space is the delimiter and now each individual word // from the long string are now each a new string stored // in this vector. You can use any character for your delimiter. // Also this function is not limited to having a single character // as its delimiter. You can use a series of characters or specific // words as your delimiter. Such as a comma followed by a space. std::string someLongString2( "Hello, World, How, Are, You" ); singleWords.clear(); singleWords = Utility::splitString( someLongString2, ", " ); return 0; } // main 
+1
source

Below is a solution that replaces strtok using the standard library (based on the accepted answer):

 std::vector<std::string> SplitLine(std::string const& line, std::string seps) { std::regex regxSeps(seps); // the dot character needs to be escaped in a regex std::sregex_token_iterator rit(line.begin(), line.end(), regxSeps, -1); return std::vector<std::string>(rit, std::sregex_token_iterator()); } static int ParseLine(std::string line, std::string seps, size_t startIdx, std::vector<CNode>& collection) { if (startIdx > collection.size()) { throw std::invalid_argument("the start index is out of range"); } auto objectTypes = SplitLine(line, seps); auto idx = startIdx; for (const auto& objectType : objectTypes) { if (idx == collection.size()) { collection.push_back(CNode(idx)); } collection[idx++].SetObjectType(objectType); } return (idx - 1); } int main() { std::string seps = "\\."; // the dot character needs to be escaped in a regex ... } 
0
source

All Articles