// NOTE(review): the header names on the original #include lines were lost;
// the list below is reconstructed from the standard-library names this file
// uses -- confirm against the original submission.
#include <algorithm>
#include <cctype>
#include <exception>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <iterator>
#include <map>
#include <string>
#include <utility>
#include <vector>

/* Lab 5 - TDDE18 Wordlists lab - Main file */

// Kept because the rest of the file uses unqualified standard names.
using namespace std;

//////////////////////////////////
/// User interaction functions ///
//////////////////////////////////

//TODO: Non fundamental types taken as parameters should not be copied
//as they might be large. Parameters which are not changed should also
//be const. --> done.

/// Help function: writes an error message followed by the usage line to
/// standard error.
///
/// @param str  Description of what went wrong.
void print_err(const string & str)
{
    std::cerr << "Error: " << str << '\n';
    std::cerr << "Usage: ./a.out FILE [-a] [-f] [-o N]" << '\n';
}

// Print final wordlist

/// For alphabetic ordering: one "word count" row per entry, with the word
/// left-aligned in a column of max_length + 1 characters.
///
/// @param wl          (word, occurrence count) pairs, printed in the given order.
/// @param max_length  Length of the longest word, used as the column width.
void print_a(const std::vector<std::pair<std::string, int>> & wl, int max_length)
{
    for (const auto & entry : wl) {
        std::cout << std::left << std::setw(max_length + 1) << entry.first
                  << entry.second << '\n';
    }
}

/// For frequency ordering: one "word count" row per entry, with the word
/// right-aligned in a column of max_length characters.
///
/// @param wl          (word, occurrence count) pairs, printed in the given order.
/// @param max_length  Length of the longest word, used as the column width.
void print_f(const std::vector<std::pair<std::string, int>> & wl, int max_length)
{
    for (const auto & entry : wl) {
        // Alignment is set explicitly so the result does not depend on
        // whatever adjustment flag was last left on std::cout.
        std::cout << std::right << std::setw(max_length) << entry.first
                  << " " << entry.second << '\n';
    }
}

/// For -o argument: prints the words in the given order, separated by single
/// spaces and wrapped onto several lines.
///
/// A word is appended to the current line only while
/// line length + word length stays below limit - 1; otherwise the current
/// line is printed and the word starts a new one. A word that is too long
/// on its own is printed alone on its line.
///
/// @param list   Words to print; empty strings are ignored.
/// @param limit  Line width limit given by the user.
void print_o(const std::vector<std::string> & list, int limit)
{
    // Computed in a wider type so that limit - 1 cannot overflow.
    const long long threshold = static_cast<long long>(limit) - 1;

    std::string line{};
    for (const std::string & word : list) {
        if (word.empty()) {
            // An empty entry would only add a stray separator to the line.
            continue;
        }
        if (line.empty()) {
            // Nothing pending: start a line without emitting a blank one,
            // even when the word alone exceeds the limit.
            line = word;
        } else if (static_cast<long long>(line.length() + word.length()) >= threshold) {
            std::cout << line << '\n';
            line = word;
        } else {
            line += ' ';
            line += word;
        }
    }

    // Flush the last pending line, if any.
    if (!line.empty()) {
        std::cout << line << '\n';
    }
}

/////////////////////////
/// Utility functions ///
/////////////////////////

//TODO: This function does too much. Divide the tasks into more
//functions which clearly state what they handle. For example:
//remove_trash, is_valid, etc. --> done.

//TODO: Enumerating the entire alphabet should be avoided since
//missing a character is easy. Use the library cctype for this
//instead. --> done.
/// Check if a given word is valid.
///
/// A word is valid when it is at least 3 characters long, consists only of
/// letters and hyphens, neither starts nor ends with a hyphen, and contains
/// no two consecutive hyphens.
///
/// @param word  Candidate word (already cleaned by remove_trash).
/// @return true if the word satisfies every rule above, false otherwise
///         (including for an empty string).
bool is_valid(const std::string & word)
{
    // The length check comes first so that front()/back() are never
    // evaluated on an empty string.
    if (word.length() < 3) {
        return false;
    }
    if (word.front() == '-' || word.back() == '-') {
        return false;
    }
    if (word.find("--") != std::string::npos) {
        return false;
    }
    return std::all_of(word.begin(), word.end(), [](const char c) {
        // <cctype> functions require a value representable as unsigned char;
        // passing a negative char is undefined behaviour.
        return std::isalpha(static_cast<unsigned char>(c)) != 0 || c == '-';
    });
}

/// Ordering predicate on string length.
///
/// @return true if a is strictly shorter than b.
bool cmp_length(const std::string & a, const std::string & b)
{
    return a.length() < b.length();
}

//Comment: Why unsigned long int? find_first.. will return an index in
//the string. This index will not be too large for a regular int to
//handle. --> Because the find_* functions return std::string::size_type;
//storing the result in an int makes GCC warn about the comparison with npos:
//
//  warning: comparison of integer expressions of different signedness:
//  'int' and 'const size_type' {aka 'const long unsigned int'} [-Wsign-compare]
//
//std::string::size_type is spelled out below instead of 'long unsigned int'
//so that the comparison with npos also holds where the two types differ
//in width.

/// Remove leading and trailing junk from a word and lower-case it.
///
/// Steps, in order:
///  1. strip leading ( " ' and trailing ! ? ; , : . " ' ) characters;
///  2. strip a trailing 's;
///  3. convert the word to lower case.
///
/// @param word  Word to clean; modified in place.
/// @return A copy of the cleaned word.
std::string remove_trash(std::string & word)
{
    // Here we remove head and tail junk. A lot of the complexity
    // is due to weird cases like "(?!)" or ")?!(", which are left untouched
    // because no sensible [begin, end] range exists for them.
    const std::string::size_type begin = word.find_first_not_of("(\"\'");
    const std::string::size_type end = word.find_last_not_of("!?;,:.\"\')");
    if (begin != std::string::npos && end != std::string::npos && begin < end) {
        // Tail first, so that 'end' is still a valid index when it is used.
        word.erase(end + 1);
        word.erase(0, begin);
    }

    // Manage the 's tail trash (only meaningful for words of 2+ characters).
    const std::string::size_type len = word.length();
    if (len >= 2 && word[len - 2] == '\'' && word[len - 1] == 's') {
        word.erase(len - 2);
    }

    // Lowering case; the parameter is unsigned char for the same <cctype>
    // reason as in is_valid.
    std::transform(word.begin(), word.end(), word.begin(),
                   [](const unsigned char c) {
                       return static_cast<char>(std::tolower(c));
                   });
    return word;
}

//TODO: See lab instructions for correct error message in case of
//invalid input. --> done.
Note : In some cases, the PDF does not //specify what should be displayed, in this case we have displayed //the message that suits us the most suitable int main(int argc, char* argv[]) { //////////////////////////// /// Arguments management /// //////////////////////////// /// Args parsing // Basic checks if (argc <= 1 ){ print_err("No arguments given."); return 1; } else if (argc == 2){ print_err("Second argument missing or invalid."); return 2; } // Checking file vector args {argv, argv + argc}; string file_name{args[1]}; ifstream file{}; file.open(file_name); if (! file.is_open()){ print_err("Second argument missing or invalid."); return 3; } // Checking command string param{args[2][1]}; int line_length{}; if (args[2][0] != '-' || (param != "a" && param != "f" && param != "o")){ print_err("Third argument invalid."); return 4; } else if (param == "o"){ if (argc != 4) { print_err("You must specify a N value."); return 6; } try { line_length = stoi(args[3]); } catch (std::exception& e){ print_err("Invalid N value. (" + string(e.what()) + ")"); return 5; } } ///////////////////// /// File analysis /// ///////////////////// int max_length{0}; istream_iterator iis {file}; istream_iterator eof; std::vector dirty_words{iis, eof}; std::vector all(dirty_words.size()); std::map collection; //TODO: Here you use for_each to emulate a for-loop. This is not //an appropriate algorithm for this task. What you want to do is //transform each element in dirty_words. Hint: Check out the //transform algorithm. See TODO about dividing the purifier //function before fixing this TODO. --> done. //TODO: This algorithm does too much. Break it down into more //appropriate algorithms. For example, use std::max_element to //find the largest element, etc. --> done. 
std::transform(dirty_words.begin(), dirty_words.end(), dirty_words.begin(), remove_trash); std::copy_if(dirty_words.begin(), dirty_words.end(), all.begin(), is_valid); max_length = (*std::max_element(all.begin(), all.end(), cmp_length)).length(); // Counting word in a std::map std::for_each(all.begin(), all.end(), [&collection](string word) { if (word != "") collection[word]++; }); // Converting map collection into vector wordlist to allow sorting std::vector> wordlist; for_each(collection.begin(), collection.end(), [&wordlist](auto pair){ wordlist.push_back(pair); }); // Output selection and sorting (if needed) if (param == "f"){ std::sort(wordlist.begin(), wordlist.end(), [](std::pair const & a, std::pair const & b) { return a.second > b.second; }); print_f(wordlist, max_length); } else if (param == "a"){ std::sort(wordlist.begin(), wordlist.end(), [](std::pair const & a, std::pair const & b) { return a.first < b.first; }); print_a(wordlist, max_length); } else { print_o(all, line_length); } return 0; }