123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243 |
#include <algorithm>
#include <cctype>
#include <exception>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <iterator>
#include <map>
#include <string>
#include <utility>
#include <vector>
-
-
- /*
- Lab 5 - TDDE18
- Wordlists lab - Main file
- */
-
- using namespace std;
-
- //////////////////////////////////
- /// User interaction functions ///
- //////////////////////////////////
-
- //TODO: Non fundamental types taken as parameters should not be copied
- //as they might be large. Parameters which are not changed should also
- //be const. --> done.
-
// Help function
/// Reports a command-line problem on the standard error stream.
///
/// Two lines are written: the message prefixed with "Error: ", then a
/// one-line usage summary of the program.
///
/// @param str  Description of what went wrong.
void print_err(const std::string & str){
    std::ostream & err = std::cerr;
    err << "Error: " << str << std::endl;
    err << "Usage: ./a.out FILE [-a] [-f] [-o N]" << std::endl;
}
-
- // Print final wordlist
/// For alphabetic ordering
///
/// Prints one line per entry of `wl` on standard output: the word,
/// left-aligned and padded to `max_length + 1` columns, immediately
/// followed by its count.
///
/// @param wl          (word, count) pairs, printed in the order given.
/// @param max_length  Width of the longest word; sets the column width.
///
/// Note: `std::left` is a sticky manipulator, so std::cout stays
/// left-aligned after this call.
void print_a(const std::vector<std::pair<std::string,int>> & wl, int max_length) {
    // The entries are read through a const vector, so the lambda must take
    // them by const reference (a non-const reference cannot bind here).
    std::for_each(wl.begin(), wl.end(),
                  [max_length](const std::pair<std::string,int> & entry){
        std::cout << std::left << std::setw(max_length + 1)
                  << entry.first << entry.second << '\n';
    });
}
-
/// For frequency ordering
///
/// Prints one line per entry of `wl` on standard output: the word,
/// right-aligned in a field of `max_length` columns, a single space,
/// then its count.
///
/// @param wl          (word, count) pairs, printed in the order given.
/// @param max_length  Width of the longest word; sets the column width.
void print_f(const std::vector<std::pair<std::string,int>> & wl, int max_length) {
    // The entries are read through a const vector, so the lambda must take
    // them by const reference (a non-const reference cannot bind here).
    std::for_each(wl.begin(), wl.end(),
                  [max_length](const std::pair<std::string,int> & entry){
        // std::right is requested explicitly so the layout does not depend
        // on whatever alignment flag was last left on std::cout.
        std::cout << std::right << std::setw(max_length)
                  << entry.first << ' ' << entry.second << '\n';
    });
}
-
/// For -o argument
///
/// Prints the words of `list` in their original order on standard output,
/// separated by single spaces and wrapped onto several lines.
///
/// A word is appended to the current line as long as the combined length of
/// the line and the word stays below `limit - 1`; otherwise the current line
/// is printed and the word starts a new one. A word that is too long on its
/// own is still printed, alone on its line.
///
/// Empty words are skipped, and nothing is printed when there is no word at
/// all, so the output never contains blank lines or trailing spaces.
///
/// @param list   Words to print, in output order.
/// @param limit  Line-length limit used for wrapping.
void print_o(const std::vector<std::string> & list, int limit) {
    // Widened so that `limit - 1` cannot overflow for extreme values.
    const long long threshold = static_cast<long long>(limit) - 1;
    std::string line{};

    std::for_each(list.begin(), list.end(),
                  [&line, threshold](const std::string & word){
        if (word.empty()) {
            return;                 // nothing to print for this entry
        }
        if (line.empty()) {
            line = word;            // first word of a line: never flush an empty line
            return;
        }
        if (static_cast<long long>(line.length() + word.length()) >= threshold) {
            std::cout << line << '\n';
            line = word;
        }
        else {
            line += ' ';
            line += word;
        }
    });

    // Flush the last, partially filled line (if any).
    if (!line.empty()) {
        std::cout << line << '\n';
    }
}
-
-
- /////////////////////////
- /// Utility functions ///
- /////////////////////////
-
- //TODO: This function does too much. Divide the tasks into more
- //functions which clearly state what they handle. For example:
- //remove_trash, is_valid, etc. --> done.
-
- //TODO: Enumerating the entire alphabet should be avoided since
- //missing a character is easy. Use the library cctype for this
- //instead. --> done.
-
// Check if a given word is valid
///
/// A word is valid when all of the following hold:
///   - it is at least three characters long;
///   - it neither starts nor ends with a hyphen;
///   - it does not contain two consecutive hyphens;
///   - every character is a letter (per std::isalpha) or a hyphen.
///
/// @param word  Candidate word; may be empty.
/// @return true when the word satisfies every rule above, false otherwise.
bool is_valid(const std::string & word) {
    // The length test comes first so that front()/back() are never evaluated
    // on an empty string.
    if (word.length() < 3) {
        return false;
    }
    if (word.front() == '-' || word.back() == '-') {
        return false;
    }
    if (word.find("--") != std::string::npos) {
        return false;
    }
    return std::all_of(word.begin(), word.end(), [](const char c){
        // std::isalpha requires a value representable as unsigned char;
        // passing a negative char directly is undefined behaviour.
        return std::isalpha(static_cast<unsigned char>(c)) != 0 || c == '-';
    });
}
-
-
/// Length-based "shorter than" comparison for two strings.
///
/// @param a  Left-hand string.
/// @param b  Right-hand string.
/// @return true when `a` has strictly fewer characters than `b`.
bool cmp_length(const std::string & a, const std::string & b){
    const auto len_a = a.size();
    const auto len_b = b.size();
    return len_b > len_a;
}
-
//Comment: Why unsigned long int? find_first.. will return an index in
//the string. This index will not be too large for a regular int to
//handle. --> Because an unsigned type is the return type of these
//functions; storing the result in an int makes GCC perform an implicit
//conversion and emit a warning at compilation:
//
//main.cc:103:15: warning: comparison of integer expressions of different signedness:
//    ‘int’ and ‘const size_type’ {aka ‘const long unsigned int’} [-Wsign-compare]
//103 |   if (begin != std::string::npos && end != std::string::npos && begin < end){
//
// The indices below are therefore declared as std::string::size_type, which
// is exactly the type returned by the find_* member functions.

/// Cleans a raw token in place and returns a copy of the cleaned result.
///
/// Steps, in order:
///   1. strip leading junk characters  ( " '  and trailing junk characters
///      ! ? ; , : . " ' )  when at least two characters remain between them;
///   2. drop a trailing possessive "'s";
///   3. convert the word to lower case.
///
/// @param word  Token to clean; modified in place.
/// @return A copy of `word` after cleaning.
std::string remove_trash(std::string & word){
    static const char leading_junk[]  = "(\"\'";
    static const char trailing_junk[] = "!?;,:.\"\')";

    // Here we remove head and tail junk. A lot of the complexity
    // is due to weird cases like "(?!)" or ")?!("
    const std::string::size_type begin = word.find_first_not_of(leading_junk);
    const std::string::size_type end   = word.find_last_not_of(trailing_junk);
    if (begin != std::string::npos && end != std::string::npos && begin < end){
        // The tail is erased first so that `end` is still a valid index;
        // no second search is needed.
        word.erase(end + 1);
        word.erase(0, begin);
    }

    // Manage the 's tail trash. The length is checked first: looking two
    // characters back from the end of a shorter word would be out of bounds.
    const std::string::size_type len = word.length();
    if (len >= 2 && word[len - 2] == '\'' && word[len - 1] == 's')
        word.erase(len - 2);

    // Lowering case. std::tolower requires a value representable as
    // unsigned char, hence the parameter type of the lambda.
    std::transform(word.begin(), word.end(), word.begin(), [](unsigned char c){
        return static_cast<char>(std::tolower(c));
    });
    return word;
}
-
-
- //TODO: See lab instructions for correct error message in case of
- //invalid input. --> done. Note: in some cases the PDF does not
- //specify what should be displayed; in those cases we display
- //the message we found most suitable.
-
- int main(int argc, char* argv[]) {
-
- ////////////////////////////
- /// Arguments management ///
- ////////////////////////////
-
- /// Args parsing
- // Basic checks
- if (argc <= 1 ){
- print_err("No arguments given.");
- return 1;
- }
- else if (argc == 2){
- print_err("Second argument missing or invalid.");
- return 2;
- }
-
- // Checking file
- vector<string> args {argv, argv + argc};
- string file_name{args[1]};
-
- ifstream file{};
- file.open(file_name);
-
- if (! file.is_open()){
- print_err("Second argument missing or invalid.");
- return 3;
- }
-
- // Checking command
- string param{args[2][1]};
- int line_length{};
-
- if (args[2][0] != '-' || (param != "a" && param != "f" && param != "o")){
- print_err("Third argument invalid.");
- return 4;
- }
- else if (param == "o"){
- if (argc != 4) {
- print_err("You must specify a N value.");
- return 6;
- }
- try {
- line_length = stoi(args[3]);
- }
- catch (std::exception& e){
- print_err("Invalid N value. (" + string(e.what()) + ")");
- return 5;
- }
- }
-
- /////////////////////
- /// File analysis ///
- /////////////////////
-
- int max_length{0};
- istream_iterator<string> iis {file};
- istream_iterator<string> eof;
- std::vector<string> dirty_words{iis, eof};
- std::vector<string> all(dirty_words.size());
- std::map<string, int> collection;
-
-
- //TODO: Here you use for_each to emulate a for-loop. This is not
- //an appropriate algorithm for this task. What you want to do is
- //transform each element in dirty_words. Hint: Check out the
- //transform algorithm. See TODO about dividing the purifier
- //function before fixing this TODO. --> done.
-
- //TODO: This algorithm does too much. Break it down into more
- //appropriate algorithms. For example, use std::max_element to
- //find the largest element, etc. --> done.
-
- std::transform(dirty_words.begin(), dirty_words.end(), dirty_words.begin(), remove_trash);
- std::copy_if(dirty_words.begin(), dirty_words.end(), all.begin(), is_valid);
- max_length = (*std::max_element(all.begin(), all.end(), cmp_length)).length();
-
- // Counting word in a std::map
- std::for_each(all.begin(), all.end(), [&collection](string word) {
- if (word != "")
- collection[word]++;
- });
-
-
- // Converting map collection into vector wordlist to allow sorting
- std::vector<std::pair<string,int>> wordlist;
- for_each(collection.begin(), collection.end(), [&wordlist](auto pair){
- wordlist.push_back(pair);
- });
-
-
- // Output selection and sorting (if needed)
- if (param == "f"){
- std::sort(wordlist.begin(), wordlist.end(),
- [](std::pair<string,int> const & a, std::pair<string,int> const & b)
- {
- return a.second > b.second;
- });
- print_f(wordlist, max_length);
- }
- else if (param == "a"){
- std::sort(wordlist.begin(), wordlist.end(),
- [](std::pair<string,int> const & a, std::pair<string,int> const & b)
- {
- return a.first < b.first;
- });
- print_a(wordlist, max_length);
- }
- else {
- print_o(all, line_length);
- }
-
-
- return 0;
- }
|