Sharing of my labs carried out during the TDDE18 course at Linköping University
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

main.cc 7.3KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243
  1. #include <iostream>
  2. #include <fstream>
  3. #include <iomanip>
  4. #include <algorithm>
  5. #include <iterator>
  6. #include <vector>
  7. #include <map>
  8. #include <cctype>
  9. /*
  10. Lab 5 - TDDE18
  11. Wordlists lab - Main file
  12. */
  13. using namespace std;
  14. //////////////////////////////////
  15. /// User interaction functions ///
  16. //////////////////////////////////
  17. //TODO: Non fundamental types taken as parameters should not be copied
  18. //as they might be large. Parameters which are not changed should also
  19. //be const. --> done.
  20. // Help function
  /// Report a command-line problem on the standard error stream.
  ///
  /// Writes "Error: <str>" on one line, then the usage synopsis on the next.
  ///
  /// @param str  Description of what was wrong with the invocation.
  void print_err(const string & str){
      const char usage[] = "Usage: ./a.out FILE [-a] [-f] [-o N]";
      std::cerr << "Error: " << str << std::endl
                << usage << std::endl;
  }
  25. // Print final wordlist
  26. /// For alphabetic ordering
  /// Print the word list as two columns: the word (left-aligned), then its count.
  ///
  /// Entries are printed in the order they appear in `wl`; no sorting is done here.
  /// Note that std::left is a sticky manipulator, so std::cout stays left-aligned
  /// after this call.
  ///
  /// @param wl          (word, occurrence count) pairs to print.
  /// @param max_length  Length of the longest word; the word column is padded to
  ///                    max_length + 1 characters so that the counts line up.
  void print_a(const std::vector<std::pair<string,int>> & wl, int max_length) {
      // `wl` is const, so its elements can only bind to a const reference.
      std::for_each(wl.begin(), wl.end(),
                    [max_length](const std::pair<string,int> & entry){
          std::cout << std::left << std::setw(max_length + 1)
                    << entry.first << entry.second << '\n';
      });
  }
  33. /// For frequency ordering
  /// Print the word list as two columns: the word (right-aligned), then its count.
  ///
  /// Entries are printed in the order they appear in `wl`; no sorting is done here.
  ///
  /// @param wl          (word, occurrence count) pairs to print.
  /// @param max_length  Length of the longest word; every word is right-aligned
  ///                    in a field of this width, followed by one space.
  void print_f(const std::vector<std::pair<string,int>> & wl, int max_length) {
      // `wl` is const, so its elements can only bind to a const reference.
      std::for_each(wl.begin(), wl.end(),
                    [max_length](const std::pair<string,int> & entry){
          // Ask for right alignment explicitly instead of relying on whatever
          // adjustment flag std::cout currently carries.
          std::cout << std::right << std::setw(max_length)
                    << entry.first << " " << entry.second << '\n';
      });
  }
  39. /// For -o argument
  /// Print the words in their given order, wrapped into space-separated lines.
  ///
  /// A word is appended to the current line unless the combined length of the
  /// line and the word reaches `limit - 1`; in that case the current line is
  /// printed and the word starts a new one. A word that is too long on its own
  /// is printed alone on its line.
  ///
  /// Empty words are skipped, and nothing at all is printed for an empty list.
  ///
  /// @param list   Words to print, in output order.
  /// @param limit  Line width used for wrapping.
  void print_o(const std::vector<string> & list, int limit) {
      std::string line{};
      std::for_each(list.begin(), list.end(), [&line, limit](const string & word){
          if (word.empty()) {
              return;  // would otherwise only contribute a stray separator
          }
          if (line.empty()) {
              // Nothing buffered yet: start the line without flushing, so an
              // over-long first word does not produce a leading blank line.
              line = word;
              return;
          }
          if (static_cast<int>(line.length() + word.length()) >= limit - 1) {
              std::cout << line << '\n';
              line = word;
          }
          else {
              line += ' ';
              line += word;
          }
      });
      if (!line.empty()) {
          std::cout << line << '\n';
      }
  }
  58. /////////////////////////
  59. /// Utility functions ///
  60. /////////////////////////
  61. //TODO: This function does too much. Divide the tasks into more
  62. //functions which clearly state what they handle. For example:
  63. //remove_trash, is_valid, etc. --> done.
  64. //TODO: Enumerating the entire alphabet should be avoided since
  65. //missing a character is easy. Use the library cctype for this
  66. //instead. --> done.
  67. // Check if a given word is valid
  /// Tell whether a cleaned word is acceptable for the word list.
  ///
  /// A word is valid when all of the following hold:
  ///  - it is at least three characters long,
  ///  - it neither starts nor ends with a hyphen,
  ///  - it contains no two consecutive hyphens,
  ///  - every character is alphabetic (per std::isalpha) or a hyphen.
  ///
  /// @param word  The word to check; may be empty.
  /// @return true if the word satisfies every rule above, false otherwise.
  bool is_valid(const string & word) {
      // Length first: this also guarantees front()/back() are safe to call,
      // so an empty word is simply rejected instead of raising an exception.
      if (word.length() < 3) {
          return false;
      }
      if (word.front() == '-' || word.back() == '-') {
          return false;
      }
      if (word.find("--") != std::string::npos) {
          return false;
      }
      return std::all_of(word.begin(), word.end(), [](const char c){
          // <cctype> functions require a value representable as unsigned char;
          // a plain (possibly negative) char from non-ASCII text is not.
          return std::isalpha(static_cast<unsigned char>(c)) || c == '-';
      });
  }
  /// Strict "shorter than" ordering on strings, for use as a comparator.
  ///
  /// @return true when `a` has fewer characters than `b`.
  bool cmp_length(const string & a, const string & b){
      const auto left_size = a.size();
      const auto right_size = b.size();
      return right_size > left_size;
  }
  79. //Comment: Why unsigned long int? find_first.. will return an index in
  80. //the string. This index will not be too large for a regular int to
  81. //handle. --> Because it is the return type of these functions, else
  82. //GCC will do a strange conversion and emit a warning at compilation:
  83. //
  84. //main.cc:103:15: warning: comparison of integer expressions of different signedness:
  85. // ‘int’ and ‘const size_type’ {aka ‘const long unsigned int’} [-Wsign-compare]
  86. //103 | if (begin != std::string::npos && end != std::string::npos && begin < end){
  87. //
  88. string remove_trash(string & word){
  89. // Here we remove head and tail junk. Lot of the complexity
  90. // is dues to weird cases like "(?!)" or ")?!("
  91. const long unsigned int begin = word.find_first_not_of("(\"\'");
  92. const long unsigned int end = word.find_last_not_of("!?;,:.\"\')");
  93. if (begin != std::string::npos && end != std::string::npos && begin < end){
  94. word.erase(0, begin);
  95. word.erase(word.find_last_not_of("!?;,:.\"\')") + 1);
  96. }
  97. // Manage the 's tail trash
  98. if (*(word.end()-2) == '\'' && *(word.end()-1) == 's')
  99. word.erase(word.end()-2, word.end());
  100. // Lowering case
  101. std::transform(word.begin(), word.end(), word.begin(), ::tolower);
  102. return word;
  103. }
  104. //TODO: See lab instructions for correct error message in case of
  105. //invalid input. --> done. Note: in some cases the PDF does not
  106. //specify what should be displayed; in those cases we display
  107. //the message we found most suitable.
  108. int main(int argc, char* argv[]) {
  109. ////////////////////////////
  110. /// Arguments management ///
  111. ////////////////////////////
  112. /// Args parsing
  113. // Basic checks
  114. if (argc <= 1 ){
  115. print_err("No arguments given.");
  116. return 1;
  117. }
  118. else if (argc == 2){
  119. print_err("Second argument missing or invalid.");
  120. return 2;
  121. }
  122. // Checking file
  123. vector<string> args {argv, argv + argc};
  124. string file_name{args[1]};
  125. ifstream file{};
  126. file.open(file_name);
  127. if (! file.is_open()){
  128. print_err("Second argument missing or invalid.");
  129. return 3;
  130. }
  131. // Checking command
  132. string param{args[2][1]};
  133. int line_length{};
  134. if (args[2][0] != '-' || (param != "a" && param != "f" && param != "o")){
  135. print_err("Third argument invalid.");
  136. return 4;
  137. }
  138. else if (param == "o"){
  139. if (argc != 4) {
  140. print_err("You must specify a N value.");
  141. return 6;
  142. }
  143. try {
  144. line_length = stoi(args[3]);
  145. }
  146. catch (std::exception& e){
  147. print_err("Invalid N value. (" + string(e.what()) + ")");
  148. return 5;
  149. }
  150. }
  151. /////////////////////
  152. /// File analysis ///
  153. /////////////////////
  154. int max_length{0};
  155. istream_iterator<string> iis {file};
  156. istream_iterator<string> eof;
  157. std::vector<string> dirty_words{iis, eof};
  158. std::vector<string> all(dirty_words.size());
  159. std::map<string, int> collection;
  160. //TODO: Here you use for_each to emulate a for-loop. This is not
  161. //an appropriate algorithm for this task. What you want to do is
  162. //transform each element in dirty_words. Hint: Check out the
  163. //transform algorithm. See TODO about dividing the purifier
  164. //function before fixing this TODO. --> done.
  165. //TODO: This algorithm does too much. Break it down into more
  166. //appropriate algorithms. For example, use std::max_element to
  167. //find the largest element, etc. --> done.
  168. std::transform(dirty_words.begin(), dirty_words.end(), dirty_words.begin(), remove_trash);
  169. std::copy_if(dirty_words.begin(), dirty_words.end(), all.begin(), is_valid);
  170. max_length = (*std::max_element(all.begin(), all.end(), cmp_length)).length();
  171. // Counting word in a std::map
  172. std::for_each(all.begin(), all.end(), [&collection](string word) {
  173. if (word != "")
  174. collection[word]++;
  175. });
  176. // Converting map collection into vector wordlist to allow sorting
  177. std::vector<std::pair<string,int>> wordlist;
  178. for_each(collection.begin(), collection.end(), [&wordlist](auto pair){
  179. wordlist.push_back(pair);
  180. });
  181. // Output selection and sorting (if needed)
  182. if (param == "f"){
  183. std::sort(wordlist.begin(), wordlist.end(),
  184. [](std::pair<string,int> const & a, std::pair<string,int> const & b)
  185. {
  186. return a.second > b.second;
  187. });
  188. print_f(wordlist, max_length);
  189. }
  190. else if (param == "a"){
  191. std::sort(wordlist.begin(), wordlist.end(),
  192. [](std::pair<string,int> const & a, std::pair<string,int> const & b)
  193. {
  194. return a.first < b.first;
  195. });
  196. print_a(wordlist, max_length);
  197. }
  198. else {
  199. print_o(all, line_length);
  200. }
  201. return 0;
  202. }