TDDE18CppCourse/lab5/main.cc
2021-08-22 13:21:37 +02:00

243 lines
7.3 KiB
C++
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#include <iostream>
#include <fstream>
#include <iomanip>
#include <algorithm>
#include <iterator>
#include <vector>
#include <map>
#include <cctype>
/*
Lab 5 - TDDE18
Wordlists lab - Main file
*/
using namespace std;
//////////////////////////////////
/// User interaction functions ///
//////////////////////////////////
//TODO: Non fundamental types taken as parameters should not be copied
//as they might be large. Parameters which are not changed should also
//be const. --> done.
// Help function: reports a command-line error on the standard error
// stream, then reminds the user of the expected invocation.
//   str - human-readable description of what went wrong
void print_err(const std::string & str){
    const char * const usage_line = "Usage: ./a.out FILE [-a] [-f] [-o N]";
    std::cerr << "Error: " << str << std::endl
              << usage_line << std::endl;
}
// Print final wordlist
/// For alphabetic ordering: prints one row per entry, the word left-aligned
/// in a column of max_length + 1 characters, immediately followed by its count.
///   wl         - (word, count) pairs, printed in the order given
///   max_length - length of the longest word, used as the column width
void print_a(const std::vector<std::pair<std::string,int>> & wl, int max_length) {
    // Elements of a const vector can only be bound to const references.
    for (const auto & entry : wl) {
        std::cout << std::left << std::setw(max_length + 1)
                  << entry.first << entry.second << '\n';
    }
}
/// For frequency ordering: prints one row per entry, the word right-aligned
/// in a column of max_length characters, then a space and its count.
///   wl         - (word, count) pairs, printed in the order given
///   max_length - length of the longest word, used as the column width
void print_f(const std::vector<std::pair<std::string,int>> & wl, int max_length) {
    // Elements of a const vector can only be bound to const references.
    for (const auto & entry : wl) {
        // std::right is requested explicitly so the layout does not depend
        // on whatever alignment flag was last set on std::cout.
        std::cout << std::right << std::setw(max_length)
                  << entry.first << " " << entry.second << '\n';
    }
}
/// For -o argument: prints the words in the order given, separated by single
/// spaces and wrapped onto a new line whenever appending the next word would
/// bring the combined word characters of the line to limit - 1 or more.
///   list  - words to print; empty strings are ignored
///   limit - requested line width
/// A word that is too long on its own is still printed, alone on its line.
void print_o(const std::vector<std::string> & list, int limit) {
    std::string line{};
    for (const std::string & word : list) {
        if (word.empty()) {
            // An empty word has nothing to show and would only add a stray space.
            continue;
        }
        if (line.empty()) {
            // Nothing buffered yet: start the line with this word. Flushing
            // here would emit a blank line when the word alone hits the limit.
            line = word;
        }
        else if (static_cast<int>(line.length() + word.length()) >= limit - 1) {
            std::cout << line << '\n';
            line = word;
        }
        else {
            line += ' ';
            line += word;
        }
    }
    // Flush the last, partially filled line (if any).
    if (!line.empty()) {
        std::cout << line << '\n';
    }
}
/////////////////////////
/// Utility functions ///
/////////////////////////
//TODO: This function does too much. Divide the tasks into more
//functions which clearly state what they handle. For example:
//remove_trash, is_valid, etc. --> done.
//TODO: Enumerating the entire alphabet should be avoided since
//missing a character is easy. Use the library cctype for this
//instead. --> done.
// Check if a given word is valid.
// A word is valid when it:
//   - is at least three characters long,
//   - neither starts nor ends with a hyphen,
//   - contains no two consecutive hyphens,
//   - consists only of alphabetic characters and hyphens.
// Returns true for a valid word, false otherwise (including the empty string).
bool is_valid(const std::string & word) {
    // The length test comes first so front()/back() are never used on an
    // empty string.
    if (word.length() < 3) {
        return false;
    }
    if (word.front() == '-' || word.back() == '-') {
        return false;
    }
    if (word.find("--") != std::string::npos) {
        return false;
    }
    // <cctype> classifiers require a value representable as unsigned char;
    // taking the parameter as unsigned char keeps non-ASCII bytes in range.
    return std::all_of(word.begin(), word.end(), [](unsigned char c){
        return std::isalpha(c) != 0 || c == '-';
    });
}
// Orders two words by character count: true only when a is strictly
// shorter than b. Intended as a comparator for the standard algorithms.
bool cmp_length(const std::string & a, const std::string & b){
    const std::string::size_type left_size = a.size();
    const std::string::size_type right_size = b.size();
    return right_size > left_size;
}
//Comment: Why unsigned long int? find_first.. will return an index in
//the string. This index will not be too large for a regular int to
//handle. --> Because it is the return type of these functions; otherwise
//GCC performs an implicit conversion and emits a warning at compilation:
//
//main.cc:103:15: warning: comparison of integer expressions of different signedness:
// int and const size_type {aka const long unsigned int} [-Wsign-compare]
//103 | if (begin != std::string::npos && end != std::string::npos && begin < end){
//
// Cleans a raw token read from the input text.
//   1. Strips leading  ( " '  and trailing  ! ? ; , : . " ' )  characters.
//   2. Drops a trailing possessive 's.
//   3. Converts the result to lower case.
// The word is modified in place and a copy of the cleaned word is returned.
// Tokens made only of junk in an inconsistent order, such as "(?!)" or
// ")?!(", are left unstripped.
std::string remove_trash(std::string & word){
    // The indexes use the string's own size_type so that the comparison
    // with npos does not mix signed and unsigned values.
    const std::string::size_type begin = word.find_first_not_of("(\"\'");
    const std::string::size_type end = word.find_last_not_of("!?;,:.\"\')");
    // begin <= end (rather than <) so a single surviving character, as in
    // "(a)", is stripped too.
    if (begin != std::string::npos && end != std::string::npos && begin <= end){
        // Cut the tail first so that the index 'begin' stays valid.
        word.erase(end + 1);
        word.erase(0, begin);
    }
    // Manage the 's tail trash; the size check keeps the lookup inside the
    // string for words shorter than two characters.
    if (word.size() >= 2 && word.compare(word.size() - 2, 2, "'s") == 0){
        word.erase(word.size() - 2);
    }
    // Lowering case. std::tolower needs a value representable as unsigned
    // char, so each character is converted before the call.
    std::transform(word.begin(), word.end(), word.begin(), [](unsigned char c){
        return static_cast<char>(std::tolower(c));
    });
    return word;
}
//TODO: See lab instructions for correct error message in case of
//invalid input. --> done. Note: In some cases, the PDF does not
//specify what should be displayed; in those cases we display the
//message that seemed most suitable to us.
// Program entry point.
// Usage: ./a.out FILE [-a] [-f] [-o N]
//   -a    print every valid word with its count, in alphabetical order
//   -f    print every valid word with its count, most frequent first
//   -o N  print the valid words in file order, wrapped using line width N
// Exit codes: 0 success, 1 no arguments, 2 option missing, 3 file cannot be
// opened, 4 unknown option, 5 N is not a number, 6 N missing (or extra
// arguments after it).
int main(int argc, char* argv[]) {
    ////////////////////////////
    /// Arguments management ///
    ////////////////////////////
    // Basic checks
    if (argc <= 1) {
        print_err("No arguments given.");
        return 1;
    }
    if (argc == 2) {
        print_err("Second argument missing or invalid.");
        return 2;
    }
    const std::vector<std::string> args{argv, argv + argc};

    // Checking file
    std::ifstream file{args[1]};
    if (!file.is_open()) {
        print_err("Second argument missing or invalid.");
        return 3;
    }

    // Checking command: exactly a dash followed by one known letter. The
    // size test also keeps the index into the option within bounds.
    const std::string & option = args[2];
    if (option.size() != 2 || option[0] != '-'
        || (option[1] != 'a' && option[1] != 'f' && option[1] != 'o')) {
        print_err("Third argument invalid.");
        return 4;
    }
    const char param = option[1];

    int line_length{};
    if (param == 'o') {
        if (argc != 4) {
            print_err("You must specify a N value.");
            return 6;
        }
        try {
            line_length = std::stoi(args[3]);
        }
        catch (const std::exception & e) {
            print_err("Invalid N value. (" + std::string(e.what()) + ")");
            return 5;
        }
    }

    /////////////////////
    /// File analysis ///
    /////////////////////
    // Read every whitespace-separated token of the file.
    std::istream_iterator<std::string> iis{file};
    std::istream_iterator<std::string> eof;
    std::vector<std::string> dirty_words(iis, eof);

    // Clean each token in place.
    std::transform(dirty_words.begin(), dirty_words.end(),
                   dirty_words.begin(), remove_trash);

    // Keep only the valid words, in file order. Appending through
    // back_inserter makes 'all' hold exactly the accepted words; copying
    // into a pre-sized vector would leave empty strings at its end.
    std::vector<std::string> all;
    all.reserve(dirty_words.size());
    std::copy_if(dirty_words.begin(), dirty_words.end(), std::back_inserter(all),
                 [](const std::string & word){
                     // A token reduced to nothing by the cleaning step can
                     // never be a word; do not even hand it to the validator.
                     return !word.empty() && is_valid(word);
                 });

    // The -o listing needs neither the counts nor the column width.
    if (param == 'o') {
        print_o(all, line_length);
        return 0;
    }

    // Column width: length of the longest valid word. The range must be
    // non-empty before the result of max_element may be dereferenced.
    int max_length{0};
    if (!all.empty()) {
        max_length = static_cast<int>(
            std::max_element(all.begin(), all.end(), cmp_length)->length());
    }

    // Counting words in a std::map
    std::map<std::string, int> collection;
    for (const std::string & word : all) {
        ++collection[word];
    }

    // Converting map collection into vector wordlist to allow sorting.
    // The map iterates in key order, so wordlist starts out alphabetical.
    std::vector<std::pair<std::string,int>> wordlist(collection.begin(), collection.end());

    if (param == 'f') {
        // stable_sort keeps words with equal counts in alphabetical order,
        // which makes the output deterministic.
        std::stable_sort(wordlist.begin(), wordlist.end(),
            [](const std::pair<std::string,int> & a, const std::pair<std::string,int> & b) {
                return a.second > b.second;
            });
        print_f(wordlist, max_length);
    }
    else {
        // param == 'a': wordlist is already in alphabetical order.
        print_a(wordlist, max_length);
    }
    return 0;
}