243 lines
7.3 KiB
C++
243 lines
7.3 KiB
C++
#include <algorithm>
#include <cctype>
#include <cstddef>
#include <exception>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <iterator>
#include <map>
#include <string>
#include <utility>
#include <vector>
|
||
|
||
|
||
/*
|
||
Lab 5 - TDDE18
|
||
Wordlists lab - Main file
|
||
*/
|
||
|
||
using namespace std;
|
||
|
||
//////////////////////////////////
|
||
/// User interaction functions ///
|
||
//////////////////////////////////
|
||
|
||
//TODO: Non fundamental types taken as parameters should not be copied
|
||
//as they might be large. Parameters which are not changed should also
|
||
//be const. --> done.
|
||
|
||
// Help function
|
||
/// Report a command-line error on the standard error stream.
///
/// Writes "Error: " followed by `str` on one line, then the usage line.
///
/// @param str  description of what went wrong.
void print_err(const std::string& str) {
    std::cerr << "Error: " << str << std::endl
              << "Usage: ./a.out FILE [-a] [-f] [-o N]" << std::endl;
}
|
||
|
||
// Print final wordlist
|
||
/// For alphabetic ordering
|
||
/// Print the word list for the alphabetic (-a) output.
///
/// Each entry goes on its own line: the word left-aligned in a field of
/// `max_length + 1` characters, immediately followed by its count.
/// Note that `std::left` is a sticky manipulator, so std::cout stays
/// left-aligned after this call.
///
/// @param wl          (word, count) pairs, printed in the order given.
/// @param max_length  length of the longest word; determines the column width.
void print_a(const std::vector<std::pair<std::string, int>>& wl, int max_length) {
    // `wl` is const, so every entry must be bound as a const reference;
    // a non-const reference parameter cannot bind to it and fails to compile.
    for (const auto& entry : wl) {
        std::cout << std::left << std::setw(max_length + 1)
                  << entry.first << entry.second << '\n';
    }
}
|
||
|
||
/// For frequency ordering
|
||
/// Print the word list for the frequency (-f) output.
///
/// Each entry goes on its own line: the word padded to a field of
/// `max_length` characters (using the stream's current alignment, which is
/// right-aligned by default), a single space, then its count.
///
/// @param wl          (word, count) pairs, printed in the order given.
/// @param max_length  length of the longest word; determines the column width.
void print_f(const std::vector<std::pair<std::string, int>>& wl, int max_length) {
    // `wl` is const, so every entry must be bound as a const reference;
    // a non-const reference parameter cannot bind to it and fails to compile.
    for (const auto& entry : wl) {
        std::cout << std::setw(max_length) << entry.first << ' '
                  << entry.second << '\n';
    }
}
|
||
|
||
/// For -o argument
|
||
/// Print the words in their original order, wrapped into lines (-o N).
///
/// Words are joined with single spaces. A word is appended to the current
/// line only while `line.length() + word.length()` stays below `limit - 1`;
/// otherwise the current line is printed and the word starts a new one.
/// NOTE(review): this keeps every multi-word line strictly shorter than
/// `limit` characters -- confirm against the lab specification whether a
/// line of exactly `limit` characters should be allowed.
///
/// Empty words are skipped, and no blank line is ever printed: a word that
/// is too long for the limit is simply placed alone on its own line.
///
/// @param list   words to print, in output order.
/// @param limit  requested line length (the N of "-o N").
void print_o(const std::vector<std::string>& list, int limit) {
    // Widen before subtracting so that an extreme `limit` cannot overflow.
    const long long threshold = static_cast<long long>(limit) - 1;
    std::string line{};

    for (const std::string& word : list) {
        if (word.empty()) {
            continue;  // nothing to print; would only add stray spaces
        }
        if (line.empty()) {
            // First word of a line always fits; never flush an empty line.
            line = word;
            continue;
        }
        if (static_cast<long long>(line.length() + word.length()) >= threshold) {
            std::cout << line << '\n';
            line = word;
        } else {
            line += ' ';
            line += word;
        }
    }

    // Flush whatever is left, but do not emit a blank line for empty input.
    if (!line.empty()) {
        std::cout << line << '\n';
    }
}
|
||
|
||
|
||
/////////////////////////
|
||
/// Utility functions ///
|
||
/////////////////////////
|
||
|
||
//TODO: This function does too much. Divide the tasks into more
|
||
//functions which clearly state what they handle. For example:
|
||
//remove_trash, is_valid, etc. --> done.
|
||
|
||
//TODO: Enumerating the entire alphabet should be avoided since
|
||
//missing a character is easy. Use the library cctype for this
|
||
//instead. --> done.
|
||
|
||
// Check if a given word is valid
|
||
/// Check whether a cleaned word counts as a valid word.
///
/// A word is valid when all of the following hold:
///  - it is at least three characters long,
///  - it neither starts nor ends with a hyphen,
///  - it contains no double hyphen ("--"),
///  - every character is alphabetic (per std::isalpha) or a hyphen.
///
/// @param word  the word to test; may be empty.
/// @return true if the word is valid, false otherwise.
bool is_valid(const std::string& word) {
    // Check the length first: it also rejects the empty string, so the
    // front()/back() accesses below are always in range.
    if (word.length() < 3) {
        return false;
    }
    if (word.front() == '-' || word.back() == '-') {
        return false;
    }
    if (word.find("--") != std::string::npos) {
        return false;
    }
    // std::isalpha requires a value representable as unsigned char; taking
    // the character as unsigned char avoids UB for negative plain chars.
    return std::all_of(word.begin(), word.end(), [](unsigned char c) {
        return std::isalpha(c) != 0 || c == '-';
    });
}
|
||
|
||
|
||
/// Ordering predicate on string length.
///
/// @return true when `a` is strictly shorter than `b`.
bool cmp_length(const std::string& a, const std::string& b) {
    const auto len_a = a.size();
    const auto len_b = b.size();
    return len_a < len_b;
}
|
||
|
||
//Comment: Why unsigned long int? find_first.. will return an index in
//the string. This index will not be too large for a regular int to
//handle. --> Because it is the return type of these functions; otherwise
//GCC performs an implicit signed/unsigned conversion and emits a warning
//at compilation:
//
//main.cc:103:15: warning: comparison of integer expressions of different signedness:
// ‘int’ and ‘const size_type’ {aka ‘const long unsigned int’} [-Wsign-compare]
//103 | if (begin != std::string::npos && end != std::string::npos && begin < end){
//
|
||
|
||
/// Strip surrounding punctuation from a raw token and lower-case it.
///
/// Steps, in order:
///  1. Remove leading `(`, `"` and `'` and trailing `! ? ; , : . " ' )`
///     characters, but only when the first and last remaining characters
///     are distinct positions (so tokens such as "(?!)" or ")?!(" are left
///     untouched).
///  2. Remove a trailing "'s".
///  3. Convert every character to lower case.
///
/// @param word  the token to clean; modified in place.
/// @return a copy of the cleaned token.
std::string remove_trash(std::string& word) {
    const char* const leading_junk = "(\"'";
    const char* const trailing_junk = "!?;,:.\"')";

    // size_type is what the find_* functions return; it avoids
    // signed/unsigned comparisons against std::string::npos.
    const std::string::size_type begin = word.find_first_not_of(leading_junk);
    const std::string::size_type end = word.find_last_not_of(trailing_junk);
    if (begin != std::string::npos && end != std::string::npos && begin < end) {
        // Cut the tail first so that `begin` and `end` both stay valid.
        word.erase(end + 1);
        word.erase(0, begin);
    }

    // Manage the 's tail trash. The size guard is essential: looking two
    // characters back on a shorter word would read before the buffer.
    if (word.size() >= 2 && word.compare(word.size() - 2, 2, "'s") == 0) {
        word.erase(word.size() - 2);
    }

    // Lowering case. std::tolower needs an unsigned char value; passing a
    // negative plain char is undefined behaviour.
    std::transform(word.begin(), word.end(), word.begin(), [](unsigned char c) {
        return static_cast<char>(std::tolower(c));
    });
    return word;
}
|
||
|
||
|
||
//TODO: See lab instructions for correct error message in case of
//invalid input. --> done. Note: In some cases, the PDF does not
//specify what should be displayed; in those cases we display the
//message we found most suitable.
|
||
|
||
int main(int argc, char* argv[]) {
|
||
|
||
////////////////////////////
|
||
/// Arguments management ///
|
||
////////////////////////////
|
||
|
||
/// Args parsing
|
||
// Basic checks
|
||
if (argc <= 1 ){
|
||
print_err("No arguments given.");
|
||
return 1;
|
||
}
|
||
else if (argc == 2){
|
||
print_err("Second argument missing or invalid.");
|
||
return 2;
|
||
}
|
||
|
||
// Checking file
|
||
vector<string> args {argv, argv + argc};
|
||
string file_name{args[1]};
|
||
|
||
ifstream file{};
|
||
file.open(file_name);
|
||
|
||
if (! file.is_open()){
|
||
print_err("Second argument missing or invalid.");
|
||
return 3;
|
||
}
|
||
|
||
// Checking command
|
||
string param{args[2][1]};
|
||
int line_length{};
|
||
|
||
if (args[2][0] != '-' || (param != "a" && param != "f" && param != "o")){
|
||
print_err("Third argument invalid.");
|
||
return 4;
|
||
}
|
||
else if (param == "o"){
|
||
if (argc != 4) {
|
||
print_err("You must specify a N value.");
|
||
return 6;
|
||
}
|
||
try {
|
||
line_length = stoi(args[3]);
|
||
}
|
||
catch (std::exception& e){
|
||
print_err("Invalid N value. (" + string(e.what()) + ")");
|
||
return 5;
|
||
}
|
||
}
|
||
|
||
/////////////////////
|
||
/// File analysis ///
|
||
/////////////////////
|
||
|
||
int max_length{0};
|
||
istream_iterator<string> iis {file};
|
||
istream_iterator<string> eof;
|
||
std::vector<string> dirty_words{iis, eof};
|
||
std::vector<string> all(dirty_words.size());
|
||
std::map<string, int> collection;
|
||
|
||
|
||
//TODO: Here you use for_each to emulate a for-loop. This is not
|
||
//an appropriate algorithm for this task. What you want to do is
|
||
//transform each element in dirty_words. Hint: Check out the
|
||
//transform algorithm. See TODO about dividing the purifier
|
||
//function before fixing this TODO. --> done.
|
||
|
||
//TODO: This algorithm does too much. Break it down into more
|
||
//appropriate algorithms. For example, use std::max_element to
|
||
//find the largest element, etc. --> done.
|
||
|
||
std::transform(dirty_words.begin(), dirty_words.end(), dirty_words.begin(), remove_trash);
|
||
std::copy_if(dirty_words.begin(), dirty_words.end(), all.begin(), is_valid);
|
||
max_length = (*std::max_element(all.begin(), all.end(), cmp_length)).length();
|
||
|
||
// Counting word in a std::map
|
||
std::for_each(all.begin(), all.end(), [&collection](string word) {
|
||
if (word != "")
|
||
collection[word]++;
|
||
});
|
||
|
||
|
||
// Converting map collection into vector wordlist to allow sorting
|
||
std::vector<std::pair<string,int>> wordlist;
|
||
for_each(collection.begin(), collection.end(), [&wordlist](auto pair){
|
||
wordlist.push_back(pair);
|
||
});
|
||
|
||
|
||
// Output selection and sorting (if needed)
|
||
if (param == "f"){
|
||
std::sort(wordlist.begin(), wordlist.end(),
|
||
[](std::pair<string,int> const & a, std::pair<string,int> const & b)
|
||
{
|
||
return a.second > b.second;
|
||
});
|
||
print_f(wordlist, max_length);
|
||
}
|
||
else if (param == "a"){
|
||
std::sort(wordlist.begin(), wordlist.end(),
|
||
[](std::pair<string,int> const & a, std::pair<string,int> const & b)
|
||
{
|
||
return a.first < b.first;
|
||
});
|
||
print_a(wordlist, max_length);
|
||
}
|
||
else {
|
||
print_o(all, line_length);
|
||
}
|
||
|
||
|
||
return 0;
|
||
}
|