115 lines
No EOL
2.5 KiB
C
115 lines
No EOL
2.5 KiB
C
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include "preprocessing.h"
|
|
|
|
Feature *init_feature(float value)
|
|
{
|
|
Feature *feature = (Feature*)malloc(sizeof(Feature));
|
|
feature->value = value;
|
|
feature->next_feature = NULL;
|
|
|
|
return feature;
|
|
}
|
|
|
|
OneHotLabel *init_onehotlabel(float value)
|
|
{
|
|
OneHotLabel *hot = (OneHotLabel*)malloc(sizeof(OneHotLabel));
|
|
hot->value = value;
|
|
hot->next = NULL;
|
|
|
|
return hot;
|
|
}
|
|
|
|
Sample *init_sample()
|
|
{
|
|
Sample *sample = (Sample*)malloc(sizeof(Sample));
|
|
sample->first_feature = NULL;
|
|
sample->first_hot = NULL;
|
|
sample->next_sample = NULL;
|
|
sample->t = TRAIN;
|
|
|
|
return sample;
|
|
}
|
|
|
|
Data *init_data()
|
|
{
|
|
Data *data = (Data*)malloc(sizeof(Data));
|
|
data->first_sample = NULL;
|
|
data->last_sample = NULL;
|
|
data->size = 0;
|
|
|
|
return data;
|
|
}
|
|
|
|
/**
 * Appends a sample to the end of a Data container's sample list.
 *
 * When the container is empty (last_sample is NULL, as left by init_data())
 * the sample becomes both the head and the tail of the list; otherwise it is
 * linked after the current tail. size is incremented in both cases.
 *
 * @param data    Container to append to. A NULL container is ignored.
 * @param sample  Sample to append. A NULL sample is ignored. The sample's own
 *                next_sample pointer is left untouched.
 */
void add_sample_to_data(Data *data, Sample *sample)
{
    if (data == NULL || sample == NULL)
    {
        return;
    }

    if (data->last_sample == NULL)
    {
        /* Empty list: there is no tail to link from, so the sample is the head. */
        data->first_sample = sample;
    }
    else
    {
        data->last_sample->next_sample = sample;
    }

    data->last_sample = sample;
    data->size++;
}
|
|
|
|
/**
 * Frees a sample together with its feature list and one-hot label list.
 *
 * Every node reachable from first_feature and from first_hot is freed, then
 * the sample itself. The sample's next_sample is not followed.
 *
 * @param sample  Sample to free. NULL is accepted and ignored, like free().
 */
void destroy_sample(Sample *sample)
{
    if (sample == NULL)
    {
        return;
    }

    Feature *feature = sample->first_feature;
    while (feature != NULL)
    {
        /* Read the successor before the node is released. */
        Feature *next_feature = feature->next_feature;
        free(feature);
        feature = next_feature;
    }

    OneHotLabel *hot = sample->first_hot;
    while (hot != NULL)
    {
        OneHotLabel *next_hot = hot->next;
        free(hot);
        hot = next_hot;
    }

    free(sample);
}
|
|
|
|
/**
 * Frees a Data container and every sample in its list.
 *
 * The list is walked from first_sample through next_sample, each sample is
 * released with destroy_sample(), and finally the container itself is freed.
 *
 * @param data  Container to free. NULL is accepted and ignored, like free().
 */
void destroy_data(Data *data)
{
    if (data == NULL)
    {
        return;
    }

    Sample *sample = data->first_sample;
    while (sample != NULL)
    {
        /* Read the successor before the sample is released. */
        Sample *next_sample = sample->next_sample;
        destroy_sample(sample);
        sample = next_sample;
    }

    free(data);
}
|
|
|
|
/**
 * Reads a CSV file line by line and prints every field of every line.
 *
 * In its current state the function only tokenizes: each field is written to
 * stdout as "current token = <field>", followed by a dashed separator after
 * each line read. No sample is built, so the returned container is the empty
 * one produced by init_data().
 *
 * Lines are read into a fixed 100-byte buffer; a longer line is consumed in
 * several chunks, each treated as a separate line. Trailing '\n' and '\r'
 * are removed before tokenizing so files with CRLF line endings do not leave
 * a stray '\r' in the last field.
 *
 * @param path_to_csv          Path of the file to read.
 * @param n_features           Currently unused.
 * @param features_separator   Set of delimiter characters passed to strtok().
 * @param train_percent        Currently unused.
 * @param apply_onthot_encode  Currently unused.
 * @param n_classes            Currently unused.
 * @return The container returned by init_data() (NULL if it returns NULL).
 *         The caller releases it with destroy_data(). If the file cannot be
 *         opened, a message is written to stderr and the process exits with
 *         status -1.
 */
Data *csv_to_samples(char *path_to_csv, int n_features, char *features_separator, float train_percent, char *apply_onthot_encode, int n_classes)
{
    /* Accepted for interface compatibility; not consumed by the code below. */
    (void)n_features;
    (void)train_percent;
    (void)apply_onthot_encode;
    (void)n_classes;

    /* Open the file first so nothing has been allocated if it fails. */
    FILE *file = fopen(path_to_csv, "r");
    if (file == NULL)
    {
        fprintf(stderr, "Unable to open the file\n");
        exit(-1);
    }

    Data *data = init_data();
    if (data == NULL)
    {
        fclose(file);
        return NULL;
    }

    char line[100];
    while (fgets(line, sizeof line, file) != NULL)
    {
        /* Drop the line terminator ("\n" or "\r\n") that fgets keeps. */
        size_t len = strlen(line);
        while (len > 0 && (line[len - 1] == '\n' || line[len - 1] == '\r'))
        {
            line[--len] = '\0';
        }

        /* strtok writes NULs into line; that is fine because the buffer is
         * refilled by the next fgets call. */
        for (char *token = strtok(line, features_separator);
             token != NULL;
             token = strtok(NULL, features_separator))
        {
            printf("current token = %s\n", token);
        }
        printf("----------------\n");
    }

    fclose(file);

    return data;
}
|
|
|
|
/**
 * Placeholder printer for a Data container.
 *
 * Writes the line "Hello" to stdout; the container is not inspected.
 *
 * @param data  Container to print (currently ignored).
 */
void print_data(const Data *data)
{
    (void)data;
    fputs("Hello\n", stdout);
}