|
@@ -44,9 +44,17 @@ Data *init_data()
|
44
|
44
|
|
45
|
45
|
/*
 * Append `sample` to the end of the sample list owned by `data`.
 *
 * data   : list header; its first_sample / last_sample / size fields are updated.
 * sample : node to append; it becomes the new last_sample.
 *
 * When the list is empty (size == 0) the sample also becomes first_sample;
 * otherwise it is linked after the current last_sample.
 *
 * A NULL `data` or `sample` is ignored so that a failed allocation upstream
 * cannot turn into a NULL dereference here.
 *
 * NOTE(review): sample->next_sample is not touched; this assumes the caller
 * hands in a node whose next_sample is already NULL - confirm against
 * init_sample().
 */
void add_sample_to_data(Data *data, Sample *sample)
{
    if(data == NULL || sample == NULL)
    {
        return;
    }

    if(data->size == 0)
    {
        /* first element: it is both head and tail */
        data->first_sample = sample;
    }else
    {
        /* hook the new node behind the current tail */
        data->last_sample->next_sample = sample;
    }

    /* common to both cases: new tail, one more element */
    data->last_sample = sample;
    data->size++;
}
|
51
|
59
|
|
52
|
60
|
void destroy_sample(Sample *sample)
|
|
@@ -86,19 +94,60 @@ Data *csv_to_samples(char *path_to_csv, int n_features, char *features_separator
|
86
|
94
|
FILE *file = fopen(path_to_csv, "r");
|
87
|
95
|
char line[100], *dup;
|
88
|
96
|
char *token;
|
89
|
|
- //float val;
|
|
97
|
+ int features_count;
|
90
|
98
|
if(file != NULL)
|
91
|
99
|
{
|
92
|
100
|
while(fgets(line, 100, file) != NULL)
|
93
|
101
|
{
|
|
102
|
+ Sample *current_sample = init_sample();
|
|
103
|
+ Feature *temp_last_feature;
|
|
104
|
+ features_count = n_features;
|
94
|
105
|
dup = strtok(line, "\n"); //extracting line content without '\n'
|
95
|
106
|
token = strtok(dup, features_separator);
|
96
|
107
|
while(token != NULL)
|
97
|
108
|
{
|
98
|
|
- printf("current token = %s\n", token);
|
|
109
|
+ if(features_count > 0) //first put features into current sample
|
|
110
|
+ {
|
|
111
|
+ if(current_sample->first_feature == NULL)
|
|
112
|
+ {
|
|
113
|
+ current_sample->first_feature = init_feature( atof(token) );
|
|
114
|
+ temp_last_feature = current_sample->first_feature;
|
|
115
|
+ }else
|
|
116
|
+ {
|
|
117
|
+ temp_last_feature->next_feature = init_feature( atof(token) );
|
|
118
|
+ temp_last_feature = temp_last_feature->next_feature;
|
|
119
|
+ }
|
|
120
|
+ features_count--;
|
|
121
|
+ }else //put label into current sample
|
|
122
|
+ {
|
|
123
|
+ if(strcmp(apply_onthot_encode, "yes") == 0)
|
|
124
|
+ {
|
|
125
|
+ OneHotLabel *temp_last_hotlabel;
|
|
126
|
+ int i;
|
|
127
|
+ for(i=0 ; i<n_classes ; i++)
|
|
128
|
+ {
|
|
129
|
+ if(current_sample->first_hot == NULL)
|
|
130
|
+ {
|
|
131
|
+ current_sample->first_hot = init_onehotlabel(0.0);
|
|
132
|
+ temp_last_hotlabel = current_sample->first_hot;
|
|
133
|
+ }else
|
|
134
|
+ {
|
|
135
|
+ temp_last_hotlabel->next = init_onehotlabel(0.0);
|
|
136
|
+ temp_last_hotlabel = temp_last_hotlabel->next;
|
|
137
|
+ }
|
|
138
|
+ if((float)i == atof(token) )
|
|
139
|
+ {
|
|
140
|
+ temp_last_hotlabel->value = 1.0;
|
|
141
|
+ }
|
|
142
|
+ }
|
|
143
|
+ }else //when not applying one hot encoding, first_hot is directly equal to label
|
|
144
|
+ {
|
|
145
|
+ current_sample->first_hot = init_onehotlabel( atof(token) );
|
|
146
|
+ }
|
|
147
|
+ }
|
99
|
148
|
token = strtok(NULL, features_separator);
|
100
|
149
|
}
|
101
|
|
- printf("----------------\n");
|
|
150
|
+ add_sample_to_data(data, current_sample);
|
102
|
151
|
}
|
103
|
152
|
fclose(file);
|
104
|
153
|
}else
|
|
@@ -111,5 +160,52 @@ Data *csv_to_samples(char *path_to_csv, int n_features, char *features_separator
|
111
|
160
|
|
112
|
161
|
void print_data(const Data *data)
|
113
|
162
|
{
|
114
|
|
- printf("Hello\n");
|
|
163
|
+ Sample *current_sample = data->first_sample;
|
|
164
|
+ Feature *temp_feature;
|
|
165
|
+ OneHotLabel *temp_hotlabel;
|
|
166
|
+ printf("#=============================================#\n");
|
|
167
|
+ if(current_sample != NULL)
|
|
168
|
+ {
|
|
169
|
+ int count = 1;
|
|
170
|
+ while(current_sample != NULL)
|
|
171
|
+ {
|
|
172
|
+ printf("Sample %d : ", count);
|
|
173
|
+ if(current_sample->t == TRAIN)
|
|
174
|
+ {
|
|
175
|
+ printf("TR\n");
|
|
176
|
+ }else if(current_sample->t == TEST)
|
|
177
|
+ {
|
|
178
|
+ printf("TE\n");
|
|
179
|
+ }else
|
|
180
|
+ {
|
|
181
|
+ printf("VA\n");
|
|
182
|
+ }
|
|
183
|
+ temp_feature = current_sample->first_feature;
|
|
184
|
+ while(temp_feature != NULL)
|
|
185
|
+ {
|
|
186
|
+ printf("%f ", temp_feature->value);
|
|
187
|
+ temp_feature = temp_feature->next_feature;
|
|
188
|
+ }
|
|
189
|
+ temp_hotlabel = current_sample->first_hot;
|
|
190
|
+ printf("##> ");
|
|
191
|
+ while(temp_hotlabel != NULL)
|
|
192
|
+ {
|
|
193
|
+ printf("%f ", temp_hotlabel->value);
|
|
194
|
+ temp_hotlabel = temp_hotlabel->next;
|
|
195
|
+ }
|
|
196
|
+ current_sample = current_sample->next_sample;
|
|
197
|
+ if(current_sample != NULL)
|
|
198
|
+ {
|
|
199
|
+ printf("\n#---------------------------------------------#\n");
|
|
200
|
+ }else
|
|
201
|
+ {
|
|
202
|
+ printf("\n");
|
|
203
|
+ }
|
|
204
|
+ count++;
|
|
205
|
+ }
|
|
206
|
+ }else
|
|
207
|
+ {
|
|
208
|
+ printf("Nothing to print : empty data !!!\n");
|
|
209
|
+ }
|
|
210
|
+ printf("#=============================================#\n");
|
115
|
211
|
}
|