Ajout de datasets artificiels

This commit is contained in:
Paul Faure 2022-01-09 10:35:05 +01:00
parent a41d8033c5
commit b9da01d3e0
14 changed files with 115721 additions and 89 deletions

View file

@ -21,6 +21,15 @@ def extract_data_3d(data_path):
return np.array([[x[0], x[1], x[2]] for x in databrut[0]])
def extract_data_txt(data_path):
    """Load a whitespace-separated numeric table from ``data_path + ".txt"``.

    Every non-blank line of the file becomes one row; each
    whitespace-separated token on the line is converted with ``float``.

    Parameters
    ----------
    data_path : str
        Path of the file *without* its ``.txt`` extension (the extension
        is appended here).

    Returns
    -------
    numpy.ndarray
        Array built from the parsed rows (2-D when every row has the same
        number of values, empty when the file has no data lines).

    Raises
    ------
    OSError
        If the file cannot be opened.
    ValueError
        If a token cannot be parsed as a float.
    """
    # The context manager guarantees the file handle is closed, even if a
    # token fails to parse.
    with open(data_path + ".txt") as source:
        # Blank lines (e.g. a trailing empty line) are skipped so they do
        # not produce empty rows that would make the array ragged.
        rows = [
            [float(token) for token in line.split()]
            for line in source
            if line.strip()
        ]
    return np.array(rows)
def scale_data(data):
    """Return *data* standardized by a freshly fitted ``StandardScaler``.

    A new ``preprocessing.StandardScaler`` is created on every call, fitted
    on *data* and immediately applied to it; the fitted scaler itself is
    not kept.
    """
    return preprocessing.StandardScaler().fit_transform(data)

View file

@ -0,0 +1,16 @@
84 152 100 52 96 186 169 107 36 186 139 200 100 104 88 114 54 186 77 152 155 70 188 65 214 116 75 55 124 65 154 177
65 113 75 199 203 126 122 165 167 199 76 186 109 165 141 66 74 109 162 168 85 195 209 203 122 119 112 50 215 110 118 152
153 106 172 119 203 132 158 59 121 99 33 194 53 196 143 33 134 70 144 168 52 53 216 46 61 66 53 181 54 56 67 73
88 50 94 163 35 152 212 80 62 39 182 114 148 70 132 167 118 165 108 184 207 40 60 215 175 117 67 143 160 216 112 145
141 206 77 151 182 149 160 88 77 119 199 106 100 204 37 113 179 171 187 81 79 212 204 156 180 51 33 199 68 198 69 120
73 213 76 166 152 116 164 180 144 218 74 195 136 46 207 93 57 33 169 73 147 77 50 58 181 198 115 142 204 208 107 86
122 69 159 211 167 127 172 198 114 187 38 94 196 208 196 89 187 86 133 215 123 69 84 213 192 167 165 135 45 149 187 97
36 121 105 154 35 219 178 34 77 51 183 121 51 91 181 123 215 154 102 108 180 56 135 138 97 52 121 147 214 62 153 54
110 74 168 57 193 104 82 64 68 46 48 88 103 59 188 100 191 166 85 76 96 208 62 211 84 209 160 160 182 107 52 196
89 58 42 76 171 104 61 221 146 158 204 180 172 200 49 217 108 91 101 46 137 131 36 185 186 190 161 212 57 124 67 97
43 84 211 84 54 132 69 156 96 187 185 191 76 82 85 41 147 198 42 58 77 78 46 169 43 137 198 34 198 66 71 42
120 142 54 161 108 54 67 195 61 208 75 186 59 82 105 121 52 162 136 167 63 99 114 213 73 207 133 48 157 99 60 83
83 53 168 145 181 90 168 135 33 185 155 129 128 83 36 198 142 220 144 196 212 36 184 61 34 35 41 57 169 110 208 53
144 203 63 87 182 84 155 206 158 178 82 44 206 201 194 146 157 73 182 159 53 146 110 174 160 101 83 198 93 217 129 213
73 48 70 139 75 173 184 77 64 35 165 110 191 156 91 139 182 124 66 74 144 221 35 179 65 105 116 118 61 175 49 208
105 53 169 77 188 142 155 181 128 63 215 129 95 62 94 161 216 116 91 122 194 68 196 192 114 102 58 78 80 132 97 75

View file

@ -0,0 +1,16 @@
167 102 189 123 155 52 73 81 91 108 208 38 195 46 110 187 159 146 212 66 138 42 44 132 151 185 172 211 183 141 85 214 112 175 42 167 113 121 218 58 52 195 216 76 42 186 65 92 126 212 185 43 216 150 183 80 106 165 129 76 52 164 171 70
146 108 68 193 145 175 171 108 90 33 129 159 191 96 212 173 179 163 192 47 34 67 56 44 122 116 181 46 156 168 102 63 165 71 207 45 107 181 140 107 42 119 170 61 220 155 164 214 87 133 177 34 217 190 100 104 98 48 202 188 210 106 186 200
203 161 157 221 69 99 67 107 37 47 217 129 44 171 160 221 176 102 57 170 61 175 144 101 201 128 113 199 96 141 186 35 190 153 168 125 72 116 199 212 179 189 190 152 123 123 186 139 159 111 202 207 170 203 189 216 82 106 169 211 72 218 86 121
160 196 214 126 77 175 87 160 169 36 200 121 145 148 121 191 204 153 189 169 158 185 91 199 47 130 104 220 106 108 133 60 106 158 143 171 149 189 131 109 105 52 54 97 144 36 61 114 121 215 120 57 217 52 103 158 155 47 177 55 39 155 76 53
147 167 54 189 57 140 147 202 123 176 57 96 134 116 178 152 198 125 61 163 198 35 196 87 143 192 95 210 201 179 126 80 41 130 153 62 120 131 101 87 113 144 53 183 108 101 153 163 144 130 115 157 126 155 183 89 40 214 154 50 117 175 111 63
206 103 163 190 197 166 119 128 185 144 188 187 76 123 119 208 97 160 134 158 96 137 154 104 203 127 54 87 160 197 141 64 149 169 115 44 146 192 38 66 121 115 155 89 177 60 166 94 145 211 161 76 70 82 38 200 57 52 96 201 191 43 125 206
206 49 56 90 139 85 209 116 116 51 177 56 177 42 129 169 180 142 143 57 122 132 176 174 50 92 95 88 176 199 78 118 185 159 93 157 111 132 66 61 60 136 108 152 154 218 35 191 85 105 45 104 152 90 162 132 103 71 103 109 116 131 204 118
125 218 94 163 129 175 57 153 159 182 207 157 73 104 73 138 175 153 55 210 191 41 169 129 146 149 63 155 63 191 97 68 110 201 56 62 36 47 162 77 187 146 212 87 131 189 175 189 204 112 147 119 192 33 99 64 91 76 116 72 43 93 174 187
184 213 103 138 117 162 200 105 145 115 100 190 73 115 108 151 110 216 89 154 105 194 86 160 199 65 65 155 40 90 162 93 115 218 204 89 81 159 79 117 74 57 57 103 177 74 76 74 132 199 159 137 71 213 39 186 212 112 84 41 36 210 164 113
206 38 205 220 124 50 145 57 95 51 50 65 135 46 56 170 185 135 115 118 216 179 176 220 82 179 198 128 220 176 153 211 176 111 117 104 82 85 71 81 177 121 143 58 71 106 167 111 40 114 191 123 143 96 205 74 77 61 95 86 39 69 63 142
159 135 82 134 143 125 54 137 118 36 116 43 80 68 203 145 36 181 138 58 90 40 197 192 108 206 204 36 96 210 117 111 89 117 104 40 103 218 174 142 47 173 177 186 112 42 205 45 187 110 162 113 91 63 81 135 215 216 71 65 54 198 95 105
173 83 86 159 154 207 125 207 44 52 170 215 212 173 162 67 154 106 140 160 77 196 37 146 49 152 136 145 43 117 95 34 62 194 130 88 66 94 143 99 86 164 205 203 62 84 215 48 211 161 178 83 204 165 93 209 36 204 147 36 84 139 219 138
217 209 159 222 41 57 157 66 112 174 221 103 215 89 168 65 124 86 158 52 108 63 159 112 186 153 144 94 36 104 115 131 138 105 221 60 64 138 36 48 198 154 187 219 175 41 72 214 148 207 180 132 76 83 56 176 65 40 66 179 101 74 118 32
176 127 140 173 70 213 147 219 196 136 138 107 210 72 162 49 51 92 179 126 186 108 81 216 47 77 125 61 213 52 184 190 145 114 38 159 197 181 44 160 105 144 37 207 220 49 127 193 83 195 123 107 88 77 58 39 56 119 207 167 94 119 110 104
172 96 204 98 144 97 101 189 152 93 106 43 137 65 149 199 130 184 140 145 204 173 193 198 172 56 191 151 132 221 210 139 61 164 114 50 145 187 110 98 142 134 115 81 88 41 165 175 85 48 69 53 45 148 74 34 215 162 143 141 70 68 161 80
177 73 110 135 85 153 143 73 210 208 148 50 40 165 51 201 47 102 198 55 192 42 89 189 104 86 183 162 60 145 122 104 133 199 167 51 147 167 192 220 85 75 57 72 43 150 155 53 164 171 106 115 99 78 88 48 81 213 114 126 196 214 220 75

View file

@ -0,0 +1,3 @@
1000 1000
800 1300
2000 1500

View file

@ -0,0 +1,6 @@
5000 5000
4800 6000
6000 6500
4200 5500
5500 5500
8000 4500

1024
new-data/d32.txt Normal file

File diff suppressed because it is too large Load diff

1024
new-data/d64.txt Normal file

File diff suppressed because it is too large Load diff

2250
new-data/n1.txt Normal file

File diff suppressed because it is too large Load diff

5500
new-data/n2.txt Normal file

File diff suppressed because it is too large Load diff

35
new-data/pluie.csv Normal file
View file

@ -0,0 +1,35 @@
Ville,JANVIERp,FEVRIERp,MARSp,AVRILp,MAIp,JUINp,JUILLETp,AOUTp,SEPTEMBREp,OCTOBREp,NOVEMBREp,DECEMBREp,Précipitations annuelles,JANVIERnb.j.pl,FEVRIERnb.j.pl,MARSnb.j.pl,AVRILnb.j.pl,MAInb.j.pl,JUINnb.j.pl,JUILLETnb.j.pl,AOUTnb.j.pl,SEPTEMBREnb.j.pl,OCTOBREnb.j.pl,NOVEMBREnb.j.pl,DECEMBREnb.j.pl,Nombre annuel de jours de pluie,Température moyenne annuelle,Amplitude annuelle des températures,Insolation annuelle,Latitude,Longitude,Précipitations de mai à aout,Précipitations sept-oct,Géographie
Ajaccio,78.00,69.00,51.00,39.00,43.00,23.00,10.00,15.00,43.00,81.00,105.00,96.00,653.00,12.00,10.00,9.00,9.00,8.00,4.00,1.00,2.00,6.00,10.00,11.00,13.00,95.00,14.71,14.50,2811.00,41.55,8.44,13.90,25.80,Sud
Angers,65.00,50.00,60.00,45.00,50.00,55.00,35.00,60.00,55.00,65.00,80.00,70.00,690.00,16.00,13.00,12.00,12.00,13.00,10.00,11.00,11.00,12.00,13.00,15.00,16.00,154.00,11.28,14.50,1899.00,47.28,-0.33,29.00,30.80,Ouest
Angouleme,79.00,68.00,64.00,62.00,70.00,58.00,53.00,66.00,69.00,70.00,79.00,88.00,826.00,16.00,14.00,13.00,12.00,14.00,11.00,12.00,12.00,12.00,13.00,15.00,16.00,160.00,12.02,14.90,1989.00,45.39,0.09,29.90,31.00,Ouest
Besancon,94.00,87.00,75.00,74.00,86.00,107.00,80.00,116.00,106.00,78.00,92.00,93.00,1088.00,17.00,14.00,12.00,14.00,14.00,14.00,14.00,13.00,13.00,14.00,15.00,15.00,169.00,10.04,17.60,1897.00,47.15,6.02,35.80,35.50,Est
Biarritz,128.00,105.00,98.00,102.00,100.00,91.00,69.00,123.00,155.00,152.00,175.00,176.00,1474.00,16.00,14.00,13.00,15.00,17.00,14.00,13.00,13.00,14.00,15.00,16.00,17.00,177.00,13.58,12.30,1921.00,43.29,-1.34,26.00,36.80,Ouest
Bordeaux,100.00,84.00,66.00,57.00,64.00,71.00,52.00,65.00,88.00,84.00,99.00,117.00,947.00,16.00,13.00,13.00,13.00,14.00,11.00,11.00,12.00,13.00,14.00,15.00,17.00,162.00,13.33,15.40,2076.00,44.50,-0.34,26.60,31.30,Ouest
Brest,130.00,98.00,89.00,77.00,74.00,60.00,51.00,80.00,95.00,108.00,136.00,159.00,1157.00,22.00,16.00,15.00,15.00,14.00,13.00,14.00,15.00,16.00,19.00,20.00,22.00,201.00,10.77,10.20,1757.00,48.24,-4.29,22.90,28.20,Ouest
Caen,65.00,61.00,45.00,44.00,53.00,52.00,45.00,57.00,66.00,75.00,79.00,71.00,713.00,17.00,14.00,12.00,13.00,14.00,12.00,13.00,13.00,13.00,15.00,16.00,17.00,169.00,10.45,12.70,1777.00,49.11,-0.21,29.00,35.60,Nord
Clermont-Fd,28.00,27.00,30.00,41.00,78.00,79.00,48.00,70.00,58.00,43.00,39.00,30.00,571.00,12.00,11.00,9.00,12.00,12.00,12.00,9.00,10.00,10.00,11.00,12.00,12.00,132.00,10.94,16.80,1899.00,45.47,3.05,48.20,51.30,Est
Dijon,62.00,48.00,51.00,48.00,68.00,79.00,44.00,79.00,74.00,53.00,67.00,61.00,734.00,16.00,13.00,10.00,11.00,12.00,12.00,11.00,11.00,11.00,12.00,14.00,14.00,147.00,10.50,18.30,1934.00,47.19,5.01,36.80,37.10,Est
Embrun,61.00,55.00,55.00,48.00,47.00,63.00,41.00,65.00,60.00,60.00,81.00,62.00,698.00,9.00,9.00,8.00,9.00,11.00,10.00,7.00,8.00,8.00,9.00,10.00,10.00,108.00,9.49,18.40,2604.00,44.34,6.30,30.90,32.10,Sud
Grenoble,80.00,79.00,69.00,69.00,83.00,94.00,74.00,96.00,88.00,85.00,90.00,98.00,1005.00,14.00,11.00,11.00,12.00,14.00,11.00,10.00,11.00,11.00,12.00,13.00,14.00,144.00,10.98,18.60,2100.00,45.10,5.43,34.50,34.50,Est
Lille,45.00,43.00,38.00,37.00,45.00,57.00,62.00,64.00,53.00,56.00,56.00,56.00,612.00,18.00,14.00,13.00,14.00,13.00,12.00,13.00,13.00,14.00,14.00,16.00,17.00,171.00,9.73,14.70,1641.00,50.38,3.04,37.30,37.10,Nord
Limoges,87.00,75.00,68.00,69.00,72.00,71.00,56.00,73.00,87.00,72.00,82.00,98.00,910.00,17.00,14.00,13.00,13.00,14.00,12.00,12.00,12.00,12.00,14.00,15.00,17.00,165.00,10.59,15.30,1853.00,45.50,1.16,29.90,33.00,Ouest
Lyon,53.00,50.00,60.00,54.00,67.00,84.00,55.00,104.00,86.00,73.00,80.00,62.00,828.00,15.00,12.00,11.00,11.00,13.00,11.00,10.00,11.00,11.00,12.00,14.00,14.00,145.00,11.36,18.60,2036.00,45.45,4.51,37.40,42.10,Est
Marseille,36.00,49.00,40.00,35.00,38.00,33.00,13.00,27.00,65.00,67.00,69.00,61.00,533.00,8.00,6.00,7.00,6.00,7.00,4.00,2.00,4.00,6.00,8.00,8.00,10.00,76.00,14.23,17.80,2866.00,43.18,5.24,20.80,41.00,Sud
Montpellier,56.00,59.00,69.00,46.00,47.00,41.00,20.00,52.00,78.00,125.00,70.00,73.00,736.00,8.00,6.00,8.00,8.00,9.00,5.00,3.00,6.00,7.00,9.00,9.00,10.00,88.00,13.89,17.10,2709.00,43.36,3.53,21.70,44.90,Sud
Nancy,66.00,58.00,43.00,45.00,62.00,70.00,58.00,76.00,65.00,52.00,59.00,67.00,721.00,16.00,13.00,12.00,13.00,13.00,13.00,12.00,13.00,12.00,13.00,15.00,16.00,161.00,9.50,17.50,1633.00,48.41,6.12,36.90,33.90,Est
Nantes,83.00,65.00,53.00,48.00,54.00,52.00,42.00,66.00,80.00,77.00,95.00,94.00,809.00,18.00,14.00,14.00,11.00,13.00,11.00,12.00,12.00,14.00,15.00,16.00,18.00,168.00,11.69,13.80,1901.00,47.13,-1.33,26.50,33.60,Ouest
Nice,67.00,83.00,71.00,70.00,39.00,37.00,21.00,38.00,83.00,109.00,158.00,92.00,868.00,9.00,7.00,8.00,9.00,8.00,5.00,2.00,4.00,7.00,9.00,9.00,9.00,86.00,14.84,15.20,2779.00,43.42,7.15,15.60,33.10,Sud
Nimes,52.00,53.00,57.00,45.00,50.00,40.00,25.00,40.00,75.00,100.00,83.00,60.00,680.00,8.00,6.00,9.00,8.00,9.00,6.00,4.00,6.00,7.00,9.00,10.00,10.00,92.00,14.18,17.90,2628.00,43.50,4.21,22.80,43.20,Sud
Orleans,57.00,48.00,43.00,46.00,52.00,54.00,47.00,54.00,51.00,54.00,61.00,54.00,621.00,16.00,13.00,12.00,12.00,13.00,11.00,11.00,12.00,12.00,13.00,15.00,16.00,156.00,10.53,15.70,1799.00,47.55,1.54,33.30,33.10,Ouest
Paris,53.00,48.00,40.00,45.00,53.00,57.00,54.00,61.00,54.00,50.00,58.00,51.00,624.00,17.00,14.00,12.00,13.00,12.00,12.00,12.00,13.00,13.00,13.00,15.00,16.00,162.00,11.18,15.70,1814.00,48.52,2.20,36.10,34.70,Nord
Perpignan,27.00,52.00,59.00,47.00,49.00,33.00,27.00,28.00,69.00,97.00,70.00,71.00,629.00,7.00,6.00,8.00,7.00,9.00,7.00,5.00,6.00,7.00,8.00,6.00,9.00,85.00,15.24,16.30,2603.00,42.41,2.53,21.80,44.10,Sud
Poitiers,65.00,58.00,56.00,49.00,55.00,55.00,46.00,59.00,52.00,61.00,78.00,68.00,702.00,16.00,13.00,13.00,12.00,14.00,11.00,10.00,11.00,12.00,12.00,15.00,16.00,155.00,11.28,15.10,2024.00,46.35,0.20,30.60,29.30,Ouest
Reims,43.00,44.00,42.00,37.00,52.00,53.00,47.00,58.00,54.00,43.00,52.00,50.00,575.00,17.00,15.00,12.00,13.00,13.00,13.00,12.00,13.00,12.00,13.00,16.00,16.00,165.00,10.06,16.40,1702.00,49.15,4.02,36.50,35.40,Est
Rennes,57.00,50.00,45.00,43.00,46.00,48.00,36.00,57.00,53.00,60.00,73.00,66.00,634.00,18.00,14.00,14.00,12.00,13.00,11.00,12.00,12.00,13.00,15.00,16.00,18.00,168.00,11.13,13.10,1835.00,48.05,-1.41,29.50,32.20,Ouest
Rouen,65.00,58.00,50.00,44.00,50.00,57.00,49.00,67.00,70.00,72.00,68.00,66.00,716.00,17.00,15.00,12.00,12.00,13.00,12.00,12.00,13.00,13.00,15.00,16.00,18.00,168.00,10.34,14.20,1694.00,49.26,1.05,31.10,37.50,Nord
St-Quentin,52.00,50.00,46.00,44.00,52.00,63.00,61.00,69.00,67.00,52.00,63.00,65.00,684.00,17.00,14.00,12.00,12.00,12.00,12.00,13.00,14.00,13.00,14.00,15.00,16.00,164.00,9.85,16.40,1661.00,49.51,3.17,35.80,36.30,Nord
Strasbourg,51.00,44.00,42.00,58.00,71.00,88.00,73.00,90.00,61.00,43.00,51.00,47.00,719.00,15.00,13.00,12.00,13.00,13.00,14.00,14.00,13.00,12.00,12.00,13.00,14.00,158.00,9.72,18.60,1696.00,48.35,7.45,44.80,37.40,Est
Toulon,76.00,86.00,82.00,60.00,49.00,35.00,12.00,31.00,77.00,105.00,117.00,107.00,837.00,9.00,6.00,8.00,7.00,7.00,4.00,2.00,4.00,5.00,9.00,9.00,11.00,81.00,15.28,14.00,2917.00,43.07,5.56,15.20,31.80,Sud
Toulouse,53.00,50.00,52.00,55.00,65.00,65.00,44.00,43.00,57.00,49.00,58.00,65.00,656.00,14.00,12.00,11.00,12.00,13.00,10.00,9.00,9.00,10.00,11.00,12.00,14.00,137.00,12.68,16.20,2081.00,43.36,1.26,33.10,32.40,Sud
Tours,63.00,55.00,52.00,51.00,53.00,58.00,47.00,60.00,60.00,55.00,68.00,65.00,687.00,16.00,13.00,12.00,12.00,13.00,11.00,11.00,12.00,13.00,13.00,15.00,16.00,157.00,11.22,15.60,1859.00,47.23,0.41,31.70,32.10,Ouest
Vichy,50.00,45.00,51.00,52.00,84.00,84.00,63.00,86.00,75.00,58.00,58.00,55.00,761.00,17.00,14.00,12.00,13.00,14.00,12.00,11.00,12.00,12.00,13.00,15.00,16.00,161.00,10.72,16.90,1873.00,46.08,3.26,41.70,42.00,Est
1 Ville JANVIERp FEVRIERp MARSp AVRILp MAIp JUINp JUILLETp AOUTp SEPTEMBREp OCTOBREp NOVEMBREp DECEMBREp Précipitations annuelles JANVIERnb.j.pl FEVRIERnb.j.pl MARSnb.j.pl AVRILnb.j.pl MAInb.j.pl JUINnb.j.pl JUILLETnb.j.pl AOUTnb.j.pl SEPTEMBREnb.j.pl OCTOBREnb.j.pl NOVEMBREnb.j.pl DECEMBREnb.j.pl Nombre annuel de jours de pluie Température moyenne annuelle Amplitude annuelle des températures Insolation annuelle Latitude Longitude Précipitations de mai à aout Précipitations sept-oct Géographie
2 Ajaccio 78.00 69.00 51.00 39.00 43.00 23.00 10.00 15.00 43.00 81.00 105.00 96.00 653.00 12.00 10.00 9.00 9.00 8.00 4.00 1.00 2.00 6.00 10.00 11.00 13.00 95.00 14.71 14.50 2811.00 41.55 8.44 13.90 25.80 Sud
3 Angers 65.00 50.00 60.00 45.00 50.00 55.00 35.00 60.00 55.00 65.00 80.00 70.00 690.00 16.00 13.00 12.00 12.00 13.00 10.00 11.00 11.00 12.00 13.00 15.00 16.00 154.00 11.28 14.50 1899.00 47.28 -0.33 29.00 30.80 Ouest
4 Angouleme 79.00 68.00 64.00 62.00 70.00 58.00 53.00 66.00 69.00 70.00 79.00 88.00 826.00 16.00 14.00 13.00 12.00 14.00 11.00 12.00 12.00 12.00 13.00 15.00 16.00 160.00 12.02 14.90 1989.00 45.39 0.09 29.90 31.00 Ouest
5 Besancon 94.00 87.00 75.00 74.00 86.00 107.00 80.00 116.00 106.00 78.00 92.00 93.00 1088.00 17.00 14.00 12.00 14.00 14.00 14.00 14.00 13.00 13.00 14.00 15.00 15.00 169.00 10.04 17.60 1897.00 47.15 6.02 35.80 35.50 Est
6 Biarritz 128.00 105.00 98.00 102.00 100.00 91.00 69.00 123.00 155.00 152.00 175.00 176.00 1474.00 16.00 14.00 13.00 15.00 17.00 14.00 13.00 13.00 14.00 15.00 16.00 17.00 177.00 13.58 12.30 1921.00 43.29 -1.34 26.00 36.80 Ouest
7 Bordeaux 100.00 84.00 66.00 57.00 64.00 71.00 52.00 65.00 88.00 84.00 99.00 117.00 947.00 16.00 13.00 13.00 13.00 14.00 11.00 11.00 12.00 13.00 14.00 15.00 17.00 162.00 13.33 15.40 2076.00 44.50 -0.34 26.60 31.30 Ouest
8 Brest 130.00 98.00 89.00 77.00 74.00 60.00 51.00 80.00 95.00 108.00 136.00 159.00 1157.00 22.00 16.00 15.00 15.00 14.00 13.00 14.00 15.00 16.00 19.00 20.00 22.00 201.00 10.77 10.20 1757.00 48.24 -4.29 22.90 28.20 Ouest
9 Caen 65.00 61.00 45.00 44.00 53.00 52.00 45.00 57.00 66.00 75.00 79.00 71.00 713.00 17.00 14.00 12.00 13.00 14.00 12.00 13.00 13.00 13.00 15.00 16.00 17.00 169.00 10.45 12.70 1777.00 49.11 -0.21 29.00 35.60 Nord
10 Clermont-Fd 28.00 27.00 30.00 41.00 78.00 79.00 48.00 70.00 58.00 43.00 39.00 30.00 571.00 12.00 11.00 9.00 12.00 12.00 12.00 9.00 10.00 10.00 11.00 12.00 12.00 132.00 10.94 16.80 1899.00 45.47 3.05 48.20 51.30 Est
11 Dijon 62.00 48.00 51.00 48.00 68.00 79.00 44.00 79.00 74.00 53.00 67.00 61.00 734.00 16.00 13.00 10.00 11.00 12.00 12.00 11.00 11.00 11.00 12.00 14.00 14.00 147.00 10.50 18.30 1934.00 47.19 5.01 36.80 37.10 Est
12 Embrun 61.00 55.00 55.00 48.00 47.00 63.00 41.00 65.00 60.00 60.00 81.00 62.00 698.00 9.00 9.00 8.00 9.00 11.00 10.00 7.00 8.00 8.00 9.00 10.00 10.00 108.00 9.49 18.40 2604.00 44.34 6.30 30.90 32.10 Sud
13 Grenoble 80.00 79.00 69.00 69.00 83.00 94.00 74.00 96.00 88.00 85.00 90.00 98.00 1005.00 14.00 11.00 11.00 12.00 14.00 11.00 10.00 11.00 11.00 12.00 13.00 14.00 144.00 10.98 18.60 2100.00 45.10 5.43 34.50 34.50 Est
14 Lille 45.00 43.00 38.00 37.00 45.00 57.00 62.00 64.00 53.00 56.00 56.00 56.00 612.00 18.00 14.00 13.00 14.00 13.00 12.00 13.00 13.00 14.00 14.00 16.00 17.00 171.00 9.73 14.70 1641.00 50.38 3.04 37.30 37.10 Nord
15 Limoges 87.00 75.00 68.00 69.00 72.00 71.00 56.00 73.00 87.00 72.00 82.00 98.00 910.00 17.00 14.00 13.00 13.00 14.00 12.00 12.00 12.00 12.00 14.00 15.00 17.00 165.00 10.59 15.30 1853.00 45.50 1.16 29.90 33.00 Ouest
16 Lyon 53.00 50.00 60.00 54.00 67.00 84.00 55.00 104.00 86.00 73.00 80.00 62.00 828.00 15.00 12.00 11.00 11.00 13.00 11.00 10.00 11.00 11.00 12.00 14.00 14.00 145.00 11.36 18.60 2036.00 45.45 4.51 37.40 42.10 Est
17 Marseille 36.00 49.00 40.00 35.00 38.00 33.00 13.00 27.00 65.00 67.00 69.00 61.00 533.00 8.00 6.00 7.00 6.00 7.00 4.00 2.00 4.00 6.00 8.00 8.00 10.00 76.00 14.23 17.80 2866.00 43.18 5.24 20.80 41.00 Sud
18 Montpellier 56.00 59.00 69.00 46.00 47.00 41.00 20.00 52.00 78.00 125.00 70.00 73.00 736.00 8.00 6.00 8.00 8.00 9.00 5.00 3.00 6.00 7.00 9.00 9.00 10.00 88.00 13.89 17.10 2709.00 43.36 3.53 21.70 44.90 Sud
19 Nancy 66.00 58.00 43.00 45.00 62.00 70.00 58.00 76.00 65.00 52.00 59.00 67.00 721.00 16.00 13.00 12.00 13.00 13.00 13.00 12.00 13.00 12.00 13.00 15.00 16.00 161.00 9.50 17.50 1633.00 48.41 6.12 36.90 33.90 Est
20 Nantes 83.00 65.00 53.00 48.00 54.00 52.00 42.00 66.00 80.00 77.00 95.00 94.00 809.00 18.00 14.00 14.00 11.00 13.00 11.00 12.00 12.00 14.00 15.00 16.00 18.00 168.00 11.69 13.80 1901.00 47.13 -1.33 26.50 33.60 Ouest
21 Nice 67.00 83.00 71.00 70.00 39.00 37.00 21.00 38.00 83.00 109.00 158.00 92.00 868.00 9.00 7.00 8.00 9.00 8.00 5.00 2.00 4.00 7.00 9.00 9.00 9.00 86.00 14.84 15.20 2779.00 43.42 7.15 15.60 33.10 Sud
22 Nimes 52.00 53.00 57.00 45.00 50.00 40.00 25.00 40.00 75.00 100.00 83.00 60.00 680.00 8.00 6.00 9.00 8.00 9.00 6.00 4.00 6.00 7.00 9.00 10.00 10.00 92.00 14.18 17.90 2628.00 43.50 4.21 22.80 43.20 Sud
23 Orleans 57.00 48.00 43.00 46.00 52.00 54.00 47.00 54.00 51.00 54.00 61.00 54.00 621.00 16.00 13.00 12.00 12.00 13.00 11.00 11.00 12.00 12.00 13.00 15.00 16.00 156.00 10.53 15.70 1799.00 47.55 1.54 33.30 33.10 Ouest
24 Paris 53.00 48.00 40.00 45.00 53.00 57.00 54.00 61.00 54.00 50.00 58.00 51.00 624.00 17.00 14.00 12.00 13.00 12.00 12.00 12.00 13.00 13.00 13.00 15.00 16.00 162.00 11.18 15.70 1814.00 48.52 2.20 36.10 34.70 Nord
25 Perpignan 27.00 52.00 59.00 47.00 49.00 33.00 27.00 28.00 69.00 97.00 70.00 71.00 629.00 7.00 6.00 8.00 7.00 9.00 7.00 5.00 6.00 7.00 8.00 6.00 9.00 85.00 15.24 16.30 2603.00 42.41 2.53 21.80 44.10 Sud
26 Poitiers 65.00 58.00 56.00 49.00 55.00 55.00 46.00 59.00 52.00 61.00 78.00 68.00 702.00 16.00 13.00 13.00 12.00 14.00 11.00 10.00 11.00 12.00 12.00 15.00 16.00 155.00 11.28 15.10 2024.00 46.35 0.20 30.60 29.30 Ouest
27 Reims 43.00 44.00 42.00 37.00 52.00 53.00 47.00 58.00 54.00 43.00 52.00 50.00 575.00 17.00 15.00 12.00 13.00 13.00 13.00 12.00 13.00 12.00 13.00 16.00 16.00 165.00 10.06 16.40 1702.00 49.15 4.02 36.50 35.40 Est
28 Rennes 57.00 50.00 45.00 43.00 46.00 48.00 36.00 57.00 53.00 60.00 73.00 66.00 634.00 18.00 14.00 14.00 12.00 13.00 11.00 12.00 12.00 13.00 15.00 16.00 18.00 168.00 11.13 13.10 1835.00 48.05 -1.41 29.50 32.20 Ouest
29 Rouen 65.00 58.00 50.00 44.00 50.00 57.00 49.00 67.00 70.00 72.00 68.00 66.00 716.00 17.00 15.00 12.00 12.00 13.00 12.00 12.00 13.00 13.00 15.00 16.00 18.00 168.00 10.34 14.20 1694.00 49.26 1.05 31.10 37.50 Nord
30 St-Quentin 52.00 50.00 46.00 44.00 52.00 63.00 61.00 69.00 67.00 52.00 63.00 65.00 684.00 17.00 14.00 12.00 12.00 12.00 12.00 13.00 14.00 13.00 14.00 15.00 16.00 164.00 9.85 16.40 1661.00 49.51 3.17 35.80 36.30 Nord
31 Strasbourg 51.00 44.00 42.00 58.00 71.00 88.00 73.00 90.00 61.00 43.00 51.00 47.00 719.00 15.00 13.00 12.00 13.00 13.00 14.00 14.00 13.00 12.00 12.00 13.00 14.00 158.00 9.72 18.60 1696.00 48.35 7.45 44.80 37.40 Est
32 Toulon 76.00 86.00 82.00 60.00 49.00 35.00 12.00 31.00 77.00 105.00 117.00 107.00 837.00 9.00 6.00 8.00 7.00 7.00 4.00 2.00 4.00 5.00 9.00 9.00 11.00 81.00 15.28 14.00 2917.00 43.07 5.56 15.20 31.80 Sud
33 Toulouse 53.00 50.00 52.00 55.00 65.00 65.00 44.00 43.00 57.00 49.00 58.00 65.00 656.00 14.00 12.00 11.00 12.00 13.00 10.00 9.00 9.00 10.00 11.00 12.00 14.00 137.00 12.68 16.20 2081.00 43.36 1.26 33.10 32.40 Sud
34 Tours 63.00 55.00 52.00 51.00 53.00 58.00 47.00 60.00 60.00 55.00 68.00 65.00 687.00 16.00 13.00 12.00 12.00 13.00 11.00 11.00 12.00 13.00 13.00 15.00 16.00 157.00 11.22 15.60 1859.00 47.23 0.41 31.70 32.10 Ouest
35 Vichy 50.00 45.00 51.00 52.00 84.00 84.00 63.00 86.00 75.00 58.00 58.00 55.00 761.00 17.00 14.00 12.00 13.00 14.00 12.00 11.00 12.00 12.00 13.00 15.00 16.00 161.00 10.72 16.90 1873.00 46.08 3.26 41.70 42.00 Est

105600
new-data/w2.txt Normal file

File diff suppressed because it is too large Load diff

View file

@ -13,7 +13,7 @@ from myplotlib import print_1d_data, print_2d_data
from mydatalib import extract_data_2d, scale_data, apply_DBSCAN, evaluate
path = './artificial/'
dataset_name = "banana"
dataset_name = "s-set1"
save = True
print("-----------------------------------------------------------")
@ -30,23 +30,22 @@ print_2d_data(data_scaled, dataset_name=dataset_name +
print("-----------------------------------------------------------")
print(" Calcul du voisinage")
n = 50
n = 7
neighbors = NearestNeighbors(n_neighbors=n)
neighbors.fit(data)
distances, indices = neighbors.kneighbors(data)
neighbors.fit(data_scaled)
distances, indices = neighbors.kneighbors(data_scaled)
distances = list(map(lambda x: sum(x[1:n-1])/(len(x)-1), distances))
print(distances)
distances = np.sort(distances, axis=0)
print(distances)
print_1d_data(distances, range(1, len(distances)+1), x_name="distance_moyenne",
y_name="nombre_de_points", stop=False, save=False)
y_name="nombre_de_points", dataset_name=dataset_name,
method_name="DBSCAN", stop=False, save=save)
print("-----------------------------------------------------------")
print(" Création clusters : DBSCAN")
params = []
for i in range(1, 20):
params += [(i/100, 5)]
for i in range(1, 30):
params += [(0.1, i)]
durations = []
silouettes = []
daviess = []
@ -55,7 +54,7 @@ clusters = []
noise_points = []
for (distance, min_pts) in params:
# Application du clustering agglomeratif
(model, duration) = apply_DBSCAN(data, distance, min_pts)
(model, duration) = apply_DBSCAN(data_scaled, distance, min_pts)
cl_pred = model.labels_
# Affichage des clusters# Affichage des clusters
print_2d_data(data_scaled, dataset_name=dataset_name,

View file

@ -5,90 +5,200 @@ Created on Wed Dec 8 16:07:28 2021
@author: pfaure
"""
from numpy import arange
from myplotlib import print_1d_data
from mydatalib import (extract_data_txt, scale_data, apply_kmeans,
apply_agglomerative_clustering, apply_DBSCAN,
apply_mean_shift, evaluate)
from sklearn.neighbors import NearestNeighbors
import numpy as np
from myplotlib import print_1d_data, print_2d_data
from mydatalib import extract_data_2d, scale_data, apply_DBSCAN, evaluate
path = './artificial/'
dataset_name = "banana"
path = './new-data/'
dataset_name_list = ["d32", "d64"]
#eps_list = [0.6, 0.75, 0.2, 0.8, 0.8]
eps_list = [0.8, 0.8]
save = True
for i in range(0, 2):
dataset_name = dataset_name_list[i]
eps = eps_list[i]
print("-----------------------------------------------------------")
print(" Chargement du dataset : " + dataset_name)
data = extract_data_2d(path + dataset_name)
print_2d_data(data, dataset_name=dataset_name +
"_brutes", stop=False, save=save)
print("-----------------------------------------------------------")
print(" Chargement du dataset : " + dataset_name)
data = extract_data_txt(path + dataset_name)
print("-----------------------------------------------------------")
print(" Mise à l'échelle")
data_scaled = scale_data(data)
print_2d_data(data_scaled, dataset_name=dataset_name +
"_scaled", stop=False, save=save)
print("-----------------------------------------------------------")
print(" Mise à l'échelle")
data_scaled = scale_data(data)
print("-----------------------------------------------------------")
print(" Calcul du voisinage")
n = 50
neighbors = NearestNeighbors(n_neighbors=n)
neighbors.fit(data)
distances, indices = neighbors.kneighbors(data)
distances = list(map(lambda x: sum(x[1:n-1])/(len(x)-1), distances))
print(distances)
distances = np.sort(distances, axis=0)
print(distances)
print_1d_data(distances, range(1, len(distances)+1), x_name="distance_moyenne",
y_name="nombre_de_points", stop=False, save=False)
k_max = 10
print("-----------------------------------------------------------")
print(" Application de k-means")
# Application de k-means pour plusieurs valeurs de k
# et evaluation de la solution
k = []
durations = []
silouettes = []
daviess = []
calinskis = []
inerties = []
iterations = []
for i in range(2, k_max):
# Application de k-means
(model, duration) = apply_kmeans(data_scaled, k=i, init="k-means++")
# Evaluation de la solution de clustering
(silouette, davies, calinski) = evaluate(data_scaled, model)
# Enregistrement des valeurs
k += [i]
durations += [duration]
silouettes += [silouette]
daviess += [davies]
calinskis += [calinski]
inerties += [model.inertia_]
iterations += [model.n_iter_]
# Affichage des résultats
print_1d_data(k, k, x_name="k", y_name="k", dataset_name=dataset_name,
method_name="k-means", stop=False, save=save)
print_1d_data(k, durations, x_name="k", y_name="temps_de_calcul", y_unit="ms",
dataset_name=dataset_name, method_name="k-means",
stop=False, save=save)
print_1d_data(k, silouettes, x_name="k", y_name="coeficient_de_silhouette",
dataset_name=dataset_name, method_name="k-means",
stop=False, save=save)
print_1d_data(k, daviess, x_name="k", y_name="coeficient_de_Davies",
dataset_name=dataset_name, method_name="k-means",
stop=False, save=save)
print_1d_data(k, calinskis, x_name="k", y_name="coeficient_de_Calinski",
dataset_name=dataset_name, method_name="k-means",
stop=False, save=save)
print_1d_data(k, inerties, x_name="k", y_name="inertie",
dataset_name=dataset_name, method_name="k-means",
stop=False, save=save)
print_1d_data(k, iterations, x_name="k", y_name="nombre_d_iterations",
dataset_name=dataset_name, method_name="k-means",
stop=True, save=save)
print("-----------------------------------------------------------")
print(" Création clusters : DBSCAN")
params = []
for i in range(1, 20):
params += [(i/100, 5)]
durations = []
silouettes = []
daviess = []
calinskis = []
clusters = []
noise_points = []
for (distance, min_pts) in params:
# Application du clustering agglomeratif
(model, duration) = apply_DBSCAN(data, distance, min_pts)
cl_pred = model.labels_
# Affichage des clusters# Affichage des clusters
print_2d_data(data_scaled, dataset_name=dataset_name,
method_name="DBSCAN-Eps=" +
str(distance)+"-Minpt="+str(min_pts),
k=0, stop=False, save=save, c=cl_pred)
# Evaluation de la solution de clustering
(silouette, davies, calinski) = evaluate(data_scaled, model)
# Enregistrement des valeurs
durations += [duration]
silouettes += [silouette]
daviess += [davies]
calinskis += [calinski]
clusters += [len(set(cl_pred)) - (1 if -1 in cl_pred else 0)]
noise_points += [list(cl_pred).count(-1)]
print("-----------------------------------------------------------")
print(" Création clusters : agglomerative ")
# Application du clustering agglomeratif pour plusieurs valeurs de k
# et evaluation de la solution
linkage = "ward"
k = []
durations = []
silouettes = []
daviess = []
calinskis = []
for i in range(2, k_max):
# Application du clustering agglomeratif
(model, duration) = apply_agglomerative_clustering(
data_scaled, k=i, linkage=linkage)
# Evaluation de la solution de clustering
(silouette, davies, calinski) = evaluate(data_scaled, model)
# Enregistrement des valeurs
k += [i]
durations += [duration]
silouettes += [silouette]
daviess += [davies]
calinskis += [calinski]
# Affichage des résultats
params = [str(i) for i in params]
print_1d_data(params, durations, x_name="(eps,min_pts)",
y_name="temps_de_calcul", y_unit="ms", dataset_name=dataset_name,
method_name="DBSCAN", stop=False, save=save)
print_1d_data(params, silouettes, x_name="(eps,min_pts)",
y_name="coeficient_de_silhouette", dataset_name=dataset_name,
method_name="DBSCAN", stop=False, save=save)
print_1d_data(params, daviess, x_name="(eps,min_pts)",
y_name="coeficient_de_Davies", dataset_name=dataset_name,
method_name="DBSCAN", stop=False, save=save)
print_1d_data(params, calinskis, x_name="(eps,min_pts)",
y_name="coeficient_de_Calinski", dataset_name=dataset_name,
method_name="DBSCAN", stop=False, save=save)
print_1d_data(params, clusters, x_name="(eps,min_pts)",
y_name="nombre_de_clusters", dataset_name=dataset_name,
method_name="DBSCAN", stop=False, save=save)
print_1d_data(params, noise_points, x_name="(eps,min_pts)",
y_name="points_de_bruit", dataset_name=dataset_name,
method_name="DBSCAN", stop=False, save=save)
# Affichage des résultats
print_1d_data(k, k, x_name="k", y_name="k", dataset_name=dataset_name,
method_name="agglomerative_" + linkage, stop=False, save=save)
print_1d_data(k, durations, x_name="k", y_name="temps_de_calcul", y_unit="ms",
dataset_name=dataset_name,
method_name="agglomerative_" + linkage, stop=False, save=save)
print_1d_data(k, silouettes, x_name="k", y_name="coeficient_de_silhouette",
dataset_name=dataset_name,
method_name="agglomerative_" + linkage, stop=False, save=save)
print_1d_data(k, daviess, x_name="k", y_name="coeficient_de_Davies",
dataset_name=dataset_name,
method_name="agglomerative_" + linkage, stop=False, save=save)
print_1d_data(k, calinskis, x_name="k", y_name="coeficient_de_Calinski",
dataset_name=dataset_name,
method_name="agglomerative_" + linkage, stop=False, save=save)
min_sample_max = 30
print("-----------------------------------------------------------")
print(" Création clusters : DBSCAN")
params = []
for i in range(1, min_sample_max):
params += [(eps, i)]
durations = []
silouettes = []
daviess = []
calinskis = []
clusters = []
noise_points = []
for (distance, min_pts) in params:
# Application du clustering agglomeratif
(model, duration) = apply_DBSCAN(data_scaled, distance, min_pts)
cl_pred = model.labels_
# Evaluation de la solution de clustering
(silouette, davies, calinski) = evaluate(data_scaled, model)
# Enregistrement des valeurs
durations += [duration]
silouettes += [silouette]
daviess += [davies]
calinskis += [calinski]
clusters += [len(set(cl_pred)) - (1 if -1 in cl_pred else 0)]
noise_points += [list(cl_pred).count(-1)]
# Affichage des résultats
params = [str(i) for i in params]
print_1d_data(params, durations, x_name="(eps,min_pts)",
y_name="temps_de_calcul", y_unit="ms", dataset_name=dataset_name,
method_name="DBSCAN", stop=False, save=save)
print_1d_data(params, silouettes, x_name="(eps,min_pts)",
y_name="coeficient_de_silhouette", dataset_name=dataset_name,
method_name="DBSCAN", stop=False, save=save)
print_1d_data(params, daviess, x_name="(eps,min_pts)",
y_name="coeficient_de_Davies", dataset_name=dataset_name,
method_name="DBSCAN", stop=False, save=save)
print_1d_data(params, calinskis, x_name="(eps,min_pts)",
y_name="coeficient_de_Calinski", dataset_name=dataset_name,
method_name="DBSCAN", stop=False, save=save)
print_1d_data(params, clusters, x_name="(eps,min_pts)",
y_name="nombre_de_clusters", dataset_name=dataset_name,
method_name="DBSCAN", stop=False, save=save)
print_1d_data(params, noise_points, x_name="(eps,min_pts)",
y_name="points_de_bruit", dataset_name=dataset_name,
method_name="DBSCAN", stop=False, save=save)
print("-----------------------------------------------------------")
print(" Création clusters : mean-shift")
# Application de Affinity Propagation pour plusieurs valeurs de préférence
# et evaluation de la solution
k_max = 2
k = []
durations = []
silouettes = []
daviess = []
calinskis = []
for bandwidth in arange(0.1, k_max, 0.2):
# Application du clustering
(model, duration) = apply_mean_shift(
data_scaled, bandwidth=bandwidth)
# Evaluation de la solution de clustering
(silouette, davies, calinski) = evaluate(data_scaled, model)
# Enregistrement des valeurs
k += [bandwidth]
durations += [duration]
silouettes += [silouette]
daviess += [davies]
calinskis += [calinski]
# Affichage des résultats
print_1d_data(k, k, x_name="k", y_name="k", dataset_name=dataset_name,
method_name="mean-shift", stop=False, save=save)
print_1d_data(k, durations, x_name="k", y_name="temps_de_calcul", y_unit="ms",
dataset_name=dataset_name,
method_name="mean-shift", stop=False, save=save)
print_1d_data(k, silouettes, x_name="k", y_name="coeficient_de_silhouette",
dataset_name=dataset_name,
method_name="mean-shift", stop=False, save=save)
print_1d_data(k, daviess, x_name="k", y_name="coeficient_de_Davies",
dataset_name=dataset_name,
method_name="mean-shift", stop=False, save=save)
print_1d_data(k, calinskis, x_name="k", y_name="coeficient_de_Calinski",
dataset_name=dataset_name,
method_name="mean-shift", stop=False, save=save)

40
tp5-preprocessing.py Normal file
View file

@ -0,0 +1,40 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Dec 8 16:07:28 2021
@author: pfaure
"""
from sklearn.neighbors import NearestNeighbors
import numpy as np
from myplotlib import print_1d_data, print_2d_data
from mydatalib import extract_data_txt, scale_data
# Dataset location; the file actually read is path + dataset_name + ".txt".
path = './new-data/'
dataset_name = "d32"
# Forwarded to the plotting helpers as their `save` argument.
save = False

print("-----------------------------------------------------------")
print(" Chargement du dataset : " + dataset_name)
data = extract_data_txt(path + dataset_name)
print_2d_data(data, dataset_name=dataset_name +
              "_brutes", stop=False, save=save)

print("-----------------------------------------------------------")
print(" Mise à l'échelle")
data_scaled = scale_data(data)
print_2d_data(data_scaled, dataset_name=dataset_name +
              "_scaled", stop=False, save=save)

print("-----------------------------------------------------------")
print(" Calcul du voisinage")
# Number of neighbours requested per point. Because the query set is the
# training set itself, column 0 of the result is each point's distance to
# itself, leaving n - 1 real neighbours.
n = 50
neighbors = NearestNeighbors(n_neighbors=n)
neighbors.fit(data_scaled)
distances, indices = neighbors.kneighbors(data_scaled)
# Mean distance to the n - 1 real neighbours. The previous expression summed
# x[1:n-1] (only n - 2 values, dropping the farthest neighbour) while
# dividing by n - 1, which underestimated every mean.
distances = distances[:, 1:].mean(axis=1)
# Sorted ascending to draw the k-distance curve.
distances = np.sort(distances, axis=0)
print_1d_data(distances, range(1, len(distances)+1), x_name="distance_moyenne",
              y_name="nombre_de_points", stop=False, save=save)