Ajout dataset artificiels
This commit is contained in:
parent
a41d8033c5
commit
b9da01d3e0
14 changed files with 115721 additions and 89 deletions
|
@ -21,6 +21,15 @@ def extract_data_3d(data_path):
|
|||
return np.array([[x[0], x[1], x[2]] for x in databrut[0]])
|
||||
|
||||
|
||||
def extract_data_txt(data_path):
    """Load a whitespace-separated numeric text file into a NumPy array.

    Args:
        data_path: Path of the file *without* its extension; ``".txt"`` is
            appended before opening.

    Returns:
        A ``numpy.ndarray`` with one row per non-blank line of the file,
        every whitespace-separated token parsed as ``float``.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a token cannot be parsed as a float.
    """
    # Use a context manager so the file handle is closed as soon as the
    # content has been parsed.
    with open(data_path + ".txt") as handle:
        # Skip blank lines: an empty row would make the resulting array ragged.
        rows = [[float(token) for token in line.split()]
                for line in handle if line.strip()]
    return np.array(rows)
|
||||
|
||||
|
||||
def scale_data(data):
    """Standardise *data* with a freshly fitted ``StandardScaler``.

    Args:
        data: Samples to scale; passed unchanged to ``fit_transform``.

    Returns:
        The value returned by ``StandardScaler.fit_transform`` for *data*.
    """
    return preprocessing.StandardScaler().fit_transform(data)
|
||||
|
|
16
new-data/Ground-Truth/d32-gt.txt
Normal file
16
new-data/Ground-Truth/d32-gt.txt
Normal file
|
@ -0,0 +1,16 @@
|
|||
84 152 100 52 96 186 169 107 36 186 139 200 100 104 88 114 54 186 77 152 155 70 188 65 214 116 75 55 124 65 154 177
|
||||
65 113 75 199 203 126 122 165 167 199 76 186 109 165 141 66 74 109 162 168 85 195 209 203 122 119 112 50 215 110 118 152
|
||||
153 106 172 119 203 132 158 59 121 99 33 194 53 196 143 33 134 70 144 168 52 53 216 46 61 66 53 181 54 56 67 73
|
||||
88 50 94 163 35 152 212 80 62 39 182 114 148 70 132 167 118 165 108 184 207 40 60 215 175 117 67 143 160 216 112 145
|
||||
141 206 77 151 182 149 160 88 77 119 199 106 100 204 37 113 179 171 187 81 79 212 204 156 180 51 33 199 68 198 69 120
|
||||
73 213 76 166 152 116 164 180 144 218 74 195 136 46 207 93 57 33 169 73 147 77 50 58 181 198 115 142 204 208 107 86
|
||||
122 69 159 211 167 127 172 198 114 187 38 94 196 208 196 89 187 86 133 215 123 69 84 213 192 167 165 135 45 149 187 97
|
||||
36 121 105 154 35 219 178 34 77 51 183 121 51 91 181 123 215 154 102 108 180 56 135 138 97 52 121 147 214 62 153 54
|
||||
110 74 168 57 193 104 82 64 68 46 48 88 103 59 188 100 191 166 85 76 96 208 62 211 84 209 160 160 182 107 52 196
|
||||
89 58 42 76 171 104 61 221 146 158 204 180 172 200 49 217 108 91 101 46 137 131 36 185 186 190 161 212 57 124 67 97
|
||||
43 84 211 84 54 132 69 156 96 187 185 191 76 82 85 41 147 198 42 58 77 78 46 169 43 137 198 34 198 66 71 42
|
||||
120 142 54 161 108 54 67 195 61 208 75 186 59 82 105 121 52 162 136 167 63 99 114 213 73 207 133 48 157 99 60 83
|
||||
83 53 168 145 181 90 168 135 33 185 155 129 128 83 36 198 142 220 144 196 212 36 184 61 34 35 41 57 169 110 208 53
|
||||
144 203 63 87 182 84 155 206 158 178 82 44 206 201 194 146 157 73 182 159 53 146 110 174 160 101 83 198 93 217 129 213
|
||||
73 48 70 139 75 173 184 77 64 35 165 110 191 156 91 139 182 124 66 74 144 221 35 179 65 105 116 118 61 175 49 208
|
||||
105 53 169 77 188 142 155 181 128 63 215 129 95 62 94 161 216 116 91 122 194 68 196 192 114 102 58 78 80 132 97 75
|
16
new-data/Ground-Truth/d64-gt.txt
Normal file
16
new-data/Ground-Truth/d64-gt.txt
Normal file
|
@ -0,0 +1,16 @@
|
|||
167 102 189 123 155 52 73 81 91 108 208 38 195 46 110 187 159 146 212 66 138 42 44 132 151 185 172 211 183 141 85 214 112 175 42 167 113 121 218 58 52 195 216 76 42 186 65 92 126 212 185 43 216 150 183 80 106 165 129 76 52 164 171 70
|
||||
146 108 68 193 145 175 171 108 90 33 129 159 191 96 212 173 179 163 192 47 34 67 56 44 122 116 181 46 156 168 102 63 165 71 207 45 107 181 140 107 42 119 170 61 220 155 164 214 87 133 177 34 217 190 100 104 98 48 202 188 210 106 186 200
|
||||
203 161 157 221 69 99 67 107 37 47 217 129 44 171 160 221 176 102 57 170 61 175 144 101 201 128 113 199 96 141 186 35 190 153 168 125 72 116 199 212 179 189 190 152 123 123 186 139 159 111 202 207 170 203 189 216 82 106 169 211 72 218 86 121
|
||||
160 196 214 126 77 175 87 160 169 36 200 121 145 148 121 191 204 153 189 169 158 185 91 199 47 130 104 220 106 108 133 60 106 158 143 171 149 189 131 109 105 52 54 97 144 36 61 114 121 215 120 57 217 52 103 158 155 47 177 55 39 155 76 53
|
||||
147 167 54 189 57 140 147 202 123 176 57 96 134 116 178 152 198 125 61 163 198 35 196 87 143 192 95 210 201 179 126 80 41 130 153 62 120 131 101 87 113 144 53 183 108 101 153 163 144 130 115 157 126 155 183 89 40 214 154 50 117 175 111 63
|
||||
206 103 163 190 197 166 119 128 185 144 188 187 76 123 119 208 97 160 134 158 96 137 154 104 203 127 54 87 160 197 141 64 149 169 115 44 146 192 38 66 121 115 155 89 177 60 166 94 145 211 161 76 70 82 38 200 57 52 96 201 191 43 125 206
|
||||
206 49 56 90 139 85 209 116 116 51 177 56 177 42 129 169 180 142 143 57 122 132 176 174 50 92 95 88 176 199 78 118 185 159 93 157 111 132 66 61 60 136 108 152 154 218 35 191 85 105 45 104 152 90 162 132 103 71 103 109 116 131 204 118
|
||||
125 218 94 163 129 175 57 153 159 182 207 157 73 104 73 138 175 153 55 210 191 41 169 129 146 149 63 155 63 191 97 68 110 201 56 62 36 47 162 77 187 146 212 87 131 189 175 189 204 112 147 119 192 33 99 64 91 76 116 72 43 93 174 187
|
||||
184 213 103 138 117 162 200 105 145 115 100 190 73 115 108 151 110 216 89 154 105 194 86 160 199 65 65 155 40 90 162 93 115 218 204 89 81 159 79 117 74 57 57 103 177 74 76 74 132 199 159 137 71 213 39 186 212 112 84 41 36 210 164 113
|
||||
206 38 205 220 124 50 145 57 95 51 50 65 135 46 56 170 185 135 115 118 216 179 176 220 82 179 198 128 220 176 153 211 176 111 117 104 82 85 71 81 177 121 143 58 71 106 167 111 40 114 191 123 143 96 205 74 77 61 95 86 39 69 63 142
|
||||
159 135 82 134 143 125 54 137 118 36 116 43 80 68 203 145 36 181 138 58 90 40 197 192 108 206 204 36 96 210 117 111 89 117 104 40 103 218 174 142 47 173 177 186 112 42 205 45 187 110 162 113 91 63 81 135 215 216 71 65 54 198 95 105
|
||||
173 83 86 159 154 207 125 207 44 52 170 215 212 173 162 67 154 106 140 160 77 196 37 146 49 152 136 145 43 117 95 34 62 194 130 88 66 94 143 99 86 164 205 203 62 84 215 48 211 161 178 83 204 165 93 209 36 204 147 36 84 139 219 138
|
||||
217 209 159 222 41 57 157 66 112 174 221 103 215 89 168 65 124 86 158 52 108 63 159 112 186 153 144 94 36 104 115 131 138 105 221 60 64 138 36 48 198 154 187 219 175 41 72 214 148 207 180 132 76 83 56 176 65 40 66 179 101 74 118 32
|
||||
176 127 140 173 70 213 147 219 196 136 138 107 210 72 162 49 51 92 179 126 186 108 81 216 47 77 125 61 213 52 184 190 145 114 38 159 197 181 44 160 105 144 37 207 220 49 127 193 83 195 123 107 88 77 58 39 56 119 207 167 94 119 110 104
|
||||
172 96 204 98 144 97 101 189 152 93 106 43 137 65 149 199 130 184 140 145 204 173 193 198 172 56 191 151 132 221 210 139 61 164 114 50 145 187 110 98 142 134 115 81 88 41 165 175 85 48 69 53 45 148 74 34 215 162 143 141 70 68 161 80
|
||||
177 73 110 135 85 153 143 73 210 208 148 50 40 165 51 201 47 102 198 55 192 42 89 189 104 86 183 162 60 145 122 104 133 199 167 51 147 167 192 220 85 75 57 72 43 150 155 53 164 171 106 115 99 78 88 48 81 213 114 126 196 214 220 75
|
3
new-data/Ground-Truth/n1-gt.txt
Normal file
3
new-data/Ground-Truth/n1-gt.txt
Normal file
|
@ -0,0 +1,3 @@
|
|||
1000 1000
|
||||
800 1300
|
||||
2000 1500
|
6
new-data/Ground-Truth/n2-gt.txt
Normal file
6
new-data/Ground-Truth/n2-gt.txt
Normal file
|
@ -0,0 +1,6 @@
|
|||
5000 5000
|
||||
4800 6000
|
||||
6000 6500
|
||||
4200 5500
|
||||
5500 5500
|
||||
8000 4500
|
1024
new-data/d32.txt
Normal file
1024
new-data/d32.txt
Normal file
File diff suppressed because it is too large
Load diff
1024
new-data/d64.txt
Normal file
1024
new-data/d64.txt
Normal file
File diff suppressed because it is too large
Load diff
2250
new-data/n1.txt
Normal file
2250
new-data/n1.txt
Normal file
File diff suppressed because it is too large
Load diff
5500
new-data/n2.txt
Normal file
5500
new-data/n2.txt
Normal file
File diff suppressed because it is too large
Load diff
35
new-data/pluie.csv
Normal file
35
new-data/pluie.csv
Normal file
|
@ -0,0 +1,35 @@
|
|||
Ville,JANVIERp,FEVRIERp,MARSp,AVRILp,MAIp,JUINp,JUILLETp,AOUTp,SEPTEMBREp,OCTOBREp,NOVEMBREp,DECEMBREp,Précipitations annuelles,JANVIERnb.j.pl,FEVRIERnb.j.pl,MARSnb.j.pl,AVRILnb.j.pl,MAInb.j.pl,JUINnb.j.pl,JUILLETnb.j.pl,AOUTnb.j.pl,SEPTEMBREnb.j.pl,OCTOBREnb.j.pl,NOVEMBREnb.j.pl,DECEMBREnb.j.pl,Nombre annuel de jours de pluie,Température moyenne annuelle,Amplitude annuelle des températures,Insolation annuelle,Latitude,Longitude,Précipitations de mai à aout,Précipitations sept-oct,Géographie
|
||||
Ajaccio,78.00,69.00,51.00,39.00,43.00,23.00,10.00,15.00,43.00,81.00,105.00,96.00,653.00,12.00,10.00,9.00,9.00,8.00,4.00,1.00,2.00,6.00,10.00,11.00,13.00,95.00,14.71,14.50,2811.00,41.55,8.44,13.90,25.80,Sud
|
||||
Angers,65.00,50.00,60.00,45.00,50.00,55.00,35.00,60.00,55.00,65.00,80.00,70.00,690.00,16.00,13.00,12.00,12.00,13.00,10.00,11.00,11.00,12.00,13.00,15.00,16.00,154.00,11.28,14.50,1899.00,47.28,-0.33,29.00,30.80,Ouest
|
||||
Angouleme,79.00,68.00,64.00,62.00,70.00,58.00,53.00,66.00,69.00,70.00,79.00,88.00,826.00,16.00,14.00,13.00,12.00,14.00,11.00,12.00,12.00,12.00,13.00,15.00,16.00,160.00,12.02,14.90,1989.00,45.39,0.09,29.90,31.00,Ouest
|
||||
Besancon,94.00,87.00,75.00,74.00,86.00,107.00,80.00,116.00,106.00,78.00,92.00,93.00,1088.00,17.00,14.00,12.00,14.00,14.00,14.00,14.00,13.00,13.00,14.00,15.00,15.00,169.00,10.04,17.60,1897.00,47.15,6.02,35.80,35.50,Est
|
||||
Biarritz,128.00,105.00,98.00,102.00,100.00,91.00,69.00,123.00,155.00,152.00,175.00,176.00,1474.00,16.00,14.00,13.00,15.00,17.00,14.00,13.00,13.00,14.00,15.00,16.00,17.00,177.00,13.58,12.30,1921.00,43.29,-1.34,26.00,36.80,Ouest
|
||||
Bordeaux,100.00,84.00,66.00,57.00,64.00,71.00,52.00,65.00,88.00,84.00,99.00,117.00,947.00,16.00,13.00,13.00,13.00,14.00,11.00,11.00,12.00,13.00,14.00,15.00,17.00,162.00,13.33,15.40,2076.00,44.50,-0.34,26.60,31.30,Ouest
|
||||
Brest,130.00,98.00,89.00,77.00,74.00,60.00,51.00,80.00,95.00,108.00,136.00,159.00,1157.00,22.00,16.00,15.00,15.00,14.00,13.00,14.00,15.00,16.00,19.00,20.00,22.00,201.00,10.77,10.20,1757.00,48.24,-4.29,22.90,28.20,Ouest
|
||||
Caen,65.00,61.00,45.00,44.00,53.00,52.00,45.00,57.00,66.00,75.00,79.00,71.00,713.00,17.00,14.00,12.00,13.00,14.00,12.00,13.00,13.00,13.00,15.00,16.00,17.00,169.00,10.45,12.70,1777.00,49.11,-0.21,29.00,35.60,Nord
|
||||
Clermont-Fd,28.00,27.00,30.00,41.00,78.00,79.00,48.00,70.00,58.00,43.00,39.00,30.00,571.00,12.00,11.00,9.00,12.00,12.00,12.00,9.00,10.00,10.00,11.00,12.00,12.00,132.00,10.94,16.80,1899.00,45.47,3.05,48.20,51.30,Est
|
||||
Dijon,62.00,48.00,51.00,48.00,68.00,79.00,44.00,79.00,74.00,53.00,67.00,61.00,734.00,16.00,13.00,10.00,11.00,12.00,12.00,11.00,11.00,11.00,12.00,14.00,14.00,147.00,10.50,18.30,1934.00,47.19,5.01,36.80,37.10,Est
|
||||
Embrun,61.00,55.00,55.00,48.00,47.00,63.00,41.00,65.00,60.00,60.00,81.00,62.00,698.00,9.00,9.00,8.00,9.00,11.00,10.00,7.00,8.00,8.00,9.00,10.00,10.00,108.00,9.49,18.40,2604.00,44.34,6.30,30.90,32.10,Sud
|
||||
Grenoble,80.00,79.00,69.00,69.00,83.00,94.00,74.00,96.00,88.00,85.00,90.00,98.00,1005.00,14.00,11.00,11.00,12.00,14.00,11.00,10.00,11.00,11.00,12.00,13.00,14.00,144.00,10.98,18.60,2100.00,45.10,5.43,34.50,34.50,Est
|
||||
Lille,45.00,43.00,38.00,37.00,45.00,57.00,62.00,64.00,53.00,56.00,56.00,56.00,612.00,18.00,14.00,13.00,14.00,13.00,12.00,13.00,13.00,14.00,14.00,16.00,17.00,171.00,9.73,14.70,1641.00,50.38,3.04,37.30,37.10,Nord
|
||||
Limoges,87.00,75.00,68.00,69.00,72.00,71.00,56.00,73.00,87.00,72.00,82.00,98.00,910.00,17.00,14.00,13.00,13.00,14.00,12.00,12.00,12.00,12.00,14.00,15.00,17.00,165.00,10.59,15.30,1853.00,45.50,1.16,29.90,33.00,Ouest
|
||||
Lyon,53.00,50.00,60.00,54.00,67.00,84.00,55.00,104.00,86.00,73.00,80.00,62.00,828.00,15.00,12.00,11.00,11.00,13.00,11.00,10.00,11.00,11.00,12.00,14.00,14.00,145.00,11.36,18.60,2036.00,45.45,4.51,37.40,42.10,Est
|
||||
Marseille,36.00,49.00,40.00,35.00,38.00,33.00,13.00,27.00,65.00,67.00,69.00,61.00,533.00,8.00,6.00,7.00,6.00,7.00,4.00,2.00,4.00,6.00,8.00,8.00,10.00,76.00,14.23,17.80,2866.00,43.18,5.24,20.80,41.00,Sud
|
||||
Montpellier,56.00,59.00,69.00,46.00,47.00,41.00,20.00,52.00,78.00,125.00,70.00,73.00,736.00,8.00,6.00,8.00,8.00,9.00,5.00,3.00,6.00,7.00,9.00,9.00,10.00,88.00,13.89,17.10,2709.00,43.36,3.53,21.70,44.90,Sud
|
||||
Nancy,66.00,58.00,43.00,45.00,62.00,70.00,58.00,76.00,65.00,52.00,59.00,67.00,721.00,16.00,13.00,12.00,13.00,13.00,13.00,12.00,13.00,12.00,13.00,15.00,16.00,161.00,9.50,17.50,1633.00,48.41,6.12,36.90,33.90,Est
|
||||
Nantes,83.00,65.00,53.00,48.00,54.00,52.00,42.00,66.00,80.00,77.00,95.00,94.00,809.00,18.00,14.00,14.00,11.00,13.00,11.00,12.00,12.00,14.00,15.00,16.00,18.00,168.00,11.69,13.80,1901.00,47.13,-1.33,26.50,33.60,Ouest
|
||||
Nice,67.00,83.00,71.00,70.00,39.00,37.00,21.00,38.00,83.00,109.00,158.00,92.00,868.00,9.00,7.00,8.00,9.00,8.00,5.00,2.00,4.00,7.00,9.00,9.00,9.00,86.00,14.84,15.20,2779.00,43.42,7.15,15.60,33.10,Sud
|
||||
Nimes,52.00,53.00,57.00,45.00,50.00,40.00,25.00,40.00,75.00,100.00,83.00,60.00,680.00,8.00,6.00,9.00,8.00,9.00,6.00,4.00,6.00,7.00,9.00,10.00,10.00,92.00,14.18,17.90,2628.00,43.50,4.21,22.80,43.20,Sud
|
||||
Orleans,57.00,48.00,43.00,46.00,52.00,54.00,47.00,54.00,51.00,54.00,61.00,54.00,621.00,16.00,13.00,12.00,12.00,13.00,11.00,11.00,12.00,12.00,13.00,15.00,16.00,156.00,10.53,15.70,1799.00,47.55,1.54,33.30,33.10,Ouest
|
||||
Paris,53.00,48.00,40.00,45.00,53.00,57.00,54.00,61.00,54.00,50.00,58.00,51.00,624.00,17.00,14.00,12.00,13.00,12.00,12.00,12.00,13.00,13.00,13.00,15.00,16.00,162.00,11.18,15.70,1814.00,48.52,2.20,36.10,34.70,Nord
|
||||
Perpignan,27.00,52.00,59.00,47.00,49.00,33.00,27.00,28.00,69.00,97.00,70.00,71.00,629.00,7.00,6.00,8.00,7.00,9.00,7.00,5.00,6.00,7.00,8.00,6.00,9.00,85.00,15.24,16.30,2603.00,42.41,2.53,21.80,44.10,Sud
|
||||
Poitiers,65.00,58.00,56.00,49.00,55.00,55.00,46.00,59.00,52.00,61.00,78.00,68.00,702.00,16.00,13.00,13.00,12.00,14.00,11.00,10.00,11.00,12.00,12.00,15.00,16.00,155.00,11.28,15.10,2024.00,46.35,0.20,30.60,29.30,Ouest
|
||||
Reims,43.00,44.00,42.00,37.00,52.00,53.00,47.00,58.00,54.00,43.00,52.00,50.00,575.00,17.00,15.00,12.00,13.00,13.00,13.00,12.00,13.00,12.00,13.00,16.00,16.00,165.00,10.06,16.40,1702.00,49.15,4.02,36.50,35.40,Est
|
||||
Rennes,57.00,50.00,45.00,43.00,46.00,48.00,36.00,57.00,53.00,60.00,73.00,66.00,634.00,18.00,14.00,14.00,12.00,13.00,11.00,12.00,12.00,13.00,15.00,16.00,18.00,168.00,11.13,13.10,1835.00,48.05,-1.41,29.50,32.20,Ouest
|
||||
Rouen,65.00,58.00,50.00,44.00,50.00,57.00,49.00,67.00,70.00,72.00,68.00,66.00,716.00,17.00,15.00,12.00,12.00,13.00,12.00,12.00,13.00,13.00,15.00,16.00,18.00,168.00,10.34,14.20,1694.00,49.26,1.05,31.10,37.50,Nord
|
||||
St-Quentin,52.00,50.00,46.00,44.00,52.00,63.00,61.00,69.00,67.00,52.00,63.00,65.00,684.00,17.00,14.00,12.00,12.00,12.00,12.00,13.00,14.00,13.00,14.00,15.00,16.00,164.00,9.85,16.40,1661.00,49.51,3.17,35.80,36.30,Nord
|
||||
Strasbourg,51.00,44.00,42.00,58.00,71.00,88.00,73.00,90.00,61.00,43.00,51.00,47.00,719.00,15.00,13.00,12.00,13.00,13.00,14.00,14.00,13.00,12.00,12.00,13.00,14.00,158.00,9.72,18.60,1696.00,48.35,7.45,44.80,37.40,Est
|
||||
Toulon,76.00,86.00,82.00,60.00,49.00,35.00,12.00,31.00,77.00,105.00,117.00,107.00,837.00,9.00,6.00,8.00,7.00,7.00,4.00,2.00,4.00,5.00,9.00,9.00,11.00,81.00,15.28,14.00,2917.00,43.07,5.56,15.20,31.80,Sud
|
||||
Toulouse,53.00,50.00,52.00,55.00,65.00,65.00,44.00,43.00,57.00,49.00,58.00,65.00,656.00,14.00,12.00,11.00,12.00,13.00,10.00,9.00,9.00,10.00,11.00,12.00,14.00,137.00,12.68,16.20,2081.00,43.36,1.26,33.10,32.40,Sud
|
||||
Tours,63.00,55.00,52.00,51.00,53.00,58.00,47.00,60.00,60.00,55.00,68.00,65.00,687.00,16.00,13.00,12.00,12.00,13.00,11.00,11.00,12.00,13.00,13.00,15.00,16.00,157.00,11.22,15.60,1859.00,47.23,0.41,31.70,32.10,Ouest
|
||||
Vichy,50.00,45.00,51.00,52.00,84.00,84.00,63.00,86.00,75.00,58.00,58.00,55.00,761.00,17.00,14.00,12.00,13.00,14.00,12.00,11.00,12.00,12.00,13.00,15.00,16.00,161.00,10.72,16.90,1873.00,46.08,3.26,41.70,42.00,Est
|
|
105600
new-data/w2.txt
Normal file
105600
new-data/w2.txt
Normal file
File diff suppressed because it is too large
Load diff
|
@ -13,7 +13,7 @@ from myplotlib import print_1d_data, print_2d_data
|
|||
from mydatalib import extract_data_2d, scale_data, apply_DBSCAN, evaluate
|
||||
|
||||
path = './artificial/'
|
||||
dataset_name = "banana"
|
||||
dataset_name = "s-set1"
|
||||
save = True
|
||||
|
||||
print("-----------------------------------------------------------")
|
||||
|
@ -30,23 +30,22 @@ print_2d_data(data_scaled, dataset_name=dataset_name +
|
|||
|
||||
print("-----------------------------------------------------------")
|
||||
print(" Calcul du voisinage")
|
||||
n = 50
|
||||
n = 7
|
||||
neighbors = NearestNeighbors(n_neighbors=n)
|
||||
neighbors.fit(data)
|
||||
distances, indices = neighbors.kneighbors(data)
|
||||
neighbors.fit(data_scaled)
|
||||
distances, indices = neighbors.kneighbors(data_scaled)
|
||||
distances = list(map(lambda x: sum(x[1:n-1])/(len(x)-1), distances))
|
||||
print(distances)
|
||||
distances = np.sort(distances, axis=0)
|
||||
print(distances)
|
||||
print_1d_data(distances, range(1, len(distances)+1), x_name="distance_moyenne",
|
||||
y_name="nombre_de_points", stop=False, save=False)
|
||||
y_name="nombre_de_points", dataset_name=dataset_name,
|
||||
method_name="DBSCAN", stop=False, save=save)
|
||||
|
||||
|
||||
print("-----------------------------------------------------------")
|
||||
print(" Création clusters : DBSCAN")
|
||||
params = []
|
||||
for i in range(1, 20):
|
||||
params += [(i/100, 5)]
|
||||
for i in range(1, 30):
|
||||
params += [(0.1, i)]
|
||||
durations = []
|
||||
silouettes = []
|
||||
daviess = []
|
||||
|
@ -55,7 +54,7 @@ clusters = []
|
|||
noise_points = []
|
||||
for (distance, min_pts) in params:
|
||||
# Apply DBSCAN clustering
|
||||
(model, duration) = apply_DBSCAN(data, distance, min_pts)
|
||||
(model, duration) = apply_DBSCAN(data_scaled, distance, min_pts)
|
||||
cl_pred = model.labels_
|
||||
# Display the clusters
|
||||
print_2d_data(data_scaled, dataset_name=dataset_name,
|
||||
|
|
|
@ -5,90 +5,200 @@ Created on Wed Dec 8 16:07:28 2021
|
|||
|
||||
@author: pfaure
|
||||
"""
|
||||
from numpy import arange
|
||||
from myplotlib import print_1d_data
|
||||
from mydatalib import (extract_data_txt, scale_data, apply_kmeans,
|
||||
apply_agglomerative_clustering, apply_DBSCAN,
|
||||
apply_mean_shift, evaluate)
|
||||
|
||||
from sklearn.neighbors import NearestNeighbors
|
||||
import numpy as np
|
||||
|
||||
from myplotlib import print_1d_data, print_2d_data
|
||||
from mydatalib import extract_data_2d, scale_data, apply_DBSCAN, evaluate
|
||||
|
||||
path = './artificial/'
|
||||
dataset_name = "banana"
|
||||
path = './new-data/'
|
||||
dataset_name_list = ["d32", "d64"]
|
||||
#eps_list = [0.6, 0.75, 0.2, 0.8, 0.8]
|
||||
eps_list = [0.8, 0.8]
|
||||
save = True
|
||||
for i in range(0, 2):
|
||||
dataset_name = dataset_name_list[i]
|
||||
eps = eps_list[i]
|
||||
|
||||
print("-----------------------------------------------------------")
|
||||
print(" Chargement du dataset : " + dataset_name)
|
||||
data = extract_data_2d(path + dataset_name)
|
||||
print_2d_data(data, dataset_name=dataset_name +
|
||||
"_brutes", stop=False, save=save)
|
||||
print("-----------------------------------------------------------")
|
||||
print(" Chargement du dataset : " + dataset_name)
|
||||
data = extract_data_txt(path + dataset_name)
|
||||
|
||||
print("-----------------------------------------------------------")
|
||||
print(" Mise à l'échelle")
|
||||
data_scaled = scale_data(data)
|
||||
print_2d_data(data_scaled, dataset_name=dataset_name +
|
||||
"_scaled", stop=False, save=save)
|
||||
print("-----------------------------------------------------------")
|
||||
print(" Mise à l'échelle")
|
||||
data_scaled = scale_data(data)
|
||||
|
||||
print("-----------------------------------------------------------")
|
||||
print(" Calcul du voisinage")
|
||||
n = 50
|
||||
neighbors = NearestNeighbors(n_neighbors=n)
|
||||
neighbors.fit(data)
|
||||
distances, indices = neighbors.kneighbors(data)
|
||||
distances = list(map(lambda x: sum(x[1:n-1])/(len(x)-1), distances))
|
||||
print(distances)
|
||||
distances = np.sort(distances, axis=0)
|
||||
print(distances)
|
||||
print_1d_data(distances, range(1, len(distances)+1), x_name="distance_moyenne",
|
||||
y_name="nombre_de_points", stop=False, save=False)
|
||||
k_max = 10
|
||||
print("-----------------------------------------------------------")
|
||||
print(" Application de k-means")
|
||||
# Application de k-means pour plusieurs valeurs de k
|
||||
# et evaluation de la solution
|
||||
k = []
|
||||
durations = []
|
||||
silouettes = []
|
||||
daviess = []
|
||||
calinskis = []
|
||||
inerties = []
|
||||
iterations = []
|
||||
for i in range(2, k_max):
|
||||
# Application de k-means
|
||||
(model, duration) = apply_kmeans(data_scaled, k=i, init="k-means++")
|
||||
# Evaluation de la solution de clustering
|
||||
(silouette, davies, calinski) = evaluate(data_scaled, model)
|
||||
# Enregistrement des valeurs
|
||||
k += [i]
|
||||
durations += [duration]
|
||||
silouettes += [silouette]
|
||||
daviess += [davies]
|
||||
calinskis += [calinski]
|
||||
inerties += [model.inertia_]
|
||||
iterations += [model.n_iter_]
|
||||
|
||||
# Affichage des résultats
|
||||
print_1d_data(k, k, x_name="k", y_name="k", dataset_name=dataset_name,
|
||||
method_name="k-means", stop=False, save=save)
|
||||
print_1d_data(k, durations, x_name="k", y_name="temps_de_calcul", y_unit="ms",
|
||||
dataset_name=dataset_name, method_name="k-means",
|
||||
stop=False, save=save)
|
||||
print_1d_data(k, silouettes, x_name="k", y_name="coeficient_de_silhouette",
|
||||
dataset_name=dataset_name, method_name="k-means",
|
||||
stop=False, save=save)
|
||||
print_1d_data(k, daviess, x_name="k", y_name="coeficient_de_Davies",
|
||||
dataset_name=dataset_name, method_name="k-means",
|
||||
stop=False, save=save)
|
||||
print_1d_data(k, calinskis, x_name="k", y_name="coeficient_de_Calinski",
|
||||
dataset_name=dataset_name, method_name="k-means",
|
||||
stop=False, save=save)
|
||||
print_1d_data(k, inerties, x_name="k", y_name="inertie",
|
||||
dataset_name=dataset_name, method_name="k-means",
|
||||
stop=False, save=save)
|
||||
print_1d_data(k, iterations, x_name="k", y_name="nombre_d_iterations",
|
||||
dataset_name=dataset_name, method_name="k-means",
|
||||
stop=True, save=save)
|
||||
|
||||
print("-----------------------------------------------------------")
|
||||
print(" Création clusters : DBSCAN")
|
||||
params = []
|
||||
for i in range(1, 20):
|
||||
params += [(i/100, 5)]
|
||||
durations = []
|
||||
silouettes = []
|
||||
daviess = []
|
||||
calinskis = []
|
||||
clusters = []
|
||||
noise_points = []
|
||||
for (distance, min_pts) in params:
|
||||
# Application du clustering agglomeratif
|
||||
(model, duration) = apply_DBSCAN(data, distance, min_pts)
|
||||
cl_pred = model.labels_
|
||||
# Affichage des clusters# Affichage des clusters
|
||||
print_2d_data(data_scaled, dataset_name=dataset_name,
|
||||
method_name="DBSCAN-Eps=" +
|
||||
str(distance)+"-Minpt="+str(min_pts),
|
||||
k=0, stop=False, save=save, c=cl_pred)
|
||||
# Evaluation de la solution de clustering
|
||||
(silouette, davies, calinski) = evaluate(data_scaled, model)
|
||||
# Enregistrement des valeurs
|
||||
durations += [duration]
|
||||
silouettes += [silouette]
|
||||
daviess += [davies]
|
||||
calinskis += [calinski]
|
||||
clusters += [len(set(cl_pred)) - (1 if -1 in cl_pred else 0)]
|
||||
noise_points += [list(cl_pred).count(-1)]
|
||||
print("-----------------------------------------------------------")
|
||||
print(" Création clusters : agglomerative ")
|
||||
# Application du clustering agglomeratif pour plusieurs valeurs de k
|
||||
# et evaluation de la solution
|
||||
linkage = "ward"
|
||||
k = []
|
||||
durations = []
|
||||
silouettes = []
|
||||
daviess = []
|
||||
calinskis = []
|
||||
for i in range(2, k_max):
|
||||
# Application du clustering agglomeratif
|
||||
(model, duration) = apply_agglomerative_clustering(
|
||||
data_scaled, k=i, linkage=linkage)
|
||||
# Evaluation de la solution de clustering
|
||||
(silouette, davies, calinski) = evaluate(data_scaled, model)
|
||||
# Enregistrement des valeurs
|
||||
k += [i]
|
||||
durations += [duration]
|
||||
silouettes += [silouette]
|
||||
daviess += [davies]
|
||||
calinskis += [calinski]
|
||||
|
||||
# Affichage des résultats
|
||||
params = [str(i) for i in params]
|
||||
print_1d_data(params, durations, x_name="(eps,min_pts)",
|
||||
y_name="temps_de_calcul", y_unit="ms", dataset_name=dataset_name,
|
||||
method_name="DBSCAN", stop=False, save=save)
|
||||
print_1d_data(params, silouettes, x_name="(eps,min_pts)",
|
||||
y_name="coeficient_de_silhouette", dataset_name=dataset_name,
|
||||
method_name="DBSCAN", stop=False, save=save)
|
||||
print_1d_data(params, daviess, x_name="(eps,min_pts)",
|
||||
y_name="coeficient_de_Davies", dataset_name=dataset_name,
|
||||
method_name="DBSCAN", stop=False, save=save)
|
||||
print_1d_data(params, calinskis, x_name="(eps,min_pts)",
|
||||
y_name="coeficient_de_Calinski", dataset_name=dataset_name,
|
||||
method_name="DBSCAN", stop=False, save=save)
|
||||
print_1d_data(params, clusters, x_name="(eps,min_pts)",
|
||||
y_name="nombre_de_clusters", dataset_name=dataset_name,
|
||||
method_name="DBSCAN", stop=False, save=save)
|
||||
print_1d_data(params, noise_points, x_name="(eps,min_pts)",
|
||||
y_name="points_de_bruit", dataset_name=dataset_name,
|
||||
method_name="DBSCAN", stop=False, save=save)
|
||||
# Affichage des résultats
|
||||
print_1d_data(k, k, x_name="k", y_name="k", dataset_name=dataset_name,
|
||||
method_name="agglomerative_" + linkage, stop=False, save=save)
|
||||
print_1d_data(k, durations, x_name="k", y_name="temps_de_calcul", y_unit="ms",
|
||||
dataset_name=dataset_name,
|
||||
method_name="agglomerative_" + linkage, stop=False, save=save)
|
||||
print_1d_data(k, silouettes, x_name="k", y_name="coeficient_de_silhouette",
|
||||
dataset_name=dataset_name,
|
||||
method_name="agglomerative_" + linkage, stop=False, save=save)
|
||||
print_1d_data(k, daviess, x_name="k", y_name="coeficient_de_Davies",
|
||||
dataset_name=dataset_name,
|
||||
method_name="agglomerative_" + linkage, stop=False, save=save)
|
||||
print_1d_data(k, calinskis, x_name="k", y_name="coeficient_de_Calinski",
|
||||
dataset_name=dataset_name,
|
||||
method_name="agglomerative_" + linkage, stop=False, save=save)
|
||||
|
||||
min_sample_max = 30
|
||||
print("-----------------------------------------------------------")
|
||||
print(" Création clusters : DBSCAN")
|
||||
params = []
|
||||
for i in range(1, min_sample_max):
|
||||
params += [(eps, i)]
|
||||
durations = []
|
||||
silouettes = []
|
||||
daviess = []
|
||||
calinskis = []
|
||||
clusters = []
|
||||
noise_points = []
|
||||
for (distance, min_pts) in params:
|
||||
# Apply DBSCAN clustering
|
||||
(model, duration) = apply_DBSCAN(data_scaled, distance, min_pts)
|
||||
cl_pred = model.labels_
|
||||
# Evaluation de la solution de clustering
|
||||
(silouette, davies, calinski) = evaluate(data_scaled, model)
|
||||
# Enregistrement des valeurs
|
||||
durations += [duration]
|
||||
silouettes += [silouette]
|
||||
daviess += [davies]
|
||||
calinskis += [calinski]
|
||||
clusters += [len(set(cl_pred)) - (1 if -1 in cl_pred else 0)]
|
||||
noise_points += [list(cl_pred).count(-1)]
|
||||
|
||||
# Affichage des résultats
|
||||
params = [str(i) for i in params]
|
||||
print_1d_data(params, durations, x_name="(eps,min_pts)",
|
||||
y_name="temps_de_calcul", y_unit="ms", dataset_name=dataset_name,
|
||||
method_name="DBSCAN", stop=False, save=save)
|
||||
print_1d_data(params, silouettes, x_name="(eps,min_pts)",
|
||||
y_name="coeficient_de_silhouette", dataset_name=dataset_name,
|
||||
method_name="DBSCAN", stop=False, save=save)
|
||||
print_1d_data(params, daviess, x_name="(eps,min_pts)",
|
||||
y_name="coeficient_de_Davies", dataset_name=dataset_name,
|
||||
method_name="DBSCAN", stop=False, save=save)
|
||||
print_1d_data(params, calinskis, x_name="(eps,min_pts)",
|
||||
y_name="coeficient_de_Calinski", dataset_name=dataset_name,
|
||||
method_name="DBSCAN", stop=False, save=save)
|
||||
print_1d_data(params, clusters, x_name="(eps,min_pts)",
|
||||
y_name="nombre_de_clusters", dataset_name=dataset_name,
|
||||
method_name="DBSCAN", stop=False, save=save)
|
||||
print_1d_data(params, noise_points, x_name="(eps,min_pts)",
|
||||
y_name="points_de_bruit", dataset_name=dataset_name,
|
||||
method_name="DBSCAN", stop=False, save=save)
|
||||
|
||||
print("-----------------------------------------------------------")
|
||||
print(" Création clusters : mean-shift")
|
||||
# Apply mean-shift for several bandwidth values
|
||||
# et evaluation de la solution
|
||||
|
||||
k_max = 2
|
||||
|
||||
k = []
|
||||
durations = []
|
||||
silouettes = []
|
||||
daviess = []
|
||||
calinskis = []
|
||||
for bandwidth in arange(0.1, k_max, 0.2):
|
||||
# Application du clustering
|
||||
(model, duration) = apply_mean_shift(
|
||||
data_scaled, bandwidth=bandwidth)
|
||||
# Evaluation de la solution de clustering
|
||||
(silouette, davies, calinski) = evaluate(data_scaled, model)
|
||||
# Enregistrement des valeurs
|
||||
k += [bandwidth]
|
||||
durations += [duration]
|
||||
silouettes += [silouette]
|
||||
daviess += [davies]
|
||||
calinskis += [calinski]
|
||||
|
||||
# Affichage des résultats
|
||||
print_1d_data(k, k, x_name="k", y_name="k", dataset_name=dataset_name,
|
||||
method_name="mean-shift", stop=False, save=save)
|
||||
print_1d_data(k, durations, x_name="k", y_name="temps_de_calcul", y_unit="ms",
|
||||
dataset_name=dataset_name,
|
||||
method_name="mean-shift", stop=False, save=save)
|
||||
print_1d_data(k, silouettes, x_name="k", y_name="coeficient_de_silhouette",
|
||||
dataset_name=dataset_name,
|
||||
method_name="mean-shift", stop=False, save=save)
|
||||
print_1d_data(k, daviess, x_name="k", y_name="coeficient_de_Davies",
|
||||
dataset_name=dataset_name,
|
||||
method_name="mean-shift", stop=False, save=save)
|
||||
print_1d_data(k, calinskis, x_name="k", y_name="coeficient_de_Calinski",
|
||||
dataset_name=dataset_name,
|
||||
method_name="mean-shift", stop=False, save=save)
|
||||
|
|
40
tp5-preprocessing.py
Normal file
40
tp5-preprocessing.py
Normal file
|
@ -0,0 +1,40 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Created on Wed Dec 8 16:07:28 2021
|
||||
|
||||
@author: pfaure
|
||||
"""
|
||||
|
||||
from sklearn.neighbors import NearestNeighbors
|
||||
import numpy as np
|
||||
|
||||
from myplotlib import print_1d_data, print_2d_data
|
||||
from mydatalib import extract_data_txt, scale_data
|
||||
|
||||
# Script: load a text dataset, standardise it, and plot the sorted mean
# distance of every point to its nearest neighbours.

# Directory holding the dataset files and the dataset to analyse.
path = './new-data/'
dataset_name = "d32"
# Forwarded as the `save` flag of every plotting helper call below.
save = False

print("-----------------------------------------------------------")
print(" Chargement du dataset : " + dataset_name)
data = extract_data_txt(path + dataset_name)
print_2d_data(data, dataset_name=dataset_name +
              "_brutes", stop=False, save=save)

print("-----------------------------------------------------------")
print(" Mise à l'échelle")
data_scaled = scale_data(data)
print_2d_data(data_scaled, dataset_name=dataset_name +
              "_scaled", stop=False, save=save)

print("-----------------------------------------------------------")
print(" Calcul du voisinage")
# Number of neighbours requested per point (the point itself included,
# since the query set is the fitted set).
n = 50
neighbors = NearestNeighbors(n_neighbors=n)
neighbors.fit(data_scaled)
distances, _ = neighbors.kneighbors(data_scaled)
# Column 0 is expected to be each point's zero distance to itself, so it is
# skipped. Average over ALL remaining n-1 neighbours: summing only
# x[1:n-1] (n-2 terms) while dividing by n-1 would drop the farthest
# neighbour and bias the mean downwards.
distances = np.asarray(distances)[:, 1:].mean(axis=1)
distances = np.sort(distances, axis=0)
print_1d_data(distances, range(1, len(distances)+1), x_name="distance_moyenne",
              y_name="nombre_de_points", stop=False, save=save)
|
Loading…
Reference in a new issue