{ "cells": [ { "cell_type": "markdown", "id": "335c108d", "metadata": {}, "source": [ "# Hands-on clustering \\#1: \n", "## Large Multi-spectral image segmentation: MARS" ] }, { "cell_type": "markdown", "id": "fe466679", "metadata": {}, "source": [ "## 3. Data exploration" ] }, { "cell_type": "markdown", "id": "e59c1d62", "metadata": {}, "source": [ "### 3.1. Read the data and understand the formatting" ] }, { "cell_type": "markdown", "id": "b2939e4a", "metadata": {}, "source": [ "#### Import first required packages" ] }, { "cell_type": "code", "execution_count": 1, "id": "c3f78612", "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "import matplotlib.pyplot as plt\n", "\n", "# Notebook-wide matplotlib defaults: 10 x 5 figures, font size 15.\n", "plt.rcParams.update({\"figure.figsize\": (10, 5), \"font.size\": 15})\n" ] }, { "cell_type": "markdown", "id": "b56fdade", "metadata": {}, "source": [ "#### 3.1.1 Read the data" ] }, { "cell_type": "code", "execution_count": 2, "id": "350ba4b5", "metadata": {}, "outputs": [], "source": [ "# Load the MARS table; the zipped CSV path is handed to pandas as-is.\n", "mars = pd.read_csv(\"mars.csv.zip\")\n" ] }, { "cell_type": "markdown", "id": "d5e3237f", "metadata": {}, "source": [ "#### 3.1.2 Display data summary" ] }, { "cell_type": "code", "execution_count": 3, "id": "af38dd20", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | V1 | \n", "V2 | \n", "V3 | \n", "V4 | \n", "V5 | \n", "V6 | \n", "V7 | \n", "V8 | \n", "V9 | \n", "V10 | \n", "... | \n", "V246 | \n", "V247 | \n", "V248 | \n", "V249 | \n", "V250 | \n", "V251 | \n", "V252 | \n", "V253 | \n", "V254 | \n", "V255 | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
count | \n", "38400.000000 | \n", "38400.000000 | \n", "38400.000000 | \n", "38400.000000 | \n", "38400.000000 | \n", "38400.000000 | \n", "38400.000000 | \n", "38400.000000 | \n", "38400.000000 | \n", "38400.000000 | \n", "... | \n", "38400.000000 | \n", "38400.000000 | \n", "38400.000000 | \n", "38400.000000 | \n", "38400.000000 | \n", "38400.000000 | \n", "38400.000000 | \n", "38400.000000 | \n", "38400.000000 | \n", "38400.000000 | \n", "
mean | \n", "0.373672 | \n", "0.408012 | \n", "0.418404 | \n", "0.420980 | \n", "0.421814 | \n", "0.421358 | \n", "0.426807 | \n", "0.429181 | \n", "0.419277 | \n", "0.418314 | \n", "... | \n", "0.327834 | \n", "0.358084 | \n", "0.366237 | \n", "0.341929 | \n", "0.376129 | \n", "0.394448 | \n", "0.373834 | \n", "0.370925 | \n", "0.409842 | \n", "0.413357 | \n", "
std | \n", "0.098974 | \n", "0.080131 | \n", "0.080369 | \n", "0.081863 | \n", "0.081315 | \n", "0.080683 | \n", "0.080681 | \n", "0.081414 | \n", "0.078250 | \n", "0.077991 | \n", "... | \n", "0.064647 | \n", "0.056778 | \n", "0.050779 | \n", "0.059134 | \n", "0.052337 | \n", "0.056993 | \n", "0.064088 | \n", "0.062586 | \n", "0.056333 | \n", "0.053984 | \n", "
min | \n", "0.038469 | \n", "0.233688 | \n", "0.259045 | \n", "0.269580 | \n", "0.273937 | \n", "0.274090 | \n", "0.275866 | \n", "0.274769 | \n", "0.273685 | \n", "0.274631 | \n", "... | \n", "0.143893 | \n", "0.193508 | \n", "0.219910 | \n", "0.174218 | \n", "0.220138 | \n", "0.215826 | \n", "0.198084 | \n", "0.186781 | \n", "0.224686 | \n", "0.219101 | \n", "
25% | \n", "0.299668 | \n", "0.334763 | \n", "0.343065 | \n", "0.345050 | \n", "0.344962 | \n", "0.345116 | \n", "0.350838 | \n", "0.352490 | \n", "0.346149 | \n", "0.345396 | \n", "... | \n", "0.272904 | \n", "0.312169 | \n", "0.330923 | \n", "0.295109 | \n", "0.339747 | \n", "0.352853 | \n", "0.324080 | \n", "0.321077 | \n", "0.369166 | \n", "0.375433 | \n", "
50% | \n", "0.365481 | \n", "0.388543 | \n", "0.397193 | \n", "0.399102 | \n", "0.399245 | \n", "0.398413 | \n", "0.403601 | \n", "0.407299 | \n", "0.396357 | \n", "0.394954 | \n", "... | \n", "0.334283 | \n", "0.362259 | \n", "0.367263 | \n", "0.344043 | \n", "0.375732 | \n", "0.397163 | \n", "0.378828 | \n", "0.372294 | \n", "0.411261 | \n", "0.414135 | \n", "
75% | \n", "0.445685 | \n", "0.487916 | \n", "0.502658 | \n", "0.507322 | \n", "0.508697 | \n", "0.508175 | \n", "0.512995 | \n", "0.515465 | \n", "0.503711 | \n", "0.502607 | \n", "... | \n", "0.379558 | \n", "0.400942 | \n", "0.398504 | \n", "0.385884 | \n", "0.409758 | \n", "0.433080 | \n", "0.420058 | \n", "0.419563 | \n", "0.447426 | \n", "0.448774 | \n", "
max | \n", "0.832065 | \n", "0.590190 | \n", "0.580816 | \n", "0.579686 | \n", "0.580050 | \n", "0.579551 | \n", "0.584240 | \n", "0.591300 | \n", "0.569333 | \n", "0.567649 | \n", "... | \n", "0.559236 | \n", "0.596519 | \n", "2.304593 | \n", "1.393481 | \n", "0.661885 | \n", "0.651398 | \n", "0.624899 | \n", "0.652349 | \n", "0.699387 | \n", "0.655508 | \n", "
8 rows × 255 columns
\n", "