{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "import seaborn as sns\n",
    "import matplotlib.pyplot as plt\n",
    "from matplotlib.pyplot import savefig\n",
    "\n",
    "sns.set(style=\"white\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Helper functions: indices of the n largest / smallest entries of an array\n",
    "\n",
    "def get_highest_values(arr, n):\n",
    "    \"\"\"Return the indices of the `n` largest entries of `arr`, largest first.\n",
    "\n",
    "    Despite the name, the result holds positions (from `argsort`), not the\n",
    "    values themselves. A non-positive `n` yields an empty index array, and an\n",
    "    `n` larger than the input returns every index.\n",
    "    \"\"\"\n",
    "    order = np.array(arr).argsort()\n",
    "    if n <= 0:\n",
    "        # order[-0:] would select *every* index, so handle this explicitly.\n",
    "        return order[:0]\n",
    "    return order[-n:][::-1]\n",
    "\n",
    "\n",
    "def get_lowest_values(arr, n):\n",
    "    \"\"\"Return the indices of the `n` smallest entries of `arr`, smallest first.\n",
    "\n",
    "    Despite the name, the result holds positions (from `argsort`), not the\n",
    "    values themselves. A non-positive `n` yields an empty index array, and an\n",
    "    `n` larger than the input returns every index.\n",
    "    \"\"\"\n",
    "    order = np.array(arr).argsort()\n",
    "    # Clamp n so that zero or negative requests give an empty result.\n",
    "    return order[:max(n, 0)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "data_file = \"data/temp.train\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Correlation analysis between SVD features"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "
\n", " | 1 | \n", "2 | \n", "3 | \n", "4 | \n", "5 | \n", "6 | \n", "7 | \n", "8 | \n", "9 | \n", "10 | \n", "... | \n", "191 | \n", "192 | \n", "193 | \n", "194 | \n", "195 | \n", "196 | \n", "197 | \n", "198 | \n", "199 | \n", "200 | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "0.747867 | \n", "0.015906 | \n", "0.013435 | \n", "0.010133 | \n", "0.009132 | \n", "0.006958 | \n", "0.006652 | \n", "0.006193 | \n", "0.005855 | \n", "0.005532 | \n", "... | \n", "0.000031 | \n", "0.000028 | \n", "0.000023 | \n", "0.000020 | \n", "0.000014 | \n", "0.000014 | \n", "0.000010 | \n", "0.000007 | \n", "0.000003 | \n", "2.854226e-16 | \n", "
1 | \n", "0.742556 | \n", "0.016040 | \n", "0.013593 | \n", "0.010234 | \n", "0.009399 | \n", "0.007116 | \n", "0.006650 | \n", "0.006278 | \n", "0.006005 | \n", "0.005751 | \n", "... | \n", "0.000057 | \n", "0.000053 | \n", "0.000043 | \n", "0.000033 | \n", "0.000029 | \n", "0.000025 | \n", "0.000022 | \n", "0.000012 | \n", "0.000006 | \n", "3.012061e-16 | \n", "
2 | \n", "0.741817 | \n", "0.016078 | \n", "0.013635 | \n", "0.010261 | \n", "0.009387 | \n", "0.007099 | \n", "0.006711 | \n", "0.006291 | \n", "0.006031 | \n", "0.005788 | \n", "... | \n", "0.000067 | \n", "0.000060 | \n", "0.000060 | \n", "0.000048 | \n", "0.000036 | \n", "0.000033 | \n", "0.000017 | \n", "0.000013 | \n", "0.000006 | \n", "1.667818e-16 | \n", "
3 | \n", "0.728526 | \n", "0.016202 | \n", "0.014147 | \n", "0.010591 | \n", "0.009438 | \n", "0.007605 | \n", "0.006869 | \n", "0.006649 | \n", "0.006454 | \n", "0.006113 | \n", "... | \n", "0.000118 | \n", "0.000107 | \n", "0.000101 | \n", "0.000078 | \n", "0.000072 | \n", "0.000056 | \n", "0.000033 | \n", "0.000020 | \n", "0.000006 | \n", "3.226420e-16 | \n", "
4 | \n", "0.747843 | \n", "0.015910 | \n", "0.013440 | \n", "0.010138 | \n", "0.009140 | \n", "0.006962 | \n", "0.006653 | \n", "0.006193 | \n", "0.005857 | \n", "0.005531 | \n", "... | \n", "0.000032 | \n", "0.000029 | \n", "0.000023 | \n", "0.000019 | \n", "0.000015 | \n", "0.000012 | \n", "0.000010 | \n", "0.000004 | \n", "0.000002 | \n", "3.281675e-16 | \n", "
5 rows × 200 columns
\n", "\n", " | 0 | \n", "1 | \n", "2 | \n", "3 | \n", "4 | \n", "5 | \n", "6 | \n", "7 | \n", "8 | \n", "9 | \n", "... | \n", "191 | \n", "192 | \n", "193 | \n", "194 | \n", "195 | \n", "196 | \n", "197 | \n", "198 | \n", "199 | \n", "200 | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "0 | \n", "0.747867 | \n", "0.015906 | \n", "0.013435 | \n", "0.010133 | \n", "0.009132 | \n", "0.006958 | \n", "0.006652 | \n", "0.006193 | \n", "0.005855 | \n", "... | \n", "0.000031 | \n", "0.000028 | \n", "0.000023 | \n", "0.000020 | \n", "0.000014 | \n", "0.000014 | \n", "0.000010 | \n", "0.000007 | \n", "0.000003 | \n", "2.854226e-16 | \n", "
1 | \n", "1 | \n", "0.742556 | \n", "0.016040 | \n", "0.013593 | \n", "0.010234 | \n", "0.009399 | \n", "0.007116 | \n", "0.006650 | \n", "0.006278 | \n", "0.006005 | \n", "... | \n", "0.000057 | \n", "0.000053 | \n", "0.000043 | \n", "0.000033 | \n", "0.000029 | \n", "0.000025 | \n", "0.000022 | \n", "0.000012 | \n", "0.000006 | \n", "3.012061e-16 | \n", "
2 | \n", "1 | \n", "0.741817 | \n", "0.016078 | \n", "0.013635 | \n", "0.010261 | \n", "0.009387 | \n", "0.007099 | \n", "0.006711 | \n", "0.006291 | \n", "0.006031 | \n", "... | \n", "0.000067 | \n", "0.000060 | \n", "0.000060 | \n", "0.000048 | \n", "0.000036 | \n", "0.000033 | \n", "0.000017 | \n", "0.000013 | \n", "0.000006 | \n", "1.667818e-16 | \n", "
3 | \n", "1 | \n", "0.728526 | \n", "0.016202 | \n", "0.014147 | \n", "0.010591 | \n", "0.009438 | \n", "0.007605 | \n", "0.006869 | \n", "0.006649 | \n", "0.006454 | \n", "... | \n", "0.000118 | \n", "0.000107 | \n", "0.000101 | \n", "0.000078 | \n", "0.000072 | \n", "0.000056 | \n", "0.000033 | \n", "0.000020 | \n", "0.000006 | \n", "3.226420e-16 | \n", "
4 | \n", "0 | \n", "0.747843 | \n", "0.015910 | \n", "0.013440 | \n", "0.010138 | \n", "0.009140 | \n", "0.006962 | \n", "0.006653 | \n", "0.006193 | \n", "0.005857 | \n", "... | \n", "0.000032 | \n", "0.000029 | \n", "0.000023 | \n", "0.000019 | \n", "0.000015 | \n", "0.000012 | \n", "0.000010 | \n", "0.000004 | \n", "0.000002 | \n", "3.281675e-16 | \n", "
5 rows × 201 columns
\n", "