diff --git a/notebooks/01-exploratory_data_analysis.ipynb b/notebooks/01-exploratory_data_analysis.ipynb index 8a8a8d9..98df65a 100644 --- a/notebooks/01-exploratory_data_analysis.ipynb +++ b/notebooks/01-exploratory_data_analysis.ipynb @@ -13,22 +13,600 @@ "metadata": {}, "outputs": [], "source": [ - "import pandas as pd" + "from IPython.display import display, Markdown\n", + "import numpy as np\n", + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## 1. Descrição dos dados" + "## Leitura do conjunto de dados\n", + "***" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " | Age | \n", + "Gender | \n", + "Height | \n", + "Weight | \n", + "CALC | \n", + "FAVC | \n", + "FCVC | \n", + "NCP | \n", + "SCC | \n", + "SMOKE | \n", + "CH2O | \n", + "family_history_with_overweight | \n", + "FAF | \n", + "TUE | \n", + "CAEC | \n", + "MTRANS | \n", + "NObeyesdad | \n", + "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", + "21.000000 | \n", + "Female | \n", + "1.620000 | \n", + "64.000000 | \n", + "no | \n", + "no | \n", + "2.0 | \n", + "3.0 | \n", + "no | \n", + "no | \n", + "2.000000 | \n", + "yes | \n", + "0.000000 | \n", + "1.000000 | \n", + "Sometimes | \n", + "Public_Transportation | \n", + "Normal_Weight | \n", + "
1 | \n", + "21.000000 | \n", + "Female | \n", + "1.520000 | \n", + "56.000000 | \n", + "Sometimes | \n", + "no | \n", + "3.0 | \n", + "3.0 | \n", + "yes | \n", + "yes | \n", + "3.000000 | \n", + "yes | \n", + "3.000000 | \n", + "0.000000 | \n", + "Sometimes | \n", + "Public_Transportation | \n", + "Normal_Weight | \n", + "
2 | \n", + "23.000000 | \n", + "Male | \n", + "1.800000 | \n", + "77.000000 | \n", + "Frequently | \n", + "no | \n", + "2.0 | \n", + "3.0 | \n", + "no | \n", + "no | \n", + "2.000000 | \n", + "yes | \n", + "2.000000 | \n", + "1.000000 | \n", + "Sometimes | \n", + "Public_Transportation | \n", + "Normal_Weight | \n", + "
3 | \n", + "27.000000 | \n", + "Male | \n", + "1.800000 | \n", + "87.000000 | \n", + "Frequently | \n", + "no | \n", + "3.0 | \n", + "3.0 | \n", + "no | \n", + "no | \n", + "2.000000 | \n", + "no | \n", + "2.000000 | \n", + "0.000000 | \n", + "Sometimes | \n", + "Walking | \n", + "Overweight_Level_I | \n", + "
4 | \n", + "22.000000 | \n", + "Male | \n", + "1.780000 | \n", + "89.800000 | \n", + "Sometimes | \n", + "no | \n", + "2.0 | \n", + "1.0 | \n", + "no | \n", + "no | \n", + "2.000000 | \n", + "no | \n", + "0.000000 | \n", + "0.000000 | \n", + "Sometimes | \n", + "Public_Transportation | \n", + "Overweight_Level_II | \n", + "
... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "
2106 | \n", + "20.976842 | \n", + "Female | \n", + "1.710730 | \n", + "131.408528 | \n", + "Sometimes | \n", + "yes | \n", + "3.0 | \n", + "3.0 | \n", + "no | \n", + "no | \n", + "1.728139 | \n", + "yes | \n", + "1.676269 | \n", + "0.906247 | \n", + "Sometimes | \n", + "Public_Transportation | \n", + "Obesity_Type_III | \n", + "
2107 | \n", + "21.982942 | \n", + "Female | \n", + "1.748584 | \n", + "133.742943 | \n", + "Sometimes | \n", + "yes | \n", + "3.0 | \n", + "3.0 | \n", + "no | \n", + "no | \n", + "2.005130 | \n", + "yes | \n", + "1.341390 | \n", + "0.599270 | \n", + "Sometimes | \n", + "Public_Transportation | \n", + "Obesity_Type_III | \n", + "
2108 | \n", + "22.524036 | \n", + "Female | \n", + "1.752206 | \n", + "133.689352 | \n", + "Sometimes | \n", + "yes | \n", + "3.0 | \n", + "3.0 | \n", + "no | \n", + "no | \n", + "2.054193 | \n", + "yes | \n", + "1.414209 | \n", + "0.646288 | \n", + "Sometimes | \n", + "Public_Transportation | \n", + "Obesity_Type_III | \n", + "
2109 | \n", + "24.361936 | \n", + "Female | \n", + "1.739450 | \n", + "133.346641 | \n", + "Sometimes | \n", + "yes | \n", + "3.0 | \n", + "3.0 | \n", + "no | \n", + "no | \n", + "2.852339 | \n", + "yes | \n", + "1.139107 | \n", + "0.586035 | \n", + "Sometimes | \n", + "Public_Transportation | \n", + "Obesity_Type_III | \n", + "
2110 | \n", + "23.664709 | \n", + "Female | \n", + "1.738836 | \n", + "133.472641 | \n", + "Sometimes | \n", + "yes | \n", + "3.0 | \n", + "3.0 | \n", + "no | \n", + "no | \n", + "2.863513 | \n", + "yes | \n", + "1.026452 | \n", + "0.714137 | \n", + "Sometimes | \n", + "Public_Transportation | \n", + "Obesity_Type_III | \n", + "
2111 rows × 17 columns
\n", + "\n", + " | variavel | \n", + "descricao | \n", + "tipo | \n", + "subtipo | \n", + "
---|---|---|---|---|
0 | \n", + "Age | \n", + "idade do indivíduo | \n", + "quantitativa | \n", + "contínua | \n", + "
1 | \n", + "Gender | \n", + "gênero do indivíduo | \n", + "qualitativa | \n", + "nominal | \n", + "
2 | \n", + "Height | \n", + "altura do indivíduo | \n", + "quantitativa | \n", + "contínua | \n", + "
3 | \n", + "Weight | \n", + "peso do indivíduo | \n", + "quantitativa | \n", + "contínua | \n", + "
4 | \n", + "CALC | \n", + "frequência do consumo de álcool pelo indivíduo | \n", + "qualitativa | \n", + "ordinal | \n", + "
5 | \n", + "FAVC | \n", + "indica se o indivíduo consome comidas altamente calóricas com frequência | \n", + "qualitativa | \n", + "nominal | \n", + "
6 | \n", + "FCVC | \n", + "indica o nível de consumo de vegetais nas refeições do indivíduo | \n", + "quantitativa | \n", + "discreta | \n", + "
7 | \n", + "NCP | \n", + "quantas refeições principais o indivíduo faz diariamente | \n", + "quantitativa | \n", + "contínua | \n", + "
8 | \n", + "SCC | \n", + "indica se o indivíduo monitora as calorias ingeridas diariamente | \n", + "qualitativa | \n", + "nominal | \n", + "
9 | \n", + "SMOKE | \n", + "indica se o indivíduo fuma ou não | \n", + "qualitativa | \n", + "nominal | \n", + "
10 | \n", + "CH2O | \n", + "quanta água o indivíduo consome diariamente | \n", + "quantitativa | \n", + "contínua | \n", + "
11 | \n", + "family_history_with_overweight | \n", + "indica se algum membro da família do indivíduo sofreu ou sofre com excesso de peso | \n", + "qualitativa | \n", + "nominal | \n", + "
12 | \n", + "FAF | \n", + "quão frequentemente o indivíduo pratica atividades físicas | \n", + "quantitativa | \n", + "contínua | \n", + "
13 | \n", + "TUE | \n", + "quanto tempo o indivíduo passa usando dispositivos tecnológicos | \n", + "quantitativa | \n", + "contínua | \n", + "
14 | \n", + "CAEC | \n", + "frequência em que o indivíduo come algum alimento entre as refeições | \n", + "qualitativa | \n", + "ordinal | \n", + "
15 | \n", + "MTRANS | \n", + "tipo de transporte que o indivíduo costuma usar | \n", + "qualitativa | \n", + "nominal | \n", + "
16 | \n", + "NObeyesdad | \n", + "nível de obesidade do indivíduo | \n", + "qualitativa | \n", + "ordinal | \n", + "