method chain & func added

This commit is contained in:
almsam 2023-04-13 03:33:02 -07:00
parent cf131c39bb
commit c18048c125
2 changed files with 2407 additions and 2423 deletions

View File

@ -875,7 +875,7 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 77,
"metadata": {},
"outputs": [
{
@ -1080,7 +1080,7 @@
"[5 rows x 29 columns]"
]
},
"execution_count": 9,
"execution_count": 77,
"metadata": {},
"output_type": "execute_result"
}
@ -1096,47 +1096,37 @@
},
{
"cell_type": "code",
"execution_count": 27,
"execution_count": 82,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" zip pride parade index gay bars index ZIP \\\n",
" pride parade index gay bars index lat long\n",
"count 2328.000000 2328.000000 2328.000000 2328.000000\n",
"mean 48616.478522 0.017612 0.118127 48616.478522 \n",
"std 35481.240641 0.131563 0.861863 35481.240641 \n",
"min 1730.000000 0.000000 0.000000 1730.000000 \n",
"25% 11362.750000 0.000000 0.000000 11362.750000 \n",
"50% 46351.000000 0.000000 0.000000 46351.000000 \n",
"75% 80234.250000 0.000000 0.000000 80234.250000 \n",
"max 98686.000000 1.000000 17.000000 98686.000000 \n",
"\n",
" lat long \n",
"count 2328.000000 2328.000000 \n",
"mean 38.044304 -91.221236 \n",
"std 5.148365 18.533499 \n",
"min 25.572213 -123.118977 \n",
"25% 34.021932 -105.049099 \n",
"50% 39.899977 -87.494097 \n",
"75% 40.912413 -74.288743 \n",
"max 47.916786 -70.758184 \n",
"mean 0.017612 0.118127 38.044304 -91.221236\n",
"std 0.131563 0.861863 5.148365 18.533499\n",
"min 0.000000 0.000000 25.572213 -123.118977\n",
"25% 0.000000 0.000000 34.021932 -105.049099\n",
"50% 0.000000 0.000000 39.899977 -87.494097\n",
"75% 0.000000 0.000000 40.912413 -74.288743\n",
"max 1.000000 17.000000 47.916786 -70.758184\n",
"------------------------------------------------------------------------\n",
" zip pride parade index gay bars index ZIP lat long\n",
"0 90069 1 15 90069 34.093828 -118.381697\n",
"1 94114 0 17 94114 37.758057 -122.435410\n",
"2 10011 1 5 10011 40.742039 -74.000620\n",
"3 10014 1 10 10014 40.734012 -74.006746\n",
"4 94103 1 9 94103 37.773134 -122.411167\n",
"... ... ... ... ... ... ...\n",
"2323 97208 0 0 97208 45.528666 -122.678981\n",
"2324 98154 0 0 98154 47.606211 -122.333792\n",
"2325 98158 0 0 98158 47.449678 -122.307657\n",
"2326 98174 0 0 98174 47.604569 -122.335359\n",
"2327 98195 0 0 98195 47.649339 -122.310294\n",
" pride parade index gay bars index lat long\n",
"0 1 15 34.093828 -118.381697\n",
"1 0 17 37.758057 -122.435410\n",
"2 1 5 40.742039 -74.000620\n",
"3 1 10 40.734012 -74.006746\n",
"4 1 9 37.773134 -122.411167\n",
"... ... ... ... ...\n",
"2323 0 0 45.528666 -122.678981\n",
"2324 0 0 47.606211 -122.333792\n",
"2325 0 0 47.449678 -122.307657\n",
"2326 0 0 47.604569 -122.335359\n",
"2327 0 0 47.649339 -122.310294\n",
"\n",
"[2328 rows x 6 columns]\n"
"[2328 rows x 4 columns]\n"
]
}
],
@ -1144,16 +1134,38 @@
"# wrangle gaybourhoods data\n",
"# gaydf = pd.DataFrame(zip(gaybourhoods['GEOID10'], gaybourhoods['Parade_Weight'], gaybourhoods['Bars_Weight']))\n",
"\n",
"gaydf = pd.DataFrame(zip(gaybourhoods['GEOID10'], gaybourhoods['ParadeFlag'], gaybourhoods['CountBars']))\n",
"\n",
"gaydf.columns=(('zip', 'pride parade index', 'gay bars index'))\n",
"\n",
"# print(gaydf.describe())\n",
"# print(gaydf)\n",
"\n",
"cords = pd.read_csv(\"../data/raw/zip_lat_long.csv\")\n",
"\n",
"gaydf = gaydf.merge(cords, left_on=\"zip\", right_on=\"ZIP\")\n",
"# gaydf = pd.DataFrame(zip(gaybourhoods['GEOID10'], gaybourhoods['ParadeFlag'], gaybourhoods['CountBars']))\n",
"# gaydf.columns=(('zip', 'pride parade index', 'gay bars index'))\n",
"# gaydf = gaydf.merge(cords, left_on=\"zip\", right_on=\"ZIP\")\n",
"\n",
"\n",
"def loadprideindexes(secdf):\n",
" gaydf = pd.DataFrame(zip(gaybourhoods['GEOID10'], gaybourhoods['ParadeFlag'], gaybourhoods['CountBars']))\n",
" gaydf.columns=(('zip', 'pride parade index', 'gay bars index'))\n",
" gaydf = gaydf.merge(secdf, left_on=\"zip\", right_on=\"ZIP\")\n",
" \n",
" return gaydf\n",
" \n",
" # gaydf = (\n",
" # pd.DataFrame(zip(gaybourhoods['GEOID10'], gaybourhoods['ParadeFlag'], gaybourhoods['CountBars']))\n",
" # .columns=(('zip', 'pride parade index', 'gay bars index'))\n",
" # # .merge(cords, left_on=\"zip\", right_on=\"ZIP\")\n",
" # )\n",
"\n",
"gaydf = loadprideindexes(cords)\n",
"\n",
"def notzip(dat):\n",
" del dat['zip']\n",
" del dat['ZIP']\n",
"\n",
"notzip(gaydf)\n",
"\n",
"\n",
"# // unneeded was already filtered out\n",
"\n",
@ -1179,7 +1191,7 @@
},
{
"cell_type": "code",
"execution_count": 28,
"execution_count": 53,
"metadata": {},
"outputs": [],
"source": [
@ -1188,7 +1200,7 @@
},
{
"cell_type": "code",
"execution_count": 29,
"execution_count": 54,
"metadata": {},
"outputs": [
{
@ -1219,7 +1231,7 @@
},
{
"cell_type": "code",
"execution_count": 34,
"execution_count": 55,
"metadata": {},
"outputs": [
{
@ -1252,7 +1264,7 @@
},
{
"cell_type": "code",
"execution_count": 35,
"execution_count": 56,
"metadata": {},
"outputs": [
{
@ -1280,7 +1292,7 @@
},
{
"cell_type": "code",
"execution_count": 31,
"execution_count": 57,
"metadata": {},
"outputs": [
{
@ -1304,10 +1316,8 @@
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>zip</th>\n",
" <th>pride parade index</th>\n",
" <th>gay bars index</th>\n",
" <th>ZIP</th>\n",
" <th>lat</th>\n",
" <th>long</th>\n",
" </tr>\n",
@ -1319,69 +1329,53 @@
" <td>2328.000000</td>\n",
" <td>2328.000000</td>\n",
" <td>2328.000000</td>\n",
" <td>2328.000000</td>\n",
" <td>2328.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>48616.478522</td>\n",
" <td>0.017612</td>\n",
" <td>0.118127</td>\n",
" <td>48616.478522</td>\n",
" <td>38.044304</td>\n",
" <td>-91.221236</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>35481.240641</td>\n",
" <td>0.131563</td>\n",
" <td>0.861863</td>\n",
" <td>35481.240641</td>\n",
" <td>5.148365</td>\n",
" <td>18.533499</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>1730.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>1730.000000</td>\n",
" <td>25.572213</td>\n",
" <td>-123.118977</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>11362.750000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>11362.750000</td>\n",
" <td>34.021932</td>\n",
" <td>-105.049099</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>46351.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>46351.000000</td>\n",
" <td>39.899977</td>\n",
" <td>-87.494097</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>80234.250000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>80234.250000</td>\n",
" <td>40.912413</td>\n",
" <td>-74.288743</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>98686.000000</td>\n",
" <td>1.000000</td>\n",
" <td>17.000000</td>\n",
" <td>98686.000000</td>\n",
" <td>47.916786</td>\n",
" <td>-70.758184</td>\n",
" </tr>\n",
@ -1390,28 +1384,18 @@
"</div>"
],
"text/plain": [
" zip pride parade index gay bars index ZIP \\\n",
" pride parade index gay bars index lat long\n",
"count 2328.000000 2328.000000 2328.000000 2328.000000\n",
"mean 48616.478522 0.017612 0.118127 48616.478522 \n",
"std 35481.240641 0.131563 0.861863 35481.240641 \n",
"min 1730.000000 0.000000 0.000000 1730.000000 \n",
"25% 11362.750000 0.000000 0.000000 11362.750000 \n",
"50% 46351.000000 0.000000 0.000000 46351.000000 \n",
"75% 80234.250000 0.000000 0.000000 80234.250000 \n",
"max 98686.000000 1.000000 17.000000 98686.000000 \n",
"\n",
" lat long \n",
"count 2328.000000 2328.000000 \n",
"mean 38.044304 -91.221236 \n",
"std 5.148365 18.533499 \n",
"min 25.572213 -123.118977 \n",
"25% 34.021932 -105.049099 \n",
"50% 39.899977 -87.494097 \n",
"75% 40.912413 -74.288743 \n",
"max 47.916786 -70.758184 "
"mean 0.017612 0.118127 38.044304 -91.221236\n",
"std 0.131563 0.861863 5.148365 18.533499\n",
"min 0.000000 0.000000 25.572213 -123.118977\n",
"25% 0.000000 0.000000 34.021932 -105.049099\n",
"50% 0.000000 0.000000 39.899977 -87.494097\n",
"75% 0.000000 0.000000 40.912413 -74.288743\n",
"max 1.000000 17.000000 47.916786 -70.758184"
]
},
"execution_count": 31,
"execution_count": 57,
"metadata": {},
"output_type": "execute_result"
}
@ -1424,16 +1408,16 @@
},
{
"cell_type": "code",
"execution_count": 32,
"execution_count": 58,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<seaborn.axisgrid.PairGrid at 0x2aec9f50910>"
"<seaborn.axisgrid.PairGrid at 0x2aecc151110>"
]
},
"execution_count": 32,
"execution_count": 58,
"metadata": {},
"output_type": "execute_result"
},
@ -1449,23 +1433,23 @@
}
],
"source": [
"del gaydfhybridplot2['zip']; del gaydfhybridplot2['ZIP']\n",
"# del gaydfhybridplot2['zip']; del gaydfhybridplot2['ZIP']\n",
"\n",
"sns.pairplot(gaydfhybridplot2)"
]
},
{
"cell_type": "code",
"execution_count": 38,
"execution_count": 59,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<seaborn.axisgrid.PairGrid at 0x2aecadf1110>"
"<seaborn.axisgrid.PairGrid at 0x2aecd0e1750>"
]
},
"execution_count": 38,
"execution_count": 59,
"metadata": {},
"output_type": "execute_result"
},
@ -1498,16 +1482,16 @@
},
{
"cell_type": "code",
"execution_count": 42,
"execution_count": 60,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<seaborn.axisgrid.PairGrid at 0x2aebe5ed150>"
"<seaborn.axisgrid.PairGrid at 0x2aebcea1e50>"
]
},
"execution_count": 42,
"execution_count": 60,
"metadata": {},
"output_type": "execute_result"
},
@ -1531,7 +1515,7 @@
},
{
"cell_type": "code",
"execution_count": 47,
"execution_count": 61,
"metadata": {},
"outputs": [
{
@ -1558,7 +1542,7 @@
},
{
"cell_type": "code",
"execution_count": 48,
"execution_count": 62,
"metadata": {},
"outputs": [
{

File diff suppressed because it is too large Load Diff