parent
8de27d9c5e
commit
0dc1eb2749
File diff suppressed because one or more lines are too long
|
@ -410,7 +410,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 91,
|
"execution_count": 93,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
|
@ -460,7 +460,99 @@
|
||||||
"\n",
|
"\n",
|
||||||
"print(df2.describe())\n",
|
"print(df2.describe())\n",
|
||||||
"print(\"------------------------------------------------------------------------\")\n",
|
"print(\"------------------------------------------------------------------------\")\n",
|
||||||
"print(df2)"
|
"print(df2)\n",
|
||||||
|
"\n",
|
||||||
|
"df2.to_csv(\"../data/processed/tax_and_gay.csv\")\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 101,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
" zip population gay tax rate overall tax paid income \\\n",
|
||||||
|
"count 2184.000000 2184.000000 2184.000000 2184.000000 2184.0 \n",
|
||||||
|
"mean 48935.203297 26691.730769 4373.997253 596.719322 1.0 \n",
|
||||||
|
"std 35451.335807 17960.713867 3054.620840 615.174358 0.0 \n",
|
||||||
|
"min 1730.000000 160.000000 0.000000 0.000000 1.0 \n",
|
||||||
|
"25% 11360.750000 13337.500000 2110.000000 217.000000 1.0 \n",
|
||||||
|
"50% 60023.500000 24070.000000 3900.000000 434.000000 1.0 \n",
|
||||||
|
"75% 80227.250000 35640.000000 5902.500000 777.250000 1.0 \n",
|
||||||
|
"max 98686.000000 114420.000000 24560.000000 9166.000000 1.0 \n",
|
||||||
|
"\n",
|
||||||
|
" lat long \n",
|
||||||
|
"count 2184.000000 2184.000000 \n",
|
||||||
|
"mean 38.016518 -91.296804 \n",
|
||||||
|
"std 5.210272 18.476699 \n",
|
||||||
|
"min 25.572213 -123.118977 \n",
|
||||||
|
"25% 33.997027 -105.037767 \n",
|
||||||
|
"50% 39.930150 -87.603617 \n",
|
||||||
|
"75% 40.960828 -74.310179 \n",
|
||||||
|
"max 47.916786 -70.758184 \n",
|
||||||
|
"------------------------------------------------------------------------\n",
|
||||||
|
" zip population gay tax rate overall tax paid income lat \\\n",
|
||||||
|
"0 1730 13570.0 3260 150.0 1 42.499295 \n",
|
||||||
|
"1 1731 2450.0 550 0.0 1 42.456748 \n",
|
||||||
|
"2 1742 17170.0 4220 297.0 1 42.462911 \n",
|
||||||
|
"3 1760 34350.0 7880 468.0 1 42.284822 \n",
|
||||||
|
"4 1770 4310.0 1060 46.0 1 42.231947 \n",
|
||||||
|
"... ... ... ... ... ... ... \n",
|
||||||
|
"2179 98682 57010.0 11080 703.0 1 45.673209 \n",
|
||||||
|
"2180 98683 30700.0 6470 358.0 1 45.603287 \n",
|
||||||
|
"2181 98684 27630.0 5390 371.0 1 45.630556 \n",
|
||||||
|
"2182 98685 27540.0 6490 298.0 1 45.715211 \n",
|
||||||
|
"2183 98686 17800.0 4120 215.0 1 45.723392 \n",
|
||||||
|
"\n",
|
||||||
|
" long \n",
|
||||||
|
"0 -71.281889 \n",
|
||||||
|
"1 -71.279484 \n",
|
||||||
|
"2 -71.364496 \n",
|
||||||
|
"3 -71.348811 \n",
|
||||||
|
"4 -71.372963 \n",
|
||||||
|
"... ... \n",
|
||||||
|
"2179 -122.481745 \n",
|
||||||
|
"2180 -122.510170 \n",
|
||||||
|
"2181 -122.514839 \n",
|
||||||
|
"2182 -122.693165 \n",
|
||||||
|
"2183 -122.624397 \n",
|
||||||
|
"\n",
|
||||||
|
"[2184 rows x 7 columns]\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"## take the dataset & add long/lat\n",
|
||||||
|
"## props to Nat for creating the backbone I used to zipcode -> long/lat\n",
|
||||||
|
"\n",
|
||||||
|
"cords = pd.read_csv(\"../data/raw/zip_lat_long.csv\")\n",
|
||||||
|
"gaydf = pd.read_csv(\"../data/processed/tax_and_gay.csv\")\n",
|
||||||
|
"\n",
|
||||||
|
"# Let's add long/lat columns to gb\n",
|
||||||
|
"gaydf = gaydf.merge(cords, left_on=\"zip\", right_on=\"ZIP\")\n",
|
||||||
|
"\n",
|
||||||
|
"# // unneded was already filtered out\n",
|
||||||
|
"\n",
|
||||||
|
"# There's a lot of info baked into some of these columns. Especially the composite indexes.\n",
|
||||||
|
"# We'll leave their names as is for easy reference even if they're a little ugly.\n",
|
||||||
|
"gaydf = gaydf.rename({\n",
|
||||||
|
" \"LAT\": \"lat\",\n",
|
||||||
|
" \"LNG\": \"long\",\n",
|
||||||
|
"}, axis=\"columns\")\n",
|
||||||
|
"\n",
|
||||||
|
"# gb.to_csv(\"../data/processed/gaybourhoods-nat.csv\")\n",
|
||||||
|
"# gb.head()\n",
|
||||||
|
"\n",
|
||||||
|
"# unperson unneccesary zip codes\n",
|
||||||
|
"del gaydf['zip.1']\n",
|
||||||
|
"del gaydf['ZIP']\n",
|
||||||
|
"\n",
|
||||||
|
"print(gaydf.describe())\n",
|
||||||
|
"print(\"------------------------------------------------------------------------\")\n",
|
||||||
|
"print(gaydf)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -469,8 +561,16 @@
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
|
"\n",
|
||||||
"#compare taxes paid by queers to taxes paid by general"
|
"#compare taxes paid by queers to taxes paid by general"
|
||||||
]
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": []
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue