anal 2 coords

coordinates added to analysis 2
This commit is contained in:
almsam 2023-03-03 01:13:54 -08:00
parent 8de27d9c5e
commit 0dc1eb2749
3 changed files with 2301 additions and 11 deletions

File diff suppressed because one or more lines are too long

View File

@ -410,7 +410,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 91, "execution_count": 93,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -460,7 +460,99 @@
"\n", "\n",
"print(df2.describe())\n", "print(df2.describe())\n",
"print(\"------------------------------------------------------------------------\")\n", "print(\"------------------------------------------------------------------------\")\n",
"print(df2)" "print(df2)\n",
"\n",
"df2.to_csv(\"../data/processed/tax_and_gay.csv\")\n"
]
},
{
"cell_type": "code",
"execution_count": 101,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" zip population gay tax rate overall tax paid income \\\n",
"count 2184.000000 2184.000000 2184.000000 2184.000000 2184.0 \n",
"mean 48935.203297 26691.730769 4373.997253 596.719322 1.0 \n",
"std 35451.335807 17960.713867 3054.620840 615.174358 0.0 \n",
"min 1730.000000 160.000000 0.000000 0.000000 1.0 \n",
"25% 11360.750000 13337.500000 2110.000000 217.000000 1.0 \n",
"50% 60023.500000 24070.000000 3900.000000 434.000000 1.0 \n",
"75% 80227.250000 35640.000000 5902.500000 777.250000 1.0 \n",
"max 98686.000000 114420.000000 24560.000000 9166.000000 1.0 \n",
"\n",
" lat long \n",
"count 2184.000000 2184.000000 \n",
"mean 38.016518 -91.296804 \n",
"std 5.210272 18.476699 \n",
"min 25.572213 -123.118977 \n",
"25% 33.997027 -105.037767 \n",
"50% 39.930150 -87.603617 \n",
"75% 40.960828 -74.310179 \n",
"max 47.916786 -70.758184 \n",
"------------------------------------------------------------------------\n",
" zip population gay tax rate overall tax paid income lat \\\n",
"0 1730 13570.0 3260 150.0 1 42.499295 \n",
"1 1731 2450.0 550 0.0 1 42.456748 \n",
"2 1742 17170.0 4220 297.0 1 42.462911 \n",
"3 1760 34350.0 7880 468.0 1 42.284822 \n",
"4 1770 4310.0 1060 46.0 1 42.231947 \n",
"... ... ... ... ... ... ... \n",
"2179 98682 57010.0 11080 703.0 1 45.673209 \n",
"2180 98683 30700.0 6470 358.0 1 45.603287 \n",
"2181 98684 27630.0 5390 371.0 1 45.630556 \n",
"2182 98685 27540.0 6490 298.0 1 45.715211 \n",
"2183 98686 17800.0 4120 215.0 1 45.723392 \n",
"\n",
" long \n",
"0 -71.281889 \n",
"1 -71.279484 \n",
"2 -71.364496 \n",
"3 -71.348811 \n",
"4 -71.372963 \n",
"... ... \n",
"2179 -122.481745 \n",
"2180 -122.510170 \n",
"2181 -122.514839 \n",
"2182 -122.693165 \n",
"2183 -122.624397 \n",
"\n",
"[2184 rows x 7 columns]\n"
]
}
],
"source": [
"## take the dataset & add long/lat\n",
"## props to Nat for creating the backbone I used to map zipcode -> long/lat\n",
"\n",
"cords = pd.read_csv(\"../data/raw/zip_lat_long.csv\")\n",
"gaydf = pd.read_csv(\"../data/processed/tax_and_gay.csv\")\n",
"\n",
"# Let's add long/lat columns to gaydf\n",
"gaydf = gaydf.merge(cords, left_on=\"zip\", right_on=\"ZIP\")\n",
"\n",
"# // unneeded was already filtered out\n",
"\n",
"# There's a lot of info baked into some of these columns. Especially the composite indexes.\n",
"# We'll leave their names as is for easy reference even if they're a little ugly.\n",
"gaydf = gaydf.rename({\n",
" \"LAT\": \"lat\",\n",
" \"LNG\": \"long\",\n",
"}, axis=\"columns\")\n",
"\n",
"# gb.to_csv(\"../data/processed/gaybourhoods-nat.csv\")\n",
"# gb.head()\n",
"\n",
"# drop the unnecessary duplicate zip code columns\n",
"del gaydf['zip.1']\n",
"del gaydf['ZIP']\n",
"\n",
"print(gaydf.describe())\n",
"print(\"------------------------------------------------------------------------\")\n",
"print(gaydf)"
] ]
}, },
{ {
@ -469,8 +561,16 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"\n",
"#compare taxes paid by queers to taxes paid by general" "#compare taxes paid by queers to taxes paid by general"
] ]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
} }
], ],
"metadata": { "metadata": {

File diff suppressed because it is too large Load Diff