Move analysis1 chains to code.project_functions1
This commit is contained in:
parent
6b4e79d55e
commit
4566009cae
|
@ -36,414 +36,39 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"execution_count": 49,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import pandas as pd\n",
|
||||
"import seaborn as sns"
|
||||
"import seaborn as sns\n",
|
||||
"\n",
|
||||
"# Absolutely diabolical method of doing relative imports with a package that shares its name with\n",
|
||||
"# something in the stdlib in Jupyter Lab because it seems impossible otherwise\n",
|
||||
"__import__(\"sys\").path.append(\"./code\")\n",
|
||||
"from project_functions1 import *"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Loading the data"
|
||||
"## Data Analysis Pipeline"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"execution_count": 50,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"## counties - Relating US counties to their long/lat position on the Earth\n",
|
||||
"counties = pd.read_csv(\"../data/raw/us-county-boundaries.csv\", sep=\";\")\n",
|
||||
"\n",
|
||||
"## pol - Election results from the 2012 American presidential election\n",
|
||||
"pol = pd.read_csv(\"../data/raw/countypres_2000-2020.csv\")\n",
|
||||
"\n",
|
||||
"## gb - the gaybourhoods dataset\n",
|
||||
"gb = pd.read_csv(\"../data/raw/gaybourhoods.csv\")\n",
|
||||
"\n",
|
||||
"# cords - mapping zip codes to long/lat coordinates\n",
|
||||
"cords = pd.read_csv(\"../data/raw/zip_lat_long.csv\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Cleaning the data"
|
||||
"# Now in one, new-and-improved, non-descript method imported from another file\n",
|
||||
"gb, pol, counties, cords = load_and_process()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"counties = counties.rename({\n",
|
||||
" \"NAME\": \"name\",\n",
|
||||
" \"INTPTLAT\": \"lat\",\n",
|
||||
" \"INTPTLON\": \"long\",\n",
|
||||
"}, axis=\"columns\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>state</th>\n",
|
||||
" <th>county</th>\n",
|
||||
" <th>party</th>\n",
|
||||
" <th>votes</th>\n",
|
||||
" <th>total</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>0</th>\n",
|
||||
" <td>AL</td>\n",
|
||||
" <td>Autauga</td>\n",
|
||||
" <td>Democrat</td>\n",
|
||||
" <td>6363</td>\n",
|
||||
" <td>23932</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1</th>\n",
|
||||
" <td>AL</td>\n",
|
||||
" <td>Autauga</td>\n",
|
||||
" <td>Republican</td>\n",
|
||||
" <td>17379</td>\n",
|
||||
" <td>23932</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2</th>\n",
|
||||
" <td>AL</td>\n",
|
||||
" <td>Autauga</td>\n",
|
||||
" <td>Other</td>\n",
|
||||
" <td>190</td>\n",
|
||||
" <td>23932</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>3</th>\n",
|
||||
" <td>AL</td>\n",
|
||||
" <td>Baldwin</td>\n",
|
||||
" <td>Democrat</td>\n",
|
||||
" <td>18424</td>\n",
|
||||
" <td>85338</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4</th>\n",
|
||||
" <td>AL</td>\n",
|
||||
" <td>Baldwin</td>\n",
|
||||
" <td>Republican</td>\n",
|
||||
" <td>66016</td>\n",
|
||||
" <td>85338</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" state county party votes total\n",
|
||||
"0 AL Autauga Democrat 6363 23932\n",
|
||||
"1 AL Autauga Republican 17379 23932\n",
|
||||
"2 AL Autauga Other 190 23932\n",
|
||||
"3 AL Baldwin Democrat 18424 85338\n",
|
||||
"4 AL Baldwin Republican 66016 85338"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# We only want 2012--the latest election before the gb data was collected\n",
|
||||
"pol = pol.query(\"`year` == 2012\") \\\n",
|
||||
" .reset_index() \\\n",
|
||||
" .drop([\n",
|
||||
" \"year\", \"state\", \"county_fips\", \"office\",\n",
|
||||
" \"candidate\", \"version\", \"mode\", \"index\",\n",
|
||||
" ], axis=\"columns\") \\\n",
|
||||
" .rename({\n",
|
||||
" \"county_name\": \"county\",\n",
|
||||
" \"state_po\": \"state\",\n",
|
||||
" \"candidatevotes\": \"votes\",\n",
|
||||
" \"totalvotes\": \"total\"\n",
|
||||
" }, axis=\"columns\") \\\n",
|
||||
" .apply(lambda x: x.str.capitalize() if x.name == \"county\" or x.name == \"party\" else x)\n",
|
||||
"\n",
|
||||
"pol.head()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Let's add long/lat columns to gb\n",
|
||||
"gb = gb.merge(cords, left_on=\"GEOID10\", right_on=\"ZIP\") \\\n",
|
||||
" .drop([\n",
|
||||
" \"Mjoint_MF\", \"Mjoint_SS\", \"Mjoint_FF\", \"Mjoint_MM\",\n",
|
||||
" \"Cns_TotHH\", \"Cns_UPSS\", \"Cns_UPFF\", \"Cns_UPMM\",\n",
|
||||
" \"ParadeFlag\", \"FF_Tax\", \"FF_Cns\", \"MM_Tax\", \"MM_Cns\",\n",
|
||||
" \"SS_Index_Weight\", \"Parade_Weight\", \"Bars_Weight\",\n",
|
||||
" \"GEOID10\", \"ZIP\",\n",
|
||||
" ], axis=\"columns\") \\\n",
|
||||
" .rename({\n",
|
||||
" \"LAT\": \"lat\",\n",
|
||||
" \"LNG\": \"long\",\n",
|
||||
" }, axis=\"columns\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Process/Wrangle the data"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>name</th>\n",
|
||||
" <th>lat</th>\n",
|
||||
" <th>long</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>0</th>\n",
|
||||
" <td>Hancock OH</td>\n",
|
||||
" <td>41.000471</td>\n",
|
||||
" <td>-83.666033</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1</th>\n",
|
||||
" <td>Stafford VA</td>\n",
|
||||
" <td>38.413261</td>\n",
|
||||
" <td>-77.451334</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2</th>\n",
|
||||
" <td>Webster NE</td>\n",
|
||||
" <td>40.180646</td>\n",
|
||||
" <td>-98.498590</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>3</th>\n",
|
||||
" <td>Dimmit TX</td>\n",
|
||||
" <td>28.423587</td>\n",
|
||||
" <td>-99.765871</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4</th>\n",
|
||||
" <td>Cedar IA</td>\n",
|
||||
" <td>41.772360</td>\n",
|
||||
" <td>-91.132610</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" name lat long\n",
|
||||
"0 Hancock OH 41.000471 -83.666033\n",
|
||||
"1 Stafford VA 38.413261 -77.451334\n",
|
||||
"2 Webster NE 40.180646 -98.498590\n",
|
||||
"3 Dimmit TX 28.423587 -99.765871\n",
|
||||
"4 Cedar IA 41.772360 -91.132610"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Combine the county name with the state code\n",
|
||||
"def combine_name_state(row):\n",
|
||||
" row[\"name\"] = f\"{row['name']} {row['STUSAB']}\"\n",
|
||||
" return row\n",
|
||||
"\n",
|
||||
"counties = counties.apply(combine_name_state, axis=\"columns\") \\\n",
|
||||
" .drop([\"STUSAB\"], axis=\"columns\")\n",
|
||||
"\n",
|
||||
"counties.to_csv(\"../data/processed/us-county-boundaries.csv\")\n",
|
||||
"counties.head()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>county</th>\n",
|
||||
" <th>party</th>\n",
|
||||
" <th>votes</th>\n",
|
||||
" <th>total</th>\n",
|
||||
" <th>lat</th>\n",
|
||||
" <th>long</th>\n",
|
||||
" <th>percent</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>0</th>\n",
|
||||
" <td>Autauga AL</td>\n",
|
||||
" <td>Democrat</td>\n",
|
||||
" <td>6363</td>\n",
|
||||
" <td>23932</td>\n",
|
||||
" <td>32.532237</td>\n",
|
||||
" <td>-86.646439</td>\n",
|
||||
" <td>0.265878</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1</th>\n",
|
||||
" <td>Autauga AL</td>\n",
|
||||
" <td>Republican</td>\n",
|
||||
" <td>17379</td>\n",
|
||||
" <td>23932</td>\n",
|
||||
" <td>32.532237</td>\n",
|
||||
" <td>-86.646439</td>\n",
|
||||
" <td>0.726183</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2</th>\n",
|
||||
" <td>Autauga AL</td>\n",
|
||||
" <td>Other</td>\n",
|
||||
" <td>190</td>\n",
|
||||
" <td>23932</td>\n",
|
||||
" <td>32.532237</td>\n",
|
||||
" <td>-86.646439</td>\n",
|
||||
" <td>0.007939</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>3</th>\n",
|
||||
" <td>Baldwin AL</td>\n",
|
||||
" <td>Democrat</td>\n",
|
||||
" <td>18424</td>\n",
|
||||
" <td>85338</td>\n",
|
||||
" <td>30.659218</td>\n",
|
||||
" <td>-87.746067</td>\n",
|
||||
" <td>0.215894</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4</th>\n",
|
||||
" <td>Baldwin AL</td>\n",
|
||||
" <td>Republican</td>\n",
|
||||
" <td>66016</td>\n",
|
||||
" <td>85338</td>\n",
|
||||
" <td>30.659218</td>\n",
|
||||
" <td>-87.746067</td>\n",
|
||||
" <td>0.773583</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" county party votes total lat long percent\n",
|
||||
"0 Autauga AL Democrat 6363 23932 32.532237 -86.646439 0.265878\n",
|
||||
"1 Autauga AL Republican 17379 23932 32.532237 -86.646439 0.726183\n",
|
||||
"2 Autauga AL Other 190 23932 32.532237 -86.646439 0.007939\n",
|
||||
"3 Baldwin AL Democrat 18424 85338 30.659218 -87.746067 0.215894\n",
|
||||
"4 Baldwin AL Republican 66016 85338 30.659218 -87.746067 0.773583"
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Combine the county name with the state code\n",
|
||||
"def combine_name_state(row):\n",
|
||||
" row[\"county\"] = f\"{row['county']} {row['state']}\"\n",
|
||||
" return row\n",
|
||||
"\n",
|
||||
"pol = pol.apply(combine_name_state, axis=\"columns\") \\\n",
|
||||
" .merge(counties, left_on=\"county\", right_on=\"name\") \\\n",
|
||||
" .drop([\"state\", \"name\"], axis=\"columns\") \\\n",
|
||||
" .assign(percent=lambda x: x.votes/x.total)\n",
|
||||
"\n",
|
||||
"pol.to_csv(\"../data/processed/election-2012.csv\", index=False)\n",
|
||||
"pol.head()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"execution_count": 51,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
|
@ -596,7 +221,7 @@
|
|||
"4 37.773134 -122.411167 "
|
||||
]
|
||||
},
|
||||
"execution_count": 8,
|
||||
"execution_count": 51,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
|
@ -606,6 +231,115 @@
|
|||
"gb.head()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 52,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>county</th>\n",
|
||||
" <th>party</th>\n",
|
||||
" <th>votes</th>\n",
|
||||
" <th>total</th>\n",
|
||||
" <th>lat</th>\n",
|
||||
" <th>long</th>\n",
|
||||
" <th>percent</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>0</th>\n",
|
||||
" <td>Autauga AL</td>\n",
|
||||
" <td>Democrat</td>\n",
|
||||
" <td>6363</td>\n",
|
||||
" <td>23932</td>\n",
|
||||
" <td>32.532237</td>\n",
|
||||
" <td>-86.646439</td>\n",
|
||||
" <td>0.265878</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1</th>\n",
|
||||
" <td>Autauga AL</td>\n",
|
||||
" <td>Republican</td>\n",
|
||||
" <td>17379</td>\n",
|
||||
" <td>23932</td>\n",
|
||||
" <td>32.532237</td>\n",
|
||||
" <td>-86.646439</td>\n",
|
||||
" <td>0.726183</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2</th>\n",
|
||||
" <td>Autauga AL</td>\n",
|
||||
" <td>Other</td>\n",
|
||||
" <td>190</td>\n",
|
||||
" <td>23932</td>\n",
|
||||
" <td>32.532237</td>\n",
|
||||
" <td>-86.646439</td>\n",
|
||||
" <td>0.007939</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>3</th>\n",
|
||||
" <td>Baldwin AL</td>\n",
|
||||
" <td>Democrat</td>\n",
|
||||
" <td>18424</td>\n",
|
||||
" <td>85338</td>\n",
|
||||
" <td>30.659218</td>\n",
|
||||
" <td>-87.746067</td>\n",
|
||||
" <td>0.215894</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4</th>\n",
|
||||
" <td>Baldwin AL</td>\n",
|
||||
" <td>Republican</td>\n",
|
||||
" <td>66016</td>\n",
|
||||
" <td>85338</td>\n",
|
||||
" <td>30.659218</td>\n",
|
||||
" <td>-87.746067</td>\n",
|
||||
" <td>0.773583</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" county party votes total lat long percent\n",
|
||||
"0 Autauga AL Democrat 6363 23932 32.532237 -86.646439 0.265878\n",
|
||||
"1 Autauga AL Republican 17379 23932 32.532237 -86.646439 0.726183\n",
|
||||
"2 Autauga AL Other 190 23932 32.532237 -86.646439 0.007939\n",
|
||||
"3 Baldwin AL Democrat 18424 85338 30.659218 -87.746067 0.215894\n",
|
||||
"4 Baldwin AL Republican 66016 85338 30.659218 -87.746067 0.773583"
|
||||
]
|
||||
},
|
||||
"execution_count": 52,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# index=False keeps the DataFrame's positional index out of the CSV (as the pre-refactor cell did)\n",
"pol.to_csv(\"../data/processed/election-2012.csv\", index=False)\n",
|
||||
"pol.head()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
|
|
|
@ -0,0 +1,69 @@
|
|||
import pandas as pd
|
||||
|
||||
def _label_with_state(frame, name_col, state_col):
    """Build ``"<name> <state>"`` labels from two columns of ``frame``.

    Returns a plain list with one label per row, in row order.
    """
    # f-string formatting is kept on purpose: it is what the per-row helpers
    # did before, so every value (including a missing one) is rendered
    # through ``format()`` exactly as it was.
    return [
        f"{name} {state}"
        for name, state in zip(frame[name_col], frame[state_col])
    ]


def load_and_process(raw_dir="../data/raw"):
    """Load the four raw CSV files and return them cleaned and merged.

    Parameters
    ----------
    raw_dir : str or os.PathLike, default "../data/raw"
        Directory containing ``zip_lat_long.csv``,
        ``us-county-boundaries.csv``, ``countypres_2000-2020.csv`` and
        ``gaybourhoods.csv``. The default matches the previously hard-coded
        location, which is resolved against the current working directory.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(gb, pol, counties, cords)``:

        * ``gb`` - gaybourhoods rows inner-joined to ``cords`` on
          ``GEOID10 == ZIP``, with the listed helper columns dropped and
          ``LAT``/``LNG`` renamed to ``lat``/``long``.
        * ``pol`` - rows with ``year == 2012``, with ``county`` rewritten
          as ``"<County> <state code>"``, inner-joined to ``counties`` on
          that label, plus a ``percent`` column (``votes / total``).
        * ``counties`` - county table with ``name`` rewritten as
          ``"<NAME> <STUSAB>"`` and ``lat``/``long`` columns.
        * ``cords`` - ``zip_lat_long.csv`` exactly as read.
    """
    # Local import: the module's top-level imports only bring in pandas.
    from pathlib import Path

    raw_dir = Path(raw_dir)

    # cords - mapping zip codes to long/lat coordinates
    cords = pd.read_csv(raw_dir / "zip_lat_long.csv")

    ## counties - Relating US counties to their long/lat position on the Earth
    counties = (
        pd.read_csv(raw_dir / "us-county-boundaries.csv", sep=";")
        .rename(columns={
            "NAME": "name",
            "INTPTLAT": "lat",
            "INTPTLON": "long",
        })
        # Combine the county name with the state code
        .assign(name=lambda df: _label_with_state(df, "name", "STUSAB"))
        .drop(columns=["STUSAB"])
    )

    ## pol - Election results from the 2012 American presidential election
    pol = (
        pd.read_csv(raw_dir / "countypres_2000-2020.csv")
        .query("`year` == 2012")
        # drop=True discards the old row labels instead of materialising an
        # "index" column that would only be dropped again.
        .reset_index(drop=True)
        .drop(columns=[
            "year", "state", "county_fips", "office",
            "candidate", "version", "mode",
        ])
        .rename(columns={
            "county_name": "county",
            "state_po": "state",
            "candidatevotes": "votes",
            "totalvotes": "total",
        })
        # NOTE(review): str.capitalize lowercases everything after the first
        # character ("ST. CLAIR" -> "St. clair"). Confirm multi-word county
        # names still match `counties.name`; the inner merge below drops
        # unmatched rows silently.
        .assign(
            county=lambda df: df["county"].str.capitalize(),
            party=lambda df: df["party"].str.capitalize(),
        )
        # Same "<name> <state>" label as `counties.name`, used as merge key
        .assign(county=lambda df: _label_with_state(df, "county", "state"))
        .merge(counties, left_on="county", right_on="name")
        .drop(columns=["state", "name"])
        .assign(percent=lambda df: df.votes / df.total)
    )

    ## gb - the gaybourhoods dataset
    gb = (
        pd.read_csv(raw_dir / "gaybourhoods.csv")
        # Attach long/lat to each row via its zip code
        .merge(cords, left_on="GEOID10", right_on="ZIP")
        .drop(columns=[
            "Mjoint_MF", "Mjoint_SS", "Mjoint_FF", "Mjoint_MM",
            "Cns_TotHH", "Cns_UPSS", "Cns_UPFF", "Cns_UPMM",
            "ParadeFlag", "FF_Tax", "FF_Cns", "MM_Tax", "MM_Cns",
            "SS_Index_Weight", "Parade_Weight", "Bars_Weight",
            "GEOID10", "ZIP",
        ])
        .rename(columns={
            "LAT": "lat",
            "LNG": "long",
        })
    )

    return (gb, pol, counties, cords)
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue