# pride-data-analysis/analysis/code/project_functions1.py

import pandas as pd

def load_and_process(data_dir="../data/raw"):
    """Load the raw CSV files and return the cleaned analysis tables.

    Args:
        data_dir: Directory containing ``zip_lat_long.csv``,
            ``us-county-boundaries.csv``, ``countypres_2000-2020.csv`` and
            ``gaybourhoods.csv``. The default is the original hard-coded
            location, resolved relative to the current working directory.

    Returns:
        A tuple ``(gb, pol, counties, cords)`` of DataFrames:

        * ``gb`` - gaybourhoods rows inner-joined to ``cords`` on
          ``GEOID10 == ZIP``, with the ``Mjoint_*``, ``Cns_*``, ``*_Tax``,
          ``*_Cns``, ``*_Weight`` and ``ParadeFlag`` columns and both join
          keys removed, and ``LAT``/``LNG`` renamed to ``lat``/``long``.
        * ``pol`` - rows with ``year == 2012`` from the county presidential
          results, inner-joined to ``counties`` and extended with
          ``percent = votes / total``. ``county`` holds the matching
          ``counties["name"]`` value.
        * ``counties`` - the county boundary table with ``name`` rewritten
          to ``"<NAME> <STUSAB>"``, ``INTPTLAT``/``INTPTLON`` renamed to
          ``lat``/``long`` and ``STUSAB`` removed.
        * ``cords`` - the ZIP/coordinate table exactly as read.

    Raises:
        FileNotFoundError: If one of the four CSV files is missing.
        KeyError: If a column that is dropped or combined is absent.
    """
    # Function-scope import so the block does not depend on the module header.
    from pathlib import Path

    raw_dir = Path(data_dir)

    # cords - mapping zip codes to long/lat coordinates
    cords = pd.read_csv(raw_dir / "zip_lat_long.csv")

    ## counties - Relating US counties to their long/lat position on the Earth
    # The county name is combined with the state code so that equally named
    # counties in different states stay distinguishable. Done column-wise
    # instead of with a per-row ``apply``.
    counties = (
        pd.read_csv(raw_dir / "us-county-boundaries.csv", sep=";")
        .rename(columns={
            "NAME": "name",
            "INTPTLAT": "lat",
            "INTPTLON": "long",
        })
        .assign(
            name=lambda df: df["name"].astype(str) + " " + df["STUSAB"].astype(str)
        )
        .drop(columns=["STUSAB"])
    )

    ## pol - Election results from the 2012 American presidential election
    # The two sources are joined on a casefolded "<county> <state code>" key.
    # The previous ``str.capitalize()`` normalisation lower-cased everything
    # after the first character, so any county whose name is not a single
    # capitalised word could never equal the boundary spelling and was
    # silently lost in the inner merge.
    # NOTE(review): assumes the two files differ only in letter case for the
    # same county - confirm against the raw data.
    pol = (
        pd.read_csv(raw_dir / "countypres_2000-2020.csv")
        .query("`year` == 2012")
        .reset_index(drop=True)
        .drop(columns=[
            "year", "state", "county_fips", "office",
            "candidate", "version", "mode",
        ])
        .rename(columns={
            "county_name": "county",
            "state_po": "state",
            "candidatevotes": "votes",
            "totalvotes": "total",
        })
        .assign(
            party=lambda df: df["party"].str.capitalize(),
            _county_key=lambda df: (
                df["county"].astype(str) + " " + df["state"].astype(str)
            ).str.casefold(),
        )
        .merge(
            # The key is added to a temporary copy so the returned
            # ``counties`` table keeps its original columns.
            counties.assign(_county_key=counties["name"].str.casefold()),
            on="_county_key",
        )
        # Use the boundary table's spelling as the display name; for rows
        # that matched before the fix this is the same string as before.
        .assign(county=lambda df: df["name"])
        .drop(columns=["state", "name", "_county_key"])
        .assign(percent=lambda df: df["votes"] / df["total"])
    )

    ## gb - the gaybourhoods dataset
    gb = (
        pd.read_csv(raw_dir / "gaybourhoods.csv")
        .merge(cords, left_on="GEOID10", right_on="ZIP")
        .drop(columns=[
            "Mjoint_MF", "Mjoint_SS", "Mjoint_FF", "Mjoint_MM",
            "Cns_TotHH", "Cns_UPSS", "Cns_UPFF", "Cns_UPMM",
            "ParadeFlag", "FF_Tax", "FF_Cns", "MM_Tax", "MM_Cns",
            "SS_Index_Weight", "Parade_Weight", "Bars_Weight",
            "GEOID10", "ZIP",
        ])
        .rename(columns={
            "LAT": "lat",
            "LNG": "long",
        })
    )

    return (gb, pol, counties, cords)