# NOTE: This cell only runs when the raw IRS file is present in the repository.
# The file is too big for GitHub to handle, so it has to be obtained separately;
# its source is linked in the references section of the readme.

irs = pd.read_csv("../data/raw/irs_2015.csv")

# Naive split of the IRS data set in two: `irs1` is the first half of the rows
# (row count divided by two, rounded down). More formal data wrangling will
# come later.
# NOTE(review): only the first half is built here; the second half and the step
# that writes the halves under ../data/processed/ are not visible in this
# section -- confirm they exist elsewhere.
irs1 = irs.iloc[: len(irs) // 2]
# Read the two processed halves back in and stack them row-wise so the full
# data set can be worked with as a single frame again. `ignore_index` is not
# passed, so each half keeps the row labels it was read with.
irs = pd.concat(
    [
        pd.read_csv(processed_path)
        for processed_path in (
            "../data/processed/irs_2015_1",
            "../data/processed/irs_2015_2",
        )
    ]
)
# irs.head()


# Selected data:
#   ZIPCODE  - this will be used in conjunction with the rest of the set
#   N2       - population of zip code
#
# Data of interest:
#   A11900   - Total overpayments amount
#   AGI_STUB - metric for income

# print(irs.loc[irs['zipcode']==90069])
# df = {irs['zipcode'], irs['N2']}  # compare taxes paid by queers to taxes paid by the general population