Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Political-Compass-Project/working_congressional_vote_analytics.py
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
390 lines (280 sloc)
16.3 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
"""Working Congressional vote analytics | |
Automatically generated by Colaboratory. | |
Original file is located at | |
https://colab.research.google.com/gist/npapa17/2f5a7027c1f6dac531572118beb0f314/working-congressional-vote-analytics.ipynb | |
# Merging data sets prior to EDA | |
""" | |
import pandas as pd | |
import numpy as np | |
import os | |
import glob | |
import matplotlib.pyplot as plt | |
import seaborn as sb | |
import csv | |
from pylab import * | |
from google.colab import drive | |
drive.mount('/content/gdrive') | |
joined_files = os.path.join("/content/gdrive/MyDrive/Independent study Spring 2022:/2017 Congressional voting records_CLEAN","*.csv") | |
joined_list = glob.glob(joined_files) | |
df2017 = pd.concat(map(pd.read_csv,joined_list),axis=1,join='outer') | |
#df2017.to_csv('2017MergedData.csv') | |
joined_files = os.path.join("/content/gdrive/MyDrive/Independent study Spring 2022:/2018 Congressional voting records_CLEAN","*.csv") | |
joined_list = glob.glob(joined_files) | |
df2018 = pd.concat(map(pd.read_csv,joined_list),axis=1,join='outer') | |
#df2018.to_csv('2018MergedData.csv') | |
joined_files = os.path.join("/content/gdrive/MyDrive/Independent study Spring 2022:/2019 Congressional votes CLEAN","*.csv") | |
joined_list = glob.glob(joined_files) | |
df2019 = pd.concat(map(pd.read_csv,joined_list),axis=1,join='outer') | |
#df2019.to_csv('2019MergedData.csv') | |
joined_files = os.path.join("/content/gdrive/MyDrive/Independent study Spring 2022:/2020 Congressional votes CLEAN","*.csv") | |
joined_list = glob.glob(joined_files) | |
df2020 = pd.concat(map(pd.read_csv,joined_list),axis=1,join='outer') | |
#df2020.to_csv('2020MergedData.csv') | |
joined_files = os.path.join("/content/gdrive/MyDrive/Independent study Spring 2022:/2021 Congressional votes CLEAN","*.csv") | |
joined_list = glob.glob(joined_files) | |
df2021 = pd.concat(map(pd.read_csv,joined_list),axis=1,join='outer') | |
#df2021.to_csv('2021MergedData.csv') | |
df2017=pd.read_csv('/content/gdrive/MyDrive/Independent study Spring 2022:/Merged yearly data/2017MergedData.csv') | |
df2018=pd.read_csv('/content/gdrive/MyDrive/Independent study Spring 2022:/Merged yearly data/2018MergedData.csv') | |
df2019=pd.read_csv('/content/gdrive/MyDrive/Independent study Spring 2022:/Merged yearly data/2019MergedData.csv') | |
df2020=pd.read_csv('/content/gdrive/MyDrive/Independent study Spring 2022:/Merged yearly data/2020MergedData.csv') | |
df2021=pd.read_csv('/content/gdrive/MyDrive/Independent study Spring 2022:/Merged yearly data/2021MergedData.csv') | |
"""* | |
* | |
# 2017DataWrangling | |
1. Look for what years were most and least partisan! How? | |
* Find percent of votes that each party votes 100% together, graph that over time | |
* Pseudo code: | |
i=0 | |
while i<#of{ | |
if (RepublicanYea>X and DemocratNay>X OR RepublicanNay>X AND DemocratYea>X) vote = partisan | |
-Add new rows. One for Republican Yea count, One for Republican Nay count. One for Democrat yea count, one for Democrat Nay count. | |
-Use np.where to find columns where above conditions are met. Create new row based on this that gives vote a partisan index. | |
""" | |
df2017 | |
df2017DropDistrict = df2017[df2017.columns.drop(list(df2017.filter(regex='district')))] | |
df2017DropPerson = df2017[df2017DropDistrict.columns.drop(list(df2017.filter(regex='person.')))] | |
df2017DropState = df2017[df2017DropPerson.columns.drop(list(df2017DropPerson.filter(regex='state.')))] | |
df2017DropName =df2017[df2017DropState.columns.drop(list(df2017.filter(regex='name.')))] | |
df2017JustVotes= df2017[df2017DropName.columns.drop(list(df2017.filter(regex='party.')))] | |
df2017JustVotes | |
df2017JustVotes | |
df2017RepublicanVotes= df2017JustVotes.loc[df2017JustVotes['party']=='Republican'] | |
df2017RepublicanVotes | |
df2017RepublicanJustVotes=df2017RepublicanVotes.drop(['person','state','name','party'],axis=1) | |
df2017RepublicanJustVotes | |
dfRepVoteCounts=df2017RepublicanJustVotes.apply(pd.Series.value_counts) | |
dfRepVoteCounts | |
dfRepVoteCounts=dfRepVoteCounts.fillna(0) | |
dfRepVoteCounts=dfRepVoteCounts.reindex(['Yea','Nay','Present','Not Voting']) | |
dfRepVoteCounts | |
dfRepVoteCounts=dfRepVoteCounts.transpose() | |
dfRepVoteCounts.columns=["RepYea","RepNay","RepPresent","RepNot-Voting"] | |
dfRepVoteCounts=dfRepVoteCounts.transpose() | |
dfRepVoteCounts | |
dfRepVoteCounts | |
df2017DemocratVotes= df2017JustVotes.loc[df2017JustVotes['party']=='Democrat'] | |
df2017DemocratVotes | |
df2017DemocratJustVotes=df2017DemocratVotes.drop(['person','state','name','party'],axis=1) | |
df2017DemocratJustVotes | |
dfDemVoteCounts=df2017DemocratJustVotes.apply(pd.Series.value_counts) | |
dfDemVoteCounts | |
dfDemVoteCounts=dfDemVoteCounts.fillna(0) | |
dfDemVoteCounts=dfDemVoteCounts.reindex(['Yea','Nay','Present','Not Voting']) | |
dfDemVoteCounts=dfDemVoteCounts.transpose() | |
dfDemVoteCounts.columns=["DemYea","DemNay","DemPresent","DemNot-Voting"] | |
dfDemVoteCounts=dfDemVoteCounts.transpose() | |
dfDemVoteCounts | |
dfTotalVoteCounts=pd.concat([dfDemVoteCounts,dfRepVoteCounts],ignore_index=True) | |
dfTotalVoteCounts | |
dfTotalVoteCounts=dfTotalVoteCounts.transpose() | |
dfTotalVoteCounts.columns=["DemYea","DemNay","DemPresent","DemNot-Voting","RepYea","RepNay","RepPresent","RepNot-Voting"] | |
dfTotalVoteCounts=dfTotalVoteCounts.transpose() | |
dfTotalVoteCounts | |
dfTotalVoteCounts=dfTotalVoteCounts.transpose() | |
dfTotalVoteCounts | |
dfTotalVoteCounts['Vote Partisanship']=np.where(((dfTotalVoteCounts['DemYea']>33) & (dfTotalVoteCounts['RepNay']>33)) | ((dfTotalVoteCounts['DemNay']>33) & (dfTotalVoteCounts['RepYea']>33)),'Partisan','Bi-Partisan') | |
dfTotalVoteCounts | |
#dfTotalVoteCounts=dfTotalVoteCounts.transpose() | |
dfTotalVoteCounts | |
"""# 2018DataWranging""" | |
df2018DropDistrict = df2018[df2018.columns.drop(list(df2018.filter(regex='district')))] | |
df2018DropPerson = df2018[df2018DropDistrict.columns.drop(list(df2018.filter(regex='person.')))] | |
df2018DropState = df2018[df2018DropPerson.columns.drop(list(df2018DropPerson.filter(regex='state.')))] | |
df2018DropName =df2018[df2018DropState.columns.drop(list(df2018.filter(regex='name.')))] | |
df2018JustVotes= df2018[df2018DropName.columns.drop(list(df2018.filter(regex='party.')))] | |
df2018JustVotes | |
df2018RepublicanVotes= df2018JustVotes.loc[df2018JustVotes['party']=='Republican'] | |
df2018RepublicanVotes | |
df2018RepublicanJustVotes=df2018RepublicanVotes.drop(['person','state','name','party'],axis=1) | |
df2018RepublicanJustVotes | |
dfRepVoteCounts18=df2018RepublicanJustVotes.apply(pd.Series.value_counts) | |
dfRepVoteCounts18 | |
dfRepVoteCounts18=dfRepVoteCounts18.fillna(0) | |
dfRepVoteCounts18=dfRepVoteCounts18.reindex(['Yea','Nay','Present','Not Voting']) | |
dfRepVoteCounts18 | |
dfRepVoteCounts18=dfRepVoteCounts18.transpose() | |
dfRepVoteCounts18.columns=["RepYea","RepNay","RepPresent","RepNot-Voting"] | |
dfRepVoteCounts18=dfRepVoteCounts18.transpose() | |
dfRepVoteCounts18 | |
df2018DemocratVotes= df2018JustVotes.loc[df2018JustVotes['party']=='Democrat'] | |
df2018DemocratVotes | |
df2018DemocratJustVotes=df2018DemocratVotes.drop(['person','state','name','party'],axis=1) | |
df2018DemocratJustVotes | |
dfDemVoteCounts18=df2018DemocratJustVotes.apply(pd.Series.value_counts) | |
dfDemVoteCounts18=dfDemVoteCounts18.fillna(0) | |
dfDemVoteCounts18=dfDemVoteCounts18.reindex(['Yea','Nay','Present','Not Voting']) | |
dfDemVoteCounts18=dfDemVoteCounts18.transpose() | |
dfDemVoteCounts18.columns=["DemYea","DemNay","DemPresent","DemNot-Voting"] | |
dfDemVoteCounts18=dfDemVoteCounts18.transpose() | |
dfDemVoteCounts18 | |
dfTotalVoteCounts18=pd.concat([dfDemVoteCounts18,dfRepVoteCounts18],ignore_index=True) | |
dfTotalVoteCounts18 | |
dfTotalVoteCounts18=dfTotalVoteCounts18.transpose() | |
dfTotalVoteCounts18.columns=["DemYea","DemNay","DemPresent","DemNot-Voting","RepYea","RepNay","RepPresent","RepNot-Voting"] | |
dfTotalVoteCounts18=dfTotalVoteCounts18.transpose() | |
dfTotalVoteCounts18=dfTotalVoteCounts18.fillna(0) | |
dfTotalVoteCounts18 | |
dfTotalVoteCounts18=dfTotalVoteCounts18.transpose() | |
dfTotalVoteCounts18['Vote Partisanship']=np.where(((dfTotalVoteCounts18['DemYea']>33) & (dfTotalVoteCounts18['RepNay']>33)) | ((dfTotalVoteCounts18['DemNay']>33) & (dfTotalVoteCounts18['RepYea']>33)),'Partisan','Bi-Partisan') | |
dfTotalVoteCounts18=dfTotalVoteCounts18.transpose() | |
dfTotalVoteCounts18 | |
"""# 2019DataWrangling | |
""" | |
df2019DropDistrict = df2019[df2019.columns.drop(list(df2019.filter(regex='district')))] | |
df2019DropPerson = df2019[df2019DropDistrict.columns.drop(list(df2019.filter(regex='person.')))] | |
df2019DropState = df2019[df2019DropPerson.columns.drop(list(df2019DropPerson.filter(regex='state.')))] | |
df2019DropName =df2019[df2019DropState.columns.drop(list(df2019.filter(regex='name.')))] | |
df2019JustVotes= df2019[df2019DropName.columns.drop(list(df2019.filter(regex='party.')))] | |
df2019JustVotes | |
df2019RepublicanVotes= df2019JustVotes.loc[df2019JustVotes['party']=='Republican'] | |
df2019RepublicanJustVotes=df2019RepublicanVotes.drop(['person','state','name','party'],axis=1) | |
df2019RepublicanJustVotes | |
dfRepVoteCounts19=df2019RepublicanJustVotes.apply(pd.Series.value_counts) | |
dfRepVoteCounts19 | |
dfRepVoteCounts19=dfRepVoteCounts19.fillna(0) | |
dfRepVoteCounts19=dfRepVoteCounts19.reindex(['Yea','Nay','Present','Not Voting']) | |
dfRepVoteCounts19 | |
dfRepVoteCounts19=dfRepVoteCounts19.transpose() | |
dfRepVoteCounts19.columns=["RepYea","RepNay","RepPresent","RepNot-Voting"] | |
dfRepVoteCounts19=dfRepVoteCounts19.transpose() | |
dfRepVoteCounts19=dfRepVoteCounts19.fillna(0) | |
df2019DemocratVotes= df2019JustVotes.loc[df2019JustVotes['party']=='Democrat'] | |
df2019DemocratVotes | |
df2019DemocratJustVotes=df2019DemocratVotes.drop(['person','state','name','party'],axis=1) | |
df2019DemocratJustVotes | |
dfDemVoteCounts19=df2019DemocratJustVotes.apply(pd.Series.value_counts) | |
dfDemVoteCounts19=dfDemVoteCounts19.fillna(0) | |
dfDemVoteCounts19=dfDemVoteCounts19.reindex(['Yea','Nay','Present','Not Voting']) | |
dfDemVoteCounts19=dfDemVoteCounts19.transpose() | |
dfDemVoteCounts19.columns=["DemYea","DemNay","DemPresent","DemNot-Voting"] | |
dfDemVoteCounts19=dfDemVoteCounts19.transpose() | |
dfTotalVoteCounts19=pd.concat([dfDemVoteCounts19,dfRepVoteCounts19],ignore_index=True) | |
dfDemVoteCounts19 | |
dfTotalVoteCounts19=pd.concat([dfDemVoteCounts19,dfRepVoteCounts19],ignore_index=True) | |
dfTotalVoteCounts19=dfTotalVoteCounts19.transpose() | |
dfTotalVoteCounts19.columns=["DemYea","DemNay","DemPresent","DemNot-Voting","RepYea","RepNay","RepPresent","RepNot-Voting"] | |
dfTotalVoteCounts19=dfTotalVoteCounts19.transpose() | |
dfTotalVoteCounts19=dfTotalVoteCounts19.fillna(0) | |
dfTotalVoteCounts19=dfTotalVoteCounts19.transpose() | |
dfTotalVoteCounts19['Vote Partisanship']=np.where(((dfTotalVoteCounts19['DemYea']>33) & (dfTotalVoteCounts19['RepNay']>33)) | ((dfTotalVoteCounts19['DemNay']>33) & (dfTotalVoteCounts19['RepYea']>33)),'Partisan','Bi-Partisan') | |
dfTotalVoteCounts19=dfTotalVoteCounts19.transpose() | |
dfTotalVoteCounts19 | |
"""#2020DataWrangling""" | |
df2020DropDistrict = df2020[df2020.columns.drop(list(df2020.filter(regex='district')))] | |
df2020DropPerson = df2020[df2020DropDistrict.columns.drop(list(df2020.filter(regex='person.')))] | |
df2020DropState = df2020[df2020DropPerson.columns.drop(list(df2020DropPerson.filter(regex='state.')))] | |
df2020DropName =df2020[df2020DropState.columns.drop(list(df2020.filter(regex='name.')))] | |
df2020JustVotes= df2020[df2020DropName.columns.drop(list(df2020.filter(regex='party.')))] | |
df2020JustVotes | |
df2020RepublicanVotes= df2020JustVotes.loc[df2020JustVotes['party']=='Republican'] | |
df2020RepublicanJustVotes=df2020RepublicanVotes.drop(['person','state','name','party'],axis=1) | |
df2020RepublicanJustVotes | |
dfRepVoteCounts20=df2020RepublicanJustVotes.apply(pd.Series.value_counts) | |
dfRepVoteCounts20=dfRepVoteCounts20=dfRepVoteCounts20.fillna(0) | |
dfRepVoteCounts20=dfRepVoteCounts20.reindex(['Yea','Nay','Present','Not Voting']) | |
dfRepVoteCounts20 | |
dfRepVoteCounts20=dfRepVoteCounts20.transpose() | |
dfRepVoteCounts20.columns=["RepYea","RepNay","RepPresent","RepNot-Voting"] | |
dfRepVoteCounts20=dfRepVoteCounts20.transpose() | |
dfRepVoteCounts20=dfRepVoteCounts20.fillna(0) | |
df2020DemocratVotes= df2020JustVotes.loc[df2020JustVotes['party']=='Democrat'] | |
df2020DemocratVotes | |
df2020DemocratJustVotes=df2020DemocratVotes.drop(['person','state','name','party'],axis=1) | |
df2020DemocratJustVotes | |
dfDemVoteCounts20=df2020DemocratJustVotes.apply(pd.Series.value_counts) | |
dfDemVoteCounts20=dfDemVoteCounts20.fillna(0) | |
dfDemVoteCounts20=dfDemVoteCounts20.reindex(['Yea','Nay','Present','Not Voting']) | |
dfDemVoteCounts20=dfDemVoteCounts20.transpose() | |
dfDemVoteCounts20.columns=["DemYea","DemNay","DemPresent","DemNot-Voting"] | |
dfDemVoteCounts20=dfDemVoteCounts20.transpose() | |
dfTotalVoteCounts20=pd.concat([dfDemVoteCounts20,dfRepVoteCounts20],ignore_index=True) | |
dfDemVoteCounts20 | |
dfTotalVoteCounts20=pd.concat([dfDemVoteCounts20,dfRepVoteCounts20],ignore_index=True) | |
dfTotalVoteCounts20=dfTotalVoteCounts20.transpose() | |
dfTotalVoteCounts20.columns=["DemYea","DemNay","DemPresent","DemNot-Voting","RepYea","RepNay","RepPresent","RepNot-Voting"] | |
dfTotalVoteCounts20=dfTotalVoteCounts20.transpose() | |
dfTotalVoteCounts20=dfTotalVoteCounts20.fillna(0) | |
dfTotalVoteCounts20=dfTotalVoteCounts20.transpose() | |
dfTotalVoteCounts20['Vote Partisanship']=np.where(((dfTotalVoteCounts20['DemYea']>33) & (dfTotalVoteCounts20['RepNay']>33)) | ((dfTotalVoteCounts20['DemNay']>33) & (dfTotalVoteCounts20['RepYea']>33)),'Partisan','Bi-Partisan') | |
dfTotalVoteCounts20=dfTotalVoteCounts20.transpose() | |
dfTotalVoteCounts20 | |
"""# 2021DataWrangling""" | |
df2021DropDistrict = df2021[df2021.columns.drop(list(df2021.filter(regex='district')))] | |
df2021DropPerson = df2021[df2021DropDistrict.columns.drop(list(df2021.filter(regex='person.')))] | |
df2021DropState = df2021[df2021DropPerson.columns.drop(list(df2021DropPerson.filter(regex='state.')))] | |
df2021DropName =df2021[df2021DropState.columns.drop(list(df2021.filter(regex='name.')))] | |
df2021JustVotes= df2021[df2021DropName.columns.drop(list(df2021.filter(regex='party.')))] | |
df2021JustVotes | |
df2021RepublicanVotes= df2021JustVotes.loc[df2021JustVotes['party']=='Republican'] | |
df2021RepublicanJustVotes=df2021RepublicanVotes.drop(['person','state','name','party'],axis=1) | |
df2021RepublicanJustVotes | |
dfRepVoteCounts21=df2021RepublicanJustVotes.apply(pd.Series.value_counts) | |
dfRepVoteCounts21=dfRepVoteCounts21=dfRepVoteCounts21.fillna(0) | |
dfRepVoteCounts21=dfRepVoteCounts21.reindex(['Yea','Nay','Present','Not Voting']) | |
dfRepVoteCounts21 | |
dfRepVoteCounts21=dfRepVoteCounts21.transpose() | |
dfRepVoteCounts21.columns=["RepYea","RepNay","RepPresent","RepNot-Voting"] | |
dfRepVoteCounts21=dfRepVoteCounts21.transpose() | |
dfRepVoteCounts21=dfRepVoteCounts21.fillna(0) | |
df2021DemocratVotes= df2021JustVotes.loc[df2021JustVotes['party']=='Democrat'] | |
df2021DemocratVotes | |
df2021DemocratJustVotes=df2021DemocratVotes.drop(['person','state','name','party'],axis=1) | |
df2021DemocratJustVotes | |
dfDemVoteCounts21=df2021DemocratJustVotes.apply(pd.Series.value_counts) | |
dfDemVoteCounts21=dfDemVoteCounts21.fillna(0) | |
dfDemVoteCounts21=dfDemVoteCounts21.reindex(['Yea','Nay','Present','Not Voting']) | |
dfDemVoteCounts21=dfDemVoteCounts21.transpose() | |
dfDemVoteCounts21.columns=["DemYea","DemNay","DemPresent","DemNot-Voting"] | |
dfDemVoteCounts21=dfDemVoteCounts21.transpose() | |
dfTotalVoteCounts21=pd.concat([dfDemVoteCounts21,dfRepVoteCounts21],ignore_index=True) | |
dfDemVoteCounts21 | |
dfTotalVoteCounts21=pd.concat([dfDemVoteCounts21,dfRepVoteCounts21],ignore_index=True) | |
dfTotalVoteCounts21=dfTotalVoteCounts21.transpose() | |
dfTotalVoteCounts21.columns=["DemYea","DemNay","DemPresent","DemNot-Voting","RepYea","RepNay","RepPresent","RepNot-Voting"] | |
dfTotalVoteCounts21=dfTotalVoteCounts21.transpose() | |
dfTotalVoteCounts21=dfTotalVoteCounts21.fillna(0) | |
dfTotalVoteCounts21=dfTotalVoteCounts21.transpose() | |
dfTotalVoteCounts21['Vote Partisanship']=np.where(((dfTotalVoteCounts21['DemYea']>33) & (dfTotalVoteCounts21['RepNay']>33)) | ((dfTotalVoteCounts21['DemNay']>33) & (dfTotalVoteCounts21['RepYea']>33)),'Partisan','Bi-Partisan') | |
dfTotalVoteCounts21=dfTotalVoteCounts21.transpose() | |
dfTotalVoteCounts21 | |
"""# EDA: Partisanship analysis""" | |
dfTotalVoteCounts | |
dfTotalVoteCounts['Vote Partisanship'].value_counts(normalize=True)*100 | |
dfTotalVoteCounts18 | |
dfTotalVoteCounts18['Vote Partisanship'].value_counts(normalize=True)*100 | |
dfTotalVoteCounts19 | |
dfTotalVoteCounts19['Vote Partisanship'].value_counts(normalize=True)*100 | |
dfTotalVoteCounts20 | |
dfTotalVoteCounts20['Vote Partisanship'].value_counts(normalize=True)*100 | |
dfTotalVoteCounts21 | |
dfTotalVoteCounts21['Vote Partisanship'].value_counts(normalize=True)*100 | |
""" | |
* Find if votes tend to get more or less partisan as we got closer to the 2020 election | |
""" | |
partisanData = {'% of votes that are Partisan':[48.30769,30.291971,42.056075,54.109589,62.75], | |
'Year':[2017,2018,2019,2020,2021]} | |
dfPartisanPercents = pd.DataFrame(partisanData) | |
dfPartisanPercents | |
dfPartisanPercents.plot.line(x='Year',y='% of votes that are Partisan',) |