Skip to content
Permalink
main
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
# -*- coding: utf-8 -*-
"""Working Congressional vote analytics
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/gist/npapa17/2f5a7027c1f6dac531572118beb0f314/working-congressional-vote-analytics.ipynb
# Merging data sets prior to EDA
"""
import pandas as pd
import numpy as np
import os
import glob
import matplotlib.pyplot as plt
import seaborn as sb
import csv
from pylab import *
from google.colab import drive
drive.mount('/content/gdrive')
joined_files = os.path.join("/content/gdrive/MyDrive/Independent study Spring 2022:/2017 Congressional voting records_CLEAN","*.csv")
joined_list = glob.glob(joined_files)
df2017 = pd.concat(map(pd.read_csv,joined_list),axis=1,join='outer')
#df2017.to_csv('2017MergedData.csv')
joined_files = os.path.join("/content/gdrive/MyDrive/Independent study Spring 2022:/2018 Congressional voting records_CLEAN","*.csv")
joined_list = glob.glob(joined_files)
df2018 = pd.concat(map(pd.read_csv,joined_list),axis=1,join='outer')
#df2018.to_csv('2018MergedData.csv')
joined_files = os.path.join("/content/gdrive/MyDrive/Independent study Spring 2022:/2019 Congressional votes CLEAN","*.csv")
joined_list = glob.glob(joined_files)
df2019 = pd.concat(map(pd.read_csv,joined_list),axis=1,join='outer')
#df2019.to_csv('2019MergedData.csv')
joined_files = os.path.join("/content/gdrive/MyDrive/Independent study Spring 2022:/2020 Congressional votes CLEAN","*.csv")
joined_list = glob.glob(joined_files)
df2020 = pd.concat(map(pd.read_csv,joined_list),axis=1,join='outer')
#df2020.to_csv('2020MergedData.csv')
joined_files = os.path.join("/content/gdrive/MyDrive/Independent study Spring 2022:/2021 Congressional votes CLEAN","*.csv")
joined_list = glob.glob(joined_files)
df2021 = pd.concat(map(pd.read_csv,joined_list),axis=1,join='outer')
#df2021.to_csv('2021MergedData.csv')
df2017=pd.read_csv('/content/gdrive/MyDrive/Independent study Spring 2022:/Merged yearly data/2017MergedData.csv')
df2018=pd.read_csv('/content/gdrive/MyDrive/Independent study Spring 2022:/Merged yearly data/2018MergedData.csv')
df2019=pd.read_csv('/content/gdrive/MyDrive/Independent study Spring 2022:/Merged yearly data/2019MergedData.csv')
df2020=pd.read_csv('/content/gdrive/MyDrive/Independent study Spring 2022:/Merged yearly data/2020MergedData.csv')
df2021=pd.read_csv('/content/gdrive/MyDrive/Independent study Spring 2022:/Merged yearly data/2021MergedData.csv')
"""*
*
# 2017DataWrangling
1. Look for what years were most and least partisan! How?
* Find percent of votes that each party votes 100% together, graph that over time
* Pseudo code:
i=0
while i<#of{
if (RepublicanYea>X and DemocratNay>X OR RepublicanNay>X AND DemocratYea>X) vote = partisan
-Add new rows. One for Republican Yea count, One for Republican Nay count. One for Democrat yea count, one for Democrat Nay count.
-Use np.where to find columns where above conditions are met. Create new row based on this that gives vote a partisan index.
"""
df2017
df2017DropDistrict = df2017[df2017.columns.drop(list(df2017.filter(regex='district')))]
df2017DropPerson = df2017[df2017DropDistrict.columns.drop(list(df2017.filter(regex='person.')))]
df2017DropState = df2017[df2017DropPerson.columns.drop(list(df2017DropPerson.filter(regex='state.')))]
df2017DropName =df2017[df2017DropState.columns.drop(list(df2017.filter(regex='name.')))]
df2017JustVotes= df2017[df2017DropName.columns.drop(list(df2017.filter(regex='party.')))]
df2017JustVotes
df2017JustVotes
df2017RepublicanVotes= df2017JustVotes.loc[df2017JustVotes['party']=='Republican']
df2017RepublicanVotes
df2017RepublicanJustVotes=df2017RepublicanVotes.drop(['person','state','name','party'],axis=1)
df2017RepublicanJustVotes
dfRepVoteCounts=df2017RepublicanJustVotes.apply(pd.Series.value_counts)
dfRepVoteCounts
dfRepVoteCounts=dfRepVoteCounts.fillna(0)
dfRepVoteCounts=dfRepVoteCounts.reindex(['Yea','Nay','Present','Not Voting'])
dfRepVoteCounts
dfRepVoteCounts=dfRepVoteCounts.transpose()
dfRepVoteCounts.columns=["RepYea","RepNay","RepPresent","RepNot-Voting"]
dfRepVoteCounts=dfRepVoteCounts.transpose()
dfRepVoteCounts
dfRepVoteCounts
df2017DemocratVotes= df2017JustVotes.loc[df2017JustVotes['party']=='Democrat']
df2017DemocratVotes
df2017DemocratJustVotes=df2017DemocratVotes.drop(['person','state','name','party'],axis=1)
df2017DemocratJustVotes
dfDemVoteCounts=df2017DemocratJustVotes.apply(pd.Series.value_counts)
dfDemVoteCounts
dfDemVoteCounts=dfDemVoteCounts.fillna(0)
dfDemVoteCounts=dfDemVoteCounts.reindex(['Yea','Nay','Present','Not Voting'])
dfDemVoteCounts=dfDemVoteCounts.transpose()
dfDemVoteCounts.columns=["DemYea","DemNay","DemPresent","DemNot-Voting"]
dfDemVoteCounts=dfDemVoteCounts.transpose()
dfDemVoteCounts
dfTotalVoteCounts=pd.concat([dfDemVoteCounts,dfRepVoteCounts],ignore_index=True)
dfTotalVoteCounts
dfTotalVoteCounts=dfTotalVoteCounts.transpose()
dfTotalVoteCounts.columns=["DemYea","DemNay","DemPresent","DemNot-Voting","RepYea","RepNay","RepPresent","RepNot-Voting"]
dfTotalVoteCounts=dfTotalVoteCounts.transpose()
dfTotalVoteCounts
dfTotalVoteCounts=dfTotalVoteCounts.transpose()
dfTotalVoteCounts
dfTotalVoteCounts['Vote Partisanship']=np.where(((dfTotalVoteCounts['DemYea']>33) & (dfTotalVoteCounts['RepNay']>33)) | ((dfTotalVoteCounts['DemNay']>33) & (dfTotalVoteCounts['RepYea']>33)),'Partisan','Bi-Partisan')
dfTotalVoteCounts
#dfTotalVoteCounts=dfTotalVoteCounts.transpose()
dfTotalVoteCounts
"""# 2018DataWranging"""
df2018DropDistrict = df2018[df2018.columns.drop(list(df2018.filter(regex='district')))]
df2018DropPerson = df2018[df2018DropDistrict.columns.drop(list(df2018.filter(regex='person.')))]
df2018DropState = df2018[df2018DropPerson.columns.drop(list(df2018DropPerson.filter(regex='state.')))]
df2018DropName =df2018[df2018DropState.columns.drop(list(df2018.filter(regex='name.')))]
df2018JustVotes= df2018[df2018DropName.columns.drop(list(df2018.filter(regex='party.')))]
df2018JustVotes
df2018RepublicanVotes= df2018JustVotes.loc[df2018JustVotes['party']=='Republican']
df2018RepublicanVotes
df2018RepublicanJustVotes=df2018RepublicanVotes.drop(['person','state','name','party'],axis=1)
df2018RepublicanJustVotes
dfRepVoteCounts18=df2018RepublicanJustVotes.apply(pd.Series.value_counts)
dfRepVoteCounts18
dfRepVoteCounts18=dfRepVoteCounts18.fillna(0)
dfRepVoteCounts18=dfRepVoteCounts18.reindex(['Yea','Nay','Present','Not Voting'])
dfRepVoteCounts18
dfRepVoteCounts18=dfRepVoteCounts18.transpose()
dfRepVoteCounts18.columns=["RepYea","RepNay","RepPresent","RepNot-Voting"]
dfRepVoteCounts18=dfRepVoteCounts18.transpose()
dfRepVoteCounts18
df2018DemocratVotes= df2018JustVotes.loc[df2018JustVotes['party']=='Democrat']
df2018DemocratVotes
df2018DemocratJustVotes=df2018DemocratVotes.drop(['person','state','name','party'],axis=1)
df2018DemocratJustVotes
dfDemVoteCounts18=df2018DemocratJustVotes.apply(pd.Series.value_counts)
dfDemVoteCounts18=dfDemVoteCounts18.fillna(0)
dfDemVoteCounts18=dfDemVoteCounts18.reindex(['Yea','Nay','Present','Not Voting'])
dfDemVoteCounts18=dfDemVoteCounts18.transpose()
dfDemVoteCounts18.columns=["DemYea","DemNay","DemPresent","DemNot-Voting"]
dfDemVoteCounts18=dfDemVoteCounts18.transpose()
dfDemVoteCounts18
dfTotalVoteCounts18=pd.concat([dfDemVoteCounts18,dfRepVoteCounts18],ignore_index=True)
dfTotalVoteCounts18
dfTotalVoteCounts18=dfTotalVoteCounts18.transpose()
dfTotalVoteCounts18.columns=["DemYea","DemNay","DemPresent","DemNot-Voting","RepYea","RepNay","RepPresent","RepNot-Voting"]
dfTotalVoteCounts18=dfTotalVoteCounts18.transpose()
dfTotalVoteCounts18=dfTotalVoteCounts18.fillna(0)
dfTotalVoteCounts18
dfTotalVoteCounts18=dfTotalVoteCounts18.transpose()
dfTotalVoteCounts18['Vote Partisanship']=np.where(((dfTotalVoteCounts18['DemYea']>33) & (dfTotalVoteCounts18['RepNay']>33)) | ((dfTotalVoteCounts18['DemNay']>33) & (dfTotalVoteCounts18['RepYea']>33)),'Partisan','Bi-Partisan')
dfTotalVoteCounts18=dfTotalVoteCounts18.transpose()
dfTotalVoteCounts18
"""# 2019DataWrangling
"""
df2019DropDistrict = df2019[df2019.columns.drop(list(df2019.filter(regex='district')))]
df2019DropPerson = df2019[df2019DropDistrict.columns.drop(list(df2019.filter(regex='person.')))]
df2019DropState = df2019[df2019DropPerson.columns.drop(list(df2019DropPerson.filter(regex='state.')))]
df2019DropName =df2019[df2019DropState.columns.drop(list(df2019.filter(regex='name.')))]
df2019JustVotes= df2019[df2019DropName.columns.drop(list(df2019.filter(regex='party.')))]
df2019JustVotes
df2019RepublicanVotes= df2019JustVotes.loc[df2019JustVotes['party']=='Republican']
df2019RepublicanJustVotes=df2019RepublicanVotes.drop(['person','state','name','party'],axis=1)
df2019RepublicanJustVotes
dfRepVoteCounts19=df2019RepublicanJustVotes.apply(pd.Series.value_counts)
dfRepVoteCounts19
dfRepVoteCounts19=dfRepVoteCounts19.fillna(0)
dfRepVoteCounts19=dfRepVoteCounts19.reindex(['Yea','Nay','Present','Not Voting'])
dfRepVoteCounts19
dfRepVoteCounts19=dfRepVoteCounts19.transpose()
dfRepVoteCounts19.columns=["RepYea","RepNay","RepPresent","RepNot-Voting"]
dfRepVoteCounts19=dfRepVoteCounts19.transpose()
dfRepVoteCounts19=dfRepVoteCounts19.fillna(0)
df2019DemocratVotes= df2019JustVotes.loc[df2019JustVotes['party']=='Democrat']
df2019DemocratVotes
df2019DemocratJustVotes=df2019DemocratVotes.drop(['person','state','name','party'],axis=1)
df2019DemocratJustVotes
dfDemVoteCounts19=df2019DemocratJustVotes.apply(pd.Series.value_counts)
dfDemVoteCounts19=dfDemVoteCounts19.fillna(0)
dfDemVoteCounts19=dfDemVoteCounts19.reindex(['Yea','Nay','Present','Not Voting'])
dfDemVoteCounts19=dfDemVoteCounts19.transpose()
dfDemVoteCounts19.columns=["DemYea","DemNay","DemPresent","DemNot-Voting"]
dfDemVoteCounts19=dfDemVoteCounts19.transpose()
dfTotalVoteCounts19=pd.concat([dfDemVoteCounts19,dfRepVoteCounts19],ignore_index=True)
dfDemVoteCounts19
dfTotalVoteCounts19=pd.concat([dfDemVoteCounts19,dfRepVoteCounts19],ignore_index=True)
dfTotalVoteCounts19=dfTotalVoteCounts19.transpose()
dfTotalVoteCounts19.columns=["DemYea","DemNay","DemPresent","DemNot-Voting","RepYea","RepNay","RepPresent","RepNot-Voting"]
dfTotalVoteCounts19=dfTotalVoteCounts19.transpose()
dfTotalVoteCounts19=dfTotalVoteCounts19.fillna(0)
dfTotalVoteCounts19=dfTotalVoteCounts19.transpose()
dfTotalVoteCounts19['Vote Partisanship']=np.where(((dfTotalVoteCounts19['DemYea']>33) & (dfTotalVoteCounts19['RepNay']>33)) | ((dfTotalVoteCounts19['DemNay']>33) & (dfTotalVoteCounts19['RepYea']>33)),'Partisan','Bi-Partisan')
dfTotalVoteCounts19=dfTotalVoteCounts19.transpose()
dfTotalVoteCounts19
"""#2020DataWrangling"""
df2020DropDistrict = df2020[df2020.columns.drop(list(df2020.filter(regex='district')))]
df2020DropPerson = df2020[df2020DropDistrict.columns.drop(list(df2020.filter(regex='person.')))]
df2020DropState = df2020[df2020DropPerson.columns.drop(list(df2020DropPerson.filter(regex='state.')))]
df2020DropName =df2020[df2020DropState.columns.drop(list(df2020.filter(regex='name.')))]
df2020JustVotes= df2020[df2020DropName.columns.drop(list(df2020.filter(regex='party.')))]
df2020JustVotes
df2020RepublicanVotes= df2020JustVotes.loc[df2020JustVotes['party']=='Republican']
df2020RepublicanJustVotes=df2020RepublicanVotes.drop(['person','state','name','party'],axis=1)
df2020RepublicanJustVotes
dfRepVoteCounts20=df2020RepublicanJustVotes.apply(pd.Series.value_counts)
dfRepVoteCounts20=dfRepVoteCounts20=dfRepVoteCounts20.fillna(0)
dfRepVoteCounts20=dfRepVoteCounts20.reindex(['Yea','Nay','Present','Not Voting'])
dfRepVoteCounts20
dfRepVoteCounts20=dfRepVoteCounts20.transpose()
dfRepVoteCounts20.columns=["RepYea","RepNay","RepPresent","RepNot-Voting"]
dfRepVoteCounts20=dfRepVoteCounts20.transpose()
dfRepVoteCounts20=dfRepVoteCounts20.fillna(0)
df2020DemocratVotes= df2020JustVotes.loc[df2020JustVotes['party']=='Democrat']
df2020DemocratVotes
df2020DemocratJustVotes=df2020DemocratVotes.drop(['person','state','name','party'],axis=1)
df2020DemocratJustVotes
dfDemVoteCounts20=df2020DemocratJustVotes.apply(pd.Series.value_counts)
dfDemVoteCounts20=dfDemVoteCounts20.fillna(0)
dfDemVoteCounts20=dfDemVoteCounts20.reindex(['Yea','Nay','Present','Not Voting'])
dfDemVoteCounts20=dfDemVoteCounts20.transpose()
dfDemVoteCounts20.columns=["DemYea","DemNay","DemPresent","DemNot-Voting"]
dfDemVoteCounts20=dfDemVoteCounts20.transpose()
dfTotalVoteCounts20=pd.concat([dfDemVoteCounts20,dfRepVoteCounts20],ignore_index=True)
dfDemVoteCounts20
dfTotalVoteCounts20=pd.concat([dfDemVoteCounts20,dfRepVoteCounts20],ignore_index=True)
dfTotalVoteCounts20=dfTotalVoteCounts20.transpose()
dfTotalVoteCounts20.columns=["DemYea","DemNay","DemPresent","DemNot-Voting","RepYea","RepNay","RepPresent","RepNot-Voting"]
dfTotalVoteCounts20=dfTotalVoteCounts20.transpose()
dfTotalVoteCounts20=dfTotalVoteCounts20.fillna(0)
dfTotalVoteCounts20=dfTotalVoteCounts20.transpose()
dfTotalVoteCounts20['Vote Partisanship']=np.where(((dfTotalVoteCounts20['DemYea']>33) & (dfTotalVoteCounts20['RepNay']>33)) | ((dfTotalVoteCounts20['DemNay']>33) & (dfTotalVoteCounts20['RepYea']>33)),'Partisan','Bi-Partisan')
dfTotalVoteCounts20=dfTotalVoteCounts20.transpose()
dfTotalVoteCounts20
"""# 2021DataWrangling"""
df2021DropDistrict = df2021[df2021.columns.drop(list(df2021.filter(regex='district')))]
df2021DropPerson = df2021[df2021DropDistrict.columns.drop(list(df2021.filter(regex='person.')))]
df2021DropState = df2021[df2021DropPerson.columns.drop(list(df2021DropPerson.filter(regex='state.')))]
df2021DropName =df2021[df2021DropState.columns.drop(list(df2021.filter(regex='name.')))]
df2021JustVotes= df2021[df2021DropName.columns.drop(list(df2021.filter(regex='party.')))]
df2021JustVotes
df2021RepublicanVotes= df2021JustVotes.loc[df2021JustVotes['party']=='Republican']
df2021RepublicanJustVotes=df2021RepublicanVotes.drop(['person','state','name','party'],axis=1)
df2021RepublicanJustVotes
dfRepVoteCounts21=df2021RepublicanJustVotes.apply(pd.Series.value_counts)
dfRepVoteCounts21=dfRepVoteCounts21=dfRepVoteCounts21.fillna(0)
dfRepVoteCounts21=dfRepVoteCounts21.reindex(['Yea','Nay','Present','Not Voting'])
dfRepVoteCounts21
dfRepVoteCounts21=dfRepVoteCounts21.transpose()
dfRepVoteCounts21.columns=["RepYea","RepNay","RepPresent","RepNot-Voting"]
dfRepVoteCounts21=dfRepVoteCounts21.transpose()
dfRepVoteCounts21=dfRepVoteCounts21.fillna(0)
df2021DemocratVotes= df2021JustVotes.loc[df2021JustVotes['party']=='Democrat']
df2021DemocratVotes
df2021DemocratJustVotes=df2021DemocratVotes.drop(['person','state','name','party'],axis=1)
df2021DemocratJustVotes
dfDemVoteCounts21=df2021DemocratJustVotes.apply(pd.Series.value_counts)
dfDemVoteCounts21=dfDemVoteCounts21.fillna(0)
dfDemVoteCounts21=dfDemVoteCounts21.reindex(['Yea','Nay','Present','Not Voting'])
dfDemVoteCounts21=dfDemVoteCounts21.transpose()
dfDemVoteCounts21.columns=["DemYea","DemNay","DemPresent","DemNot-Voting"]
dfDemVoteCounts21=dfDemVoteCounts21.transpose()
dfTotalVoteCounts21=pd.concat([dfDemVoteCounts21,dfRepVoteCounts21],ignore_index=True)
dfDemVoteCounts21
dfTotalVoteCounts21=pd.concat([dfDemVoteCounts21,dfRepVoteCounts21],ignore_index=True)
dfTotalVoteCounts21=dfTotalVoteCounts21.transpose()
dfTotalVoteCounts21.columns=["DemYea","DemNay","DemPresent","DemNot-Voting","RepYea","RepNay","RepPresent","RepNot-Voting"]
dfTotalVoteCounts21=dfTotalVoteCounts21.transpose()
dfTotalVoteCounts21=dfTotalVoteCounts21.fillna(0)
dfTotalVoteCounts21=dfTotalVoteCounts21.transpose()
dfTotalVoteCounts21['Vote Partisanship']=np.where(((dfTotalVoteCounts21['DemYea']>33) & (dfTotalVoteCounts21['RepNay']>33)) | ((dfTotalVoteCounts21['DemNay']>33) & (dfTotalVoteCounts21['RepYea']>33)),'Partisan','Bi-Partisan')
dfTotalVoteCounts21=dfTotalVoteCounts21.transpose()
dfTotalVoteCounts21
"""# EDA: Partisanship analysis"""
dfTotalVoteCounts
dfTotalVoteCounts['Vote Partisanship'].value_counts(normalize=True)*100
dfTotalVoteCounts18
dfTotalVoteCounts18['Vote Partisanship'].value_counts(normalize=True)*100
dfTotalVoteCounts19
dfTotalVoteCounts19['Vote Partisanship'].value_counts(normalize=True)*100
dfTotalVoteCounts20
dfTotalVoteCounts20['Vote Partisanship'].value_counts(normalize=True)*100
dfTotalVoteCounts21
dfTotalVoteCounts21['Vote Partisanship'].value_counts(normalize=True)*100
"""
* Find if votes tend to get more or less partisan as we got closer to the 2020 election
"""
partisanData = {'% of votes that are Partisan':[48.30769,30.291971,42.056075,54.109589,62.75],
'Year':[2017,2018,2019,2020,2021]}
dfPartisanPercents = pd.DataFrame(partisanData)
dfPartisanPercents
dfPartisanPercents.plot.line(x='Year',y='% of votes that are Partisan',)