Skip to content
Permalink
master
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
{
"cells": [
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Method: text_standard\n",
"Supporting: 10.931256713211601\n",
"Non supporting 10.042061386888973\n",
"Method: flesh_reading_ease\n",
"Supporting: 45.08041890440387\n",
"Non supporting 57.44273209549071\n",
"Method: smog_index\n",
"Supporting: 1.5800214822771215\n",
"Non supporting 1.8214475179992422\n",
"Method: flesh_kincaid_grade\n",
"Supporting: 11.059076262083781\n",
"Non supporting 9.622015915119363\n",
"Method: coleman_liau_index\n",
"Supporting: 18.19042964554243\n",
"Non supporting 13.42537703675635\n",
"Method: automated_readability_index\n",
"Supporting: 19.753383458646617\n",
"Non supporting 14.782720727548316\n",
"Method: dale_chall_readability_score\n",
"Supporting: 9.386272824919441\n",
"Non supporting 8.550011367942401\n",
"Method: difficult_words\n",
"Supporting: 6.6433941997851775\n",
"Non supporting 6.3372489579386135\n",
"Method: linsear_write_formula\n",
"Supporting: 8.546807068692138\n",
"Non supporting 9.111126749550394\n",
"Method: gunning_fog\n",
"Supporting: 11.380816326530613\n",
"Non supporting 10.703054187192118\n"
]
}
],
"source": [
"from pandas import read_csv, DataFrame\n",
"import textstat\n",
"# Read the raw CSV; the file is decoded as ISO-8859-1.\n",
"df = read_csv('data/raw/april-21.csv', encoding='ISO-8859-1')\n",
"\n",
"# Scoring callables keyed by a display label. Each one takes a single text\n",
"# string and forwards it to the corresponding textstat function.\n",
"possible_methods = dict(\n",
"    text_standard=lambda text: textstat.text_standard(text, float_output=True),\n",
"    flesh_reading_ease=lambda text: textstat.flesch_reading_ease(text),\n",
"    smog_index=lambda text: textstat.smog_index(text),\n",
"    flesh_kincaid_grade=lambda text: textstat.flesch_kincaid_grade(text),\n",
"    coleman_liau_index=lambda text: textstat.coleman_liau_index(text),\n",
"    automated_readability_index=lambda text: textstat.automated_readability_index(text),\n",
"    dale_chall_readability_score=lambda text: textstat.dale_chall_readability_score(text),\n",
"    difficult_words=lambda text: textstat.difficult_words(text),\n",
"    linsear_write_formula=lambda text: textstat.linsear_write_formula(text),\n",
"    gunning_fog=lambda text: textstat.gunning_fog(text),\n",
")\n",
"\n",
"\n",
"\n",
"# Header row, then one [label, supporting mean, non-supporting mean] row per method.\n",
"csv_array = [['readability function', 'supporting', 'non supporting']]\n",
"\n",
"# Row masks for the two label groups; they do not depend on the scoring method.\n",
"is_supporting = df['Y/N'] == 'Y'\n",
"is_non_supporting = df['Y/N'] == 'N'\n",
"\n",
"for method_name, method in possible_methods.items():\n",
"    # Score every text with the current method, then average within each group.\n",
"    readability = df['text'].apply(method)\n",
"    yes_mean = readability[is_supporting].mean()\n",
"    no_mean = readability[is_non_supporting].mean()\n",
"    csv_array.append([method_name, yes_mean, no_mean])\n",
"\n",
"    print(\"Method: \", method_name)\n",
"    print(\"Supporting: \", yes_mean)\n",
"    print(\"Non supporting\", no_mean)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd \n",
"\n",
"# Read data/preprocessed/may-3.csv, decoding it as ISO-8859-1.\n",
"df = pd.read_csv(\n",
"    'data/preprocessed/may-3.csv',\n",
"    encoding='ISO-8859-1',\n",
")\n",
"\n",
"def count_if_exist(corpus, word):\n",
"    \"\"\"Add one to the tally kept for ``word`` in the mapping ``corpus``.\n",
"\n",
"    ``corpus`` is updated in place: a word that is not yet present ends up\n",
"    with a count of 1, otherwise its stored count grows by one.\n",
"    Nothing is returned.\n",
"    \"\"\"\n",
"    if word in corpus:\n",
"        corpus[word] += 1\n",
"    else:\n",
"        corpus[word] = 1\n",
"\n",
"def convert_dict_to_csv(input_dict, name):\n",
"    \"\"\"Write the (key, value) pairs of ``input_dict`` to ./data/computed/<name>.csv.\n",
"\n",
"    Each item becomes one row; the file is written without a header row and\n",
"    without an index column.\n",
"    \"\"\"\n",
"    target_path = f\"./data/computed/{name}.csv\"\n",
"    pairs = pd.DataFrame(input_dict.items())\n",
"    pairs.to_csv(target_path, header=None, index=None)\n",
"\n",
"# Per-word tallies: across every row, and split by the row's Y/N label.\n",
"# general_word_count is initialised here so the cell runs on a fresh kernel\n",
"# (it was previously used without ever being defined).\n",
"general_word_count = {}\n",
"supporting_word_count = {}\n",
"non_supporting_word_count = {}\n",
"word_support_probability = {}  # declared for later use; not populated in this cell\n",
"\n",
"# Visit every row, including position 0: read_csv has already consumed the\n",
"# header line, so the first row of df is real data and must not be skipped.\n",
"# NOTE(review): the label is compared against the integer 1 here, while the raw\n",
"# April file is filtered on 'Y'/'N' -- confirm the preprocessed file encodes\n",
"# the Y/N column as 1/0.\n",
"for text, label in zip(df['text'], df['Y/N']):\n",
"    for word in text.split(' '):\n",
"        count_if_exist(general_word_count, word)\n",
"        if label == 1:\n",
"            count_if_exist(supporting_word_count, word)\n",
"        else:\n",
"            count_if_exist(non_supporting_word_count, word)"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'michiganprotest', 'like', 'people', 'u', 'whitmer', 'state', 'protester', 'michiganterrorists', 'go', 'not', 'arm', 'protest', 'gun', 'american', 'get', 'governor', 'amp', 'need', 'right', 'freedom', 'michigan', 'call', 'covid', 'government', 'patriot'}\n"
]
}
],
"source": [
"# How many of the most frequent words to keep from each group.\n",
"TOP_N = 25\n",
"\n",
"# Rank (word, count) pairs by count, highest first. The sort is stable, so\n",
"# words with equal counts keep the order in which they were first counted.\n",
"supporting_word_list = sorted(\n",
"    supporting_word_count.items(), key=lambda item: item[1], reverse=True\n",
")\n",
"best_supporting_words = [word for word, count in supporting_word_list[:TOP_N]]\n",
"\n",
"non_supporting_word_list = sorted(\n",
"    non_supporting_word_count.items(), key=lambda item: item[1], reverse=True\n",
")\n",
"# Slice the NON-supporting ranking here. Slicing supporting_word_list instead\n",
"# would make both top lists identical and the intersection below meaningless.\n",
"best_non_supporting_words = [word for word, count in non_supporting_word_list[:TOP_N]]\n",
"\n",
"# Words that are among the most frequent in both groups.\n",
"print(set(best_supporting_words) & set(best_non_supporting_words))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
""
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.9"
}
},
"nbformat": 4,
"nbformat_minor": 4
}