Skip to content
Permalink
master
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
{
"cells": [
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Word</th>\n",
" <th>Bigram</th>\n",
" <th>Conc.M</th>\n",
" <th>Conc.SD</th>\n",
" <th>Unknown</th>\n",
" <th>Total</th>\n",
" <th>Percent_known</th>\n",
" <th>SUBTLEX</th>\n",
" <th>Dom_Pos</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>eh</td>\n",
" <td>0</td>\n",
" <td>1.04</td>\n",
" <td>0.20</td>\n",
" <td>4</td>\n",
" <td>29</td>\n",
" <td>0.86</td>\n",
" <td>2619</td>\n",
" <td>Interjection</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>essentialness</td>\n",
" <td>0</td>\n",
" <td>1.04</td>\n",
" <td>0.20</td>\n",
" <td>2</td>\n",
" <td>26</td>\n",
" <td>0.92</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>although</td>\n",
" <td>0</td>\n",
" <td>1.07</td>\n",
" <td>0.27</td>\n",
" <td>0</td>\n",
" <td>27</td>\n",
" <td>1.00</td>\n",
" <td>2143</td>\n",
" <td>Conjunction</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>spirituality</td>\n",
" <td>0</td>\n",
" <td>1.07</td>\n",
" <td>0.37</td>\n",
" <td>0</td>\n",
" <td>30</td>\n",
" <td>1.00</td>\n",
" <td>46</td>\n",
" <td>Noun</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>would</td>\n",
" <td>0</td>\n",
" <td>1.12</td>\n",
" <td>0.34</td>\n",
" <td>3</td>\n",
" <td>27</td>\n",
" <td>0.89</td>\n",
" <td>90162</td>\n",
" <td>Verb</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Word Bigram Conc.M Conc.SD Unknown Total Percent_known \\\n",
"0 eh 0 1.04 0.20 4 29 0.86 \n",
"1 essentialness 0 1.04 0.20 2 26 0.92 \n",
"2 although 0 1.07 0.27 0 27 1.00 \n",
"3 spirituality 0 1.07 0.37 0 30 1.00 \n",
"4 would 0 1.12 0.34 3 27 0.89 \n",
"\n",
" SUBTLEX Dom_Pos \n",
"0 2619 Interjection \n",
"1 0 NaN \n",
"2 2143 Conjunction \n",
"3 46 Noun \n",
"4 90162 Verb "
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Standard library\n",
"import re\n",
"import string\n",
"from collections import Counter\n",
"\n",
"# Third-party\n",
"import matplotlib.pyplot as plt\n",
"import nltk\n",
"import numpy as np\n",
"import pandas as pd\n",
"from nltk.stem import PorterStemmer\n",
"\n",
"# Both CSV paths are relative, so they resolve against the kernel's working directory.\n",
"# NOTE(review): `df` is not referenced by the other visible cells -- confirm it is still needed.\n",
"df = pd.read_csv(r\"workout_cities.csv\", encoding=\"utf-8\")\n",
"\n",
"# Concreteness ratings table (columns such as Word, Conc.M, Conc.SD appear in the preview below).\n",
"word_df = pd.read_csv(r\"Concreteness_ratings_Brysbaert_et_al_BRM.csv\", encoding=\"utf-8\")\n",
"\n",
"# Last expression of the cell: rendered as the cell output (first five rows).\n",
"word_df.head()"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"helmet_cities_tfidf_matrix.csv\n",
"jinx_cities_tfidf_matrix.csv\n",
"paradox_cities_tfidf_matrix.csv\n",
"domino_cities_tfidf_matrix.csv\n",
"choice_cities_tfidf_matrix.csv\n",
"retrospect_cities_tfidf_matrix.csv\n",
"legacy_cities_tfidf_matrix.csv\n",
"legislature_cities_tfidf_matrix.csv\n",
"flattery_cities_tfidf_matrix.csv\n",
"outburst_cities_tfidf_matrix.csv\n",
"puncture_cities_tfidf_matrix.csv\n",
"piety_cities_tfidf_matrix.csv\n",
"mutiny_cities_tfidf_matrix.csv\n",
"prophet_cities_tfidf_matrix.csv\n",
"sale_cities_tfidf_matrix.csv\n",
"prediction_cities_tfidf_matrix.csv\n",
"morale_cities_tfidf_matrix.csv\n",
"barrier_cities_tfidf_matrix.csv\n",
"riddle_cities_tfidf_matrix.csv\n",
"balloon_cities_tfidf_matrix.csv\n",
"plan_cities_tfidf_matrix.csv\n",
"aircraft_cities_tfidf_matrix.csv\n",
"being_cities_tfidf_matrix.csv\n",
"encore_cities_tfidf_matrix.csv\n",
"luggage_cities_tfidf_matrix.csv\n",
"hardcore_cities_tfidf_matrix.csv\n",
"aroma_cities_tfidf_matrix.csv\n",
"array_cities_tfidf_matrix.csv\n",
"machete_cities_tfidf_matrix.csv\n",
"tactic_cities_tfidf_matrix.csv\n",
"energy_cities_tfidf_matrix.csv\n",
"appeal_cities_tfidf_matrix.csv\n",
"chaos_cities_tfidf_matrix.csv\n",
"rope_cities_tfidf_matrix.csv\n",
"funeral_cities_tfidf_matrix.csv\n",
"voyage_cities_tfidf_matrix.csv\n",
"equipment_cities_tfidf_matrix.csv\n",
"reggae_cities_tfidf_matrix.csv\n",
"arch_cities_tfidf_matrix.csv\n",
"echo_cities_tfidf_matrix.csv\n",
"mayhem_cities_tfidf_matrix.csv\n",
"boycott_cities_tfidf_matrix.csv\n",
"vinyl_cities_tfidf_matrix.csv\n",
"moment_cities_tfidf_matrix.csv\n",
"humor_cities_tfidf_matrix.csv\n",
"wire_cities_tfidf_matrix.csv\n",
"prank_cities_tfidf_matrix.csv\n",
"persona_cities_tfidf_matrix.csv\n",
"debate_cities_tfidf_matrix.csv\n",
"trait_cities_tfidf_matrix.csv\n",
"pedal_cities_tfidf_matrix.csv\n",
"splinter_cities_tfidf_matrix.csv\n",
"football_cities_tfidf_matrix.csv\n",
"outline_cities_tfidf_matrix.csv\n",
"courtesy_cities_tfidf_matrix.csv\n",
"workout_cities_tfidf_matrix.csv\n",
"decor_cities_tfidf_matrix.csv\n",
"encounter_cities_tfidf_matrix.csv\n",
"delight_cities_tfidf_matrix.csv\n",
"article_cities_tfidf_matrix.csv\n",
"democracy_cities_tfidf_matrix.csv\n",
"factor_cities_tfidf_matrix.csv\n",
"merit_cities_tfidf_matrix.csv\n",
"farewell_cities_tfidf_matrix.csv\n",
"parliament_cities_tfidf_matrix.csv\n",
"proxy_cities_tfidf_matrix.csv\n",
"service_cities_tfidf_matrix.csv\n",
"psyche_cities_tfidf_matrix.csv\n",
"epiphany_cities_tfidf_matrix.csv\n",
"obedience_cities_tfidf_matrix.csv\n",
"theory_cities_tfidf_matrix.csv\n",
"sunshine_cities_tfidf_matrix.csv\n",
"anomaly_cities_tfidf_matrix.csv\n",
"prelude_cities_tfidf_matrix.csv\n",
"gist_cities_tfidf_matrix.csv\n",
"trampoline_cities_tfidf_matrix.csv\n",
"aspect_cities_tfidf_matrix.csv\n",
"dungeon_cities_tfidf_matrix.csv\n",
"saxophone_cities_tfidf_matrix.csv\n",
"genre_cities_tfidf_matrix.csv\n",
"solace_cities_tfidf_matrix.csv\n",
"pencil_cities_tfidf_matrix.csv\n",
"innuendo_cities_tfidf_matrix.csv\n",
"breach_cities_tfidf_matrix.csv\n",
"interpretation_cities_tfidf_matrix.csv\n",
"symbol_cities_tfidf_matrix.csv\n",
"surfboard_cities_tfidf_matrix.csv\n",
"beauty_cities_tfidf_matrix.csv\n",
"satire_cities_tfidf_matrix.csv\n",
"umpire_cities_tfidf_matrix.csv\n",
"scale_cities_tfidf_matrix.csv\n",
"monitor_cities_tfidf_matrix.csv\n",
"tablet_cities_tfidf_matrix.csv\n",
"charger_cities_tfidf_matrix.csv\n",
"widow_cities_tfidf_matrix.csv\n",
"colleague_cities_tfidf_matrix.csv\n",
"odor_cities_tfidf_matrix.csv\n",
"lagoon_cities_tfidf_matrix.csv\n",
"flaw_cities_tfidf_matrix.csv\n",
"passion_cities_tfidf_matrix.csv\n",
"cauldron_cities_tfidf_matrix.csv\n",
"ritual_cities_tfidf_matrix.csv\n",
"truce_cities_tfidf_matrix.csv\n",
"deadline_cities_tfidf_matrix.csv\n",
"tax_cities_tfidf_matrix.csv\n",
"railing_cities_tfidf_matrix.csv\n",
"karma_cities_tfidf_matrix.csv\n",
"spaceship_cities_tfidf_matrix.csv\n",
"donation_cities_tfidf_matrix.csv\n",
"taboo_cities_tfidf_matrix.csv\n",
"landmark_cities_tfidf_matrix.csv\n",
"sarcasm_cities_tfidf_matrix.csv\n",
"speech_cities_tfidf_matrix.csv\n",
"instant_cities_tfidf_matrix.csv\n",
"route_cities_tfidf_matrix.csv\n",
"purse_cities_tfidf_matrix.csv\n",
"tornado_cities_tfidf_matrix.csv\n",
"plate_cities_tfidf_matrix.csv\n",
"relief_cities_tfidf_matrix.csv\n",
"motive_cities_tfidf_matrix.csv\n",
"existence_cities_tfidf_matrix.csv\n",
"fiction_cities_tfidf_matrix.csv\n",
"custom_cities_tfidf_matrix.csv\n",
"hypocrite_cities_tfidf_matrix.csv\n",
"circumstance_cities_tfidf_matrix.csv\n",
"lodge_cities_tfidf_matrix.csv\n",
"handkerchief_cities_tfidf_matrix.csv\n",
"virtue_cities_tfidf_matrix.csv\n",
"nickel_cities_tfidf_matrix.csv\n",
"silk_cities_tfidf_matrix.csv\n",
"hammer_cities_tfidf_matrix.csv\n",
"luxury_cities_tfidf_matrix.csv\n",
"ambition_cities_tfidf_matrix.csv\n",
"analysis_cities_tfidf_matrix.csv\n",
"poncho_cities_tfidf_matrix.csv\n",
"footprint_cities_tfidf_matrix.csv\n",
"future_cities_tfidf_matrix.csv\n",
"hint_cities_tfidf_matrix.csv\n",
"parcel_cities_tfidf_matrix.csv\n",
"bomb_cities_tfidf_matrix.csv\n",
"inertia_cities_tfidf_matrix.csv\n",
"journey_cities_tfidf_matrix.csv\n",
"tenure_cities_tfidf_matrix.csv\n",
"phantom_cities_tfidf_matrix.csv\n",
"beginning_cities_tfidf_matrix.csv\n",
"souvenir_cities_tfidf_matrix.csv\n",
"dose_cities_tfidf_matrix.csv\n",
"currency_cities_tfidf_matrix.csv\n",
"wisdom_cities_tfidf_matrix.csv\n",
"veteran_cities_tfidf_matrix.csv\n",
"principle_cities_tfidf_matrix.csv\n",
"nostalgia_cities_tfidf_matrix.csv\n",
"stethoscope_cities_tfidf_matrix.csv\n",
"keepsake_cities_tfidf_matrix.csv\n",
"ukulele_cities_tfidf_matrix.csv\n",
"ploy_cities_tfidf_matrix.csv\n",
"adage_cities_tfidf_matrix.csv\n",
"leverage_cities_tfidf_matrix.csv\n",
"valor_cities_tfidf_matrix.csv\n",
"vest_cities_tfidf_matrix.csv\n",
"ingredient_cities_tfidf_matrix.csv\n",
"razor_cities_tfidf_matrix.csv\n",
"transmission_cities_tfidf_matrix.csv\n",
"metaphor_cities_tfidf_matrix.csv\n",
"cluster_cities_tfidf_matrix.csv\n",
"uprising_cities_tfidf_matrix.csv\n",
"aptitude_cities_tfidf_matrix.csv\n",
"realm_cities_tfidf_matrix.csv\n",
"priority_cities_tfidf_matrix.csv\n",
"coat_cities_tfidf_matrix.csv\n",
"rumble_cities_tfidf_matrix.csv\n",
"wiretap_cities_tfidf_matrix.csv\n",
"creed_cities_tfidf_matrix.csv\n",
"magazine_cities_tfidf_matrix.csv\n",
"dynasty_cities_tfidf_matrix.csv\n",
"risk_cities_tfidf_matrix.csv\n",
"logic_cities_tfidf_matrix.csv\n",
"prize_cities_tfidf_matrix.csv\n",
"allure_cities_tfidf_matrix.csv\n",
"fence_cities_tfidf_matrix.csv\n",
"jargon_cities_tfidf_matrix.csv\n",
"vibe_cities_tfidf_matrix.csv\n",
"square_cities_tfidf_matrix.csv\n",
"autumn_cities_tfidf_matrix.csv\n",
"danger_cities_tfidf_matrix.csv\n",
"peak_cities_tfidf_matrix.csv\n",
"ecstasy_cities_tfidf_matrix.csv\n",
"society_cities_tfidf_matrix.csv\n",
"corduroy_cities_tfidf_matrix.csv\n",
"syringe_cities_tfidf_matrix.csv\n",
"ballet_cities_tfidf_matrix.csv\n",
"obituary_cities_tfidf_matrix.csv\n",
"reason_cities_tfidf_matrix.csv\n",
"ball_cities_tfidf_matrix.csv\n",
"utopia_cities_tfidf_matrix.csv\n",
"casket_cities_tfidf_matrix.csv\n",
"belief_cities_tfidf_matrix.csv\n",
"dilemma_cities_tfidf_matrix.csv\n",
"album_cities_tfidf_matrix.csv\n",
"institute_cities_tfidf_matrix.csv\n",
"heirloom_cities_tfidf_matrix.csv\n",
"frisbee_cities_tfidf_matrix.csv\n",
"origin_cities_tfidf_matrix.csv\n",
"compost_cities_tfidf_matrix.csv\n",
"tray_cities_tfidf_matrix.csv\n",
"gimmick_cities_tfidf_matrix.csv\n",
"cloak_cities_tfidf_matrix.csv\n",
"extinction_cities_tfidf_matrix.csv\n",
"league_cities_tfidf_matrix.csv\n",
"opinion_cities_tfidf_matrix.csv\n",
"siren_cities_tfidf_matrix.csv\n",
"premise_cities_tfidf_matrix.csv\n",
"glove_cities_tfidf_matrix.csv\n",
"ruse_cities_tfidf_matrix.csv\n",
"outcast_cities_tfidf_matrix.csv\n",
"pinnacle_cities_tfidf_matrix.csv\n",
"vendetta_cities_tfidf_matrix.csv\n",
"bottle_cities_tfidf_matrix.csv\n",
"strategy_cities_tfidf_matrix.csv\n",
"caution_cities_tfidf_matrix.csv\n",
"stairs_cities_tfidf_matrix.csv\n",
"noise_cities_tfidf_matrix.csv\n",
"stapler_cities_tfidf_matrix.csv\n",
"seatbelt_cities_tfidf_matrix.csv\n",
"threat_cities_tfidf_matrix.csv\n",
"poem_cities_tfidf_matrix.csv\n",
"integrity_cities_tfidf_matrix.csv\n",
"theme_cities_tfidf_matrix.csv\n",
"insight_cities_tfidf_matrix.csv\n",
"melody_cities_tfidf_matrix.csv\n",
"protocol_cities_tfidf_matrix.csv\n",
"anxiety_cities_tfidf_matrix.csv\n",
"comparison_cities_tfidf_matrix.csv\n",
"outfit_cities_tfidf_matrix.csv\n",
"purpose_cities_tfidf_matrix.csv\n",
"cockpit_cities_tfidf_matrix.csv\n",
"junk_cities_tfidf_matrix.csv\n",
"magnitude_cities_tfidf_matrix.csv\n",
"potion_cities_tfidf_matrix.csv\n",
"diamond_cities_tfidf_matrix.csv\n",
"napkin_cities_tfidf_matrix.csv\n",
"precedent_cities_tfidf_matrix.csv\n",
"plea_cities_tfidf_matrix.csv\n",
"landscape_cities_tfidf_matrix.csv\n",
"velocity_cities_tfidf_matrix.csv\n",
"silence_cities_tfidf_matrix.csv\n",
"clue_cities_tfidf_matrix.csv\n",
"spool_cities_tfidf_matrix.csv\n",
"value_cities_tfidf_matrix.csv\n",
"agenda_cities_tfidf_matrix.csv\n",
"adventure_cities_tfidf_matrix.csv\n",
"circus_cities_tfidf_matrix.csv\n",
"chamber_cities_tfidf_matrix.csv\n",
"void_cities_tfidf_matrix.csv\n",
"appetite_cities_tfidf_matrix.csv\n",
"boutique_cities_tfidf_matrix.csv\n",
"galaxy_cities_tfidf_matrix.csv\n",
"muscle_cities_tfidf_matrix.csv\n",
"display_cities_tfidf_matrix.csv\n",
"habit_cities_tfidf_matrix.csv\n",
"chord_cities_tfidf_matrix.csv\n",
"penance_cities_tfidf_matrix.csv\n",
"disco_cities_tfidf_matrix.csv\n",
"linoleum_cities_tfidf_matrix.csv\n",
"paddle_cities_tfidf_matrix.csv\n",
"conspiracy_cities_tfidf_matrix.csv\n",
"crystal_cities_tfidf_matrix.csv\n",
"hurdle_cities_tfidf_matrix.csv\n",
"suitcase_cities_tfidf_matrix.csv\n",
"religion_cities_tfidf_matrix.csv\n",
"perch_cities_tfidf_matrix.csv\n",
"cascade_cities_tfidf_matrix.csv\n",
"charity_cities_tfidf_matrix.csv\n",
"chivalry_cities_tfidf_matrix.csv\n",
"metropolis_cities_tfidf_matrix.csv\n",
"courage_cities_tfidf_matrix.csv\n",
"gaze_cities_tfidf_matrix.csv\n",
"esteem_cities_tfidf_matrix.csv\n",
"charm_cities_tfidf_matrix.csv\n",
"trend_cities_tfidf_matrix.csv\n",
"needle_cities_tfidf_matrix.csv\n",
"violence_cities_tfidf_matrix.csv\n",
"estate_cities_tfidf_matrix.csv\n",
"symptom_cities_tfidf_matrix.csv\n",
"chant_cities_tfidf_matrix.csv\n",
"knowledge_cities_tfidf_matrix.csv\n",
"situation_cities_tfidf_matrix.csv\n",
"plight_cities_tfidf_matrix.csv\n",
"oar_cities_tfidf_matrix.csv\n",
"epilepsy_cities_tfidf_matrix.csv\n",
"fork_cities_tfidf_matrix.csv\n",
"platform_cities_tfidf_matrix.csv\n",
"toothbrush_cities_tfidf_matrix.csv\n",
"scalpel_cities_tfidf_matrix.csv\n",
"essence_cities_tfidf_matrix.csv\n",
"bargain_cities_tfidf_matrix.csv\n",
"harbor_cities_tfidf_matrix.csv\n",
"decoy_cities_tfidf_matrix.csv\n",
"conflict_cities_tfidf_matrix.csv\n",
"joke_cities_tfidf_matrix.csv\n"
]
}
],
"source": [
"import glob\n",
"import os\n",
"\n",
"# Same ratings file the first cell loads; `word_df` is not used further in this cell.\n",
"word_df = pd.read_csv(r\"Concreteness_ratings_Brysbaert_et_al_BRM.csv\", encoding=\"utf-8\")\n",
"\n",
"# NOTE(review): absolute, machine-specific path -- consider a configurable data directory.\n",
"path = \"/Users/qixia/Git/Charles_ContextEffectsOnAbstract_tweets/compilations/*.csv\"\n",
"\n",
"# For every CSV matched by the pattern, print the derived name <stem>_tfidf_matrix.csv.\n",
"for fname in glob.glob(path):\n",
"    # basename/splitext instead of slicing by len(path) - 5, which only worked while\n",
"    # the glob pattern ended in exactly '*.csv'.\n",
"    stem = os.path.splitext(os.path.basename(fname))[0]\n",
"    new_fname = stem + \"_tfidf_matrix.csv\"\n",
"    print(new_fname)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 2
}