Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
data_fusion_p1/3) mapgpsdata.py
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
209 lines (165 sloc)
7.95 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
__author__ = 'Shweta' | |
import csv | |
import glob | |
import pandas as pd | |
import matplotlib.pyplot as plt | |
import numpy as np | |
from datetime import date, timedelta, datetime | |
from pylab import scatter, show, title, xlabel, ylabel, plot, contour | |
import re | |
import os, time | |
import datetime | |
from collections import defaultdict | |
import itertools | |
import math | |
import numpy | |
# Maximum allowed gap, in milliseconds, between a wifi record's timestamp and a
# GPS record's timestamp for the two to be treated as a match.  The comparison
# is |gps - wifi| <= delta, i.e. 5 minutes on either side of the wifi timestamp
# (a 10-minute window overall).
delta = 5 * 60 * 1000
def median(lst):
    """Return the median of the numeric values in *lst*.

    Args:
        lst: A sequence (list, tuple, array, ...) of numbers.

    Returns:
        The median as computed by ``numpy.median``.  For an empty input the
        result is NaN, returned directly so that numpy does not emit its
        "mean of empty slice" RuntimeWarning.
    """
    # asarray avoids copying when the caller already passes an ndarray.
    values = numpy.asarray(lst)
    if values.size == 0:
        return numpy.nan
    return numpy.median(values)
# This script matches each wifi record (identified by its BSSID and timestamp)
# to the latitude/longitude readings in the same user's location data whose
# timestamps lie within `delta` of the wifi timestamp.
# Both the GPS and the wifi timestamps are in milliseconds.
# Neither the GPS data nor the wifi data is upsampled.
''' | |
andrd=pd.read_csv("matchedgps_androidcheck.csv") | |
andrd.columns=["Userid", "BSSID", "Longitude", "Latitude", "wifi_senseStartTimeMillis","gps_senseStartTimeMillis"] | |
uidand=andrd.Userid.unique() | |
uidandlst=uidand.tolist() | |
fz1 = open("matchedgps_andcheck1.csv", "w", encoding="utf8") # csv to write the results to. | |
#fz1 = open("matchedgps_androidcheck.csv", "a") # csv to write the results to. | |
cz1 = csv.writer(fz1) | |
''' | |
# Header row shared by the android and iOS output files.
_MATCH_HEADER = ["Userid", "BSSID", "Longitude", "Latitude",
                 "wifi_senseStartTimeMillis", "gps_senseStartTimeMillis"]


def _load_sorted_unique(csv_path, time_column):
    """Load *csv_path*, keep the last row per timestamp and sort by time.

    Args:
        csv_path: Path of the CSV file to read.
        time_column: Name of the timestamp column used for de-duplication
            and for the ascending sort.

    Returns:
        A DataFrame with a fresh 0..n-1 index.

    Raises:
        KeyError: If *time_column* is not present in the file.
    """
    frame = pd.read_csv(csv_path)
    frame = frame.drop_duplicates([time_column], keep="last")
    frame = frame.sort_values(by=time_column, ascending=True)
    return frame.reset_index(drop=True)


def _match_wifi_to_gps(wifi_dir, gps_dir, out_path):
    """Write every wifi/GPS record pair that lies within ``delta`` ms.

    For each ``<user>.csv`` in *wifi_dir* the matching location file
    ``<user>_Location.csv`` is looked up in *gps_dir*.  For every wifi row
    whose ``State`` equals 1, one output row is written per GPS row whose
    ``senseStartTimeMillis`` differs from the wifi ``senseStartTimeMillis:``
    by at most the module-level ``delta``.

    Args:
        wifi_dir: Directory holding the per-user wifi CSV files.
        gps_dir: Directory holding the per-user ``*_Location.csv`` files.
        out_path: CSV file to (over)write with the matched rows.
    """
    # newline="" is required by the csv module; without it every row is
    # followed by a blank line on Windows.  The with-block guarantees the
    # file is closed even if a user file fails to parse.
    with open(out_path, "w", encoding="utf8", newline="") as out_file:
        writer = csv.writer(out_file)
        writer.writerow(_MATCH_HEADER)

        # os.path.join / basename instead of hard-coded "\\" so the script
        # also runs on non-Windows systems.
        for wifi_path in glob.glob(os.path.join(wifi_dir, "*.csv")):
            user_key = os.path.basename(wifi_path).split(".csv")[0]
            print(user_key)
            if not os.path.isfile(wifi_path):
                continue

            # De-duplicate and sort the wifi records.
            wifi = _load_sorted_unique(wifi_path, "senseStartTimeMillis:")
            user_ids = wifi['Userid']
            bssids = wifi['BSSID:']
            states = wifi['State']
            wifi_times = wifi['senseStartTimeMillis:']

            gps_path = os.path.join(gps_dir, user_key + "_Location.csv")
            if not os.path.exists(gps_path):
                print('Doesnot exist ' + gps_path)
                continue

            # De-duplicate and sort the GPS (location) records.
            gps = _load_sorted_unique(gps_path, "senseStartTimeMillis")
            gps_times = gps['senseStartTimeMillis']  # hoisted out of the row loop

            for user_id, bssid, state, wifi_time in zip(
                    user_ids, bssids, states, wifi_times):
                # GPS rows within +/- delta ms of this wifi record.
                nearby = gps[(gps_times - float(wifi_time)).abs() <= delta]
                if len(nearby) == 0:
                    continue
                # State is only inspected once a GPS match exists, so rows
                # without a match never hit the int() conversion.
                if int(state) != 1:
                    continue

                longitudes = nearby['longitude'].tolist()
                latitudes = nearby['latitude'].tolist()
                matched_times = nearby['senseStartTimeMillis'].tolist()
                for lon, lat, gps_time in zip(longitudes, latitudes, matched_times):
                    writer.writerow(
                        [user_id, bssid, lon, lat, wifi_time, gps_time])


############ Runs for android data
_match_wifi_to_gps("android_wifiraw_remdup", "android_remdup",
                   "matchedgps_androidcheck.csv")

############ Runs for iOS data
_match_wifi_to_gps("ios_wifiraw_remdup", "ios_remdup",
                   "matchedgps_ioscheck.csv")