Skip to content
Permalink
master
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
__author__ = 'Shweta'
import csv
import glob
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from datetime import date, timedelta, datetime
from pylab import scatter, show, title, xlabel, ylabel, plot, contour
import re
import os, time
import datetime
from collections import defaultdict
import itertools
import math
import numpy
########### Maximum allowed gap between a wifi timestamp and a GPS timestamp for them to count as a match: 5 minutes, expressed in milliseconds.
delta = 300000
def median(lst):
    """Return the median of the values in *lst*.

    *lst* is converted to a NumPy array and handed to ``numpy.median``,
    so the result is whatever ``numpy.median`` returns for that array.
    """
    return numpy.median(numpy.array(lst))
########### This script looks up, for each BSSID record in the wifi data, the latitude/longitude records in the location (GPS) data whose timestamps are close to the wifi record's timestamp.
########### Both GPS and wifi timestamps are in milliseconds.
########### Neither the GPS data nor the wifi data is upsampled.
'''
andrd=pd.read_csv("matchedgps_androidcheck.csv")
andrd.columns=["Userid", "BSSID", "Longitude", "Latitude", "wifi_senseStartTimeMillis","gps_senseStartTimeMillis"]
uidand=andrd.Userid.unique()
uidandlst=uidand.tolist()
fz1 = open("matchedgps_andcheck1.csv", "w", encoding="utf8") # csv to write the results to.
#fz1 = open("matchedgps_androidcheck.csv", "a") # csv to write the results to.
cz1 = csv.writer(fz1)
'''
############ Match wifi scans to nearby GPS fixes; runs for android data, then for iOS data.
def _write_user_matches(wifi_path, gps_path, writer, max_gap_ms=None):
    """Write the wifi/GPS matches of a single user to *writer*.

    Both inputs are de-duplicated on their timestamp column (keeping the
    last record) and sorted by it. For every wifi record, each GPS record
    whose timestamp lies within *max_gap_ms* of the wifi timestamp
    (inclusive) produces one output row, but only when the wifi record's
    ``State`` equals 1.

    Args:
        wifi_path: CSV with the columns ``Userid``, ``BSSID:``, ``State``
            and ``senseStartTimeMillis:``.
        gps_path: CSV with the columns ``senseStartTimeMillis``,
            ``latitude`` and ``longitude``.
        writer: ``csv.writer`` receiving rows of
            ``[Userid, BSSID, longitude, latitude, wifi time, gps time]``.
        max_gap_ms: largest allowed timestamp difference in milliseconds;
            ``None`` means "use the module-level ``delta``".
    """
    if max_gap_ms is None:
        # Resolved at call time so the window follows the module-level setting.
        max_gap_ms = delta

    wifi = (
        pd.read_csv(wifi_path)
        .drop_duplicates(['senseStartTimeMillis:'], keep='last')
        .sort_values(by='senseStartTimeMillis:')
        .reset_index(drop=True)
    )
    gps = (
        pd.read_csv(gps_path)
        .drop_duplicates(['senseStartTimeMillis'], keep='last')
        .sort_values(by='senseStartTimeMillis')
        .reset_index(drop=True)
    )
    gps_times = gps['senseStartTimeMillis']

    scans = zip(wifi['Userid'], wifi['BSSID:'], wifi['State'],
                wifi['senseStartTimeMillis:'])
    for user, bssid, state, wifi_time in scans:
        # GPS records whose timestamp is within the allowed window of this wifi record.
        nearby = gps[abs(gps_times - float(wifi_time)) <= max_gap_ms]
        if nearby.empty:
            continue
        # State is only inspected once a match exists, and only State == 1 is written.
        if int(state) != 1:
            continue
        writer.writerows(
            [user, bssid, lon, lat, wifi_time, gps_time]
            for lon, lat, gps_time in zip(
                nearby['longitude'].tolist(),
                nearby['latitude'].tolist(),
                nearby['senseStartTimeMillis'].tolist(),
            )
        )


def match_wifi_to_gps(wifi_dir, gps_dir, out_path, max_gap_ms=None):
    """Match every user's wifi records to GPS records and write them to a CSV.

    Each ``<user>.csv`` in *wifi_dir* is paired with
    ``<user>_Location.csv`` in *gps_dir*. Users without a location file are
    reported on stdout and skipped. Files are processed in sorted order so
    the output is reproducible.

    Args:
        wifi_dir: directory holding the per-user wifi CSV files.
        gps_dir: directory holding the per-user ``*_Location.csv`` files.
        out_path: CSV file to create (overwritten if it already exists).
        max_gap_ms: largest allowed wifi/GPS timestamp difference in
            milliseconds; ``None`` means "use the module-level ``delta``".
    """
    # newline="" is what the csv module asks for; without it Windows gets blank rows.
    with open(out_path, "w", encoding="utf8", newline="") as out_file:
        writer = csv.writer(out_file)
        writer.writerow(["Userid", "BSSID", "Longitude", "Latitude",
                         "wifi_senseStartTimeMillis", "gps_senseStartTimeMillis"])
        for wifi_path in sorted(glob.glob(os.path.join(wifi_dir, "*.csv"))):
            # The user id is the file name up to its ".csv" suffix.
            user_id = os.path.basename(wifi_path).split(".csv")[0]
            print(user_id)
            if not os.path.isfile(wifi_path):
                continue
            gps_path = os.path.join(gps_dir, user_id + "_Location.csv")
            if not os.path.exists(gps_path):
                print('Doesnot exist ' + gps_path)
                continue
            _write_user_matches(wifi_path, gps_path, writer, max_gap_ms)


# NOTE(review): bssid_dict is not used in this section; it is kept as a
# module-level name in case later code relies on it.
bssid_dict = defaultdict(list)

############ Runs for android data (non-upsampled wifi and location data).
match_wifi_to_gps("android_wifiraw_remdup", "android_remdup", "matchedgps_androidcheck.csv")
##################################################################################################################################
############ Runs for iOS data (non-upsampled wifi and location data).
match_wifi_to_gps("ios_wifiraw_remdup", "ios_remdup", "matchedgps_ioscheck.csv")