% (GitHub page navigation text that was captured with this file has been removed; it is not part of the script.)
%% GPS feature extraction
% Builds one row of GPS features per user, following the approach described
% in the Northwestern study referenced by the original author.
%
% Input : data/gps_feature.mat holding the cell array gpsData. Cells
%         1..end-1 hold one sample matrix per user; the last cell holds the
%         user ids. Per-user columns as used below:
%           1 = Unix timestamp (seconds), 2 = latitude, 3 = longitude,
%           4 = travel state (0 = stationary, 1 = moving).
% Output: data/gps_Processesed_feature.mat holding gpsFeatures with columns
%           1    location variance, var(lat) + var(long), stationary samples
%           2-4  share of stationary time spent in clusters 1, 2 and 3
%           5    entropy of the time spent over all location clusters
%           6    entropy normalised by log(number of clusters)
%           7    share of stationary time between 00:00 and 06:00 (home stay)
%           8    share of samples flagged as moving (transition time)
%           9    total distance travelled in km
%           10   user id
%           11   number of location clusters selected for the user
%         Features that cannot be computed for a user are left as NaN.
%
% Requires kmeans (Statistics and Machine Learning Toolbox) and lldistkm.m
% on the MATLAB path.
%%
clc; clear; close all;

SAMPLE_MINUTES = 10;  % sensing interval; time credited to the first sample of a visit
MAX_CLUSTERS   = 10;  % largest k tried when choosing the number of clusters
NUM_FEATURES   = 11;

% Load the GPS data cell. fullfile keeps the path portable and relative to
% the current folder, the same folder the results are written to below.
loaded  = load(fullfile('data', 'gps_feature.mat'));
gpsData = loaded.gpsData;

numUsers    = size(gpsData, 2) - 1;  % last cell holds the user ids
gpsFeatures = NaN(numUsers, NUM_FEATURES);

for z = 1:numUsers
    userData   = gpsData{z};
    numSamples = size(userData, 1);
    if numSamples == 0
        warning('gpsFeatures:noSamples', ...
            'User %d has no GPS samples; features left as NaN.', z);
        continue
    end

    %% Time spent (minutes) attributed to every sample
    % The first sample gets the sensing interval; each later sample is
    % classified from the step that led to it.
    timeSpent    = zeros(numSamples, 1);
    timeSpent(1) = SAMPLE_MINUTES;
    if numSamples > 1
        latStep    = abs(diff(userData(:, 2)));
        longStep   = abs(diff(userData(:, 3)));
        elapsedMin = diff(userData(:, 1)) / 60;  % seconds -> minutes
        state      = userData(2:end, 4);

        % Same place and stationary: credit the elapsed time.
        sameVisit = latStep < 1 & longStep < 1 & state == 0;
        % Position jumped or the user is moving: start a new visit.
        newVisit  = ~sameVisit & (latStep >= 1 | longStep >= 1 | state == 1);
        % Anything else (e.g. an unexpected state value): keep the original
        % fallback of storing the state value itself.
        fallback  = ~sameVisit & ~newVisit;

        stepTime            = zeros(numSamples - 1, 1);
        stepTime(sameVisit) = elapsedMin(sameVisit);
        stepTime(newVisit)  = SAMPLE_MINUTES;
        stepTime(fallback)  = state(fallback);
        timeSpent(2:end)    = stepTime;
    end
    timeCol = size(userData, 2) + 1;  % do not assume exactly four input columns
    userData(:, timeCol) = timeSpent;

    %% Features computed on stationary samples only (state == 0)
    statData = userData(userData(:, 4) == 0, :);
    if isempty(statData)
        warning('gpsFeatures:noStationarySamples', ...
            'User %d has no stationary samples; features 1-7 and 11 left as NaN.', z);
    else
        % Location variance.
        % NOTE(review): the original comment mentioned a natural log, but
        % none was applied; the raw sum is kept so existing feature values
        % do not change. Confirm which form downstream code expects.
        gpsFeatures(z, 1) = var(statData(:, 2)) + var(statData(:, 3));

        % Choose the number of clusters: run k-means for k = 1..kLimit and
        % take the k whose step to k+1 lowers the total within-cluster
        % distance the least. k never exceeds the number of distinct points,
        % because kmeans cannot form more clusters than that.
        % kmeans starts from random centroids, so results can vary from run
        % to run.
        statLatLong = statData(:, 2:3);
        kLimit = min(MAX_CLUSTERS, size(unique(statLatLong, 'rows'), 1));
        if kLimit < 2
            numClusters = 1;
            clusterIdx  = ones(size(statLatLong, 1), 1);
        else
            withinDist = zeros(1, kLimit);
            for k = 1:kLimit
                [~, ~, sumD]  = kmeans(statLatLong, k);
                withinDist(k) = sum(sumD);
            end
            [~, numClusters] = min(withinDist(1:end-1) - withinDist(2:end));
            clusterIdx = kmeans(statLatLong, numClusters);
        end
        gpsFeatures(z, 11) = numClusters;

        % Share of stationary time spent in each cluster.
        visitTime    = statData(:, timeCol);
        totalTime    = sum(visitTime);
        clusterShare = zeros(1, max(numClusters, 3));
        for c = 1:numClusters
            clusterShare(c) = sum(visitTime(clusterIdx == c)) / totalTime;
        end
        % Columns 2-4 keep their original meaning: clusters 1..3, with 0 for
        % a cluster that does not exist.
        gpsFeatures(z, 2:4) = clusterShare(1:3);

        % Entropy over ALL clusters. Zero shares are skipped because
        % 0*log(0) evaluates to NaN in MATLAB although its limit is 0.
        positiveShare     = clusterShare(clusterShare > 0);
        gpsFeatures(z, 5) = -sum(positiveShare .* log(positiveShare));

        % Normalised entropy: divide by log(N), N = clusters actually used.
        % A single cluster has zero entropy and log(1) = 0, so return 0
        % instead of dividing by zero.
        if numClusters > 1
            gpsFeatures(z, 6) = gpsFeatures(z, 5) / log(numClusters);
        else
            gpsFeatures(z, 6) = 0;
        end

        % Home stay: share of stationary time logged between 00:00 and 06:00.
        % The hour comes straight from the epoch timestamp with no time-zone
        % shift, which matches the earlier datenum-based conversion.
        hourOfDay = floor(mod(statData(:, 1), 86400) / 3600);
        atHome    = hourOfDay < 6;
        gpsFeatures(z, 7) = sum(visitTime(atHome)) / totalTime;
    end

    %% Transition time = samples flagged as moving / all samples
    gpsFeatures(z, 8) = nnz(userData(:, 4) == 1) / numSamples;

    %% Total distance travelled in km
    % Sums the first output of lldistkm over consecutive samples.
    totalKm = 0;
    for i = 1:(numSamples - 1)
        totalKm = totalKm + lldistkm(userData(i, 2:3), userData(i + 1, 2:3));
    end
    gpsFeatures(z, 9) = totalKm;
end

% User ids live in the last cell of gpsData.
gpsFeatures(:, 10) = gpsData{end};

%% Write down the GPS features
save(fullfile('data', 'gps_Processesed_feature.mat'), 'gpsFeatures');