Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Sensor-Depression/gpsFExtraction.m
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
130 lines (125 sloc)
5.44 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
%% GPS feature extraction
% Builds the per-user GPS features; the approach follows the one described
% in the North Western study.
%%
% Start from a clean console, workspace and figure set.
clc
clear all
close all
% The MAT-file provides the gpsData cell array used below; its last cell
% represents the user ids.
load('data\gps_feature.mat')
%%
% Every user cell of gpsData is processed in turn by the loop that follows.
%% Per-user feature extraction
% gpsData holds one matrix per user in cells 1..end-1 (the last cell has the
% user ids). Columns of a user matrix, as they are used below:
%   1 = timestamp in seconds since 1970-01-01, 2 = latitude, 3 = longitude,
%   4 = state (0 = stationary, 1 = moving).
% Layout of gpsFeatures (one row per user):
%   1    location variance (stationary samples only)
%   2:4  share of stationary time spent in clusters 1, 2 and 3
%   5    entropy of the time shares over all selected clusters
%   6    entropy normalised by log(number of clusters)
%   7    home stay: share of stationary time logged between 00:00 and 06:00
%   8    transition time: share of samples in the moving state
%   9    total distance travelled (km, as returned by lldistkm)
%   10   left at zero here; filled with the user ids after the loop
%   11   number of location clusters selected
samplingMinutes = 10;   % sensing period, credited to the first sample of a stay
maxClusters = 10;       % largest k tried when choosing the number of clusters
numUsers = size(gpsData,2) - 1;   % last column has the user ids
gpsFeatures = zeros(numUsers, 11);

for z = 1:numUsers
    userData = gpsData{z};
    numSamples = size(userData,1);

    %% Time spent at each sample (minutes)
    timeSpent = zeros(numSamples,1);
    if numSamples > 0
        timeSpent(1) = samplingMinutes;   % nothing to difference against yet
    end
    for i = 2:numSamples
        latDiff = abs(userData(i-1,2) - userData(i,2));
        longDiff = abs(userData(i-1,3) - userData(i,3));
        if latDiff < 1 && longDiff < 1 && userData(i,4) == 0
            % Still at the same place: elapsed time since the previous
            % sample, converted from seconds to minutes.
            timeSpent(i) = (userData(i,1) - userData(i-1,1)) / 60;
        elseif latDiff >= 1 || longDiff >= 1 || userData(i,4) == 1
            % Moved away or in transit: start a new stay.
            timeSpent(i) = samplingMinutes;
        else
            % Only reached when a coordinate is NaN (every comparison above
            % is then false); kept as in the original script.
            timeSpent(i) = userData(i,4);
        end
    end

    %% Location variance - variability of the stationary GPS positions
    isStationary = userData(:,4) == 0;
    statData = userData(isStationary,:);
    statTime = timeSpent(isStationary);   % time spent, stationary samples only
    totalStatTime = sum(statTime);
    % NOTE(review): the original comment mentioned a natural log, but no log
    % was ever applied to this sum - confirm which definition is wanted.
    gpsFeatures(z,1) = var(statData(:,2)) + var(statData(:,3));

    %% Location clusters (stationary samples only)
    latLongStat = statData(:,2:3);
    % kmeans cannot build more clusters than there are distinct points.
    kmax = min(maxClusters, size(unique(latLongStat,'rows'),1));
    withinSum = zeros(1,kmax);
    for k = 1:kmax
        [~, ~, sumd] = kmeans(latLongStat, k);   % sumd: within-cluster distance sums
        withinSum(k) = sum(sumd);
    end
    if kmax >= 2
        % Pick the k whose step to k+1 reduces the total within-cluster
        % distance the least (same rule as the original script).
        [~, numClusters] = min(withinSum(1:end-1) - withinSum(2:end));
    else
        numClusters = kmax;   % 0 or 1 distinct point: nothing to choose from
    end
    if numClusters > 0
        clusterIdx = kmeans(latLongStat, numClusters);
    else
        clusterIdx = zeros(0,1);
    end
    gpsFeatures(z,11) = numClusters;

    %% Share of stationary time per cluster
    clusterShare = zeros(1, max(numClusters,3));
    for c = 1:numClusters
        clusterShare(c) = sum(statTime(clusterIdx == c)) / totalStatTime;
    end
    gpsFeatures(z,2:4) = clusterShare(1:3);   % only the first three are stored

    %% Entropy of the time shares
    % Zero shares are left out because 0*log(0) evaluates to NaN in MATLAB
    % although its limit is 0. NaN shares are kept so they still propagate.
    p = clusterShare(clusterShare ~= 0);
    ent = -sum(p .* log(p));
    gpsFeatures(z,5) = ent;

    %% Normalised entropy
    % Divide by log(N), N being the number of clusters actually selected;
    % with fewer than two clusters log(N) is not a usable divisor.
    normEnt = ent;
    if numClusters > 1
        normEnt = ent / log(numClusters);
    end
    gpsFeatures(z,6) = normEnt;

    %% Home stay - stationary time logged between 12am and 6am
    % Hour of day straight from the epoch seconds; no time-zone shift is
    % applied, as in the original date conversion.
    % NOTE(review): confirm the timestamps are already in local time.
    hourOfDay = floor(mod(statData(:,1), 86400) / 3600);
    atHome = hourOfDay < 6;
    gpsFeatures(z,7) = sum(statTime(atHome)) / totalStatTime;

    %% Circadian movement ?

    %% Transition time = samples in transition / total number of samples
    gpsFeatures(z,8) = nnz(userData(:,4) == 1) / numSamples;

    %% Total distance travelled in km
    % lldistkm is defined outside this file; its first output is presumably
    % the distance in km between two [lat long] points - TODO confirm.
    sumdistkm = 0;
    for i = 1:(numSamples-1)
        d1km = lldistkm(userData(i,2:3), userData(i+1,2:3));
        sumdistkm = sumdistkm + d1km;
    end
    gpsFeatures(z,9) = sumdistkm;
end
% Every temporary is re-initialised at the top of each iteration, so one
% clean-up after the loop is enough (z is kept: it is read after the loop).
clear userData numSamples timeSpent latDiff longDiff isStationary statData statTime totalStatTime latLongStat kmax withinSum k sumd numClusters clusterIdx clusterShare c p ent normEnt hourOfDay atHome sumdistkm d1km i
% Column 10: the user ids, which are stored in the last cell of gpsData.
% Indexing with end does not depend on the loop variable still holding its
% final value (it is empty when the loop never ran).
gpsFeatures(:,10) = gpsData{end};
%%
% Write down the GPS features.
% The path is relative, like the 'data' folder the input is loaded from; the
% original string started with a file separator, which points at the root of
% the current drive instead.
save(fullfile('data','gps_Processesed_feature.mat'),'gpsFeatures');