Skip to content
Permalink
master
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
%% This file generates features for GPS - This file should work for both Android and iOS
% Used for feature extraction for the WH paper
%% Basic stuff
function [re, rergw] = feature_extract_shweta(eps_p, minpts_p, time_di, os, nday)
%FEATURE_EXTRACT_SHWETA  Cluster stationary location samples per user and export the labels.
%
%   [re, rergw] = feature_extract_shweta(eps_p, minpts_p, time_di, os, nday)
%
%   For every (eps, minpts) combination and every cell of gpsData, the
%   function keeps the samples that fall in a window ending at the PHQ-9
%   reference timestamp, computes a handful of per-user values into
%   gpsFeatures, runs dbscan on the stationary samples and appends one row
%   per clustered sample to a table that is finally written to
%   ['phase1' os '_clusters_new.csv'].
%
%   Inputs
%     eps_p     DBSCAN radius value(s) to try (scalar or vector).
%     minpts_p  DBSCAN minimum-points value(s) to try (scalar or vector).
%     time_di   0 clusters on latitude/longitude only; any other value adds
%               a scaled time-of-day column as a third clustering dimension.
%     os        'ios' loads gps_feature_ios_up_shweta.mat; anything else
%               loads gps_feature_and_up_shweta.mat. Also part of the
%               output file name.
%     nday      Length multiplier of the look-back window before the
%               reference timestamp (see the unit note in the body).
%
%   Outputs
%     re        Always [] in this version: the statement that appended to it
%               is commented out.
%     rergw     Always []: nothing in this function populates it. It is
%               initialised so that two-output calls no longer fail.
%
%   Columns of gpsData{z} as they are used below:
%      1  reference timestamp (exported as 'PHQ9 Date')
%      2  latitude            3  longitude
%      4  sample timestamp    5  moving flag (0 = stationary)
%      6  user id             7  PHQ-9 score (-1 is skipped)
%      9  day identifier     10  label code (1 or 2 -> depress = -1)
%     11  source code (3 is dropped, -1 is counted, -2 uses the wifi cap)
%
%   NOTE(review): gpsFeatures is filled in but neither returned nor saved by
%   this function; only the cluster table reaches disk.
%   NOTE(review): rows from every (eps, minpts) combination are appended to
%   the same table without recording which combination produced them.

epochtime = 1443672000000; % ms since the Unix epoch = 2015-10-01 04:00:00 UTC; used to derive hour of day
allneed = [];              % rows of [user id, sample timestamp, reference timestamp, cluster id]

% The .mat file is expected to provide gpsData (one cell per user).
% NOTE(review): it may also provide gpsFeatures; not visible from here.
if strcmp(os,'ios')
    load('gps_feature_ios_up_shweta.mat');
else
    load('gps_feature_and_up_shweta.mat');
end

% Both platforms use the same settings.
interval = 1;          % expected minutes between samples (used for coverage)
tthreshold = 600;      % cap on the time credited to one sample
wifi_tthreshold = 600; % same cap for samples whose source code is -2

% Make sure both outputs exist even if a loop below never executes.
re = [];
rergw = [];

c = 0;
time_d = time_di;

% reshape(...,1,[]) makes the loops visit every element even when a column
% vector is passed (a plain "for x = columnVector" runs once with the whole
% vector). Note that "eps" shadows the MATLAB built-in inside this function.
for eps = reshape(eps_p, 1, [])
    for minpts = reshape(minpts_p, 1, [])
        c = c + 1;
        fprintf('Parameter combination %d (eps = %g, minpts = %g)\n', c, eps, minpts);
        re = [];
        for z = 1:size(gpsData,2)
            fprintf('  user index %d of %d\n', z, size(gpsData,2));
            % Single pass with iii = 0: the source filter guarded by
            % "if iii" is disabled and the export guarded by "iii==0" runs.
            for iii = 0
                userData = gpsData{z};
                if isempty(userData) % nothing recorded for this user
                    continue;
                end
                userData(userData(:,11) == 3, :) = [];
                if isempty(userData)
                    continue;
                end

                % Values above 1e12 are divided by 1000.
                % NOTE(review): the original comment says this handles
                % microseconds, but millisecond epoch values also exceed
                % 1e12 and would be turned into seconds here. Later code
                % mixes both conventions (the window and the time dimension
                % use millisecond-style constants, while the 600 caps, the
                % dc*86400 denominators and the "*1000 - epochtime" step
                % treat timestamps as seconds). Confirm the intended unit.
                if userData(1,4) > 1000000000000
                    userData(:,4) = userData(:,4)/1000;
                end
                if userData(1,1) > 1000000000000
                    userData(:,1) = userData(:,1)/1000;
                end

                periodts = userData(1,1);
                useris = userData(1,6);

                % Keep samples newer than (reference - window) ...
                userData = userData(userData(:,4) > userData(1,1) - nday*3600*24*1000, :);
                if isempty(userData)
                    continue;
                end
                % ... and not later than the reference timestamp.
                userData = userData(userData(:,4) <= userData(1,1), :);
                if isempty(userData)
                    continue;
                end

                % Require at least 100 samples with source code -1.
                if nnz(userData(:,11) == -1) < 100
                    continue;
                end

                % Day count: one plus the number of changes of the day
                % identifier between consecutive rows.
                dc = 1 + nnz(userData(2:end,9) ~= userData(1:end-1,9));
                gpsFeatures(z,16) = size(userData,1)/(dc*24*60/interval); % fraction of expected samples present

                % Label: codes 1 and 2 map to -1, everything else to 1.
                tdepress = userData(1,10);
                depress = 1;
                if (tdepress == 1 || tdepress == 2)
                    depress = -1;
                end
                gpsFeatures(z,15) = depress;       % clinical ground truth
                gpsFeatures(z,14) = dc;            % day count
                gpsFeatures(z,13) = userData(1,6); % uid

                % First PHQ-9 value that is not -1; NaN when there is none
                % (previously an indexing error).
                phqRow = find(userData(:,7) ~= -1, 1);
                if isempty(phqRow)
                    gpsFeatures(z,12) = NaN;
                else
                    gpsFeatures(z,12) = userData(phqRow,7);
                end
                gpsFeatures(z,1) = userData(1,1); % reference timestamp of the first kept row

                % Reduce to [timestamp, lat, long, moving flag, day id, source code].
                userData = userData(:, [4 2 3 5 9 11]);

                %% Time credited to each sample (preprocessing step)
                % Gap to the previous sample, capped so that missing data
                % does not inflate the total. The first sample gets the cap.
                nSamples = size(userData,1);
                timeSpent = zeros(nSamples,1);
                timeSpent(1) = tthreshold;
                for i = 2:nSamples
                    timeSpent(i) = userData(i,1) - userData(i-1,1);
                    if timeSpent(i) < 0
                        % Consecutive timestamps are out of order.
                        fprintf('Negative time gap at sample %d of user index %d; timestamps are out of order.\n', i, z);
                        pause(5); % leave time to notice the message
                    end
                    % The cap is chosen by the source code of the previous sample.
                    if userData(i-1,6) == -2
                        cap = wifi_tthreshold;
                    else
                        cap = tthreshold;
                    end
                    if timeSpent(i) > cap
                        timeSpent(i) = cap;
                    end
                end
                userData(:, end+1) = timeSpent; % [timestamp, lat, long, moving flag, day id, source code, time spent]

                % Credited time over dc days of 86400 units each: all
                % samples (r1) and samples with source code -1 only (r2).
                r1 = sum(userData(:,end))/(dc*24*60*60);
                r2 = sum(userData(userData(:,6) == -1, end))/(dc*86400);
                gpsFeatures(z,21) = r1;
                gpsFeatures(z,22) = r2;

                if iii
                    userData = userData(userData(:,6) == -1, :); % keep only rows with source code -1
                end

                %% Feature 1 = Location variance (stationary samples only)
                userDataStat = userData(userData(:,4) == 0, :); % 0 = stationary
                varLat = var(userDataStat(:,2));
                varLong = var(userDataStat(:,3));
                locVar = log(varLat + varLong); % natural log
                gpsFeatures(z,2) = locVar;

                %% Clustering of stationary samples
                % dbscan is called as dbscan(data, minpts, eps) and both
                % outputs are transposed before being stored.
                % NOTE(review): that is not the (X, epsilon, minpts) order
                % of the Statistics and Machine Learning Toolbox dbscan;
                % confirm that a project-local dbscan.m is on the path.
                % NOTE(review): with no stationary samples the time branch
                % fails on userDataStat(1,1).
                if time_d == 0
                    [clusLabel, clusType] = dbscan(userDataStat(:,2:3), minpts, eps);
                else
                    % Offset from the first stationary sample, wrapped at
                    % 86400000 and scaled by eps/3600000.
                    timeInfo = mod(userDataStat(:,1) - userDataStat(1,1), 86400000) * eps / 3600000;
                    [clusLabel, clusType] = dbscan([userDataStat(:,2:3) timeInfo], minpts, eps);
                end
                userDataStat(:,end+1) = clusLabel'; % cluster id (-1 = outlier)
                eind1 = size(userDataStat,2);       % column holding the cluster id
                userDataStat(:,end+1) = clusType';  % point type reported by dbscan

                % Drop outliers.
                userDataStat = userDataStat(userDataStat(:,eind1) ~= -1, :);

                % Hour of day relative to epochtime, stored as the last column.
                userDataStat(:,end+1) = mod((userDataStat(:,1)*1000 - epochtime)/3600000, 24);

                %% Number of unique clusters (outliers already removed)
                gpsFeatures(z,10) = numel(unique(userDataStat(:,eind1)));

                if iii == 0
                    baseone = ones(size(userDataStat,1),1);
                    allneed = [allneed; baseone*useris, userDataStat(:,1), baseone*periodts, userDataStat(:,eind1)];
                end

                % Drop per-user temporaries. rergw is listed so the second
                % output survives until the function returns.
                clearvars -except eps_p allneed epochtime nday gpsonly minpts_p wifi_tthreshold nightcluster os interval time_d starttime eps minpts c tthreshold z gpsData gpsFeatures re rergw
            end
        end
    end
end

%% Write the cluster table
% strjoin yields exactly one comma between names, so the header has the
% same four fields as the data rows (no trailing comma).
cHeader = {'Userid' 'timestamp' 'PHQ9 Date' 'Cluster_ID'};
textHeader = strjoin(cHeader, ',');
outFile = ['phase1' os '_clusters_new.csv'];
fid = fopen(outFile,'w');
if fid == -1
    error('feature_extract_shweta:cannotOpenFile', 'Could not open %s for writing.', outFile);
end
fprintf(fid,'%s\n',textHeader);
fclose(fid);
dlmwrite(outFile, allneed, '-append', 'precision', '%.0f');