Skip to content
Permalink
master
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
%% Reset the session: clear the command window, the workspace and all figures
clc, clear all, close all
%% Load data
% Each MAT-file is loaded straight into the workspace. The rest of the script
% reads conv_feature, activity_feature, dark_feature, phonelock_feature,
% audio_feature, pre_PHQ9 and post_PHQ9, which are presumably the variables
% stored in the files of the same name (confirm against the MAT-files).
% fullfile() joins the folder and file name with the platform's own separator,
% so the relative 'data' folder also resolves outside Windows.
load(fullfile('data', 'conv_feature.mat'));
load(fullfile('data', 'activity_feature.mat'));
load(fullfile('data', 'dark_feature.mat'));
load(fullfile('data', 'phonecharge_feature.mat'));
load(fullfile('data', 'phonelock_feature.mat'));
load(fullfile('data', 'audio_feature.mat'));
load(fullfile('data', 'pre_PHQ9.mat'));
load(fullfile('data', 'post_PHQ9.mat'));
%% Path setting
% NOTE(review): prefix is an absolute, machine-specific location; edit it to
% point at the local checkout before running the script elsewhere.
prefix='C:\Users\aaf12005\Desktop\Jins ALG\Depression_2\';
addpath(fullfile(prefix, 'utility_code_1', 'clustering', 'ssvd-code'));
addpath(fullfile(prefix, 'utility_code_1', 'clustering', 'spectralCoClustering'));
addpath(fullfile(prefix, 'utility_code_1', 'clustering'));
addpath(fullfile(prefix, 'utility_code_1', 'short_functions'));
addpath(fullfile(prefix, 'utility_code_1', 'InfoTheory'));
addpath(fullfile(prefix, 'utility_code_1', 'kernel'));
addpath(fullfile(prefix, 'utility_code_1', 'evaluate'));
addpath(fullfile(prefix, 'utility_code_1', 'code'));
addpath(fullfile(prefix, 'code', 'exsiting_algorithm')); % folder name is spelled this way on disk
addpath(fullfile(prefix, 'code'));
addpath(fullfile(prefix, 'proximal_2'));
%% View 1: per-user averages of the raw sensor records
% Each row of view1 is one instance (user) of the population and each column
% is the result of average_c(<feature set>, <column index>), i.e. one
% averaged value per user for that column of the records.
% The table below lists, in output-column order, which feature set and
% which of its columns feeds every view1 column.
view1Sources = { ...
    activity_feature,  3; ...
    conv_feature,      3; ...
    dark_feature,      3; ...
    dark_feature,      4; ...
    conv_feature,      2; ...
    activity_feature,  4; ...
    activity_feature,  5; ...
    audio_feature,     3; ...
    audio_feature,     4; ...
    audio_feature,     5; ...
    phonelock_feature, 2; ...
    phonelock_feature, 3};
for iCol = 1:size(view1Sources, 1)
    view1(:, iCol) = average_c(view1Sources{iCol, 1}, view1Sources{iCol, 2});
end
%% View 2: sine-fit parameters of the activity, audio and conversation series
% denoising_wl_sin returns four values per user (amplitude, phase, intercept
% and frequency according to the original author); the three feature sets
% fill columns 1-4, 5-8 and 9-12 of view2. The last argument is passed
% through unchanged - presumably the initial guess of the fit (confirm in
% denoising_wl_sin).
sineSources = {activity_feature, audio_feature, conv_feature};
for iSrc = 1:numel(sineSources)
    sineCols = 4 * (iSrc - 1) + (1:4);
    view2(:, sineCols) = denoising_wl_sin(sineSources{iSrc}, 3, [500 20 3 700]);
end
% Disabled by the author: a phone-lock sine fit in columns 13:16
%   view2(:, 13:16) = denoising_wl_sin(phonelock_feature, 3);
%   view2 = [sin_act_5, sin_audio_5, sin_conv_5, sin_lock_5];
view1_data = view1;
view2_data = view2;
%% Normalise both views and pack them for the biclustering routine
[n, d1] = size(view1_data);   % n = number of instances, d1 = view-1 features
d2 = size(view2_data, 2);     % d2 = view-2 features
M_1_norm = normc(view1_data); % normc works column by column
M_2_norm = normc(view2_data);
M_1 = M_1_norm;
M_2 = M_2_norm;
M = {M_1, M_2};               % 1-by-2 cell: one normalised matrix per view
%% First cluster (Clus1) via multiview biclustering
% Tuning parameters (the author notes they are meant to be chosen by
% cross-validation; here they are fixed):
%   lambda_z - governs the size of the cluster that is extracted
%   lambda_2 - governs how many features are used to separate the cluster
%              from the rest of the population (same value for both views)
%   iSeedV1  - seed handed to proxi3_3
lambda_z = 9;
lambda_2 = 5;
iSeedV1 = 12;
% Outputs of proxi3_3 as used by this script:
%   z1 - one entry per instance; a non-zero entry marks membership of the
%        detected cluster
%   V  - per-view feature weights; it is read later on to pick the features
%        that distinguish the cluster from the remaining instances
%   U, obj - not used in the visible part of the script
[z1, U, V, obj] = proxi3_3(M, lambda_z, [lambda_2; lambda_2], iSeedV1);
inFirstCluster = (z1 ~= 0);
rowClus1 = double(inFirstCluster);  % 1 = instance belongs to the cluster, 0 = it does not
Clus1 = rowClus1;
%% Drop the Clus1 instances so the next run only sees the remaining ones
IND = find(inFirstCluster);                      % row numbers assigned to Clus1
remainingRows = setdiff(1:size(M_1, 1), IND);    % ascending, same order as before
M2_1 = M_1(remainingRows, :);
remainingRows = setdiff(1:size(M_2, 1), IND);
M2_2 = M_2(remainingRows, :);
M2 = {M2_1, M2_2};
%% Second cluster (Clus2): run the biclustering again on the remaining instances
lambda_z2 = 7;
iSeedV2 = 1;
% Placeholders; nothing in the visible part of the script fills them in.
n_1_pre = [];
n_2_pre = [];
n_3_pre = [];
n_1_post = [];
n_2_post = [];
n_3_post = [];
% NOTE: obj from the first run is overwritten here.
[z2, U2, V2, obj] = proxi3_3(M2, lambda_z2, [lambda_2; lambda_2], iSeedV2);
rowClus2 = double(z2~=0);
% z2 is indexed relative to the reduced data (Clus1 rows removed), so map it
% back to row numbers of the full population. The population size comes
% from n instead of a hard-coded 49.
IND2 = 1:n;
IND2(IND) = [];              % original row numbers that were still available
if numel(z2) ~= numel(IND2)
    error('Clus2:sizeMismatch', ...
        'z2 has %d entries but %d instances remained after removing Clus1.', ...
        numel(z2), numel(IND2));
end
TN = find(z2==0);            % remaining instances NOT picked by the second run
IND2(TN) = [];               % what is left are the original rows of Clus2
Clus2 = zeros(n, 1);
Clus2(IND2) = 1;
% Everything that is in neither cluster forms the third group.
Clus3 = ones(n, 1) - Clus1 - Clus2;
% Author's notes:
%   - view 1 is considered fine as it is.
%   - view 2 (wavelet filtering, "f transformation" of the raw data) could be
%     improved, e.g. by using the transform to find the most important points.
%   - more descriptive features could be added.
%% Assemble view1Data: one row per user, input table for the random forest
% Column layout of view1Data:
%    1-12  view1 (per-user averages). Meaning of each column according to
%          the author's notes:
%             1  activity, no movement       7  activity, run
%             2  conversation duration       8  audio, quiet
%             3  dark count                  9  audio, noisy
%             4  dark duration              10  audio, loud
%             5  conversation count         11  phone lock count
%             6  activity, walk             12  phone lock duration
%   13-17  value at row 1, column 1 of each user's activity, conversation,
%          dark, audio and phone-lock records; the author treats it as the
%          user id ("for testing purpose - delete later"). Column 13 is the
%          id used further down to look up the PHQ-9 scores.
%   18-21  view2(:, 1:4)   sine fit of activity (amp, phase, intercept, freq)
%   22-25  view2(:, 5:8)   sine fit of audio
%   26-29  view2(:, 9:12)  sine fit of conversation
% The number of users is taken from view1 instead of a hard-coded 49.
numUsers = size(view1, 1);
sensorIds = zeros(numUsers, 5);
for k = 1:numUsers % kth user
    sensorIds(k, 1) = double(activity_feature{k}(1, 1));  % activity user id
    sensorIds(k, 2) = double(conv_feature{k}(1, 1));      % conversation user id
    sensorIds(k, 3) = double(dark_feature{k}(1, 1));      % dark user id
    sensorIds(k, 4) = double(audio_feature{k}(1, 1));     % audio user id
    sensorIds(k, 5) = double(phonelock_feature{k}(1, 1)); % phone-lock user id
end
% Build the table in one step rather than growing it row by row.
view1Data = [view1(:, 1:12), sensorIds, view2(:, 1:12)];
% Attach the PHQ-9 questionnaire results to view1Data.
% Four columns are appended after the current last column z:
%   z+1  user id found in pre_PHQ9   (-1 when the user has no pre score)
%   z+2  pre PHQ-9 score             (-1 when missing)
%   z+3  user id found in post_PHQ9  (-1 when the user has no post score)
%   z+4  post PHQ-9 score            (-1 when missing)
% Users are identified by column 13 of view1Data; column 1 of pre_PHQ9 /
% post_PHQ9 holds the id and column 2 the score.
% The lookup uses ismember instead of a running row cursor: the cursor could
% step past the last row of the PHQ-9 table (index error) and only worked
% when both tables listed the users in exactly the same order.
z = size(view1Data,2); % last column before the PHQ-9 columns are added
userIds = view1Data(:, 13);

% pre PHQ-9
[hasPre, preRow] = ismember(userIds, double(pre_PHQ9(:, 1)));
view1Data(:, z+1:z+2) = -1;                               % default: user not found
view1Data(hasPre, z+1) = pre_PHQ9(preRow(hasPre), 1);     % user id
view1Data(hasPre, z+2) = pre_PHQ9(preRow(hasPre), 2);     % user's pre PHQ-9 score

% post PHQ-9
[hasPost, postRow] = ismember(userIds, double(post_PHQ9(:, 1)));
view1Data(:, z+3:z+4) = -1;                               % default: user not found
view1Data(hasPost, z+3) = post_PHQ9(postRow(hasPost), 1); % user id
view1Data(hasPost, z+4) = post_PHQ9(postRow(hasPost), 2); % user's post PHQ-9 score
% Feature selection driven by the multiview biclustering result.
% The first column of V{1} / V{2} holds one weight per view-1 / view-2
% feature; a feature is kept when its weight is positive.
% NOTE(review): features with a negative weight are dropped by the "> 0"
% test. The author flagged this rule as "might have to change" - confirm
% whether "~= 0" is what is wanted.
fSetV1 = V{1}(:,1);
fSetV2 = V{2}(:,1);
keepV1 = find(fSetV1 > 0);
keepV2 = find(fSetV2 > 0);
% View-1 features sit in columns 1-12 of view1Data and view-2 features in
% columns 18-29, hence the offset of 17. Building the matrix in one step
% also yields a valid (zero-column) matrix when no feature is selected.
fSetV1Data = [view1Data(:, keepV1), view1Data(:, 17 + keepV2)];
b1 = size(fSetV1Data,2);  % number of selected feature columns
j = b1 + 1;               % first column after the features

% PHQ-9 columns: j = pre score, j+1 = post score, j+2 = mean score,
% j+3 = binary label (1 when the mean score is at least 10).
prePHQ = view1Data(:,z+2);   % pre PHQ-9 score, -1 when missing
postPHQ = view1Data(:,z+4);  % post PHQ-9 score, -1 when missing
% Average only the scores that exist. Averaging the -1 "missing" marker
% together with a real score would roughly halve that score and could push
% a user below the cutoff.
phqScores = [prePHQ, postPHQ];
isScored = (phqScores ~= -1);
nScored = sum(isScored, 2);
phqScores(~isScored) = 0;
meanPHQ = sum(phqScores, 2) ./ max(nScored, 1);
meanPHQ(nScored == 0) = -1;  % neither score available: keep the -1 marker

fSetV1Data(:,j) = prePHQ;
fSetV1Data(:,j+1) = postPHQ;
fSetV1Data(:,j+2) = meanPHQ;
fSetV1Data(:,j+3) = double(meanPHQ >= 10); % cutoff at 10 for the mean PHQ-9
% Collapse the three membership vectors into a single code per instance:
%   -1 = Cluster 1,  1 = Cluster 2,  0 = Cluster 3 (or no cluster at all).
% Cluster 1 takes precedence over Cluster 2 when both flags are set, which
% is why it is written last.
nInstances = size(Clus1, 1);
AllClusData = zeros(nInstances, 1);
AllClusData(Clus2(1:nInstances, 1) == 1) = 1;    % Cluster 2
AllClusData(Clus1(1:nInstances, 1) == 1) = -1;   % Cluster 1
% The code becomes the new last column of the feature table.
fSetV1Data = [fSetV1Data, AllClusData];
% Alternative kept by the author: one indicator column per cluster
%   fSetV1Data(:,end+1) = Clus1(:,1);
%   fSetV1Data(:,end+1) = Clus2(:,1);
%   fSetV1Data(:,end+1) = Clus3(:,1);
% Snapshot of the table before synthetic rows are appended.
forPlot = fSetV1Data;
%%
% Augment the data set with noisy copies of the Cluster 1 and Cluster 2 rows
% (the author's stated goal is a balanced data set).
% For every source row, three synthetic rows are produced at noise levels of
% 1, 2 and 3 percent. Only the first b1 columns (the selected features) are
% perturbed; the remaining columns (PHQ-9 values, label, cluster code) are
% copied unchanged.
% One random number is drawn per synthetic row, so all features of that row
% are scaled by the same factor (1 + level * randn).
% NOTE(review): if independent noise per feature was intended, draw
% randn(size(rows, 1), b1) instead.
% Random numbers are consumed in the same order as a row-by-row loop would
% (y_d, z_d, z_d2, then y_d_2, z_d_2, z_d2_2). An empty cluster now simply
% contributes zero rows instead of leaving its variables undefined.
a1 = size(fSetV1Data,2);
jitterRows = @(rows, level) [ ...
    rows(:, 1:b1) + bsxfun(@times, level * rows(:, 1:b1), randn(size(rows, 1), 1)), ...
    rows(:, b1+1:end)];

% Cluster 1 is coded as -1 in the last column
Clus1Assign = find(fSetV1Data(:,end) == -1);
clus1Source = fSetV1Data(Clus1Assign, :);
y_d  = jitterRows(clus1Source, 0.01); % 1 percent noise
z_d  = jitterRows(clus1Source, 0.02); % 2 percent noise
z_d2 = jitterRows(clus1Source, 0.03); % 3 percent noise
%%
% Cluster 2 is coded as 1 in the last column
Clus2Assign = find(fSetV1Data(:,end) == 1);
clus2Source = fSetV1Data(Clus2Assign, :);
y_d_2  = jitterRows(clus2Source, 0.01); % 1 percent noise
z_d_2  = jitterRows(clus2Source, 0.02); % 2 percent noise
z_d2_2 = jitterRows(clus2Source, 0.03); % 3 percent noise

fSetV1Data = [fSetV1Data; y_d; z_d; z_d2; y_d_2; z_d_2; z_d2_2];
% fSetV1Data(50:end, end-3) = 1; % Self assignment
% Normalise the feature columns only. The last five columns (pre, post and
% mean PHQ-9, binary label, cluster code) are left out of the normalisation,
% and the cluster code is appended again, untouched, as the label column.
d3 = fSetV1Data(:,1:end-5);
d3 = [normc(d3) fSetV1Data(:,end)];
%%
% Per-cluster PHQ-9 summaries, computed on forPlot (the table before the
% synthetic rows were appended).
% Relevant columns of forPlot, counted from the end:
%   end    cluster code (-1 = Cluster 1, 1 = Cluster 2, 0 = Cluster 3)
%   end-3  post PHQ-9 score
%   end-4  pre PHQ-9 score
% Changes compared with the earlier version of this section:
%   * pre and post are converted from the -1 "missing" marker to NaN
%     independently; the if/elseif used before left the pre score at -1
%     whenever both scores of a user were missing.
%   * clus1* now describes the rows coded -1 and clus2* the rows coded 1,
%     matching the encoding of the cluster column; they were swapped.
%   * cluster means divide by the number of users that have a score, not by
%     the cluster size, so missing scores no longer pull the mean down.
postCol = size(forPlot, 2) - 3;
preCol = size(forPlot, 2) - 4;
forPlot(forPlot(:, postCol) == -1, postCol) = NaN;
forPlot(forPlot(:, preCol) == -1, preCol) = NaN;
sumAvailable = @(v) sum(v(~isnan(v)));                      % sum ignoring NaN
meanAvailable = @(v) sum(v(~isnan(v))) / nnz(~isnan(v));    % mean ignoring NaN (NaN if none)
% NOTE(review): the post-score averages are divided by the constant 38 while
% the pre-score averages are divided by the sum of all pre scores; the
% meaning of 38 is not visible here (the disabled alternative was the sum
% of all post scores). Both are kept as they were - confirm the intent.

% Cluster 1
clus1Rows = find(forPlot(:,end) == -1);
clus1Features = forPlot(clus1Rows,1:end-5);
clus1PostPHQ = meanAvailable(forPlot(clus1Rows, postCol));
clus1PrePHQ = meanAvailable(forPlot(clus1Rows, preCol));
Averageclus1PostPHQ = clus1PostPHQ/38; % alternative: sumAvailable(forPlot(:,end-3))
Averageclus1PrePHQ = clus1PrePHQ/sumAvailable(forPlot(:,end-4));
% clus1Plot = [mean(clus1PHQ) mean(normc(clus1Features))];

% Cluster 2
clus2Rows = find(forPlot(:,end) == 1);
clus2Features = forPlot(clus2Rows,1:end-5);
clus2PostPHQ = meanAvailable(forPlot(clus2Rows, postCol));
clus2PrePHQ = meanAvailable(forPlot(clus2Rows, preCol));
Averageclus2PostPHQ = clus2PostPHQ/38; % alternative: sumAvailable(forPlot(:,end-3))
Averageclus2PrePHQ = clus2PrePHQ/sumAvailable(forPlot(:,end-4));
% clus2Plot = [mean(clus2PHQ) mean(normc(clus2Features))];

% Cluster 3
clus3Rows = find(forPlot(:,end) == 0);
clus3Features = forPlot(clus3Rows,1:end-5);
clus3PostPHQ = meanAvailable(forPlot(clus3Rows, postCol));
clus3PrePHQ = meanAvailable(forPlot(clus3Rows, preCol));
Averageclus3PostPHQ = clus3PostPHQ/38; % alternative: sumAvailable(forPlot(:,end-3))
Averageclus3PrePHQ = clus3PrePHQ/sumAvailable(forPlot(:,end-4));
% clus3Plot = [mean(clus3PHQ) mean(normc(clus3Features))];