% Sensor-Depression/demo_balanced_cluster.m
% Demo script: multiview biclustering of phone-sensor features followed by construction of a balanced data set.
%% Reset the session
clc, clear all, close all
%% Load data
% Each .mat file is loaded into the base workspace. Paths are built with
% fullfile so the script does not depend on the Windows '\' separator.
load(fullfile('data', 'conv_feature.mat'))
load(fullfile('data', 'activity_feature.mat'))
load(fullfile('data', 'dark_feature.mat'))
load(fullfile('data', 'phonecharge_feature.mat'))
load(fullfile('data', 'phonelock_feature.mat'))
load(fullfile('data', 'audio_feature.mat'))
load(fullfile('data', 'pre_PHQ9.mat'))
load(fullfile('data', 'post_PHQ9.mat'))
%% Path setting
% NOTE(review): prefix is a machine-specific absolute path; adjust it to the
% local checkout before running.
prefix = 'C:\Users\aaf12005\Desktop\Jins ALG\Depression_2\';
addpath(fullfile(prefix, 'utility_code_1', 'clustering', 'ssvd-code'));
addpath(fullfile(prefix, 'utility_code_1', 'clustering', 'spectralCoClustering'));
addpath(fullfile(prefix, 'utility_code_1', 'clustering'));
addpath(fullfile(prefix, 'utility_code_1', 'short_functions'));
addpath(fullfile(prefix, 'utility_code_1', 'InfoTheory'));
addpath(fullfile(prefix, 'utility_code_1', 'kernel'));
addpath(fullfile(prefix, 'utility_code_1', 'evaluate'));
addpath(fullfile(prefix, 'utility_code_1', 'code'));
addpath(fullfile(prefix, 'code', 'exsiting_algorithm'));
addpath(fullfile(prefix, 'code'));
addpath(fullfile(prefix, 'proximal_2'));
%% View 1: per-user averages of the raw activity, conversation, dark, audio
% and phone-lock records. Each row of view1 is one instance (user) from the
% population; each column is one averaged feature.
% average_c(data, c) is a project helper; it is called here with the feature
% cell array and a column index (presumably returning one averaged value per
% user -- confirm against its source).
view1(:, 1) = average_c(activity_feature, 3);
view1(:, 2) = average_c(conv_feature, 3);
view1(:, 3) = average_c(dark_feature, 3);
view1(:, 4) = average_c(dark_feature, 4);
view1(:, 5) = average_c(conv_feature, 2);
view1(:, 6) = average_c(activity_feature, 4);
view1(:, 7) = average_c(activity_feature, 5);
view1(:, 8) = average_c(audio_feature, 3);
view1(:, 9) = average_c(audio_feature, 4);
view1(:, 10) = average_c(audio_feature, 5);
view1(:, 11) = average_c(phonelock_feature, 2);
view1(:, 12) = average_c(phonelock_feature, 3);
%% View 2: sine parameters fitted to the time series of activity, audio and
% conversation. Per the original author's note, each call yields four values
% per user: amplitude, phase, intercept and frequency.
view2(:, 1:4) = denoising_wl_sin(activity_feature, 3, [500 20 3 700]);
view2(:, 5:8) = denoising_wl_sin(audio_feature, 3, [500 20 3 700]);
view2(:, 9:12) = denoising_wl_sin(conv_feature, 3, [500 20 3 700]);
% view2(:, 13:16) = denoising_wl_sin(phonelock_feature, 3);
% view2 = [sin_act_5, sin_audio_5, sin_conv_5, sin_lock_5];
view1_data = view1;
view2_data = view2;
%% Normalize the data (normc is applied to each view separately).
n = size(view1_data, 1);   % number of instances (users)
d1 = size(view1_data, 2);  % number of view-1 features
d2 = size(view2_data, 2);  % number of view-2 features
M_1_norm = normc(view1_data);
M_2_norm = normc(view2_data);
M_1 = M_1_norm;
M_2 = M_2_norm;
% Two views are stored, so allocate two slots (the original allocated one
% and relied on implicit growth).
M = cell(1, 2);
M{1} = M_1;
M{2} = M_2;
%% Parameters for the first multiview biclustering run.
% Per the original author's notes these were chosen by cross-validation:
% lambda_z controls the size of the cluster we want, and lambda_2 controls
% the number of features used to differentiate the cluster from the rest of
% the population.
lambda_z = 9;
lambda_2 = 5;
iSeedV1 = 12;
%% Detect the 1st cluster.
% z1 identifies the explored cluster: a non-zero entry in row i means the
% i-th instance belongs to the cluster, zero means it does not. V holds, per
% view, the features the method found useful for differentiating cluster
% Clus1 from the rest. The aim is to group people and then see how the
% groups relate to depression, and which features drive the grouping.
[z1, U, V, obj] = proxi3_3(M, lambda_z, [lambda_2; lambda_2], iSeedV1);
rowClus1 = double(z1~=0);
Clus1 = rowClus1;
%% Remove the Clus1 instances from both views before the next clustering run.
IND = find(z1~=0);
M2_1 = M_1;
M2_1(IND, :) = [];
M2_2 = M_2;
M2_2(IND, :) = [];
M2 = {M2_1, M2_2};
%% Run the multiview biclustering method again to detect the 2nd cluster Clus2.
lambda_z2 = 7;
iSeedV2 = 1;
[z2, U2, V2, obj] = proxi3_3(M2, lambda_z2, [lambda_2; lambda_2], iSeedV2);
rowClus2 = double(z2~=0);
% z2 is indexed over the reduced data set (Clus1 rows removed), so map its
% non-zero rows back to row numbers of the full population of n instances.
IND2 = 1:n;        % all original row numbers (was hard-coded to 1:49)
IND2(IND) = [];    % drop rows already assigned to Clus1
TN = find(z2==0);  % positions in the reduced set that are NOT in cluster 2
IND2(TN) = [];     % what remains are the original rows of cluster 2
Clus2 = zeros(n, 1);
Clus2(IND2) = 1;
% Everything not in Clus1 or Clus2 forms the third group.
Clus3 = ones(n, 1) - Clus1 - Clus2;
% Author's notes:
% view 1 is ok
% view 2: wavelet filtering, Fourier transformation -> raw data ... the FT
% method can be improved, e.g. by using the FT to find the most important
% points. More descriptive features could also be added.
%% Get data from view 1 and view 2 for the random forest
% view1Data has one row per user and the following columns:
%    1-12  view-1 averages ("Jin's features"). Per the original author's
%          notes the columns are: 1 activity = no movement, 2 conversation
%          duration, 3 dark count, 4 dark duration, 5 conversation count,
%          6 activity = walk, 7 activity = run, 8 audio = quiet,
%          9 audio = noisy, 10 audio = loud, 11 phone-lock count,
%          12 phone-lock duration.
%   13-17  user id read from the first row/column of each sensor record
%          (kept for testing purposes - delete later)
%   18-29  view-2 sine-fit parameters (activity, audio, conversation)
nUsers = size(view1, 1);          % was hard-coded to 49
view1Data = zeros(nUsers, 29);    % preallocate instead of growing in the loop
view1Data(:, 1:12) = view1;       % Jin's features
for k = 1:nUsers % k-th user
    % For testing purpose - delete later
    view1Data(k, 13) = double(activity_feature{k}(1, 1));  % Activity user id
    view1Data(k, 14) = double(conv_feature{k}(1, 1));      % conv user id
    view1Data(k, 15) = double(dark_feature{k}(1, 1));      % dark feature user id
    view1Data(k, 16) = double(audio_feature{k}(1, 1));     % audio user id
    view1Data(k, 17) = double(phonelock_feature{k}(1, 1)); % ph lock user id
end
% View 2 data
view1Data(:, 18:21) = view2(:, 1:4);  % amp, phase, intercept and freq of activity
view1Data(:, 22:25) = view2(:, 5:8);  % Audio feature
view1Data(:, 26:29) = view2(:, 9:12); % Conversation feature
% Match user ids and attach both the PHQ-9 id and score; users without a
% survey entry keep -1 in the corresponding columns.
% Appended columns: z+1 = pre id, z+2 = pre score, z+3 = post id,
% z+4 = post score.
% The lookup uses ismember rather than a running cursor, so it does not
% depend on the survey tables being ordered like view1Data, does not stall
% on a survey id that has no sensor data, and cannot index past the end of
% the survey table.
z = size(view1Data, 2); % To add more columns
userIds = view1Data(:, 13);
view1Data(:, z+1:z+4) = -1; % default: user does not exist in the survey
% Pre PHQ-9
[hasPre, preRow] = ismember(userIds, double(pre_PHQ9(:, 1)));
view1Data(hasPre, z+1) = pre_PHQ9(preRow(hasPre), 1); % User id
view1Data(hasPre, z+2) = pre_PHQ9(preRow(hasPre), 2); % User's pre-PHQ-9 score
% Post PHQ-9
[hasPost, postRow] = ismember(userIds, double(post_PHQ9(:, 1)));
view1Data(hasPost, z+3) = post_PHQ9(postRow(hasPost), 1); % User id
view1Data(hasPost, z+4) = post_PHQ9(postRow(hasPost), 2); % User's post-PHQ-9 score
% Feature selection through multiview - might have to change
% Keep only the features whose loading in the first column of V is positive.
% NOTE(review): negative loadings are discarded by the > 0 test; confirm
% that this is intended (cluster membership uses ~= 0 instead).
fSetV1 = V{1}(:,1);
fSetV2 = V{2}(:,1);
selV1 = find(fSetV1 > 0).';       % selected view-1 features -> view1Data columns 1..12
selV2 = find(fSetV2 > 0).';       % selected view-2 features -> view1Data columns 18..29
% Building the matrix by indexing also yields a valid n-by-0 matrix when no
% feature is selected (the original left fSetV1Data undefined in that case).
fSetV1Data = [view1Data(:, selV1), view1Data(:, 17 + selV2)];
b1 = size(fSetV1Data, 2);         % number of selected feature columns
j = b1 + 1;                       % first column after the features
fSetV1Data(:, j)   = view1Data(:, z+2); % pre PHQ-9 score
fSetV1Data(:, j+1) = view1Data(:, z+4); % post PHQ-9 score
% Mean PHQ-9 over the surveys the user actually answered. -1 marks a missing
% survey and must not be averaged in as if it were a score; a user with no
% survey at all keeps -1.
phqScores = fSetV1Data(:, j:j+1);
answered = phqScores ~= -1;
nAnswered = sum(answered, 2);
phqMean = sum(phqScores .* answered, 2) ./ max(nAnswered, 1);
phqMean(nAnswered == 0) = -1;
fSetV1Data(:, j+2) = phqMean;                % PHQ-9 scores (mean)
fSetV1Data(:, j+3) = double(phqMean >= 10);  % Cut off at 10 for the mean PHQ-9
% Encode cluster membership in a single column:
%   -1 = Cluster 1, 1 = Cluster 2, 0 = Cluster 3.
AllClusData = zeros(size(Clus1,1),1);
AllClusData(Clus2(:,1) == 1) = 1;   % Cluster 2
AllClusData(Clus1(:,1) == 1) = -1;  % Cluster 1 (assigned last so it takes precedence)
fSetV1Data(:,end+1) = AllClusData;
% fSetV1Data(:,end+1) = Clus1(:,1);
% fSetV1Data(:,end+1) = Clus2(:,1);
% fSetV1Data(:,end+1) = Clus3(:,1);
% Snapshot before augmentation. Columns from the right: end = cluster code,
% end-1 = binary PHQ label, end-2 = mean PHQ, end-3 = post PHQ, end-4 = pre PHQ.
forPlot = fSetV1Data;
%%
% Creating a balanced data set
% For Cluster 1 (code -1) and then Cluster 2 (code 1), append three noisy
% copies of every member row, with 1, 2 and 3 percent multiplicative noise
% on the b1 feature columns. The PHQ and cluster columns are copied as-is.
% Appended row order: cluster -1 at 1%, 2%, 3%, then cluster 1 at 1%, 2%, 3%.
% NOTE(review): a single scalar randn is drawn per row, so all features of a
% row are scaled by the same random factor; confirm this is intended rather
% than independent noise per feature.
a1 = size(fSetV1Data,2);
clusterCodes = [-1 1];
noiseLevels = [0.01 0.02 0.03];
% Starting from an empty 0-by-a1 matrix keeps the concatenation valid even
% when a cluster has no members (the original errored in that case).
noisyRows = zeros(0, a1);
for c = 1:numel(clusterCodes)
    members = find(fSetV1Data(:,end) == clusterCodes(c));
    for s = 1:numel(noiseLevels)
        noisyCopy = fSetV1Data(members, :);
        for r = 1:numel(members)
            noisyCopy(r,1:b1) = fSetV1Data(members(r),1:b1) + noiseLevels(s)*fSetV1Data(members(r),1:b1)*randn;
        end
        noisyRows = [noisyRows; noisyCopy];
    end
end
fSetV1Data = [fSetV1Data; noisyRows];
% fSetV1Data(50:end, end-3) = 1; % Self assignment
% Normalize the data set
d3 = fSetV1Data(:,1:end-5); % Remove other cols - Self assignment
d3 = [normc(d3) fSetV1Data(:,end)]; % Normalize the data, but not labels
%%
% Per-cluster PHQ-9 summaries computed on forPlot (the data set before
% augmentation). Columns from the right: end = cluster code
% (-1 = Cluster 1, 1 = Cluster 2, 0 = Cluster 3), end-3 = post PHQ-9,
% end-4 = pre PHQ-9.
% Turn the -1 "missing survey" sentinel into NaN in BOTH score columns
% independently, so that nansum ignores it. The original if/elseif left the
% pre score at -1 whenever the post score was also missing.
forPlot(forPlot(:,end-3) == -1, end-3) = NaN;
forPlot(forPlot(:,end-4) == -1, end-4) = NaN;
% NOTE(review): the per-cluster values below divide the NaN-ignoring sum by
% the full cluster size, so users without a survey count as 0; the
% "Average..." values then divide by the constant 38 (post) or by the total
% of all pre scores (pre). Kept as in the original - confirm the intended
% normalisation.
% Cluster 1 is encoded as -1 (the original selected code 1 here, i.e. it
% reported Cluster 2 under the Cluster 1 names and vice versa).
clus1Rows = find(forPlot(:,end) == -1);
clus1Features = forPlot(clus1Rows,1:end-5);
clus1PostPHQ = nansum(forPlot(clus1Rows,end-3))/size(clus1Rows,1);
clus1PrePHQ = nansum(forPlot(clus1Rows,end-4))/size(clus1Rows,1);% end-4 = pre
Averageclus1PostPHQ = clus1PostPHQ/38;%nansum(forPlot(:,end-3));
Averageclus1PrePHQ = clus1PrePHQ/nansum(forPlot(:,end-4));
% clus1Plot = [mean(clus1PHQ) mean(normc(clus1Features))];
% Cluster 2 is encoded as 1.
clus2Rows = find(forPlot(:,end) == 1);
clus2Features = forPlot(clus2Rows,1:end-5);
clus2PostPHQ = nansum(forPlot(clus2Rows,end-3))/size(clus2Rows,1);
clus2PrePHQ = nansum(forPlot(clus2Rows,end-4))/size(clus2Rows,1);% end-4 = pre
Averageclus2PostPHQ = clus2PostPHQ/38;%nansum(forPlot(:,end-3));
Averageclus2PrePHQ = clus2PrePHQ/nansum(forPlot(:,end-4));
% clus2Plot = [mean(clus2PHQ) mean(normc(clus2Features))];
% Cluster 3 is encoded as 0.
clus3Rows = find(forPlot(:,end) == 0);
clus3Features = forPlot(clus3Rows,1:end-5);
clus3PostPHQ = nansum(forPlot(clus3Rows,end-3))/size(clus3Rows,1);
clus3PrePHQ = nansum(forPlot(clus3Rows,end-4))/size(clus3Rows,1);% end-4 = pre
Averageclus3PostPHQ = clus3PostPHQ/38;%nansum(forPlot(:,end-3));
Averageclus3PrePHQ = clus3PrePHQ/nansum(forPlot(:,end-4));
% clus3Plot = [mean(clus3PHQ) mean(normc(clus3Features))];