% demo_balanced_cluster.m

%% Reset the session, load the input .mat files and extend the search path
clc, clear all, close all
%%
% Load Data
% fullfile() joins the folder and file name with the separator of the
% current platform; the literal 'data\...' form only resolves on Windows.
dataFiles = {'conv_feature.mat', 'activity_feature.mat', 'dark_feature.mat', ...
             'phonecharge_feature.mat', 'phonelock_feature.mat', ...
             'audio_feature.mat', 'pre_PHQ9.mat', 'post_PHQ9.mat'};
for iFile = 1:numel(dataFiles)
    load(fullfile('data', dataFiles{iFile}));
end
% Path setting
% NOTE(review): machine-specific absolute folder - adjust per installation.
prefix='C:\Users\aaf12005\Desktop\Jins ALG\Depression_2\';
% Folders are added in the original order (each addpath call prepends, so
% later entries take precedence on the search path).
codeDirs = { ...
    {'utility_code_1', 'clustering', 'ssvd-code'}, ...
    {'utility_code_1', 'clustering', 'spectralCoClustering'}, ...
    {'utility_code_1', 'clustering'}, ...
    {'utility_code_1', 'short_functions'}, ...
    {'utility_code_1', 'InfoTheory'}, ...
    {'utility_code_1', 'kernel'}, ...
    {'utility_code_1', 'evaluate'}, ...
    {'utility_code_1', 'code'}, ...
    {'code', 'exsiting_algorithm'}, ...
    {'code'}, ...
    {'proximal_2'}};
for iDir = 1:numel(codeDirs)
    addpath(fullfile(prefix, codeDirs{iDir}{:}));
end
%% View 1: one column per (feature set, column index) pair, each filled with
% the value average_c returns for that pair. Each row of view1 is one
% instance of the population (presumably one user - confirm in average_c).
view1Sources = {activity_feature, 3; conv_feature, 3; dark_feature, 3; ...
                dark_feature, 4; conv_feature, 2; activity_feature, 4; ...
                activity_feature, 5; audio_feature, 3; audio_feature, 4; ...
                audio_feature, 5; phonelock_feature, 2; phonelock_feature, 3};
for iCol = 1:size(view1Sources, 1)
    view1(:, iCol) = average_c(view1Sources{iCol, 1}, view1Sources{iCol, 2});
end


%% View 2: four values per feature set from denoising_wl_sin (per the
% original note: amp, phase, intercept and freq of a fitted sine).
% The third argument is passed through unchanged; its meaning is defined by
% denoising_wl_sin (presumably a starting point for the fit - TODO confirm).
sinFitArg = [500 20 3 700];
view2(:, 1:4) = denoising_wl_sin(activity_feature, 3, sinFitArg);
view2(:, 5:8) = denoising_wl_sin(audio_feature, 3, sinFitArg);
view2(:, 9:12) = denoising_wl_sin(conv_feature, 3, sinFitArg);
% Disabled in the original: phone-lock sine parameters as columns 13:16.
% view2(:, 13:16) = denoising_wl_sin(phonelock_feature, 3);
view1_data = view1;
view2_data = view2;

% Dimensions: n instances, d1 / d2 features in view 1 / view 2.
[n, d1] = size(view1_data);
d2 = size(view2_data, 2);

% Column-normalise both views with normc.
M_1_norm = normc(view1_data);
M_2_norm = normc(view2_data);

M_1 = M_1_norm;
M_2 = M_2_norm;

% 1-by-2 cell holding the two normalised views for proxi3_3.
M = {M_1, M_2};

%% First multiview biclustering pass (cluster 1).
% Per the original notes: lambda_z controls the size of the cluster and
% lambda_2 the number of features used to separate it from the rest of the
% population; the values are fixed here (the notes say they were chosen by
% cross-validation).
lambda_z = 9;
lambda_2 = 5;
iSeedV1 = 12;
% Per the original notes: a nonzero entry of z1 marks an instance that
% belongs to the detected cluster, and V holds the features found useful to
% distinguish that cluster from the rest.
[z1, U, V, obj] = proxi3_3(M, lambda_z, [lambda_2; lambda_2], iSeedV1);
rowClus1 = double(z1~=0);
Clus1 = rowClus1; % 0/1 membership indicator of cluster 1

%% erase the instances in Clus1, then prepare for the next clustering process.
IND = find(z1~=0);
M2_1 = M_1;
M2_2 = M_2;
M2_1(IND, :)=[];
M2_2(IND, :)=[];
M2{1} = M2_1;
M2{2} = M2_2;

%% run multiview biclustering method again to detect the 2nd cluster Clus2.
lambda_z2 = 7;
iSeedV2 = 1;
% Not used in this section; kept because they are workspace variables.
n_1_pre = [];
n_2_pre = [];
n_3_pre = [];
n_1_post = [];
n_2_post = [];
n_3_post = [];
[z2, U2, V2, obj] = proxi3_3(M2, lambda_z2, [lambda_2; lambda_2], iSeedV2);
rowClus2 = double(z2~=0);
% Map the rows of the reduced data back to the original row numbers.
% The population size comes from n instead of a hard-coded 49.
Clus2 = zeros(n, 1);
IND2 = 1:n;
IND2(IND) = [];      % original row numbers that survived the first pass
TN = find(z2==0);    % reduced-data rows NOT selected in the second pass
IND2(TN) = [];       % original row numbers of cluster 2
Clus2(IND2) = 1;
% Everything in neither cluster forms cluster 3.
Clus3 = ones(n, 1) - Clus1 - Clus2;

% view 1 is ok
% view 2, wavelet filtering, f transformation -> raw data...can improve ft
% method. using ft to find most important point. Also we can add useful
% descrption - more features

%% Get data from View 1 and 2 for Random forest
% view1Data layout:
%   cols  1-12 : the 12 view-1 columns (same order as view1)
%   cols 13-17 : first entry (row 1, col 1) of each user's activity, conv,
%                dark, audio and phonelock record - per the original notes
%                this is the user id ("For testing purpose - delete later")
%   cols 18-29 : the 12 view-2 columns (activity, audio, conversation)
% The number of users is taken from view1 instead of a hard-coded 49.
nUsers = size(view1, 1);
view1Data = view1(:, 1:12); % Jin's feature
for k = 1:nUsers %kth user
    view1Data(k, 13) = double(activity_feature{k}(1, 1)); % Activity user id
    view1Data(k, 14) = double(conv_feature{k}(1, 1)); % conv user id
    view1Data(k, 15) = double(dark_feature{k}(1, 1)); % dark feature user id
    view1Data(k, 16) = double(audio_feature{k}(1, 1)); % audio user id
    view1Data(k, 17) = double(phonelock_feature{k}(1, 1)); % ph lock user id
end
% View 2 data: 18:21 activity, 22:25 audio, 26:29 conversation
view1Data(:, 18:29) = view2(:, 1:12);

% Match user ids (column 13) against the PHQ-9 tables and append four
% columns: z+1/z+2 = pre-PHQ-9 id/score, z+3/z+4 = post-PHQ-9 id/score.
% Users without an entry keep -1 in both columns.
% ismember replaces the former sequential cursor, which indexed past the end
% of the PHQ-9 table once every row was matched and stopped matching as soon
% as the table held an id that is absent from the feature data.
z = size(view1Data,2); % To add more columns
userIds = view1Data(:, 13);

[hasPre, preRow] = ismember(userIds, pre_PHQ9(:, 1));
view1Data(:, z+1:z+2) = -1;
view1Data(hasPre, z+1) = pre_PHQ9(preRow(hasPre), 1); % User id
view1Data(hasPre, z+2) = pre_PHQ9(preRow(hasPre), 2); % User's pre-phq9 score

% Same for post phq-9
[hasPost, postRow] = ismember(userIds, post_PHQ9(:, 1));
view1Data(:, z+3:z+4) = -1;
view1Data(hasPost, z+3) = post_PHQ9(postRow(hasPost), 1); % User id
view1Data(hasPost, z+4) = post_PHQ9(postRow(hasPost), 2); % User's post-phq9 score

% Feature selection through multiview - might have to change
% Keep the columns of view1Data whose entry in the first column of V{1}
% (view 1) or V{2} (view 2) is strictly positive.
% NOTE(review): zero AND negative entries are dropped; confirm that "> 0"
% rather than "~= 0" is intended.
fSetV1 = V{1}(:,1);
fSetV2 = V{2}(:,1);
selV1 = find(fSetV1 > 0);
selV2 = find(fSetV2 > 0);
if isempty(selV1) && isempty(selV2)
    % Previously this surfaced as an "undefined variable" error further down.
    error('demo_balanced_cluster:noFeatureSelected', ...
          'No entry in the first column of V{1} or V{2} is positive.');
end
% View-2 features start at column 18 of view1Data, hence the offset of 17.
fSetV1Data = [view1Data(:, selV1), view1Data(:, 17 + selV2)];
b1 = size(fSetV1Data,2); % number of selected feature columns
j = b1 + 1;              % next free column, used by the following section

% Append five columns after the b1 selected features:
%   j   : pre PHQ-9 score   (-1 = missing)
%   j+1 : post PHQ-9 score  (-1 = missing)
%   j+2 : mean of the available PHQ-9 scores (-1 if both are missing)
%   j+3 : 1 if that mean is >= 10, else 0
%   end : cluster code (-1 = cluster 1, 1 = cluster 2, 0 = cluster 3)
fSetV1Data(:,j) = view1Data(:,z+2); % pre Phq-9 score
fSetV1Data(:,j+1) = view1Data(:,z+4); % post Phq-9 score

% Average only the scores that exist. The -1 placeholder used to be averaged
% in as if it were a score (e.g. 20 and -1 gave 9.5, below the cut-off).
phqPair = fSetV1Data(:, j:j+1);
isReported = phqPair ~= -1;
phqPair(~isReported) = 0;
nReported = sum(isReported, 2);
meanPhq = sum(phqPair, 2) ./ max(nReported, 1);
meanPhq(nReported == 0) = -1; % keep the "missing" convention
fSetV1Data(:, j+2) = meanPhq; %PHQ-9 scores
fSetV1Data(:, j+3) = double(meanPhq >= 10); % Cut off at 10 for the mean PHQ-9

% Cluster 1 is assigned last so it wins if an instance is flagged twice,
% matching the priority of the original if/elseif chain.
AllClusData = zeros(size(Clus1,1),1); % 0 = Cluster 3 (or unassigned)
AllClusData(Clus2(:,1) == 1) = 1;     % Cluster 2
AllClusData(Clus1(:,1) == 1) = -1;    % Cluster 1
fSetV1Data(:,end+1) = AllClusData;
forPlot = fSetV1Data; % snapshot before synthetic rows are appended

%%
% Creating a balanced data set
% For cluster 1 (code -1) and cluster 2 (code 1), append three perturbed
% copies of every member row (noise levels 0.01, 0.02, 0.03). Only the
% first b1 (feature) columns are perturbed; the remaining columns are copied.
% NOTE(review): one randn draw is used per row, so all features of a row are
% scaled by the same factor (1 + level*r); confirm that per-feature noise
% was not intended. Results depend on the current random number state.
Clus1Assign = find(fSetV1Data(:,end) == -1);
Clus2Assign = find(fSetV1Data(:,end) == 1);
a1 = size(fSetV1Data,2);

% Returns a copy of rows with the feature columns perturbed. Draws exactly
% one random number per row, in row order, like the former per-row loops.
% Also yields a 0-by-a1 result for an empty cluster instead of leaving the
% output variable undefined.
jitterRows = @(rows, level) [ ...
    rows(:, 1:b1) + level * rows(:, 1:b1) .* repmat(randn(size(rows, 1), 1), 1, b1), ...
    rows(:, b1+1:a1)];

clus1Block = fSetV1Data(Clus1Assign, :);
y_d  = jitterRows(clus1Block, 0.01); % 1 percent noise
z_d  = jitterRows(clus1Block, 0.02); % 2 percent noise
z_d2 = jitterRows(clus1Block, 0.03); % 3 percent noise
%%
clus2Block = fSetV1Data(Clus2Assign, :);
y_d_2  = jitterRows(clus2Block, 0.01); % 1 percent noise
z_d_2  = jitterRows(clus2Block, 0.02); % 2 percent noise
z_d2_2 = jitterRows(clus2Block, 0.03); % 3 percent noise

fSetV1Data = [fSetV1Data; y_d; z_d; z_d2; y_d_2; z_d_2; z_d2_2];
% fSetV1Data(50:end, end-3) = 1; % Self assignment
% Normalize the data set
d3 = fSetV1Data(:,1:end-5); % Feature columns only (drops the 5 trailing columns)
d3 = [normc(d3) fSetV1Data(:,end)]; % Normalize the data, but not labels

%%
% Per-cluster PHQ-9 summaries computed on forPlot (real users only).
% Column layout from the end: end-4 = pre score, end-3 = post score,
% end = cluster code (-1 = cluster 1, 1 = cluster 2, 0 = cluster 3).

% Mark missing scores (-1) as NaN. The two columns are handled
% independently; the former if/elseif left the pre score at -1 whenever
% both scores of a user were missing.
forPlot(forPlot(:,end-3) == -1, end-3) = NaN;
forPlot(forPlot(:,end-4) == -1, end-4) = NaN;

% NOTE(review): the sums below are divided by the full cluster size, so
% missing scores count as 0; the Average* values divide the post value by
% the constant 38 and the pre value by the overall pre-score sum. Confirm
% that these denominators are intended.

% Cluster 1 carries code -1 and cluster 2 code 1; the selections were
% previously swapped relative to the variable names.
clus1Rows = find(forPlot(:,end) == -1);
clus1Features = forPlot(clus1Rows,1:end-5);

clus1PostPHQ = nansum(forPlot(clus1Rows,end-3))/size(clus1Rows,1);
clus1PrePHQ = nansum(forPlot(clus1Rows,end-4))/size(clus1Rows,1);% end-4 = pre
Averageclus1PostPHQ = clus1PostPHQ/38;%nansum(forPlot(:,end-3));
Averageclus1PrePHQ = clus1PrePHQ/nansum(forPlot(:,end-4));

% clus1Plot = [mean(clus1PHQ) mean(normc(clus1Features))];

clus2Rows = find(forPlot(:,end) == 1);
clus2Features = forPlot(clus2Rows,1:end-5);

clus2PostPHQ = nansum(forPlot(clus2Rows,end-3))/size(clus2Rows,1);
clus2PrePHQ = nansum(forPlot(clus2Rows,end-4))/size(clus2Rows,1);% end-4 = pre
Averageclus2PostPHQ = clus2PostPHQ/38;%nansum(forPlot(:,end-3));
Averageclus2PrePHQ = clus2PrePHQ/nansum(forPlot(:,end-4));
% clus2Plot = [mean(clus2PHQ) mean(normc(clus2Features))];

clus3Rows = find(forPlot(:,end) == 0);
clus3Features = forPlot(clus3Rows,1:end-5);

clus3PostPHQ = nansum(forPlot(clus3Rows,end-3))/size(clus3Rows,1);
clus3PrePHQ = nansum(forPlot(clus3Rows,end-4))/size(clus3Rows,1);% end-4 = pre
Averageclus3PostPHQ = clus3PostPHQ/38;%nansum(forPlot(:,end-3));
Averageclus3PrePHQ = clus3PrePHQ/nansum(forPlot(:,end-4));
% clus3Plot = [mean(clus3PHQ) mean(normc(clus3Features))];
	%% Reset the session, load the input .mat files and extend the search path
	% NOTE(review): from here on the file repeats the same pipeline a second
	% time; the clear below discards everything computed so far. Confirm
	% whether the repetition is intentional.
	clc, clear all, close all
	%%
	% Load Data
	% fullfile() joins the folder and file name with the separator of the
	% current platform; the literal 'data\...' form only resolves on Windows.
	dataFiles = {'conv_feature.mat', 'activity_feature.mat', 'dark_feature.mat', ...
	             'phonecharge_feature.mat', 'phonelock_feature.mat', ...
	             'audio_feature.mat', 'pre_PHQ9.mat', 'post_PHQ9.mat'};
	for iFile = 1:numel(dataFiles)
	    load(fullfile('data', dataFiles{iFile}));
	end
	% Path setting
	% NOTE(review): machine-specific absolute folder - adjust per installation.
	prefix='C:\Users\aaf12005\Desktop\Jins ALG\Depression_2\';
	% Folders are added in the original order (each addpath call prepends, so
	% later entries take precedence on the search path).
	codeDirs = { ...
	    {'utility_code_1', 'clustering', 'ssvd-code'}, ...
	    {'utility_code_1', 'clustering', 'spectralCoClustering'}, ...
	    {'utility_code_1', 'clustering'}, ...
	    {'utility_code_1', 'short_functions'}, ...
	    {'utility_code_1', 'InfoTheory'}, ...
	    {'utility_code_1', 'kernel'}, ...
	    {'utility_code_1', 'evaluate'}, ...
	    {'utility_code_1', 'code'}, ...
	    {'code', 'exsiting_algorithm'}, ...
	    {'code'}, ...
	    {'proximal_2'}};
	for iDir = 1:numel(codeDirs)
	    addpath(fullfile(prefix, codeDirs{iDir}{:}));
	end
	%% View 1: one column per (feature set, column index) pair, each filled with
	% the value average_c returns for that pair. Each row of view1 is one
	% instance of the population (presumably one user - confirm in average_c).
	view1Sources = {activity_feature, 3; conv_feature, 3; dark_feature, 3; ...
	                dark_feature, 4; conv_feature, 2; activity_feature, 4; ...
	                activity_feature, 5; audio_feature, 3; audio_feature, 4; ...
	                audio_feature, 5; phonelock_feature, 2; phonelock_feature, 3};
	for iCol = 1:size(view1Sources, 1)
	    view1(:, iCol) = average_c(view1Sources{iCol, 1}, view1Sources{iCol, 2});
	end


	%% View 2: four values per feature set from denoising_wl_sin (per the
	% original note: amp, phase, intercept and freq of a fitted sine).
	% The third argument is passed through unchanged; its meaning is defined by
	% denoising_wl_sin (presumably a starting point for the fit - TODO confirm).
	sinFitArg = [500 20 3 700];
	view2(:, 1:4) = denoising_wl_sin(activity_feature, 3, sinFitArg);
	view2(:, 5:8) = denoising_wl_sin(audio_feature, 3, sinFitArg);
	view2(:, 9:12) = denoising_wl_sin(conv_feature, 3, sinFitArg);
	% Disabled in the original: phone-lock sine parameters as columns 13:16.
	% view2(:, 13:16) = denoising_wl_sin(phonelock_feature, 3);
	view1_data = view1;
	view2_data = view2;

	% Dimensions: n instances, d1 / d2 features in view 1 / view 2.
	[n, d1] = size(view1_data);
	d2 = size(view2_data, 2);

	% Column-normalise both views with normc.
	M_1_norm = normc(view1_data);
	M_2_norm = normc(view2_data);

	M_1 = M_1_norm;
	M_2 = M_2_norm;

	% 1-by-2 cell holding the two normalised views for proxi3_3.
	M = {M_1, M_2};

	%% First multiview biclustering pass (cluster 1).
	% Per the original notes: lambda_z controls the size of the cluster and
	% lambda_2 the number of features used to separate it from the rest of the
	% population; the values are fixed here (the notes say they were chosen by
	% cross-validation).
	lambda_z = 9;
	lambda_2 = 5;
	iSeedV1 = 12;
	% Per the original notes: a nonzero entry of z1 marks an instance that
	% belongs to the detected cluster, and V holds the features found useful to
	% distinguish that cluster from the rest.
	[z1, U, V, obj] = proxi3_3(M, lambda_z, [lambda_2; lambda_2], iSeedV1);
	rowClus1 = double(z1~=0);
	Clus1 = rowClus1; % 0/1 membership indicator of cluster 1

	%% erase the instances in Clus1, then prepare for the next clustering process.
	IND = find(z1~=0);
	M2_1 = M_1;
	M2_2 = M_2;
	M2_1(IND, :)=[];
	M2_2(IND, :)=[];
	M2{1} = M2_1;
	M2{2} = M2_2;

	%% run multiview biclustering method again to detect the 2nd cluster Clus2.
	lambda_z2 = 7;
	iSeedV2 = 1;
	% Not used in this section; kept because they are workspace variables.
	n_1_pre = [];
	n_2_pre = [];
	n_3_pre = [];
	n_1_post = [];
	n_2_post = [];
	n_3_post = [];
	[z2, U2, V2, obj] = proxi3_3(M2, lambda_z2, [lambda_2; lambda_2], iSeedV2);
	rowClus2 = double(z2~=0);
	% Map the rows of the reduced data back to the original row numbers.
	% The population size comes from n instead of a hard-coded 49.
	Clus2 = zeros(n, 1);
	IND2 = 1:n;
	IND2(IND) = [];      % original row numbers that survived the first pass
	TN = find(z2==0);    % reduced-data rows NOT selected in the second pass
	IND2(TN) = [];       % original row numbers of cluster 2
	Clus2(IND2) = 1;
	% Everything in neither cluster forms cluster 3.
	Clus3 = ones(n, 1) - Clus1 - Clus2;

	% view 1 is ok
	% view 2, wavelet filtering, f transformation -> raw data...can improve ft
	% method. using ft to find most important point. Also we can add useful
	% descrption - more features

	%% Get data from View 1 and 2 for Random forest
	% view1Data layout:
	%   cols  1-12 : the 12 view-1 columns (same order as view1)
	%   cols 13-17 : first entry (row 1, col 1) of each user's activity, conv,
	%                dark, audio and phonelock record - per the original notes
	%                this is the user id ("For testing purpose - delete later")
	%   cols 18-29 : the 12 view-2 columns (activity, audio, conversation)
	% The number of users is taken from view1 instead of a hard-coded 49.
	nUsers = size(view1, 1);
	view1Data = view1(:, 1:12); % Jin's feature
	for k = 1:nUsers %kth user
	    view1Data(k, 13) = double(activity_feature{k}(1, 1)); % Activity user id
	    view1Data(k, 14) = double(conv_feature{k}(1, 1)); % conv user id
	    view1Data(k, 15) = double(dark_feature{k}(1, 1)); % dark feature user id
	    view1Data(k, 16) = double(audio_feature{k}(1, 1)); % audio user id
	    view1Data(k, 17) = double(phonelock_feature{k}(1, 1)); % ph lock user id
	end
	% View 2 data: 18:21 activity, 22:25 audio, 26:29 conversation
	view1Data(:, 18:29) = view2(:, 1:12);

	% Match user ids (column 13) against the PHQ-9 tables and append four
	% columns: z+1/z+2 = pre-PHQ-9 id/score, z+3/z+4 = post-PHQ-9 id/score.
	% Users without an entry keep -1 in both columns.
	% ismember replaces the former sequential cursor, which indexed past the end
	% of the PHQ-9 table once every row was matched and stopped matching as soon
	% as the table held an id that is absent from the feature data.
	z = size(view1Data,2); % To add more columns
	userIds = view1Data(:, 13);

	[hasPre, preRow] = ismember(userIds, pre_PHQ9(:, 1));
	view1Data(:, z+1:z+2) = -1;
	view1Data(hasPre, z+1) = pre_PHQ9(preRow(hasPre), 1); % User id
	view1Data(hasPre, z+2) = pre_PHQ9(preRow(hasPre), 2); % User's pre-phq9 score

	% Same for post phq-9
	[hasPost, postRow] = ismember(userIds, post_PHQ9(:, 1));
	view1Data(:, z+3:z+4) = -1;
	view1Data(hasPost, z+3) = post_PHQ9(postRow(hasPost), 1); % User id
	view1Data(hasPost, z+4) = post_PHQ9(postRow(hasPost), 2); % User's post-phq9 score

	% Feature selection through multiview - might have to change
	% Keep the columns of view1Data whose entry in the first column of V{1}
	% (view 1) or V{2} (view 2) is strictly positive.
	% NOTE(review): zero AND negative entries are dropped; confirm that "> 0"
	% rather than "~= 0" is intended.
	fSetV1 = V{1}(:,1);
	fSetV2 = V{2}(:,1);
	selV1 = find(fSetV1 > 0);
	selV2 = find(fSetV2 > 0);
	if isempty(selV1) && isempty(selV2)
	    % Previously this surfaced as an "undefined variable" error further down.
	    error('demo_balanced_cluster:noFeatureSelected', ...
	          'No entry in the first column of V{1} or V{2} is positive.');
	end
	% View-2 features start at column 18 of view1Data, hence the offset of 17.
	fSetV1Data = [view1Data(:, selV1), view1Data(:, 17 + selV2)];
	b1 = size(fSetV1Data,2); % number of selected feature columns
	j = b1 + 1;              % next free column, used by the following section

	% Append five columns after the b1 selected features:
	%   j   : pre PHQ-9 score   (-1 = missing)
	%   j+1 : post PHQ-9 score  (-1 = missing)
	%   j+2 : mean of the available PHQ-9 scores (-1 if both are missing)
	%   j+3 : 1 if that mean is >= 10, else 0
	%   end : cluster code (-1 = cluster 1, 1 = cluster 2, 0 = cluster 3)
	fSetV1Data(:,j) = view1Data(:,z+2); % pre Phq-9 score
	fSetV1Data(:,j+1) = view1Data(:,z+4); % post Phq-9 score

	% Average only the scores that exist. The -1 placeholder used to be averaged
	% in as if it were a score (e.g. 20 and -1 gave 9.5, below the cut-off).
	phqPair = fSetV1Data(:, j:j+1);
	isReported = phqPair ~= -1;
	phqPair(~isReported) = 0;
	nReported = sum(isReported, 2);
	meanPhq = sum(phqPair, 2) ./ max(nReported, 1);
	meanPhq(nReported == 0) = -1; % keep the "missing" convention
	fSetV1Data(:, j+2) = meanPhq; %PHQ-9 scores
	fSetV1Data(:, j+3) = double(meanPhq >= 10); % Cut off at 10 for the mean PHQ-9

	% Cluster 1 is assigned last so it wins if an instance is flagged twice,
	% matching the priority of the original if/elseif chain.
	AllClusData = zeros(size(Clus1,1),1); % 0 = Cluster 3 (or unassigned)
	AllClusData(Clus2(:,1) == 1) = 1;     % Cluster 2
	AllClusData(Clus1(:,1) == 1) = -1;    % Cluster 1
	fSetV1Data(:,end+1) = AllClusData;
	forPlot = fSetV1Data; % snapshot before synthetic rows are appended

	%%
	% Creating a balanced data set
	% For cluster 1 (code -1) and cluster 2 (code 1), append three perturbed
	% copies of every member row (noise levels 0.01, 0.02, 0.03). Only the
	% first b1 (feature) columns are perturbed; the remaining columns are copied.
	% The multiplication operators that were missing from the noise expressions
	% ("0.01fSetV1Data(...)randn", a syntax error) are restored here.
	% NOTE(review): one randn draw is used per row, so all features of a row are
	% scaled by the same factor (1 + level*r); confirm that per-feature noise
	% was not intended. Results depend on the current random number state.
	Clus1Assign = find(fSetV1Data(:,end) == -1);
	Clus2Assign = find(fSetV1Data(:,end) == 1);
	a1 = size(fSetV1Data,2);

	% Returns a copy of rows with the feature columns perturbed. Draws exactly
	% one random number per row, in row order. Also yields a 0-by-a1 result
	% for an empty cluster instead of leaving the output variable undefined.
	jitterRows = @(rows, level) [ ...
	    rows(:, 1:b1) + level * rows(:, 1:b1) .* repmat(randn(size(rows, 1), 1), 1, b1), ...
	    rows(:, b1+1:a1)];

	clus1Block = fSetV1Data(Clus1Assign, :);
	y_d  = jitterRows(clus1Block, 0.01); % 1 percent noise
	z_d  = jitterRows(clus1Block, 0.02); % 2 percent noise
	z_d2 = jitterRows(clus1Block, 0.03); % 3 percent noise
	%%
	clus2Block = fSetV1Data(Clus2Assign, :);
	y_d_2  = jitterRows(clus2Block, 0.01); % 1 percent noise
	z_d_2  = jitterRows(clus2Block, 0.02); % 2 percent noise
	z_d2_2 = jitterRows(clus2Block, 0.03); % 3 percent noise

	fSetV1Data = [fSetV1Data; y_d; z_d; z_d2; y_d_2; z_d_2; z_d2_2];
	% fSetV1Data(50:end, end-3) = 1; % Self assignment
	% Normalize the data set
	d3 = fSetV1Data(:,1:end-5); % Feature columns only (drops the 5 trailing columns)
	d3 = [normc(d3) fSetV1Data(:,end)]; % Normalize the data, but not labels

	%%
	% Per-cluster PHQ-9 summaries computed on forPlot (real users only).
	% Column layout from the end: end-4 = pre score, end-3 = post score,
	% end = cluster code (-1 = cluster 1, 1 = cluster 2, 0 = cluster 3).

	% Mark missing scores (-1) as NaN. The two columns are handled
	% independently; the former if/elseif left the pre score at -1 whenever
	% both scores of a user were missing.
	forPlot(forPlot(:,end-3) == -1, end-3) = NaN;
	forPlot(forPlot(:,end-4) == -1, end-4) = NaN;

	% NOTE(review): the sums below are divided by the full cluster size, so
	% missing scores count as 0; the Average* values divide the post value by
	% the constant 38 and the pre value by the overall pre-score sum. Confirm
	% that these denominators are intended.

	% Cluster 1 carries code -1 and cluster 2 code 1; the selections were
	% previously swapped relative to the variable names.
	clus1Rows = find(forPlot(:,end) == -1);
	clus1Features = forPlot(clus1Rows,1:end-5);

	clus1PostPHQ = nansum(forPlot(clus1Rows,end-3))/size(clus1Rows,1);
	clus1PrePHQ = nansum(forPlot(clus1Rows,end-4))/size(clus1Rows,1);% end-4 = pre
	Averageclus1PostPHQ = clus1PostPHQ/38;%nansum(forPlot(:,end-3));
	Averageclus1PrePHQ = clus1PrePHQ/nansum(forPlot(:,end-4));

	% clus1Plot = [mean(clus1PHQ) mean(normc(clus1Features))];

	clus2Rows = find(forPlot(:,end) == 1);
	clus2Features = forPlot(clus2Rows,1:end-5);

	clus2PostPHQ = nansum(forPlot(clus2Rows,end-3))/size(clus2Rows,1);
	clus2PrePHQ = nansum(forPlot(clus2Rows,end-4))/size(clus2Rows,1);% end-4 = pre
	Averageclus2PostPHQ = clus2PostPHQ/38;%nansum(forPlot(:,end-3));
	Averageclus2PrePHQ = clus2PrePHQ/nansum(forPlot(:,end-4));
	% clus2Plot = [mean(clus2PHQ) mean(normc(clus2Features))];

	clus3Rows = find(forPlot(:,end) == 0);
	clus3Features = forPlot(clus3Rows,1:end-5);

	clus3PostPHQ = nansum(forPlot(clus3Rows,end-3))/size(clus3Rows,1);
	clus3PrePHQ = nansum(forPlot(clus3Rows,end-4))/size(clus3Rows,1);% end-4 = pre
	Averageclus3PostPHQ = clus3PostPHQ/38;%nansum(forPlot(:,end-3));
	Averageclus3PrePHQ = clus3PrePHQ/nansum(forPlot(:,end-4));
	% clus3Plot = [mean(clus3PHQ) mean(normc(clus3Features))];