% dream_merge_split.m
% Committed by Raghvendra Mall.
%% Environment setup
% Put the evaluation-metric helpers on the path and switch off all warnings
% for the remainder of the run.
addpath('Evaluation_Metrics/conductance');
addpath('Evaluation_Metrics/graph_metrics');
warning('off');

% Folder holding the per-iteration Louvain results and the base name of the
% network to process.
location = 'Louvain_Results';
filename = '5_cancer_anonym_v2';

%% Get network, adjacency matrix and connectivity matrix
% load_network also returns `filename`, so the value set above may be
% replaced here. `flag` and `Nrows` are not used anywhere below.
[flag,filename,network,Nrows] = load_network(filename);
A  =  get_adjacency(network);
method = 'Louvain';

%% Create the output directories
% Use `location` instead of repeating the literal so both stay in sync.
mkdir(location);
mkdir('Final_Results');

%% Run the Louvain method for iteration 0
disp('Run the Louvain method');
iter=0;
% The actual Louvain run is disabled; the result file loaded below must
% therefore already exist in `location`.
%run_louvain(network,A,filename,iter,location);

%% Analyze results of first iteration and get best level of hierarchy
% The .mat file is expected to provide hierarchy_list, LQ and LCC (all used
% right below); LNOC is cleared together with them afterwards.
load([location,'/Hierarchy_',method,'_',filename,'_',num2str(iter),'.mat']);

% K and N_nodes are needed by quality_evaluation and by later sections.
% Build the 4-step random network connectivity matrix (for biological
% networks) only when neither the workspace nor the loaded file supplied
% them, so an existing K/N_nodes pair is never overwritten.
if ~exist('K','var') || ~exist('N_nodes','var')
    [K,N_nodes] = create_connectivity_matrix(A);
end

outputlabels = hierarchy_list;
noh = size(hierarchy_list,2);   % number of hierarchy levels (columns)
type = 'worst';
[F_score,best_level_hierarchy,Q_prev] = quality_evaluation(LQ,LCC,hierarchy_list,K,type);
clear hierarchy_list LQ LCC LNOC;

%% Start the steps of iterative Louvain method for clusters of size > 100
% Keep an untouched copy of the labels; only the best hierarchy level is
% written back into it once the loop has finished.
copylabels = outputlabels;
max_noc = length(unique(outputlabels(:,best_level_hierarchy)));
c_s_info = cluster_size_information(outputlabels,best_level_hierarchy);
% Remaining-cluster information of the previous pass; column 2 is summed in
% the stop criterion below. Starts at [0 0] so the first pass always runs.
prev_rem_c_s_info=[0 0];
max_iter = 1;
while(1)
    iter = 0;
    [rem_c_s_info,final_outputlabels] = find_rem_c_s(c_s_info,max_noc,outputlabels,N_nodes,best_level_hierarchy);

    %% Break condition for the iterative Louvain clustering
    % Stop when nothing is left to refine, when the column-2 total of the
    % remaining clusters did not change since the previous pass, or when
    % the pass counter reaches 5.
    if (isempty(rem_c_s_info) || (sum(prev_rem_c_s_info(:,2))-sum(rem_c_s_info(:,2)))==0 || max_iter>=5)
        break;
    end

    %% For the remaining generate clusters run Louvain method
    iter = generate_iterative_clusters(rem_c_s_info,outputlabels,best_level_hierarchy,A,filename,iter,location);

    %% Start operating over the remaining clusters and keep best level of hierarchy for subclusters
    [concatenate_labels,bad_indices] = find_large_clusters(iter,outputlabels,best_level_hierarchy,rem_c_s_info,method,filename,K,max_noc,final_outputlabels,location);
    outputlabels(bad_indices,best_level_hierarchy) = concatenate_labels;
    outputlabels = transform_labels(outputlabels,noh);

    %% Get new Q and max number of clusters
    max_noc = max(outputlabels(:,best_level_hierarchy));
    newQ = calculate_modularity(A,outputlabels(:,best_level_hierarchy));
    % Give up as soon as modularity falls below half of the initial value.
    if (newQ<(Q_prev/2))
        break;
    end

    % Remember this pass's remaining clusters under the name the break
    % condition actually reads. The previous code stored into a different
    % variable, so the "no change" criterion never saw an updated value.
    prev_rem_c_s_info = rem_c_s_info;
    c_s_info = cluster_size_information(outputlabels,best_level_hierarchy);
    max_iter = max_iter+1;
    clear concatenate_labels bad_indices;
end
copylabels(:,best_level_hierarchy) = outputlabels(:,best_level_hierarchy);

%% Merge step
% The connectivity score promotes smaller clusters, so an increase of its
% weighted version means communities have to be merged. Look across the
% levels of the hierarchy and identify the clusters with the best weighted
% connectivity score.
noh = size(copylabels, 2);
disp('Merging communities between hierarchies based on weighted connectivity scores');
final_labels = merge_step(copylabels, N_nodes, best_level_hierarchy, noh, K);
disp('Finished the merge step');

%% Calculate connectivity information for modified set of clusters
% Store the merged labels at the best level, pass the label matrix through
% transform_labels, then recompute the per-level connectivity and sizes.
copylabels(:, best_level_hierarchy) = final_labels;
copylabels = transform_labels(copylabels, noh);
[connectivity_clusters_per_level, cluster_size_per_level] = ...
    calculate_connectivity(copylabels, noh, K);

%% Get id of clusters with size > 100 and assess them (break-up) based on F_score
% mink and maxk are handed to split_step here and reused later by the
% threshold estimation.
mink = 3;
maxk = 100;
copylabels = split_step(copylabels, best_level_hierarchy, mink, maxk, ...
    connectivity_clusters_per_level, cluster_size_per_level, N_nodes, A, K);
disp('Identified optimal communities using proposed F-score');

%% Get info about final set of clusters after merge and break-up
copylabels = transform_labels(copylabels, noh);
[connectivity_clusters_per_level, c_s_per_level] = ...
    calculate_connectivity(copylabels, noh, K);
conductance_best_level = cutcond(A, copylabels(:, best_level_hierarchy));
connectivity_best_level = connectivity_clusters_per_level{best_level_hierarchy}(:, 2);

% Confidence per cluster: conductance divided by the product of the
% connectivity value and the squared second column of c_s_per_level.
confidence_best_level = conductance_best_level ./ ...
    (connectivity_best_level .* (c_s_per_level{best_level_hierarchy}(:, 2) .^ 2));

% Replace every +Inf entry with the largest entry that is not +Inf.
confidence_best_level(confidence_best_level == Inf) = ...
    max(confidence_best_level(confidence_best_level ~= Inf));

%% Write the final label matrix and the per-cluster confidence table
csvwrite([location,'/Hierarchy_',method,'_',filename,'_best_list.csv'],copylabels);
% Columns: first column of c_s_per_level, confidence value, second column
% of c_s_per_level (presumably cluster id and cluster size - TODO confirm
% against calculate_connectivity).
csvwrite([location,'/Confidence_',filename,'_best_list.csv'],[c_s_per_level{best_level_hierarchy}(:,1) confidence_best_level c_s_per_level{best_level_hierarchy}(:,2)]);
clear final_labels K conductance_best_level connectivity_best_level;

%% Estimate the best possible threshold for selection of communities
intervals = 100;
final_confidence_threshold_value = estimate_best_threshold(copylabels,best_level_hierarchy,confidence_best_level,A,mink,maxk,Q_prev,intervals);
% One row: the selected threshold followed by the best hierarchy level.
csvwrite([location,'/Confidence_',filename,'_best_value.csv'],[final_confidence_threshold_value,best_level_hierarchy]);
% The name was previously misspelled ("threhold"), so this clear silently
% did nothing and the variable stayed in the workspace.
clear final_confidence_threshold_value;

%% Generate final result and put it in Final Result folder
disp('Generate the final optimum result and place in Final_Results folder');

% Run the external Python post-processing script. Requesting only the exit
% status keeps the command's output visible in the Command Window.
command = ['python final_submission.py ',location,' ',filename,' ',method];
status = system(command);
if status ~= 0
    % Report the failure instead of ignoring it; execution continues so the
    % behaviour stays best-effort as before.
    fprintf(2,'Command failed with exit status %d: %s\n',status,command);
end

% Move every file in `location` whose name contains "final" into the final
% results folder (relies on a shell that provides `mv`).
final_location = 'Final_Results';
command = ['mv ',location,'/*final* ',final_location,'/'];
status = system(command);
if status ~= 0
    fprintf(2,'Command failed with exit status %d: %s\n',status,command);
end

% Remove every variable currently in the workspace, then the helper list
% itself.
varlist = who;
clear(varlist{:});
clear varlist;