function [Ptys,Ptx,trace]= ibsi_sequ(Pxys,parms); % % Function: IBSI_SEQU % Cluster X into T, to maxmizes the weighted informations target % function: w_0 I(X;T) + \sum_i w_i I(X;Y_i). The function implements % the sequential IBSI algorithm. % % Usage: [Ptys,Ptx,trace]= ibsi_sequ(Pxys,parms); % % Algorithm: % 1. Initialize randomly to K clusters. % 2. Take an element out of its cluster % 3. Score all alternative assignments of of the element to clusters. % and choose the one that maximizes the target function. % 4. Repeat 2-3 until convergence. % % Inputs: % Pxys - A cell array with joint probability matrices. % all matrices Pxys{:} must have the same number of % rows=n(X). % parms - a structure with the following fields: % w_vec - Weights of information terms. Negative values % correspond to side info matrices. % w_0 - weight of the compression term. (equal 1/beta) % n_clusters - Number of clusters. % n_starts - The no of starting points. algorithm is repeated % n_starts times and the best one is returned. % default is 1. % max_repeats - The maximal no of steps per starting point. % init_assign - An initialization suggestion. (optional) % once_in - Printing flag. print and save results after % 'once_in' steps. (default is 1000); % stop_tol - Stopping tolerance criterion. stop when the % total improvement of the target function over % the last 100 steps is smaller than stop_tol. % Outputs: % Ptys - Matrices P(T,Y) after compression % Ptx - The hard clusternig solution. P(t,x) >0 % when x is assigned to the cluter t. % trace - a structure that gathers data along the run. % Each of the following fields is a cell array of size % of size n_starts x max_repeats/once_in % score - target function vale. recorded every step. % assign - hard-clustering assignment. recorded every % 'once_in' steps. - a cell array. % Is - vector of weighted Information terms for each % Pxys{i}, recorded every 'once_in' steps. % The variables that are recorded every 'once_in' % steps, are also recorded on the last step. % % Change log: % 2005 Nov 02 - sign of w_0 was inconsistent in documentation and code. % % % Additional options: % choose_method- The method For choosing the next candidate % element to be moved. (optional. Default is % move_sequentialy). % % (C) Gal Chechik 2003. % % http://robotics.stanford.edu/~gal/ % % If you use this code, please cite as % % G. Chechik and N. Tishby, Extracting relevant structures with side % information. In Becker, S., Thrun, S., Obermayer, K., eds.: Advances % in Neural Information Processing Systems-15 MIT press (2003) % %@InProceedings{chechik02extracting, % author = "G. Chechik and N. Tishby", % title = "Extracting relevant structures with side information", % Editor = "S. Becker and S. Thrun and K. Obermayer", % booktitle = "Advances in Neural Information Processing Systems % 15 (NIPS-2002)" % year = "2003", %} if(nargin<2) error('illegal nargin to IBSI_SEQU. Use help ibsi_sequ.'); end % Check inputs if(isfield(parms,'n_clusters')); n_c = parms.n_clusters; else error('missing field parms.n_clusters'); end if(isfield(parms,'w_vec')); w_vec = parms.w_vec; else error('missing field parms.w_vec'); end if(isfield(parms,'n_starts')); n_starts = parms.n_starts; else n_starts=1; end if(isfield(parms,'max_repeats')); max_repeats = parms.max_repeats; else error('missing field parms.max_repeats'); end if(isfield(parms,'choose_method')); choose_method = parms.choose_method; else choose_method = 'move_sequent___'; end if(isfield(parms,'once_in')); once_in = parms.once_in; else once_in = 1000; end if(isfield(parms,'init_assign')); init_assign = parms.init_assign; else init_assign=[]; end if(isfield(parms,'stop_tol')); stop_tol = parms.stop_tol; else stop_tol=0; end % Init disp('IBSI_SEQU: Enter') trace.score = zeros(n_starts,1+max_repeats); trace.Is = cell(n_starts,1+ceil(max_repeats/once_in)); n_x = size(Pxys{1},1); n_y = length(Pxys); for(i_y=1:n_y) sss = sum(sum(Pxys{i_y})); Pxys{i_y} = Pxys{i_y}/sss; Pys{i_y} = sum(Pxys{i_y},1); Pxs{i_y} = sum(Pxys{i_y},2); end n_back=100; tic; % % Repeate the annealing procedure % =============================== for(i_start=1:n_starts) disp(sprintf('IBSI_SEQU: start #%d',i_start)) t1 = cputime; % Init assigns if(isempty(init_assign)) disp('Use random assignment'); n_per_clust = ceil(n_x / n_c); seq_assign= repmat([1:n_c],1,n_per_clust); inds = randperm(n_x); assign= seq_assign(inds); else disp('Use initial assignment'); assign= init_assign; end % Init Ptx Ptx = zeros(n_c,n_x); for(i_x=1:n_x) i_c = assign(i_x); Ptx(i_c,i_x)=1/n_x; end % Init Ptys Ptys = cell(size(Pxys)); for(i_y=1:n_y) Pty = zeros(n_c,size(Pxys{i_y},2)); for(i_c=1:n_c) xs = find(assign==i_c); Pty(i_c,:) = sum(Pxys{i_y}(xs,:),1); Pts{i_y}(i_c) = sum(Pty(i_c,:)); end Ptys{i_y} = Pty; end % Calc score of all current clusters clust_scores = score_all(Pxys,Pys,Pxs,w_vec,assign); if(parms.w_0>eps) cmprs_score = MI2naive(Ptx)*parms.w_0; score = cmprs_score + sum(clust_scores); else score = sum(clust_scores); end working_cluster=1; i_record=0; % Optimization loop for(i_repeat = 1:max_repeats) switch(choose_method) case 'move_repeatedly', % Move an object from working to another cluster i_z = working_cluster; focus_x = find(assign==i_z); i_x = focus_x(ceil(rand(1)*length(focus_x))); case 'move_randomly__', % Move a random object i_x = ceil(rand(1)*n_x); i_z = assign(i_x); case 'move_sequent___', % inline function of mod i_x = i_repeat - n_x*floor(i_repeat/n_x); if(i_x==0) i_x = n_x; end; i_z = assign(i_x); otherwise, choose_method error('illegal choose_method'); end % Loop over the new cluster before_scr1 = clust_scores(i_z); if(parms.w_0>eps) cmprs_score = MI2naive(Ptx)*parms.w_0; else cmprs_score=0; end new_assign=assign;new_assign(i_x)=-1; for(i_y=1:n_y) Pys_gc{i_y} = Ptys{i_y}(i_z,:) - Pxys{i_y}(i_x,:); if(sum(Pys_gc{i_y})>0) Pys_gc{i_y} = Pys_gc{i_y} / sum(Pys_gc{i_y} ); else % Cluster turns empty when x is taken out Pys_gc{i_y}=0; end Pt(i_y) = Pts{i_y}(i_z) - Pxs{i_y}(i_x); end after_scr1 = ... score_single_withPty(Pxys,Pys_gc,Pys,Pxs,w_vec,Pt); payoffs(i_z) = 0; for(new_z=[1:i_z-1,i_z+1:n_c]) new_assign= assign; new_assign(i_x) = new_z; % Recalc score if(parms.w_0>eps) new_Ptx = Ptx; new_Ptx(new_z,i_x)=Ptx(i_z,i_x); new_Ptx(i_z, i_x)=0; new_cmprs_score = MI2naive(new_Ptx)*parms.w_0; else new_cmprs_score=0; end before_scr2 = clust_scores(new_z); for(i_y=1:n_y) Pys_gc{i_y} = Ptys{i_y}(new_z,:) + Pxys{i_y}(i_x,:); Pys_gc{i_y} = Pys_gc{i_y} / sum(Pys_gc{i_y} ); Pt(i_y) = Pts{i_y}(new_z) + Pxs{i_y}(i_x); end after_scr2 = ... score_single_withPty(Pxys,Pys_gc,Pys,Pxs,w_vec,Pt); % Check if accept the change payoffs(new_z) = ... + after_scr1 + after_scr2 ... - before_scr1 - before_scr2... - new_cmprs_score + cmprs_score; end % Change to the best [best_payoff, best_z] = max(payoffs); % Do the change if(best_payoff>0) new_assign= assign; new_assign(i_x) = best_z; % Recalc score if(parms.w_0>eps) new_Ptx = Ptx; new_Ptx(best_z,i_x)=Ptx(i_z,i_x); new_Ptx(i_z, i_x)=0; cmprs_score= MI2naive(Ptx)*parms.w_0; new_cmprs_score = MI2naive(new_Ptx)*parms.w_0; Ptx = new_Ptx; else cmprs_score=0; new_cmprs_score=0; end for(i_y=1:n_y) Pys_gc{i_y} = Ptys{i_y}(best_z,:) + Pxys{i_y}(i_x,:); Pys_gc{i_y} = Pys_gc{i_y} / sum(Pys_gc{i_y} ); Pt(i_y) = Pts{i_y}(best_z) + Pxs{i_y}(i_x); end after_scr2 = ... score_single_withPty(Pxys,Pys_gc,Pys,Pxs,w_vec,Pt); clust_scores(i_z) = after_scr1; clust_scores(best_z) = after_scr2; cmprs_score = new_cmprs_score; working_cluster = best_z; score = score + best_payoff; assign= new_assign; for(i_y=1:n_y) Ptys{i_y}(best_z,:)=Ptys{i_y}(best_z,:)+Pxys{i_y}(i_x,:); Ptys{i_y}(i_z,:) =Ptys{i_y}(i_z,:) -Pxys{i_y}(i_x,:); Pts{i_y}(best_z) = Pts{i_y}(best_z) + Pxs{i_y}(i_x); Pts{i_y}(i_z) = Pts{i_y}(i_z) - Pxs{i_y}(i_x); end else % dont move the object end % Printing if(ceil(i_repeat/once_in)*once_in==i_repeat) % record status i_record=i_record+1; trace.Is{i_start,i_record} = clust_scores; trace.assign{i_start,i_record}= assign; % Print str1 = '%5d %4d/%2d->%2d '; str2 = ' D=%+10.8f SCR=%10.6f '; str3 = ' %4.2fmin'; disp(sprintf([str1 str2 str3], i_repeat, i_x, i_z,... best_z, best_payoff, score, toc/60)); end trace.score(i_start,i_repeat) = score; % Check stopping criterion if(i_repeat>n_back) if(abs(score-trace.score(i_repeat-n_back))0); I = sum(sum(P(inds).*log2(P(inds)./approx(inds)))); return % ================================= function [Pty] = calc_Pty(Pxy,assign) % % % n_yi = size(Pxy,2); n_c = max(assign); Pty = zeros(n_c,n_yi); for(i_c=1:n_c) inds = find(assign==i_c); Pty(i_c,1:n_yi) = sum(Pxy(inds,1:n_yi),1); end return % ================================ function [score] = score_single(Pxys,Pys,Pxs,w_vec,xs) % % calculate information from a single cluster % score = 0; for(i_y = 1:length(Pxys)) Py_gc = sum(Pxys{i_y}(xs,:),1); Py_gc = Py_gc/sum(Py_gc); Py = Pys{i_y}; % Py = Py/sum(Py); nonz = find (Py_gc>0); Dkl = sum(Py_gc(nonz) .* log2(Py_gc(nonz)./Py(nonz))); Px = sum(Pxs{i_y}(xs)); score = score + Px * Dkl*w_vec(i_y); end return % ================================ function scores = score_all(Pxys,Pys,Pxs,w_vec,ass) % % calculate target function of weighted information terms % scores = zeros(1,max(ass)); for(i_clust = 1:max(ass)) xs = find(ass==i_clust); [scores(i_clust)] = score_single(Pxys,Pys,Pxs,w_vec,xs); end return % ================================ function [score] = score_single_withPty(... Pxys,Pys_gc,Pys,Pxs,w_vec,Pt); % % Calculate information from a single cluster % score = 0; for(i_y = 1:length(Pxys)) Py_gc = Pys_gc{i_y}; Py = Pys{i_y}; nonz = find (Py_gc>0); Dkl = sum(Py_gc(nonz) .* log2(Py_gc(nonz)./Py(nonz))); score = score + Pt(i_y) * Dkl*w_vec(i_y); end return