function [PL,grad_u,hessian_mat] = compute_PL_props(S,W,b,opt,Q)
%compute_PL_props computes the properties of the log-PL function
%PL - Pseudo-Likelihood
% =====================================================================================
% Input:
% S - a matrix of size m-by-N consisting of N sparsity patterns.
% W,b - the current values for the Boltzmann parameters: an interaction matrix of 
% size m-by-m and a bias vector of size m-by-1.
% opt - determines which properties of the PL function will be computed:
% opt=0 - only the value of the log-PL function, opt=1 - log-PL + gradient,
% opt=2 - log-PL + gradient + hessian matrix.  
% Q - a matrix consisting of the current gradient and several recent steps (normalized).
% Only read when opt=2 (it may be omitted otherwise); it must have (m^2+m)/2 rows.
% =====================================================================================
% Output:
% PL - the scalar value of the log-PL function (up to an additive constant), accumulated
% over all N columns of S as sum(log(cosh(V))-S.*V) with V=W*S+b.
% grad_u - gradient of the log-PL function with respect to the Boltzmann parameters 
% (stacked into a column vector u: strict upper triangle of W, then b).
% It is a zero vector of size (m^2+m)/2 when opt=0.
% hessian_mat - the hessian matrix with respect to the step sizes alpha, where the 
% update rule is: u^{j+1}=u^{j}+Q*alpha. It is estimated from a random subsample of
% the columns of S. Empty unless opt=2.
% =====================================================================================
% Tomer Faktor
% Department of Electrical Engineering
% Technion, Haifa 32000 Israel
% tomerfa@tx.technion.ac.il
%
% August 2011
% =====================================================================================
[m,N]=size(S);
p=(m^2+m)/2; % length of u: m*(m-1)/2 upper-triangular entries of W plus m biases
PL=0;
grad_u=zeros(p,1);
hessian_mat=[];
% The accumulators get their final sizes up front, so that the gradient keeps
% its p-by-1 size even when S has no columns (N=0) and the loop never runs.
sum_S=zeros(m,1);
sum_phi=zeros(m,1);
sum2_S=zeros(m);
sum2_phi_S=zeros(m);
jump_size=1000; % number of columns of S handled per pass (bounds memory use)
Nl=ceil(N/jump_size);
if opt==2
    if nargin<5
        error('compute_PL_props:missingQ','Q must be supplied when opt==2.');
    end
    f_N=ceil(N/1000); % use only ~1000 examples for computations of the hessian matrix
    L=size(Q,2);
    hessian_mat=zeros(L);
    % (r1(k),c1(k)) is the k-th strict-upper-triangular entry of W in column-major
    % order; inds1/inds2 are the linear indices of C(r1(k),k) and C(c1(k),k).
    [r1,c1]=find(triu(ones(m),1)>0);
    inds0=1:p-m;
    inds1=r1+(inds0(:)-1)*m;
    inds2=c1+(inds0(:)-1)*m;
end
for l=1:Nl
    S_partial=S(:,1+(l-1)*jump_size:min(l*jump_size,N));
    Nj=size(S_partial,2);
    V1=W*S_partial+repmat(b,1,Nj);
    PL=PL+sum(sum(rho_x(V1)-S_partial.*V1));
    if opt
        % First-order statistics needed for the gradient.
        V2=drho_dx(V1);
        sum_S=sum_S+sum(S_partial,2);
        sum_phi=sum_phi+sum(V2,2);
        sum2_S=sum2_S+S_partial*S_partial';
        sum2_phi_S=sum2_phi_S+V2*S_partial';
        if opt==2
            % The second derivative is only needed for the hessian.
            V3=d2rho_d2x(V1);
            inds_all=randperm(Nj);
            % Take every f_N-th example (in random order) from this pass and
            % rescale by f_N to compensate for the subsampling.
            for j=1:floor(Nj/f_N)
                ind=inds_all(j);
                % C is the Jacobian of W*s with respect to the upper-triangular
                % entries of the symmetric W, for the sampled column s.
                C=zeros(m,p-m);
                C(inds1)=S_partial(c1,ind);
                C(inds2)=S_partial(r1,ind); 
                B=[C,eye(m)]*Q; % Jacobian of W*s+b with respect to alpha
                d=V3(:,ind);
                hessian_mat=hessian_mat+f_N*B'*diag(d)*B;
            end
        end
    end
end
if opt
    % W is treated as symmetric, so each off-diagonal parameter collects the
    % contributions of both (i,j) and (j,i); the diagonal is not a parameter.
    dPL_dW=(sum2_phi_S+sum2_phi_S')-2*sum2_S;
    dPL_dW=dPL_dW-diag(diag(dPL_dW));
    dPL_db=sum_phi-sum_S;
    grad_u=conv_Wb2u(dPL_dW,dPL_db);
end

%================================================
function f=rho_x(x)
% rho_x evaluates log(cosh(x)) element-wise for a real array x.
% The direct form log(cosh(x)) overflows to Inf once |x| exceeds roughly 710,
% although the true value is only about |x|-log(2). The identity
%   log(cosh(x)) = |x| + log(1+exp(-2|x|)) - log(2)
% is used instead; it stays finite for every finite x.

a=abs(x);
f=a+log1p(exp(-2*a))-log(2);
%================================================
function slope=drho_dx(x)
% drho_dx returns the element-wise first derivative of log(cosh(x)),
% which is tanh(x). The output has the same size as x.
slope=tanh(x);
%================================================
function curvature=d2rho_d2x(x)
% d2rho_d2x returns the element-wise second derivative of log(cosh(x)),
% i.e. 1-tanh(x)^2. The output has the same size as x.
t=tanh(x);
curvature=1-t.^2;
%================================================
function u = conv_Wb2u(W,b)
% conv_Wb2u stacks the Boltzmann parameters into a single column vector:
% first the entries of W strictly above the diagonal (in column-major order),
% followed by the entries of b. The matrix size m is taken from numel(b).
n_units=numel(b);
upper_idx=find(triu(true(n_units),1));
u=[W(upper_idx(:));b];