Stochastic $p$th root approximation of a stochastic matrix: A Riemannian optimization approach¶
Fabio Durastante, Beatrice Meini
addpath("../Matlabcodes/approximate-embedded-markov/ConstrainedOptimization/")
addpath("../Matlabcodes/approximate-embedded-markov/Utilities/")
addpath("../Matlabcodes/approximate-embedded-markov/Manifold/")
addpath("../Matlabcodes/approximate-embedded-markov/Matrices/")
here = pwd;
cd ../Matlabcodes/approximate-embedded-markov/manopt/
importmanopt;
cd(here);
Manopt was added to Matlab's path. Path not saved: please re-call importmanopt next time.
The problem we want to solve¶
Given a stochastic matrix $P$, i.e., $$P \mathbf{1} = \mathbf{1}, \qquad P \geq 0,$$
we want to find a stochastic matrix $X$ such that $$ X^p = P, \qquad p \in \mathbb{N},\;\; p \geq 2. $$
In general, such a matrix does not exist; consider, e.g., the matrix $$ P = \begin{bmatrix} 0.5000 & 0.3750 & 0.1250 \\ 0.7500 & 0.1250 & 0.1250 \\ 0.0833 & 0.0417 & 0.8750 \end{bmatrix} $$ whose eigenvalues are $\lambda(P) = \{-0.2500,0.7500,1\}$.
Counterexamples¶
P = [0.5000 0.3750 0.1250
0.7500 0.1250 0.1250
0.0833 0.0417 0.8750];
ev = eig(P)
ev = 3x1 double
   -0.2500
    1.0000
    0.7500
sqrtm(P)
ans =
   0.6220 + 0.1667i   0.3110 - 0.1667i   0.0670 + 0.0000i
   0.6220 - 0.3333i   0.3110 + 0.3333i   0.0670 - 0.0000i
   0.0446 + 0.0000i   0.0224 - 0.0000i   0.9330 + 0.0000i
There is no stochastic square root: since the eigenvalues of $P$ are distinct, every square root is a primary function, and the negative eigenvalue $-0.25$ produces a nonreal eigenvalue that cannot be paired with its conjugate, so no real (hence no stochastic) square root exists. And the pathologies do not end here$\ldots$
Non-primary (stochastic) roots¶
$$ P(a) = \frac{1}{3} \begin{bmatrix} 1-2a & 1+a & 1+a \\ 1+a & 1-2a & 1+a \\ 1+a & 1+a & 1-2a \\ \end{bmatrix}, \quad 0 < a \leq \frac{1}{3}, \; \lambda{(P(a))} = \{1,-a,-a\}.$$ This matrix has only one stochastic square root, which is not a primary function: its eigenvalues are $\{1, i\sqrt{a}, -i\sqrt{a}\}$, so the repeated eigenvalue $-a$ of $P(a)$ is mapped to two distinct values.
P = @(a) [1-2*a,1+a,1+a;1+a,1-2*a,1+a;1+a,1+a,1-2*a]/3;
sqrtm(P(1/6))
ans =
   0.3333 + 0.2722i   0.3333 - 0.1361i   0.3333 - 0.1361i
   0.3333 - 0.1361i   0.3333 + 0.2722i   0.3333 - 0.1361i
   0.3333 - 0.1361i   0.3333 - 0.1361i   0.3333 + 0.2722i
$$ X(a) = \frac{1}{3} \begin{bmatrix} 1 & 1+\sqrt{3a} & 1-\sqrt{3a} \\ 1-\sqrt{3a} & 1 & 1 + \sqrt{3a} \\ 1+\sqrt{3a} & 1-\sqrt{3a} & 1 \\ \end{bmatrix} $$
X = @(a) [1,1+sqrt(3*a),1-sqrt(3*a);1-sqrt(3*a),1,1+sqrt(3*a);1+sqrt(3*a),1-sqrt(3*a),1]/3;
norm(P(1/6)-X(1/6)*X(1/6))
ans = 1.0834e-16
Pathologies, pathologies, and more pathologies$\ldots$¶
- A stochastic matrix may have no $p$th root for any $p$,
- A stochastic matrix may have $p$th roots but no stochastic $p$th root,
- A stochastic matrix may have a stochastic principal $p$th root as well as a stochastic nonprimary $p$th root,
- A stochastic matrix may have a stochastic principal $p$th root but no other stochastic $p$th root,
- The principal $p$th root of a stochastic matrix with distinct, real, positive eigenvalues is not necessarily stochastic,
- A (row) diagonally dominant stochastic matrix may not have a stochastic principal $p$th root,
- A stochastic matrix whose principal $p$th root is not stochastic may still have a primary stochastic $p$th root,
- A stochastic matrix with distinct eigenvalues may have a stochastic principal $p$th root and a different stochastic primary $p$th root.
See Higham, N. J., & Lin, L. (2011). On $p$th roots of stochastic matrices. Linear Algebra and its Applications, 435(3), 448-463.
So let's approximate it!¶
Let us call $$ \mathbb{S}_n^0 = \{ S \in \mathbb{R}^{n \times n} \,:\; S \mathbf{1} = \mathbf{1}, \; S \geq 0 \}, $$ and decide to compute one of the following:
- $ X = \displaystyle \arg\min_{ X \in \mathbb{S}_n^0 } \frac{1}{2}\| X^p - A\|_F^2$,
- $ \displaystyle X = \arg\min_{ X \in \mathbb{S}_n^0 } \frac{1}{2}\| X - A^{\frac{1}{p}} \|_F^2$,
- $ \mathbf{h} = \displaystyle \arg\min_{ \mathbf{h} \in \Omega } \frac{1}{2}\| X(\mathbf{h}) - A^{\frac{1}{p}} \|_F^2$, where $\Omega = \{ \mathbf{h} \in \mathbb{R}^n \,:\,\mathbf{1}^T \mathbf{h} = 1,\; B \mathbf{h} \geq 0, \; B = [\operatorname{vec}(I)|\operatorname{vec}(A)|\cdots|\operatorname{vec}(A^{n-1})] \}$, and the approximation is the matrix polynomial $$ X = X(\mathbf{h}) = \sum_{i=0}^{n-1} h_i A^i; $$ a sketch of this parametrization is given right after the list.
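A minimal sketch of the third (matrix-polynomial) parametrization, assuming the principal root $A^{1/p}$ is computed with MATLAB's fractional matrix power and the linearly constrained least-squares problem in $\mathbf{h}$ is handled by lsqlin; the function name and details are ours, not the repository's.
function X = polyroot_sketch(A, p)
%%POLYROOT_SKETCH approximate pth root as a polynomial in A (illustrative only)
n = size(A,1);
R = real(A^(1/p));                   % principal p-th root (real part, to shed round-off)
B = zeros(n^2, n);                   % B = [vec(I) | vec(A) | ... | vec(A^{n-1})]
Ak = eye(n);
for k = 1:n
    B(:,k) = Ak(:);
    Ak = Ak*A;
end
% min_h 0.5*|| B*h - vec(A^{1/p}) ||^2   s.t.   1'*h = 1,  B*h >= 0
h = lsqlin(B, R(:), -B, zeros(n^2,1), ones(1,n), 1);
X = reshape(B*h, n, n);              % X(h) = sum_i h_i A^i
end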
A constrained optimization problem¶
$$ \text{Given } p \in \mathbb{N} \text{ find:} \qquad \begin{array}{ll} \displaystyle \min_{ X \in \mathbb{R}^{n \times n} } & \displaystyle F(X) := \frac{1}{2}\| X^p - P \|_F^2, \\ \text{s.t. } & X \mathbf{1} = \mathbf{1},\\ & X \geq 0.\end{array} $$
This can be done with the fmincon routine in MATLAB; we can achieve this either by exploiting the explicit computation of the gradient and the Hessian, or by using automatic finite-difference approximations:
$$ \nabla F(X) = \sum_{j=1}^{p} (X^T)^{j-1} (X^p - P) (X^T)^{p-j} $$
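This expression follows from the product rule for the derivative of $X \mapsto X^p$ and the cyclic property of the trace: for any direction $E \in \mathbb{R}^{n\times n}$, $$ \mathrm{D}F(X)[E] = \Big\langle X^p - P, \sum_{j=1}^{p} X^{j-1} E\, X^{p-j} \Big\rangle_F = \sum_{j=1}^{p} \operatorname{Trace}\!\left( X^{p-j}\, (X^p-P)^T X^{j-1} E \right) = \langle \nabla F(X), E \rangle_F . $$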
The Hessian of $F$ is an $n^2 \times n^2$ matrix $H$, namely the matrix representation of the Fréchet derivative $L_{\nabla F}$ of $\nabla F$, i.e., for any $E \in \mathbb{R}^{n \times n}$:
$$ \operatorname{vec}( L_{\nabla F}(X,E)) = H \operatorname{vec}(E) .$$
A constrained optimization problem¶
The calculation is a bit cumbersome, but it can be written in closed form as:
$$\begin{split} L_{\nabla F}(X,E) = & \; \sum_{j=1}^{p} \left( (X^T)^{j-1} (X^p - P) \sum_{l=1}^{p-j} (X^T)^{p-j-l} E^T (X^T)^{l-1} \right. \\ & + (X^T)^{j-1} \sum_{k=1}^p X^{p-k} E X^{k-1} (X^T)^{p-j} \\ & \left. + \sum_{i=1}^{j-1} (X^T)^{j-1-i} E^T (X^T)^{i-1} (X^p - P) (X^T)^{p-j} \right) \\ \end{split}$$
Then it is a matter of putting together a couple of MATLAB functions that evaluate these quantities, so that they can be passed to fmincon.
MATLAB Routines (objective)¶
function [Y,g] = objective(X,A,p)
%%OBJECTIVE is the objective function to minimize
n = sqrt(length(X));
Xm = reshape(X,n,n);
Xp = mpower(Xm,p);
Y = 0.5*norm(Xp(:)-A(:),"fro")^2;
if nargout > 1 % Gradient is required
F = Xp - A;
g = zeros(n,n);
for j=1:p
g = g + mpower(Xm',j-1)*F*mpower(Xm',p-j);
end
end
end
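As a quick sanity check (not part of the presented code), the analytic gradient can be compared against a central finite-difference approximation on a small random instance; this sketch assumes the objective function above is saved as objective.m on the path.
n = 4; p = 3; rng(0);
A = rand(n,n); A = A./sum(A,2);             % a random row-stochastic target
x = rand(n^2,1);                            % a random (vectorized) point
[~,g] = objective(x,A,p);                   % analytic gradient (returned as an n-by-n matrix)
gfd = zeros(n^2,1); h = 1e-6;
for k = 1:n^2
ek = zeros(n^2,1); ek(k) = h;
gfd(k) = (objective(x+ek,A,p) - objective(x-ek,A,p))/(2*h);
end
disp(norm(g(:) - gfd)/norm(gfd))            % should be tiny (roughly 1e-8 or smaller)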
MATLAB Routines (Hessian)¶
function y = hessian(x,lambda,v,A,p)
%HESSIAN Hessian-vector product; uses the matrix formulation and a number of conversions between matrix and vector arguments
n = size(A,1); % Size of the problem
X = reshape(x,n,n); % Current point
E = reshape(v,n,n); % Evaluation matrix
F = mpower(X,p)-A; Y = zeros(n,n); % Hessian (free objective)
for j=1:p
S = zeros(n,n);
for l=1:p-j
S = S+mpower(X',p-j-l)*E'*mpower(X',l-1);
end
Y = Y + mpower(X',j-1)*F*S;
S = zeros(n,n); % Second term
for k=1:p
S = S + mpower(X,p-k)*E*mpower(X,k-1)*mpower(X',p-j);
end
Y = Y + mpower(X',j-1)*S;
for i=1:j-1
Y = Y + mpower(X',j-1-i)*E'*mpower(X',i-1)*F*mpower(X',p-j);
end
end
% Multipliers of the nonlinear constraints: always empty in our case, since the
% stochasticity constraints are linear.
l1 = lambda.ineqnonlin; l2 = lambda.eqnonlin;
y = Y(:); % Final conversion back to a vector
end
Putting it all together¶
We can collect all this code in a MATLAB function:
function [X,output,history] = approximatepower(A,p,A0,tol,maxIterations,varargin)
%%APPROXIMATEPOWER solves the constrained optimization problem
% X = arg min || X^p - A ||_F
% requiring X to be row-stochastic, i.e., X >= 0 and X 1 = 1.
% The optional argument can be either 'FD' to use finite difference
% approximations of Hessian and Gradients, or 'ANALYTICAL' to use the one computed analytically.
end
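Purely as an illustration (this is not the repository implementation, which also tracks the convergence history and offers the 'FD' option), a minimal sketch of how the two routines above could be wired into fmincon might look as follows; the linear constraints encode X*1 = 1 and X >= 0, and the options used are standard fmincon options.
function X = approximatepower_sketch(A,p,X0,tol,maxIterations)
%%APPROXIMATEPOWER_SKETCH illustrative wiring of the routines above into fmincon
n = size(A,1);
Aeq = kron(ones(1,n), eye(n));      % row sums: (X*1)_i = 1 for every i
beq = ones(n,1);
lb  = zeros(n^2,1);                 % X >= 0 entrywise
opts = optimoptions('fmincon', ...
    'Algorithm','interior-point', ...
    'SpecifyObjectiveGradient',true, ...
    'HessianMultiplyFcn',@(x,lambda,v) hessian(x,lambda,v,A,p), ...
    'SubproblemAlgorithm','cg', ...  % needed when a Hessian multiply function is supplied
    'OptimalityTolerance',tol, 'MaxIterations',maxIterations);
x = fmincon(@(x) objgrad(x,A,p), X0(:), [],[], Aeq, beq, lb, [], [], opts);
X = reshape(x,n,n);
end
function [f,g] = objgrad(x,A,p)
[f,G] = objective(x,A,p);           % objective returns the gradient as an n-by-n matrix
g = G(:);                           % fmincon expects it as a vector
end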
Note: This and all the other functions we are going to discuss can be found in the GitHub repository approximate-embedded-markov:
git clone --recurse-submodules git@github.com:Cirdans-Home/approximate-embedded-markov.git
An example¶
Let's try everything on an example.
X0 = rand(3,3);
X0 = X0./sum(X0,2);
[Xapprox,output,history] = approximatepower(P(1/6),2,X0,1e-6,500,'analytical');
disp(Xapprox)
disp(P(1/6))
disp(norm(Xapprox*Xapprox-P(1/6),"fro"))
    0.3337    0.5657    0.1005
    0.0943    0.3363    0.5695
    0.5720    0.0980    0.3300

    0.2222    0.3889    0.3889
    0.3889    0.2222    0.3889
    0.3889    0.3889    0.2222

   2.8422e-07
[v,l] = eigs(Xapprox',1,'largestabs');
disp(v.'./sum(v))
0.3333 0.3333 0.3333
Another example¶
We consider the matrix Pajek/GD96_c from the SuiteSparse matrix collection as the adjacency matrix $A$ of an undirected graph normalized by the inverse of the sum of the row entries.
load("GD96_c.mat")
A = Problem.A;
G = graph(A,'omitselfloops');
n = size(A,1);
D = spdiags(G.degree,0,n,n);
P = D\A;
X0 = rand(n,n); % Optimization step
X0 = X0./sum(X0,2);
[Xapprox,output,history] = approximatepower(P,2,X0,1e-6,1000,'analytical');
disp(norm(Xapprox*Xapprox - P,"fro"))
2.0209
Visualize the steady state¶
For a Markov chain, the quantity that interests us most, and that we would like to preserve, is the steady-state vector $\boldsymbol{\pi}$: the distribution reached by taking steps of length one or steps of length one half should remain unchanged!
[pitrue,l] = eigs(P',1,'largestabs'); pitrue = pitrue.'./sum(pitrue);
[pi,l] = eigs(Xapprox',1,'largestabs'); pi = pi.'./sum(pi);
figure("Position",[947 761 871 323]); semilogy(1:n,pitrue,'o',1:n,pi,'x')
legend('Steady state','Approximate root steady state')
Preserving the steady state¶
Let $\boldsymbol{\pi} \in \mathbb{R}^{n}$ be a positive vector such that $\boldsymbol{\pi}^T \mathbf{1} = 1$, and define the set $$ \mathbb{S}_n^{\boldsymbol{\pi}} = \{ S \in \mathbb{R}^{n \times n} \,:\; S \mathbf{1} = \mathbf{1}, \; \boldsymbol{\pi}^T S = \boldsymbol{\pi}^T, \; S > 0 \}, $$ i.e., $\mathbb{S}_n^{\boldsymbol{\pi}}$ is the set of $n\times n$ positive stochastic matrices, having the same stationary distribution $\boldsymbol{\pi}$. After proving that $\mathbb{S}_n^{\boldsymbol{\pi}}$ is a manifold our optimization problem is rewritten as: $$ \text{Given }A \in \mathbb{S}_n^0 \text{ and } \boldsymbol{\pi} \text{ s.t. } \boldsymbol{\pi}^T A = \boldsymbol{\pi}^T \text{ find } X = \displaystyle \arg\min_{ X \in \mathbb{S}_n^{\boldsymbol{\pi}}} \frac{1}{2} \| X^p - A\|_F^2. $$
- The approach is inspired by: Douik, A.; Hassibi, B. Manifold optimization over the set of doubly stochastic matrices: a second-order geometry. IEEE Trans. Signal Process.67(2019), no.22, 5761–5774.
The Riemannian structure¶
Definition: Let $\mathcal{E}$ be a linear space of dimension $d$. A non-empty subset $\mathcal{M}$ of $\mathcal{E}$ is a smooth embedded submanifold of $\mathcal{E}$ of dimension $n$ if either
$n = d$ and $\mathcal{M}$ is open in $\mathcal{E}$;
$n = d-k$ for some $k \geq 1$ and, for each $x \in \mathcal{M}$, there exists a neighborhood $U$ of $x$ in $\mathcal{E}$ and a smooth function $h : U \rightarrow \mathbb{R}^k$ such that
- If $y \in U$, then $h(y)= 0$ if and only if $y \in \mathcal{M}$; and
- $\operatorname{rank} \mathrm{D}h(x) = k$, for $\mathrm{D}h(x)$ the differential of $h$ at $x$;
Such function $h$ is called a local defining function for $\mathcal{M}$ at $x$.
Direct inspection: $\mathbb{S}_n^{\boldsymbol{\pi}}$ is an embedded manifold of $\mathbb{R}^{n \times n}$ of dimension $(n-1)^2$, since it is indeed generated by $2n-1$ linearly independent equations.
The tangent space¶
Definition: A tangent vector $\xi_x$ to a manifold $\mathcal{M}$ at a point $x$ is a mapping from the set $\mathfrak{F}_x(\mathcal{M})$ of smooth real-valued functions defined on a neighborhood of $x$ to $\mathbb{R}$ such that there exists a curve $\gamma$ on $\mathcal{M}$ realizing the tangent vector $\xi_x$, i.e., such that $\gamma(0)=x$, and $$ \xi_x f = \dot{\gamma}(0)f \triangleq \left.\frac{\mathrm{d}(f(\gamma(t)))}{\mathrm{d}t}\right\rvert_{t=0}, \; \forall \, f \in \mathfrak{F}_x(\mathcal{M}); $$ The tangent space $\mathcal{T}_x \mathcal{M}$ at $x\in\mathcal M$ is then the set of all tangent vectors to $\mathcal{M}$ at a point $x$. The tangent bundle is the manifold $\mathcal{T} \mathcal{M}$ that assembles all the tangent vectors, i.e., the disjoint union $\mathcal{T} \mathcal{M} = {\bigsqcup_{x \in \mathcal{M}}} \mathcal{T}_x \mathcal{M}$.
Lemma The tangent space to $\mathbb{S}_n^{\boldsymbol{\pi}}$ at $S\in\mathbb{S}_n^{\boldsymbol{\pi}}$ is given by $$ \mathcal{T}_S \mathbb{S}_n^{\boldsymbol{\pi}} = \{ \xi_S \in \mathbb{R}^{n \times n}\,:\; \xi_S \mathbf{1} = \mathbf{0}, \; \boldsymbol{\pi}^T \xi_S = \mathbf{0} \}. $$
The Fisher metric¶
$\mathbb{S}_n^{\boldsymbol{\pi}}$ can be turned into a Riemannian manifold by endowing its tangent space at every point with a positive-definite inner product. We choose the Fisher information metric: $$ \begin{split} g_S(\xi_S,\eta_S) = &\; \langle \xi_S, \eta_S \rangle_S \\ = &\; \sum_{i,j=1}^{n} \frac{ (\xi_S)_{i,j} (\eta_S)_{i,j} }{S_{i,j}} \\ = &\; \operatorname{Trace}( (\xi_S \oslash S) \eta_S^T ), \;\forall\, \xi_S,\eta_S \in \mathcal{T}_S \mathbb{S}_n^{\boldsymbol{\pi}}. \end{split} $$
- Note: this is why we require strict positivity, $S > 0$, of the entries of the stochastic matrices: we divide by $S_{i,j}$;
- Also: we require the chain to be irreducible, i.e., $\boldsymbol{\pi} > 0$.
Implementing Manifolds in MANOPT¶
To use the optimization algorithm implemented in MANOPT on $\mathbb{S}_n^{\boldsymbol{\pi}}$ we need to create a new manifold object. This is done in MANOPT by defining a function
that outputs a manifold, i.e., a structure array
whose fields are the various functions needed by the optimization algorithms.
function M = multinomialfixedstochasticfactory(pi,optionsolve)
n = length(pi);
e = ones(n, 1);
M.name = @() sprintf(['%dx%d row-stochastic matrices with positive ' ...
'entries and fixed left-stochastic eigenvector'], n, n);
M.dim = @() (n-1)^2;
% Fisher metric
M.inner = @iproduct;
function ip = iproduct(X, eta, zeta)
ip = sum((eta(:).*zeta(:))./X(:));
end
M.norm = @(X, eta) sqrt(M.inner(X, eta, eta));
end
Orthogonal projection¶
To use any optimization strategy we need an expression for the projection operator on the tangent space $$ \Pi_S \,:\,\mathbb{R}^{n \times n} \rightarrow \mathcal{T}_S \mathbb{S}_n^{\boldsymbol{\pi}}. $$ For a $Z \in \mathbb{R}^{n \times n}$ and an $S \in \mathbb{S}_n^{\boldsymbol{\pi}}$, we can express the orthogonal projection by using the decomposition of any ambient vector into $$ Z = \Pi_S(Z) + \Pi_S^\perp(Z). $$
Lemma: The orthogonal complement of the tangent space $\mathcal{T}_S \mathbb{S}_n^{\boldsymbol{\pi}}$ has the expression $$ \mathcal{T}_S^\perp \mathbb{S}_n^{\boldsymbol{\pi}} = \{ \xi_S^\perp \in \mathbb{R}^{n\times n} \,:\, \xi_S^\perp = (\boldsymbol{\alpha} \mathbf{1}^T + \boldsymbol{\pi} \boldsymbol{\beta}^T) \odot S \}, $$ for some vectors $\boldsymbol{\alpha},\boldsymbol{\beta} \in \mathbb{R}^{n}$.
Orthogonal projection (computation)¶
Proposition. The orthogonal projection $\Pi_S\,:\,\mathbb{R}^{n\times n} \rightarrow \mathcal{T}_S \mathbb{S}_n^{\boldsymbol{\pi}}$ of a matrix $Z$-with respect to the scalar product induced by Fisher's metric-has the following expression: $$ \Pi_S(Z) = Z - (\boldsymbol{\alpha} \mathbf{1}^T + \boldsymbol{\pi}\boldsymbol{\beta}^T)\odot S, $$ where the vectors $\boldsymbol{\alpha}$ and $\boldsymbol{\beta}$ are a solution to the following consistent linear system $$ \begin{bmatrix} Z\mathbf{1} \\ Z^T \boldsymbol{\pi} \end{bmatrix}= \begin{bmatrix} I & D_{\boldsymbol{\pi}} S \\ S^T D_{\boldsymbol{\pi}} & \mathrm{diag}(S^T D_{\boldsymbol{\pi}} \boldsymbol{\pi}) \end{bmatrix} \begin{bmatrix} \boldsymbol{\alpha} \\ \boldsymbol{\beta} \end{bmatrix}, \quad D_{\boldsymbol{\pi}} = \operatorname{diag}(\boldsymbol{\pi}). $$
- we must therefore add the computation of the orthogonal projection to our
MATLAB
function
M.proj = @projection;
function etaproj = projection(X, eta) % Projection of the vector eta in the ambient space onto the tangent space
b = [sum(eta, 2) ; eta'*pi];
[alpha, beta] = mylinearsolve(X, b);
etaproj = eta - (alpha*e' + pi*beta').*X;
end
- we will then return to the solution of the linear system.
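As a hedged sanity check of the projection formula (independent of the mylinearsolve routine discussed below), one can verify on a small random example that the projected matrix satisfies the two tangent-space conditions; here the consistent singular system is solved directly with pinv, and all variable names are ours.
n = 5; rng(1);
S = rand(n,n) + 0.1; S = S./sum(S,2);             % a random positive row-stochastic matrix
[V,D] = eig(S'); [~,k] = max(abs(diag(D)));
piv = real(V(:,k)); piv = piv/sum(piv);           % its stationary distribution
Z = randn(n,n);                                   % an arbitrary ambient matrix
Dpi = diag(piv); e = ones(n,1);
Acal = [eye(n), Dpi*S; S'*Dpi, diag(S'*Dpi*piv)];
ab = pinv(Acal)*[Z*e; Z'*piv];                    % consistent singular system, minimum-norm solution
alpha = ab(1:n); beta = ab(n+1:end);
PZ = Z - (alpha*e' + piv*beta').*S;
disp([norm(PZ*e), norm(piv'*PZ)])                 % both should be ~ 0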
Riemannian gradient¶
Let now $f : \mathbb{S}_n^{\boldsymbol{\pi}} \rightarrow \mathbb{R}$ be a smooth real function defined on the manifold, and denote by $\operatorname{Grad} f(S)$ its euclidean gradient with respect to the euclidean metric.
Definition. The Riemannian gradient of $f$ at $x$, denoted by $\operatorname{grad}f(x)$, of a manifold $\mathcal{M}$ is defined as the unique vector in $\mathcal{T}_x\mathcal{M}$ that satisfies: $$ \langle \operatorname{grad}f(x), \xi_x \rangle_x = \mathrm{D} f(x) [\xi_x],\ \forall \ \xi_x \in \mathcal{T}_x\mathcal{M}. $$ Where we have denoted by $\mathrm{D} f(x)[\xi]$ the directional derivative of $f$ given by: $$ \mathrm{D} f(x)[\xi]=\lim_{t\to 0}\frac{f(x+t\xi)-f(x)}{t}. $$
Proposition. The Riemannian gradient $\operatorname{grad}f(S)$ is expressed in terms of the Euclidean gradient $\operatorname{Grad}f(S)$ as: $$ \operatorname{grad}f(S) = \Pi_S(\operatorname{Grad}f(S) \odot S).$$
Riemannian gradient (computation)¶
We need to add a routine for this to the MANOPT implementation.
% Conversion of Euclidean to Riemannian gradient
M.egrad2rgrad = @egrad2rgrad;
function rgrad = egrad2rgrad(X, egrad) % projection of the euclidean gradient
mu = (X.*egrad);
b = [sum(mu, 2) ; mu'*pi];
[alpha, beta] = mylinearsolve(X, b);
rgrad = mu - (alpha*e' + pi*beta').*X;
end
- Remark: we still have to solve a linear system with the same structure, and we use the same routine as for the orthogonal projection.
Riemannian Hessian - Levi-Civita connection¶
Definition An affine connection $\nabla \,:\,\mathcal{T}\mathcal{M} \times \mathcal{T}\mathcal{M} \to \mathcal{T}\mathcal{M}$ is a map that associates to each $(\eta,\xi)$ in the tangent bundle the tangent vector $\nabla_\eta \xi$ satisfying for all $a,b \in \mathbb{R}$, and smooth $f,g: \mathcal{M} \longrightarrow \mathbb{R}$:
$\nabla_{f\eta+g\chi}\xi = f\,(\nabla_\eta \xi)+ g\,(\nabla_\chi \xi)$,
$\nabla_{\eta}(a\xi+b\varphi) = a\nabla_{\eta}\xi+b\nabla_{\eta}\varphi$,
$\nabla_{\eta}(f\xi) = \eta(f)\, \xi + f(\nabla_\eta \xi)$,
wherein a vector field $\eta$ acts on the function $f$ by derivation, that is $\eta(f)=\mathrm{D}(f)[\eta].$ We call Levi-Civita connection the unique torsion-free affine connection that preserves the Riemannian metric, i.e., the affine connection such that
$\nabla_\eta \xi - \nabla_\xi \eta = [\eta,\xi] $ $\forall\,\eta,\xi \in \mathcal{T}\mathcal{M}$,
$\chi \langle \eta,\xi \rangle = \langle \nabla_\chi \eta,\xi \rangle + \langle\eta , \nabla_\chi \xi \rangle$, $\forall\,\eta ,\xi ,\chi \in \mathcal{T} \mathcal{M}$,
where we are denoting with $[\cdot,\cdot]$ the Lie bracket $$ [\xi,\eta]g = \xi(\eta (g)) - \eta(\xi (g)). $$
Riemannian Hessian - the Hessian¶
Definition The Riemannian Hessian of $f$ at $x$, denoted by {$\operatorname{{hess}}f(x)$}, of a manifold $\mathcal{M}$ is a mapping from $\mathcal{T}_x\mathcal{M}$ into itself defined by: $$ \operatorname{{hess}}f(x)[\xi_x] = \nabla_{\xi_x} \operatorname{grad}f(x), \ \forall \ \xi_x \in \mathcal{T}_x\mathcal{M}, $$ where $\operatorname{grad}f(x)$ is the Riemannian gradient and $\nabla$ is the Levi-Civita connection on $\mathcal{M}$.
Proposition (Koszul formula). The Levi-Civita connection on the Euclidean space $\mathbb{R}^{n \times n}$ endowed with the Fisher information metric is given by $$ \nabla_{\eta_S} \xi_S = \mathrm{D}(\xi_S)[\eta_S] - \cfrac{1}{2} (\eta_S \odot \xi_S) \oslash S.$$
- Koszul formula is Theorem 5.3.1 in Absil, P.-A.; Mahony, R.; Sepulchre, R. Optimization algorithms on matrix manifolds. Princeton University Press, Princeton, NJ, 2008. xvi+224 pp. ISBN:978-0-691-13298-3
Riemannian Hessian - the Hessian¶
Theorem. The Riemannian Hessian $\mathrm{hess} f(S)[\xi_S]$ can be obtained from the Euclidean gradient $\operatorname{Grad} f(S)$ and the Euclidean Hessian $\operatorname{Hess} f(S)$ by using the identity $$ \mathrm{hess} f(S)[\xi_S] = \Pi_S\left(\mathrm{D}(\mathrm{grad} f(S))[\xi_S]\right)-\frac12\, \Pi_S\left(\left( \Pi_S(\mathrm{Grad} f(S)\odot S) \odot \xi_S\right) \oslash S\right), $$ where $$ \mathrm{D}(\mathrm{grad} f(S))[\xi_S] = \dot{\gamma}[\xi_S]- (\dot{\boldsymbol{\alpha}}[\xi_S] \mathbf{1}^T+\boldsymbol{\pi}\dot{\boldsymbol{\beta}}^T[\xi_S])\odot S - (\boldsymbol{\alpha} \mathbf{1}^T+\boldsymbol{\pi}\boldsymbol{\beta}^T)\odot \xi_S, $$ and $$\begin{split} \gamma = & \; \mathrm{Grad} f(S)\odot S,\\ \dot{\gamma}[\xi_S] = & \; \mathrm{Hess}\; f(S)[\xi_S]\odot S + \mathrm{Grad}\; f(S)\odot \xi_S,\\ \mathcal{A} = &\; \begin{bmatrix} I & D_{\boldsymbol{\pi}} S \\ S^T D_{\boldsymbol{\pi}} & \mathrm{diag}( S^TD_{\boldsymbol{\pi}}\boldsymbol{\pi}) \end{bmatrix}, \\ \boldsymbol{\alpha}, \boldsymbol{\beta} \,\text{s.t.}\,&\; \mathcal{A} \begin{bmatrix} \boldsymbol{\alpha} \\ \boldsymbol{\beta} \end{bmatrix} = \begin{bmatrix} \gamma \mathbf{1} \\ \gamma^T \boldsymbol{\pi} \end{bmatrix},\\ \dot{\boldsymbol{\alpha}}[\xi_S], \dot{\boldsymbol{\beta}}[\xi_S] \,\text{s.t.}\,&\; \mathcal{A} \begin{bmatrix} \dot{\boldsymbol{\alpha}}[\xi_S] \\ \dot{\boldsymbol{\beta}}[\xi_S] \end{bmatrix} = \begin{bmatrix} \dot{\gamma}[\xi_S] \mathbf{1} \\ \dot{\gamma}^T[\xi_S] \boldsymbol{\pi} \end{bmatrix} - \begin{bmatrix} 0 & D_{\boldsymbol{\pi}} \xi_S \\ \xi_S^T D_{\boldsymbol{\pi}} & \mathrm{diag}(\xi_S^TD_{\boldsymbol{\pi}}\boldsymbol{\pi}) \end{bmatrix} \begin{bmatrix} \boldsymbol{\alpha} \\ \boldsymbol{\beta} \end{bmatrix}. \end{split} $$
Riemannian Hessian (computation)¶
The corresponding routine for the MANOPT implementation is given by
M.ehess2rhess = @ehess2rhess;
function rhess = ehess2rhess(X, egrad, ehess, eta)
% Computing the directional derivative of the Riemannian
% gradient
gamma = egrad.*X;
gammadot = ehess.*X + egrad.*eta;
bdot = [ gammadot*e ; gammadot.'*pi];
b = [gamma*e ; gamma.'*pi];
[alpha, beta] = mylinearsolve(X, b);
S1 = [ zeros(size(eta)) , diag(pi)*eta; eta'*diag(pi) , diag(eta'*diag(pi)*pi) ];
[alphadot, betadot] = mylinearsolve(X, bdot - S1*[alpha; beta]); % %- [eta*beta; eta'*alpha]
S = (alpha*e' + pi*beta');
deltadot = gammadot - (alphadot*e' + pi*betadot').*X- S.*eta; % rgraddot
% Computing Riemannian gradient
delta = gamma - S.*X; % rgrad
% Riemannian Hessian in the ambient space
nabla = deltadot - 0.5*(delta.*eta)./X;
% Riemannian Hessian on the tangent space
rhess = projection(X, nabla);
end
Retractions - Sinkhorn-Knopp algorithm¶
To complete the construction of the Riemannian optimization algorithm, we also need to define the retraction from the tangent bundle to the manifold. To obtain such a map, we apply a suitable modification of the generalized Sinkhorn-Knopp algorithm, which is based on the following theorem:
Theorem (Sinkhorn generalization). Let $A \in \mathbb{R}^{n \times n}$ be a nonnegative matrix. Then for any vectors $\mathbf{r},\mathbf{c} \in \mathbb{R}^{n}$ with nonnegative entries there exist diagonal matrices $D_1$ and $D_2$ such that $$ D_1 A D_2 \mathbf{1} = \mathbf{r}, \qquad D_2 A^T D_1 \mathbf{1} = \mathbf{c}, $$ if and only if there exists a matrix $B$ such that $B\mathbf{1} = \mathbf{r}$ and $B^T\mathbf{1} = \mathbf{c}$, having the same nonzero pattern as $A$. Furthermore, if the matrix $A$ is positive, then $D_1$ and $D_2$ are unique up to a constant factor.
- Rothblum, U.G., Schneider, H. Scalings of matrices which have prespecified row sums and column sums via optimization. Linear Algebra Appl.114/115(1989), 737–764.
Retractions - Definition¶
Proposition. Let $A \in \mathbb{R}^{n \times n}$ be a matrix with positive entries. Then there exist diagonal matrices $D_1$ and $D_2$ such that $$ D_1 A D_2 \mathbf{1} = \mathbf{1}, \qquad \boldsymbol{\pi}^T D_1 A D_2 = \boldsymbol{\pi}^T. $$ Moreover, $D_1$ and $D_2$ are diagonal matrices such that $D_1\widehat A D_2 \mathbf{1}=\boldsymbol{\pi}$ and $\mathbf{1}^T D_1\widehat A D_2 =\boldsymbol{\pi}^T$, where $\widehat A=\mathrm{diag}(\boldsymbol{\pi})A$.
Definition. A retraction on a manifold $\mathcal{M}$ is a smooth map $R$ from the tangent bundle {$\mathcal{T} \mathcal{M} = \bigsqcup_{x \in \mathcal{M}} \mathcal{T}_x\mathcal{M}$, i.e., the disjoint union of the tangent spaces,} onto $\mathcal{M}$. For all $x \in \mathcal{M}$ the restriction of $R$ to $\mathcal{T}_x \mathcal{M}$, denoted by $R_x$, satisfies the following properties:
- $R_x(0) = x$ (centering);
- The curve $\gamma_{\xi_x}(\tau) = R_x(\tau \xi_x)$ satisfies $$ \left.\frac{d \gamma_{\xi_x}(\tau)}{d\tau}\right|_{\tau = 0} = \xi_x, \quad\,\forall\, \xi_x \in \mathcal{T}_x \mathcal{M}. \qquad \text{(local rigidity)} $$.
Retractions - Auxiliary Result¶
Proposition. Let $\mathcal{M}$ be an embedded manifold of the Euclidean space $\mathcal{E}$ and let $\mathcal{N}$ be an abstract manifold such that $\dim(\mathcal{M}) + \dim(\mathcal{N}) = \dim(\mathcal{E})$. Assume that there is a diffeomorphism \begin{align*} \phi: \mathcal{M} \times \mathcal{N} &\longrightarrow \mathcal{E}^* \\ (A,B) &\longmapsto \phi(A,B) \end{align*} where $\mathcal{E}^*$ is an open subset of $\mathcal{E}$, with a neutral element $I \in \mathcal{N}$ satisfying $$ \phi(A,I) = A, \ \forall \ A \in \mathcal{M}. $$ Under the above assumption, the mapping \begin{align*} R_x: \mathcal{T}_x\mathcal{M} &\longrightarrow \mathcal{M} \\ \xi_x &\longmapsto R_x(\xi_x) = \pi_1(\phi^{-1}(x+\xi_x)), \end{align*} where $\pi_1: \mathcal{M} \times \mathcal{N} \longrightarrow \mathcal{M}: (A,B) \longmapsto A$ is the projection onto the first component, defines a retraction on the manifold $\mathcal{M}$ for all $x \in \mathcal{M}$ and $\xi_x$ in the neighbourhood of $0_x$.
Retractions - Construction¶
Theorem. The map $R: \mathcal{T}\mathbb{S}_n^{\boldsymbol{\pi}} \longrightarrow \mathbb{S}_n^{\boldsymbol{\pi}} $ whose restriction $R_{S}$ to $ \mathcal{T}_S\mathbb{S}_n^{\boldsymbol{\pi}} $ is given by: $$ R_{S}(\xi_S) = S + \xi_S, $$ is a well-defined retraction on $\mathbb{S}_n^{\boldsymbol{\pi}}$ whenever $\xi_S$ is in a neighborhood of $\mathbf{0}_{S}$, more explicitly, whenever $S > - \xi_S$ entry-wise.
To avoid the deterioration observed for the analogous retraction on $\mathbb{S}_n^{\mathbf{1}}$ when elements of small modulus appear during the iterations of the Riemannian optimization algorithms, a modification can be used that combines the entry-wise exponential of a matrix with the Sinkhorn-Knopp algorithm.
- Douik, A.; Hassibi, B. Manifold optimization over the set of doubly stochastic matrices: a second-order geometry. IEEE Trans. Signal Process.67(2019), no.22, 5761–5774.
We adapt this proposal here to the manifold $\mathbb{S}_n^{\boldsymbol{\pi}}$.
Retractions - The implemented one¶
Theorem. The map $\hat{R}: \mathcal{T}\mathbb{S}_n^{\boldsymbol{\pi}} \longrightarrow \mathbb{S}_n^{\boldsymbol{\pi}} $ whose restriction $\hat{R}_{S}$ to $ \mathcal{T}_S\mathbb{S}_n^{\boldsymbol{\pi}} $ is given by: \begin{align*} \hat{R}_{S}(\xi_S) = \mathcal{S}\left( S \odot \exp(\xi_S \oslash S )\right), \end{align*} is a first-order retraction on $\mathbb{S}_n^{\boldsymbol{\pi}}$, where $\mathcal{S}\left( \cdot \right)$ represents an application of the modified Sinkhorn-Knopp’s algorithm, and $\exp(\cdot)$ the entry-wise exponential.
M.retr = @retraction;
function Y = retraction(X, eta, t)
if nargin < 3
t = 1.0;
end
Y = X.*exp(t*(eta./X));
Y = modifiedsinkhorn(Y,pi);
Y = max(Y, eps);
end
Modified Sinkhorn-Knopp implementation¶
function [B,u,v] = modifiedsinkhorn(A,pi,maxit,checkperiod)
N = size(A,1);
if nargin < 3 || isempty(maxit), maxit = 1000; end             % default values assumed here;
if nargin < 4 || isempty(checkperiod), checkperiod = 100; end  % the repository code may differ
tol = eps(N);
Ahat = diag(pi)*A;
iter = 0; % Iteration counter
u = ones(N,1); v = ones(N,1); e = v; % Initialize the scaling vectors u and v
while iter < maxit
iter = iter + 1;
u_prev = u; % keep the previous iterates for safekeeping
v_prev = v;
row = Ahat*v;
u = pi./(row); % update u and v
v = pi./(Ahat'*u);
if mod(iter, checkperiod) == 0 % Check if converged
gap = abs(u'*row - 1);
if isnan(gap)
break;
end
if gap <= tol
if norm(diag(u)*A*diag(v)*e-e,"inf") < tol && ...
norm(pi'*diag(u)*A*diag(v)-pi',"inf") < tol
break;
end
end
end
Modified Sinkhorn-Knopp implementation (continued)¶
if any(isinf(u)) || any(isnan(u)) || any(isinf(v)) || any(isnan(v))
warning('DoublyStochasticProjection:NanInfEncountered', ...
'Nan or Inf occured at iter %d. \n', iter);
u = u_prev;
v = v_prev;
break;
end
end
% The matrix we want is built from A as in the Theorem
B = diag(u)*A*diag(v);
end
- In order not to clutter the implementation of the manifold with too many sub-functions, this routine lives in a separate file, modifiedsinkhorn.m.
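A quick hedged check of the routine (it assumes modifiedsinkhorn.m is on the path and passes maxit and checkperiod explicitly): starting from an arbitrary positive matrix, the returned matrix should be row-stochastic with the prescribed stationary vector, which is exactly the property the retraction above relies on.
n = 6; rng(2);
piv = rand(n,1); piv = piv/sum(piv);        % a positive probability vector
Y = rand(n,n) + 0.1;                        % an arbitrary positive matrix
B = modifiedsinkhorn(Y, piv, 1000, 10);
disp(norm(B*ones(n,1) - ones(n,1), "inf"))  % ~ 0: rows sum to one
disp(norm(piv'*B - piv', "inf"))            % ~ 0: piv is the stationary vector of B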
Computational issues, a.k.a. how to solve the linear systems¶
We have to solve several compatible singular linear systems of the form $$ \mathcal{A} \begin{bmatrix} \mathbf{x}\\ \mathbf{y} \end{bmatrix} = \begin{bmatrix} \mathbf{b}\\ \mathbf{c} \end{bmatrix}, ~~~ \mathcal{A} = \begin{bmatrix} I & D_{\boldsymbol{\pi}} S \\ S^T D_{\boldsymbol{\pi}} & \mathrm{diag}(S^T D_{\boldsymbol{\pi}} \boldsymbol{\pi}) \end{bmatrix}. $$ To this purpose, we want to use an iterative method of Krylov type, to avoid assembling the $2 \times 2$ block matrix explicitly.
Proposition Given $S \in \mathbb{S}_n^{\boldsymbol{\pi}}$, the $2 \times 2$ block matrix $\mathcal{A}$ is such that
- $\mathcal{A}$ is similar to a singular M-matrix,
- $\lambda(\mathcal{A}) \in \{ 0 \} \cup \left[\frac{\left(\delta^*+1 -\sqrt{\delta^* (\delta^* +4 r^*-2)+1}\right)}{2}, \max\{ 1+\| \boldsymbol{\pi} \|_\infty, 2\| \boldsymbol{\pi} \|_\infty\}\right]$, for $$ r^* = \min_{j=1,\ldots,n} \max_{i = 1,\ldots, n} s_{i,j}, \text{ and } \delta^* = \min_{ \substack{i=1,\ldots,n \\ i \neq k}} \left( S^T D_{\boldsymbol{\pi}} \boldsymbol{\pi} \right)_i, $$ where $k$ is the column attaining the minimum in the definition of $r^*$, i.e., $\displaystyle r^* = \max_{i = 1,\ldots, n} s_{i,k}$; moreover, if $r^*+\delta^*< 1$, then $$\lambda(\mathcal{A}) \in \{ 0 \} \cup \left[ \delta^* \left( 1 - \frac{r^*}{1-\delta^*}\right), \max\{ 1+\| \boldsymbol{\pi} \|_\infty, 2\| \boldsymbol{\pi} \|_\infty\}\right].$$
Possible approaches¶
- Conjugate Gradient (CG) directly on the system,
- LSQR directly on the system
- Solve via CG the system for the Schur complement with respect to the $(1,1)$-block, i.e., we solve instead $$ [ \mathrm{diag}(S^T D_{\boldsymbol{\pi}} \boldsymbol{\pi}) - S^T D_{\boldsymbol{\pi}^2} S ]\mathbf{y} = \mathbf{c} - S^T D_{\boldsymbol{\pi}} \mathbf{b}, \quad \mathbf{x} = \mathbf{b} - D_{\boldsymbol{\pi}} S \mathbf{y}. $$ The matrix in the above system is a symmetric irreducible singular M-matrix; therefore it is positive semidefinite, with a simple eigenvalue equal to 0.
In both formulations, a basis of the null space is known, and we can consider the de-singularized version obtained through a rank-1 update of the matrix; this consists of solving the updated linear system $$ \left( \begin{bmatrix} I & D_{\boldsymbol{\pi}} S \\ S^T D_{\boldsymbol{\pi}} & \mathrm{diag}(S^T D_{\boldsymbol{\pi}} \boldsymbol{\pi}) \end{bmatrix} + \frac{1}{\boldsymbol{\pi}^T\boldsymbol{\pi} + n} \begin{bmatrix} \boldsymbol{\pi}\\-\mathbf{1} \end{bmatrix} [\boldsymbol{\pi}^T,-\mathbf{1}^T]\right) \begin{bmatrix} \hat{\mathbf{x}}\\ \hat{\mathbf{y}} \end{bmatrix} = \begin{bmatrix} \mathbf{b}\\ \mathbf{c} \end{bmatrix}, $$ or, $$ \left[ \mathrm{diag}(S^T D_{\boldsymbol{\pi}} \boldsymbol{\pi}) - S^T D_{\boldsymbol{\pi}^2} S + \sigma\frac{1}{n} \mathbf{1}\mathbf{1}^T \right]\hat{\mathbf{y}} = \mathbf{c} - S^T D_{\boldsymbol{\pi}} \mathbf{b}, \quad \hat{\mathbf{x}} = \mathbf{b} - D_{\boldsymbol{\pi}} S \hat{\mathbf{y}}, \quad \sigma > 0. $$
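To illustrate the Schur-complement route on a small dense example (a hedged sketch; the variable names are ours and sigma is chosen arbitrarily), one can solve the de-singularized reduced system, reconstruct x, and verify that the pair solves the original block system.
n = 5; rng(4);
S = rand(n,n) + 0.1; S = S./sum(S,2);
[V,D] = eig(S'); [~,k] = max(abs(diag(D)));
piv = real(V(:,k)); piv = piv/sum(piv);
Dpi = diag(piv); e = ones(n,1);
Z = randn(n,n); b = Z*e; c = Z'*piv;              % a consistent right-hand side [b; c]
sigma = 0.005;
Mschur = diag(S'*Dpi*piv) - S'*(Dpi^2)*S + sigma*(e*e')/n;   % de-singularized Schur complement
y = Mschur \ (c - S'*Dpi*b);
x = b - Dpi*S*y;
res = [eye(n), Dpi*S; S'*Dpi, diag(S'*Dpi*piv)]*[x; y] - [b; c];
disp(norm(res))                                   % ~ 0: [x; y] solves the original block system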
Possible approaches - Preconditioners¶
To precondition the CG algorithm we start from an empirical observation, since the target matrix $X$ has to be stochastic we expect, as the dimension $n$ of the problem grows, to encounter many small entries. This suggests using a diagonally compensated modified incomplete Cholesky directly on the system matrix. To avoid the presence of nonpositive pivots, the diagonal compensation term can be used to be $\min_{i=1,\ldots,n}\pi_i$. Another strategy consists of first scaling the system \begin{equation*} \begin{split} \left[ I - \mathrm{diag}(S^T D_{\boldsymbol{\pi}} \boldsymbol{\pi})^{-\frac{1}{2}} S^T D_{\boldsymbol{\pi}^2} S \mathrm{diag}(S^T D_{\boldsymbol{\pi}} \boldsymbol{\pi})^{-\frac{1}{2}} \right] \tilde{\mathbf{y}} = \\ \qquad \mathrm{diag}(S^T D_{\boldsymbol{\pi}} \boldsymbol{\pi})^{-\frac{1}{2}} \left( \mathbf{c} - S^T D_{\boldsymbol{\pi}} \mathbf{b}\right), \end{split} \end{equation*} and then recover $$ \mathbf{y} = \mathrm{diag}(S^T D_{\boldsymbol{\pi}} \boldsymbol{\pi})^{-\frac{1}{2}} \tilde{\mathbf{y}}, \quad \mathbf{x} = \mathbf{b} - D_{\boldsymbol{\pi}} S \mathbf{y}. $$ By calling $\tilde{S} = \mathrm{diag}(S^T D_{\boldsymbol{\pi}} \boldsymbol{\pi})^{-\frac{1}{2}} S^T D_{\boldsymbol{\pi}^2} S \mathrm{diag}(S^T D_{\boldsymbol{\pi}} \boldsymbol{\pi})^{-\frac{1}{2}}$, and observing that $\rho(\tilde{S})$ $=1$, we can use a truncated Neumann series as preconditioner, i.e., $$ P_{k,\tau} = I + \sum_{j = 1}^k \hat{S}^j, \qquad (\hat{S})_{p,q} = \begin{cases} (\tilde{S})_{p,q}, & |\tilde{S}_{i,j}| \geq \tau,\\ 0, & \text{otherwise}. \end{cases} $$
Implementation of mylinearsolve¶
All these ideas can be implemented in the mylinearsolve routine for our MANOPT manifold:
function [alpha, beta] = mylinearsolve(X, b)
switch upper(optionsolve.formulation)
case "BLOCK"
% 2 x 2 block formulation
switch upper(optionsolve.method)
case "CG"
[zeta, flag, res, iter,resvec] = ...
pcg(@(x) mycompute(x,true), b, 1e-6, 2*n,...
[],[],b);
case "PCG"
[zeta, flag, res, iter,resvec] = ...
pcg(@(x) mycompute(x,true), b, 1e-6, 2*n,...
@(x) myprec(x,true),[],b);
case "LSQR"
[zeta, flag, res, iter,resvec] = ...
lsqr(@mycompute, b, 1e-6, 2*n);
case "DIRECT"
S1 = [eye(size(X)) diag(pi)*X;
X.'*diag(pi) diag(X.'*(diag(pi)*pi))];
S1 = S1 + (1/(pi'*pi + n))*[pi;-e]*[pi',e'];
zeta = S1\b;
flag = 0;
iter = 0;
res = norm(S1*zeta-b,2)/norm(b,2);
otherwise
error("Manopt:unknown_linear_solver")
end
Implementation of mylinearsolve (continued)¶
case "SCHUR"
bschur = b(n+1:end,1) - X.'*diag(pi)*b(1:n,1);
% We solve here the system in its reduced formulation
switch upper(optionsolve.method)
case "CG"
[zetaschur, flag, res, iter,resvec] = ...
pcg(@(x) mycompute_schur(x,true), bschur, ...
1e-6, 2*n, [], [], bschur);
case "PCG"
L = ichol(sparse(diag(X.'*(diag(pi)*pi)) - X.'*(pi.^2.*(X))), ...
struct('type','ict', ...
'droptol',optionsolve.threshold, ...
'michol','on', ...
'diagcomp',min(pi)));
[zetaschur, flag, res, iter,resvec] = ...
pcg(@(x) mycompute_schur(x,true), bschur, ...
1e-6, 2*n, L, L', bschur);
case "PCG2"
Donehalf = spdiags(1./sqrt((X.'*(diag(pi)*pi))),0,n,n);
P = (Donehalf*X')*spdiags(pi.^2,0,n,n)*(X*Donehalf);
P(abs(P) < optionsolve.threshold) = 0;
P = sparse(P);
PrecNeu = @(x) neumannseries(P,x,optionsolve.kappa);
[zetaschur, flag, res, iter,resvec] = ...
pcg(@(x) mycompute_schur2(x,Donehalf,true), Donehalf*bschur, ...
1e-6, 2*n, PrecNeu, [], Donehalf*bschur);
zetaschur = diag(1./sqrt((X.'*(diag(pi)*pi))))*zetaschur;
Implementation of mylinearsolve (continued)¶
case "LSQR"
[zetaschur, flag, res, iter,resvec] = ...
lsqr(@mycompute_schur, bschur, ...
1e-6, 2*n);
case "DIRECT"
S1schur = diag(X.'*(diag(pi)*pi)) - X.'*diag(pi.^2)*X + 0.005*ones(n,n)/n;
zetaschur = S1schur\bschur;
flag = 0;
iter = 0;
res = norm(S1schur*zetaschur-bschur,2)/norm(bschur,2);
otherwise
error("Manopt:unknown_linear_solver")
end
% reconstruct the whole vector
zeta = [b(1:n,1) - pi.*(X*zetaschur);...
zetaschur];
otherwise
error("Manopt:unknown_linear_formulation")
end
Implementation of mylinearsolve (continued)¶
Together with the different matrix-vector product routines.
function Ax = mycompute(x,flag)
xtop = x(1:n,1);
xbottom = x(n+1:end,1);
Axtop = xtop + diag(pi)*X*xbottom;
Axbottom = X'*diag(pi)*xtop + diag(pi'*diag(pi)*X)*xbottom;
Ax = [Axtop; Axbottom];
end
function Ax = mycompute_schur(x,flag)
Ax = (X.'*(diag(pi)*pi)).*x - X.'*(pi.^2.*(X*x));
end
function Ax = mycompute_schur2(x,D,flag)
y = D*x;
Ax = x - D*(X.'*(pi.^2.*(X*y)));
end
Implementation of mylinearsolve (continued)¶
And the ones to apply the preconditioners.
function Ax = myprec(x,flag)
xtop = x(1:n,1);
xbottom = x(n+1:end,1);
Axtop = xtop;
Axbottom = diag(X'*diag(pi)*pi)\xbottom;
Ax = [Axtop; Axbottom];
end
function y = neumannseries(P,x,k)
% Apply the truncated Neumann-series preconditioner y = (I + P + ... + P^k)*x
y = x;
t = x;
for j = 1:k
t = P*t; % accumulate the next power of P applied to x
y = y + t;
end
end
alpha = zeta(1:n, 1);
beta = zeta(n+1:end, 1);
end
Back to our sparse matrix example¶
We go back to a sparse matrix example like the one that started this whole business and see if we can preserve the stationary vector.
clear; clc; close all;
load('Sandi_authors.mat');
A = Problem.A;
D = diag(sum(A,2));
A = full(D\A);
n = size(A,1);
e = ones(n,1);
p = 2; % What root do we want?
tol = 1e-6; % Optimization parameters
maxIterations = 1000;
manifold = multinomialfactory(n,n); % These are column stochastic!
X0 = manifold.rand();
clear manifold
We have loaded into memory and row-normalized a sparse matrix example like the one we have already seen previously.
Let us solve it by constrained optimization¶
%% Constrained Optimization
[X1,output1,history1] = approximatepower(A,p,X0,tol,maxIterations,"ANALYTICAL");
figure(1)
semilogy(1:length(history1.targetFunctionValues),history1.targetFunctionValues, ...
'b-','LineWidth',2)
legend('Constrained Optimization')
And now we solve it on the manifold¶
manifold = multinomialfactory(n,n); % These are column stochastic!
problem.M = manifold;
problem.cost = @(x) 0.5*cnormsqfro(mpower(x,p).'-A);
problem = manoptAD(problem);
options.verbose = 0;
options.tolgradnorm = 1e-5;
options.maxiter = 100;
options.verbosity = 0;
[X2, xcost2, info2] = trustregions(problem,X0,options);
- we built the manifold,
- defined the objective function,
- used automatic differentiation to compute the Euclidean gradient and Hessian.
Convergence history¶
figure(1)
semilogy(1:length(history1.targetFunctionValues),history1.targetFunctionValues, ...
'b-',...
[info2.iter],[info2.cost],'b--','LineWidth',2);
legend('Constrained Optimization','Riemannian Trust-Region')
clear manifold problem options
And then we solve it on the new manifold¶
[pi,~] = eigs(A.',1,'largestabs');
pi = pi./sum(pi);
optionsolve = initoptions();
optionsolve.method = "direct";
optionsolve.verbose = false;
M = multinomialfixedstochasticfactory(pi,optionsolve);
X0 = M.rand(); % Initial guess
problem.M = M;
problem.cost = @(x) 0.5*cnormsqfro(mpower(x,p)-A);
problem = manoptAD(problem);
options.tolgradnorm = 1e-5;
options.verbosity = 1;
options.maxiter = 100;
[X3, xcost3, info3, options] = trustregions(problem,X0,options);
Max iteration count reached; options.maxiter = 100. Total time is 3.443367 [s] (excludes statsfun)
Convergence history¶
figure(1)
semilogy(1:length(history1.targetFunctionValues),history1.targetFunctionValues, ...
'b-',...
[info2.iter],[info2.cost],'b--', ...
[info3.iter],[info3.cost],'r-','LineWidth',2);
legend('Constrained Optimization','Riemannian Trust-Region','RTS preserving steady state')
Looking at the steady state¶
[pi3,~] = eigs(X3.',1,'largestabs');
pi3 = pi3/sum(pi3);
figure(2)
semilogy(1:length(pi),pi,'o',1:length(pi3),pi3,'x')
The reducible case¶
We consider here an edge case for our approach: the chain is reducible, with two communicating classes; in other words, the stationary distribution is $\boldsymbol{\pi} = [0,\ldots,0,1]^T$.
- Application: R. A. Jarrow, D. Lando, and S. M. Turnbull, A Markov Model for the Term Structure of Credit Risk Spreads, The Review of Financial Studies, 10 (1997), pp. 481–523.
The Markov chain models the dynamics of the different credit rating states, i.e., evaluations of the relative ability of an entity or obligation to meet its financial commitments, and describes the probability of transitioning between these states over time.
An example¶
| data(i,j) | AAA | AA | A | BBB | BB | B | CCC | D |
|---|---|---|---|---|---|---|---|---|
| AAA | 0.891 | 0.0963 | 0.0078 | 0.0019 | 0.003 | 0 | 0 | 0 |
| AA | 0.0086 | 0.901 | 0.0747 | 0.0099 | 0.0029 | 0.0029 | 0 | 0 |
| A | 0.0009 | 0.0291 | 0.8894 | 0.0649 | 0.0101 | 0.0045 | 0 | 0.0009 |
| BBB | 0.0006 | 0.0043 | 0.0656 | 0.8427 | 0.0644 | 0.016 | 0.0018 | 0.0045 |
| BB | 0.0004 | 0.0022 | 0.0079 | 0.0719 | 0.7764 | 0.1043 | 0.0127 | 0.0241 |
| B | 0 | 0.0019 | 0.0031 | 0.0066 | 0.0517 | 0.8246 | 0.0435 | 0.0685 |
| CCC | 0 | 0 | 0.0116 | 0.0116 | 0.0203 | 0.0754 | 0.6493 | 0.2319 |
| D | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
These were obtained from a statistical model fitted to annual data; the point of computing a root is that we would like information on the transitions over shorter time horizons.
Idea: let's "PageRank"¶
In a similar guise to the Page Rank problem, we apply a perturbation to the data matrix to make it irreducible, as $$ \tilde{A} = (1-\gamma) A + \gamma(\mathbf{1}\mathbf{1}^T)/n, \qquad 0 < \gamma \ll 1, $$ which admits stationary distribution $\tilde{\boldsymbol{\pi}}>0$ with respect to which we can construct the manifold $\mathbb{S}_n^{\tilde{\boldsymbol{\pi}}}$. The approximate root can then be computed by solving for \begin{equation*} \min_{X \in \mathbb{S}_n^{\tilde{\boldsymbol{\pi}}}} \frac{1}{2}\|X^2 - A \|_F^2. \end{equation*} For $\gamma = 10^{-4}$, we obtain $\tilde{\boldsymbol{\pi}} \approxeq [0.0002, 0.0007, 0.0012, 0.0009, 0.0005, 0.0006, 0.0001, 0.9957]^T$.
Code¶
clear all;
data = [0.8910 0.0963 0.0078 0.0019 0.0030 0.0000 0.0000 0.0000
0.0086 0.9010 0.0747 0.0099 0.0029 0.0029 0.0000 0.0000
0.0009 0.0291 0.8894 0.0649 0.0101 0.0045 0.0000 0.0009
0.0006 0.0043 0.0656 0.8427 0.0644 0.0160 0.0018 0.0045
0.0004 0.0022 0.0079 0.0719 0.7764 0.1043 0.0127 0.0241
0.0000 0.0019 0.0031 0.0066 0.0517 0.8246 0.0435 0.0685
0.0000 0.0000 0.0116 0.0116 0.0203 0.0754 0.6493 0.2319
0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000];
A = diag(sum(data,2))\data;
gam = 1e-4;
E = ones(size(A))./size(A,1);
B = gam*E + (1-gam)*A;
pi = pvgth(B); % Computation of the stationary vector:
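Here pvgth is a utility from the repository; it presumably computes the stationary vector with the subtraction-free GTH (Grassmann-Taksar-Heyman) procedure, well suited to nearly reducible chains. A minimal sketch of such a routine (the actual pvgth may differ in its interface and details) is:
function piv = gth_sketch(P)
% GTH_SKETCH stationary vector of an irreducible stochastic matrix, GTH-style
n = size(P,1);
for k = n:-1:2
s = sum(P(k,1:k-1));                       % = 1 - P(k,k), computed without subtraction
P(1:k-1,k) = P(1:k-1,k)/s;
P(1:k-1,1:k-1) = P(1:k-1,1:k-1) + P(1:k-1,k)*P(k,1:k-1);
end
piv = zeros(n,1); piv(1) = 1;
for k = 2:n
piv(k) = P(1:k-1,k).'*piv(1:k-1);          % back-substitution
end
piv = piv/sum(piv);
end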
Solve it on the manifold¶
% Build the manifold
optionsolve = initoptions();
optionsolve.correction = true;
optionsolve.verbose = false;
M = multinomialfixedstochasticfactory(pi,optionsolve);
X0 = diag(sum(triu(ones(size(A))),2))\triu(ones(size(A))); % Initial guess
X0 = gam*E + (1-gam)*X0;
X0 = modifiedsinkhorn(X0,pi,100);
problem.M = M;
problem.cost = @(x) 0.5*cnormsqfro(mpower(x,2)-A);
problem = manoptAD(problem);
options.tolcost = 1e-3;
options.verbosity = 0;
options.Delta_bar = 22;
[X1, xcost, info, options] = rlbfgs(problem,X0,options);
Constrained optimization and visualization¶
rA = approximatepower(A,2,X0,1e-6,1000);
figure("Position",[3103 842 1113 365])
subplot(1,2,1)
heatmap(X1)
title("Riemannian L-BFGS")
subplot(1,2,2)
heatmap(rA);
title("Constrained Optimization (Trustregion)")