I am having trouble understanding an issue that I observe when running a rather simple bundle adjustment problem using non-linear least-squares with an analytical Jacobian as opposed to a finite-difference approximation of that. For a reason that I do not understand, running a toy problem (see below) that performs bundle adjustment with 3 cameras and a few beacons/points demonstrates expected convergence behavior when running without an analytical Jacobian. However, once I plug in the Jacobian into the same optimizer, the convergence deteriorates significantly, to a point where the optimum is not even reached anymore.
I checked the obvious problem of just having a faulty Jacobian, but I double-checked the implementation (and the literature) and it seems to be correct.
My question is - assuming that I don't have an implementation error - if there is an explanation for the behaviour that I am seeing. I am expecting the FD approximation to be different from the analytical Jacobian, at least at all combinations of $X$ and $Y$ on the Lie manifold where $[X,Y]=0$ does not hold and I would expect the FD approximation to be performing worse for this reason, but the opposite seems to be the case.
I tried to squeeze the problem setup into a single MatLab toy-problem that shows the issue, sorry if this got a little lengthy and quite condensed in the style. Any input is appreciated as I am scratching my head over that for quite some time now.
% Setup example with 3 cameras and a few beacons to reconstruct
% cameras as 3x6 matrix (3 cameras, 6-parameter axis-angle / translation)
cameras = [0.1 0 0.2 0 0 2 ;
0.1 0.3 0.1 0 0 1 ;
0 0.2 0.4 0 0 1.5];
% beacons as 8x3 matrix, 3d points with row-wise stacking
beacons = [ 0.4, -0.2, 0.1 ;
-0.1, 0.1, 0.15 ;
-0.3, 0.2, -0.1 ;
0.3, 0.4, 0.1 ;
0.25, 0.1, -0.2 ;
0.4, 0.3, 0.2 ;
0.24, -0.13, 0.2 ;
0.1, -0.3, -0.12 ];
% compute ideal measurements (perspective projection)
measurements = zeros(size(cameras,1)*size(beacons,1),2);
mnum=1;
for c=1:size(cameras, 1)
M = se3exp(cameras(c,:));
for b=1:size(beacons, 1)
x = M*[beacons(b,:),1]';
measurements(mnum, :) = [x(1)/x(3), x(2)/x(3)];
mnum = mnum + 1;
end
end
%% Use finite-difference approximation to the Jacobian for minimization
% initialize reconstructed cameras and beacons
rcameras = [0.05 0 0.15 0 0 1 ;
0.05 0.05 0.2 0 0 1 ;
0 0 0.3 0 0 1 ];
rbeacons = zeros(size(beacons));
% run unconstrained non-linear least squares and read back results
options=optimoptions('lsqnonlin','Display','iter','Algorithm','levenberg-marquardt');
func = @(x) residual(reshape(x(1:size(rcameras,1)*6),[6,size(rcameras,1)])', reshape(x(size(rcameras,1)*6+1:end),[3,size(rbeacons,1)])', measurements);
xinit = [reshape(rcameras',[size(rcameras,1)*6,1]) ; reshape(rbeacons',[size(rbeacons,1)*3,1])];
xfinal = lsqnonlin(func,xinit,[],[],options);
% read back results and align with 1st reference camera
rcameras = reshape(xfinal(1:size(rcameras,1)*6),[6,size(rcameras,1)])';
rbeacons = reshape(xfinal(size(rcameras,1)*6+1:end),[3,size(rbeacons,1)])';
[rcameras, rbeacons] = align(rcameras, rbeacons, cameras, beacons);
cameras
rcameras
%% Use analytical Jacobian for minimization
% initialize reconstructed cameras and beacons
rcameras = [0.05 0 0.15 0 0 1 ;
0.05 0.05 0.2 0 0 1 ;
0 0 0.3 0 0 1 ];
rbeacons = zeros(size(beacons));
% run unconstrained non-linear least squares and read back results
joptions=optimoptions('lsqnonlin','Display','iter','Algorithm','levenberg-marquardt','SpecifyObjectiveGradient',true);
jfunc = @(x) adapter(reshape(x(1:size(rcameras,1)*6),[6,size(rcameras,1)])', reshape(x(size(rcameras,1)*6+1:end),[3,size(rbeacons,1)])', measurements);
xinit = [reshape(rcameras',[size(rcameras,1)*6,1]) ; reshape(rbeacons',[size(rbeacons,1)*3,1])];
xfinal = lsqnonlin(jfunc,xinit,[],[],joptions);
% read back results and align with 1st reference camera
rcameras = reshape(xfinal(1:size(rcameras,1)*6),[6,size(rcameras,1)])';
rbeacons = reshape(xfinal(size(rcameras,1)*6+1:end),[3,size(rbeacons,1)])';
[rcameras,rbeacons] = align(rcameras, rbeacons, cameras, beacons);
cameras
rcameras
%% Function to align cameras/beacons to the reference for comparison
function [cams, beacs] = align(cameras, beacons, refcameras, refbeacons)
refdist = norm(refbeacons(1,:)-refbeacons(2,:));
recdist = norm(beacons(1,:)-beacons(2,:));
scale = refdist/recdist;
beacs = beacons*scale;
cams = cameras;
for c=1:size(cameras,1)
cams(c,4:6) = cams(c,4:6)*scale;
end
Tref = se3exp(refcameras(1,:));
A = inv(se3exp(cams(1,:)))*Tref;
for c=1:size(cams,1)
cams(c,:) = se3log(se3exp(cams(c,:))*A);
end
Ai = inv(A);
for b=1:size(beacs,1)
t = Ai*[beacs(b,:),1]';
beacs(b,:) = t(1:3)';
end
end
%% Compute residual of bundle adjustment problem
function f = residual(cameras, beacons, measurements)
mnum=1;
f = zeros(size(measurements, 1)*2,1);
for c=1:size(cameras,1)
M = se3exp(cameras(c,:));
for b=1:size(beacons,1)
x = M * [beacons(b,:),1]';
d = (measurements(mnum,:) - [x(1)/x(3), x(2)/x(3)])';
i = (mnum-1)*2+1;
f(i:i+1) = d;
mnum = mnum + 1;
end
end
end
%% Compute Jacobian via finite-differencing (not used)
function J = fdJacobian(cameras, beacons, measurements)
epsilon = 1e-8;
r = residual(cameras, beacons, measurements);
J = [];
for c=1:size(cameras, 1)
for i=1:6
x = cameras;
x(c,i) = x(c,i) + epsilon;
fwd = residual(x, beacons, measurements);
J = [J (fwd-r)./epsilon];
end
end
for b=1:size(beacons,1)
for i=1:3
x = beacons;
x(b,i) = x(b,i) + epsilon;
fwd = residual(cameras, x, measurements);
J = [J (fwd-r)./epsilon];
end
end
end
%% Compute Jacobian
function J = lieJacobian(cameras, beacons, measurements)
J = zeros(size(measurements,1)*2, size(cameras,1)*6+size(beacons,1)*3);
mnum = 1;
for c = 1:size(cameras, 1)
M = se3exp(cameras(c,:));
columnc = (c-1)*6+1;
for b = 1:size(beacons, 1)
columnb = (b-1)*3+size(cameras,1)*6+1;
r = (mnum-1)*2+1;
x = M * [beacons(b,:),1]';
xo = beacons(b,:);
sx = [ 0, -xo(3), xo(2) ;
xo(3), 0, -xo(1) ;
-xo(2), xo(1), 0 ];
dpi = [ 1/x(3) 0 -x(1)/(x(3)^2) ;
0 1/x(3) -x(2)/(x(3)^2) ];
% Derivatives of rigid motion action chained with projection
J(r:r+1,columnc:columnc+5) = dpi*[M(1:3,1:3)*sx, -M(1:3,1:3)];
J(r:r+1,columnb:columnb+2) = -dpi*M(1:3,1:3);
mnum = mnum+1;
end
end
end
%% Adapter function for lsqnonlin that returns residual and Jacobian
function [f, J] = adapter(cameras, beacons, measurements)
f = residual(cameras, beacons, measurements);
J = lieJacobian(cameras, beacons, measurements);
end
%% Exponential map from se(3) to SE(3)
function M = se3exp(posevec)
angle = norm(posevec(1:3));
skew = [ 0 , -posevec(3), posevec(2) ;
posevec(3) , 0, -posevec(1) ;
-posevec(2) , posevec(1), 0 ];
if abs(angle) < 1e-10
R = eye(3);
Jl = eye(3);
else
R = eye(3)+(sin(angle)/angle)*skew + ((1-cos(angle))/(angle^2)) * skew*skew;
Jl = eye(3)+((1-cos(angle))/(angle^2))*skew + ((angle-sin(angle))/(angle^3)) * skew*skew;
end
M = [ R , Jl*posevec(4:6)' ;
0, 0, 0, 1 ];
end
%% Logarithm map from SE(3) to se(3)
function lie = se3log(M)
trM = trace(M(1:3,1:3));
if abs(trM - 3) < 1e-7
angle = 0;
rs = zeros(3,3);
Jr = eye(3);
theta = [0, 0, 0];
elseif abs(trM + 1) < 1e-7
angle = pi;
[me, mi] = max(diag(M(1:3,1:3)));
I = eye(3,3);
col = M(1:3,mi) + I(:,mi);
theta = angle * (col / sqrt(2*(1+me)))';
rs = skew(theta);
Jr = eye(3)-0.5*rs+(1/angle^2)*(1-(sin(angle)/angle)/(2*((1-cos(angle))/angle^2)))*rs*rs;
else
angle = acos((trace(M(1:3,1:3))-1)/2);
rs = angle/(2*sin(angle)) * (M(1:3,1:3)-M(1:3,1:3)');
Jr = eye(3)-0.5*rs+(1/angle^2)*(1-(sin(angle)/angle)/(2*((1-cos(angle))/angle^2)))*rs*rs;
theta=[rs(3,2),rs(1,3),rs(2,1)];
end
t = Jr*M(1:3,4);
lie=[theta,t(1),t(2),t(3)]';
end
Edit: there was an actual bug in the Jacobian that I fixed, the convergence behaviour is still sub-par to the FD approximation, but reaches the correct minimum.
The way the optimization problem is set up, requires the use of the left Jacobian of the Lie group. AFAIK using the derivative of the group action is not the same and the gradient directions are not the same (cf this document on page 248).
What is required here, is in the aforementioned text on page 251 (eq. 7.187). I took the liberty of adjusting the MatLab example, which is pasted below and exposes the same convergence behaviour for the finite-differencing and the analytical Jacobian.
The document linked above also has a closed form representation of the left Jacobian, which I was too lazy too implement (see page 235).