% bestFitNormChoice: a script to examine various best fit norms.
%
% When computing the best fit of a model with linear parameters, the minimum
% of the residual vector can be computed in several different norms.  Unless
% interpolation is possible, different norms will give different
% solutions.
%
% This file examines the l_1, l_2 and l_inf norms for a simple linear fit
% to some data with an obvious outlier.
%
% Note that the l_2 (least squares) fit can be accomplished with linear
% algebra, while the other norms require linear programming.  The latter is
% beyond the scope of CPSC 303.  For more details on linear programming,
% consider MATH 340 or CPSC 406.
%
% Because l_1 and l_inf require linear programming, this script will
% only work if the optimization toolbox is available.  It will work on
% the machines in the CS department, but will not work, for example, on
% student versions of Matlab.
%
% Ian Mitchell for CPSC 303, 2/8/06.

% Data points.
xi = (1:5)';
yi = [ 2; 4; 6; 8; 16 ];
m = length(xi);

% Open a figure, plot the data.
figure
hi = plot(xi, yi, 'ko');
hold on;
set(hi, 'MarkerSize', 12);

% For the legend at the end.
legend_handles = hi(1);
legend_strings = { 'data values' };

% Finally, the points at which to evaluate the best fit functions.
xs = linspace(min(xi), max(xi), 101)';

% - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
% Construct the overdetermined linear system.

% Build the matrix.  For a linear model function, it will have two
% columns.
n = 2;

A = zeros(m, n);
% First column is constant basis function.
A(:,1) = 1;
% Second column is linear basis function.
A(:,2) = xi;

% Right hand side is simple the data values.
y = yi;

% - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
% l_2: least squares fit.  Use the normal equations as a demonstration.
c2 = (A'*A) \ (A' * y);

% Plot the results.
y2 = c2(1) + c2(2) * xs;
h2 = plot(xs, y2, 'b-');
set(h2, 'LineWidth', 4);

legend_handles = [ legend_handles, h2(1) ];
legend_strings = { legend_strings{:}, 'l_2' };

% - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
% l_1 fit.  Need to build a linear program (LP).  This involves augmenting
% the list of variables in the system to include not just the coefficients,
% but also the residuals.  In addition, to handle the absolute value we need
% to introduce two sets of residuals: the positives and the negatives.
%
% If you would like to discuss this algorithm, come by office hours.
len = n + 2 * m;
f = [ zeros(n, 1); ones(2 * m, 1) ];
Aineq = zeros(0, len);
bineq = zeros(0, 0);
Aeq = [ A, eye(m), -eye(m) ];
beq = y;
lb = [ -inf * ones(n, 1); zeros(2 * m, 1) ];
ub = +inf * ones(len, 1);

soln = linprog(f, Aineq, bineq, Aeq, beq, lb, ub);

% Extract the coefficients.
c1 = soln(1:2);

% Plot the results.
y1 = c1(1) + c1(2) * xs;
h1 = plot(xs, y1, 'r--');
set(h1, 'LineWidth', 4);

legend_handles = [ legend_handles, h1(1) ];
legend_strings = { legend_strings{:}, 'l_1' };

% - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
% l_inf fit.  The LP is slightly different -- now we don't need separate
% positive and negative residuals, but merely a bound on all residuals.
% Furthermore, we have no bounds on any of the variables.
%
% If you would like to discuss this algorithm, come by office hours.
len = n + m + 1;
f = [ zeros(n, 1); zeros(m, 1); 1 ];
Aineq = [ zeros(m, n), -eye(m), -ones(m, 1); ...
          zeros(m, n), +eye(m), -ones(m, 1) ];
bineq = [ zeros(m, 1); zeros(m, 1) ];
Aeq = [ A, eye(m), zeros(m, 1) ];
beq = y;

soln = linprog(f, Aineq, bineq, Aeq, beq);

% Extract the coefficients.
cinf = soln(1:2);

% Plot the results.
yinf = cinf(1) + cinf(2) * xs;
hinf = plot(xs, yinf, 'k-.');
set(hinf, 'LineWidth', 4);

legend_handles = [ legend_handles, hinf(1) ];
legend_strings = { legend_strings{:}, 'l_\infty' };


% - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
% Finish labelling the plot.
title({ 'Comparing best fits under various residual norms'; ...
        'Ian Mitchell for CPSC 303' });
xlabel('x');
ylabel('y');
legend(legend_handles, legend_strings, 'Location', 'NorthWest');

