Multivariate Quadratic Regression, Surface Fitting and the Hessian

880 Views Asked by At

I need to fit a quadratic surface to multidimensional data, one of the methods mentioned is using a polynomial basis function,

$$ \phi(x) = [ 1, x_1, ..., x_n x_1^2, ..., x_n^2 x_1 x_2, ..., x_{n-1}x_n ] $$

If we massage the data into this form,

$$ \Phi(Y) = \left[\begin{array}{cccccccccc} 1 & y_1^1 & \dots & y_n^1 & (y_1^1)^2 & \dots & (y_n^1)^2 & y_1^1y_2^1 & \dots & y_{n-1}^1 y_n^1\\ \vdots & & & & & & & & & \vdots \\ 1 & y_1^p & \dots & y_n^p & (y_1^p)^2 & \dots & (y_n^p)^2 & y_1^py_2^p & \dots & y_{n-1}^p y_n^p \end{array}\right] $$

We can run a regression to find the coefficients for each term of the quadratic expansion.

My question is can I use a library that can do this regression for any dimension (but always order=2, that is quadratic).

Also is there a way to obtain the Hessian matrix easily, from the epression above perhaps, once the fit is computed ? A way to represent a multivariate quadratic expression is known to be,

$$ f(x) = x^T A x $$

where $x = [x_1, x_2, ... , x_n]$ and the Hessian of this expression would be $A + A^T$. But how would the coefficients found above map to the matrix A? Is there a way to use the matrix $A$ directly in the regression so I also end up with the Hessian automaticaly?

My goal is to be able to sample values from a known function around a point, approximate it with a quadratic function at that point, and use this quadratic function to calculate a gradient and a Hessian.

1

There are 1 best solutions below

0
On BEST ANSWER

The basis for two dimensions would be

$$ p(x) = \left[\begin{array}{ccccc} x_1 & x_2 & x_1^2 & x_1x_2 & x_2^2 \end{array}\right]^T $$

$f(x) = p(x)^T a$

where $a = [a_0, a_1, ...]$. This would be akin to calculating

$$ f(x) = a_0 + a_1 x_1 + a_2 x_2 + a_3 x_1 x_2 + a_4 x_2^2 $$

Then with given data points

$$ \left[\begin{array}{ccccc} (x_1^1) & (x_2^1) & (x_1^1)^2 & (x_1^1)(x_2^1) & (x_2^1)^2 \\ \vdots & & & & \vdots \\ (x_1^n) & (x_2^n) & (x_1^n)^2 & (x_1^n)(x_2^n) & (x_2^n)^2 \\ \end{array}\right] \mathbf{a} = \left[\begin{array}{c} y^1 \\ \vdots \\ y^n \end{array}\right] $$

and solve for $a$.

But I need the

$$ f(x) = x^T A x $$

form, so I can easily get $\nabla f (x) = 2 A x$ and the Hessian $\nabla^2 f(x) = 2 A$.

It turns out if we start with 3 dimensions, and use $x_3 = 1$ this gives the necessary form in two dimensions,

$$ x^T A x = \left[\begin{array}{ccc} x_1 & x_2 & x_3 \end{array}\right] \left[\begin{array}{ccc} a_{11} & a_{12} & a_{13} \\ a_{21} & a_{22} & a_{23} \\ a_{31} & a_{32} & a_{33} \end{array}\right] \left[\begin{array}{c} x_1 \\ x_2 \\ x_3 \end{array}\right] $$

$$ = \left[\begin{array}{c} x_1 a_{11} + x_2 a_{21} + x_3 a_{31} \\ x_1 a_{12} + x_2 a_{22} + x_3 a_{32} \\ x_1 a_{13} + x_2 a_{23} + x_3 a_{33} \end{array}\right]^T \left[\begin{array}{c} x_1 \\ x_2 \\ x_3 \end{array}\right] $$

$$ = x_1 x_1 a_{11} + x_1 x_2 a_{21} + x_1 x_3 a_{31} + $$ $$ x_1 x_2 a_{12} + x_2 x_2 a_{22} + x_3 x_2 a_{32} + $$ $$ x_1 x_3 a_{13} + x_2 x_2 a_{23} + x_3 x_3 a_{33} $$

Using $x_3 = 1$

$$ x^T A x = \left[\begin{array}{ccc} x_1 & x_2 & 1 \end{array}\right] \left[\begin{array}{ccc} a_{11} & a_{12} & a_{13} \\ a_{21} & a_{22} & a_{23} \\ a_{31} & a_{32} & a_{33} \end{array}\right] \left[\begin{array}{c} x_1 \\ x_2 \\ 1 \end{array}\right] $$

$$ = a_{11} x_1^2 + a_{21} x_1 x_2 + a_{31}x_1 + $$ $$ a_{12}x_1 x_2 + a_{22}x_2^2 + a_{32} x_2+ $$ $$ a_{13} x_1 + a_{23} x_2 x_3 + a_{33} $$

from scipy.interpolate import Rbf
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
from matplotlib import cm
import numpy as np
import autograd.numpy as anp
import autograd

def random_ball(num_points, dimension, radius=1):
    from numpy import random, linalg
    random_directions = random.normal(size=(dimension,num_points))
    random_directions /= linalg.norm(random_directions, axis=0)
    random_radii = random.random(num_points) ** (1/dimension)
    return radius * (random_directions * random_radii).T

np.random.seed(0)
N = 20

def rosenbrock(x):
    return (1 + x[0])**2 + 100*(x[1] - x[0]**2)**2

def Rosenbrock(x,y):
    return (1 + x)**2 + 100*(y - x**2)**2

def get_fvals_in_region(xcurr, f, radius):    
    b = random_ball(N, 2, radius)
    pts = xcurr+b
    vals = [f(p) for p in pts]
    return xcurr+b, np.array(vals)

x0 = [1.5,0]
xs,vs = get_fvals_in_region(x0, rosenbrock, 0.5)

res = []
for i in range(vs.shape[0]):
    res.append((xs[i,0],xs[i,1],vs[i]))
res = np.array(res).reshape(vs.shape[0], 3)

x = np.linspace(-2,2,250)
y = np.linspace(-1,3,250)
X, Y = np.meshgrid(x, y)
Z = Rosenbrock(X, Y)

import itertools
import numpy.linalg as lin

def quad_interpolate(xi, yi):
    xi = np.hstack((xi, np.ones((1,len(xi))).T  ))
    #print (xi)
    D = xi.shape[1]
    print (D)
    X_train = []
    for row in xi:
        X_train.append([row[i]*row[j] for i,j in itertools.product(range(D),range(D)) ])
    X_train = np.array(X_train)
    print (X_train.shape)
    print (yi.shape)
    coef,_,_,_ = lin.lstsq(X_train, yi)
    return coef

xi = res[:,[0,1]]
yi = res[:,[2]]
coef = quad_interpolate(xi,yi)

print (coefs)

x = np.linspace(-2,2,250)
y = np.linspace(-1,3,250)
X, Y = np.meshgrid(x, y)
Z = Rosenbrock(X, Y)

fig = plt.figure(figsize = (8,4))
ax = fig.gca(projection='3d')
ax.plot3D(res[:,0],res[:,1],res[:,2],'r.')
ax.plot_surface(X,Y,Z,rstride = 5, cstride = 5, cmap = 'jet', alpha = .4, edgecolor = 'none' )

def q_interp(x1,x2):
    x = np.array([[x1,x2,1]])
    A = coef.reshape(3,3)
    res = np.dot(np.dot(x,A),x.T)
    return np.float(res)

Zi = np.array([q_interp(xx,yy) for xx,yy in zip(X.flatten(),Y.flatten())])
Zi = Zi.reshape(X.shape)
ax.plot_wireframe(X,Y,Zi)

coefs = coef.reshape(3,3)

g = (2 * np.dot(coefs[:2,:2],np.array(x0).reshape(2,1)))

gnorm = g / np.sum(g)

ax.set_zlim(0,2500)

ax.quiver(x0[0], x0[1], 0, -gnorm[0], -gnorm[1], 0, color='red')

hess = 2*coefs[:2,:2]
print (hess)
newton_dir = -np.dot(lin.inv(hess),g)
print (newton_dir)

d = newton_dir
print (d)

ax.quiver(x0[0], x0[1], 0, d[0], d[1], 0, color='green')

ax.plot3D([x0[0]], [x0[1]], [0.0], 'b.')

ax.view_init(21, -133)

enter image description here