Lecture 3: Numerical Optimization ¶
Antoine Chapel (Sciences Po & PSE) ¶
Alfred Galichon's math+econ+code prerequisite class on numerical optimization and econometrics, in Python ¶
Class content by Antoine Chapel. Past and present support from Alfred Galichon's ERC grant CoG-866274 is acknowledged, as well as inputs from contributors listed here. If you reuse material from this class, please cite as:
Antoine Chapel, 'math+econ+code' prerequisite class on numerical optimization and econometrics, January 2023.
References:
- Iterative Solution of Nonlinear Equations in Several Variables (Ortega & Rheinboldt, 1970) (great book, but a bit inaccessible)
- Microeconometrics: Methods and Applications, Chapter 10 (Cameron & Trivedi) (more accessible, but focused on econometrics)
Numerical optimization is a large and well-developed field: we cannot cover everything, or even every type of method, in two hours. We will therefore simply introduce the topic and focus on approximation methods for convex optimization and econometrics.
Why do we need numerical optimization? Sometimes, analytical methods based on first- and second-order conditions are not sufficient: the problem may be too large to solve by hand in a human lifetime, or it may simply have no analytical solution. Numerical methods exist for many tasks (computing the inverse of a matrix, finding the maximum of a function, finding its roots, etc.). We will explore some of them and code them by hand, but in many cases an optimized implementation of the algorithms we describe already exists in Python.
Speaking of algorithms, several have often been invented to solve the same problem, differing in speed, efficiency, and robustness.
Concepts:¶
- Function: $f(x)$, $x \in \mathbb{R}^n$
- Gradient: $\nabla f(x_k) = \Big(\frac{\partial f}{\partial x_1} \Big|_{x_k}, \frac{\partial f}{\partial x_2} \Big|_{x_k}, ..., \frac{\partial f}{\partial x_n} \Big|_{x_k}\Big)$
- Search direction: in which direction should we look in order to find a better point $x_{k+1}$?
- Step size: there is a trade-off between taking large steps, which move fast but risk overshooting the optimum, and taking smaller steps, which risk taking ages to reach the solution (see the small 1-D sketch after this list)
- Stopping criterion: when is "close enough" good enough?
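The following is a minimal 1-D sketch of the step-size trade-off, using plain gradient descent on $f(x) = x^2$ (the starting point, step sizes and iteration count are illustrative choices, not part of the class material):

def toy_descent(step, x0=10.0, n_iter=10):
    #plain gradient descent on f(x) = x**2, whose derivative is 2x
    x = x0
    for _ in range(n_iter):
        x = x - step * 2 * x   #update: x_{k+1} = x_k - step * f'(x_k)
    return x

print(toy_descent(step=0.05))  #small step: still far from 0 after 10 iterations
print(toy_descent(step=0.45))  #larger step: very close to the minimum at 0
print(toy_descent(step=1.05))  #step too large: the iterates diverge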
def fconvex(x1, x2):
    return x1**2 + x2**2
from mpl_toolkits import mplot3d
import numpy as np
import matplotlib.pyplot as plt
plt.rcParams["figure.figsize"] = (20,15)
fig = plt.figure()
ax = plt.axes(projection='3d')
x = np.linspace(-5, 5, 30)
y = np.linspace(-5, 5, 30)
X, Y = np.meshgrid(x, y)
Z = fconvex(X, Y)
ax.contour3D(X, Y, Z, 200, cmap='viridis')
ax.set_xlabel('x')
ax.set_ylabel('y')
ax.set_zlabel('z');
fig2 = plt.figure()
ax2 = plt.axes(projection='3d')
x = np.linspace(-5, 5, 30)
y = np.linspace(-5, 5, 30)
X, Y = np.meshgrid(x, y)
Z = fconvex(X, Y)
ax2.contour3D(X, Y, Z, 100, cmap='viridis')
ax2.set_xlabel('x')
ax2.set_ylabel('y')
ax2.set_zlabel('z')
ax2.set_title('Seen from above');
ax2.view_init(90, 0)
Gradient Descent¶
This first and important method can be used to find the optimum of a function. In its simplest version, it can be used on globally convex (or concave) functions such as the one depicted above.

The conceptual formula for the Gradient Descent algorithm is the following:
\begin{align} x_{k+1} = x_k + \lambda_k A_k g_k \end{align}
Where:
- $g_k$ is the gradient of the function evaluated at $x_k$: $\nabla f(x) \Big|_{x_k}$. In case it is not clear, $g_k$ is a vector of size $n$.
- $A_k$ is an $n \times n$ matrix that depends on $x_k$. If we use the Newton-Raphson method, this matrix is set to $A_k = -(H_k)^{-1}$, where $H_k$ is the Hessian that we studied in the first session.
- $\lambda_k$ is the size of the step we are taking
So, we need partial derivatives. How do we compute them?
- Analytical solution: faster, but you need to be able to compute them with pen and paper
- Numerical solution: approximate the partial derivative: $\frac{\partial f}{\partial x_j} \approx \frac{f(x + h \cdot e_j) \hspace{2pt} - \hspace{2pt} f(x - h \cdot e_j)}{2h}$, where $h$ is small (e.g. $10^{-5}$) and $e_j = (0,..., 0, 1, 0, ..., 0)'$ is a vector of zeros with a 1 at the index of the partial derivative you are taking.
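As a quick illustration of this central-difference formula (the test point and the vectorized quadratic f_quad below are only illustrative, not part of the class code):

def central_diff_partial(f, x, j, h=1e-5):
    #approximate the j-th partial derivative of f at x by central differences
    e_j = np.zeros(x.shape[0])
    e_j[j] = 1.0
    return (f(x + h*e_j) - f(x - h*e_j))/(2*h)

f_quad = lambda x: x[0]**2 + x[1]**2       #the same quadratic as above, in vector form
x0 = np.array([3.0, -2.0])
print(central_diff_partial(f_quad, x0, 0)) #≈ 6.0, the analytical value 2*x[0]
print(central_diff_partial(f_quad, x0, 1)) #≈ -4.0, the analytical value 2*x[1]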
Let's go back to our function $f(x_1, x_2) = x_1^2 + x_2^2$, whose derivatives are extremely easy to compute by hand. We write them in matrix form (as numpy arrays).
def f(x):
    return x[0]**2 + x[1]**2

def g(x):
    return np.array([2*x[0], 2*x[1]])

def H(x):
    return np.array([[2, 0],
                     [0, 2]])
#Starting point: (25, 12)
λ = 0.2
x_k1 = np.array([25, 12])
g_k1 = g(x_k1)
A_k1 = -np.linalg.inv(H(x_k1))
print(g_k1)
[50 24]
x_k2 = x_k1 + λ* A_k1@g_k1
g(x_k2)
array([40. , 19.2])
# As you have seen, since the function is simple, H is the same for every iteration, we will not re-compute it.
g_k2 = g(x_k2)
x_k3 = x_k2 + λ* A_k1@g_k2
g(x_k3) #closer to 0, but still not there. Let's automate this process
array([32. , 15.36])
def gradient_descent(x_1, λ=0.5):
    error = 1000
    tol = 1e-8
    x_hist = []
    x = x_1
    while error > tol:
        g_k = g(x)
        A_k = -np.linalg.inv(H(x))
        x = x + λ*A_k@g_k
        x_hist.append(x)
        error = max(abs(g_k))
    x_hist_np = np.empty((len(x_hist), x_1.shape[0]))
    for iter_index in range(len(x_hist)):
        x_hist_np[iter_index, :] = x_hist[iter_index]
    return x, x_hist_np
sol = gradient_descent(np.array([4, 4]))[0]
sol
array([1.86264515e-09, 1.86264515e-09])
x_hist_1 = gradient_descent(np.array([-150, -150]))[1]
fig3 = plt.figure()
ax3 = plt.axes(projection='3d')
x = np.linspace(-100, 100, 300)
y = np.linspace(-100, 100, 300)
X, Y = np.meshgrid(x, y)
Z = fconvex(X, Y)
ax3.contour3D(X, Y, Z, 250, cmap='cividis', alpha=0.3)
ax3.set_xlabel('x')
ax3.set_ylabel('y')
ax3.set_zlabel('z')
xdata = x_hist_1[:, 0]
ydata = x_hist_1[:, 1]
zdata = fconvex(xdata, ydata)
ax3.scatter3D(xdata, ydata, zdata, c=zdata, cmap='cividis')
ax3.plot3D(xdata, ydata, zdata, 'red', lw=5)
ax3.view_init(40, 20);
Here, we have coded our own gradient descent, using its mathematical formulation. We could pick another method and code it by hand as well, and while this is certainly interesting, we would be reinventing the wheel. An easier solution is to use code already written and optimized by other people, available in the library Scipy.
from scipy.optimize import minimize
#BFGS is a 'quasi-Newton' method, that uses an approximation of the Hessian instead of computing it exactly as we have done before.
minimize(f, x0=[12, 12], method="BFGS")
      fun: 1.173795964081817e-12
 hess_inv: array([[ 0.74999998, -0.25000002],
       [-0.25000002,  0.74999998]])
      jac: array([1.54708651e-06, 1.54708651e-06])
  message: 'Optimization terminated successfully.'
     nfev: 16
      nit: 2
     njev: 4
   status: 0
  success: True
        x: array([7.66092672e-07, 7.66092672e-07])
The gradient descent we have coded manually works well here, but it requires an explicit expression for the Hessian. While this was not an issue for such a simple function, it can become one, because derivatives are computationally expensive, be it for the human who derives them or for the computer that evaluates them. Most methods try to alleviate this issue by computing only the gradient (first-order derivatives) and approximating the Hessian by some other means. In particular, the BFGS method on which the minimize function relies approximates the Hessian recursively.
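If the analytical gradient is available, it can also be passed to minimize through its jac argument, which spares the solver the finite-difference gradient evaluations. A small sketch, reusing the f and g defined above:

res = minimize(f, x0=[12, 12], jac=g, method="BFGS")
print(res.x, res.nit, res.nfev)  #solution, number of iterations, number of function evaluations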
Coordinate Descent¶
Instead of moving along all variables at once, why not update them one after another? This is called coordinate (gradient) descent. As you will see, there is no need to use the Hessian here either. Coordinate descent is a simple method, perhaps too simple: it may fail to converge to a solution if the objective function is not smooth.

def coordinate_descent(x_1, λ=0.5):
    error = 1000
    tol = 1e-8
    x_hist = []
    x = x_1
    coord = 0
    while error > tol:
        gradient_k = g(x)
        g_basis = np.zeros(x_1.shape[0])
        g_basis[coord] = gradient_k[coord]
        x = x - λ*g_basis
        x_hist.append(x)
        error = max(abs(gradient_k))
        if coord < x_1.shape[0]-1: #go to the next coordinate
            coord += 1
        else:
            coord = 0 #if we're at the last coordinate already, go back to the first coordinate
    x_hist_np = np.empty((len(x_hist), x_1.shape[0]))
    for iter_index in range(len(x_hist)):
        x_hist_np[iter_index, :] = x_hist[iter_index]
    return x, x_hist_np
x_hist_2 = coordinate_descent(np.array([-100, -100]), λ=0.2)[1]
fig4 = plt.figure()
ax4 = plt.axes(projection='3d')
x = np.linspace(-100, 100, 300)
y = np.linspace(-100, 100, 300)
X, Y = np.meshgrid(x, y)
Z = fconvex(X, Y)
ax4.contour3D(X, Y, Z, 50, cmap='cividis')
ax4.set_xlabel('x')
ax4.set_ylabel('y')
ax4.set_zlabel('z')
xdata = x_hist_2[:, 0]
ydata = x_hist_2[:, 1]
zdata = fconvex(xdata, ydata)
ax4.scatter3D(xdata, ydata, zdata, c=zdata, cmap='cividis')
ax4.plot3D(xdata, ydata, zdata, 'red', lw=5)
ax4.view_init(45, 10);
When/how do these methods fail?¶
Unfortunately (but expectedly), gradient descent is not guaranteed to succeed. The biggest issues you may encounter are non-convexity and dimensionality.
Nonconvexity¶
A simple gradient method like Newton-Raphson can get stuck at a local minimum, or on any flat area, and miss the global minimum. For example, consider the following bivariate function:
\begin{align} f(x, y) &= 3(1-x)^2 \exp\{-x^2 - (y-1)^2\} - 10\Big(\frac{x}{5} - x^3 - y^5\Big) \exp\{ -x^2 - y^2 \} - \frac{1}{3}\exp\{ -(x+1)^2 - y^2 \} \end{align}
def f_nonconvex_g(x, y):
    return 3*(1-x)**2 * np.exp(-x**2 - (y-1)**2) - 10*(x/5 - x**3 - y**5)*np.exp(-x**2 - y**2) - 1/3*(np.exp(-(x+1)**2 - y**2))
fig5 = plt.figure()
ax5 = plt.axes(projection='3d')
x = np.linspace(-3, 3, 300)
y = np.linspace(-3, 3, 300)
X, Y = np.meshgrid(x, y)
Z = f_nonconvex_g(X, Y)
#ax5.contour3D(X, Y, Z, 250, cmap='viridis', alpha=0.3)
ax5.plot_surface(X, Y, Z, cmap='cividis', antialiased=True)
ax5.set_xlabel('x')
ax5.set_ylabel('y')
ax5.set_zlabel('z')
#xdata = x_hist_1[:, 0]
#ydata = x_hist_1[:, 1]
#zdata = fconvex(xdata, ydata)
#ax5.scatter3D(xdata, ydata, zdata, c=zdata, cmap='cividis')
#ax5.plot3D(xdata, ydata, zdata, 'red', lw=5)
ax5.view_init(30, 225);
Clearly, this function is not convex. Let us check what happens if we try to run gradient descent on it. Instead of building an implementation specific to this function, the code below is a cleaner version of the Newton-Raphson minimization method that will work for any simple function, with gradients and Hessians computed numerically.
def num_gradient(f, x):
    partial_derivatives_vector = np.empty(x.shape[0])
    for index in range(x.shape[0]):
        e_ind = np.zeros(x.shape[0])
        e_ind[index] = 1.0
        h = 1e-5
        partial_derivatives_vector[index] = (f(x + e_ind*h) - f(x - e_ind*h))/(2*h)
    return partial_derivatives_vector
def num_hessian(f, x):
    hessian = np.empty((x.shape[0], x.shape[0]))
    for index in range(x.shape[0]):
        for jindex in range(x.shape[0]):
            e_ind = np.zeros(x.shape[0])
            e_ind[jindex] = 1.0
            h = 1e-5
            hessian[index, jindex] = (num_gradient(f, x + e_ind*h)[index] - num_gradient(f, x - e_ind*h)[index])/(2*h)
    return hessian
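A quick sanity check of these numerical derivatives, reusing the simple quadratic f and its analytical gradient g and Hessian H defined earlier (the test point is arbitrary):

x_test = np.array([3.0, -2.0])
print(num_gradient(f, x_test))  #should be close to g(x_test) = [6, -4]
print(num_hessian(f, x_test))   #should be close to H(x_test) = [[2, 0], [0, 2]]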
def gradient_descent(f, x_1, step=1, tol=1e-5, verbose=False):
    """Implements gradient descent using the Newton-Raphson algorithm. Gradients and Hessians are computed numerically.\n
    f: objective function to minimize\n
    x_1: starting point of the iterative process\n
    step: step size\n
    tol: tolerance on the gradient, used as stopping criterion"""
    error = 1e6
    x_hist = []
    maxiter = 100
    x = x_1
    n_iter = 1
    while error > tol and n_iter <= maxiter:
        g_k = num_gradient(f, x)
        if verbose:
            print(f'iteration: {n_iter}, x_hat: {np.round(x, 3)}, gradient: {np.round(g_k, 3)}, value: {np.round(f(x), 3)}')
        H_k = num_hessian(f, x)
        try:
            A_k = -np.linalg.inv(H_k)
        except np.linalg.LinAlgError:
            #if the Hessian cannot be inverted, fall back on a random diagonal matrix
            A_k = np.random.normal(0, 1, size=x.shape[0])*np.identity(x.shape[0])
            print("ERROR: Failure to invert the Hessian")
        x = x + step*A_k@g_k
        x_hist.append(x)
        error = max(abs(g_k))
        n_iter += 1
    return x, x_hist
def f_nonconvex(X):
    x = X[0]
    y = X[1]
    return 3*(1-x)**2 * np.exp(-x**2 - (y-1)**2) - 10*(x/5 - x**3 - y**5)*np.exp(-x**2 - y**2) - 1/3*(np.exp(-(x+1)**2 - y**2))
x_star, x_hist = gradient_descent(f_nonconvex, np.array([0, 1]), step=0.1)
x_star #true minimum
array([ 0.00936299, -1.58062483])
fig6 = plt.figure()
ax6 = plt.axes(projection='3d')
x = np.linspace(-3, 3, 300)
y = np.linspace(-3, 3, 300)
X, Y = np.meshgrid(x, y)
Z = f_nonconvex_g(X, Y)
#ax5.contour3D(X, Y, Z, 250, cmap='viridis', alpha=0.3)
ax6.plot_surface(X, Y, Z, cmap='cividis', antialiased=True, alpha=0.8)
ax6.set_xlabel('x')
ax6.set_ylabel('y')
ax6.set_zlabel('z')
x_hist_nc_lucky = np.array(gradient_descent(f_nonconvex, np.array([0, 1]), step=0.1)[1]) #we were lucky to pick a starting point that leads us to the global min
x_hist_nc = x_hist_nc_lucky
#x_hist_nc_unlucky = np.array(gradient_descent(f_nonconvex, np.array([-1.5, 0]), step=0.1)[1]) #we got stuck in a local minimum
#x_hist_nc = x_hist_nc_unlucky
xdata = x_hist_nc[:, 0]
ydata = x_hist_nc[:, 1]
Xdata = np.vstack([xdata, ydata])
zdata = f_nonconvex(Xdata)
ax6.scatter3D(xdata, ydata, zdata, c=zdata, cmap='cividis')
ax6.plot3D(xdata, ydata, zdata, 'red', lw=5)
ax6.view_init(45, 230);
minimize(f_nonconvex, x0=(-1.5, 0)) #scipy's BFGS gets fooled as well when we start from the unlucky x0 and end up in the local minimum.
#That's why convexity is an absolute requirement.
      fun: -3.051010724588495
 hess_inv: array([[ 0.07087732, -0.01838886],
       [-0.01838886,  0.11539952]])
      jac: array([-8.94069672e-08,  2.32458115e-06])
  message: 'Optimization terminated successfully.'
     nfev: 36
      nit: 6
     njev: 9
   status: 0
  success: True
        x: array([-1.34667724, -0.20773589])
Computational weight¶
In the Newton-Raphson method we have coded, some steps are computationally expensive, most importantly computing and inverting the Hessian. For $k$ variables, we need to compute the $k^2$ second-order partial derivatives that make up the Hessian, and each second-order partial derivative involves evaluating the function four times. If the function is simple, all is well. If the function involves many variables, a large statistical sample, or computationally expensive steps, Newton-Raphson can take ages to converge. Solutions come from cleverer algorithms that rely on an approximation of the inverse Hessian: statistics and machine learning applications typically rely on such Hessian approximations (e.g. L-BFGS) or on stochastic approximation.
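For instance, scipy exposes a limited-memory quasi-Newton method as method="L-BFGS-B", which never builds or inverts the full Hessian. A small sketch on the simple quadratic f used earlier:

res_lbfgs = minimize(f, x0=[12, 12], method="L-BFGS-B")
print(res_lbfgs.x)  #should again be close to the minimum at (0, 0)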
(Imperfect) solutions:¶
- Stochastic Gradient Descent (and its extensions)
- Particle Swarm Optimization
- Simulated Annealing: you start at a high "temperature", where candidate points are allowed to move uphill with high probability. Then, you progressively decrease the temperature so that candidates can (almost) only move downhill. Hopefully, this lets you escape local minima and reach the global minimum.
All of these methods can perform better than Newton-Raphson and give you a better chance of reaching the global minimum, but none is guaranteed to find it in reasonable time.
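As a small sketch of the annealing idea, scipy provides dual_annealing, a simulated-annealing-type global optimizer (the search bounds below are illustrative choices matching the region plotted above):

from scipy.optimize import dual_annealing

res_sa = dual_annealing(f_nonconvex, bounds=[(-3, 3), (-3, 3)])
print(res_sa.x, res_sa.fun)  #the run is stochastic, but it should usually land near the global minimum found earlier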