Lecture 5: Advanced Econometrics: BLP and Rust¶
Antoine Chapel (Sciences Po & PSE) ¶
Alfred Galichon's math+econ+code prerequisite class on numerical optimization and econometrics, in Python ¶
Class content by Antoine Chapel. Past and present support from Alfred Galichon's ERC grant CoG-866274 is acknowledged, as well as inputs from contributors listed here. If you reuse material from this class, please cite as:
Antoine Chapel, 'math+econ+code' prerequisite class on numerical optimization and econometrics, January 2023
In this last session, we will study two advanced econometrics topics that are covered in the M+E+C masterclass. The masterclass assumes knowledge of the principles behind these two techniques and revisits them using Optimal Transport. Today, we will build the basics for these two tools and introduce two fields of economics in the process:
- B(erry)-L(evinsohn)-P(akes): a structural model in industrial organization
- Rust: a structural model for dynamic discrete choice
These two topics are kept for the end because you now have all the intermediate tools necessary to understand them, and because the mainstream algorithm used for both has a similar structure, called the nested fixed point (NFXP) algorithm.
References¶
- "Automobile Prices in Market Equilibrium", (Berry, Levinsohn and Pakes), Econometrica Vol. 63, No. 4 (Jul., 1995), pp. 841-890
- "Best practices for differentiated products demand estimation with PyBLP", (Conlon and Gortmaker), The RAND Journal of Economics, volume 51, No. 4, 2020
- Lecture notes on Empirical IO (Marleen Marra)
- "Optimal Replacement of GMC Bus Engines: An Empirical Model of Harold Zurcher", (John Rust), Econometrica, Vol. 55, No. 5 (Sep., 1987), pp. 999-1033
- Nested Fixed Point Algorithm Documentation Manual (Rust), version 6, 2000
The data used here and the code for Rust were taken and adapted from an excellent Rust replication by Natasha Watkins. We invite you to take a look at the original notebook for a more complete replication of the Rust paper and a good tutorial on handling the data from the original paper: https://notes.quantecon.org/submission/6234fe0f96e1ce001b61fad8
BLP: Random Coefficients Logit Model¶
We will study the simplest possible form of BLP and ignore many of the subtleties of the topic. Great papers (BLP 1995, Nevo 2000) have been written to explain the random coefficients logit model, and we refer you to those for the details. The model also has many variations, so what we present here focuses on the basic idea of what BLP is and how the algorithm works.
The utility function is specified as follows, where $i$ indexes individuals and $j$ indexes alternatives:
\begin{align} u_{ij} = x_j'\beta_i - \alpha p_j + \xi_j + \epsilon_{ij} \end{align}
- $x_j$ is a vector of characteristics specific to alternative $j$
- $p_j$ is the price of alternative $j$
- $\xi_j$ is the unobserved heterogeneity specific to alternative $j$
- $\epsilon_{ij}$ is the econometric error of the model, which we assume to be Gumbel distributed.
How do these differ from the models you are used to? The coefficient $\beta$ is indexed by individual. Denoting $\overline{\beta}$ the mean of this "random coefficient", and considering that the vector $x_j$ contains $K$ characteristics:
\begin{align} x_j' \beta_i = x_j' \overline{\beta} + \sum_{k=1}^K \sigma_k x_{jk} \nu_{ik} \end{align}
- $\nu_{ik} \sim \mathcal{N}(0, 1)$ is an individual-characteristic idiosyncratic shock. This is what drives the randomness in $\beta_i$
- $\overline{\beta}_k$ and $\sigma_k$ are the parameters to estimate
\begin{align*} u_{ij} &= x_j' \overline{\beta} - \alpha p_j + \xi_j + \sum_{k=1}^K \sigma_k x_{jk} \nu_{ik} + \epsilon_{ij} \\ &= \delta_j + \sum_{k=1}^K \sigma_k x_{jk} \nu_{ik} + \epsilon_{ij}\\ &= \delta_j + \mu_{ij} \end{align*}
Where $\delta_j$ denotes the "mean utility" that alternative $j$ provides to individuals, and $\mu_{ij}$ denotes the deviation from that mean utility at the individual level.
For a given individual $i$, we can use the formula above to rewrite the choice probability of that individual using the multinomial logit form you know already:
\begin{align*} P_{ij|\nu_i} &= \frac{\exp(\delta_j + \sum_{k=1}^K \sigma_k x_{jk} \nu_{ik})}{1 + \sum_{h=1}^J \exp(\delta_h + \sum_{k=1}^K \sigma_k x_{hk} \nu_{ik})} \end{align*}
The only uncertain component in this expression is $\nu_{i}$, which varies across individuals. Therefore, the "market share" of alternative $j$, when integrated across the population, can be written as follows:
\begin{align} s_j = \int P_{ij|\nu_i} f_\nu(\nu_i) d\nu_i \end{align}
This integral is not necessarily easy to compute exactly: in their paper, BLP rely on simulation to compute it. That is, they draw $N$ values of $\nu_i$ from a normal distribution and thereby approximate the integral numerically.
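To make this concrete, here is a minimal sketch (not pyblp's implementation) of the simulation step, with hypothetical dimensions and values for the characteristics X_chars, the mean utilities delta and the standard deviations sigma. The simulation draws nu are held fixed, as they must be during estimation:
import numpy as np
rng = np.random.default_rng(0)
J, K, N = 5, 3, 1000                 #alternatives, characteristics, simulated individuals
X_chars = rng.normal(size=(J, K))    #hypothetical product characteristics x_{jk}
nu = rng.standard_normal((N, K))     #fixed simulation draws nu_{ik}
def simulate_shares(delta, sigma):
    mu = (nu * sigma) @ X_chars.T                     #mu_{ij} = sum_k sigma_k x_{jk} nu_{ik}
    ev = np.exp(delta + mu)                           #exponentiated systematic utilities
    probs = ev / (1 + ev.sum(axis=1, keepdims=True))  #logit probabilities, outside option normalized to 0
    return probs.mean(axis=0)                         #Monte Carlo approximation of s_j
simulate_shares(delta=rng.normal(size=J), sigma=np.array([0.5, 1.0, 0.2]))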
One last element is required for BLP: instruments. Indeed, prices are endogenous in this model (they are correlated with the unobserved quality $\xi_j$), which is why we need to rely on instrumental variables $Z$. If you do not know anything about IV, read up on it in "Mostly Harmless Econometrics" or "Microeconometrics". As a reminder, instruments need to have two properties: they must be strongly correlated with the endogenous variable (here, the price), and uncorrelated with the structural error (here, $\xi_j$).
The algorithm is composed of an outer loop and an inner loop. The outer loop is an IV-GMM procedure, by which we try to find the parameters such that $E[\xi' Z] = 0$ (the moment condition for IV). Translated into a GMM problem, it seeks the parameter $\theta$ that minimizes the (squared, weighted) empirical moment $\frac{1}{JT} \sum_{j, t} \xi_{jt}(\theta)'Z_{jt}$. The minimization process is the outer loop. The empirical moment is evaluated in an inner loop, which proceeds through the following steps (a code sketch follows the list).
Given a candidate $\theta_0 = (\beta_0, \sigma_0)$¶
- Set $\hat{\theta} = \theta_0$
- Using $\hat{\theta}$, determine the $\hat{\delta}_{jt}$ such that simulated market shares $\tilde{s}_j$ match observed market shares $s_j$. This is done through a contraction mapping that can be summarized as $\delta^{k+1} = \delta^k + \log(s_j) - \log(\tilde{s}_j)$
- Through linear regression, recover the unobserved heterogeneity that corresponds to the current guess of $\theta$: $\hat{\xi}(\hat{\theta}) = \hat{\delta}_{jt} + \alpha p_{jt} - x_{jt} \hat{\overline{\beta}}$
- Evaluate $\hat{E}[\hat{\xi}(\hat{\theta})'Z] = \frac{\sum_{j, t} z'_{jt} \hat{\xi}_{jt}(\theta)}{JT}$
- Return GMM objective $\big(\frac{\sum_{j, t} z'_{jt} \hat{\xi}_{jt}(\theta)}{JT}\big)'W\big(\frac{\sum_{j, t} z'_{jt} \hat{\xi}_{jt}(\theta)}{JT}\big)$
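To fix ideas, here is a minimal sketch of the inner loop described above, not pyblp's actual implementation. It assumes hypothetical arrays s_obs (observed shares), X, p, Z, a weighting matrix W, and a share simulator simulate_shares(delta, sigma) such as the one sketched earlier; for simplicity, the IV step assumes exactly as many instruments as regressors:
import numpy as np
def gmm_objective(sigma, s_obs, X, p, Z, W, simulate_shares, tol=1e-12, maxiter=1000):
    #contraction mapping: find the delta that matches the observed shares
    delta = np.log(s_obs)  #a simple starting value
    for _ in range(maxiter):
        delta_new = delta + np.log(s_obs) - np.log(simulate_shares(delta, sigma))
        if np.max(np.abs(delta_new - delta)) < tol:
            break
        delta = delta_new
    #just-identified IV regression of delta on (X, p) to recover xi
    R = np.column_stack([X, p])
    coef = np.linalg.solve(Z.T @ R, Z.T @ delta)
    xi = delta - R @ coef
    #empirical moment and weighted GMM objective
    g = Z.T @ xi / len(xi)
    return g @ W @ g
The outer loop is then a standard minimization of gmm_objective over sigma, for instance with scipy.optimize.minimize.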
This is all fairly technical already, and we have ignored several complexities, in particular the convergence properties of the contraction mapping used in BLP to recover $\delta$. Fortunately, pyblp can do all of it on our behalf. The following crash tutorial is taken from the simplest version of BLP presented in the pyblp documentation, available here
#!pip install pyblp
#To run the pyblp part, you may need to revert your numpy installation to an older version (1.18.1 works).
import pandas as pd
import pyblp
import numpy as np
import matplotlib.pyplot as plt
#this (fake) cereal data is provided directly by the pyblp library to try out their code
product_data = pd.read_csv(pyblp.data.NEVO_PRODUCTS_LOCATION)
product_data.columns
Index(['market_ids', 'city_ids', 'quarter', 'product_ids', 'firm_ids', 'brand_ids', 'shares', 'prices', 'sugar', 'mushy', 'demand_instruments0', 'demand_instruments1', 'demand_instruments2', 'demand_instruments3', 'demand_instruments4', 'demand_instruments5', 'demand_instruments6', 'demand_instruments7', 'demand_instruments8', 'demand_instruments9', 'demand_instruments10', 'demand_instruments11', 'demand_instruments12', 'demand_instruments13', 'demand_instruments14', 'demand_instruments15', 'demand_instruments16', 'demand_instruments17', 'demand_instruments18', 'demand_instruments19'], dtype='object')
A short tutorial in pyblp¶
pyblp is a good tool to master if you are interested in industrial organization (IO) or if you want to RA for a professor who does IO. The official tutorial, from which the following was taken, is a great resource and we invite you to take a look at it if you want to go further than what is presented here.
- Load the data as a Pandas object (done already)
- Write an $X_1$ formulation, which contains all variables for which you do not want random coefficients, i.e. prices in our example (in the example below there is no constant, hence the $0$, but we absorb product fixed effects)
- Write an $X_2$ formulation, which includes every variable for which you want random coefficients (the $1$ stands for the constant)
- Select a method for simulating the market shares
- Build a pyblp.Problem object that aggregates the regression formulations, the data, and the integration method.
#X1: Linear variables, no random coefficients
#X2: Nonlinear variables, random coefficients
X1_formulation = pyblp.Formulation('0 + prices', absorb='C(product_ids)')
X2_formulation = pyblp.Formulation('1 + prices + sugar + mushy')
product_formulations = (X1_formulation, X2_formulation)
#Numerical integration of the probability P_{ij}
mc_integration = pyblp.Integration('monte_carlo', size=50, specification_options={'seed': 0})
mc_integration
mc_problem = pyblp.Problem(product_formulations, product_data, integration=mc_integration)
mc_problem
Initializing the problem ... Absorbing demand-side fixed effects ... Initialized the problem after 00:00:00.

Dimensions:
============================================
 T    N    F    I    K1   K2   MD   ED
---  ----  ---  ----  ----  ----  ----  ----
 94  2256   5   4700   1    4    20    1
============================================

Formulations:
===========================================================
       Column Indices:           0       1      2      3
-----------------------------  ------  ------  -----  -----
 X1: Linear Characteristics    prices
X2: Nonlinear Characteristics    1     prices  sugar  mushy
===========================================================
pyblp.options.verbose = False
results = mc_problem.solve(sigma=np.ones((4, 4)), optimization=pyblp.Optimization('bfgs', {'gtol': 1e-4}))
results
Problem Results Summary:
================================================================================================================
GMM   Objective      Gradient         Hessian         Hessian     Clipped  Weighting Matrix  Covariance Matrix
Step    Value          Norm        Min Eigenvalue  Max Eigenvalue  Shares  Condition Number  Condition Number
----  -------------  -------------  --------------  --------------  -------  ----------------  -----------------
 2    +1.483665E+02  +8.703747E-05  +8.515620E-02   +6.535576E+03     0      +5.150953E+07     +8.252073E+05
================================================================================================================

Cumulative Statistics:
===========================================================================
Computation  Optimizer  Optimization   Objective   Fixed Point  Contraction
   Time      Converged   Iterations   Evaluations  Iterations   Evaluations
-----------  ---------  ------------  -----------  -----------  -----------
 00:01:00       Yes          58           75          88300       271026
===========================================================================

Nonlinear Coefficient Estimates (Robust SEs in Parentheses):

Sigma:      1                prices           sugar            mushy
------  ---------------  ---------------  ---------------  ---------------
1       +1.207566E+00
        (+2.961805E+00)
prices  -1.144760E+01    +8.423600E+00
        (+1.774956E+01)  (+1.157303E+01)
sugar   +6.057555E-02    -9.136790E-02    +3.783374E-02
        (+2.485504E-01)  (+2.282689E-01)  (+8.298206E-02)
mushy   -5.879114E-01    -6.218281E-01    -2.261704E-02    +4.800048E-01
        (+2.132815E+00)  (+1.532679E+00)  (+2.530561E+00)  (+1.330829E+00)

Sigma Squared:  1                prices           sugar            mushy
--------------  ---------------  ---------------  ---------------  ---------------
1               +1.458216E+00    -1.382374E+01    +7.314899E-02    -7.099420E-01
                (+7.153152E+00)  (+5.188636E+01)  (+2.222540E-01)  (+2.272004E+00)
prices          -1.382374E+01    +2.020046E+02    -1.463091E+00    +1.492144E+00
                (+5.188636E+01)  (+3.052988E+02)  (+1.203963E+00)  (+1.508886E+01)
sugar           +7.314899E-02    -1.463091E+00    +1.344888E-02    +2.034639E-02
                (+2.222540E-01)  (+1.203963E+00)  (+2.772954E-02)  (+2.731843E-01)
mushy           -7.099420E-01    +1.492144E+00    +2.034639E-02    +9.632262E-01
                (+2.272004E+00)  (+1.508886E+01)  (+2.731843E-01)  (+3.964838E+00)

Beta Estimates (Robust SEs in Parentheses):
===============
    prices
---------------
 -3.137350E+01
(+6.006485E+00)
===============
Rust 1987¶
In 1987, a paper by Rust introduced, in a concise and understandable manner, the estimation of dynamic discrete choice processes. So far, we have done static discrete choice, where an individual's choice is completely described by the alternatives available today. Here, every choice has consequences for the future. The modelling of the individual's behaviour will therefore combine dynamic programming and discrete choice models. Fortunately, knowing both tools, you are perfectly able to estimate the structural parameters of a dynamic discrete choice problem.
Dynamic Discrete Choice¶
In a quite general form, the dynamic discrete choice problem can be written as follows. We focus here on the infinite-horizon case, so the policy is stationary. We denote the present state $x$ and the next-period state $x'$.
\begin{align} V_\theta(x,\epsilon) &= \max_{d \in D(x)} \Big[ u(x, d, \theta) + \epsilon(d) + \beta\int_{x'} \int_{\epsilon'} V_\theta(x', \epsilon') \, \pi(x',\epsilon'|x, \epsilon, d, \theta) \, d\epsilon' \, dx' \Big]\\ &= \max_{d \in D(x)} \Big[ u(x, d, \theta) + \epsilon(d) + \beta E[V_\theta(x', \epsilon') | x, d] \Big] \end{align}
Let's decompose this expression: when the agent is at state $x$, he faces a set of possible actions $D(x)$. Given his decision, he gets a flow of immediate utility $u(x, d, \theta)$, a random shock $\epsilon(d)$, and a discounted expected value of the future state, $\beta E[V_\theta(x', \epsilon')|x, d]$.
As you can see, there is some randomness in the value of the future state. We integrate twice, for the two sources of randomness: taking action $d$ does not pin down the next state $x'$, and we do not know the future random shock $\epsilon'$. So far this is a lot of uncertainty, and we can hardly go further. Thus we make two assumptions:
- $\epsilon \sim_{iid} \text{Gumbel}(0, 1)$
- $\pi(x',\epsilon' | x, \epsilon, d, \theta) = p(x'|x, d, \theta) \cdot q(\epsilon'|x', \theta)$
The first one is straightforward and implies, as usual, that we can write conditional choice probabilities in closed form. But what does the second one mean? It says that we can separate the state transition from the random shock: this is Rust's conditional independence assumption.
The Emax operator¶
A small aside that will be useful anytime you tackle this type of problem. Let there be $J$ alternatives that provide systematic utility $U_j$ plus an iid Gumbel$(0,1)$ random shock $\epsilon_j$:
\begin{align} E\big[ \max_j \{U_j + \epsilon_j\} \big] = \gamma + \ln \sum_j e^{U_j} \end{align}
Where $\gamma = E[\epsilon_j] \approx 0.577$ is the Euler-Mascheroni constant.
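The formula is easy to check by simulation; a quick sketch with arbitrary utilities (the two printed numbers should be close):
import numpy as np
rng = np.random.default_rng(0)
U = np.array([1.0, 0.5, -0.3])                         #arbitrary systematic utilities
eps = rng.gumbel(loc=0, scale=1, size=(1_000_000, 3))  #iid Gumbel(0, 1) shocks
print((U + eps).max(axis=1).mean())                    #simulated E[max_j {U_j + eps_j}]
print(np.euler_gamma + np.log(np.exp(U).sum()))        #closed-form Emax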
Applying the Emax formula to our value function:
\begin{align} E\big[V_\theta (x)\big] &= E\Big[ \max_{d \in D(x)} \big\{ u(x, d, \theta) + \epsilon(d) + \beta E[V_\theta(x')]\big\}\Big] \\ &= \int_{x} \Big[ \gamma + \log \sum_{d \in D(x)} \exp\big\{ u(x, d, \theta) + \beta E[V_\theta(x')] \big\}\Big] p(x|d, \theta)\, dx \end{align}
So, we integrate out the $\epsilon$ uncertainty and rewrite the value function:
\begin{align} V_\theta(x) &= \max_{d \in D(x)} \Big[ u(x, d, \theta) + \epsilon(d) + \beta E[V_\theta(x')]\Big]\\ &= \max_{d \in D(x)} \Big[ u(x, d, \theta) + \epsilon(d) + \beta\int_{x'} \Big[ \gamma +\log \sum_{d' \in D(x')} \exp\big\{ u(x', d', \theta) + \beta E[V_\theta(x'')] \big\} \Big] p(x'|x, d, \theta)\, dx' \Big] \end{align}
Remember our session on multinomial logit: we can use McFadden's formula. If alternative $j$ provides utility $U_{ij} = V_{ij} + \epsilon_{ij}$ to consumer $i$, the probability that he chooses alternative $j$ can be written:
\begin{align} P_{ij} = P(j = \arg \max U_{ij}) = \frac{e^{V_{ij}}}{\sum_k e^{V_{ik}}} \end{align}
Here, the equivalent of the systematic utility $V_{ij}$ in the dynamic setting is the choice-specific value $u(x, d, \theta) + \beta E[V_\theta(x')|x, d]$. Therefore, the Conditional Choice Probabilities are written as follows:
\begin{align} P(d|x, \theta) = \frac{\exp\{{u(x, d, \theta) + \beta E[V_\theta(x')|x, d]}\}}{\sum_{k \in D(x)} \exp\{{u(x, k, \theta) + \beta E[ V_\theta(x')|x, k] }\}} \end{align}
Let us now take an econometric perspective: we observe $(x, d)_{i, t}$, but not $\theta$. The estimation of $\hat{\theta}$ can be done through maximum likelihood:
\begin{align} LL(\theta) = \sum_t \sum_i \sum_j d_{ijt} \log \big( P(j|x_{it}, \theta) \big) \end{align}
Where $d_{ijt}$ is an indicator equal to $1$ if agent $i$ chose $j$ at time $t$. We can then find the ML estimate $\hat{\theta} = \arg \max_\theta LL(\theta)$
The Rust model (optimal bus engine replacement)¶
An agent manages a fleet of buses. For a given bus, in every period, the agent can either replace the engine and incur a cost $RC$, or perform day-to-day maintenance at cost $c(x, \theta_1)$, where $x$ is the mileage of the bus. When the mileage of a bus increases, the cost of day-to-day maintenance increases as well, so at some point it is cost-efficient to just replace the engine and reset the mileage.
We denote the agent's utility as:
\begin{align} u(x, d, \theta) = \begin{cases} -RC - c(0, \theta_1) & \text{if $d=1$ (replace)}\\ -c(x, \theta_1) & \text{if $d=0$ (do not replace)} \end{cases} \end{align}
Mileage $x$ of a bus is reset if $d=1$, and increases stochastically if $d=0$. Thus, we can write the transition probabilities as:
\begin{align} p(x'|x, d, \theta) = \begin{cases} g(x'- 0|\theta) &\text{if $d=1$}\\ g(x' - x|\theta) &\text{if $d=0$} \end{cases} \end{align}
from scipy.optimize import minimize
P_df = pd.read_csv(r"https://raw.githubusercontent.com/AntoineChapel/pedagogical_contents/main/rust/transition_matrix.csv").iloc[0:, 1:]
rust_data = pd.read_csv(r"https://raw.githubusercontent.com/AntoineChapel/pedagogical_contents/main/rust/rust_data.csv").iloc[:, 2:]
#There are ways to estimate it from the data (a sketch is given below), but to keep things
#accessible, I am giving it here.
#How to interpret: every row/column index corresponds to a mileage category: 0: [0, 5000],
#1: [5000, 10000], 2: [10000, 15000]...
#If you are in mileage category 1, you have a 35% chance of staying in that category, a 64% chance
#of moving up one category, and a 1% chance of moving up two categories.
P_df
         0         1         2         3         4         5         6   ...       85        86        87        88        89
0   0.35103  0.637445  0.011525  0.000000  0.000000  0.000000  0.000000  ...  0.00000  0.000000  0.000000  0.000000  0.000000
1   0.00000  0.351030  0.637445  0.011525  0.000000  0.000000  0.000000  ...  0.00000  0.000000  0.000000  0.000000  0.000000
2   0.00000  0.000000  0.351030  0.637445  0.011525  0.000000  0.000000  ...  0.00000  0.000000  0.000000  0.000000  0.000000
3   0.00000  0.000000  0.000000  0.351030  0.637445  0.011525  0.000000  ...  0.00000  0.000000  0.000000  0.000000  0.000000
4   0.00000  0.000000  0.000000  0.000000  0.351030  0.637445  0.011525  ...  0.00000  0.000000  0.000000  0.000000  0.000000
..      ...       ...       ...       ...       ...       ...       ...  ...      ...       ...       ...       ...       ...
85  0.00000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  ...  0.35103  0.637445  0.011525  0.000000  0.000000
86  0.00000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  ...  0.00000  0.351030  0.637445  0.011525  0.000000
87  0.00000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  ...  0.00000  0.000000  0.351030  0.637445  0.011525
88  0.00000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  ...  0.00000  0.000000  0.000000  0.351030  0.648970
89  0.00000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  ...  0.00000  0.000000  0.000000  0.000000  1.000000

[90 rows × 90 columns]
P = P_df.to_numpy()
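As the comment above suggests, the transition probabilities can be estimated as the empirical frequencies of the mileage-category increments between consecutive periods in which the engine was not replaced. A rough sketch, under the simplifying assumption that consecutive rows of rust_data belong to the same bus (bus boundaries add a little noise):
states = rust_data['state'].to_numpy()
decisions = rust_data['decision'].to_numpy()
jumps = (states[1:] - states[:-1])[decisions[:-1] == 0]  #increments without replacement
jumps = jumps[jumps >= 0]                                #drop resets at bus boundaries
print([(jumps == j).mean() for j in (0, 1, 2)])          #compare with the rows of P_df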
rust_data
       state  decision
0        0.0       0.0
1        0.0       0.0
2        1.0       0.0
3        2.0       0.0
4        3.0       0.0
...      ...       ...
8255    68.0       0.0
8256    68.0       0.0
8257    69.0       0.0
8258    69.0       0.0
8259    69.0       0.0

[8260 rows × 2 columns]
#Parameters used in the original paper
T = 90
β = 0.9999
scale=1e-3
γ = np.euler_gamma
x = np.arange(T)
data = rust_data.to_numpy()
#θ is a vector of the two parameters we wish to estimate: θ[0] is the maintenance cost per unit
#of mileage, and θ[1] is the replacement cost.
def u(x, d, θ):
    #flow utility of the agent at mileage x
    if d==1: #replace: pay the replacement cost θ[1] (the caller passes x=0, since mileage resets)
        uval = -θ[1] - (scale*x*θ[0])
    elif d==0: #do not replace: pay the maintenance cost, increasing in mileage
        uval = -(scale*x*θ[0])
    return uval
A computational trick to avoid overflow issues, where $rep$ and $wait$ denote the choice-specific values of replacing and not replacing:
\begin{align} EV_{new} &= \log(e^{rep} + e^{wait}) \\ &= \log(e^{rep} + e^{wait}) - \log(e^{EV}) + EV \\ &= \log(e^{rep - EV} + e^{wait - EV}) + EV \end{align}
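The recentred version only exponentiates differences, which stay moderate even when the values themselves are large. A toy illustration, with hypothetical values a and b standing in for rep and wait:
a, b = 800.0, 801.0
naive = np.log(np.exp(a) + np.exp(b))                #overflows: np.exp(800) is inf
shifted = np.log(np.exp(a - b) + np.exp(b - b)) + b  #stable: the exponents are <= 0
print(naive, shifted)                                #inf vs. roughly 801.31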
def return_EV(θ, tol=1e-3, maxiter=300000, verbose=False):
    #fixed-point algorithm: iterate the Bellman operator on EV until convergence
    n_iter = 0
    EV = np.zeros((T, 1))
    error = 1e5
    while error > tol and n_iter < maxiter:
        #choice-specific values, recentred by EV to avoid overflow (see the trick above)
        u_not_replace = u(x, 0, θ).reshape(T, 1) + β*EV
        u_replace = u(x[0], 1, θ) + β*EV[0]
        EV_new = P@(np.log(np.exp(u_replace - EV) + np.exp(u_not_replace - EV)) + EV)
        error = np.max(np.abs(EV - EV_new))
        n_iter += 1
        if verbose:
            print(f'Iteration {n_iter}, error: {error}')
        EV = EV_new.copy()
    return EV
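As a quick sanity check, we can compute the fixed point at an arbitrary parameter guess, here θ = (2, 10):
EV_guess = return_EV(np.array([2.0, 10.0]))
print(EV_guess[:5].ravel())  #expected values at the first five mileage states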
def return_ccp(θ):
    #conditional choice probabilities P(d|x, θ), computed from the fixed point EV
    EV = return_EV(θ)
    state_ccp_map = np.empty((T, 2))
    for state in x:
        #choice-specific values at this mileage state
        u_not_replace = u(state, 0, θ) + β*(P[state, :] @ EV)
        u_replace = u(0, 1, θ) + β*EV[0]
        #binary logit formula
        proba_not_replace = (1/(1 + np.exp(u_replace - u_not_replace)))[0]
        proba_replace = (1/(1 + np.exp(u_not_replace - u_replace)))[0]
        state_ccp_map[state, :] = np.array([proba_not_replace, proba_replace])
    return state_ccp_map
def ll(θ):
    #log-likelihood of the observed (state, decision) pairs
    CCP = return_ccp(θ)
    logL = 0
    for s, d in data:
        if int(d)==0:
            logL += np.log(CCP[int(s)][0])
        elif int(d)==1:
            logL += np.log(CCP[int(s)][1])
    return -logL #negated, since scipy minimizes
#We start close to the optimal solution to speed things up; feel free to try alternative starting
#values, but be warned: the optimization already takes a long time to run.
theta_star = minimize(ll, x0=np.array([2, 10])).x
theta_star
array([ 2.61806004, 10.03903027])
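With the estimate in hand, we can inspect the implied policy: the probability of replacing the engine as a function of the mileage state, using the functions defined above (this re-runs the fixed point once):
ccp_star = return_ccp(theta_star)  #CCPs at the estimated parameters
plt.plot(x, ccp_star[:, 1])        #probability of replacement by mileage state
plt.xlabel('Mileage state')
plt.ylabel('P(replace)')
plt.show()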