--------------------------------------------------- lines 5-420 of file: example/user/hes_fixed_math.py --------------------------------------------------- # {xrst_begin user_hes_fixed_math.py} # {xrst_spell # cc # } # {xrst_comment_ch #} # # Check the Hessian of the Fixed Effects Objective # ################################################ # # Purpose # ******* # This is a detailed mathematical explanation of computing Hessian of the # fixed effects objective used by the # :ref:`sample_command@asymptotic` sampling method. # # Reference # ********* # See the # `theory `_ # section of the ``cppad_mixed`` documentation. # # Notation # ******** # # .. csv-table:: # :widths: auto # # :math:`\theta`,model incidence for the parent region # :math:`u_0`,incidence random effect for first child # :math:`u_1`,incidence random effect for second child # :math:`y_0`,measured incidence for the first child # :math:`y_1`,measured incidence for the second child # :math:`s`,standard deviation for data and random effects # # The only fixed effect in this model is :math:`\theta` # (sometimes written *theta* ) the incidence level for the world. # The random effects are :math:`u_0` and :math:`u_1`. # # Problem Parameters # ****************** # {xrst_spell_off} # {xrst_code py} import math import time theta_true = 0.5 u_true = [ 0.5, -0.5 ] y_0 = theta_true * math.exp( u_true[0] ) y_1 = theta_true * math.exp( u_true[1] ) standard_dev = theta_true # the standard deviation s random_seed = int( time.time() ) number_sample = 1000 eta_in_prior = 1e-6 # if None, the fixed effects are not scaled # {xrst_code} # {xrst_spell_on} # # Random Likelihood # ***************** # The negative log-likelihood for this example, ignoring the constant # of integration, is # # .. math:: # # f( \theta, u ) # = \frac{1}{2 s^2} \left( # [ y_0 - \theta \exp( u_0 ) ]^2 + # [ y_1 - \theta \exp( u_1 ) ]^2 + # u_0^2 + # u_1^2 # \right) # # Gradient w.r.t. Fixed Effects # ***************************** # # .. math:: # # f_\theta ( \theta, u ) # = # \frac{1}{s^2} [ # \theta \exp( 2 u_0 ) - y_0 \exp( u_0 ) + # \theta \exp( 2 u_1 ) - y_1 \exp( u_1 ) # ] # # Gradient w.r.t. Random Effects # ****************************** # # .. math:: # # f_u ( \theta, u ) # = # \frac{1}{s^2} \left( # \begin{array}{c} # \theta^2 \exp( 2 u_0 ) - y_0 \theta \exp( u_0 ) + u_0 # \\ # \theta^2 \exp( 2 u_1 ) - y_1 \theta \exp( u_1 ) + u_1 # \end{array} # \right) # # Hessian w.r.t. Fixed Effects # **************************** # # .. math:: # # f_{\theta,\theta} ( \theta, u ) # = # \frac{1}{s^2} [ \exp( 2 u_0 ) + \exp( 2 u_1 ) ] # # Hessian w.r.t. Random Effects # ***************************** # # .. math:: # # f_{u,u} ( \theta, u ) # = # \frac{1}{s^2} \left( # \begin{array}{cc} # 2 \theta^2 \exp( 2 u_0 ) - y_0 \theta \exp( u_0 ) + 1 & 0 # \\ # 0 & 2 \theta^2 \exp( 2 u_1 ) - y_1 \theta \exp( u_1 ) + 1 # \end{array} # \right) # # Hessian Cross Term # ****************** # # .. math:: # # f_{u,\theta} ( \theta, u ) # = # \frac{1}{s^2} \left( # \begin{array}{c} # 2 \theta \exp( 2 u_0 ) - y_0 \exp( u_0 ) # \\ # 2 \theta \exp( 2 u_1 ) - y_1 \exp( u_1 ) # \end{array} # \right) # # Optimal Random Effects # ********************** # # Implicit Function Definition # ============================ # The optimal random effects :math:`\hat{u} ( \theta )` # solve the equation # # .. math:: # # f_u [ \theta , \hat{u} ( \theta ) ] = 0 # # Derivatives of Optimal Random Effects # ===================================== # Using the implicit function theorem we have # # .. math:: # # \hat{u}^{(1)} ( \theta )= - # f_{u,u} [ \theta , \hat{u} ( \theta) ]^{-1} # f_{u,\theta} [ \theta , \hat{u} ( \theta) ] # # Substituting in the formulas above for the Hessian terms on the # right hand side we obtain: # # .. math:: # # \hat{u}_i^{(1)} ( \theta ) = - \frac{ # 2 \theta \exp[ 2 \hat{u}_i ( \theta) ] - # y_i \exp[ \hat{u}_i ( \theta) ] # }{ # 2 \theta^2 \exp[ 2 \hat{u}_i ( \theta) ] - # y_i \theta \exp[ \hat{u}_i ( \theta) ] + 1 # } # # We can compute :math:`\hat{u}_i ( \theta )` by optimizing the # random effects corresponding to the fixed effects being :math:`\theta`. # We define :math:`g_i ( \theta )` by the equation # # .. math:: # # g_i ( \theta ) = 2 \theta \exp[ 2 \hat{u}_i ( \theta) ] # - y_i \exp[ \hat{u}_i ( \theta ) ] # # Give :math:`\hat{u}_i ( \theta )` we can compute :math:`g_i ( \theta )`. # Given :math:`g_i ( \theta )`, we can compute # the derivative :math:`\hat{u}_i^{(1)} ( \theta )` using # # .. math:: # # \hat{u}_i^{(1)} ( \theta ) = - # \frac{ g_i ( \theta ) }{ \theta g_i ( \theta ) + 1} # # Given :math:`\hat{u}^{(1)} ( \theta )`, we can compute # the derivative :math:`g_i^{(1)} ( \theta )` using # # .. math:: # # g_i^{(1)} ( \theta ) = # 2 \exp[ 2 \hat{u}_i ( \theta) ] + # \left( # 4 \theta \exp [ 2 \hat{u}_i ( \theta ) ] - # y_i \exp [ \hat{u}_i ( \theta ) ] # \right) \hat{u}_i^{(1)} ( \theta ) # # Given :math:`g_i^{(1)} ( \theta )`, we can compute # the second derivative :math:`\hat{u}_i^{(2)} ( \theta )` using # # .. math:: # # \hat{u}_i^{(2)} ( \theta ) = # \frac{ g_i ( \theta ) [ g_i ( \theta ) + \theta g_i ^{(1)} ( \theta ) ] } # { [ \theta g_i ( \theta ) + 1 ]^2 } # - # \frac{ g_i ^{(1)}( \theta ) }{ \theta g_i ( \theta ) + 1} # # .. math:: # # \hat{u}_i^{(2)} ( \theta ) = # \frac{ g_i ( \theta )^2 - g_i ^{(1)}( \theta )} # { [ \theta g_i ( \theta ) + 1 ]^2 } # # Given :math:`\hat{u}^{(2)} ( \theta )`, we can compute # the second derivative :math:`g_i^{(2)} ( \theta )` using # # .. math:: # :nowrap: # # \begin{eqnarray} # g_i^{(2)} ( \theta ) & = & # 8 \exp[ 2 \hat{u}_i ( \theta) ] \hat{u}_i^{(1)} ( \theta ) # \\ & + & # \left( # 8 \theta \exp [ 2 \hat{u}_i ( \theta ) ] - # y_i \exp [ \hat{u}_i ( \theta ) ] # \right) \hat{u}_i^{(1)} ( \theta )^2 # \\ & + & # \left( # 4 \theta \exp [ 2 \hat{u}_i ( \theta ) ] - # y_i \exp [ \hat{u}_i ( \theta ) ] # \right) \hat{u}_i^{(2)} ( \theta ) # \end{eqnarray} # # Laplace Approximation # ********************* # Up to a constant, the Laplace approximation (fixed effects objective), # as a function of the fixed effects, is: # # .. math:: # # L ( \theta ) = F( \theta ) + G( \theta ) # # where :math:`F( \theta )` and :math:`G( \theta )` are defined by # # .. math:: # # F( \theta ) = f[ \theta , \hat{u} ( \theta ) ] # # .. math:: # # G( \theta ) =\frac{1}{2} \log \det f_{u,u} [ \theta , \hat{u} ( \theta ) ] # # The derivative of :math:`F( \theta )` is given by # # .. math:: # # F^{(1)} ( \theta ) = # f_\theta [ \theta , \hat{u} ( \theta ) ] + # f_u [ \theta , \hat{u} ( \theta ) ] \hat{u}^{(1)} ( \theta ) # # It follows from the definition of :math:`\hat{u} ( \theta )` that # :math:`f_u [ \theta , \hat{u} ( \theta ) ] = 0` and # # .. math:: # # F^{(1)} ( \theta ) = f_\theta [ \theta , \hat{u} ( \theta ) ] # # Taking the derivative of the equation above we obtain # # .. math:: # # F^{(2)} ( \theta ) = # f_{\theta,\theta} [ \theta , \hat{u} ( \theta ) ] + # f_{\theta,u} [ \theta , \hat{u} ( \theta ) ] \hat{u}^{(1)} ( \theta ) # # Combining the definition of :math:`G( \theta )`, :math:`g_i ( \theta )` # and the formula for :math:`f_{u,u} ( \theta , u )` we have # # .. math:: # # G( \theta ) # = # \frac{1}{2} \log \det \left( # \begin{array}{cc} # [ \theta g_0 ( \theta ) + 1 ] / s^2 & 0 # \\ # 0 & [ \theta g_1 ( \theta ) + 1 ] / s^2 # \end{array} # \right) # # Defining :math:`h_i ( \theta )` by # # .. math:: # # h_i ( \theta ) = \theta g_i ( \theta ) + 1 # # we obtain the representation # # .. math:: # # G ( \theta ) = # + \frac{1}{2} \left( # \log [ h_0 ( \theta ) ] + \log [ h_1 ( \theta ) ] - \log ( s^4 ) # \right) # # The first and second derivative of :math:`h_i ( \theta )` # and :math:`G( \theta )` are given by # # .. math:: # # h_i^{(1)} ( \theta ) = g_i ( \theta ) + \theta g_i^{(1)} ( \theta ) # # .. math:: # # G^{(1)} ( \theta ) = # \frac{1}{2} \left( # \frac{ h_0^{(1)} ( \theta ) }{ h_0 ( \theta ) } # + # \frac{ h_1^{(1)} ( \theta ) }{ h_1 ( \theta ) } # \right) # # .. math:: # # h_i^{(2)} ( \theta ) = 2 g_i^{(1)} ( \theta ) + \theta g_i^{(2)} ( \theta ) # # .. math:: # # G^{(2)} ( \theta ) = # \frac{1}{2} \left( # \frac{ h_0 ( \theta ) h_0^{(2)} ( \theta ) - h_0^{(1)} ( \theta )^2 } # { h_0 ( \theta )^2 } # + # \frac{ h_1 ( \theta ) h_1^{(2)} ( \theta ) - h_1^{(1)} ( \theta )^2 } # { h_1 ( \theta )^2 } # \right) # # Asymptotic Statistics # ********************* # The asymptotic posterior distribution for the optimal estimate of # :math:`\theta` give the data :math:`y` # is a normal with variance equal to the inverse of # # .. math:: # # L^{(2)} ( \theta ) = F^{(2)} ( \theta ) + G^{(2)} ( \theta ) # # Scaling Fixed Effects # ********************* # If :ref:`prior_table@eta` is not null, # the Hessian is with respect to :math:`\alpha = \log( \theta + \eta )`. # Inverting this relation we define # :math:`\theta ( \alpha ) = \exp( \alpha ) - \eta`. # The fixed effects objective as a function of :math:`\alpha` is # # .. math:: # # H( \alpha ) = # L[ \theta ( \alpha ) ] = # F[ \theta( \alpha ) ] + G[ \theta( \alpha ) ] # # Taking the first and second derivatives, we have # # .. math:: # # H^{(1)}( \alpha ) = \left( # F^{(1)}[ \theta( \alpha ) ] + G^{(1)}[ \theta( \alpha ) ] # \right) \exp( \alpha ) # # .. math:: # :nowrap: # # \begin{eqnarray} # H^{(2)}( \alpha ) & = & \left( # F^{(1)}[ \theta( \alpha ) ] + G^{(1)}[ \theta( \alpha ) ] # \right) \exp( \alpha ) # \\ & + & # \left( # F^{(2)}[ \theta( \alpha ) ] + G^{(2)}[ \theta( \alpha ) ] # \right) \exp( 2 \alpha ) # \end{eqnarray} # # Optimal Fixed Effects # ===================== # The first order necessary conditions for # :math:`\hat{\alpha}` # to be a minimizer of the fixed effects object is # :math:`H^{(1)} ( \hat{\alpha} ) = 0`. # In this case we can simplify the Hessian scaling as follows: # # .. math:: # :nowrap: # # \begin{eqnarray} # H^{(2)}( \hat{\alpha} ) & = & \left( # F^{(2 }( \hat{\theta} ) + G^{(2)}( \hat{\theta} ) # \right) \exp( 2 \hat{\alpha} ) # \\ & = & # L^{(2)} ( \hat{\theta} ) \exp( 2 \hat{\alpha} ) # \end{eqnarray} # # where :math:`\hat{\theta} = \theta( \hat{\alpha} )`. # # Source Code # *********** # {xrst_literal # BEGIN PYTHON # END PYTHON # } # # {xrst_end user_hes_fixed_math.py}