\(\newcommand{\B}[1]{ {\bf #1} }\) \(\newcommand{\R}[1]{ {\rm #1} }\) \(\newcommand{\W}[1]{ \; #1 \; }\)
user_sample_sim.py#
View page sourceSample Posterior Using Simulated Data#
Purpose#
The command
dismod_at
databasesample simulate
number_sample
samples form the posterior distribution of the model_variables using simulated data.
Node Table#
For this example, the node_table is
north_america
/ \
mexico canada
Model Variables#
All of the rates are constant w.r.t. age an time in this example.
iota#
We use \(\iota_n\) to denote the incidence rate for the parent region north_america which is a fixed effect .
u#
We use \(u_m\) (\(u_c\)) to denote the child rate effect for mexico (canada).
x#
The model variables for this example are combined into a single vector \(x \in \B{R}^3\) where \(x_0 = \iota_n\), \(x_1 = u_m\), and \(x_2 = u_c\).
Model Parameters#
y#
We use \(y_n, y_m, y_c\) to denote the measured Sincidence for north_america, mexico, and canada respectively.
s#
We use \(s_n, s_m, s_c\) to denote the standard deviation of the measured noise for north_america, mexico, and canada respectively.
delta#
We use \(\delta\) to denote the standard deviation of the child rate effects. (The mean for the child rate effects is zero.)
Values#
import math
y_n = 1e-2
y_m = 2.0 * y_n
y_c = y_n / 2.0
s_n = y_n / 10.0
s_m = y_m / 10.0
s_c = y_c / 10.0
delta = math.log( 2.0 )
Likelihood#
We define \(h(y, \mu , \delta)\) to be the log-density for a \(\B{N}( \mu, \delta^2 )\) distribution; i.e.,
The total log-likelihood for this example is
number_sample#
This is the number of samples in the number_sample specified by the sample command.
number_sample = 30
number_metropolis#
This is the number of mcmc samples generated by the metropolis mcmc method. These samples are used to check the simulate method.
number_metropolis = 500 * number_sample
Truth Table#
For the data simulation, we use the following model variable values:
iota_n_true = y_n
u_m_true = math.log( y_m / y_n )
u_c_true = math.log( y_c / y_n )
It follows that y_m = exp
( u_m_true ) * iota_n_true and
y_c = exp
( u_c_true ) * iota_n_true .
random_seed#
Set one random seed for use by both the python code and dismod_at.
In addition, if an error occurs, this random seed will be reported; see
random_seed
in the source code below.
import time
random_seed = int( time.time() )
Source Code#
import numpy
import sys
import os
import copy
test_program = 'example/user/sample_sim.py'
if sys.argv[0] != test_program or len(sys.argv) != 1 :
usage = 'python3 ' + test_program + '\n'
usage += 'where python3 is the python 3 program on your system\n'
usage += 'and working directory is the dismod_at distribution directory\n'
sys.exit(usage)
print(test_program)
#
# import dismod_at
local_dir = os.getcwd() + '/python'
if( os.path.isdir( local_dir + '/dismod_at' ) ) :
sys.path.insert(0, local_dir)
import dismod_at
#
# change into the build/example/user directory
if not os.path.exists('build/example/user') :
os.makedirs('build/example/user')
os.chdir('build/example/user')
# ---------------------------------------------------------------------------
#
# no need to include sqrt{2 \pi} term (it does not depend on model variables)
def h(y, mu, delta ) :
if delta <= 0.0 :
return - float("inf")
res = (y - mu ) / delta
return - res * res - math.log( delta )
#
def log_f(x) :
iota_n = x[0]
u_m = x[1]
u_c = x[2]
#
ret = h(y_n, iota_n, s_n)
ret += h(y_m, math.exp(u_m) * iota_n, s_m )
ret += h(y_c, math.exp(u_c) * iota_n, s_c )
ret += h(u_m, 0.0, delta) + h(u_c, 0.0, delta)
return ret
# ---------------------------------------------------------------------------
# Note that the a, t values are not used for this example
def example_db (file_name) :
def fun_iota_child(a, t) :
return ('prior_iota_child', None, None)
def fun_iota_parent(a, t) :
return ('prior_iota_parent', None, None)
# ----------------------------------------------------------------------
# age table
age_list = [ 0.0, 100.0 ]
#
# time table
time_list = [ 1995.0, 2015.0 ]
#
# integrand table
integrand_table = [ { 'name':'Sincidence' } ]
#
# node table: north_america -> (mexico, canada)
node_table = [
{ 'name':'north_america', 'parent':'' },
{ 'name':'mexico', 'parent':'north_america' },
{ 'name':'canada', 'parent':'north_america' }
]
#
# weight table:
weight_table = list()
#
# covariate table: no covriates
covariate_table = list()
#
# mulcov table
mulcov_table = list()
#
# avgint table:
avgint_table = list()
#
# nslist_dict:
nslist_dict = dict()
# ----------------------------------------------------------------------
# data table:
data_table = list()
row = {
'node': 'north_america',
'subgroup': 'world',
'density': 'gaussian',
'weight': '',
'hold_out': False,
'time_lower': 2000.0,
'time_upper': 2000.0,
'age_lower': 50.0,
'age_upper': 50.0,
'integrand': 'Sincidence',
'meas_value': y_n,
'meas_std': s_n,
}
data_table.append( copy.copy(row) )
#
row['node'] = 'mexico';
row['meas_value'] = y_m
row['meas_std'] = s_m
data_table.append( copy.copy(row) )
#
row['node'] = 'canada';
row['meas_value'] = y_c
row['meas_std'] = s_c
data_table.append( copy.copy(row) )
#
# ----------------------------------------------------------------------
# prior_table
prior_table = [
{ # prior_iota_parent
'name': 'prior_iota_parent',
'density': 'uniform',
'lower': y_n / 100.0 ,
'upper': y_n * 100.0 ,
'mean': y_n
},{ # prior_iota_child
'name': 'prior_iota_child',
'density': 'gaussian',
'mean': 0.0,
'std': delta,
}
]
# ----------------------------------------------------------------------
# smooth table
smooth_table = [
{ # smooth_iota_parent
'name': 'smooth_iota_parent',
'age_id': [ 0 ],
'time_id': [ 0 ],
'fun': fun_iota_parent
}, { # smooth_iota_child
'name': 'smooth_iota_child',
'age_id': [ 0 ],
'time_id': [ 0 ],
'fun': fun_iota_child
}
]
# ----------------------------------------------------------------------
# rate table
rate_table = [
{
'name': 'iota',
'parent_smooth': 'smooth_iota_parent',
'child_smooth': 'smooth_iota_child',
}
]
# ----------------------------------------------------------------------
# option_table
option_table = [
{ 'name':'parent_node_name', 'value':'north_america' },
{ 'name':'ode_step_size', 'value':'10.0' },
{ 'name':'random_seed', 'value':str(random_seed) },
{ 'name':'rate_case', 'value':'iota_pos_rho_zero' },
{ 'name':'quasi_fixed', 'value':'true' },
{ 'name':'derivative_test_fixed', 'value':'first-order' },
{ 'name':'max_num_iter_fixed', 'value':'100' },
{ 'name':'print_level_fixed', 'value':'0' },
{ 'name':'tolerance_fixed', 'value':'1e-12' },
{ 'name':'max_num_iter_random', 'value':'100' },
{ 'name':'print_level_random', 'value':'0' },
{ 'name':'tolerance_random', 'value':'1e-12' },
{ 'name':'zero_sum_child_rate', 'value':'iota' },
]
# ----------------------------------------------------------------------
# subgroup_table
subgroup_table = [ { 'subgroup':'world', 'group':'world' } ]
# ----------------------------------------------------------------------
# create database
dismod_at.create_database(
file_name,
age_list,
time_list,
integrand_table,
node_table,
subgroup_table,
weight_table,
covariate_table,
avgint_table,
data_table,
prior_table,
smooth_table,
nslist_dict,
rate_table,
mulcov_table,
option_table
)
# ===========================================================================
file_name = 'example.db'
example_db(file_name)
#
program = '../../devel/dismod_at'
na_string = str(number_sample)
dismod_at.system_command_prc([ program, file_name, 'init' ])
#
# -----------------------------------------------------------------------
# create truth_table
# -----------------------------------------------------------------------
# truth table:
connection = dismod_at.create_connection(
file_name, new = False, readonly = False
)
var_table = dismod_at.get_table_dict(connection, 'var')
rate_table = dismod_at.get_table_dict(connection, 'rate')
node_table = dismod_at.get_table_dict(connection, 'node')
assert len(var_table) == 3
#
tbl_name = 'truth_var'
col_name = [ 'truth_var_value' ]
col_type = [ 'real' ]
row_list = list()
node_name2var_id = dict()
for var_id in range( len(var_table) ) :
var_info = var_table[var_id]
rate_id = var_info['rate_id']
node_id = var_info['node_id']
rate_name = rate_table[rate_id]['rate_name']
node_name = node_table[node_id]['node_name']
assert( rate_name == 'iota' )
#
if node_name == 'north_america' :
truth_var_value = iota_n_true
elif node_name == 'mexico' :
truth_var_value = u_m_true
else :
assert node_name == 'canada'
truth_var_value = u_c_true
#
row_list.append( [ truth_var_value ] )
node_name2var_id[node_name] = var_id
dismod_at.create_table(connection, tbl_name, col_name, col_type, row_list)
connection.close()
# -----------------------------------------------------------------------
# sample simulate results
#
ns_string = str(number_sample)
dismod_at.system_command_prc(
[ program, file_name, 'simulate', ns_string ]
)
dismod_at.system_command_prc(
[ program, file_name, 'set', 'start_var', 'truth_var' ]
)
dismod_at.system_command_prc(
[ program, file_name, 'set', 'scale_var', 'truth_var' ]
)
dismod_at.system_command_prc(
[ program, file_name, 'sample', 'simulate', 'both', ns_string ]
)
#
connection = dismod_at.create_connection(
file_name, new = False, readonly = True
)
sample_table = dismod_at.get_table_dict(connection, 'sample')
#
# convert samples to a numpy array
sample_array = numpy.zeros( (number_sample, 3), dtype = float )
for row in sample_table :
var_id = row['var_id']
sample_index = row['sample_index']
sample_array[sample_index, var_id ] = row['var_value']
#
# compute statistics
sim_avg = numpy.average(sample_array, axis=0);
sim_std = numpy.std(sample_array, axis=0, ddof=1);
# -----------------------------------------------------------------------
# MCMC results
print('mcmc')
numpy.random.seed( seed = random_seed )
x_start = numpy.array( [ y_n, 0.0, 0.0 ] )
scale = numpy.array( [ y_n, delta, delta] ) * 0.2
(a, c) = dismod_at.metropolis(log_f, number_metropolis, x_start, scale)
burn_in = int( 0.1 * number_metropolis )
c = c[burn_in :, :]
mcmc_avg = numpy.average(c, axis=0)
mcmc_std = numpy.std(c, axis=0, ddof=1)
mcmc_order = [ 'north_america', 'mexico', 'canada' ]
# -----------------------------------------------------------------------
# now compare values
for i in range( len(var_table) ) :
# node that this model variable corresponds to
node_name = mcmc_order[i]
var_id = node_name2var_id[node_name]
sim = sim_avg[var_id]
mcmc = mcmc_avg[i]
err = sim / mcmc - 1.0
if abs(err) > 0.05 :
print(node_name, '_avg (sim, mcmc, err) = ', sim, mcmc, err)
print('random_seed = ', random_seed)
assert(False)
sim = sim_std[var_id]
mcmc = mcmc_std[i]
err = sim / mcmc - 1.0
if abs(err) > 0.5 :
print(node_name, '_std (sim, mcmc, err) = ', sim, mcmc, err)
print('random_seed = ', random_seed)
assert(False)
# ----------------------------------------------------------------------------
print('sample_sim.py: OK')