\(\newcommand{\B}[1]{ {\bf #1} }\) \(\newcommand{\R}[1]{ {\rm #1} }\) \(\newcommand{\W}[1]{ \; #1 \; }\)
user_sim_log.py#
View page sourceSimulating Data with Log Transformed Distribution#
See Also#
Example Parameters#
The following values are used to simulate the data
number_simulate = 2000
iota_true = 0.01
meas_value_global = iota_true * 1.5
eta_global = iota_true * 1e-3
meas_std_global = meas_value_global * 0.25
gamma_global = meas_value_global * 0.25
Model#
The only non-zero model variable for this example is the rate of incidence for the world which is constant in age and time.
Data#
There is only one data point for this example and it’s integrand is Sincidence . This data has a log transformed distribution with mean iota_true , offset eta_global , and standard deviation meas_std_global .
Covariate Multiplier#
For this example there is one covariate multiplier. It is a meas_noise multiplier and the corresponding covariate value is one.
Notation#
\(y\) |
is the measurement value, meas_value_global |
\(\mu\) |
mean of the data, iota_true |
\(\eta\) |
offset in log transform, eta_global |
\(\Delta\) |
data measurement error, meas_std_global |
\(\gamma\) |
meta regression error, gamma_global |
\(n\) |
number of simulated data values, number_simulate |
\(z_i\) |
i-th simulate data for \(i = 1, \ldots , n\) |
delta#
The transformed standard deviation delta is given by
sigma#
For this example we use the add_std_scale_none option in the definition of the adjusted standard deviation sigma \(\sigma\); i.e.,
Simulations#
The offset log transform of each simulated measurement \(z_i\) has the following Gaussian distribution:
Source Code#
import sys
import os
import subprocess
import copy
import time
import numpy
test_program = 'example/user/sim_log.py'
if sys.argv[0] != test_program or len(sys.argv) != 1 :
usage = 'python3 ' + test_program + '\n'
usage += 'where python3 is the python 3 program on your system\n'
usage += 'and working directory is the dismod_at distribution directory\n'
sys.exit(usage)
print(test_program)
#
# import dismod_at
local_dir = os.getcwd() + '/python'
if( os.path.isdir( local_dir + '/dismod_at' ) ) :
sys.path.insert(0, local_dir)
import dismod_at
#
# change into the build/example/user directory
if not os.path.exists('build/example/user') :
os.makedirs('build/example/user')
os.chdir('build/example/user')
#
# random_seed_str
random_seed_str = str( int( time.time() ) )
# ----------------------------------------------------------------------------
# run a system command
def system_command(command) :
print( ' '.join(command) )
flag = subprocess.call( command )
if flag != 0 :
msg = 'command failed: flag = ' + str(flag)
msg += ', random_seed = ' + random_seed_str
sys.exit(msg)
return
# ------------------------------------------------------------------------
def fun_iota_parent(a, t) :
return ('prior_iota_parent', None, None)
def fun_gamma(a, t):
return ('prior_gamma', None, None)
# ------------------------------------------------------------------------
def example_db (file_name) :
# ----------------------------------------------------------------------
# age table:
age_list = [ 0.0, 100.0 ]
#
# time table:
time_list = [ 1990.0, 2020.0 ]
#
# integrand table:
integrand_table = [
{ 'name':'Sincidence', 'minimum_meas_cv':0.0 }
]
#
# node table:
node_table = [ { 'name':'world', 'parent':'' } ]
#
# empty tables
weight_table = list()
covariate_table = list()
mulcov_table = list()
avgint_table = list()
nslist_dict = dict()
#
# covariate table
covariate_table = [
{'name':'one', 'reference':0.0}
]
#
# mulcov table
mulcov_table = [ {
'covariate': 'one',
'type': 'meas_noise',
'effected': 'Sincidence',
'group': 'world',
'smooth': 'smooth_gamma',
} ]
# ----------------------------------------------------------------------
# data table:
# values that are the same for all data rows
row = {
'meas_value': meas_value_global,
'eta': eta_global,
'weight': '',
'hold_out': False,
'time_lower': 2000.,
'time_upper': 2000.,
'age_lower': 40.0,
'age_upper': 40.0,
'subgroup': 'world',
'density': 'log_gaussian',
'meas_std': meas_std_global,
'node': 'world',
'integrand': 'Sincidence',
'one': 1.0,
}
data_table = [ row ]
# ----------------------------------------------------------------------
# prior_table
# Note that the prior mean is equal to the true value for iota
prior_table = [
{ # prior_iota_parent
'name': 'prior_iota_parent',
'density': 'uniform',
'lower': iota_true / 100.,
'upper': iota_true * 10.0,
'mean': iota_true ,
'std': None,
'eta': None
},{
# prior_gamma
'name': 'prior_gamma',
'density': 'uniform',
'lower': gamma_global,
'upper': gamma_global,
'mean': gamma_global,
}
]
# ----------------------------------------------------------------------
# smooth table
age_id = 0
time_id = 0
name = 'smooth_iota_parent'
fun = fun_iota_parent
smooth_table = [
{'name':name, 'age_id':[age_id], 'time_id':[time_id], 'fun':fun }
]
name = 'smooth_gamma'
fun = fun_gamma
smooth_table.append(
{'name':name, 'age_id':[age_id], 'time_id':[time_id], 'fun':fun }
)
# ----------------------------------------------------------------------
# rate table:
rate_table = [
{ 'name': 'iota',
'parent_smooth': 'smooth_iota_parent',
'child_smooth': None,
'child_nslist': None
}
]
# ----------------------------------------------------------------------
# option_table
option_table = [
{ 'name':'rate_case', 'value':'iota_pos_rho_zero' },
{ 'name':'parent_node_name', 'value':'world' },
{ 'name':'random_seed', 'value':random_seed_str },
{ 'name':'meas_noise_effect', 'value':'add_std_scale_none' },
{ 'name':'quasi_fixed', 'value':'false' },
{ 'name':'max_num_iter_fixed', 'value':'50' },
{ 'name':'print_level_fixed', 'value':'0' },
{ 'name':'tolerance_fixed', 'value':'1e-6' },
{ 'name':'max_num_iter_random', 'value':'100' },
{ 'name':'print_level_random', 'value':'0' },
{ 'name':'tolerance_random', 'value':'1e-8' }
]
# ----------------------------------------------------------------------
# subgroup_table
subgroup_table = [ { 'subgroup':'world', 'group':'world' } ]
# ----------------------------------------------------------------------
# create database
dismod_at.create_database(
file_name,
age_list,
time_list,
integrand_table,
node_table,
subgroup_table,
weight_table,
covariate_table,
avgint_table,
data_table,
prior_table,
smooth_table,
nslist_dict,
rate_table,
mulcov_table,
option_table
)
# ----------------------------------------------------------------------
return
# ===========================================================================
# file_name
file_name = 'example.db'
#
# create example.db
example_db(file_name)
#
# program
program = '../../devel/dismod_at'
#
# init database
system_command([ program, file_name, 'init' ])
#
# Note that the prior mean is equal to the true value for iota
system_command([ program, file_name, 'set', 'truth_var', 'prior_mean' ])
#
# create data_sim table
system_command([ program, file_name, 'simulate', str(number_simulate) ])
# -----------------------------------------------------------------------
# check simulated data
from math import log
connection = dismod_at.create_connection(
file_name, new = False, readonly = True
)
data_sim_table = dismod_at.get_table_dict(connection, 'data_sim')
data_table = dismod_at.get_table_dict(connection, 'data')
data_subset_table = dismod_at.get_table_dict(connection, 'data_subset')
residual_list = list()
for row in data_sim_table :
data_sim_value = row['data_sim_value']
data_subset_id = row['data_subset_id']
data_id = data_subset_table[data_subset_id]['data_id']
meas_value = data_table[data_id]['meas_value']
Delta = data_table[data_id]['meas_std']
eta = data_table[data_id]['eta']
sigma = Delta + gamma_global
delta = log(meas_value + eta + sigma) - log(meas_value + eta)
residual = (log(data_sim_value + eta) - log(iota_true + eta) )/delta
residual_list.append( residual )
residual_array = numpy.array( residual_list )
residual_mean = residual_array.mean()
residual_std = residual_array.std(ddof=1)
# check that the mean of the residuals is within 2.5 standard deviations
assert abs(residual_mean) <= 2.5 / numpy.sqrt(number_simulate)
# check that the standard deviation of the residuals is near one
assert abs(residual_std - 1.0) <= 2.5 / numpy.sqrt(number_simulate)
# -----------------------------------------------------------------------------
print('fit_sim.py: OK')