\(\newcommand{\B}[1]{ {\bf #1} }\) \(\newcommand{\R}[1]{ {\rm #1} }\) \(\newcommand{\W}[1]{ \; #1 \; }\)
user_binomial.py¶
View page sourceFitting Binomial Data Example¶
Iota¶
There is only one grid point in the parent and child smoothing for iota, hence they are constant with respect to age and time.
Parent¶
The value iota_true
is the simulated true rate for iota
for both the parent and two child nodes.
A uniform prior is used for the parent rate with
iota_parent_true /100 as a lower limit,
and 1 as the upper limit.
Child¶
The iota Child Rate Effects are simulated with value zero. They are fit using a Gaussian prior with a mean zero and standard deviation 0.1.
Other Rates¶
The parent_smooth_id and child_smooth_id are null for the other rates; i.e.,the other rates are zero.
Data¶
Integrand¶
All of the data is for the prevalence integrand. Since iota is constant, and the other rates are zero, the true prevalence is \(1 - \exp( - iota * age )\) .
meas_value¶
The prevalence data is simulate using a binomial distribution divided by its sample size. The mean of the binomial distribution is the sample size times the true prevalence.
Sample Size¶
The sample size for each data point is chosen as follows:
def next_sample_size() :
sample_size = numpy.random.uniform(
low = numpy.log( sample_size_min ) ,
high = numpy.log( sample_size_max ) ,
)
sample_size = numpy.exp( round( sample_size ) )
return sample_size
Density¶
If the count, corresponding to the measured value, is greater than or equal a threshold, a Gaussian approximation is used, otherwise the binomial density is used. Note that the approximation seems to work well even when the threshold is zero.
Source Code¶
# values used to simulate data
import numpy
iota_true = 2e-4
n_children = 2
n_data = 50
random_seed = 0
if random_seed == 0 :
import time
random_seed = int( time.time() )
numpy.random.seed( int(random_seed) )
#
# sample_size_min, sample_size_max
# prevalence is 1.0 - exp( -iota_true * age )
sample_size_min = 0.5 / ( 1.0 - numpy.exp( - iota_true * 50.0 ) )
sample_size_max = 100.0 * sample_size_min
# ------------------------------------------------------------------------
import sys
import os
import copy
test_program = 'example/user/binomial.py'
check_program = sys.argv[0].replace('\\', '/')
if check_program != test_program or len(sys.argv) != 1 :
usage = 'python3 ' + test_program + '\n'
usage += 'where python3 is the python 3 program on your system\n'
usage += 'and working directory is the dismod_at distribution directory\n'
sys.exit(usage)
print(test_program)
#
# import dismod_at
local_dir = os.getcwd() + '/python'
if( os.path.isdir( local_dir + '/dismod_at' ) ) :
sys.path.insert(0, local_dir)
import dismod_at
#
# change into the build/example/user directory
if not os.path.exists('build/example/user') :
os.makedirs('build/example/user')
os.chdir('build/example/user')
# ------------------------------------------------------------------------
# BEGIN_SAMPLE_SIZE
def next_sample_size() :
sample_size = numpy.random.uniform(
low = numpy.log( sample_size_min ) ,
high = numpy.log( sample_size_max ) ,
)
sample_size = numpy.exp( round( sample_size ) )
return sample_size
# END_SAMPLE_SIZE
# ------------------------------------------------------------------------
def binomial(sample_size, success_rate) :
count = numpy.random.binomial(n = sample_size, p = success_rate)
return count
# ------------------------------------------------------------------------
# Note that the a, t values are not used for this example
def example_db (file_name) :
# note that the a, t values are not used for this case
def fun_rate_child(a, t) :
return ('prior_iota_child', None, None)
def fun_iota_parent(a, t) :
return ('prior_iota_parent', None, None)
# ----------------------------------------------------------------------
# age table:
age_list = [ 0.0, 50.0, 100.0 ]
#
# time table:
time_list = [ 1980.0, 2000.0, 2200.0 ]
#
# integrand table:
integrand_table = [ { 'name':'prevalence' } ]
#
# node table:
node_table = [ { 'name':'world', 'parent':'' } ]
for i in range(n_children) :
name = 'child_' + str(i + 1)
node_table.append( { 'name':name, 'parent':'world' } )
#
# ----------------------------------------------------------------------
# data table:
data_table = list()
#
# values that are the same for all data rows
row = {
'integrand': 'prevalence',
'weight': '',
'hold_out': False,
'time_lower': 2000.,
'time_upper': 2000.,
}
# values that change between rows:
for data_id in range( n_data ) :
row['node'] = 'child_' + str( (data_id % n_children) + 1 )
fraction = (data_id + 1) / float(n_data)
age = age_list[0] + (age_list[-1] - age_list[0])*fraction
row['age_lower'] = age
row['age_upper'] = age
meas_mean = 1.0 - numpy.exp( -iota_true * age )
assert 0.0 < meas_mean and meas_mean < 1.0
sample_size = next_sample_size()
count = binomial(sample_size, meas_mean)
meas_value = count / sample_size
row['sample_size'] = sample_size
row['meas_value'] = meas_value
threshold = 5
if threshold <= count :
row['density'] = 'gaussian'
k = count
n = sample_size
meas_std = numpy.sqrt( (k + 1)*(n + 1 - k) / n ) / (n + 2)
row['meas_std'] = meas_std
else :
row['density'] = 'binomial'
row['meas_std'] = None
data_table.append( copy.copy(row) )
#
# ----------------------------------------------------------------------
# prior_table
prior_table = [
{ # prior_iota_child
'name': 'prior_iota_child',
'density': 'gaussian',
'mean': 0.0,
'std': 0.1,
},{ # prior_iota_parent
'name': 'prior_iota_parent',
'density': 'uniform',
'lower': iota_true / 10.,
'upper': 1.0,
'mean': 0.1,
}
]
#----------------------------------------------------------------------
# smooth table
name = 'smooth_rate_child'
fun = fun_rate_child
age_id = int( len( age_list ) / 2 )
time_id = int( len( time_list ) / 2 )
smooth_table = [
{'name':name, 'age_id':[age_id], 'time_id':[time_id], 'fun':fun }
]
name = 'smooth_iota_parent'
fun = fun_iota_parent
smooth_table.append(
{'name':name, 'age_id':[age_id], 'time_id':[time_id], 'fun':fun }
)
# no standard deviation multipliers
for dictionary in smooth_table :
for name in [ 'value' , 'dage', 'dtime' ] :
key = 'mulstd_' + name + '_prior_name'
value = None
dictionary[key] = value
# ----------------------------------------------------------------------
# rate table:
rate_table = [
{ 'name': 'iota',
'parent_smooth': 'smooth_iota_parent',
'child_smooth': 'smooth_rate_child',
}
]
# ----------------------------------------------------------------------
# option_table
option_table = [
{ 'name':'rate_case', 'value':'iota_pos_rho_zero' },
{ 'name':'random_seed', 'value':str(random_seed) },
{ 'name':'parent_node_name', 'value':'world' },
{ 'name':'ode_step_size', 'value':'10.0' },
{ 'name':'meas_noise_effect', 'value':'add_std_scale_all' },
{ 'name':'quasi_fixed', 'value':'false' },
{ 'name':'derivative_test_fixed', 'value':'first-order' },
{ 'name':'max_num_iter_fixed', 'value':'100' },
{ 'name':'print_level_fixed', 'value':'0' },
{ 'name':'tolerance_fixed', 'value':'1e-10' },
{ 'name':'derivative_test_random', 'value':'second-order' },
{ 'name':'max_num_iter_random', 'value':'100' },
{ 'name':'print_level_random', 'value':'0' },
{ 'name':'tolerance_random', 'value':'1e-10' }
]
# ----------------------------------------------------------------------
# subgroup_table
subgroup_table = [ { 'subgroup':'world', 'group':'world' } ]
# ----------------------------------------------------------------------
# create database
dismod_at.create_database(
file_name = file_name,
age_list = age_list,
time_list = time_list,
integrand_table = integrand_table,
node_table = node_table,
subgroup_table = subgroup_table,
weight_table = list(),
covariate_table = list(),
avgint_table = list(),
data_table = data_table,
prior_table = prior_table,
smooth_table = smooth_table,
nslist_dict = dict(),
rate_table = rate_table,
mulcov_table = list(),
option_table = option_table,
)
# ----------------------------------------------------------------------
return
# ===========================================================================
# Run the init command to create the var table
file_name = 'example.db'
example_db(file_name)
#
program = '../../devel/dismod_at'
dismod_at.system_command_prc([ program, file_name, 'init' ])
dismod_at.system_command_prc([ program, file_name, 'fit' , 'both' ])
# -----------------------------------------------------------------------
# read database
connection = dismod_at.create_connection(
file_name, new = False, readonly = False
)
var_table = dismod_at.get_table_dict(connection, 'var')
rate_table = dismod_at.get_table_dict(connection, 'rate')
node_table = dismod_at.get_table_dict(connection, 'node')
rate_table = dismod_at.get_table_dict(connection, 'rate')
fit_var_table = dismod_at.get_table_dict(connection, 'fit_var')
# -----------------------------------------------------------------------------
count = 0
for (var_id, var_row) in enumerate(var_table) :
var_type = var_row['var_type']
node_id = var_row['node_id']
node_name = None
if node_id != None :
node_name = node_table[node_id]['node_name']
if var_type == 'rate' and node_name == 'world' :
count += 1
assert count == 1
#
rate_id = var_row['rate_id']
rate_name = rate_table[rate_id]['rate_name']
assert rate_name == 'iota'
#
fit_var_value = fit_var_table[var_id]['fit_var_value']
rel_error = 1. - fit_var_value / iota_true
if abs(rel_error) >= 0.1 :
print( f'binomial.py: rel_error = {rel_error}')
assert abs(rel_error) < 0.1
connection.close()
print('binomial.py: OK')
# -----------------------------------------------------------------------------