user_binomial.py#

Fitting Binomial Data Example#

Iota#

There is only one grid point in the parent and child smoothing for iota, hence they are constant with respect to age and time.

Parent#

The value iota_true is the simulated true rate for iota for both the parent and two child nodes. A uniform prior is used for the parent rate with iota_parent_true /100 as a lower limit, and 1 as the upper limit.

Child#

The iota Child Rate Effects are simulated with value zero. They are fit using a Gaussian prior with a mean zero and standard deviation 0.1.

Other Rates#

The parent_smooth_id and child_smooth_id are null for the other rates; i.e.,the other rates are zero.

Data#

Integrand#

All of the data is for the prevalence integrand. Since iota is constant, and the other rates are zero, the true prevalence is \(1 - \exp( - iota * age )\) .

meas_value#

The prevalence data is simulate using a binomial distribution divided by its sample size. The mean of the binomial distribution is the sample size times the true prevalence.

Sample Size#

The sample size for each data point is chosen as follows:

def next_sample_size() :
   sample_size = numpy.random.uniform(
      low  = numpy.log( sample_size_min ) ,
      high = numpy.log( sample_size_max ) ,
   )
   sample_size = numpy.exp( round( sample_size ) )
   return sample_size

Density#

If the count, corresponding to the measured value, is greater than or equal a threshold, a Gaussian approximation is used, otherwise the binomial density is used. Note that the approximation seems to work well even when the threshold is zero.

Source Code## values used to simulate data
import numpy
iota_true          = 2e-4
n_children         = 2
n_data             = 50
random_seed        = 0
if random_seed == 0 :
   import time
   random_seed = int( time.time() )
numpy.random.seed( int(random_seed) )
#
# sample_size_min, sample_size_max
# prevalence is 1.0 - exp( -iota_true * age )
sample_size_min  = 0.5 / ( 1.0 - numpy.exp( - iota_true * 50.0 ) )
sample_size_max  = 100.0 * sample_size_min
# ------------------------------------------------------------------------
import sys
import os
import copy
test_program = 'example/user/binomial.py'
if sys.argv[0] != test_program  or len(sys.argv) != 1 :
   usage  = 'python3 ' + test_program + '\n'
   usage += 'where python3 is the python 3 program on your system\n'
   usage += 'and working directory is the dismod_at distribution directory\n'
   sys.exit(usage)
print(test_program)
#
# import dismod_at
local_dir = os.getcwd() + '/python'
if( os.path.isdir( local_dir + '/dismod_at' ) ) :
   sys.path.insert(0, local_dir)
import dismod_at
#
# change into the build/example/user directory
if not os.path.exists('build/example/user') :
   os.makedirs('build/example/user')
os.chdir('build/example/user')
# ------------------------------------------------------------------------
# BEGIN_SAMPLE_SIZE
def next_sample_size() :
   sample_size = numpy.random.uniform(
      low  = numpy.log( sample_size_min ) ,
      high = numpy.log( sample_size_max ) ,
   )
   sample_size = numpy.exp( round( sample_size ) )
   return sample_size
# END_SAMPLE_SIZE
# ------------------------------------------------------------------------
def binomial(sample_size, success_rate) :
   count      = numpy.random.binomial(n = sample_size, p = success_rate)
   return count
# ------------------------------------------------------------------------
# Note that the a, t values are not used for this example
def example_db (file_name) :
   # note that the a, t values are not used for this case
   def fun_rate_child(a, t) :
      return ('prior_iota_child', None, None)
   def fun_iota_parent(a, t) :
      return ('prior_iota_parent', None, None)
   # ----------------------------------------------------------------------
   # age table:
   age_list    = [ 0.0, 50.0, 100.0 ]
   #
   # time table:
   time_list   = [ 1980.0, 2000.0, 2200.0 ]
   #
   # integrand table:
   integrand_table = [ { 'name':'prevalence' } ]
   #
   # node table:
   node_table = [ { 'name':'world', 'parent':'' } ]
   for i in range(n_children) :
      name = 'child_' + str(i + 1)
      node_table.append( { 'name':name, 'parent':'world' } )
   #
   # ----------------------------------------------------------------------
   # data table:
   data_table = list()
   #
   # values that are the same for all data rows
   row = {
      'integrand':  'prevalence',
      'weight':      '',
      'hold_out':     False,
      'time_lower':   2000.,
      'time_upper':   2000.,
   }
   # values that change between rows:
   for data_id in range( n_data ) :
      row['node']         = 'child_' + str( (data_id % n_children) + 1 )
      fraction            = (data_id + 1) / float(n_data)
      age                 = age_list[0] + (age_list[-1] - age_list[0])*fraction
      row['age_lower']    = age
      row['age_upper']    = age
      meas_mean           = 1.0 - numpy.exp( -iota_true * age )
      assert 0.0 < meas_mean and meas_mean < 1.0
      sample_size         = next_sample_size()
      count               = binomial(sample_size, meas_mean)
      meas_value          = count / sample_size
      row['sample_size']  = sample_size
      row['meas_value']   = meas_value
      threshold = 5
      if threshold <= count :
         row['density']  = 'gaussian'
         k               = count
         n               = sample_size
         meas_std        = numpy.sqrt( (k + 1)*(n + 1 - k) / n ) / (n + 2)
         row['meas_std'] = meas_std
      else :
         row['density']  = 'binomial'
         row['meas_std'] = None
      data_table.append( copy.copy(row) )
   #
   # ----------------------------------------------------------------------
   # prior_table
   prior_table = [
      {  # prior_iota_child
         'name':     'prior_iota_child',
         'density':  'gaussian',
         'mean':     0.0,
         'std':      0.1,
      },{ # prior_iota_parent
         'name':     'prior_iota_parent',
         'density':  'uniform',
         'lower':    iota_true / 10.,
         'upper':    1.0,
         'mean':     0.1,
      }
   ]
   #----------------------------------------------------------------------
   # smooth table
   name           = 'smooth_rate_child'
   fun            = fun_rate_child
   age_id         = int( len( age_list ) / 2 )
   time_id        = int( len( time_list ) / 2 )
   smooth_table = [
      {'name':name, 'age_id':[age_id], 'time_id':[time_id], 'fun':fun }
   ]
   name = 'smooth_iota_parent'
   fun  = fun_iota_parent
   smooth_table.append(
      {'name':name, 'age_id':[age_id], 'time_id':[time_id], 'fun':fun }
   )
   # no standard deviation multipliers
   for dictionary in smooth_table :
      for name in [ 'value' , 'dage', 'dtime' ] :
         key   = 'mulstd_' + name + '_prior_name'
         value = None
         dictionary[key] = value
   # ----------------------------------------------------------------------
   # rate table:
   rate_table = [
      {  'name':          'iota',
         'parent_smooth': 'smooth_iota_parent',
         'child_smooth':  'smooth_rate_child',
      }
   ]
   # ----------------------------------------------------------------------
   # option_table
   option_table = [
      { 'name':'rate_case',              'value':'iota_pos_rho_zero' },
      { 'name':'random_seed',            'value':str(random_seed)    },
      { 'name':'parent_node_name',       'value':'world'             },
      { 'name':'ode_step_size',          'value':'10.0'              },
      { 'name':'meas_noise_effect',      'value':'add_std_scale_all' },

      { 'name':'quasi_fixed',            'value':'false'        },
      { 'name':'derivative_test_fixed',  'value':'first-order'  },
      { 'name':'max_num_iter_fixed',     'value':'100'          },
      { 'name':'print_level_fixed',      'value':'0'            },
      { 'name':'tolerance_fixed',        'value':'1e-10'        },

      { 'name':'derivative_test_random', 'value':'second-order' },
      { 'name':'max_num_iter_random',    'value':'100'          },
      { 'name':'print_level_random',     'value':'0'            },
      { 'name':'tolerance_random',       'value':'1e-10'        }
   ]
   # ----------------------------------------------------------------------
   # subgroup_table
   subgroup_table = [ { 'subgroup':'world', 'group':'world' } ]
   # ----------------------------------------------------------------------
   # create database
   dismod_at.create_database(
      file_name        = file_name,
      age_list         = age_list,
      time_list        = time_list,
      integrand_table  = integrand_table,
      node_table       = node_table,
      subgroup_table   = subgroup_table,
      weight_table     = list(),
      covariate_table  = list(),
      avgint_table     = list(),
      data_table       = data_table,
      prior_table      = prior_table,
      smooth_table     = smooth_table,
      nslist_dict      = dict(),
      rate_table       = rate_table,
      mulcov_table     = list(),
      option_table     = option_table,
   )
   # ----------------------------------------------------------------------
   return
# ===========================================================================
# Run the init command to create the var table
file_name = 'example.db'
example_db(file_name)
#
program = '../../devel/dismod_at'
dismod_at.system_command_prc([ program, file_name, 'init' ])
dismod_at.system_command_prc([ program, file_name, 'fit' , 'both' ])
# -----------------------------------------------------------------------
# read database
connection      = dismod_at.create_connection(
   file_name, new = False, readonly = False
)
var_table        = dismod_at.get_table_dict(connection, 'var')
rate_table       = dismod_at.get_table_dict(connection, 'rate')
node_table       = dismod_at.get_table_dict(connection, 'node')
rate_table       = dismod_at.get_table_dict(connection, 'rate')
fit_var_table    = dismod_at.get_table_dict(connection, 'fit_var')
# -----------------------------------------------------------------------------
count = 0
for (var_id, var_row) in enumerate(var_table) :
   var_type  = var_row['var_type']
   node_id   = var_row['node_id']
   node_name = None
   if node_id != None :
      node_name = node_table[node_id]['node_name']
   if var_type == 'rate' and node_name == 'world' :
      count += 1
      assert count == 1
      #
      rate_id   = var_row['rate_id']
      rate_name = rate_table[rate_id]['rate_name']
      assert rate_name == 'iota'
      #
      fit_var_value = fit_var_table[var_id]['fit_var_value']
      rel_error     = 1. - fit_var_value / iota_true
      if abs(rel_error) >= 0.1 :
         print( f'binomial.py: rel_error = {rel_error}')
      assert abs(rel_error) < 0.1
connection.close()
print('binomial.py: OK')
# -----------------------------------------------------------------------------