--------------------------------------------------------
lines 5-492 of file: python/dismod_at/create_database.py
--------------------------------------------------------

# {xrst_begin create_database}
# {xrst_spell
#     bool
#     da
#     dt
#     len
#     subsmooth
#     tuple
# }
# {xrst_comment_ch #}
#
# Create a Dismod_at Database
# ###########################
#
# Prototype
# *********
# {xrst_literal
#     # BEGIN_PROTOTYPE
#     # END_PROTOTYPE
# }
#
# Purpose
# *******
# This routine makes it easy to create a ``dismod_at`` database
# with all of its :ref:`input-name` tables.
# This is only meant for small example and testing cases and is not efficient.
#
# Primary Key
# ***********
# For each of the lists above, the order of the
# elements in the corresponding table is the same as the corresponding list.
# For example, *age_list* [ *i* ] corresponds to the *i*-th row
# of the ``age`` table which has
# :ref:`database@Primary Key` value *age_id* = *i* .
#
# Name Column
# ***********
# The :ref:`name columns<database@Name Column>` are created with the unique
# constraint; i.e., it will be an error to have the same value appear
# twice in a column *table_name* _ ``name`` in the table
# *table_name* .
#
# file_name
# *********
# is a ``str`` containing the name of the file where the database
# is stored.
# If this file already exists, it is deleted and a database is created.
#
# age_list
# ********
# is a ``list`` of ``float`` that
# specify age values by indices.
#
# time_list
# *********
# is a ``list`` of ``float`` that
# specify time values by indices.
#
# integrand_table
# ***************
# This is a list of ``dict``
# that define the rows of the :ref:`integrand_table-name` .
# The dictionary *integrand_table* [ *i* ] has the following:
#
# .. csv-table::
#     :widths: auto
#
#     Key,Value Type,Description
#     name,str,name for the *i*-th integrand
#     minimum_meas_cv,str,minimum measurement cv for this integrand
#
# The key ``minimum_meas_cv`` is optional.
# If it is not present, ``0.0`` is used for the corresponding value.
#
# node_table
# **********
# This is a list of ``dict``
# that define the rows of the :ref:`node_table-name` .
# The dictionary *node_table* [ *i* ] has the following:
#
# .. csv-table::
#     :widths: auto
#
#     Key,Value Type,Description
#     name,str,name for the *i*-th node
#     parent,str,name of parent of the *i*-th node
#
# Note that if the i-th node does not have a parent, the empty string
# should be used for the parent of that node.
#
# subgroup_table
# **************
# This is a list of ``dict``
# that define the rows of the :ref:`subgroup_table-name` .
# The dictionary *subgroup_table* [ *i* ] has the following:
#
# .. csv-table::
#     :widths: auto
#
#     Key,Value Type,Description
#     subgroup,str,name for the *i*-th subgroup
#     group,str,name of group that subgroup is in
#
# Backward Compatibility
# ======================
# To get backward compatibility to before the subgroup information was added,
# add the following table to the ``create_database`` call
# (just after the *node_table* ):
#
#     *subgroup_table* = [ { ``'subgroup'`` : ``'world'`` , ``'group'`` : ``'world'``  } ]
#
# No other changes to the ``create_database`` call should be necessary
# (for backward compatibility).
#
# weight_table
# ************
# This is a list of ``dict``
# that define the rows of the :ref:`weight_table-name` and
# :ref:`weight_grid_table-name` .
# The dictionary *weight_table* [ *i* ] has the following:
#
# .. list-table::
#     :widths: auto
#
#     * - Key
#       - Value Type
#       - Description
#     * - name
#       - str
#       - name of *i*-th weighting
#     * - age_id
#       - list of int
#       - indices for age grid
#     * - time_id
#       - list of int
#       - indices for time grid
#     * - fun
#       - function
#       - *w* = *fun* ( *a* , *t* )
#
# The float *w* is the value of this weighting at the corresponding
# float age *a* and float time *t* .
# Note that there is an *i* , *j* such that
# *a* = *age_list* [ *age_id* [ *i* ]] and
# *t* = *time_list* [ *time_id* [ *j* ]] .
#
# covariate_table
# ***************
# This is a list of ``dict``
# that define the rows of the :ref:`covariate_table-name` .
# The dictionary *covariate_table* [ *i* ] has the following:
#
# .. csv-table::
#     :widths: auto
#
#     Key,Value Type,Description
#     name,str,name for the *i*-th covariate
#     reference,float,reference value for *i*-th covariate
#     max_difference,float,maximum difference for *i*-th covariate
#
# If *max_difference* is ``None`` , the corresponding table entry
# is null and this corresponds to an infinite maximum difference.
# If *max_difference* does not appear, null is written for the
# corresponding covariate entry.
#
# avgint_table
# ************
# This is a list of ``dict``
# that define the rows of the :ref:`avgint_table-name` .
# The dictionary *avgint_table* [ *i* ] has the following:
#
# .. csv-table::
#     :widths: auto
#
#     Key,Value Type,Description
#     integrand,str,integrand for *i*-th data
#     node,str,name of node in graph
#     subgroup,str,name of subgroup
#     weight,str,weighting function name
#     age_lower,float,lower age limit
#     age_upper,float,upper age limit
#     time_lower,float,lower time limit
#     time_upper,float,upper time limit
#     *c_0*,float,value of first covariate
#     ...,...,...
#     *c_J*,float,value of last covariate
#
# subgroup
# ========
# If the ``subgroup`` key is not present, the first subgroup in
# :ref:`create_database@subgroup_table` is used
# and a warning is printed.
#
# weight
# ======
# The weighting function name identifies an
# entry in the :ref:`create_database@weight_table`
# by its *name* . If *weight* is the empty string,
# the constant weighting is used.
#
# covariates
# ==========
# Note that *J* = ``len`` ( *covariate_table* ) ``- 1`` and for
# *j* = 0 , ... , *J* ,
#
#     *c_j* = *covariate_table* [ *j* ][ ``'name'`` ]
#
# We refer to the columns above as the required columns for
# *avgint_table* .
#
# avgint_extra_columns
# ====================
# If a *row* of *option_table* has *row* [ ``'name'`` ]
# equal to ``'avgint_extra_columns'`` , the corresponding
# *row* [ ``'value'`` ]. ``split`` () is the list of extra avgint table columns.
# Otherwise the list of extra avgint table columns is empty.
#
# data_table
# **********
# This is a list of ``dict``
# that define the rows of the :ref:`data_table-name` .
# It has all the columns required for the *avgint_table* .
# In addition, the dictionary *data_table* [ *i* ] has the following:
#
# .. csv-table::
#     :widths: auto
#
#     Key,Value Type,Description
#     hold_out,bool,hold out flag
#     density,str,:ref:`density_table@density_name`
#     meas_value,float,measured value
#     meas_std,float,standard deviation
#     eta,float,offset in log-transform
#     nu,float,Student's-t degrees of freedom
#     sample_size,int,sample size for a binomial distribution
#
# meas_std, eta, nu, sample_size
# ==============================
# The column keys ``meas_std`` , ``eta`` , ``nu`` , and ``sample_size``
# are optional. If they are not present, the value ``null`` is used
# for the corresponding row of the data table.
#
# subgroup
# ========
# If the ``subgroup`` key is not present, the first subgroup in
# :ref:`create_database@subgroup_table` is used
# and a warning is printed.
#
# data_extra_columns
# ==================
# If a *row* of *option_table* has *row* [ ``'name'`` ]
# equal to ``'data_extra_columns'`` , the corresponding
# *row* [ ``'value'`` ]. ``split`` () is the list of extra data table columns.
# Otherwise the list of extra data table columns is empty.
#
# prior_table
# ***********
# This is a list of ``dict``
# that define the rows of the :ref:`prior_table-name` .
# The dictionary *prior_table* [ *i* ] has the following:
#
# .. csv-table::
#     :widths: auto
#
#     Key,Value Type,Description
#     name,str,name of *i*-th prior
#     lower,float,lower limit
#     upper,float,upper limit
#     std,float,standard deviation
#     density,str,:ref:`density_table@density_name`
#     eta,float,offset in log densities
#     nu,float,degrees of freedom in Student densities
#
# The column keys
# ``lower`` , ``upper`` , ``std`` , ``eta`` , and ``nu``
# are optional. If they are not present, the value ``null`` is used
# for the corresponding row of the prior table.
#
# smooth_table
# ************
# This is a list of ``dict``
# that define the rows of the :ref:`smooth_table-name` and
# :ref:`smooth_grid_table-name` .
# The dictionary *smooth_table* [ *i* ] has the following keys:
#
# name
# ====
# an ``str`` specifying the name used to reference the *i*-th smoothing.
#
# age_id
# ======
# a list of ``int`` specifying the age values for this smoothing
# as indices in :ref:`create_database@age_list` .
#
# time_id
# =======
# a list of ``int`` specifying the time values for this smoothing
# as indices in :ref:`create_database@time_list` .
#
# mulstd_value_prior_name
# =======================
# an ``str`` specifying the prior used for the value multiplier
# for the *i*-th smoothing; see
# :ref:`smooth_table@mulstd_value_prior_id` .
# This key is optional and its default value is ``None`` which corresponds
# to ``null`` in the database.
#
# mulstd_dage_prior_name
# ======================
# an ``str`` specifying the prior used for the age difference multiplier
# for the *i*-th smoothing; see
# :ref:`smooth_table@mulstd_dage_prior_id` .
# This key is optional and its default value is ``None`` which corresponds
# to ``null`` in the database.
#
# mulstd_dtime_prior_name
# =======================
# an ``str`` specifying the prior used for the time difference multiplier
# for the *i*-th smoothing; see
# :ref:`smooth_table@mulstd_dtime_prior_id` .
# This key is optional and its default value is ``None`` which corresponds
# to ``null`` in the database.
#
# fun
# ===
# This is a function with the following syntax:
#
#     ( *v* , *da* , *dt* ) = *fun* ( *a* , *t* )
#
# The ``str`` results *v* , *da* , and *dt*
# are the names for the value prior, age difference prior,
# and time difference prior corresponding to the *i*-th smoothing.
# The value *da* is not used,
# when age *a* = *age_list* [ *age_id* [ ``-1`` ]] .
# The value *dt* is not used,
# when time *t* = *time_list* [ *time_id* [ ``-1`` ]] .
# Note that there is an *i* , *j* such that
# *a* = *age_list* [ *age_id* [ *i* ]] and
# *t* = *time_list* [ *time_id* [ *j* ]] .
#
# const_value
# ===========
# The *fun* return value *v* may be a ``float`` .
# In this case, the value of the smoothing, at the corresponding age and time,
# is constrained to be *v* using the
# :ref:`smooth_grid_table@const_value` column in the
# ``smooth_grid`` table.
#
# nslist_dict
# ***********
# This is a ``dict`` that specifies the
# :ref:`nslist_table-name` and the :ref:`nslist_pair_table-name` .
# For each :ref:`nslist_table@nslist_name` ,
#
#     *nslist_dict* [ *nslist_name* ] = [ ( *node_name* , *smooth_name* ), ... ]
#
# Note that each pair above is a python ``tuple`` :
#
# .. list-table::
#     :widths: auto
#
#     * - Variable
#       - Value Type
#       - Description
#     * - nslist_name
#       - str
#       - name of one list of node,smoothing pairs
#     * - node_name
#       - str
#       - name of the node for this pair
#     * - smooth_name
#       - str
#       - name of the smoothing for this pair
#
# rate_table
# **********
# This is a list of ``dict``
# that define the rows of the :ref:`rate_table-name` .
# The dictionary *rate_table* [ *i* ] has the following:
#
# .. list-table::
#     :widths: auto
#
#     * - Key
#       - Value Type
#       - Description
#     * - name
#       - str
#       - pini, iota, rho, chi, or omega
#     * - parent_smooth
#       - str
#       - parent smoothing
#     * - child_smooth
#       - str
#       - a single child smoothing
#     * - child_nslist
#       - str
#       - list of child smoothings
#
# The value ``None`` is used to represent a ``null`` value for
# the parent and child smoothings.
# If a key name does not appear, null is used for the corresponding value.
# If a *name* ; e.g. ``rho`` , does not appear, the value
# null is used for the parent and child smoothings for the corresponding rate.
#
# mulcov_table
# ************
# This is a list of ``dict``
# that define the rows of the :ref:`mulcov_table-name` .
# The dictionary *mulcov_table* [ *i* ] has the following:
#
# .. list-table::
#     :widths: auto
#
#     * - Key
#       - Value Type
#       - Description
#     * - covariate
#       - str
#       - is the covariate column
#     * - type
#       - str
#       - ``rate_value`` , ``meas_value`` , or ``meas_noise``
#     * - effected
#       - str
#       - integrand or rate affected
#     * - group
#       - str
#       - the group that is affected
#     * - smooth
#       - str
#       - smoothing at group level
#     * - subsmooth
#       - str
#       - smoothing at subgroup level
#
# effected
# ========
# If *type* is ``rate_value`` , *effected* is a rate.
# Otherwise it is an integrand.
#
# group
# =====
# If the ``group`` key is not present, the first group in
# :ref:`create_database@subgroup_table` is used.
#
# subsmooth
# =========
# If the ``subsmooth`` key is not present, the value null is used for
# the subgroup smoothing in the corresponding row and a warning is printed.
#
# option_table
# ************
# This is a list of ``dict``
# that define the values
# :ref:`option_table@Table Format@option_name` ,
# :ref:`option_table@Table Format@option_value` in the option table.
# The *i*-th row of the table will have
#
# | |tab| *option_name* = *option_table* [ *i* ][ ``'name'`` ]
# | |tab| *option_value* = *option_table* [ *i* ][ ``'value'`` ]
#
# rate_eff_cov_table
# ******************
# This is a list of ``dict``
# that define the rows of the :ref:`rate_eff_cov_table-name` .
# The dictionary *rate_eff_cov_table* [ *i* ] has the following:
#
# .. csv-table::
#     :widths: auto
#
#     Key,Value Type,Description
#     ``'node_name'``,      str,identifies the node for the *i*-th row
#     ``'covariate_name'``, str,identifies the covariate for the *i*-th row
#     ``'split_value'``,    float,value of the splitting covariate
#     ``'weight_name'``,    str,identifies weighting for this row
#
# Contents
# ********
# {xrst_toc_table
#    example/table/create_database.py
# }
# Example
# *******
# The file :ref:`create_database.py-name` contains
# an example and test of ``create_database`` .
#
# {xrst_end create_database}