-------------------------------------------------------- lines 5-492 of file: python/dismod_at/create_database.py -------------------------------------------------------- # {xrst_begin create_database} # {xrst_spell # bool # da # dt # len # subsmooth # tuple # } # {xrst_comment_ch #} # # Create a Dismod_at Database # ########################### # # Prototype # ********* # {xrst_literal # # BEGIN_PROTOTYPE # # END_PROTOTYPE # } # # Purpose # ******* # This routine makes it easy to create a ``dismod_at`` database # with all of its :ref:`input-name` tables. # This is only meant for small example and testing cases and is not efficient. # # Primary Key # *********** # For each of the lists above, the order of the # elements in the corresponding table is the same as the corresponding list. # For example, *age_list* [ *i* ] corresponds to the *i*-th row # of the ``age`` table which has # :ref:`database@Primary Key` value *age_id* = *i* . # # Name Column # *********** # The :ref:`name columns` are created with the unique # constraint; i.e., it will be an error to have the same value appear # twice in a column *table_name* _ ``name`` in the table # *table_name* . # # file_name # ********* # is a ``str`` containing the name of the file where the database # is stored. # If this file already exists, it is deleted and a database is created. # # age_list # ******** # is a ``list`` of ``float`` that # specify age values by indices. # # time_list # ********* # is a ``list`` of ``float`` that # specify time values by indices. # # integrand_table # *************** # This is a list of ``dict`` # that define the rows of the :ref:`integrand_table-name` . # The dictionary *integrand_table* [ *i* ] has the following: # # .. csv-table:: # :widths: auto # # Key,Value Type,Description # name,str,name for the *i*-th integrand # minimum_meas_cv,str,minimum measurement cv for this integrand # # The key ``minimum_meas_cv`` is optional. 
# If it is not present, ``0.0`` is used for the corresponding value. # # node_table # ********** # This is a list of ``dict`` # that define the rows of the :ref:`node_table-name` . # The dictionary *node_table* [ *i* ] has the following: # # .. csv-table:: # :widths: auto # # Key,Value Type,Description # name,str,name for the *i*-th node # parent,str,name of parent of the *i*-th node # # Note that if the i-th node does not have a parent, the empty string # should be used for the parent of that node. # # subgroup_table # ************** # This is a list of ``dict`` # that define the rows of the :ref:`subgroup_table-name` . # The dictionary *subgroup_table* [ *i* ] has the following: # # .. csv-table:: # :widths: auto # # Key,Value Type,Description # subgroup,str,name for the *i*-th subgroup # group,str,name of group that subgroup is in # # Backward Compatibility # ====================== # To get backward compatibility to before the subgroup information was added, # add the following table to the ``create_database`` call # (just after the *node_table* ): # # *subgroup_table* = [ { ``'subgroup'`` : ``'world'`` , ``'group'`` : ``'world'`` } ] # # No other changes to the ``create_database`` call should be necessary # (for backward compatibility). # # weight_table # ************ # This is a list of ``dict`` # that define the rows of the :ref:`weight_table-name` and # :ref:`weight_grid_table-name` . # The dictionary *weight_table* [ *i* ] has the following: # # .. list-table:: # :widths: auto # # * - Key # - Value Type # - Description # * - name # - str # - name of *i*-th weighting # * - age_id # - list of int # - indices for age grid # * - time_id # - list of int # - indices for time grid # * - fun # - function # - *w* = *fun* ( *a* , *t* ) # # The float *w* is the value of this weighting at the corresponding # float age *a* and float time *t* . # Note that there is an *i* , *j* such that # *a* = *age_list* [ *age_id* [ *i* ]] and # *t* = *time_list* [ *time_id* [ *j* ]] . 
# # covariate_table # *************** # This is a list of ``dict`` # that define the rows of the :ref:`covariate_table-name` . # The dictionary *covariate_table* [ *i* ] has the following: # # .. csv-table:: # :widths: auto # # Key,Value Type,Description # name,str,name for the *i*-th covariate # reference,float,reference value for *i*-th covariate # max_difference,float,maximum difference for *i*-th covariate # # If *max_difference* is ``None`` , the corresponding table entry # is null and this corresponds to an infinite maximum difference. # If *max_difference* does not appear, null is written for the # corresponding covariate entry. # # avgint_table # ************ # This is a list of ``dict`` # that define the rows of the :ref:`avgint_table-name` . # The dictionary *avgint_table* [ *i* ] has the following: # # .. csv-table:: # :widths: auto # # Key,Value Type,Description # integrand,str,integrand for *i*-th data # node,str,name of node in graph # subgroup,str,name of subgroup # weight,str,weighting function name # age_lower,float,lower age limit # age_upper,float,upper age limit # time_lower,float,lower time limit # time_upper,float,upper time limit # *c_0*,float,value of first covariate # ...,...,... # *c_J*,float,value of last covariate # # subgroup # ======== # If the ``subgroup`` key is not present, the first subgroup in # :ref:`create_database@subgroup_table` is used # and a warning is printed. # # weight # ====== # The weighting function name identifies an # entry in the :ref:`create_database@weight_table` # by its *name* . If *weight* is the empty string, # the constant weighting is used. # # covariates # ========== # Note that *J* = ``len`` ( *covariate_table* ) ``- 1`` and for # *j* = 0 , ... , *J* , # # *c_j* = *covariate_table* [ *j* ][ ``'name'`` ] # # We refer to the columns above as the required columns for # *avgint_table* . 
# # avgint_extra_columns # ==================== # If a *row* of *option_table* has *row* [ ``'name'`` ] # equal to ``'avgint_extra_columns'`` , the corresponding # *row* [ ``'value'`` ]. ``split`` () is the list of extra avgint table columns. # Otherwise the list of extra avgint table columns is empty. # # data_table # ********** # This is a list of ``dict`` # that define the rows of the :ref:`data_table-name` . # It has all the columns required for the *avgint_table* . # In addition, the dictionary *data_table* [ *i* ] has the following: # # .. csv-table:: # :widths: auto # # Key,Value Type,Description # hold_out,bool,hold out flag # density,str,:ref:`density_table@density_name` # meas_value,float,measured value # meas_std,float,standard deviation # eta,float,offset in log-transform # nu,float,Student's-t degrees of freedom # sample_size,int,sample size for a binomial distribution # # meas_std, eta, nu, sample_size # ============================== # The column keys ``meas_std`` , ``eta`` , ``nu`` , and ``sample_size`` # are optional. If they are not present, the value ``null`` is used # for the corresponding row of the data table. # # subgroup # ======== # If the ``subgroup`` key is not present, the first subgroup in # :ref:`create_database@subgroup_table` is used # and a warning is printed. # # data_extra_columns # ================== # If a *row* of *option_table* has *row* [ ``'name'`` ] # equal to ``'data_extra_columns'`` , the corresponding # *row* [ ``'value'`` ]. ``split`` () is the list of extra data table columns. # Otherwise the list of extra data table columns is empty. # # prior_table # *********** # This is a list of ``dict`` # that define the rows of the :ref:`prior_table-name` . # The dictionary *prior_table* [ *i* ] has the following: # # .. 
csv-table:: # :widths: auto # # Key,Value Type,Description # name,str,name of *i*-th prior # lower,float,lower limit # upper,float,upper limit # std,float,standard deviation # density,str,:ref:`density_table@density_name` # eta,float,offset in log densities # nu,float,degrees of freedom in Student densities # # The column keys # ``lower`` , ``upper`` , ``std`` , ``eta`` , and ``nu`` # are optional. If they are not present, the value ``null`` is used # for the corresponding row of the prior table. # # smooth_table # ************ # This is a list of ``dict`` # that define the rows of the :ref:`smooth_table-name` and # :ref:`smooth_grid_table-name` . # The dictionary *smooth_table* [ *i* ] has the following keys: # # name # ==== # an ``str`` specifying the name used to reference the *i*-th smoothing. # # age_id # ====== # a list of ``int`` specifying the age values for this smoothing # as indices in :ref:`create_database@age_list` . # # time_id # ======= # a list of ``int`` specifying the time values for this smoothing # as indices in :ref:`create_database@time_list` . # # mulstd_value_prior_name # ======================= # an ``str`` specifying the prior used for the value multiplier # for the *i*-th smoothing; see # :ref:`smooth_table@mulstd_value_prior_id` . # This key is optional and its default value is ``None`` which corresponds # to ``null`` in the database. # # mulstd_dage_prior_name # ====================== # an ``str`` specifying the prior used for the age difference multiplier # for the *i*-th smoothing; see # :ref:`smooth_table@mulstd_dage_prior_id` . # This key is optional and its default value is ``None`` which corresponds # to ``null`` in the database. # # mulstd_dtime_prior_name # ======================= # an ``str`` specifying the prior used for the time difference multiplier # for the *i*-th smoothing; see # :ref:`smooth_table@mulstd_dtime_prior_id` . # This key is optional and its default value is ``None`` which corresponds # to ``null`` in the database. 
# # fun # === # This is a function with the following syntax: # # ( *v* , *da* , *dt* ) = *fun* ( *a* , *t* ) # # The ``str`` results *v* , *da* , and *dt* # are the names for the value prior, age difference prior, # and time difference prior corresponding to the *i*-th smoothing. # The value *da* is not used, # when age *a* = *age_id* [ ``-1`` ] . # The value *dt* is not used, # when time *t* = *time_id* [ ``-1`` ] . # Note that there is an *i* , *j* such that # *a* = *age_list* [ *age_id* [ *i* ]] and # *t* = *time_list* [ *time_id* [ *j* ]] . # # const_value # =========== # The *fun* return value *v* may be a ``float`` . # In this case, the value of the smoothing, at the corresponding age and time, # is constrained to be *v* using the # :ref:`smooth_grid_table@const_value` column in the # ``smooth_grid`` table. # # nslist_dict # *********** # This is a ``dict`` that specifies the # :ref:`nslist_table-name` and the :ref:`nslist_pair_table-name` . # For each :ref:`nslist_table@nslist_name` , # # *nslist_dict* [ *nslist_name* ] = [ ( *node_name* , *smooth_name* ), ... ] # # Note that each pair above is a python ``tuple`` : # # .. list-table:: # :widths: auto # # * - Variable # - Value Type # - Description # * - nslist_name # - str # - name of one list of node,smoothing pairs # * - node_name # - str # - name of the node for this pair # * - smooth_name # - str # - name of the smoothing for this pair # # rate_table # ********** # This is a list of ``dict`` # that define the rows of the :ref:`rate_table-name` . # The dictionary *rate_table* [ *i* ] has the following: # # .. list-table:: # :widths: auto # # * - Key # - Value Type # - Description # * - name # - str # - pini, iota, rho, chi, or omega # * - parent_smooth # - str # - parent smoothing # * - child_smooth # - str # - a single child smoothing # * - child_nslist # - str # - list of child smoothings # # The value ``None`` is used to represent a ``null`` value for # the parent and child smoothings. 
# If a key name does not appear, null is used for the corresponding value. # If a *name* ; e.g. ``rho`` , does not appear, the value # null is used for the parent and child smoothings for the corresponding rate. # # mulcov_table # ************ # This is a list of ``dict`` # that define the rows of the :ref:`mulcov_table-name` . # The dictionary *mulcov_table* [ *i* ] has the following: # # .. list-table:: # :widths: auto # # * - Key # - Value Type # - Description # * - covariate # - str # - is the covariate column # * - type # - str # - ``rate_value`` , ``meas_value`` , or ``meas_noise`` # * - effected # - str # - integrand or rate affected # * - group # - str # - the group that is affected # * - smooth # - str # - smoothing at group level # * - subsmooth # - str # - smoothing at subgroup level # # effected # ======== # If *type* is ``rate_value`` , *effected* is a rate. # Otherwise it is an integrand. # # group # ===== # If the ``group`` key is not present, the first group in # :ref:`create_database@subgroup_table` is used. # # subsmooth # ========= # If the ``subsmooth`` key is not present, the value null is used for # the subgroup smoothing in the corresponding row and a warning is printed. # # option_table # ************ # This is a list of ``dict`` # that define the values # :ref:`option_table@Table Format@option_name` , # :ref:`option_table@Table Format@option_value` in the option table. # The *i*-th row of the table will have # # | |tab| *option_name* = *option_table* [ *i* ][ ``'name'`` ] # | |tab| *option_value* = *option_table* [ *i* ][ ``'value'`` ] # # rate_eff_cov_table # ****************** # This is a list of ``dict`` # that define the rows of the :ref:`rate_eff_cov_table-name` . # The dictionary *rate_eff_cov_table* [ *i* ] has the following: # # .. 
csv-table:: # :widths: auto # # Key,Value Type,Description # ``'node_name'``, str,identifies the node for the *i*-th row # ``'covariate_name'``, str,identifies the covariate for the *i*-th row # ``split_value``, float,value of the splitting covariate # ``'weight_name'``, str,identifies weighting for this row # # Contents # ******** # {xrst_toc_table # example/table/create_database.py # } # Example # ******* # The file :ref:`create_database.py-name` contains # an example and test of ``create_database`` . # # {xrst_end create_database}