-------------------------------------------------------
lines 5-648 of file: python/dismod_at/db2csv_command.py
-------------------------------------------------------

# {xrst_begin db2csv_command}
# {xrst_spell
#       diff
#       dir
#       ij
#       res
#       stdcv
# }
# {xrst_comment_ch #}
#
# Create Csv Files that Summarize The Database
# ############################################
#
# Syntax
# ******
#
# As Program
# ==========
# ``dismod-at`` *database* ``db2csv``
#
# As Python Function
# ==================
# ``dismod_at.db2csv_command`` ( *database*  )
#
# Convention
# **********
# The ``null`` value in the database corresponds
# to an empty string in the csv files.
#
# log table
# *********
# This command uses :ref:`python_log_command-name` to enter
# begin and end markers in the database log table.
#
# database
# ********
# is the path from the current directory to the database.
# This must be a :ref:`dismod_at database<database-name>` and
# the :ref:`init_command-name` must have been run on the database.
#
# dir
# ***
# We use the notation *dir* for the directory where *database*
# is located.
#
# fit_var, fit_data_subset
# ************************
# The :ref:`log_table-name` is used to determine if the previous
# fit command had a :ref:`fit_command@simulate_index` .
# If so, the :ref:`fit_var_table-name` and :ref:`fit_data_subset_table-name`
# correspond to simulated data.
# Otherwise, if they exist, they correspond to the measured data.
#
# simulate_index
# **************
# If the previous fit command had a
# :ref:`fit_command@simulate_index`
# that value is used for *simulate_index* below.
# Otherwise, zero is used for *simulate_index* below.
#
# option.csv
# **********
# The file *dir* / ``option.csv`` is written by this command.
# It is a CSV file with one row for each possible row in the
# :ref:`option_table-name` .
# The columns in ``option.csv`` are
# :ref:`option_table@Table Format@option_name` and
# :ref:`option_table@Table Format@option_value` .
# If a row does not appear in the option table, the corresponding
# default value is written to ``option.csv`` .
# If the :ref:`option_table@Parent Node@parent_node_id` appears
# in the option table, the *parent_node_name* row of ``option.csv``
# is filled in with the corresponding node name.
#
# log.csv
# *******
# The file *dir* / ``log.csv`` is written by this command.
# It is a CSV file with one row for each message in the :ref:`log_table-name` .
# The columns in this table are
# :ref:`log_table@message_type` ,
# :ref:`log_table@table_name` ,
# :ref:`log_table@row_id` ,
# :ref:`log_table@unix_time` , and
# :ref:`log_table@message` .
# Note that a ``begin db2csv`` command will appear at the end of this file
# without the corresponding ``end db2csv`` because the db2csv command was not
# completed when ``log.csv`` was written.
#
# age_avg.csv
# ***********
# The file *dir* / ``age_avg.csv`` is written by this command.
# It is a CSV file with the contents of the age_avg table.
# The only column in this table is :ref:`age_avg_table@age` .
# Note that a :ref:`set_command-name` may change the value of
# :ref:`option_table@Age Average Grid@ode_step_size` or
# :ref:`option_table@Age Average Grid@age_avg_split` but it will not
# write out the new age_avg table.
#
# hes_fixed.csv
# *************
# If the :ref:`sample_command@asymptotic`
# sample command was executed,
# the contents of the :ref:`hes_fixed_table-name` are written to
# the CSV file *dir* / ``hes_fixed.csv`` .
# The columns in this table are
# :ref:`hes_fixed_table@row_var_id` ,
# :ref:`hes_fixed_table@col_var_id` ,
# :ref:`hes_fixed_table@hes_fixed_value` .
#
# hes_random.csv
# **************
# If a :ref:`fit both<fit_command@variables@both>` ,
# :ref:`fit random<fit_command@variables@random>` ,
# or :ref:`sample asymptotic<sample_command@asymptotic>`
# command was executed,
# the contents of the :ref:`hes_random_table-name` are written to
# the CSV file *dir* / ``hes_random.csv`` .
# The columns in this table are
# :ref:`hes_random_table@row_var_id` ,
# :ref:`hes_random_table@col_var_id` ,
# :ref:`hes_random_table@hes_random_value` .
#
# trace_fixed.csv
# ***************
# If the :ref:`fit fixed<fit_command@variables@fixed>` or
# :ref:`fit both<fit_command@variables@both>` command has completed,
# the contents of the :ref:`trace_fixed_table-name` are written to
# the CSV file *dir* / ``trace_fixed.csv`` .
# The columns in this table have the same name as in the corresponding table
# with the exception that the column
# :ref:`trace_fixed_table@regularization_size`
# is called *reg_size* .
#
# mixed_info.csv
# **************
# If the :ref:`fit_command-name` completed
# the contents of the :ref:`mixed_info_table-name` are written to
# the CSV file *dir* / ``mixed_info.csv`` .
#
# {xrst_comment -------------------------------------------------------}
# variable.csv
# ************
# {xrst_comment -------------------------------------------------------}
# The file *dir* / ``variable.csv`` is written by this command.
# It is a CSV file with one row for each of the :ref:`model_variables-name`
# and has the following columns:
#
# var_id
# ======
# is the :ref:`var_table@var_id` .
#
# var_type
# ========
# is the :ref:`var_table@var_type` .
#
# s_id
# ====
# is the :ref:`smooth_table@smooth_id` for this variable.
# If the variable is a
# :ref:`smoothing standard deviation multiplier<model_variables@Fixed Effects, theta@Smoothing Standard Deviation Multipliers, lambda>`
# this is the smoothing that this multiplier affects.
# Otherwise, it is the smoothing where the prior for this variable
# comes from.
#
# m_id
# ====
# If this variable is a covariate multiplier, this is the corresponding
# :ref:`mulcov_table@mulcov_id` .
#
# m_diff
# ======
# If this variable is a covariate multiplier, this is the corresponding
# :ref:`bnd_mulcov_table@max_cov_diff` .
#
# bound
# =====
# If the upper and lower value limits in the value prior for this variable
# are not equal,
# this is a bound for the absolute value of this variable; see
# :ref:`bnd_mulcov_table@max_mulcov` and
# :ref:`option_table@Optimize Random Only@bound_random` .
#
# age
# ===
# is the :ref:`age_table@age` .
#
# time
# ====
# is the :ref:`time_table@time` .
#
# rate
# ====
# is the :ref:`rate_table@rate_name` .
#
# integrand
# =========
# is the
# :ref:`integrand_table@integrand_name` .
#
# covariate
# =========
# is the
# :ref:`covariate_table@covariate_name` .
#
# node
# ====
# is the
# :ref:`node_table@node_name` .
#
# group
# =====
# This field is non-empty for
# :ref:`model_variables@Fixed Effects, theta@Group Covariate Multipliers` .
#
# subgroup
# ========
# This field is non-empty for
# :ref:`model_variables@Random Effects, u@Subgroup Covariate Multipliers` .
#
# fixed
# =====
# is ``true`` if this variable is a
# :ref:`fixed effect<model_variables@Fixed Effects, theta>` ,
# otherwise it is ``false`` .
#
# depend
# ======
# If the :ref:`depend_var_table-name` exists, this has one of the following:
# ``none`` if neither the data nor the prior depends on this variable,
# ``data`` if only the data depends on this variable,
# ``prior`` if only the prior depends on this variable,
# ``both`` if both the data and the prior depend on this variable.
#
# fit_value
# =========
# If the :ref:`fit_command-name` has been run, this is the
# :ref:`fit_var_table@fit_var_value` .
#
# start
# =====
# is the
# :ref:`start_var_table@start_var_value`
# for this variable.
#
# scale
# =====
# is the
# :ref:`scale_var_table@scale_var_value`
# for this variable.
#
# truth
# =====
# If the truth_var table exists, this is the
# :ref:`truth_var_table@truth_var_value`
# for this variable.
#
# sam_avg
# =======
# If the sample table exists,
# for each :ref:`sample_table@var_id`
# this is the average
# with respect to :ref:`sample_table@sample_index`
# of the :ref:`sample_table@var_value` corresponding to
# this *var_id* .
#
# sam_std
# =======
# If the sample table exists,
# for each fixed :ref:`sample_table@var_id`
# this is the estimated standard deviation
# with respect to :ref:`sample_table@sample_index`
# of the :ref:`sample_table@var_value` corresponding to
# this *var_id* .
# If there is only one *sample_index* in the sample table,
# this column is empty because the standard deviation cannot be estimated
# from one sample.
#
# res_value
# =========
# If the :ref:`fit_command-name` has been run, this is the
# :ref:`fit_var_table@residual_value` .
#
# res_dage
# ========
# If the :ref:`fit_command-name` has been run, this is the
# :ref:`fit_var_table@residual_dage` ; see
# :ref:`fit_var<db2csv_command@fit_var, fit_data_subset>` above.
#
# res_dtime
# =========
# If the :ref:`fit_command-name` has been run, this is the
# :ref:`fit_var_table@residual_dtime` ; see
# :ref:`fit_var<db2csv_command@fit_var, fit_data_subset>` above.
#
# lag_value
# =========
# If the :ref:`fit_command-name` has been run, this is the
# :ref:`fit_var_table@lagrange_value` ; see
# :ref:`fit_var<db2csv_command@fit_var, fit_data_subset>` above.
#
# lag_dage
# ========
# If the :ref:`fit_command-name` has been run, this is the
# :ref:`fit_var_table@lagrange_dage` ; see
# :ref:`fit_var<db2csv_command@fit_var, fit_data_subset>` above.
#
# lag_dtime
# =========
# If the :ref:`fit_command-name` has been run, this is the
# :ref:`fit_var_table@lagrange_dtime` ; see
# :ref:`fit_var<db2csv_command@fit_var, fit_data_subset>` above.
#
# sim_v, sim_a, sim_t
# ===================
# If the :ref:`simulate_command-name` has been run,
# these are the values of
# :ref:`prior_sim_table@prior_sim_value` ,
# :ref:`prior_sim_table@prior_sim_dage` , and
# :ref:`prior_sim_table@prior_sim_dtime` ,
# for the
# :ref:`db2csv_command@simulate_index` .
#
# prior_info
# ==========
# There is a column named
#
#       *field* _ *character*
#
# for *character* equal to ``v`` , ``a`` and ``t``
# and for *field* equal to
# :ref:`prior_table@mean` ,
# :ref:`prior_table@lower` ,
# :ref:`prior_table@upper` ,
# :ref:`prior_table@std` ,
# :ref:`prior_table@eta` ,
# :ref:`prior_table@nu` and
# :ref:`density<prior_table@density_id>` .
#
# #.  The character ``v`` denotes this is the prior information for a value,
#     ``a`` the prior information for an age difference, and
#     ``t`` the prior information for a time difference.
# #.  The density has been mapped to the corresponding
#     :ref:`density_table@density_name` .
# #.  If the corresponding :ref:`smooth_grid_table@value_prior_id`
#     is ``null`` ,
#     the :ref:`smooth_grid_table@const_value` prior is displayed.
# #.  If a field is ``null`` , or has no effect, it is displayed as empty.
#     Note that the fields *eta_v* are always displayed for fixed
#     effects because they have a
#     :ref:`scaling<prior_table@eta@Scaling Fixed Effects>` effect.
#
# {xrst_comment -------------------------------------------------------}
# data.csv
# ********
# {xrst_comment -------------------------------------------------------}
# The file *dir* / ``data.csv`` is written by this command.
# It is a CSV file with one row for each row in the :ref:`data_subset_table-name`
# and has the following columns:
#
# data_id
# =======
# is the data table
# :ref:`data_table@data_id` .
#
# data_extra_columns
# ==================
# Each column specified by the
# :ref:`option_table@Extra Columns@data_extra_columns`
# option is included in the ``data.csv`` file.
#
# child
# =====
# If this data row is associated with a child,
# this is the name of the child. Otherwise, this data is associated
# with the :ref:`option_table@Parent Node` .
#
# node
# ====
# is the
# :ref:`node_table@node_name` for this data row.
# This will correspond directly to the data table
# :ref:`data_table@node_id` .
#
# group
# =====
# is the :ref:`subgroup_table@group_name` corresponding
# to the subgroup for this data row.
#
# subgroup
# ========
# is the
# :ref:`subgroup_table@subgroup_name` for this data row.
# This will correspond directly to the data table
# :ref:`data_table@subgroup_id` .
#
# integrand
# =========
# is the integrand table
# :ref:`integrand_table@integrand_name` .
#
# weight
# ======
# is the
# :ref:`weight_table@weight_name` .
#
# age_lo
# ======
# is the lower age used in the fits; i.e., the data table
# :ref:`data_table@age_lower` modified by the
# age compression interval in the
# :ref:`option_table@compress_interval` option.
#
# age_up
# ======
# is the upper age used in the fits; i.e., the data table
# :ref:`data_table@age_upper` modified by the
# age compression interval.
#
# time_lo
# =======
# is the lower time used in the fits; i.e., the data table
# :ref:`data_table@time_lower` modified by the
# time compression interval.
#
# time_up
# =======
# is the upper time used in the fits; i.e., the data table
# :ref:`data_table@time_upper` modified by the
# time compression interval.
#
# d_out
# =====
# is the value of
# :ref:`data_table@hold_out` in the data table.
#
# s_out
# =====
# is the value of
# :ref:`data_subset_table@hold_out` in the
# data_subset table.
#
# density
# =======
# is the
# :ref:`density_table@density_name` for data_subset table
# :ref:`data_subset_table@density_id` for this row.
#
# eta
# ===
# is the data_subset table
# :ref:`data_subset_table@eta` for this row.
#
# nu
# ==
# is the data_subset table
# :ref:`data_subset_table@nu` for this row.
#
# ss
# ==
# is the data_subset table
# :ref:`data_subset_table@sample_size` for this row.
#
# meas_std
# ========
# is the data table
# :ref:`data_table@meas_std` .
# Except in the binomial case, where it is an approximation for the
# standard deviation of the binomial counts divided by the sample size.
#
# meas_stdcv
# ==========
# is the minimum cv standard deviation used to define the likelihood; see
# :ref:`Delta<data_like@Notation@Minimum CV Standard Deviation, Delta_i>` .
# In the binomial case it is equal to *meas_std*.
#
# meas_sigma
# ==========
# If the previous fit command had a
# :ref:`db2csv_command@simulate_index` ,
# this column is empty.
# We use *sigma* to denote the adjusted standard deviation
# :ref:`sigma<data_like@Adjusted Standard Deviation, sigma_i(theta)>`
# for this row.
#
# The transformed standard deviation
# :ref:`delta<data_like@Transformed Standard Deviation, delta_i(theta)>`
# is computed by dividing by the residual.
# This results in plus infinity, and is not valid, when the residual is zero.
# If this calculation for *delta*
# is greater than the maximum python ``float`` value,
# *meas_sigma* is reported as empty .
# Otherwise the transformation is inverted to get the value of *sigma* .
#
# meas_value
# ==========
# is the data table
# :ref:`data_table@meas_value` .
#
# avgint
# ======
# If the :ref:`fit_command-name` has been run, this is the
# :ref:`fit_data_subset_table@avg_integrand` for this row.
#
# residual
# ========
# If the :ref:`fit_command-name` has been run, this is the
# :ref:`fit_data_subset_table@weighted_residual`
# for this row; see
# :ref:`fit_data_subset<db2csv_command@fit_var, fit_data_subset>`
# above.
#
# sim_value
# =========
# If the :ref:`simulate_command-name` has been run, this is the
# :ref:`data_sim_table@data_sim_value` for this
# :ref:`db2csv_command@data.csv@data_id` and
# :ref:`fit_command@simulate_index`
# in the previous fit command.
# If there is no *simulate_index*
# in the previous fit command, the
# value zero is used for the *simulate_index* .
#
# Covariates
# ==========
# For each covariate in the :ref:`covariate_table-name` there is a column with
# the corresponding *covariate_name* .
# For each covariate column and measurement row, the value in the
# covariate column is the covariate value for this measurement minus
# the reference value for this covariate, i.e., the corresponding
# covariate difference
# :ref:`x_ij<avg_integrand@Data or Avgint Table Notation@Covariate Difference, x_ij>`
# in the model for the average integrand.
#
# {xrst_comment -------------------------------------------------------}
# predict.csv
# ***********
# {xrst_comment -------------------------------------------------------}
# If the :ref:`predict_command-name` was executed,
# the CSV file *dir* / ``predict.csv`` is written.
# For each row of the :ref:`predict_table-name`
# there is a corresponding row in ``predict.csv`` .
#
# avgint_id
# =========
# is the avgint table
# :ref:`avgint_table@avgint_id` .
#
# avgint_extra_columns
# ====================
# Each column specified by the
# :ref:`option_table@Extra Columns@avgint_extra_columns`
# option is included in the ``predict.csv`` file.
#
# s_index
# =======
# This identifies the set of model variables corresponding to the
# last :ref:`predict_command-name` executed.
# If the source for the predict command was
# :ref:`predict_command@source@sample` ,
# the model variables correspond to the rows of the
# sample table with :ref:`sample_table@sample_index`
# equal to *s_index* .
# Otherwise, *s_index* is empty and
# the model variables correspond to the
# :ref:`predict_command@source@fit_var` or
# :ref:`predict_command@source@truth_var` table
# depending on the source for the last predict command executed.
#
# avgint
# ======
# is the :ref:`average integrand<avg_integrand@Average Integrand, A_i>`
# :math:`A_i(u, \theta)`. The model variables :math:`(u, \theta)`
# correspond to the *s_index* , and measurement subscript :math:`i`
# denotes the :ref:`avgint_table-name` information
# for this row of ``predict.csv`` ; i.e., *age_lo* , *age_up* ,
# ...
#
# age_lo
# ======
# is the avgint table
# :ref:`avgint_table@age_lower` .
#
# age_up
# ======
# is the avgint table
# :ref:`avgint_table@age_upper` .
#
# time_lo
# =======
# is the avgint table
# :ref:`avgint_table@time_lower` .
#
# time_up
# =======
# is the avgint table
# :ref:`avgint_table@time_upper` .
#
# integrand
# =========
# is the avgint table
# :ref:`integrand_table@integrand_name` .
#
# weight
# ======
# is the
# :ref:`weight_table@weight_name` for this row.
#
# node
# ====
# is the
# :ref:`node_table@node_name` for this row.
#
# group
# =====
# is the :ref:`subgroup_table@group_name` corresponding
# to the subgroup for this row.
#
# subgroup
# ========
# is the
# :ref:`subgroup_table@subgroup_name` for this row.
# This will correspond directly to the avgint table
# :ref:`avgint_table@subgroup_id` .
#
# Covariates
# ==========
# For each covariate in the :ref:`covariate_table-name` there is a column with
# the corresponding *covariate_name* .
# For each covariate column and measurement row, the value in the
# covariate column is the covariate value in the :ref:`avgint_table-name`
# minus the reference value for this covariate, i.e., the corresponding
# covariate difference
# :ref:`x_ij<avg_integrand@Data or Avgint Table Notation@Covariate Difference, x_ij>`
# in the model for the average integrand.
# {xrst_comment -------------------------------------------------------}
# {xrst_toc_hidden
#     example/get_started/db2csv_command.py
# }
# Example
# *******
# The file :ref:`db2csv_command.py-name` contains an example and test
# using this command.
#
# {xrst_end db2csv_command}