------------------------------------------------------- lines 5-648 of file: python/dismod_at/db2csv_command.py ------------------------------------------------------- # {xrst_begin db2csv_command} # {xrst_spell # diff # dir # ij # res # stdcv # } # {xrst_comment_ch #} # # Create Csv Files that Summarize The Database # ############################################ # # Syntax # ****** # # As Program # ========== # ``dismod-at`` *database* ``db2csv`` # # As Python Function # ================== # ``dismod_at.db2csv_command`` ( *database* ) # # Convention # ********** # The ``null`` value in the database corresponds # to an empty string in the csv files. # # log table # ********* # This command uses :ref:`python_log_command-name` to enter # begin and end markers in the database log table. # # database # ******** # is the path from the current directory to the database. # This must be a :ref:`dismod_at` database and # the :ref:`init_command-name` must have been run on the database. # # dir # *** # We use the notation *dir* for the directory where *database* # is located. # # fit_var, fit_data_subset # ************************ # The :ref:`log_table-name` is used to determine if the previous # fit command had a :ref:`fit_command@simulate_index` . # If so, the :ref:`fit_var_table-name` and :ref:`fit_data_subset_table-name` # correspond to simulated data. # Otherwise, if they exist, they correspond to the measured data. # # simulate_index # ************** # If the previous fit command had a # :ref:`fit_command@simulate_index` # that value is used for *simulate_index* below. # Otherwise, zero is used for *simulate_index* below. # # option.csv # ********** # The file *dir* / ``option.csv`` is written by this command. # It is a CSV file with one row for each possible row in the # :ref:`option_table-name` . # The columns in ``option.csv`` are # :ref:`option_table@Table Format@option_name` and # :ref:`option_table@Table Format@option_value` . 
# If a row does not appear in the option table, the corresponding # default value is written to ``option.csv`` . # If the :ref:`option_table@Parent Node@parent_node_id` appears # in the option table, the *parent_node_name* row of ``option.csv`` # is filled in with the corresponding node name. # # log.csv # ******* # The file *dir* / ``log.csv`` is written by this command. # It is a CSV file with one row for each message in the :ref:`log_table-name` . # The columns in this table are # :ref:`log_table@message_type` , # :ref:`log_table@table_name` , # :ref:`log_table@row_id` , # :ref:`log_table@unix_time` , and # :ref:`log_table@message` . # Note that a `begin db2csv` command will appear at the end of this file # without the corresponding `end db2csv` because the db2csv command was not # completed when ``log.csv`` was written. # # age_avg.csv # *********** # The file *dir* / ``age_avg.csv`` is written by this command. # It is a CSV file with the contents of the age_avg table. # The only column in this table is :ref:`age_avg_table@age` . # Note that a :ref:`set_command-name` may change the value of # :ref:`option_table@Age Average Grid@ode_step_size` or # :ref:`option_table@Age Average Grid@age_avg_split` but it will not # write out the new age_avg table. # # hes_fixed.csv # ************* # If the :ref:`sample_command@asymptotic` # sample command was executed, # the contents of the :ref:`hes_fixed_table-name` are written to # the CSV file *dir* / ``hes_fixed.csv`` . # The columns in this table are # :ref:`hes_fixed_table@row_var_id` , # :ref:`hes_fixed_table@col_var_id` , # :ref:`hes_fixed_table@hes_fixed_value` . # # hes_random.csv # ************** # If a :ref:`fit both` , # :ref:`fit random` , # or :ref:`sample asymptotic` # command was executed, # the contents of the :ref:`hes_random_table-name` are written to # the CSV file *dir* / ``hes_random.csv`` . 
# The columns in this table are # :ref:`hes_random_table@row_var_id` , # :ref:`hes_random_table@col_var_id` , # :ref:`hes_random_table@hes_random_value` . # # trace_fixed.csv # *************** # If the :ref:`fit fixed` or # :ref:`fit both` command has completed, # the contents of the :ref:`trace_fixed_table-name` are written to # the CSV file *dir* / ``trace_fixed.csv`` . # The columns in this table have the same name as in the corresponding table # with the exception that the column # :ref:`trace_fixed_table@regularization_size` # is called *reg_size* . # # mixed_info.csv # ************** # If the :ref:`fit_command-name` completed, # the contents of the :ref:`mixed_info_table-name` are written to # the CSV file *dir* / ``mixed_info.csv`` . # # {xrst_comment -------------------------------------------------------} # variable.csv # ************ # {xrst_comment -------------------------------------------------------} # The file *dir* / ``variable.csv`` is written by this command. # It is a CSV file with one row for each of the :ref:`model_variables-name` # and has the following columns: # # var_id # ====== # is the :ref:`var_table@var_id` . # # var_type # ======== # is the :ref:`var_table@var_type` . # # s_id # ==== # is the :ref:`smooth_table@smooth_id` for this variable. # If the variable is a # :ref:`smoothing standard deviation multiplier` # this is the smoothing that this multiplier affects. # Otherwise, it is the smoothing where the prior for this variable # comes from. # # m_id # ==== # If this variable is a covariate multiplier, this is the corresponding # :ref:`mulcov_table@mulcov_id` . # # m_diff # ====== # If this variable is a covariate multiplier, this is the corresponding # :ref:`bnd_mulcov_table@max_cov_diff` . 
# # bound # ===== # If the upper and lower value limits in the value prior for this variable # are not equal, # this is a bound for the absolute value of this variable; see # :ref:`bnd_mulcov_table@max_mulcov` and # :ref:`option_table@Optimize Random Only@bound_random` . # # age # === # is the :ref:`age_table@age` . # # time # ==== # is the :ref:`time_table@time` . # # rate # ==== # is the :ref:`rate_table@rate_name` . # # integrand # ========= # is the # :ref:`integrand_table@integrand_name` . # # covariate # ========= # is the # :ref:`covariate_table@covariate_name` . # # node # ==== # is the # :ref:`node_table@node_name` . # # group # ===== # This field is non-empty for # :ref:`model_variables@Fixed Effects, theta@Group Covariate Multipliers` . # # subgroup # ======== # This field is non-empty for # :ref:`model_variables@Random Effects, u@Subgroup Covariate Multipliers` . # # fixed # ===== # is ``true`` if this variable is a # :ref:`fixed effect` , # otherwise it is ``false`` . # # depend # ====== # If the :ref:`depend_var_table-name` exists, this has one of the following: # ``none`` if neither the data nor the prior depends on this variable, # ``data`` if only the data depends on this variable, # ``prior`` if only the prior depends on this variable, # ``both`` if both the data and the prior depend on this variable. # # fit_value # ========= # If the :ref:`fit_command-name` has been run, this is the # :ref:`fit_var_table@fit_var_value` . # # start # ===== # is the # :ref:`start_var_table@start_var_value` # for this variable. # # scale # ===== # is the # :ref:`scale_var_table@scale_var_value` # for this variable. # # truth # ===== # If the truth_var table exists, this is the # :ref:`truth_var_table@truth_var_value` # for this variable. 
# # sam_avg # ======= # If the sample table exists, # for each :ref:`sample_table@var_id` # this is the average # with respect to :ref:`sample_table@sample_index` # of the :ref:`sample_table@var_value` corresponding to # this *var_id* . # # sam_std # ======= # If the sample table exists, # for each fixed :ref:`sample_table@var_id` # this is the estimated standard deviation # with respect to :ref:`sample_table@sample_index` # of the # :ref:`sample_table@var_value` corresponding to # this *var_id* . # If there is only one *sample_index* in the sample table, # this column is empty because the standard deviation cannot be estimated # from one sample. # # res_value # ========= # If the :ref:`fit_command-name` has been run, this is the # :ref:`fit_var_table@residual_value` . # # res_dage # ======== # If the :ref:`fit_command-name` has been run, this is the # :ref:`fit_var_table@residual_dage` ; see # :ref:`fit_var` above. # # res_dtime # ========= # If the :ref:`fit_command-name` has been run, this is the # :ref:`fit_var_table@residual_dtime` ; see # :ref:`fit_var` above. # # lag_value # ========= # If the :ref:`fit_command-name` has been run, this is the # :ref:`fit_var_table@lagrange_value` ; see # :ref:`fit_var` above. # # lag_dage # ======== # If the :ref:`fit_command-name` has been run, this is the # :ref:`fit_var_table@lagrange_dage` ; see # :ref:`fit_var` above. # # lag_dtime # ========= # If the :ref:`fit_command-name` has been run, this is the # :ref:`fit_var_table@lagrange_dtime` ; see # :ref:`fit_var` above. # # sim_v, sim_a, sim_t # =================== # If the :ref:`simulate_command-name` has been run, # these are the values of # :ref:`prior_sim_table@prior_sim_value` , # :ref:`prior_sim_table@prior_sim_dage` , and # :ref:`prior_sim_table@prior_sim_dtime` , # for the # :ref:`db2csv_command@simulate_index` . 
# # prior_info # ========== # There is a column named # # *field* _ *character* # # for *character* equal to ``v`` , ``a`` and ``t`` # and for *field* equal to # :ref:`prior_table@mean` , # :ref:`prior_table@lower` , # :ref:`prior_table@upper` , # :ref:`prior_table@std` , # :ref:`prior_table@eta` , # :ref:`prior_table@nu` and # :ref:`density` . # # #. The character ``v`` denotes this is the prior information for a value, # ``a`` the prior information for an age difference, and # ``t`` the prior information for a time difference. # #. The density has been mapped to the corresponding # :ref:`density_table@density_name` . # #. If the corresponding :ref:`smooth_grid_table@value_prior_id` # is ``null`` , # the :ref:`smooth_grid_table@const_value` prior is displayed. # #. If a field is ``null`` , or has no effect, it is displayed as empty. # Note that the fields *eta_v* are always displayed for fixed # effects because they have a # :ref:`scaling` effect. # # {xrst_comment -------------------------------------------------------} # data.csv # ******** # {xrst_comment -------------------------------------------------------} # The file *dir* / ``data.csv`` is written by this command. # It is a CSV file with one row for each row in the :ref:`data_subset_table-name` # and has the following columns: # # data_id # ======= # is the data table # :ref:`data_table@data_id` . # # data_extra_columns # ================== # Each column specified by the # :ref:`option_table@Extra Columns@data_extra_columns` # option is included in the ``data.csv`` file. # # child # ===== # If this data row is associated with a child, # this is the name of the child. Otherwise, this data is associated # with the :ref:`option_table@Parent Node` . # # node # ==== # is the # :ref:`node_table@node_name` for this data row. # This will correspond directly to the data table # :ref:`data_table@node_id` . # # group # ===== # is the :ref:`subgroup_table@group_name` corresponding # to the subgroup for this data row. 
# # subgroup # ======== # is the # :ref:`subgroup_table@subgroup_name` for this data row. # This will correspond directly to the data table # :ref:`data_table@subgroup_id` . # # integrand # ========= # is the integrand table # :ref:`integrand_table@integrand_name` . # # weight # ====== # is the # :ref:`weight_table@weight_name` . # # age_lo # ====== # is the lower age used in the fits; i.e., the data table # :ref:`data_table@age_lower` modified by the # age compression interval in the # :ref:`option_table@compress_interval` option. # # age_up # ====== # is the upper age used in the fits; i.e., the data table # :ref:`data_table@age_upper` modified by the # age compression interval. # # time_lo # ======= # is the lower time used in the fits; i.e., the data table # :ref:`data_table@time_lower` modified by the # time compression interval. # # time_up # ======= # is the upper time used in the fits; i.e., the data table # :ref:`data_table@time_upper` modified by the # time compression interval. # # d_out # ===== # is the value of # :ref:`data_table@hold_out` in the data table. # # s_out # ===== # is the value of # :ref:`data_subset_table@hold_out` in the # data_subset table. # # density # ======= # is the # :ref:`density_table@density_name` for data_subset table # :ref:`data_subset_table@density_id` for this row. # # eta # === # is the data_subset table # :ref:`data_subset_table@eta` for this row. # # nu # == # is the data_subset table # :ref:`data_subset_table@nu` for this row. # # ss # == # is the data_subset table # :ref:`data_subset_table@sample_size` for this row. # # meas_std # ======== # is the data table # :ref:`data_table@meas_std` . # Except in the binomial case, where it is an approximation for the # standard deviation of the binomial counts divided by the sample size. # # meas_stdcv # ========== # is the minimum cv standard deviation used to define the likelihood; see # :ref:`Delta` . # In the binomial case it is equal to *meas_std*. 
# # meas_sigma # ========== # If the previous fit command had a # :ref:`db2csv_command@simulate_index` , # this column is empty. # We use *sigma* to denote the adjusted standard deviation # :ref:`sigma` # for this row. # # The transformed standard deviation # :ref:`delta` # is computed by dividing by the residual. # This results in plus infinity, and is not valid, when the residual is zero. # If this calculation for *delta* # is greater than the maximum python ``float`` value, # *meas_sigma* is reported as empty. # Otherwise the transformation is inverted to get the value of *sigma* . # # meas_value # ========== # is the data table # :ref:`data_table@meas_value` . # # avgint # ====== # If the :ref:`fit_command-name` has been run, this is the # :ref:`fit_data_subset_table@avg_integrand` for this row. # # residual # ======== # If the :ref:`fit_command-name` has been run, this is the # :ref:`fit_data_subset_table@weighted_residual` # for this row; see # :ref:`fit_data_subset` # above. # # sim_value # ========= # If the :ref:`simulate_command-name` has been run, this is the # :ref:`data_sim_table@data_sim_value` for this # :ref:`db2csv_command@data.csv@data_id` and # :ref:`fit_command@simulate_index` # in the previous fit command. # If there is no *simulate_index* # in the previous fit command, the # value zero is used for the *simulate_index* . # # Covariates # ========== # For each covariate in the :ref:`covariate_table-name` there is a column with # the corresponding *covariate_name* . # For each covariate column and measurement row, the value in the # covariate column is the covariate value for this measurement minus # the reference value for this covariate, i.e., the corresponding # covariate difference # :ref:`x_ij` # in the model for the average integrand. 
# # {xrst_comment -------------------------------------------------------} # predict.csv # *********** # {xrst_comment -------------------------------------------------------} # If the :ref:`predict_command-name` was executed, # the CSV file *dir* / ``predict.csv`` is written. # For each row of the :ref:`predict_table-name` # there is a corresponding row in ``predict.csv`` . # # avgint_id # ========= # is the avgint table # :ref:`avgint_table@avgint_id` . # # avgint_extra_columns # ==================== # Each column specified by the # :ref:`option_table@Extra Columns@avgint_extra_columns` # option is included in the ``predict.csv`` file. # # s_index # ======= # This identifies the set of model variables corresponding to the # last :ref:`predict_command-name` executed. # If the source for the predict command was # :ref:`predict_command@source@sample` , # the model variables correspond to the rows of the # sample table with :ref:`sample_table@sample_index` # equal to *s_index* . # Otherwise, *s_index* is empty and # the model variables correspond to the # :ref:`predict_command@source@fit_var` or # :ref:`predict_command@source@truth_var` table # depending on the source for the last predict command executed. # # avgint # ====== # is the :ref:`average integrand` # :math:`A_i(u, \theta)`. The model variables :math:`(u, \theta)` # correspond to the *s_index* , and measurement subscript :math:`i` # denotes the :ref:`avgint_table-name` information # for this row of ``predict.csv`` ; i.e., *age_lo* , *age_up* , # ... # # age_lo # ====== # is the avgint table # :ref:`avgint_table@age_lower` . # # age_up # ====== # is the avgint table # :ref:`data_table@age_upper` . # # time_lo # ======= # is the avgint table # :ref:`data_table@time_lower` . # # time_up # ======= # is the avgint table # :ref:`data_table@time_upper` . # # integrand # ========= # is the avgint table # :ref:`integrand_table@integrand_name` . 
# # weight # ====== # is the # :ref:`weight_table@weight_name` for this row. # # node # ==== # is the # :ref:`node_table@node_name` for this row. # # group # ===== # is the :ref:`subgroup_table@group_name` corresponding # to the subgroup for this row. # # subgroup # ======== # is the # :ref:`subgroup_table@subgroup_name` for this row. # This will correspond directly to the avgint table # :ref:`avgint_table@subgroup_id` . # # Covariates # ========== # For each covariate in the :ref:`covariate_table-name` there is a column with # the corresponding *covariate_name* . # For each covariate column and measurement row, the value in the # covariate column is the covariate value in the :ref:`avgint_table-name` # minus the reference value for this covariate; i.e., the corresponding # covariate difference # :ref:`x_ij` # in the model for the average integrand. # {xrst_comment -------------------------------------------------------} # {xrst_toc_hidden # example/get_started/db2csv_command.py # } # Example # ******* # The file :ref:`db2csv_command.py-name` contains an example and test # using this command. # # {xrst_end db2csv_command}