Source code for tlo.methods.hiv_tb_calibration

"""
Deviance measure

This module runs at the end of a simulation and calculates a weighted deviance measure
for a given set of parameters using outputs from the demography (deaths), HIV and TB modules

"""
import math
from collections import defaultdict

import pandas as pd

from tlo import Module, logging

logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)



[docs]
class Deviance(Module):
    """
    This module reads in logged outputs from HIV, TB and demography and compares them with reported data
    a deviance measure is calculated and returned on simulation end
    """


[docs]
    def __init__(self, name=None, resourcefilepath=None):
        super().__init__(name)
        self.resourcefilepath = resourcefilepath

        self.data_dict = dict()
        self.model_dict = dict()

        # Initialise empty dict (with factory method of list) to store lists containing information about each death
        # that is used by the `Deviance` module
        self.__demog_outputs__ = defaultdict(list)


    INIT_DEPENDENCIES = {'Demography', 'Hiv', 'Tb'}

    # Declare Metadata
    METADATA = {}

    # No parameters to declare
    PARAMETERS = {}

    # No properties to declare
    PROPERTIES = {}


[docs]
    def read_parameters(self, data_folder):
        pass



[docs]
    def initialise_population(self, population):
        pass



[docs]
    def initialise_simulation(self, sim):
        pass



[docs]
    def on_birth(self, mother_id, child_id):
        pass



[docs]
    def read_data_files(self):
        """Make a dict of all data to be used in calculating calibration score"""

        # # HIV read in resource files for data
        xls = pd.ExcelFile(self.resourcefilepath / "ResourceFile_HIV.xlsx")

        # MPHIA HIV data - age-structured
        data_hiv_mphia_inc = pd.read_excel(xls, sheet_name="MPHIA_incidence2015")
        data_hiv_mphia_prev = pd.read_excel(xls, sheet_name="MPHIA_prevalence_art2015")

        # hiv prevalence
        self.data_dict["mphia_prev_2015_adult"] = data_hiv_mphia_prev.loc[
            data_hiv_mphia_prev.age == "Total 15-49", "total percent hiv positive"
        ].values[
            0
        ]
        self.data_dict["mphia_prev_2015_child"] = data_hiv_mphia_prev.loc[
            data_hiv_mphia_prev.age == "Total 0-14", "total percent hiv positive"
        ].values[
            0
        ]

        # hiv incidence
        self.data_dict["mphia_inc_2015_adult"] = data_hiv_mphia_inc.loc[
            (data_hiv_mphia_inc.age == "15-49"), "total_percent_annual_incidence"
        ].values[
            0
        ]

        # DHS HIV data
        data_hiv_dhs_prev = pd.read_excel(xls, sheet_name="DHS_prevalence")
        self.data_dict["dhs_prev_2010"] = data_hiv_dhs_prev.loc[
            (data_hiv_dhs_prev.Year == 2010), "HIV prevalence among general population 15-49"
        ].values[0]
        self.data_dict["dhs_prev_2015"] = data_hiv_dhs_prev.loc[
            (data_hiv_dhs_prev.Year == 2015), "HIV prevalence among general population 15-49"
        ].values[0]

        # UNAIDS AIDS deaths data: 2010-
        data_hiv_unaids_deaths = pd.read_excel(xls, sheet_name="unaids_mortality_dalys2021")
        self.data_dict["unaids_deaths_per_100k"] = data_hiv_unaids_deaths["AIDS_mortality_per_100k"]

        # TB
        # TB WHO data: 2010-
        xls_tb = pd.ExcelFile(self.resourcefilepath / "ResourceFile_TB.xlsx")

        # TB active incidence per 100k 2010-2017
        data_tb_who = pd.read_excel(xls_tb, sheet_name="WHO_activeTB2023")
        self.data_dict["who_tb_inc_per_100k"] = data_tb_who.loc[
            (data_tb_who.year >= 2010), "incidence_per_100k"
        ]

        # TB mortality per 100k excluding HIV: 2010-2017
        self.data_dict["who_tb_deaths_per_100k"] = data_tb_who.loc[
            (data_tb_who.year >= 2010), "mortality_tb_excl_hiv_per_100k"
        ]



[docs]
    def read_model_outputs(self):
        hiv = self.sim.modules['Hiv'].hiv_outputs
        tb = self.sim.modules['Tb'].tb_outputs
        demog = self.__demog_outputs__

        # get logged outputs for calibration into dict
        # population size each year
        pop = pd.Series(hiv["population"])
        pop.index = hiv['date']
        pop.index = pd.to_datetime(pop.index, format="%Y")

        # ------------------ HIV disease ------------------ #
        # HIV - prevalence among in adults aged 15-49
        self.model_dict["hiv_prev_adult_2010"] = hiv["hiv_prev_adult_1549"][0] * 100
        self.model_dict["hiv_prev_adult_2015"] = hiv["hiv_prev_adult_1549"][6] * 100

        # hiv incidence in adults aged 15-49
        self.model_dict["hiv_inc_adult_2015"] = hiv["hiv_adult_inc_1549"][6] * 100

        # hiv prevalence in children (mphia)
        self.model_dict["hiv_prev_child_2015"] = hiv["hiv_prev_child"][6] * 100

        # ------------------ TB DISEASE ------------------ #

        # tb active incidence per 100k - all ages
        self.model_dict["TB_active_inc_per100k"] = (tb["num_new_active_tb"] / pop) * 100000

        # ------------------ DEATHS ------------------ #
        # convert dict to df for easier processing
        deaths = pd.DataFrame()
        deaths['date'] = demog['date']
        deaths['age'] = demog['age']
        deaths['sex'] = demog['sex']
        deaths['cause'] = demog['cause']

        # AIDS DEATHS
        # limit to deaths among aged 15+, include HIV/TB deaths
        keep = (deaths.age >= 15) & (
            (deaths.cause == "AIDS_TB") | (deaths.cause == "AIDS_non_TB")
        )
        deaths_AIDS = deaths.loc[keep].copy()
        tot_aids_deaths = deaths_AIDS.groupby(by=["date"]).size()
        tot_aids_deaths.index = pd.to_datetime(tot_aids_deaths.index, format="%Y")

        # aids mortality rates per 1000 person-years
        self.model_dict["AIDS_mortality_per_100k"] = (tot_aids_deaths / pop) * 100000

        # TB deaths (non-hiv only, all ages)
        keep = deaths.cause == "TB"
        deaths_TB = deaths.loc[keep].copy()
        tot_tb_non_hiv_deaths = deaths_TB.groupby(by=["date"]).size()
        tot_tb_non_hiv_deaths.index = pd.to_datetime(tot_tb_non_hiv_deaths.index, format="%Y")

        # tb mortality rates per 100k person-years
        self.model_dict["TB_mortality_per_100k"] = (tot_tb_non_hiv_deaths / pop) * 100000



[docs]
    def weighted_mean(self, model_dict, data_dict):
        # assert model_output is not empty

        # return calibration score (weighted mean deviance)
        # sqrt( (observed data – model output)^2 | / observed data)
        # sum these for each data item (all male prevalence over time by age-group) and divide by # items
        # then weighted sum of all components -> calibration score

        # for debugging
        # model_dict = self.model_dict
        # data_dict = self.data_dict

        # need weights for each data item
        model_weight = 0.5  # weight if "data" are modelled estimate (e.g. UNAIDS)

        def deviance_function(data, model):
            # in case there are NAs in model outputs (e.g. no deaths in given year)
            model = pd.Series(model)
            model = model.fillna(0)

            deviance = math.sqrt((data - model) ** 2) / data

            return deviance

        # ------------------ HIV ------------------ #

        # hiv prevalence in adults 15-49: dhs 2010, 2015, mphia
        hiv_prev_adult = (deviance_function(data_dict["dhs_prev_2010"], model_dict["hiv_prev_adult_2010"]) +
                          deviance_function(data_dict["dhs_prev_2015"], model_dict["hiv_prev_adult_2015"]) +
                          deviance_function(data_dict["mphia_prev_2015_adult"], model_dict["hiv_prev_adult_2015"])
                          ) / 3

        hiv_prev_child = deviance_function(
            data_dict["mphia_prev_2015_child"], model_dict["hiv_prev_child_2015"]
        )

        # hiv incidence mphia
        hiv_inc_adult = deviance_function(
            data_dict["mphia_inc_2015_adult"], model_dict["hiv_inc_adult_2015"]
        )

        # ------------------ TB ------------------ #

        # tb active incidence (WHO estimates) 2010-2021
        tb_incidence_who = (
                               deviance_function(
                                   data_dict["who_tb_inc_per_100k"].values[0], model_dict["TB_active_inc_per100k"][0]
                               ) +
                               deviance_function(
                                   data_dict["who_tb_inc_per_100k"].values[1], model_dict["TB_active_inc_per100k"][1]
                               ) +
                               deviance_function(
                                   data_dict["who_tb_inc_per_100k"].values[2], model_dict["TB_active_inc_per100k"][2]
                               ) +
                               deviance_function(
                                   data_dict["who_tb_inc_per_100k"].values[3], model_dict["TB_active_inc_per100k"][3]
                               ) +
                               deviance_function(
                                   data_dict["who_tb_inc_per_100k"].values[4], model_dict["TB_active_inc_per100k"][4]
                               ) +
                               deviance_function(
                                   data_dict["who_tb_inc_per_100k"].values[5], model_dict["TB_active_inc_per100k"][5]
                               ) +
                               deviance_function(
                                   data_dict["who_tb_inc_per_100k"].values[6], model_dict["TB_active_inc_per100k"][6]
                               ) +
                               deviance_function(
                                   data_dict["who_tb_inc_per_100k"].values[7], model_dict["TB_active_inc_per100k"][7]
                               ) +
                               deviance_function(
                                   data_dict["who_tb_inc_per_100k"].values[8], model_dict["TB_active_inc_per100k"][8]
                               ) +
                               deviance_function(
                                   data_dict["who_tb_inc_per_100k"].values[9], model_dict["TB_active_inc_per100k"][9]
                               ) +
                               deviance_function(
                                   data_dict["who_tb_inc_per_100k"].values[10], model_dict["TB_active_inc_per100k"][10]
                               ) +
                               deviance_function(
                                   data_dict["who_tb_inc_per_100k"].values[11], model_dict["TB_active_inc_per100k"][11]
                               )
                           ) / 12

        # ------------------ DEATHS ------------------ #

        # aids deaths unaids 2010-2021
        hiv_deaths_unaids = (
                                deviance_function(
                                    data_dict["unaids_deaths_per_100k"][0],
                                    model_dict["AIDS_mortality_per_100k"][0],
                                )
                                + deviance_function(
                                    data_dict["unaids_deaths_per_100k"][1],
                                    model_dict["AIDS_mortality_per_100k"][1],
                                )
                                + deviance_function(
                                    data_dict["unaids_deaths_per_100k"][2],
                                    model_dict["AIDS_mortality_per_100k"][2],
                                )
                                + deviance_function(
                                    data_dict["unaids_deaths_per_100k"][3],
                                    model_dict["AIDS_mortality_per_100k"][3],
                                )
                                + deviance_function(
                                    data_dict["unaids_deaths_per_100k"][4],
                                    model_dict["AIDS_mortality_per_100k"][4],
                                )
                                + deviance_function(
                                    data_dict["unaids_deaths_per_100k"][5],
                                    model_dict["AIDS_mortality_per_100k"][5],
                                )
                                + deviance_function(
                                    data_dict["unaids_deaths_per_100k"][6],
                                    model_dict["AIDS_mortality_per_100k"][6],
                                )
                                + deviance_function(
                                    data_dict["unaids_deaths_per_100k"][7],
                                    model_dict["AIDS_mortality_per_100k"][7],
                                )
                                + deviance_function(
                                    data_dict["unaids_deaths_per_100k"][8],
                                    model_dict["AIDS_mortality_per_100k"][8],
                                )
                                + deviance_function(
                                    data_dict["unaids_deaths_per_100k"][9],
                                    model_dict["AIDS_mortality_per_100k"][9],
                                )
                                + deviance_function(
                                    data_dict["unaids_deaths_per_100k"][10],
                                    model_dict["AIDS_mortality_per_100k"][10],
                                )
                                + deviance_function(
                                    data_dict["unaids_deaths_per_100k"][11],
                                    model_dict["AIDS_mortality_per_100k"][11],
                                )
                            ) / 12

        # tb death rate who 2010-2021
        tb_mortality_who = (
                               deviance_function(
                                   data_dict["who_tb_deaths_per_100k"].values[0],
                                   model_dict["TB_mortality_per_100k"][0],
                               ) +
                               deviance_function(
                                   data_dict["who_tb_deaths_per_100k"].values[1],
                                   model_dict["TB_mortality_per_100k"][1],
                               ) +
                               deviance_function(
                                   data_dict["who_tb_deaths_per_100k"].values[2],
                                   model_dict["TB_mortality_per_100k"][2],
                               ) +
                               deviance_function(
                                   data_dict["who_tb_deaths_per_100k"].values[3],
                                   model_dict["TB_mortality_per_100k"][3],
                               ) +
                               deviance_function(
                                   data_dict["who_tb_deaths_per_100k"].values[4],
                                   model_dict["TB_mortality_per_100k"][4],
                               ) +
                               deviance_function(
                                   data_dict["who_tb_deaths_per_100k"].values[5],
                                   model_dict["TB_mortality_per_100k"][5],
                               ) +
                               deviance_function(
                                   data_dict["who_tb_deaths_per_100k"].values[6],
                                   model_dict["TB_mortality_per_100k"][6],
                               ) +
                               deviance_function(
                                   data_dict["who_tb_deaths_per_100k"].values[8],
                                   model_dict["TB_mortality_per_100k"][8],
                               ) +
                               deviance_function(
                                   data_dict["who_tb_deaths_per_100k"].values[9],
                                   model_dict["TB_mortality_per_100k"][9],
                               ) +
                               deviance_function(
                                   data_dict["who_tb_deaths_per_100k"].values[10],
                                   model_dict["TB_mortality_per_100k"][10],
                               ) +
                               deviance_function(
                                   data_dict["who_tb_deaths_per_100k"].values[11],
                                   model_dict["TB_mortality_per_100k"][11],
                               )
                           ) / 12

        # tb who is a model estimate, also tb cnr depends on estimated incidence
        calibration_score = (
            hiv_prev_adult
            + hiv_prev_child
            + hiv_inc_adult
            + (tb_incidence_who * model_weight)
            + (hiv_deaths_unaids * model_weight)
            + (tb_mortality_who * model_weight)
        )

        hiv_beta = self.sim.modules["Hiv"].parameters["beta"]
        tb_scaling_factor_WHO = self.sim.modules["Tb"].parameters["scaling_factor_WHO"]

        return_values = [calibration_score, hiv_beta, tb_scaling_factor_WHO]

        return return_values



[docs]
    def record_death(self, year, age_years, sex, cause):
        """Save outputs about one death"""
        self.__demog_outputs__["date"] += [year]
        self.__demog_outputs__["age"] += [age_years]
        self.__demog_outputs__["sex"] += [sex]
        self.__demog_outputs__["cause"] += [cause]



[docs]
    def on_simulation_end(self):

        if self.sim.date.year >= 2020:
            self.read_data_files()
            self.read_model_outputs()
            deviance_measure = self.weighted_mean(model_dict=self.model_dict, data_dict=self.data_dict)

            logger.info(
                key="deviance_measure",
                description="Deviance measure for HIV and TB",
                data={"deviance_measure": deviance_measure[0],
                      "hiv_transmission_rate": deviance_measure[1],
                      "tb_scaling_factor_WHO": deviance_measure[2]
                      }
            )