Source code for tlo.methods.hiv_tb_calibration

"""
Deviance measure

This module runs at the end of a simulation and calculates a weighted deviance measure
for a given set of parameters using outputs from the demography (deaths), HIV and TB modules

"""
import math
from collections import defaultdict

import pandas as pd

from tlo import Module, logging

logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)


[docs] class Deviance(Module): """ This module reads in logged outputs from HIV, TB and demography and compares them with reported data a deviance measure is calculated and returned on simulation end """
[docs] def __init__(self, name=None, resourcefilepath=None): super().__init__(name) self.resourcefilepath = resourcefilepath self.data_dict = dict() self.model_dict = dict() # Initialise empty dict (with factory method of list) to store lists containing information about each death # that is used by the `Deviance` module self.__demog_outputs__ = defaultdict(list)
INIT_DEPENDENCIES = {'Demography', 'Hiv', 'Tb'} # Declare Metadata METADATA = {} # No parameters to declare PARAMETERS = {} # No properties to declare PROPERTIES = {}
[docs] def read_parameters(self, data_folder): pass
[docs] def initialise_population(self, population): pass
[docs] def initialise_simulation(self, sim): pass
[docs] def on_birth(self, mother_id, child_id): pass
[docs] def read_data_files(self): """Make a dict of all data to be used in calculating calibration score""" # # HIV read in resource files for data xls = pd.ExcelFile(self.resourcefilepath / "ResourceFile_HIV.xlsx") # MPHIA HIV data - age-structured data_hiv_mphia_inc = pd.read_excel(xls, sheet_name="MPHIA_incidence2015") data_hiv_mphia_prev = pd.read_excel(xls, sheet_name="MPHIA_prevalence_art2015") # hiv prevalence self.data_dict["mphia_prev_2015_adult"] = data_hiv_mphia_prev.loc[ data_hiv_mphia_prev.age == "Total 15-49", "total percent hiv positive" ].values[ 0 ] self.data_dict["mphia_prev_2015_child"] = data_hiv_mphia_prev.loc[ data_hiv_mphia_prev.age == "Total 0-14", "total percent hiv positive" ].values[ 0 ] # hiv incidence self.data_dict["mphia_inc_2015_adult"] = data_hiv_mphia_inc.loc[ (data_hiv_mphia_inc.age == "15-49"), "total_percent_annual_incidence" ].values[ 0 ] # DHS HIV data data_hiv_dhs_prev = pd.read_excel(xls, sheet_name="DHS_prevalence") self.data_dict["dhs_prev_2010"] = data_hiv_dhs_prev.loc[ (data_hiv_dhs_prev.Year == 2010), "HIV prevalence among general population 15-49" ].values[0] self.data_dict["dhs_prev_2015"] = data_hiv_dhs_prev.loc[ (data_hiv_dhs_prev.Year == 2015), "HIV prevalence among general population 15-49" ].values[0] # UNAIDS AIDS deaths data: 2010- data_hiv_unaids_deaths = pd.read_excel(xls, sheet_name="unaids_mortality_dalys2021") self.data_dict["unaids_deaths_per_100k"] = data_hiv_unaids_deaths["AIDS_mortality_per_100k"] # TB # TB WHO data: 2010- xls_tb = pd.ExcelFile(self.resourcefilepath / "ResourceFile_TB.xlsx") # TB active incidence per 100k 2010-2017 data_tb_who = pd.read_excel(xls_tb, sheet_name="WHO_activeTB2023") self.data_dict["who_tb_inc_per_100k"] = data_tb_who.loc[ (data_tb_who.year >= 2010), "incidence_per_100k" ] # TB mortality per 100k excluding HIV: 2010-2017 self.data_dict["who_tb_deaths_per_100k"] = data_tb_who.loc[ (data_tb_who.year >= 2010), "mortality_tb_excl_hiv_per_100k" ]
[docs] def read_model_outputs(self): hiv = self.sim.modules['Hiv'].hiv_outputs tb = self.sim.modules['Tb'].tb_outputs demog = self.__demog_outputs__ # get logged outputs for calibration into dict # population size each year pop = pd.Series(hiv["population"]) pop.index = hiv['date'] pop.index = pd.to_datetime(pop.index, format="%Y") # ------------------ HIV disease ------------------ # # HIV - prevalence among in adults aged 15-49 self.model_dict["hiv_prev_adult_2010"] = hiv["hiv_prev_adult_1549"][0] * 100 self.model_dict["hiv_prev_adult_2015"] = hiv["hiv_prev_adult_1549"][6] * 100 # hiv incidence in adults aged 15-49 self.model_dict["hiv_inc_adult_2015"] = hiv["hiv_adult_inc_1549"][6] * 100 # hiv prevalence in children (mphia) self.model_dict["hiv_prev_child_2015"] = hiv["hiv_prev_child"][6] * 100 # ------------------ TB DISEASE ------------------ # # tb active incidence per 100k - all ages self.model_dict["TB_active_inc_per100k"] = (tb["num_new_active_tb"] / pop) * 100000 # ------------------ DEATHS ------------------ # # convert dict to df for easier processing deaths = pd.DataFrame() deaths['date'] = demog['date'] deaths['age'] = demog['age'] deaths['sex'] = demog['sex'] deaths['cause'] = demog['cause'] # AIDS DEATHS # limit to deaths among aged 15+, include HIV/TB deaths keep = (deaths.age >= 15) & ( (deaths.cause == "AIDS_TB") | (deaths.cause == "AIDS_non_TB") ) deaths_AIDS = deaths.loc[keep].copy() tot_aids_deaths = deaths_AIDS.groupby(by=["date"]).size() tot_aids_deaths.index = pd.to_datetime(tot_aids_deaths.index, format="%Y") # aids mortality rates per 1000 person-years self.model_dict["AIDS_mortality_per_100k"] = (tot_aids_deaths / pop) * 100000 # TB deaths (non-hiv only, all ages) keep = deaths.cause == "TB" deaths_TB = deaths.loc[keep].copy() tot_tb_non_hiv_deaths = deaths_TB.groupby(by=["date"]).size() tot_tb_non_hiv_deaths.index = pd.to_datetime(tot_tb_non_hiv_deaths.index, format="%Y") # tb mortality rates per 100k person-years self.model_dict["TB_mortality_per_100k"] = (tot_tb_non_hiv_deaths / pop) * 100000
[docs] def weighted_mean(self, model_dict, data_dict): # assert model_output is not empty # return calibration score (weighted mean deviance) # sqrt( (observed data – model output)^2 | / observed data) # sum these for each data item (all male prevalence over time by age-group) and divide by # items # then weighted sum of all components -> calibration score # for debugging # model_dict = self.model_dict # data_dict = self.data_dict # need weights for each data item model_weight = 0.5 # weight if "data" are modelled estimate (e.g. UNAIDS) def deviance_function(data, model): # in case there are NAs in model outputs (e.g. no deaths in given year) model = pd.Series(model) model = model.fillna(0) deviance = math.sqrt((data - model) ** 2) / data return deviance # ------------------ HIV ------------------ # # hiv prevalence in adults 15-49: dhs 2010, 2015, mphia hiv_prev_adult = (deviance_function(data_dict["dhs_prev_2010"], model_dict["hiv_prev_adult_2010"]) + deviance_function(data_dict["dhs_prev_2015"], model_dict["hiv_prev_adult_2015"]) + deviance_function(data_dict["mphia_prev_2015_adult"], model_dict["hiv_prev_adult_2015"]) ) / 3 hiv_prev_child = deviance_function( data_dict["mphia_prev_2015_child"], model_dict["hiv_prev_child_2015"] ) # hiv incidence mphia hiv_inc_adult = deviance_function( data_dict["mphia_inc_2015_adult"], model_dict["hiv_inc_adult_2015"] ) # ------------------ TB ------------------ # # tb active incidence (WHO estimates) 2010-2021 tb_incidence_who = ( deviance_function( data_dict["who_tb_inc_per_100k"].values[0], model_dict["TB_active_inc_per100k"][0] ) + deviance_function( data_dict["who_tb_inc_per_100k"].values[1], model_dict["TB_active_inc_per100k"][1] ) + deviance_function( data_dict["who_tb_inc_per_100k"].values[2], model_dict["TB_active_inc_per100k"][2] ) + deviance_function( data_dict["who_tb_inc_per_100k"].values[3], model_dict["TB_active_inc_per100k"][3] ) + deviance_function( data_dict["who_tb_inc_per_100k"].values[4], model_dict["TB_active_inc_per100k"][4] ) + deviance_function( data_dict["who_tb_inc_per_100k"].values[5], model_dict["TB_active_inc_per100k"][5] ) + deviance_function( data_dict["who_tb_inc_per_100k"].values[6], model_dict["TB_active_inc_per100k"][6] ) + deviance_function( data_dict["who_tb_inc_per_100k"].values[7], model_dict["TB_active_inc_per100k"][7] ) + deviance_function( data_dict["who_tb_inc_per_100k"].values[8], model_dict["TB_active_inc_per100k"][8] ) + deviance_function( data_dict["who_tb_inc_per_100k"].values[9], model_dict["TB_active_inc_per100k"][9] ) + deviance_function( data_dict["who_tb_inc_per_100k"].values[10], model_dict["TB_active_inc_per100k"][10] ) + deviance_function( data_dict["who_tb_inc_per_100k"].values[11], model_dict["TB_active_inc_per100k"][11] ) ) / 12 # ------------------ DEATHS ------------------ # # aids deaths unaids 2010-2021 hiv_deaths_unaids = ( deviance_function( data_dict["unaids_deaths_per_100k"][0], model_dict["AIDS_mortality_per_100k"][0], ) + deviance_function( data_dict["unaids_deaths_per_100k"][1], model_dict["AIDS_mortality_per_100k"][1], ) + deviance_function( data_dict["unaids_deaths_per_100k"][2], model_dict["AIDS_mortality_per_100k"][2], ) + deviance_function( data_dict["unaids_deaths_per_100k"][3], model_dict["AIDS_mortality_per_100k"][3], ) + deviance_function( data_dict["unaids_deaths_per_100k"][4], model_dict["AIDS_mortality_per_100k"][4], ) + deviance_function( data_dict["unaids_deaths_per_100k"][5], model_dict["AIDS_mortality_per_100k"][5], ) + deviance_function( data_dict["unaids_deaths_per_100k"][6], model_dict["AIDS_mortality_per_100k"][6], ) + deviance_function( data_dict["unaids_deaths_per_100k"][7], model_dict["AIDS_mortality_per_100k"][7], ) + deviance_function( data_dict["unaids_deaths_per_100k"][8], model_dict["AIDS_mortality_per_100k"][8], ) + deviance_function( data_dict["unaids_deaths_per_100k"][9], model_dict["AIDS_mortality_per_100k"][9], ) + deviance_function( data_dict["unaids_deaths_per_100k"][10], model_dict["AIDS_mortality_per_100k"][10], ) + deviance_function( data_dict["unaids_deaths_per_100k"][11], model_dict["AIDS_mortality_per_100k"][11], ) ) / 12 # tb death rate who 2010-2021 tb_mortality_who = ( deviance_function( data_dict["who_tb_deaths_per_100k"].values[0], model_dict["TB_mortality_per_100k"][0], ) + deviance_function( data_dict["who_tb_deaths_per_100k"].values[1], model_dict["TB_mortality_per_100k"][1], ) + deviance_function( data_dict["who_tb_deaths_per_100k"].values[2], model_dict["TB_mortality_per_100k"][2], ) + deviance_function( data_dict["who_tb_deaths_per_100k"].values[3], model_dict["TB_mortality_per_100k"][3], ) + deviance_function( data_dict["who_tb_deaths_per_100k"].values[4], model_dict["TB_mortality_per_100k"][4], ) + deviance_function( data_dict["who_tb_deaths_per_100k"].values[5], model_dict["TB_mortality_per_100k"][5], ) + deviance_function( data_dict["who_tb_deaths_per_100k"].values[6], model_dict["TB_mortality_per_100k"][6], ) + deviance_function( data_dict["who_tb_deaths_per_100k"].values[8], model_dict["TB_mortality_per_100k"][8], ) + deviance_function( data_dict["who_tb_deaths_per_100k"].values[9], model_dict["TB_mortality_per_100k"][9], ) + deviance_function( data_dict["who_tb_deaths_per_100k"].values[10], model_dict["TB_mortality_per_100k"][10], ) + deviance_function( data_dict["who_tb_deaths_per_100k"].values[11], model_dict["TB_mortality_per_100k"][11], ) ) / 12 # tb who is a model estimate, also tb cnr depends on estimated incidence calibration_score = ( hiv_prev_adult + hiv_prev_child + hiv_inc_adult + (tb_incidence_who * model_weight) + (hiv_deaths_unaids * model_weight) + (tb_mortality_who * model_weight) ) hiv_beta = self.sim.modules["Hiv"].parameters["beta"] tb_scaling_factor_WHO = self.sim.modules["Tb"].parameters["scaling_factor_WHO"] return_values = [calibration_score, hiv_beta, tb_scaling_factor_WHO] return return_values
[docs] def record_death(self, year, age_years, sex, cause): """Save outputs about one death""" self.__demog_outputs__["date"] += [year] self.__demog_outputs__["age"] += [age_years] self.__demog_outputs__["sex"] += [sex] self.__demog_outputs__["cause"] += [cause]
[docs] def on_simulation_end(self): if self.sim.date.year >= 2020: self.read_data_files() self.read_model_outputs() deviance_measure = self.weighted_mean(model_dict=self.model_dict, data_dict=self.data_dict) logger.info( key="deviance_measure", description="Deviance measure for HIV and TB", data={"deviance_measure": deviance_measure[0], "hiv_transmission_rate": deviance_measure[1], "tb_scaling_factor_WHO": deviance_measure[2] } )