Source code for tlo.methods.healthburden

"""
This module runs the counting of DALYs.
"""
from pathlib import Path

import numpy as np
import pandas as pd

from tlo import DateOffset, Module, Parameter, Types, logging
from tlo.events import PopulationScopeEventMixin, RegularEvent
from tlo.methods import Metadata
from tlo.methods.causes import (
    Cause,
    collect_causes_from_disease_modules,
    create_mappers_from_causes_to_label,
)

logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)


class HealthBurden(Module):
    """This module holds everything needed for the counting and recording of DALYs."""

    def __init__(self, name=None, resourcefilepath=None):
        super().__init__(name)
        self.resourcefilepath = resourcefilepath

        # instance variables
        self.multi_index = None
        self.YearsLifeLost = None
        self.YearsLifeLostStacked = None
        self.YearsLivedWithDisability = None
        self.recognised_modules_names = None
        self.causes_of_disability = None

    INIT_DEPENDENCIES = {'Demography'}

    # Declare Metadata
    METADATA = {}

    PARAMETERS = {
        'DALY_Weight_Database': Parameter(
            Types.DATA_FRAME, 'DALY Weight Database from GBD'),
        'Age_Limit_For_YLL': Parameter(
            Types.REAL, 'The age up to which deaths are recorded as having induced a loss of life years'),
        'gbd_causes_of_disability': Parameter(
            Types.LIST, 'List of the strings of causes of disability defined in the GBD data')
    }

    PROPERTIES = {}

    def read_parameters(self, data_folder):
        p = self.parameters

        p['DALY_Weight_Database'] = pd.read_csv(
            Path(self.resourcefilepath) / 'ResourceFile_DALY_Weights.csv')

        p['Age_Limit_For_YLL'] = 70.0  # Assumption that only deaths younger than 70y incur years of lost life

        p['gbd_causes_of_disability'] = set(pd.read_csv(
            Path(self.resourcefilepath) / 'gbd' / 'ResourceFile_CausesOfDALYS_GBD2019.csv',
            header=None)[0].values)

    def initialise_population(self, population):
        pass

    def initialise_simulation(self, sim):
        """Do before the simulation starts:
        1) Prepare data storage structures
        2) Collect the modules that will use this HealthBurden module
        3) Process the declarations of causes of disability made by the disease modules
        4) Launch the DALY Logger to run every month, starting with the end of the first month of simulation
        """

        # 1) Prepare data storage structures
        # Create the sex/age_range/year multi-index for the YLL and YLD storage dataframes
        sex_index = ['M', 'F']
        year_index = list(range(self.sim.start_date.year, self.sim.end_date.year + 1))
        age_index = self.sim.modules['Demography'].AGE_RANGE_CATEGORIES
        multi_index = pd.MultiIndex.from_product([sex_index, age_index, year_index],
                                                 names=['sex', 'age_range', 'year'])
        self.multi_index = multi_index

        # Create the YLL and YLD storage dataframes (using the sex/age_range/year multi-index)
        self.YearsLifeLost = pd.DataFrame(index=multi_index)
        self.YearsLifeLostStacked = pd.DataFrame(index=multi_index)
        self.YearsLivedWithDisability = pd.DataFrame(index=multi_index)

        # 2) Collect the modules that will use this HealthBurden module
        self.recognised_modules_names = [
            m.name for m in self.sim.modules.values() if Metadata.USES_HEALTHBURDEN in m.METADATA
        ]

        # Check that all registered disease modules have the report_daly_values() function
        for module_name in self.recognised_modules_names:
            assert getattr(self.sim.modules[module_name], 'report_daly_values', None) and \
                callable(self.sim.modules[module_name].report_daly_values), \
                'A module that declares use of the HealthBurden module must have a ' \
                'callable function "report_daly_values"'

        # 3) Process the declarations of causes of disability made by the disease modules
        self.process_causes_of_disability()

        # 4) Launch the DALY Logger to run every month, starting with the end of the first month of simulation
        sim.schedule_event(Get_Current_DALYS(self), sim.date + DateOffset(months=1))

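    # A minimal sketch (hypothetical module name and values, for illustration only) of a disease module that would
    # be recognised by this HealthBurden module. The checks above require only the Metadata.USES_HEALTHBURDEN flag
    # and a callable report_daly_values(); in addition, Get_Current_DALYS expects the values returned each month to
    # correspond to the causes declared in the module's CAUSES_OF_DISABILITY.
    #
    #     class Mockitis(Module):
    #         METADATA = {Metadata.USES_HEALTHBURDEN}
    #
    #         def report_daly_values(self):
    #             df = self.sim.population.props
    #             # one disability weight in [0, 1] per person currently alive; a pd.Series is acceptable
    #             # when only a single cause of disability is declared
    #             return pd.Series(0.2, index=df.index[df.is_alive])
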
    def process_causes_of_disability(self):
        """
        1) Collect causes of disability that are reported by each disease module
        2) Define the "Other" tlo_cause of disability (corresponding to those gbd_causes that are not represented
           by the disease modules in this sim.)
        3) Output to the log the mappers from causes of disability to the label
        """
        # 1) Collect causes of disability that are reported by each disease module
        self.causes_of_disability = collect_causes_from_disease_modules(
            all_modules=self.sim.modules.values(),
            collect='CAUSES_OF_DISABILITY',
            acceptable_causes=set(self.parameters['gbd_causes_of_disability'])
        )

        # 2) Define the "Other" tlo_cause of disability
        self.causes_of_disability['Other'] = Cause(
            label='Other',
            gbd_causes=self.get_gbd_causes_of_disability_not_represented_in_disease_modules(
                self.causes_of_disability)
        )

        # 3) Output to the log the mappers for causes of disability
        mapper_from_tlo_causes, mapper_from_gbd_causes = self.create_mappers_from_causes_of_death_to_label()
        logger.info(
            key='mapper_from_tlo_cause_to_common_label',
            data=mapper_from_tlo_causes
        )
        logger.info(
            key='mapper_from_gbd_cause_to_common_label',
            data=mapper_from_gbd_causes
        )

    def on_birth(self, mother_id, child_id):
        pass

    def on_simulation_end(self):
        """Log records of:
        1) The Years Lived With Disability (YLD) (by the 'causes of disability' declared by the disease modules)
        2) The Years of Life Lost (YLL) (by the 'causes of death' declared by the disease modules)
        3) The total DALYs recorded (YLD + YLL) (by the labels that are declared for 'causes of death' and
           'causes of disability').
        """

        # Check that the multi-indexes of the dataframes are as expected
        assert self.YearsLifeLost.index.equals(self.multi_index)
        assert self.YearsLivedWithDisability.index.equals(self.multi_index)

        # 1) Log the Years Lived With Disability (YLD) (by the 'causes of disability' declared by disease modules).
        for index, row in self.YearsLivedWithDisability.reset_index().iterrows():
            logger.info(
                key='yld_by_causes_of_disability',
                data=row.to_dict(),
                description='Years lived with disability by the declared cause_of_disability, '
                            'broken down by year, sex, age-group'
            )

        # 2) Log the Years of Life Lost (YLL) (by the 'causes of death' declared by disease modules).
        for index, row in self.YearsLifeLost.reset_index().iterrows():
            logger.info(
                key='yll_by_causes_of_death',
                data=row.to_dict(),
                description='Years of life lost by the declared cause_of_death, '
                            'broken down by year, sex, age-group'
            )
        for index, row in self.YearsLifeLostStacked.reset_index().iterrows():
            logger.info(
                key='yll_by_causes_of_death_stacked',
                data=row.to_dict(),
                description='Years of life lost (stacked onto the year of death) by the declared cause_of_death, '
                            'broken down by year, sex, age-group'
            )

        # 3) Log the total DALYs recorded (YLD + YLL) (by the labels declared)
        dalys, dalys_stacked = self.compute_dalys()

        # - dump to log, line-by-line
        for index, row in dalys.reset_index().iterrows():
            logger.info(
                key='dalys',
                data=row.to_dict(),
                description='DALYs, by the labels that are declared for each cause_of_death and cause_of_disability'
                            ', broken down by year, sex, age-group'
            )
        for index, row in dalys_stacked.reset_index().iterrows():
            logger.info(
                key='dalys_stacked',
                data=row.to_dict(),
                description='DALYs (with YLL stacked onto the year of death), by the labels that are declared for '
                            'each cause_of_death and cause_of_disability, broken down by year, sex, age-group'
            )

    def compute_dalys(self):
        """Compute total DALYs (by label), by age, sex and year. Do this by summing the YLD and YLL with respect to
        the label of the corresponding cause of each, and give output by label."""

        def add_duplicated_columns(_df):
            return _df.groupby(_df.columns, axis=1).sum()

        yld = add_duplicated_columns(self.YearsLivedWithDisability.rename(
            columns={c: self.causes_of_disability[c].label for c in self.YearsLivedWithDisability.columns}
        ))

        yll = add_duplicated_columns(self.YearsLifeLost.rename(
            columns={c: self.sim.modules['Demography'].causes_of_death[c].label for c in self.YearsLifeLost.columns}
        ))

        yll_stacked = add_duplicated_columns(self.YearsLifeLostStacked.rename(
            columns={c: self.sim.modules['Demography'].causes_of_death[c].label
                     for c in self.YearsLifeLostStacked.columns}
        ))

        return yld.add(yll, fill_value=0), yld.add(yll_stacked, fill_value=0)

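    # A small worked example of add_duplicated_columns() above (hypothetical label names): after causes are renamed
    # to their labels, columns that share the same label are summed together.
    #
    #     _df = pd.DataFrame([[1.0, 0.5, 0.2]], columns=['Backpain', 'Backpain', 'Other'])
    #     _df.groupby(_df.columns, axis=1).sum()   # -> Backpain = 1.5, Other = 0.2
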
    def get_daly_weight(self, sequlae_code):
        """
        This can be used to look up the DALY weight for a particular condition identified by the 'sequela code'.
        Sequela codes for particular conditions can be looked up in ResourceFile_DALY_Weights.csv.
        :param sequlae_code: the sequela code of the condition
        :return: the daly weight associated with that sequela code
        """
        w = self.parameters['DALY_Weight_Database']
        daly_wt = w.loc[w['TLO_Sequela_Code'] == sequlae_code, 'disability weight'].values[0]

        # Check that the sequela code was found
        assert (not pd.isnull(daly_wt))

        # Check that the value is within bounds [0,1]
        assert (daly_wt >= 0) & (daly_wt <= 1)

        return daly_wt

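    # Illustrative usage from within a disease module (the sequela code 35 is hypothetical; real codes are listed
    # in ResourceFile_DALY_Weights.csv):
    #
    #     self.daly_wts = {
    #         'moderate_illness': self.sim.modules['HealthBurden'].get_daly_weight(sequlae_code=35)
    #     }
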
    def report_live_years_lost(self, sex, date_of_birth, cause_of_death):
        """
        Calculate the 'years of lost life' following a death (assuming that the person has died on today's date in
        the simulation) and add them to the running totals.
        :param sex: sex of the person that has died
        :param date_of_birth: date_of_birth of the person that has died
        :param cause_of_death: title for the column in the YLL dataframe (of the form <ModuleName>_<Cause>)
        """
        assert self.YearsLifeLost.index.equals(self.multi_index)

        # date from which years of life are lost
        date_of_death = self.sim.date

        # Get the years of life lost split out by year and age-group (counting years of life lost up to the earliest
        # of the age_limit or the end of the simulation)
        yll = self.decompose_yll_by_age_and_time(
            start_date=date_of_death,
            end_date=min(
                self.sim.end_date,
                (date_of_birth + pd.DateOffset(years=self.parameters['Age_Limit_For_YLL']))
            ),
            date_of_birth=date_of_birth
        )

        # augment the multi-index of yll with sex so that it is sex/age_range/year
        yll['sex'] = sex
        yll = yll.set_index('sex', append=True).reorder_levels(['sex', 'age_range', 'year'])

        # Get the years of life lost "stacked" (where all the life-years lost up to the age_limit are ascribed to
        # the year of death)
        yll_stacked = self.decompose_yll_by_age_and_time(
            start_date=date_of_death,
            end_date=date_of_birth + pd.DateOffset(years=self.parameters['Age_Limit_For_YLL']),
            date_of_birth=date_of_birth
        )
        yll_stacked = yll_stacked.sum(level=1)
        yll_stacked['year'] = date_of_death.year
        yll_stacked['sex'] = sex
        yll_stacked = yll_stacked.set_index(['sex', 'year'], append=True).reorder_levels(['sex', 'age_range', 'year'])

        # Add the years-of-life-lost from this death to the overall YLL dataframes
        if cause_of_death not in self.YearsLifeLost.columns:
            # cause has not been added to the YearsLifeLost dataframes, so make new columns
            self.YearsLifeLost[cause_of_death] = 0.0
            self.YearsLifeLostStacked[cause_of_death] = 0.0

        # Add the life-years-lost from this death to the running totals in the YearsLifeLost dataframes
        self.YearsLifeLost[cause_of_death] = self.YearsLifeLost[cause_of_death].add(
            yll['person_years'], fill_value=0)
        self.YearsLifeLostStacked[cause_of_death] = self.YearsLifeLostStacked[cause_of_death].add(
            yll_stacked['person_years'], fill_value=0)

        # Check that the index of the YLL dataframes is not changed
        assert self.YearsLifeLost.index.equals(self.multi_index)
        assert self.YearsLifeLostStacked.index.equals(self.multi_index)

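    # An illustrative call (hypothetical values; in practice this is invoked when a death is processed and the
    # HealthBurden module is registered):
    #
    #     self.sim.modules['HealthBurden'].report_live_years_lost(
    #         sex='F',
    #         date_of_birth=pd.Timestamp(1980, 1, 1),
    #         cause_of_death='Mockitis_mockitis'   # hypothetical column title of the form <ModuleName>_<Cause>
    #     )
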
    def decompose_yll_by_age_and_time(self, start_date, end_date, date_of_birth):
        """This helper function will decompose a period of years of lost life into the time spent in each age-group
        in each calendar year.
        :return: a dataframe of the person-time (in years) spent by age-group and calendar year
        """
        df = pd.DataFrame()

        # Get all the days between start and end
        df['days'] = pd.date_range(start=start_date, end=end_date, freq='D')
        df['year'] = df['days'].dt.year

        # Get the age that this person will be on each day
        df['age_in_years'] = ((df['days'] - date_of_birth).dt.days.values / 365).astype(int)

        # get the age_range_lookup from demography
        age_range_lookup = self.sim.modules['Demography'].AGE_RANGE_LOOKUP
        df['age_range'] = df['age_in_years'].map(age_range_lookup)

        period = pd.DataFrame(df.groupby(by=['year', 'age_range'])['days'].count())
        period['person_years'] = (period['days'] / 365).clip(lower=0.0, upper=1.0)
        period = period.drop(columns=['days'], axis=1)

        return period

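    # A worked example with hypothetical dates: a one-year period of lost life that straddles a calendar-year
    # boundary is split into roughly half a person-year in each year, attributed to the age-group the person
    # would have been in on those days.
    #
    #     self.decompose_yll_by_age_and_time(start_date=pd.Timestamp(2029, 7, 1),
    #                                        end_date=pd.Timestamp(2030, 6, 30),
    #                                        date_of_birth=pd.Timestamp(2002, 1, 1))
    #     # -> person_years of ~0.50 for (2029, '25-29') and ~0.50 for (2030, '25-29')
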
    def get_gbd_causes_of_disability_not_represented_in_disease_modules(self, causes_of_disability):
        """
        Find the causes of disability in the GBD datasets that are not represented within the causes of disability
        defined in the modules registered in this simulation.
        :return: set of gbd_causes of disability that are not represented in disease modules
        """
        all_gbd_causes_in_sim = set()
        for c in causes_of_disability.values():
            all_gbd_causes_in_sim.update(c.gbd_causes)

        return set(self.parameters['gbd_causes_of_disability']) - all_gbd_causes_in_sim

    def create_mappers_from_causes_of_death_to_label(self):
        """Use a helper function to create mappers from causes of disability to the label."""
        return create_mappers_from_causes_to_label(
            causes=self.causes_of_disability,
            all_gbd_causes=set(self.parameters['gbd_causes_of_disability'])
        )


class Get_Current_DALYS(RegularEvent, PopulationScopeEventMixin):
    """
    This event runs every month and asks each disease module to report the average disability weight for each living
    person during the previous month. It reconciles these reports across the disease modules to ensure that no person
    has a total weight greater than one.
    A known (small) limitation of this is that persons who died during the previous month do not contribute any YLD.
    """

    def __init__(self, module):
        super().__init__(module, frequency=DateOffset(months=1))

    def apply(self, population):
        # Running the DALY Logger

        # Do nothing if no disease modules are registered or no causes of disability are registered
        if (not self.module.recognised_modules_names) or (not self.module.causes_of_disability):
            return

        # Get the population dataframe
        df = self.sim.population.props
        idx_alive = set(df.loc[df.is_alive].index)

        # 1) Ask each disease module to report the DALYs for the previous month
        dalys_from_each_disease_module = list()
        for disease_module_name in self.module.recognised_modules_names:

            disease_module = self.sim.modules[disease_module_name]
            declared_causes_of_disability_module = disease_module.CAUSES_OF_DISABILITY.keys()

            if declared_causes_of_disability_module:
                # if some causes of disability are declared, collect the disability reported by this disease module:
                dalys_from_disease_module = disease_module.report_daly_values()

                # Check that the type is in an acceptable form and make into a dataframe if not already
                assert type(dalys_from_disease_module) in (pd.Series, pd.DataFrame)
                if type(dalys_from_disease_module) is pd.Series:
                    # if a pd.Series is returned, it implies there is only one cause of disability registered by
                    # the module:
                    assert 1 == len(declared_causes_of_disability_module), \
                        "pd.Series returned but the number of causes of disability declared is not equal to one."

                    # name the returned pd.Series as the only cause of disability that is defined by the module
                    dalys_from_disease_module.name = list(declared_causes_of_disability_module)[0]

                    # convert to pd.DataFrame
                    dalys_from_disease_module = pd.DataFrame(dalys_from_disease_module)

                # Perform checks on what has been returned
                assert set(dalys_from_disease_module.columns) == set(declared_causes_of_disability_module)
                assert set(dalys_from_disease_module.index) == idx_alive
                assert not pd.isnull(dalys_from_disease_module).any().any()
                assert ((dalys_from_disease_module >= 0) & (dalys_from_disease_module <= 1)).all().all()
                assert (dalys_from_disease_module.sum(axis=1) <= 1).all()

                # Append to the list of dalys reported by each module
                dalys_from_each_disease_module.append(dalys_from_disease_module)

        # 2) Combine into a single dataframe (each column of this dataframe gives the reports from each module), and
        # add together dalys reported by different modules that have the same cause (i.e., add together columns with
        # the same name).
        disease_specific_daly_values_this_month = pd.concat(
            dalys_from_each_disease_module, axis=1).groupby(axis=1, level=0).sum()

        # 3) Rescale the DALY weights
        # Create a scaling-factor (if the total DALY weight for one person is more than 1, all of that person's
        # DALY weights are scaled so that their sum equals one).
        scaling_factor = (disease_specific_daly_values_this_month.sum(axis=1).clip(lower=0, upper=1) /
                          disease_specific_daly_values_this_month.sum(axis=1)).fillna(1.0)

        disease_specific_daly_values_this_month = disease_specific_daly_values_this_month.multiply(scaling_factor,
                                                                                                   axis=0)
        assert ((disease_specific_daly_values_this_month.sum(axis=1) - 1.0) < 1e-6).all()

        # Multiply by 1/12 as these weights are for one month only
        disease_specific_daly_values_this_month = disease_specific_daly_values_this_month * (1 / 12)

        # 4) Summarise the results for this month with respect to age and sex
        # - merge in age/sex information
        disease_specific_daly_values_this_month = disease_specific_daly_values_this_month.merge(
            df.loc[idx_alive, ['sex', 'age_range']], left_index=True, right_index=True, how='left')

        # - sum of daly_weight, by sex and age
        disability_monthly_summary = pd.DataFrame(
            disease_specific_daly_values_this_month.groupby(['sex', 'age_range']).sum().fillna(0))

        # - add the year into the multi-index
        disability_monthly_summary['year'] = self.sim.date.year
        disability_monthly_summary.set_index('year', append=True, inplace=True)
        disability_monthly_summary = disability_monthly_summary.reorder_levels(['sex', 'age_range', 'year'])

        # 5) Add the monthly summary to the overall dataframe for YearsLivedWithDisability
        dalys_to_add = disability_monthly_summary.sum().sum()    # for checking
        dalys_current = self.module.YearsLivedWithDisability.sum().sum()    # for checking

        # (Nb. this will add columns that are not otherwise present and add values to columns where they are.)
        combined = self.module.YearsLivedWithDisability.combine(
            disability_monthly_summary,
            fill_value=0.0,
            func=np.add,
            overwrite=False)

        # Merge into a dataframe with the correct multi-index (the multi-index from combine is subtly different)
        self.module.YearsLivedWithDisability = pd.DataFrame(index=self.module.multi_index).merge(
            combined, left_index=True, right_index=True, how='left')

        # Check that the multi-index is as expected and that the addition of DALYs has worked
        assert self.module.YearsLivedWithDisability.index.equals(self.module.multi_index)
        assert abs(self.module.YearsLivedWithDisability.sum().sum() - (dalys_to_add + dalys_current)) < 1e-5
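
# A small numeric illustration of the rescaling in step 3 above (hypothetical weights): if two modules report
# weights of 0.7 and 0.6 for the same person, the monthly total of 1.3 is clipped to 1.0, giving a scaling factor
# of 1/1.3, so the rescaled weights become ~0.54 and ~0.46 (summing to 1.0) before the 1/12 monthly adjustment.
# For a person whose weights already sum to <= 1.0, the scaling factor is 1.0.
#
#     totals = pd.Series([1.3, 0.4])
#     (totals.clip(lower=0, upper=1) / totals).fillna(1.0)   # -> [0.769..., 1.0]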