Source code for cerebralcortex.algorithms.glucose.glucose_variability_metrics

# Copyright (c) 2017, MD2K Center of Excellence
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import numpy as np
import pandas as pd
from pyspark.sql.functions import pandas_udf, PandasUDFType
from pyspark.sql.group import GroupedData
from pyspark.sql.types import StructField, StructType, StringType, FloatType, TimestampType, IntegerType

from cerebralcortex.algorithms.utils.util import update_metadata
from cerebralcortex.core.datatypes import DataStream
from cerebralcortex.core.metadata_manager.stream.metadata import Metadata


def glucose_var(ds):
    """
    Compute CGM glucose variability metrics.

    This algorithm computes 23 clinically validated glucose variability
    metrics from continuous glucose monitor (CGM) data. The rows of
    ``ds._data`` are grouped by ``user`` and ``version`` and one row of metrics
    is produced per group.

    Args:
        ds (DataStream): DataStream of CGM data. The underlying data must
            provide the columns ``timestamp``, ``localtime``, ``user``,
            ``version`` and ``glucose_value``, and must support ``groupBy``.

    Returns:
        DataStream: one row per (user, version) with the glucose variability
        metrics listed below.

    Glucose variability metrics include:
        Interday Mean Glucose
        Interday Median Glucose
        Interday Maximum Glucose
        Interday Minimum Glucose
        Interday Standard Deviation of Glucose
        Interday Coefficient of Variation of Glucose
        Intraday Standard Deviation of Glucose (mean, median, standard deviation)
        Intraday Coefficient of Variation of Glucose (mean, median, standard deviation)
        TIR (Time in Range of default 1 SD)
        TOR (Time outside Range of default 1 SD)
        POR (Percent outside Range of default 1 SD)
        MAGE (Mean Amplitude of Glucose Excursions, default 1 SD)
        MAGN (Mean Amplitude of Normal Glucose, default 1 SD)
        J-index
        LBGI (Low Blood Glucose Index)
        HBGI (High Blood Glucose Index)
        MODD (Mean of Daily Differences)
        CONGA24 (Continuous overall net glycemic action over 24 hours)
        ADRR (Average Daily Risk Range)
        GMI (Glucose Management Indicator)
        eA1c (estimated A1c according to American Diabetes Association)
        Q1G (intraday first quartile glucose)
        Q3G (intraday third quartile glucose)

    ** for more information on these glucose metrics see dbdp.org**
    """

    # NOTE: the helpers are intentionally kept nested so that they travel with
    # the pandas UDF closure defined further down.

    def _sd_band(df, sd):
        """Return (lower, upper) bounds of mean +/- ``sd`` standard deviations of glucose."""
        glucose = df['Glucose']
        centre = np.mean(glucose)
        spread = sd * np.std(glucose)
        return centre - spread, centre + spread

    def _outside_band(df, sd):
        """Boolean mask of samples at or beyond the +/- ``sd`` band (bounds inclusive)."""
        lower, upper = _sd_band(df, sd)
        return (df['Glucose'] >= upper) | (df['Glucose'] <= lower)

    def _inside_band(df, sd):
        """Boolean mask of samples within the +/- ``sd`` band (bounds inclusive)."""
        lower, upper = _sd_band(df, sd)
        return (df['Glucose'] <= upper) & (df['Glucose'] >= lower)

    def interdayCV(df):
        """
        Compute the interday coefficient of variation of the glucose column.

        Args:
            df (pandas.DataFrame): frame with a ``Glucose`` column

        Returns:
            float: standard deviation / mean of glucose, in percent
        """
        glucose = df['Glucose']
        return (np.std(glucose) / np.mean(glucose)) * 100

    def interdaySD(df):
        """
        Compute the interday standard deviation of the glucose column.

        Args:
            df (pandas.DataFrame): frame with a ``Glucose`` column

        Returns:
            float: standard deviation of glucose
        """
        return np.std(df['Glucose'])

    def intradayCV(df):
        """
        Compute the coefficient of variation of glucose for every day and
        summarise the per-day values.

        Args:
            df (pandas.DataFrame): frame with ``Glucose`` and ``Day`` columns

        Returns:
            tuple: (mean, median, standard deviation) of the per-day CV
        """
        per_day = [interdayCV(df[df['Day'] == day]) for day in pd.unique(df['Day'])]
        return np.mean(per_day), np.median(per_day), np.std(per_day)

    def intradaySD(df):
        """
        Compute the standard deviation of glucose for every day and summarise
        the per-day values.

        Args:
            df (pandas.DataFrame): frame with ``Glucose`` and ``Day`` columns

        Returns:
            tuple: (mean, median, standard deviation) of the per-day SD
        """
        # Only the glucose column is reduced; taking np.std of the whole
        # per-day frame would also reduce the Time/Day/index columns.
        per_day = [np.std(df.loc[df['Day'] == day, 'Glucose']) for day in pd.unique(df['Day'])]
        return np.mean(per_day), np.median(per_day), np.std(per_day)

    def TIR(df, sd=1, sr=5):
        """
        Compute time in range, where the range is mean +/- ``sd`` standard
        deviations of glucose.

        Args:
            df (pandas.DataFrame): frame with a ``Glucose`` column
            sd (int): number of standard deviations defining the range (default 1)
            sr (int): minutes between CGM measurements (default 5)

        Returns:
            int: number of in-range samples multiplied by ``sr``
        """
        return int(_inside_band(df, sd).sum()) * sr

    def TOR(df, sd=1, sr=5):
        """
        Compute time outside range, where the range is mean +/- ``sd``
        standard deviations of glucose.

        Args:
            df (pandas.DataFrame): frame with a ``Glucose`` column
            sd (int): number of standard deviations defining the range (default 1)
            sr (int): minutes between CGM measurements (default 5)

        Returns:
            int: number of out-of-range samples multiplied by ``sr``
        """
        return int(_outside_band(df, sd).sum()) * sr

    def POR(df, sd=1, sr=5):
        """
        Compute the percentage of samples outside the range mean +/- ``sd``
        standard deviations of glucose.

        Args:
            df (pandas.DataFrame): frame with a ``Glucose`` column
            sd (int): number of standard deviations defining the range (default 1)
            sr (int): minutes between CGM measurements; kept for signature
                compatibility, it cancels out of the ratio

        Returns:
            float: percent of samples outside the range, NaN for an empty frame
        """
        total = len(df)
        if total == 0:
            return np.nan
        return (int(_outside_band(df, sd).sum()) / total) * 100

    def MAGE(df, sd=1):
        """
        Compute the mean amplitude of glucose excursions.

        Args:
            df (pandas.DataFrame): frame with a ``Glucose`` column
            sd (int): number of standard deviations from the mean that marks
                an excursion (default 1)

        Returns:
            float: mean glucose of the samples outside the +/- ``sd`` band
        """
        # Average the glucose values selected by the mask; averaging the mask
        # itself would only yield the fraction of excursion samples.
        return np.mean(df.loc[_outside_band(df, sd), 'Glucose'])

    def MAGN(df, sd=1):
        """
        Compute the mean amplitude of normal glucose.

        Args:
            df (pandas.DataFrame): frame with a ``Glucose`` column
            sd (int): number of standard deviations from the mean that bounds
                the normal band (default 1)

        Returns:
            float: mean glucose of the samples inside the +/- ``sd`` band
        """
        return np.mean(df.loc[_inside_band(df, sd), 'Glucose'])

    def J_index(df):
        """
        Compute the J-index from the mean and standard deviation of glucose.

        Args:
            df (pandas.DataFrame): frame with a ``Glucose`` column

        Returns:
            float: 0.001 * (mean + standard deviation) ** 2
        """
        glucose = df['Glucose']
        return 0.001 * ((np.mean(glucose) + np.std(glucose)) ** 2)

    def LBGI_HBGI(df):
        """
        Intermediary risk calculation shared by LBGI, HBGI and ADRR.

        Args:
            df (pandas.DataFrame): frame with a ``Glucose`` column

        Returns:
            tuple: (LBGI, HBGI, rh, rl) where ``rl``/``rh`` are the per-sample
            low/high risk values and LBGI/HBGI are their means
        """
        f = np.asarray((np.log(df['Glucose']) ** 1.084) - 5.381, dtype=float)
        risk = 22.77 * (f ** 2)
        # Samples whose transformed value is NaN contribute 0 to both sides,
        # because both comparisons are False for NaN.
        rl = np.where(f <= 0, risk, 0.0)
        rh = np.where(f > 0, risk, 0.0)
        return np.mean(rl), np.mean(rh), rh, rl

    def LBGI(df):
        """
        Compute the Low Blood Glucose Index.

        Args:
            df (pandas.DataFrame): frame with a ``Glucose`` column

        Returns:
            float: mean of the per-sample low risk values
        """
        return LBGI_HBGI(df)[0]

    def HBGI(df):
        """
        Compute the High Blood Glucose Index.

        Args:
            df (pandas.DataFrame): frame with a ``Glucose`` column

        Returns:
            float: mean of the per-sample high risk values
        """
        return LBGI_HBGI(df)[1]

    def ADRR(df):
        """
        Compute the Average Daily Risk Range.

        For every day the maximum low risk and maximum high risk values (from
        LBGI_HBGI) are added; the result is the mean of these daily sums.

        Args:
            df (pandas.DataFrame): frame with ``Glucose`` and ``Day`` columns

        Returns:
            float: mean daily risk range, NaN when there are no days
        """
        daily_ranges = []
        for day in pd.unique(df['Day']):
            _, _, rh, rl = LBGI_HBGI(df[df['Day'] == day])
            daily_ranges.append(np.max(rl) + np.max(rh))
        return np.mean(daily_ranges) if daily_ranges else np.nan

    def _minutes_from_midnight(df):
        """Return, per row, the time of day of ``Time`` in whole minutes."""
        clock = df['Time'].dt
        # Seconds are rounded to the nearest minute with ties going down:
        # only 31..59 seconds add a minute (30 s -> 0.5 -> 0).
        return clock.hour * 60 + clock.minute + (clock.second > 30).astype(int)

    def _daily_differences(df):
        """
        Supporting calculation for MODD and CONGA24.

        Rows are grouped by their minute of day; within each group the mean
        absolute difference between consecutive glucose values (in row order)
        is taken. Groups with a single sample yield NaN and exact zeros are
        replaced by NaN so that they are ignored by the callers.

        The input frame is not modified.
        """
        slot = _minutes_from_midnight(df)
        abs_diff = df['Glucose'].groupby(slot).diff().abs()
        per_slot = np.array(abs_diff.groupby(slot).mean(), dtype=float)
        per_slot[per_slot == 0] = np.nan
        return per_slot

    def MODD(df):
        """
        Compute the mean of daily differences of glucose.

        Args:
            df (pandas.DataFrame): frame with ``Glucose`` and datetime ``Time`` columns

        Returns:
            float: mean over the per-time-of-day differences (see
            ``_daily_differences``), a measure of circadian rhythmicity of
            glucose variability; NaN when no difference is available
        """
        per_slot = _daily_differences(df)
        if np.isnan(per_slot).all():
            return np.nan
        return np.nanmean(per_slot)

    def CONGA24(df):
        """
        Compute CONGA over a 24 hour interval.

        Args:
            df (pandas.DataFrame): frame with ``Glucose`` and datetime ``Time`` columns

        Returns:
            float: standard deviation over the per-time-of-day differences
            (see ``_daily_differences``); NaN when no difference is available
        """
        per_slot = _daily_differences(df)
        if np.isnan(per_slot).all():
            return np.nan
        return np.nanstd(per_slot)

    def GMI(df):
        """
        Compute the glucose management indicator.

        Args:
            df (pandas.DataFrame): frame with a ``Glucose`` column

        Returns:
            float: 3.31 + 0.02392 * mean glucose
        """
        return 3.31 + (0.02392 * np.mean(df['Glucose']))

    def eA1c(df):
        """
        Compute the ADA estimated A1c from glucose.

        Args:
            df (pandas.DataFrame): frame with a ``Glucose`` column

        Returns:
            float: (46.7 + mean glucose) / 28.7
        """
        return (46.7 + np.mean(df['Glucose'])) / 28.7

    def summary(df):
        """
        Compute interday summary statistics of glucose, ignoring NaN values.

        Args:
            df (pandas.DataFrame): frame with a ``Glucose`` column

        Returns:
            tuple: (mean, median, minimum, maximum, first quartile, third quartile)
        """
        glucose = df['Glucose']
        return (
            np.nanmean(glucose),
            np.nanmedian(glucose),
            np.nanmin(glucose),
            np.nanmax(glucose),
            np.nanpercentile(glucose, 25),
            np.nanpercentile(glucose, 75),
        )

    # NOTE(review): every metric from GMI onwards is computed as a float but is
    # declared IntegerType here, so fractional parts are presumably lost when
    # the UDF result is converted. The declared types are left untouched
    # because changing them alters the schema of the produced stream; confirm
    # with downstream consumers before switching these fields to FloatType.
    return_schema = StructType([
        StructField("timestamp", TimestampType()),
        StructField("localtime", TimestampType()),
        StructField("user", StringType()),
        StructField("version", IntegerType()),
        StructField("meanG", FloatType()),
        StructField("medianG", FloatType()),
        StructField("minG", FloatType()),
        StructField("maxG", FloatType()),
        StructField("Q1G", FloatType()),
        StructField("Q3G", FloatType()),
        StructField("GMI", IntegerType()),
        StructField("eA1c", IntegerType()),
        StructField("CONGA24", IntegerType()),
        StructField("MODD", IntegerType()),
        StructField("ADRR", IntegerType()),
        StructField("HBGI", IntegerType()),
        StructField("LBGI", IntegerType()),
        StructField("J_index", IntegerType()),
        StructField("MAGE", IntegerType()),
        StructField("MAGN", IntegerType()),
        StructField("POR", IntegerType()),
        StructField("TOR", IntegerType()),
        StructField("TIR", IntegerType()),
        StructField("interdaySD", IntegerType()),
        StructField("interdayCV", IntegerType()),
        StructField("intradaySD", IntegerType()),
        StructField("intradayCV", IntegerType())
    ])

    @pandas_udf(return_schema, PandasUDFType.GROUPED_MAP)
    def get_all_metrics(data):
        """
        Compute CGM metrics for one (user, version) group.

        Args:
            data (pandas.DataFrame): raw continuous glucose monitor rows of
                the group

        Returns:
            pandas.DataFrame: a single row of CGM summary and variability
            metrics, with columns in ``return_schema`` order
        """
        df = pd.DataFrame()
        df['Time'] = data['timestamp']
        df['Glucose'] = pd.to_numeric(data['glucose_value'])
        # The first 12 rows of every group are discarded.
        # NOTE(review): presumably this removes the first hour of readings at
        # the default 5-minute sampling rate - confirm. Groups with 12 rows or
        # fewer leave an empty frame, on which summary() raises.
        df.drop(df.index[:12], inplace=True)
        df['Time'] = pd.to_datetime(df['Time'], format='%Y-%m-%dT%H:%M:%S')
        df['Day'] = df['Time'].dt.date
        df = df.reset_index(drop=True)

        meanG, medianG, minG, maxG, Q1G, Q3G = summary(df)
        # Only the mean of the per-day SD / CV is reported in the schema.
        intradaysd, _, _ = intradaySD(df)
        intradaycv, _, _ = intradayCV(df)

        # Values must stay in the same order as the fields of return_schema.
        output = [
            data.timestamp.iloc[0],
            data.localtime.iloc[0],
            data.user.iloc[0],
            1,  # NOTE(review): output version is fixed to 1 regardless of the group's version
            meanG,
            medianG,
            minG,
            maxG,
            Q1G,
            Q3G,
            GMI(df),
            eA1c(df),
            CONGA24(df),
            MODD(df),
            ADRR(df),
            HBGI(df),
            LBGI(df),
            J_index(df),
            MAGE(df),
            MAGN(df),
            POR(df),
            TOR(df),
            TIR(df),
            interdaySD(df),
            interdayCV(df),
            intradaysd,
            intradaycv,
        ]

        column_names = [field.name for field in return_schema]
        return pd.DataFrame([output], columns=column_names)

    # Grouping happens here, so ds._data has to expose groupBy.
    data = ds._data.groupBy(["user", "version"]).apply(get_all_metrics)
    results = DataStream(data=data, metadata=Metadata())
    metadata = update_metadata(stream_metadata=results.metadata,
                               stream_name="cgm_glucose_variability_metrics",
                               stream_desc="This algorithm computes 23 clinically validated glucose variability metrics from continuous glucose monitor data. Datastream input is CGM data containing timestamp and glucose. Datastream output is 23 glucose variability metrics.",
                               module_name="cerebralcortex.algorithms.glucose.glucose_variability_metrics.glucose_var",
                               module_version="1.0.0",
                               authors=[{"Digital Biomarker Discovery Pipeline (DBDP)":"brinnae.bent@duke.edu"}])
    results.metadata = metadata
    return results