import matplotlib.pyplot as plt
import pandas as pd
import scipy.stats as st
import statsmodels.api as sm
import math
import numpy as np
__all__ = ["deming", "passingbablok", "linear"]
class _Deming(object):
"""Internal class for drawing a Deming regression plot"""
def __init__(self, method1, method2,
vr, sdr, bootstrap,
x_label, y_label, title,
CI, line_reference, line_CI, legend,
color_points, color_deming,
point_kws):
self.method1: np.array = np.asarray(method1)
self.method2: np.array = np.asarray(method2)
self.vr = vr
self.sdr = sdr
self.bootstrap = bootstrap
self.x_title = x_label
self.y_title = y_label
self.graph_title = title
self.color_points = color_points
self.color_deming = color_deming
self.CI = CI
self.line_reference = line_reference
self.line_CI = line_CI
self.legend = legend
self.point_kws = {} if point_kws is None else point_kws.copy()
self._check_params()
self._derive_params()
def _check_params(self):
if len(self.method1) != len(self.method2):
raise ValueError('Length of method 1 and method 2 are not equal.')
if self.bootstrap is not None and not isinstance(self.bootstrap, int):
raise ValueError('Bootstrap argument should either be None or an integer.')
if self.CI is not None and (self.CI > 1 or self.CI < 0):
raise ValueError('Confidence interval must be between 0 and 1.')
if any([not isinstance(x, str) for x in [self.x_title, self.y_title]]):
raise ValueError('Axes labels arguments should be provided as a str.')
def _derive_params(self):
def _deming(x, y, lamb):
ssdx = np.var(x, ddof=1) * (self.n - 1)
ssdy = np.var(y, ddof=1) * (self.n - 1)
spdxy = np.cov(x, y)[1][1] * (self.n - 1)
beta = (ssdy - lamb * ssdx + math.sqrt((ssdy - lamb * ssdx) ** 2 + 4 * lamb * (ssdy ** 2))) / (
2 * spdxy)
alpha = y.mean() - beta * x.mean()
ksi = (lamb * x + beta * (y - alpha)) / (lamb + beta ** 2)
sigmax = lamb * sum((x - ksi) ** 2) + sum((y - alpha - beta * ksi) ** 2) / (
(self.n - 2) * beta)
sigmay = math.sqrt(lamb * sigmax)
sigmax = math.sqrt(sigmax)
return alpha, beta, sigmax, sigmay
self.n = len(self.method1)
if self.vr is not None:
_lambda = self.vr
elif self.sdr is not None:
_lambda = self.sdr
else:
_lambda = 1
params = _deming(self.method1, self.method2, _lambda)
if self.bootstrap is None:
self.alpha = params[0]
self.beta = params[1]
self.sigmax = params[2]
self.sigmay = params[3]
else:
_params = np.zeros([self.bootstrap, 4])
for i in range(self.bootstrap):
idx = np.random.choice(range(self.n), self.n, replace=True)
_params[i] = _deming(np.take(self.method1, idx), np.take(self.method2, idx), _lambda)
_paramsdf = pd.DataFrame(_params, columns=['alpha', 'beta', 'sigmax', 'sigmay'])
se = np.sqrt(np.diag(np.cov(_paramsdf.cov())))
t = np.transpose(
np.apply_along_axis(np.quantile, 0, _params, [0.5, (1 - self.CI) / 2, 1 - (1 - self.CI) / 2]))
self.alpha = [t[0][0], se[0], t[0][1], t[0][2]]
self.beta = [t[1][0], se[1], t[0][1], t[0][2]]
self.sigmax = [t[2][0], se[2], t[0][1], t[0][2]]
self.sigmay = [t[3][0], se[3], t[0][1], t[0][2]]
def plot(self, ax):
# plot individual points
ax.scatter(self.method1, self.method2, s=20, alpha=0.6, color=self.color_points)
# plot reference line
if self.line_reference:
ax.plot([0, 1], [0, 1], label='Reference',
color='grey', linestyle='--', transform=ax.transAxes)
# plot Deming-line
_xvals = np.array(ax.get_xlim())
if self.bootstrap is None:
_yvals = self.alpha + self.beta * _xvals
ax.plot(_xvals, _yvals, label=f'{self.alpha:.2f} + {self.beta:.2f} * Method 1',
color=self.color_deming, linestyle='-')
else:
_yvals = [self.alpha[s] + self.beta[0] * _xvals for s in range(0, 4)]
ax.plot(_xvals, _yvals[0], label=f'{self.alpha[0]:.2f} + {self.beta[0]:.2f} * Method 1',
color=self.color_deming, linestyle='-')
ax.fill_between(_xvals, _yvals[2], _yvals[3], color=self.color_deming, alpha=0.2)
if self.line_CI:
ax.plot(_xvals, _yvals[2], linestyle='--')
ax.plot(_xvals, _yvals[3], linestyle='--')
if self.legend:
ax.legend(loc='upper left', frameon=False)
ax.set_ylabel(self.y_title)
ax.set_xlabel(self.x_title)
if self.graph_title is not None:
ax.set_title(self.graph_title)
[docs]def deming(method1, method2,
vr=None, sdr=None, bootstrap=1000,
x_label='Method 1', y_label='Method 2', title=None,
CI=0.95, line_reference=True, line_CI=False, legend=True,
color_points='#000000', color_deming='#008bff',
point_kws=None,
square=False, ax=None):
"""Provide a method comparison using Deming regression.
This is an Axis-level function which will draw the Deming plot
onto the current active Axis object unless ``ax`` is provided.
Parameters
----------
method1, method2 : array, or list
Values obtained from both methods, preferably provided in a np.array.
vr : float
The assumed known ratio of the (residual) variance of the ys relative to that of the xs.
Defaults to 1.
sdr : float
The assumed known standard deviations. Parameter vr takes precedence if both are given.
Defaults to 1.
bootstrap : int or None
Amount of bootstrap estimates that should be performed to acquire standard errors (and confidence
intervals). If None, no bootstrapping is performed. Defaults to 1000.
x_label : str, optional
The label which is added to the X-axis. If None is provided, a standard
label will be added.
y_label : str, optional
The label which is added to the Y-axis. If None is provided, a standard
label will be added.
title : str, optional
Title of the plot. If None is provided, no title will be plotted.
CI : float, optional
The confidence interval employed in Deming line. Defaults to 0.95.
line_reference : bool, optional
If True, a grey reference line at y=x will be plotted in the plot.
Defaults to true.
line_CI : bool, optional
If True, dashed lines will be plotted at the boundaries of the confidence intervals.
Defaults to false.
legend : bool, optional
If True, will provide a legend containing the computed Deming equation.
Defaults to true.
color_points : str, optional
Color of the individual differences that will be plotted.
Color should be provided in format compatible with matplotlib.
color_deming : str, optional
Color of the mean difference line that will be plotted.
Color should be provided in format compatible with matplotlib.
square : bool, optional
If True, set the Axes aspect to "equal" so each cell will be
square-shaped.
point_kws : dict of key, value mappings, optional
Additional keyword arguments for `plt.scatter`.
ax : matplotlib Axes, optional
Axes in which to draw the plot, otherwise use the currently-active
Axes.
Returns
-------
ax : matplotlib Axes
Axes object with the Deming plot.
See Also
-------
Koopmans, T. C. (1937). Linear regression analysis of economic time series. DeErven F. Bohn, Haarlem, Netherlands.
Deming, W. E. (1943). Statistical adjustment of data. Wiley, NY (Dover Publications edition, 1985).
"""
plotter: _Deming = _Deming(method1, method2,
vr, sdr, bootstrap,
x_label, y_label, title,
CI, line_reference, line_CI, legend,
color_points, color_deming,
point_kws)
# Draw the plot and return the Axes
if ax is None:
ax = plt.gca()
if square:
ax.set_aspect('equal')
plotter.plot(ax)
return ax
class _PassingBablok(object):
"""Internal class for drawing a Passing-Bablok regression plot"""
def __init__(self, method1, method2,
x_label, y_label, title,
CI, line_reference, line_CI, legend,
color_points, color_paba,
point_kws):
self.method1: np.array = np.asarray(method1)
self.method2: np.array = np.asarray(method2)
self.x_title = x_label
self.y_title = y_label
self.graph_title = title
self.CI = CI
self.color_points = color_points
self.color_paba = color_paba
self.line_reference = line_reference
self.line_CI = line_CI
self.legend = legend
self.point_kws = {} if point_kws is None else point_kws.copy()
self._check_params()
self._derive_params()
def _check_params(self):
if len(self.method1) != len(self.method2):
raise ValueError('Length of method 1 and method 2 are not equal.')
if self.CI is not None and (self.CI > 1 or self.CI < 0):
raise ValueError('Confidence interval must be between 0 and 1.')
if any([not isinstance(x, str) for x in [self.x_title, self.y_title]]):
raise ValueError('Axes labels arguments should be provided as a str.')
def _derive_params(self):
self.n = len(self.method1)
self.sv = []
for i in range(self.n - 1):
for j in range(i + 1, self.n):
self.sv.append((self.method2[i] - self.method2[j]) /
(self.method1[i] - self.method1[j]))
self.sv.sort()
n = len(self.sv)
k = math.floor(len([a for a in self.sv if a < 0]) / 2)
if n % 2 == 1:
self.slope = self.sv[int((n + 1) / k + 2)]
else:
self.slope = math.sqrt(self.sv[int(n / 2 + k)] * self.sv[int(n / 2 + k + 1)])
_ci = st.norm.ppf(1 - (1 - self.CI) / 2) * math.sqrt((self.n * (self.n - 1) * (2 * self.n + 5)) / 18)
_m1 = int(round((n - _ci) / 2))
_m2 = n - _m1 - 1
self.slope = [self.slope, self.sv[k + _m1], self.sv[k + _m2]]
self.intercept = [np.median(self.method2 - self.slope[0] * self.method1),
np.median(self.method2 - self.slope[1] * self.method1),
np.median(self.method2 - self.slope[2] * self.method1)]
def plot(self, ax):
# plot individual points
ax.scatter(self.method1, self.method2, s=20, alpha=0.6, color=self.color_points,
**self.point_kws)
# plot reference line
if self.line_reference:
ax.plot([0, 1], [0, 1], label='Reference',
color='grey', linestyle='--', transform=ax.transAxes)
# plot PaBa-line
_xvals = np.array(ax.get_xlim())
_yvals = [self.intercept[s] + self.slope[s] * _xvals for s in range(0, 3)]
ax.plot(_xvals, _yvals[0], label=f'{self.intercept[0]:.2f} + {self.slope[0]:.2f} * Method 1',
color=self.color_paba, linestyle='-')
ax.fill_between(_xvals, _yvals[1], _yvals[2], color=self.color_paba, alpha=0.2)
if self.line_CI:
ax.plot(_xvals, _yvals[1], linestyle='--')
ax.plot(_xvals, _yvals[2], linestyle='--')
if self.legend:
ax.legend(loc='upper left', frameon=False)
ax.set_ylabel(self.y_title)
ax.set_xlabel(self.x_title)
if self.graph_title is not None:
ax.set_title(self.graph_title)
[docs]def passingbablok(method1, method2,
x_label='Method 1', y_label='Method 2', title=None,
CI=0.95, line_reference=True, line_CI=False, legend=True,
color_points='#000000', color_paba='#008bff',
point_kws=None,
square=False, ax=None):
"""Provide a method comparison using Passing-Bablok regression.
This is an Axis-level function which will draw the Passing-Bablok plot
onto the current active Axis object unless ``ax`` is provided.
Parameters
----------
method1, method2 : array, or list
Values obtained from both methods, preferably provided in a np.array.
x_label : str, optional
The label which is added to the X-axis. If None is provided, a standard
label will be added.
y_label : str, optional
The label which is added to the Y-axis. If None is provided, a standard
label will be added.
title : str, optional
Title of the Passing-Bablok plot. If None is provided, no title will be plotted.
CI : float, optional
The confidence interval employed in the passing-bablok line. Defaults to 0.95.
line_reference : bool, optional
If True, a grey reference line at y=x will be plotted in the plot.
Defaults to true.
line_CI : bool, optional
If True, dashed lines will be plotted at the boundaries of the confidence intervals.
Defaults to false.
legend : bool, optional
If True, will provide a legend containing the computed Passing-Bablok equation.
Defaults to true.
color_points : str, optional
Color of the individual differences that will be plotted.
Color should be provided in format compatible with matplotlib.
color_paba : str, optional
Color of the mean difference line that will be plotted.
Color should be provided in format compatible with matplotlib.
square : bool, optional
If True, set the Axes aspect to "equal" so each cell will be
square-shaped.
point_kws : dict of key, value mappings, optional
Additional keyword arguments for `plt.scatter`.
ax : matplotlib Axes, optional
Axes in which to draw the plot, otherwise use the currently-active
Axes.
Returns
-------
ax : matplotlib Axes
Axes object with the Passing-Bablok plot.
See Also
-------
Passing H and Bablok W. J Clin Chem Clin Biochem, vol. 21, no. 11, 1983, pp. 709 - 720
"""
plotter: _PassingBablok = _PassingBablok(method1, method2,
x_label, y_label, title,
CI, line_reference, line_CI, legend,
color_points, color_paba,
point_kws)
# Draw the plot and return the Axes
if ax is None:
ax = plt.gca()
if square:
ax.set_aspect('equal')
plotter.plot(ax)
return ax
class _Linear(object):
"""Internal class for drawing a simple, linear regression plot"""
def __init__(self, method1, method2,
x_label, y_label, title,
CI, line_reference, line_CI, legend,
color_points, color_regr,
point_kws):
self.method1: np.array = np.asarray(method1)
self.method2: np.array = np.asarray(method2)
self.x_title = x_label
self.y_title = y_label
self.graph_title = title
self.CI = CI
self.color_points = color_points
self.color_regr = color_regr
self.line_reference = line_reference
self.line_CI = line_CI
self.legend = legend
self.point_kws = {} if point_kws is None else point_kws.copy()
self._check_params()
self._derive_params()
def _check_params(self):
if len(self.method1) != len(self.method2):
raise ValueError('Length of method 1 and method 2 are not equal.')
if self.CI is not None and (self.CI > 1 or self.CI < 0):
raise ValueError('Confidence interval must be between 0 and 1.')
if any([not isinstance(x, str) for x in [self.x_title, self.y_title]]):
raise ValueError('Axes labels arguments should be provided as a str.')
def _derive_params(self):
self.n = len(self.method1)
_model = sm.OLS(self.method1, sm.add_constant(self.method2)).fit()
_params = _model.params
_confint = _model.conf_int(alpha=self.CI)
self.intercept = [_confint[0][0], _params[0], _confint[0][1]]
self.slope = [_confint[1][0], _params[1], _confint[1][1]]
def plot(self, ax):
# plot individual points
ax.scatter(self.method1, self.method2, s=20, alpha=0.6, color=self.color_points,
**self.point_kws)
# plot reference line
if self.line_reference:
ax.plot([0, 1], [0, 1], label='Reference',
color='grey', linestyle='--', transform=ax.transAxes)
# plot linear regression
_xvals = np.array(ax.get_xlim())
_yvals = [self.intercept[s] + self.slope[s] * _xvals for s in range(0, 3)]
ax.plot(_xvals, _yvals[0], label=f'{self.intercept[0]:.2f} + {self.slope[0]:.2f} * Method 1',
color=self.color_regr, linestyle='-')
ax.fill_between(_xvals, _yvals[1], _yvals[2], color=self.color_regr, alpha=0.2)
if self.line_CI:
ax.plot(_xvals, _yvals[1], linestyle='--')
ax.plot(_xvals, _yvals[2], linestyle='--')
if self.legend:
ax.legend(loc='upper left', frameon=False)
ax.set_ylabel(self.y_title)
ax.set_xlabel(self.x_title)
if self.graph_title is not None:
ax.set_title(self.graph_title)
[docs]def linear(method1, method2,
x_label='Method 1', y_label='Method 2', title=None,
CI=0.95, line_reference=True, line_CI=False, legend=True,
color_points='#000000', color_regr='#008bff',
point_kws=None,
square=False, ax=None):
"""Provide a method comparison using simple, linear regression.
This is an Axis-level function which will draw the linear regression plot
onto the current active Axis object unless ``ax`` is provided.
Parameters
----------
method1, method2 : array, or list
Values obtained from both methods, preferably provided in a np.array.
x_label : str, optional
The label which is added to the X-axis. If None is provided, a standard
label will be added.
y_label : str, optional
The label which is added to the Y-axis. If None is provided, a standard
label will be added.
title : str, optional
Title of the linear regression plot. If None is provided, no title will be plotted.
CI : float, optional
The confidence interval employed in the linear regression line. Defaults to 0.95.
line_reference : bool, optional
If True, a grey reference line at y=x will be plotted in the plot.
Defaults to true.
line_CI : bool, optional
If True, dashed lines will be plotted at the boundaries of the confidence intervals.
Defaults to false.
legend : bool, optional
If True, will provide a legend containing the computed Linear regression equation.
Defaults to true.
color_points : str, optional
Color of the individual differences that will be plotted.
Color should be provided in format compatible with matplotlib.
color_paba : str, optional
Color of the mean difference line that will be plotted.
Color should be provided in format compatible with matplotlib.
square : bool, optional
If True, set the Axes aspect to "equal" so each cell will be
square-shaped.
point_kws : dict of key, value mappings, optional
Additional keyword arguments for `plt.scatter`.
ax : matplotlib Axes, optional
Axes in which to draw the plot, otherwise use the currently-active
Axes.
Returns
-------
ax : matplotlib Axes
Axes object with the linear regression plot.
See Also
-------
..............
"""
plotter: _Linear = _Linear(method1, method2,
x_label, y_label, title,
CI, line_reference, line_CI, legend,
color_points, color_regr,
point_kws)
# Draw the plot and return the Axes
if ax is None:
ax = plt.gca()
if square:
ax.set_aspect('equal')
plotter.plot(ax)
return ax