Custom logistic regression model

We try to reproduce the built-in logistic regression model with custom cost functions and show that the results are similar to those obtained with the built-in model.

The built-in logistic regression model in fastcpd() is implemented with the help of the fastglm package. fastglm uses iteratively reweighted least squares (IRLS) with step-halving to safeguard against convergence issues (a direct fastglm fit on one segment is sketched below, after the simulated data). If a custom cost function is used with gradient descent instead, we should expect the results to be similar to those of the built-in logistic regression model.

By specifying the cost, cost_gradient, and cost_hessian parameters below, we can obtain results similar to those of the built-in logistic regression model.
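
Concretely, for a single observation $(x_i, y_i)$ the custom cost is the negative log-likelihood of the logistic model (summed over the observations in a segment), with gradient and Hessian given by (writing $p_i$ for the fitted probability; the notation here is ours, not the package's):

$$\ell(\theta; x_i, y_i) = -y_i x_i^\top \theta + \log\!\left(1 + \exp(x_i^\top \theta)\right),$$

$$\nabla_\theta \ell = -(y_i - p_i)\, x_i, \qquad \nabla^2_\theta \ell = p_i (1 - p_i)\, x_i x_i^\top, \qquad p_i = \frac{1}{1 + \exp(-x_i^\top \theta)}.$$

These are exactly the quantities computed by logistic_loss(), logistic_gradient() and logistic_hessian() defined after the simulated data below.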

set.seed(1)
x <- matrix(rnorm(1500, 0, 1), ncol = 5)
theta <- rbind(rnorm(5, 0, 1), rnorm(5, 2, 1))
y <- c(
  rbinom(125, 1, 1 / (1 + exp(-x[1:125, ] %*% theta[1, ]))),
  rbinom(175, 1, 1 / (1 + exp(-x[126:300, ] %*% theta[2, ])))
)
binomial_data <- data.frame(y = y, x = x)
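
As a side note on the fastglm delegation mentioned above, the following minimal sketch (not part of the original vignette; it assumes fastglm's matrix interface fastglm(x, y, family)) fits the first simulated segment directly and should essentially agree with stats::glm():

if (requireNamespace("fastglm", quietly = TRUE)) {
  # Fit the first 125 observations (before the true change point) both ways.
  fit_fastglm <- fastglm::fastglm(x[1:125, ], y[1:125], family = binomial())
  fit_glm <- glm(y[1:125] ~ x[1:125, ] - 1, family = binomial())
  cbind(fastglm = coef(fit_fastglm), glm = unname(coef(fit_glm)))
}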

result <- fastcpd.binomial(cbind(y, x), r.progress = FALSE, cost_adjustment = NULL)
#> Warning: fit_glm: fitted probabilities numerically 0 or 1 occurred

#> Warning: fit_glm: fitted probabilities numerically 0 or 1 occurred

#> Warning: fit_glm: fitted probabilities numerically 0 or 1 occurred

#> Warning: fit_glm: fitted probabilities numerically 0 or 1 occurred
summary(result)
#>
#> Call:
#> fastcpd.binomial(data = cbind(y, x), r.progress = FALSE, cost_adjustment = NULL)
#>
#> Change points:
#> 126
#>
#> Cost values:
#> 56.90525 30.76875
#>
#> Parameters:
#>    segment 1 segment 2
#> 1  0.7259293  1.878525
#> 2 -1.0294802  2.704376
#> 3  1.0576503  3.702310
#> 4 -0.8812767  2.258796
#> 5  0.2419351  2.524173
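
As a quick sanity check (a sketch, not part of the original output), one can refit glm() on the two segments implied by the detected change point 126 and compare the estimates with the true theta used to simulate the data; glm() may emit the same "fitted probabilities numerically 0 or 1" warnings seen above:

cp <- 126  # change point reported by fastcpd.binomial() above
fit_seg1 <- glm(y ~ . - 1, family = binomial(), data = binomial_data[seq_len(cp - 1), ])
fit_seg2 <- glm(y ~ . - 1, family = binomial(), data = binomial_data[cp:nrow(binomial_data), ])
round(rbind(true = theta[1, ], refit = coef(fit_seg1)), 3)
round(rbind(true = theta[2, ], refit = coef(fit_seg2)), 3)

The custom cost, gradient and Hessian functions are defined next.
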
logistic_loss <- function(data, theta) {
  x <- data[, -1]
  y <- data[, 1]
  u <- x %*% theta
  nll <- -y * u + log(1 + exp(u))
  # For large linear predictors, log(1 + exp(u)) is approximately u;
  # substitute it to avoid overflow in exp().
  nll[u > 10] <- -y[u > 10] * u[u > 10] + u[u > 10]
  sum(nll)
}
# The gradient and Hessian use only the last row of the data they receive,
# i.e. the newest observation.
logistic_gradient <- function(data, theta) {
  x <- data[nrow(data), -1]
  y <- data[nrow(data), 1]
  c(-(y - 1 / (1 + exp(-x %*% theta)))) * x
}
logistic_hessian <- function(data, theta) {
  x <- data[nrow(data), -1]
  prob <- 1 / (1 + exp(-x %*% theta))
  (x %o% x) * c((1 - prob) * prob)
}
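# Optional sanity check (a sketch, not in the original vignette; it assumes
# the numDeriv package is available): the analytic gradient above should
# agree with a numerical gradient of logistic_loss() on a single row.
if (requireNamespace("numDeriv", quietly = TRUE)) {
  theta0 <- rep(0.1, 5)
  one_row <- as.matrix(binomial_data)[300, , drop = FALSE]
  all.equal(
    as.numeric(logistic_gradient(one_row, theta0)),
    numDeriv::grad(function(t) logistic_loss(one_row, t), theta0),
    tolerance = 1e-6
  )
}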
result <- fastcpd(
  y ~ . - 1, binomial_data, epsilon = 1e-5, cost = logistic_loss,
  cost_gradient = logistic_gradient, cost_hessian = logistic_hessian,
  r.progress = FALSE
)
summary(result)
#>
#> Call:
#> fastcpd(formula = y ~ . - 1, data = binomial_data, cost = logistic_loss,
#>     cost_gradient = logistic_gradient, cost_hessian = logistic_hessian,
#>     epsilon = 1e-05, r.progress = FALSE)
#>
#> Change points:
#> 22 125
#>
#> Parameters:
#>   segment 1  segment 2 segment 3
#> 1 -59.20045  0.8170446  1.902379
#> 2 -34.56676 -0.9600438  2.751578
#> 3 216.53373  0.9353306  3.734179
#> 4 -80.96420 -0.7393653  2.247423
#> 5  51.25224  0.1390591  2.535372

Note that the result obtained with the custom cost functions is inferior to the one obtained with the built-in model: a spurious change point is reported at 22, and the segment 1 parameter estimates are far from the true values. The results can be improved with additional parameters already provided in the package; a detailed discussion of several advanced usages can be found in the Advanced examples vignette.
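
For instance, one direction (shown only as a sketch, not evaluated here; the lower and upper arguments are described in the package documentation) is to bound the parameter estimates so that the gradient-descent updates on short candidate segments cannot drift to extreme values such as those in segment 1 above:

result_bounded <- fastcpd(
  y ~ . - 1, binomial_data, epsilon = 1e-5, cost = logistic_loss,
  cost_gradient = logistic_gradient, cost_hessian = logistic_hessian,
  lower = rep(-15, 5), upper = rep(15, 5), r.progress = FALSE
)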

Notes

Evaluation of the code chunks in this vignette is set to FALSE (see eval = FALSE in the knitr options in the appendix).

Appendix: all code snippets

knitr::opts_chunk$set(collapse = TRUE, comment = "#>", eval = FALSE)
library(fastcpd)
set.seed(1)
x <- matrix(rnorm(1500, 0, 1), ncol = 5)
theta <- rbind(rnorm(5, 0, 1), rnorm(5, 2, 1))
y <- c(
  rbinom(125, 1, 1 / (1 + exp(-x[1:125, ] %*% theta[1, ]))),
  rbinom(175, 1, 1 / (1 + exp(-x[126:300, ] %*% theta[2, ])))
)
binomial_data <- data.frame(y = y, x = x)

result <- fastcpd.binomial(cbind(y, x), r.progress = FALSE, cost_adjustment = NULL)
#> Warning: fit_glm: fitted probabilities numerically 0 or 1 occurred

#> Warning: fit_glm: fitted probabilities numerically 0 or 1 occurred

#> Warning: fit_glm: fitted probabilities numerically 0 or 1 occurred

#> Warning: fit_glm: fitted probabilities numerically 0 or 1 occurred
summary(result)
#>
#> Call:
#> fastcpd.binomial(data = cbind(y, x), r.progress = FALSE, cost_adjustment = NULL)
#>
#> Change points:
#> 126
#>
#> Cost values:
#> 56.90525 30.76875
#>
#> Parameters:
#>    segment 1 segment 2
#> 1  0.7259293  1.878525
#> 2 -1.0294802  2.704376
#> 3  1.0576503  3.702310
#> 4 -0.8812767  2.258796
#> 5  0.2419351  2.524173
logistic_loss <- function(data, theta) {
  x <- data[, -1]
  y <- data[, 1]
  u <- x %*% theta
  nll <- -y * u + log(1 + exp(u))
  # For large linear predictors, log(1 + exp(u)) is approximately u;
  # substitute it to avoid overflow in exp().
  nll[u > 10] <- -y[u > 10] * u[u > 10] + u[u > 10]
  sum(nll)
}
# The gradient and Hessian use only the last row of the data they receive,
# i.e. the newest observation.
logistic_gradient <- function(data, theta) {
  x <- data[nrow(data), -1]
  y <- data[nrow(data), 1]
  c(-(y - 1 / (1 + exp(-x %*% theta)))) * x
}
logistic_hessian <- function(data, theta) {
  x <- data[nrow(data), -1]
  prob <- 1 / (1 + exp(-x %*% theta))
  (x %o% x) * c((1 - prob) * prob)
}
result <- fastcpd(
  y ~ . - 1, binomial_data, epsilon = 1e-5, cost = logistic_loss,
  cost_gradient = logistic_gradient, cost_hessian = logistic_hessian,
  r.progress = FALSE
)
summary(result)
#>
#> Call:
#> fastcpd(formula = y ~ . - 1, data = binomial_data, cost = logistic_loss,
#>     cost_gradient = logistic_gradient, cost_hessian = logistic_hessian,
#>     epsilon = 1e-05, r.progress = FALSE)
#>
#> Change points:
#> 22 125
#>
#> Parameters:
#>   segment 1  segment 2 segment 3
#> 1 -59.20045  0.8170446  1.902379
#> 2 -34.56676 -0.9600438  2.751578
#> 3 216.53373  0.9353306  3.734179
#> 4 -80.96420 -0.7393653  2.247423
#> 5  51.25224  0.1390591  2.535372