Skip to content
6 changes: 6 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,9 @@ inst/doc
cran-comments.md
CRAN-RELEASE
CRAN-SUBMISSION

# agent skills (Posit/Cursor skills installation)
.agent/
.agents/
skills/
skills-lock.json
6 changes: 6 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,9 @@ S3method(pareto_khat_threshold,default)
S3method(pareto_khat_threshold,rvar)
S3method(pareto_min_ss,default)
S3method(pareto_min_ss,rvar)
S3method(pareto_pit,default)
S3method(pareto_pit,draws_matrix)
S3method(pareto_pit,rvar)
S3method(pareto_smooth,default)
S3method(pareto_smooth,rvar)
S3method(pillar_shaft,rvar)
Expand Down Expand Up @@ -481,7 +484,9 @@ export(pareto_diags)
export(pareto_khat)
export(pareto_khat_threshold)
export(pareto_min_ss)
export(pareto_pit)
export(pareto_smooth)
export(pgeneralized_pareto)
export(pit)
export(ps_convergence_rate)
export(ps_khat_threshold)
Expand Down Expand Up @@ -532,6 +537,7 @@ export(summarise_draws)
export(summarize_draws)
export(thin_draws)
export(u_scale)
export(uniformity_test)
export(var)
export(variables)
export(variance)
Expand Down
80 changes: 76 additions & 4 deletions R/gpd.R
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,46 @@ qgeneralized_pareto <- function(p, mu = 0, sigma = 1, k = 0, lower.tail = TRUE,
q
}

#' Distribution function for the generalized Pareto distribution
#'
#' Computes the cumulative distribution function (CDF) for a generalized
#' Pareto distribution with location `mu`, scale `sigma`, and shape `k`.
#'
#' @param q Numeric vector of quantiles.
#' @param mu Location parameter.
#' @param sigma Scale parameter (must be positive).
#' @param k Shape parameter.
#' @param lower.tail Logical; if `TRUE` (default), probabilities are `P[X <= x]`.
#' @param log.p Logical; if `TRUE`, probabilities are returned as `log(p)`.
#' @return A numeric vector of probabilities.
#' @keywords internal
#' @export
#' @examples
#' pgeneralized_pareto(q = c(1, 2, 5), mu = 0, sigma = 1, k = 0.2)
pgeneralized_pareto <- function(q, mu = 0, sigma = 1, k = 0, lower.tail = TRUE, log.p = FALSE) {
  stopifnot(length(mu) == 1 && length(sigma) == 1 && length(k) == 1)
  if (is.na(sigma) || sigma <= 0) {
    # an invalid scale makes the distribution undefined for every quantile
    return(rep(NaN, length(q)))
  }
  z <- (q - mu) / sigma
  if (abs(k) < 1e-15) {
    # values of k this small are indistinguishable from k = 0 in floating
    # point accuracy, so use the exponential (k = 0) limit of the GPD CDF;
    # -expm1(-z) is an accurate form of 1 - exp(-z) for small z
    p <- -expm1(-z)
  } else {
    # general case: 1 - (1 + k * z)^(-1/k); pmax() clamps k * z at -1 so
    # log1p() stays defined for quantiles outside the support
    p <- -expm1(log1p(pmax(k * z, -1)) / -k)
  }
  # force to [0, 1] for values outside the support
  p <- pmin(pmax(p, 0), 1)
  if (!lower.tail) {
    p <- 1 - p
  }
  if (log.p) {
    p <- log(p)
  }
  p
}

#' Estimate parameters of the Generalized Pareto distribution
#'
#' Given a sample \eqn{x}, estimate the parameters \eqn{k} and
Expand All @@ -57,6 +97,10 @@ qgeneralized_pareto <- function(p, mu = 0, sigma = 1, k = 0, lower.tail = TRUE,
#' fitting algorithm is to sort the elements of `x`. If `x` is
#' already sorted in ascending order then `sort_x` can be set to
#' `FALSE` to skip the initial sorting step.
#' @param weights An optional numeric vector of positive weights the same
#' length as `x`. If `NULL` (the default), all observations are
#' weighted equally and the result is identical to the unweighted fit.
#' Weights are normalized internally to sum to `length(x)`.
#' @return A named list with components `k` and `sigma`.
#'
#' @details Here the parameter \eqn{k} is the negative of \eqn{k} in Zhang &
Expand All @@ -69,24 +113,52 @@ qgeneralized_pareto <- function(p, mu = 0, sigma = 1, k = 0, lower.tail = TRUE,
#'
#' @keywords internal
#' @export
gpdfit <- function(x, wip = TRUE, min_grid_pts = 30, sort_x = TRUE) {
gpdfit <- function(x, wip = TRUE, min_grid_pts = 30, sort_x = TRUE,
weights = NULL) {
# see section 4 of Zhang and Stephens (2009)
if (sort_x) {
x <- sort.int(x)
if (!is.null(weights)) {
ord <- sort.int(x, index.return = TRUE)
x <- ord$x
weights <- weights[ord$ix]
} else {
x <- sort.int(x)
}
}
N <- length(x)

# normalize weights to sum to N so the log-likelihood scale is preserved
if (!is.null(weights)) {
weights <- weights / sum(weights) * N
}

prior <- 3
M <- min_grid_pts + floor(sqrt(N))
jj <- seq_len(M)
xstar <- x[floor(N / 4 + 0.5)] # first quartile of sample
if (xstar > x[1]) {
# first quantile is bigger than the minimum
theta <- 1 / x[N] + (1 - sqrt(M / (jj - 0.5))) / prior / xstar
k <- matrixStats::rowMeans2(log1p(-theta %o% x))

# log1p(-theta %o% x) is M x N matrix
log1p_mat <- log1p(-theta %o% x)

if (!is.null(weights)) {
# weighted mean across observations for each theta value
k <- drop(log1p_mat %*% weights) / N
} else {
k <- matrixStats::rowMeans2(log1p_mat)
}

l_theta <- N * (log(-theta / k) - k - 1) # profile log-lik
w_theta <- exp(l_theta - matrixStats::logSumExp(l_theta)) # normalize
theta_hat <- sum(theta * w_theta)
k_hat <- mean.default(log1p(-theta_hat * x))

if (!is.null(weights)) {
k_hat <- sum(weights * log1p(-theta_hat * x)) / N
} else {
k_hat <- mean.default(log1p(-theta_hat * x))
}
sigma_hat <- -k_hat / theta_hat

# adjust k_hat based on weakly informative prior, Gaussian centered on 0.5.
Expand Down
Loading
Loading