Skip to content
6 changes: 6 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,9 @@ inst/doc
cran-comments.md
CRAN-RELEASE
CRAN-SUBMISSION

# agent skills (Posit/Cursor skills installation)
.agent/
.agents/
skills/
skills-lock.json
6 changes: 6 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,9 @@ S3method(pareto_khat_threshold,default)
S3method(pareto_khat_threshold,rvar)
S3method(pareto_min_ss,default)
S3method(pareto_min_ss,rvar)
S3method(pareto_pit,default)
S3method(pareto_pit,draws_matrix)
S3method(pareto_pit,rvar)
S3method(pareto_smooth,default)
S3method(pareto_smooth,rvar)
S3method(pillar_shaft,rvar)
Expand Down Expand Up @@ -481,7 +484,9 @@ export(pareto_diags)
export(pareto_khat)
export(pareto_khat_threshold)
export(pareto_min_ss)
export(pareto_pit)
export(pareto_smooth)
export(pgeneralized_pareto)
export(pit)
export(ps_convergence_rate)
export(ps_khat_threshold)
Expand Down Expand Up @@ -532,6 +537,7 @@ export(summarise_draws)
export(summarize_draws)
export(thin_draws)
export(u_scale)
export(uniformity_test)
export(var)
export(variables)
export(variance)
Expand Down
80 changes: 76 additions & 4 deletions R/gpd.R
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,46 @@ qgeneralized_pareto <- function(p, mu = 0, sigma = 1, k = 0, lower.tail = TRUE,
q
}

#' Distribution function for the generalized Pareto distribution
#'
#' Computes the cumulative distribution function (CDF) for a generalized
#' Pareto distribution with location `mu`, scale `sigma`, and shape `k`.
#'
#' @param q Numeric vector of quantiles.
#' @param mu Location parameter.
#' @param sigma Scale parameter (must be positive).
#' @param k Shape parameter.
#' @param lower.tail Logical; if `TRUE` (default), probabilities are `P[X <= x]`.
#' @param log.p Logical; if `TRUE`, probabilities are returned as `log(p)`.
#' @return A numeric vector of probabilities.
#' @keywords internal
#' @export
#' @examples
#' pgeneralized_pareto(q = c(1, 2, 5), mu = 0, sigma = 1, k = 0.2)
pgeneralized_pareto <- function(q, mu = 0, sigma = 1, k = 0, lower.tail = TRUE, log.p = FALSE) {
  stopifnot(length(mu) == 1 && length(sigma) == 1 && length(k) == 1)
  if (is.na(sigma) || sigma <= 0) {
    # an invalid scale makes the distribution undefined for every quantile
    return(rep(NaN, length(q)))
  }
  z <- (q - mu) / sigma
  if (abs(k) < 1e-15) {
    # values of k this small are indistinguishable from k = 0 in floating
    # point accuracy, so use the exponential (k = 0) limit of the GPD CDF;
    # -expm1(-z) is an accurate form of 1 - exp(-z) for small z
    p <- -expm1(-z)
  } else {
    # general case: 1 - (1 + k * z)^(-1/k); pmax() clamps k * z at -1 so
    # log1p() stays defined for quantiles outside the support
    p <- -expm1(log1p(pmax(k * z, -1)) / -k)
  }
  # force to [0, 1] for values outside the support
  p <- pmin(pmax(p, 0), 1)
  if (!lower.tail) {
    p <- 1 - p
  }
  if (log.p) {
    p <- log(p)
  }
  p
}

#' Estimate parameters of the Generalized Pareto distribution
#'
#' Given a sample \eqn{x}, estimate the parameters \eqn{k} and
Expand All @@ -57,6 +97,10 @@ qgeneralized_pareto <- function(p, mu = 0, sigma = 1, k = 0, lower.tail = TRUE,
#' fitting algorithm is to sort the elements of `x`. If `x` is
#' already sorted in ascending order then `sort_x` can be set to
#' `FALSE` to skip the initial sorting step.
#' @param weights An optional numeric vector of positive weights the same
#' length as `x`. If `NULL` (the default), all observations are
#' weighted equally and the result is identical to the unweighted fit.
#' Weights are normalized internally to sum to `length(x)`.
#' @return A named list with components `k` and `sigma`.
#'
#' @details Here the parameter \eqn{k} is the negative of \eqn{k} in Zhang &
Expand All @@ -69,24 +113,52 @@ qgeneralized_pareto <- function(p, mu = 0, sigma = 1, k = 0, lower.tail = TRUE,
#'
#' @keywords internal
#' @export
gpdfit <- function(x, wip = TRUE, min_grid_pts = 30, sort_x = TRUE) {
gpdfit <- function(x, wip = TRUE, min_grid_pts = 30, sort_x = TRUE,
weights = NULL) {
# see section 4 of Zhang and Stephens (2009)
if (sort_x) {
x <- sort.int(x)
if (!is.null(weights)) {
ord <- sort.int(x, index.return = TRUE)
x <- ord$x
weights <- weights[ord$ix]
} else {
x <- sort.int(x)
}
}
N <- length(x)

# normalize weights to sum to N so the log-likelihood scale is preserved
if (!is.null(weights)) {
weights <- weights / sum(weights) * N
}

prior <- 3
M <- min_grid_pts + floor(sqrt(N))
jj <- seq_len(M)
xstar <- x[floor(N / 4 + 0.5)] # first quartile of sample
if (xstar > x[1]) {
# first quantile is bigger than the minimum
theta <- 1 / x[N] + (1 - sqrt(M / (jj - 0.5))) / prior / xstar
k <- matrixStats::rowMeans2(log1p(-theta %o% x))

# log1p(-theta %o% x) is M x N matrix
log1p_mat <- log1p(-theta %o% x)

if (!is.null(weights)) {
# weighted mean across observations for each theta value
k <- drop(log1p_mat %*% weights) / N
} else {
k <- matrixStats::rowMeans2(log1p_mat)
}

l_theta <- N * (log(-theta / k) - k - 1) # profile log-lik
w_theta <- exp(l_theta - matrixStats::logSumExp(l_theta)) # normalize
theta_hat <- sum(theta * w_theta)
k_hat <- mean.default(log1p(-theta_hat * x))

if (!is.null(weights)) {
k_hat <- sum(weights * log1p(-theta_hat * x)) / N
} else {
k_hat <- mean.default(log1p(-theta_hat * x))
}
sigma_hat <- -k_hat / theta_hat

# adjust k_hat based on weakly informative prior, Gaussian centered on 0.5.
Expand Down
Loading
Loading