diff --git a/NAMESPACE b/NAMESPACE
index d75f824..c1a7cea 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -1 +1,2 @@
 exportPattern("^[[:alpha:]]+")
+importFrom("stats", "quantile")
diff --git a/R/meanimpute.R b/R/meanimpute.R
index cc7cf5e..f7f79e5 100644
--- a/R/meanimpute.R
+++ b/R/meanimpute.R
@@ -1,4 +1,5 @@
 #' Meanimputation
+#' @param x A vector
 #' @export
 meanimpute <- function(x) {
   x[is.na(x)] <- mean(x, na.rm = TRUE)
diff --git a/R/transform_log.R b/R/transform_log.R
new file mode 100644
index 0000000..004f883
--- /dev/null
+++ b/R/transform_log.R
@@ -0,0 +1,22 @@
+#' transform_log
+#'
+#' Transform numerical values into their natural logarithm. Elements that
+#' cannot be converted to a number yield \code{NA}.
+#'
+#' @param x A vector; non-numeric input is coerced with a warning.
+#' @return A numeric vector holding the natural logarithm of \code{x}.
+#' @export
+#' @examples
+#' transform_log(1)
+#' transform_log(c(1, 2, 3, 4, 5))
+#'
+transform_log <- function(x) {
+  if (!is.numeric(x)) {
+    warning("transform_log: Expecting numeric argument")
+  }
+  # Coerce once; entries that fail conversion become NA and stay NA below.
+  x_num <- suppressWarnings(as.numeric(x))
+  y <- log(x_num)
+  y[is.na(x_num)] <- NA
+  y
+}
diff --git a/R/windsorize.R b/R/windsorize.R
index b4e15e6..de18782 100644
--- a/R/windsorize.R
+++ b/R/windsorize.R
@@ -1,9 +1,34 @@
 #' Windsorize
 #'
-#' Do some windsorization.
+#' Set all outliers to a specified percentile of the data. With the default
+#' \code{p = 0.9}, values below the 5th percentile are raised to the 5th
+#' percentile and values above the 95th percentile are lowered to the 95th
+#' percentile.
+#'
+#' @param x A non-empty numeric vector without missing values.
+#' @param p A single number from 0 to 1: the central share of data kept as is.
+#' @return \code{x} with both tails clamped to the cut-off quantiles.
+#' @examples
+#' windsorize(c(3, 4, 4, 3, 4, 5, 1))
 #' @export
 windsorize <- function(x, p = .90) {
-  q <- quantile(x, p)
-  x[x >= q] <- q
+  if (length(x) == 0) stop("argument should not be a empty vector")
+  if (all(is.na(x))) {
+    stop("argument should not be a vector containing only NA")
+  }
+  if (!is.numeric(x)) stop("argument should be a numeric vector")
+  # quantile() rejects missing values; fail here with a clearer message.
+  if (anyNA(x)) stop("argument should not contain NA values")
+  # Scalar check first so `p < 0 || p > 1` never sees NA or a longer vector.
+  if (!is.numeric(p) || length(p) != 1 || is.na(p)) {
+    stop("argument should be a number from 0 to 1")
+  }
+  if (p < 0 || p > 1) {
+    stop("p invalid percentale. Expected values from 0 to 1")
+  }
+  tail_prob <- (1 - p) / 2
+  bounds <- quantile(x, c(tail_prob, 1 - tail_prob), names = FALSE)
+  x[x <= bounds[1]] <- bounds[1]
+  x[x >= bounds[2]] <- bounds[2]
   x
 }
diff --git a/man/meanimpute.Rd b/man/meanimpute.Rd
index 8139e8f..77f2fbc 100644
--- a/man/meanimpute.Rd
+++ b/man/meanimpute.Rd
@@ -6,6 +6,9 @@
 \usage{
 meanimpute(x)
 }
+\arguments{
+\item{x}{A vector}
+}
 \description{
 Meanimputation
 }
diff --git a/man/transform_log.Rd b/man/transform_log.Rd
new file mode 100644
index 0000000..c5b287c
--- /dev/null
+++ b/man/transform_log.Rd
@@ -0,0 +1,23 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/transform_log.R
+\name{transform_log}
+\alias{transform_log}
+\title{transform_log}
+\usage{
+transform_log(x)
+}
+\arguments{
+\item{x}{A vector; non-numeric input is coerced with a warning.}
+}
+\value{
+A numeric vector holding the natural logarithm of \code{x}.
+}
+\description{
+Transform numerical values into their natural logarithm. Elements that
+cannot be converted to a number yield \code{NA}.
+}
+\examples{
+transform_log(1)
+transform_log(c(1, 2, 3, 4, 5))
+
+}
diff --git a/man/windsorize.Rd b/man/windsorize.Rd
index 832c3cb..2a0ff5d 100644
--- a/man/windsorize.Rd
+++ b/man/windsorize.Rd
@@ -6,6 +6,20 @@
 \usage{
 windsorize(x, p = 0.9)
 }
+\arguments{
+\item{x}{A non-empty numeric vector without missing values.}
+
+\item{p}{A single number from 0 to 1: the central share of data kept as is.}
+}
+\value{
+\code{x} with both tails clamped to the cut-off quantiles.
+}
 \description{
-Do some windsorization.
+Set all outliers to a specified percentile of the data. With the default
+\code{p = 0.9}, values below the 5th percentile are raised to the 5th
+percentile and values above the 95th percentile are lowered to the 95th
+percentile.
+}
+\examples{
+windsorize(c(3, 4, 4, 3, 4, 5, 1))
 }
diff --git a/tests/testthat.R b/tests/testthat.R
new file mode 100644
index 0000000..8adbfff
--- /dev/null
+++ b/tests/testthat.R
@@ -0,0 +1,4 @@
+library(testthat)
+library(datacleaner)
+
+test_check("datacleaner")
diff --git a/tests/testthat/test_tranform_log.R b/tests/testthat/test_tranform_log.R
new file mode 100644
index 0000000..70bae1c
--- /dev/null
+++ b/tests/testthat/test_tranform_log.R
@@ -0,0 +1,21 @@
+context("tranform_log")
+library(datacleaner)
+test_that("tranform_log is correct", {
+  expect_equal(
+    transform_log(c(2, 2, 2, 2, 3)),
+    c(0.693147180559945, 0.693147180559945, 0.693147180559945,
+      0.693147180559945, 1.09861228866811)
+  )
+  expect_equal(
+    transform_log(c(2, 3, NA)),
+    c(0.693147180559945, 1.09861228866811, NA)
+  )
+})
+
+test_that("unexpected parameters", {
+  expect_warning(
+    result <- transform_log(c(2, 3, "NA")),
+    "transform_log: Expecting numeric argument"
+  )
+  expect_equal(result, c(0.693147180559945, 1.09861228866811, NA))
+})
diff --git a/tests/testthat/test_windorize.R b/tests/testthat/test_windorize.R
new file mode 100644
index 0000000..5e0beb4
--- /dev/null
+++ b/tests/testthat/test_windorize.R
@@ -0,0 +1,29 @@
+context("Windsorize")
+library(datacleaner)
+test_that("windorizing is correct", {
+  expect_equal(windsorize(c(2, 2, 2, 2, 3), .9), c(2, 2, 2, 2, 2.8))
+  expect_equal(windsorize(c(2, 2, 2, 2, 1), .9), c(2, 2, 2, 2, 1.2))
+})
+
+test_that("unexpected parameters", {
+  bad_p <- "argument should be a number from 0 to 1"
+  bad_range <- "p invalid percentale. Expected values from 0 to 1"
+  expect_error(
+    windsorize(c(NA, NA, NA), .9),
+    "argument should not be a vector containing only NA"
+  )
+  expect_error(windsorize(c(), .9), "argument should not be a empty vector")
+  expect_error(
+    windsorize(c(1, 2, 3, "4"), .9),
+    "argument should be a numeric vector"
+  )
+  expect_error(windsorize(c(1, 2, 3, 4), ".9"), bad_p)
+  expect_error(windsorize(c(1, 2, 3, 4), NA_real_), bad_p)
+  expect_error(windsorize(c(1, 2, 3, 4), c(.8, .9)), bad_p)
+  expect_error(windsorize(c(1, 2, 3, 4), -1), bad_range)
+  expect_error(windsorize(c(1, 2, 3, 4), 1.2), bad_range)
+  expect_error(
+    windsorize(c(1, 2, 3, NA), .9),
+    "argument should not contain NA values"
+  )
+})