Data Driven Smooth Tests with ddst package

This document presents tests avaliable in the ddst package.

library("ddst")
library("polynom")

II. Data Driven Smooth Tests for Selected Goodness-of-Fit Problems

1. Data Driven Smooth Test for Uniformity

set.seed(7)
# H0 is true
z <- runif(80)
t <- ddst.uniform.test(z, compute.p = TRUE,
                       compute.cv = TRUE,
                       d.n = 10)
t

## 
##  Data Driven Smooth Test for Uniformity
## 
## data:  z, base: ddst.base.legendre  c: 2.4  d.n: 10  cv(0.05) : 0.0045668)
## WT = 0.31054, T = 1, p-value = 0.5989

plot(t)

# H0 is false
z <- rbeta(80,4,2)
t <- ddst.uniform.test(z, compute.p = TRUE,compute.cv = TRUE,  d.n = 10)
t

## 
##  Data Driven Smooth Test for Uniformity
## 
## data:  z, base: ddst.base.legendre  c: 2.4  d.n: 10  cv(0.05) : 0.0044916)
## WT = 40.553, T = 3, p-value = 8e-05

t$p.value

## [1] 8e-05

plot(t)

2. Data Driven Smooth Test for Exponentiality

set.seed(7)
# H0 is true
z <- rexp(80,4)
t <- ddst.exp.test(z, compute.p = TRUE, d.n = 10)
t

## 
##  Data Driven Smooth Test for Expotentiality
## 
## data:  z, base: ddst.base.legendre, c: 100, d.n: 10
## W*T* = 2.1593, T* = 1, p-value = 0.1748

plot(t)

# H0 is false
z = rchisq(80,4)
t = ddst.exp.test (z, compute.p = TRUE, d.n = 10)
t

## 
##  Data Driven Smooth Test for Expotentiality
## 
## data:  z, base: ddst.base.legendre, c: 100, d.n: 10
## W*T* = 15.543, T* = 1, p-value = 0.00484

t$p.value

## [1] 0.00484

plot(t)

3. Data Driven Smooth Tests for Normality

3.1. Bounded Basis Functions

set.seed(7)
# H0 is true
z <- rnorm(100)
# let's look on first 10 coordinates
d.n <- 10
t <- ddst.normbounded.test(z, compute.p = TRUE, d.n = d.n)
t

## 
##  Data Driven Smooth Test for Normality - Bounded Basis Functions
## 
## data:  z, base: ddst.base.legendre, c: 100, d.n: 10
## W*T* = 2.2232, T* = 1, p-value = 0.1581

plot(t)

# H0 is false
z <- rexp(100, 1)
t <- ddst.normbounded.test(z, compute.p = TRUE, d.n = d.n)
t

## 
##  Data Driven Smooth Test for Normality - Bounded Basis Functions
## 
## data:  z, base: ddst.base.legendre, c: 100, d.n: 10
## W*T* = 299.45, T* = 10, p-value < 2.2e-16

plot(t)

# for Tephra data
z <- c(-1.748789, -1.75753, -1.740102, -1.740102, -1.731467, -1.765523,
     -1.761521, -1.72522, -1.80371, -1.745624, -1.872957, -1.729121,
     -1.81529, -1.888637, -1.887761, -1.881645, -1.91518, -1.849769,
     -1.755141, -1.665687, -1.764721, -1.736171, -1.736956, -1.737742,
     -1.687537, -1.804534, -1.790593, -1.808661, -1.784081, -1.729903,
     -1.711263, -1.748789, -1.772755, -1.72756, -1.71358, -1.821116,
     -1.839588, -1.839588, -1.830321, -1.807835, -1.747206, -1.788147,
     -1.759923, -1.786519, -1.726779, -1.738528, -1.754345, -1.781646,
     -1.641949, -1.755936, -1.775175, -1.736956, -1.705103, -1.743255,
     -1.82613, -1.826967, -1.780025, -1.684504, -1.751168)
t <- ddst.normbounded.test(z, compute.p = TRUE, Dmax = d.n)
t

## 
##  Data Driven Smooth Test for Normality - Bounded Basis Functions
## 
## data:  z, base: ddst.base.legendre, c: 100, d.n: 10
## W*T* = 3.7615, T* = 1, p-value = 0.08891

plot(t)

3.2. Unbounded Basis Functions

set.seed(7)
# H0 is true
z <- rnorm(100)
# let's look on first 10 coordinates
d.n <- 10

# calculate finite sample corrections
# see 6.2. Composite null hypothesis H in the appendix materials
e.v <- ddst.normunbounded.bias(n = length(z), d.n = d.n)
e.v

## $e.0
##              [,1]       [,2]         [,3]       [,4]        [,5]      [,6]
## [1,] -0.003118967 -0.6935202 -0.002956126 -0.6265916 0.005252232 0.2493225
##              [,7]      [,8]        [,9]      [,10]
## [1,] -0.005572803 0.1643331 0.005080106 -0.3043065
## 
## $v.0
##           [,1]      [,2]      [,3]      [,4]      [,5]      [,6]      [,7]
## [1,] 0.9478394 0.8147186 0.6556153 0.5867975 0.6271109 0.5654675 0.5199736
##           [,8]      [,9]    [,10]
## [1,] 0.5518196 0.4929518 0.489381

# simulated 1-alpha qunatiles, s(n, alpha) 
# see Table 1 in the JSCS article
s.n.alpha <- 4.4
r.alpha <- 2.708

t <- ddst.normubounded.test(z, d.n, e.v$e.0, e.v$v.0, r.alpha, s.n.alpha)
t

## 
##  Data Driven Smooth Test for Normality - Unbounded Basis Functions
## 
## data:  z, d.n: 10, r.alpha: 2.708, s.n.alpha: 4.4
## NAs = 2.7435, As = 1

plot(t)

# H0 is false, same lenght n = 100
z <- rexp(100, 1)

t <- ddst.normubounded.test(z, d.n, e.v$e.0, e.v$v.0, r.alpha, s.n.alpha)
t

## 
##  Data Driven Smooth Test for Normality - Unbounded Basis Functions
## 
## data:  z, d.n: 10, r.alpha: 2.708, s.n.alpha: 4.4
## NAs = 249.08, As = 9

plot(t)

# for Tephra data
z <- c(-1.748789, -1.75753, -1.740102, -1.740102, -1.731467, -1.765523,
       -1.761521, -1.72522, -1.80371, -1.745624, -1.872957, -1.729121,
       -1.81529, -1.888637, -1.887761, -1.881645, -1.91518, -1.849769,
       -1.755141, -1.665687, -1.764721, -1.736171, -1.736956, -1.737742,
       -1.687537, -1.804534, -1.790593, -1.808661, -1.784081, -1.729903,
       -1.711263, -1.748789, -1.772755, -1.72756, -1.71358, -1.821116,
       -1.839588, -1.839588, -1.830321, -1.807835, -1.747206, -1.788147,
       -1.759923, -1.786519, -1.726779, -1.738528, -1.754345, -1.781646,
       -1.641949, -1.755936, -1.775175, -1.736956, -1.705103, -1.743255,
       -1.82613, -1.826967, -1.780025, -1.684504, -1.751168)

# calculate finite sample corrections
e.v <- ddst.normunbounded.bias(n = length(z))
e.v

## $e.0
##              [,1]       [,2]          [,3]       [,4]        [,5]      [,6]
## [1,] -0.001745287 -0.7812591 -0.0003146211 -0.5289889 0.003045283 0.3671568
##              [,7]         [,8]       [,9]      [,10]        [,11]     [,12]
## [1,] -0.005166266 -0.006556786 0.00614837 -0.2153776 -0.005973216 0.2596683
##           [,13]      [,14]        [,15]      [,16]       [,17]   [,18]
## [1,] 0.00490113 -0.1825443 -0.003298743 0.05528399 0.001527895 0.06612
##            [,19]      [,20]
## [1,] 0.000115756 -0.1487876
## 
## $v.0
##           [,1]      [,2]      [,3]      [,4]      [,5]      [,6]      [,7]
## [1,] 0.9422175 0.7691471 0.6028542 0.5825068 0.6072724 0.5109567 0.5263274
##          [,8]      [,9]     [,10]     [,11]     [,12]     [,13]     [,14]
## [1,] 0.524183 0.4622242 0.4965074 0.4581659 0.4443366 0.4631899 0.4147061
##          [,15]     [,16]     [,17]     [,18]     [,19]     [,20]
## [1,] 0.4448983 0.4167827 0.4114816 0.4252005 0.3854767 0.4197615

# simulated 1-alpha qunatiles, s(n, alpha) and  s.o(n, alpha)
# see Table 1 in the JSCS article
s.n.alpha <- 3.3
r.alpha <- 2.142

t <- ddst.normubounded.test(z, d.n, e.v$e.0, e.v$v.0, r.alpha, s.n.alpha)
t

## 
##  Data Driven Smooth Test for Normality - Unbounded Basis Functions
## 
## data:  z, d.n: 10, r.alpha: 2.142, s.n.alpha: 3.3
## NAs = 66.704, As = 17

plot(t)

4. Data Driven Smooth Test for Extreme Value Distribution

library(evd)
set.seed(7)


# H0 is true
x <- -qgumbel(runif(100), -1, 1)
t <- ddst.evd.test (x, compute.p = TRUE, d.n = 10)
t

## 
##  Data Driven Smooth Test for Extreme Values
## 
## data:  x, base: ddst.base.legendre, c: 100, d.n: 10
## W*T* = 1.4531, T* = 1, p-value = 0.9941

plot(t)

# H0 is false
x <- rexp(80,4)
t <- ddst.evd.test (x, compute.p = TRUE, d.n = 10)
t

## 
##  Data Driven Smooth Test for Extreme Values
## 
## data:  x, base: ddst.base.legendre, c: 100, d.n: 10
## W*T* = 11731652, T* = 10, p-value < 2.2e-16

plot(t)

III. Nonparametric Data Driven Smooth Tests for Comparing Distributions

5. Data Driven Smooth Test for k-Sample Problem

set.seed(7)
# H0 is false
x <- runif(80)
y <- rexp(80, 1)
t <- ddst.twosample.test(x, y, compute.p = TRUE)
t

## 
##  Data Driven Smooth Test for Two-Sample Problem
## 
## data:  listxy, c: 2, d.n: 10
## WT = 36.108, T = (6, 6)

plot(t)

# H0 is false
x <- runif(80)
y <- rexp(80, 1)
z <- runif(80)
t <- ddst.ksample.test(list(x, y, z))
t

## 
##  Data Driven Smooth Test for Two-Sample Problem
## 
## data:  listxyz, c: 2.3, d.n: 10
## WT = 66.196, T = (3, 7, 1)

plot(t)

# H0 is true
x <- rnorm(80)
y <- rnorm(80)
z <- rnorm(80)
t <- ddst.ksample.test(list(x, y, z))
t

## 
##  Data Driven Smooth Test for Two-Sample Problem
## 
## data:  listxyz, c: 2.3, d.n: 10
## WT = 0.063224, T = (1, 1, 1)

plot(t)

6. Data Driven Test for Stochastic Ordering

set.seed(7)
library("rmutil", warn.conflicts = FALSE)
# H0 is false
# 1. Pareto(1)/Pareto(1.5)
x <- rpareto(50, 2, 2)
y <- rpareto(50, 1.5, 1.5)
t <- ddst.forstochdom.test(x, y, t = 2.2, K.N = 4)
t

## 
##  Data Driven Stochastic Ordering Test
## 
## data:  x y, t: 2.2, K.N: 4
## QT = 124.46, T = 5

plot(t)

# H0 is false
# 2. Laplace(0,1)/Laplace(1,25)
x <- rlaplace(50, 0, 1)
y <- rlaplace(50, 1, 25)
t <- ddst.forstochdom.test(x, y, t = 2.2, K.N = 4)
t

## 
##  Data Driven Stochastic Ordering Test
## 
## data:  x y, t: 2.2, K.N: 4
## QT = 129.93, T = 5

plot(t)

# H0 is true
# 3. LN(0.85,0.6)/LN(1.2,0.2)
x <- rlnorm(50, 0.85, 0.6)
y <- rlnorm(50, 1.2, 0.2)
t <- ddst.forstochdom.test(x, y, t = 2.2, K.N = 4)
t

## 
##  Data Driven Stochastic Ordering Test
## 
## data:  x y, t: 2.2, K.N: 4
## QT = 0, T = 1

plot(t)

7. Two-Sample Test Against Stochastic Dominance

set.seed(7)
# H0 is true
x <- runif(80)
y <- runif(80)
t <- ddst.againststochdom.test(x, y, alpha = 0.05, t = 2.2, k.N = 4)
t

## 
##  Data Driven Test Against Stochastic Dominance
## 
## data:  x, alpha: 0.05, t: 2.2, k.N: 4
## VT = 0.9, T = 1

plot(t)

# H0 is false
x <- runif(80)
y <- rbeta(80, 4, 2)
t <- ddst.againststochdom.test(x, y, alpha = 0.05, t = 2.2, k.N = 4)
t

## 
##  Data Driven Test Against Stochastic Dominance
## 
## data:  x, alpha: 0.05, t: 2.2, k.N: 4
## VT = 41.6, T = 5

plot(t)

8. Data Driven Smooth Test for Upward Trend Alternative in k samples

set.seed(7)
# H0 is true
x = runif(80)
y = runif(80) + 0.2
z = runif(80) + 0.4
t <- ddst.upwardtrend.test(list(x, y, z), t.p = 2.2, t.n = 2.2)
t

## 
##  Data Driven k-Sample Upward Trend Test
## 
## data:  listxyz, t.p: 2.2, t.n: 2.2
## U.T = 1105, p = 3

plot(t)

# H0 is false
x1 = rnorm(80)
x2 = rnorm(80) + 2
x3 = rnorm(80) + 4
x4 = rnorm(80) + 3
t <- ddst.upwardtrend.test(list(x1, x2, x3, x4), t.p = 2.2, t.n = 2.2)
t

## 
##  Data Driven k-Sample Upward Trend Test
## 
## data:  listx1x2x3x4, t.p: 2.2, t.n: 2.2
## U.T = -3401.6, p = 4

plot(t)

9. Data Driven Smooth Test for Umbrella Alternatives in k samples

set.seed(7)
# H0 is true
x = runif(80)
y = runif(80) + 0.2
z = runif(80)
t <- ddst.umbrellaknownp.test(list(x, y, z), p = 2, t.p = 2.2, t.n = 2.2)
t

## 
##  Data Driven k-Sample Umbrella Test
## 
## data:  listxyz, t.p: 2.2, t.n: 2.2, p: 2
## U.T = 412.82, p = 2

plot(t)

# H0 is true
x1 = rnorm(80)
x2 = rnorm(80) + 2
x3 = rnorm(80) + 4
x4 = rnorm(80) + 3
x5 = rnorm(80) + 2
x6 = rnorm(80) + 1
x7 = rnorm(80)
t <- ddst.umbrellaknownp.test(list(x1, x2, x3, x4, x5, x6, x7), p = 3, t.p = 2.2, t.n = 2.2)
t

## 
##  Data Driven k-Sample Umbrella Test
## 
## data:  listx1x2x3x4x5x6x7, t.p: 2.2, t.n: 2.2, p: 3
## U.T = 4662.7, p = 3

plot(t)

t <- ddst.umbrellaknownp.test(list(x1, x2, x3, x4, x5, x6, x7), p = 5, t.p = 2.2, t.n = 2.2)
t

## 
##  Data Driven k-Sample Umbrella Test
## 
## data:  listx1x2x3x4x5x6x7, t.p: 2.2, t.n: 2.2, p: 5
## U.T = -3101, p = 5

plot(t)

Unknown peak

# true umbrella pattern
x1 = rnorm(80)
x2 = rnorm(80) + 2
x3 = rnorm(80) + 4
x4 = rnorm(80) + 3
x5 = rnorm(80) + 2
x6 = rnorm(80) + 1
x7 = rnorm(80)

##  peak is unknown, so we test for each possible position
statistics <- sapply(1:7, function(i)
ddst.umbrellaknownp.test(list(x1, x2, x3, x4, x5, x6, x7), p = i, t.p = 2.2, t.n = 2.2)$statistic)

statistics

##       U.T       U.T       U.T       U.T       U.T       U.T       U.T 
## -3887.927 -4291.024  4627.885 -3936.036 -3162.127 -2773.183 -2589.604

max(statistics)

## [1] 4627.885

Examples for ddst package

Przemyslaw Biecek

July 26, 2019

Data Driven Smooth Tests with ddst package

II. Data Driven Smooth Tests for Selected Goodness-of-Fit Problems

1. Data Driven Smooth Test for Uniformity

2. Data Driven Smooth Test for Exponentiality

3. Data Driven Smooth Tests for Normality

3.1. Bounded Basis Functions

3.2. Unbounded Basis Functions

4. Data Driven Smooth Test for Extreme Value Distribution

III. Nonparametric Data Driven Smooth Tests for Comparing Distributions

5. Data Driven Smooth Test for k-Sample Problem

6. Data Driven Test for Stochastic Ordering

7. Two-Sample Test Against Stochastic Dominance

8. Data Driven Smooth Test for Upward Trend Alternative in k samples

9. Data Driven Smooth Test for Umbrella Alternatives in k samples

Unknown peak