Skip to content

Commit 313a007

Browse files
committed
add deepest regression estimator (#13)
1 parent f7bac83 commit 313a007

9 files changed

Lines changed: 163 additions & 36 deletions

File tree

CHANGELOG.md

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,12 @@
1-
# v0.10.2 (Upcoming Release)
1+
# v0.11.1 (Upcoming Release)
2+
3+
4+
# v0.11.0
5+
6+
- Deepest Regression Estimator added.
7+
8+
9+
# v0.10.2
210

311
- mahalanobisSquaredBetweenPairs() return Union{Nothing, Matrix} depending on the determinant of the covariance matrix
412
- mahalanobisSquaredMatrix() returns Union{Nothing, Matrix} depending on the determinant of the covariance matrix

Project.toml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "LinRegOutliers"
22
uuid = "6d4de0fb-32d9-4c65-aac1-cc9ed8b94b1a"
33
authors = ["Mehmet Hakan Satman <mhsatman@gmail.com>", "Shreesh Adiga <16567adigashreesh@gmail.com>", "Guillermo Angeris <angeris@stanford.edu>", "Emre Akadal <emre.akadal@istanbul.edu.tr>"]
4-
version = "0.10.2"
4+
version = "0.11.0"
55

66
[deps]
77
Clustering = "aaaa29a8-35af-508c-8bc3-b662a17a0fe5"
@@ -14,6 +14,7 @@ LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
1414
PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a"
1515
Requires = "ae029012-a4dd-5104-9daa-d747884805df"
1616
StatsModels = "3eaba693-59b7-5ba5-a881-562e759f1c8d"
17+
mrfDepth_jll = "53656f53-9700-50e7-bf9c-d3aea1338d1b"
1718

1819
[compat]
1920
Clustering = "0.12.2, 0.13, 0.14, 0.15"
@@ -26,6 +27,7 @@ PrecompileTools = "1"
2627
Requires = "1"
2728
StatsModels = "0.4, 0.5, 0.6, 0.7"
2829
julia = "1.4"
30+
mrfDepth_jll = "1.0.14"
2931

3032
[extras]
3133
Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"

README.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,11 +38,12 @@ A Julia package for outlier detection in linear regression.
3838
- Hadi (1994) Algorithm
3939
- Chatterjee & Mächler (1997)
4040
- Theil-Sen estimator for multiple regression
41+
- Deepest Regression Estimator
4142
- Summary
4243

4344

4445
## Unimplemented Methods
45-
- Depth based estimators (Regression depth, deepest regression, etc.) See [#13](https://github.com/jbytecode/LinRegOutliers/issues/13) for the related issue.
46+
4647
- Pena & Yohai (1999). See [#25](https://github.com/jbytecode/LinRegOutliers/issues/25) for the related issue.
4748

4849

docs/src/algorithms.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,10 @@ LinRegOutliers.quantileregression
135135
LinRegOutliers.theilsen
136136
```
137137

138+
## Deepest Regression Estimator
139+
```@docs
140+
LinRegOutliers.deepestregression
141+
```
138142

139143

140144

src/LinRegOutliers.jl

Lines changed: 38 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -5,33 +5,33 @@ using Requires
55
# After the module is loaded, we check if Plots is installed and loaded.
66
# If Plots is installed and loaded, we load the corresponding modules.
77
function __init__()
8-
@require Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" begin
9-
10-
import .Plots: RGBX
8+
@require Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" begin
119

12-
include("mveltsplot.jl")
13-
include("dataimage.jl")
14-
include("bchplot.jl")
10+
import .Plots: RGBX
1511

16-
import .MVELTSPlot: mveltsplot
17-
import .DataImage: dataimage
18-
import .BCHPlot: bchplot
19-
20-
export mveltsplot, dataimage, bchplot, RGBX
12+
include("mveltsplot.jl")
13+
include("dataimage.jl")
14+
include("bchplot.jl")
2115

22-
end
16+
import .MVELTSPlot: mveltsplot
17+
import .DataImage: dataimage
18+
import .BCHPlot: bchplot
19+
20+
export mveltsplot, dataimage, bchplot, RGBX
21+
22+
end
2323
end
2424

2525
# Basis
2626
include("basis.jl")
2727
import .Basis:
28-
RegressionSetting,
29-
createRegressionSetting,
30-
@extractRegressionSetting,
31-
applyColumns,
32-
find_minimum_nonzero,
33-
designMatrix,
34-
responseVector
28+
RegressionSetting,
29+
createRegressionSetting,
30+
@extractRegressionSetting,
31+
applyColumns,
32+
find_minimum_nonzero,
33+
designMatrix,
34+
responseVector
3535
export RegressionSetting
3636
export createRegressionSetting
3737
export designMatrix
@@ -65,20 +65,20 @@ import .OrdinaryLeastSquares: OLS, ols, wls, residuals, predict, coef
6565
# Regression diagnostics
6666
include("diagnostics.jl")
6767
import .Diagnostics:
68-
dffit,
69-
dffits,
70-
dfbeta,
71-
dfbetas,
72-
hatmatrix,
73-
studentizedResiduals,
74-
adjustedResiduals,
75-
jacknifedS,
76-
cooks,
77-
cooksoutliers,
78-
mahalanobisSquaredMatrix,
79-
covratio,
80-
hadimeasure,
81-
diagnose
68+
dffit,
69+
dffits,
70+
dfbeta,
71+
dfbetas,
72+
hatmatrix,
73+
studentizedResiduals,
74+
adjustedResiduals,
75+
jacknifedS,
76+
cooks,
77+
cooksoutliers,
78+
mahalanobisSquaredMatrix,
79+
covratio,
80+
hadimeasure,
81+
diagnose
8282

8383

8484
# Hadi & Simonoff (1993) algorithm
@@ -205,6 +205,10 @@ import .CM97: cm97
205205
include("theilsen.jl")
206206
import .TheilSen: theilsen
207207

208+
# Deepest Regression Estimator
209+
include("deepestregression.jl")
210+
import .DeepestRegression: deepestregression
211+
208212
# All-in-one
209213
include("summary.jl")
210214
import .Summary: detectOutliers
@@ -267,6 +271,7 @@ export atkinson94, atkinsonstalactiteplot, generate_stalactite_plot
267271
export bacon
268272
export cm97
269273
export theilsen
274+
export deepestregression
270275

271276

272277
# Snoop-Precompile

src/deepestregression.jl

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
module DeepestRegression
2+
3+
export deepestregression
4+
5+
import ..Basis:
6+
RegressionSetting, @extractRegressionSetting, designMatrix, responseVector
7+
8+
using mrfDepth_jll: mrfDepth_jll
9+
10+
11+
"""
12+
deepestregression(setting; maxit = 1000)
13+
14+
Estimate Deepest Regression parameters.
15+
16+
17+
# Arguments
18+
- `setting::RegressionSetting`: RegressionSetting object with a formula and dataset.
19+
- `maxit`: Maximum number of iterations
20+
21+
# Description
22+
Estimates Deepest Regression Estimator coefficients.
23+
24+
# References
25+
Van Aelst S., Rousseeuw P.J., Hubert M., Struyf A. (2002). The
26+
deepest regression method. Journal of Multivariate Analysis,
27+
81, 138-166.
28+
29+
30+
# Output
31+
- `betas`: Vector of regression coefficients estimated.
32+
"""
33+
function deepestregression(setting::RegressionSetting; maxit::Int = 10000)
34+
X = designMatrix(setting)
35+
y = responseVector(setting)
36+
if all(x -> isone(x), X[:, 1])
37+
X = X[:, 2:end]
38+
end
39+
return deepestregression(X, y, maxit = maxit)
40+
end
41+
42+
function deepestregression(X::Matrix{Float64}, y::Vector{Float64}; maxit::Int = 10000)::Vector{Float64}
43+
drdata = hcat(X, y)
44+
n, p = size(drdata)
45+
n = Int32(n)
46+
p = Int32(p)
47+
betas = zeros(Float64, p)
48+
maxit = Int32(maxit)
49+
iter = Int32(1)
50+
MDEPAPPR = Int32(p)
51+
ccall((:sweepmedres_, mrfDepth_jll.libmrfDepth),
52+
Cint,
53+
(Ref{Float64}, # X
54+
Ref{Int32}, # n
55+
Ref{Int32}, # np
56+
Ref{Float64}, # betas
57+
Ref{Cint}, # maxit
58+
Ref{Cint}, # iter
59+
Ref{Cint}, # MDEPAPPR
60+
), drdata, n, p, betas, maxit, iter, MDEPAPPR)
61+
62+
return vcat(betas[end], betas[1:(end-1)])
63+
end
64+
65+
66+
end # end of module DeepestRegression

src/precompile/precompile.jl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,5 +26,6 @@ using PrecompileTools
2626
smr98(reg)
2727
ransac(reg, t = 0.8, w = 0.85)
2828
theilsen(reg, 2, nsamples = 10)
29+
deepestregression(reg)
2930
end
3031
end

test/runtests.jl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,3 +36,4 @@ include("testbacon2000.jl")
3636
include("testdataimage.jl")
3737
include("testtheilsen.jl")
3838
include("testsummary.jl")
39+
include("testdeepestregression.jl")

test/testdeepestregression.jl

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
import LinRegOutliers: DataSets
2+
3+
@testset "Deepest Regression" begin
4+
5+
@testset "Simple Data" begin
6+
eps = 0.1
7+
8+
n = 100000
9+
x1 = rand(n)
10+
x2 = rand(n)
11+
o = ones(Float64, n)
12+
e = randn(n)
13+
y = 15 .+ 10 .* x1 + 5 .* x2 + e
14+
X = hcat(x1, x2)
15+
16+
result = deepestregression(X, y)
17+
18+
@test isapprox(result[1], 15, atol = eps)
19+
@test isapprox(result[2], 10, atol = eps)
20+
@test isapprox(result[3], 5, atol = eps)
21+
end
22+
23+
@testset "Stackloss Data Example" begin
24+
25+
eps = 0.001
26+
27+
setting = createRegressionSetting(
28+
@formula(stackloss ~ airflow + watertemp + acidcond),
29+
DataSets.stackloss)
30+
31+
result = deepestregression(setting)
32+
33+
@test isapprox(result[1], -35.37610619, atol = eps)
34+
@test isapprox(result[2], 0.82522124, atol = eps)
35+
@test isapprox(result[3], 0.44247788, atol = eps)
36+
@test isapprox(result[4], -0.07964602, atol = eps)
37+
38+
end
39+
end

0 commit comments

Comments
 (0)