Skip to content

Commit a08e0ad

Browse files
committed
initial implementation of the robust hat matrix based robust regression estimator
1 parent 78051c7 commit a08e0ad

5 files changed

Lines changed: 150 additions & 2 deletions

File tree

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
# v0.11.5 (Upcoming Release)
22

3+
- Initial implementation of the robust hat matrix regression estimator
34

45
# v0.11.4
56

src/LinRegOutliers.jl

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -212,6 +212,10 @@ import .TheilSen: theilsen
212212
include("deepestregression.jl")
213213
import .DeepestRegression: deepestregression
214214

215+
# Robust Hat matrix based regression
216+
include("robhatreg.jl")
217+
import .RobustHatRegression: robhatreg
218+
215219
# All-in-one
216220
include("summary.jl")
217221
import .Summary: detectOutliers
@@ -275,10 +279,11 @@ export bacon
275279
export cm97
276280
export theilsen
277281
export deepestregression
282+
export robhatreg
278283

279284

280285
# Snoop-Precompile
281-
include("precompile/precompile.jl")
286+
# include("precompile/precompile.jl")
282287

283288

284289
end # module

src/robhatreg.jl

Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
module RobustHatRegression
2+
3+
4+
export robhatreg
5+
6+
import ..Basis: RegressionSetting, @extractRegressionSetting, designMatrix, responseVector
7+
import ..OrdinaryLeastSquares: ols, residuals, coef
8+
import ..LTS: iterateCSteps
9+
10+
import Distributions: quantile
11+
import LinearAlgebra: inv, diag
12+
13+
14+
# Tukey's trimean: the weighted average (Q1 + 2 * median + Q3) / 4 of the
# three quartiles of `u`.
function trimean(u::AbstractVector{T})::Float64 where T <: Real
    lower, middle, upper = quantile(u, (0.25, 0.50, 0.75))
    return (lower + 2.0 * middle + upper) / 4.0
end
17+
18+
# Trimean-based replacement for the dot product of `v` and `u`: the trimean of
# the element-wise products multiplied by the number of elements (the ordinary
# dot product is the *mean* of the products times that count).
#
# Accepts any `AbstractVector` (plain vectors, views, ranges, ...).
# Throws `DimensionMismatch` when the lengths differ; without the explicit check
# a length-1 argument would be silently broadcast against the other one.
function m(v::AbstractVector, u::AbstractVector)::Float64
    length(v) == length(u) || throw(DimensionMismatch(
        "vectors must have the same length, got $(length(v)) and $(length(u))"))
    return trimean(u .* v) * length(u)
end
21+
22+
# Matrix-by-vector form: copy `u` into a one-column Float64 matrix and delegate
# to the matrix-by-matrix method, so the result is a matrix with one column.
function m(mat::AbstractMatrix, u::AbstractVector)::AbstractMatrix
    len = length(u)
    column = zeros(Float64, len, 1)
    for (row, value) in zip(1:len, u)
        column[row, 1] = value
    end
    return m(mat, column)
end
31+
32+
# Trimean-based analogue of the matrix product `m1 * m2`: entry (i, j) is the
# vector form of `m` applied to row i of `m1` and column j of `m2`.
#
# Throws `DimensionMismatch` when the number of columns of `m1` differs from the
# number of rows of `m2`. Without this check a single-column `m1` (or single-row
# `m2`) would be silently broadcast and produce a meaningless result.
function m(m1::AbstractMatrix, m2::AbstractMatrix)
    n1, p1 = size(m1)
    n2, p2 = size(m2)
    p1 == n2 || throw(DimensionMismatch(
        "inner dimensions must match, got $(size(m1)) and $(size(m2))"))
    newmat = zeros(Float64, n1, p2)
    for j in 1:p2
        # Slices are materialized as plain vectors before being combined.
        col = m2[:, j]
        for i in 1:n1
            newmat[i, j] = m(m1[i, :], col)
        end
    end
    return newmat
end
43+
44+
# Robust hat matrix of `x`: x * A^-1 * x', where A = m(x', x) is the
# trimean-based counterpart of x' * x.
#
# A^-1 * x' is obtained with a linear solve (`A \ x'`) rather than by forming
# the explicit inverse, which avoids an extra matrix product and is the
# numerically preferred way to apply an inverse.
function hatrob(x::AbstractMatrix)
    xt = x'
    return x * (m(xt, x) \ xt)
end
47+
48+
49+
"""
50+
robhatreg(setting::RegressionSetting)
51+
52+
Perform robust regression using the robust hat matrix method.
53+
54+
# Arguments
55+
- `setting::RegressionSetting`: The regression setting.
56+
57+
# Returns
58+
59+
- A dictionary containing the following
60+
- `betas::AbstractVector`: The estimated coefficients.
61+
62+
# References
63+
64+
Satman, Mehmet Hakan, A robust initial basic subset selection
65+
method for outlier detection algorithms in linear regression, In Press
66+
"""
67+
function robhatreg(setting::RegressionSetting)
    # Unpack the two values produced by the macro and hand them to the
    # two-argument method, which does the actual estimation.
    design, response = @extractRegressionSetting setting
    return robhatreg(design, response)
end
71+
72+
73+
"""
74+
robhatreg(X, y)
75+
76+
Perform robust regression using the robust hat matrix method.
77+
78+
# Arguments
79+
80+
- `X::AbstractMatrix`: The design matrix.
81+
- `y::AbstractVector`: The response vector.
82+
83+
# Returns
84+
85+
- A dictionary containing the following
86+
- `betas::AbstractVector`: The estimated coefficients.
87+
88+
# References
89+
90+
Satman, Mehmet Hakan, A robust initial basic subset selection
91+
method for outlier detection algorithms in linear regression, In Press
92+
"""
93+
function robhatreg(X, y)
    n, p = size(X)

    # Validate up front so bad input fails with a descriptive error instead of
    # an opaque failure deeper in the computation.
    # Throws DimensionMismatch when y does not have one element per row of X.
    length(y) == n || throw(DimensionMismatch(
        "X has $n rows but y has $(length(y)) elements"))
    # Throws ArgumentError when there are too few observations: the initial
    # subset below takes p + 1 of them.
    n >= p + 1 || throw(ArgumentError(
        "at least p + 1 = $(p + 1) observations are required, got $n"))

    # Subset size passed to the C-steps: ceil((n + p + 1) / 2), computed in
    # integer arithmetic.
    h = cld(n + p + 1, 2)

    # The p + 1 observations with the smallest robust-hat diagonals form the
    # initial subset.
    diagonals = diag(hatrob(X))
    bestindices = sortperm(diagonals)[1:(p + 1)]

    _, indices = iterateCSteps(X, y, bestindices, h)

    # Least-squares fit on the observations selected by the C-steps.
    betas = X[indices, :] \ y[indices]
    return Dict("betas" => betas)
end
104+
105+
106+
107+
end # end of module RobustHatRegression

test/runtests.jl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,4 +36,5 @@ include("testbacon2000.jl")
3636
include("testdataimage.jl")
3737
include("testtheilsen.jl")
3838
include("testsummary.jl")
39-
include("testdeepestregression.jl")
39+
include("testdeepestregression.jl")
40+
include("testrobhatreg.jl")

test/testrobhatreg.jl

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
2+
3+
@testset "Robust Hat Matrix based Robust Regression" begin
    # Simulate y = 5 + 5x + noise with a fixed seed so the test is reproducible
    rng = MersenneTwister(12345)
    n = 50
    x = collect(1:n)
    noise = randn(rng, n) .* 2.0
    y = 5 .+ 5 .* x .+ noise

    # Contaminate the last five responses by doubling them
    for offset in 0:4
        y[n - offset] = y[n - offset] * 2.0
    end

    df = DataFrame(x=x, y=y)
    reg = createRegressionSetting(@formula(y ~ x), df)

    result = robhatreg(reg)
    betas = result["betas"]

    # Both estimated coefficients should be close to the generating values (5 and 5)
    atol = 1.0
    @test isapprox(betas[1], 5.0, atol=atol)
    @test isapprox(betas[2], 5.0, atol=atol)
end
30+
31+
32+
33+
34+

0 commit comments

Comments
 (0)