Skip to content

Commit cde5c6a

Browse files
committed
Allow non-Real eltype with quantile
This is consistent with `Statistics.quantile` and avoids breakage due to #977. It's particularly useful for `Union{T, Missing}` element types, e.g. a view of the nonmissing entries in a vector. This also allows supporting some types such as `Date`, though currently this only works for some values (supporting all of them would require implementing `type=1`).
1 parent d70c4a2 commit cde5c6a

File tree

2 files changed

+25
-9
lines changed

2 files changed

+25
-9
lines changed

src/weights.jl

Lines changed: 17 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -626,7 +626,7 @@ is strictly superior to ``h``. The weighted ``p`` quantile is given by ``v_k +
626626
with ``γ = (h - S_k)/(S_{k+1} - S_k)``. In particular, when all weights are equal,
627627
the function returns the same result as the unweighted `quantile`.
628628
"""
629-
function quantile(v::AbstractVector{V}, w::AbstractWeights{W}, p::AbstractVector{<:Real}) where {V<:Real,W<:Real}
629+
function quantile(v::AbstractVector{V}, w::AbstractWeights{W}, p::AbstractVector{<:Real}) where {V, W<:Real}
630630
# checks
631631
isempty(v) && throw(ArgumentError("quantile of an empty array is undefined"))
632632
isempty(p) && throw(ArgumentError("empty quantile array"))
@@ -650,21 +650,29 @@ function quantile(v::AbstractVector{V}, w::AbstractWeights{W}, p::AbstractVector
650650
vw = sort!(collect(zip(view(v, nz), view(w, nz))))
651651
N = length(vw)
652652

653+
# missing is always sorted last
654+
if ismissing(vw[end][1])
655+
throw(ArgumentError("quantiles are undefined in presence of missing values"))
656+
end
657+
653658
# prepare percentiles
654659
ppermute = sortperm(p)
655660
p = p[ppermute]
656661

657662
# prepare out vector
658-
out = Vector{typeof(zero(V)/1)}(undef, length(p))
663+
v1 = vw[1][1]
664+
out = Vector{typeof(v1 + zero(eltype(p))*zero(W)*zero(v1))}(undef, length(p))
659665
fill!(out, vw[end][1])
660666

661-
for x in v
662-
isnan(x) && return fill!(out, x)
667+
# NaN is always sorted last in the absence of missing
668+
# This behavior isn't consistent with Statistics.quantile, but preserve it for backward compatibility
669+
if vw[end][1] isa Number && isnan(vw[end][1])
670+
return fill(vw[end][1], length(p))
663671
end
664672

665673
# loop on quantiles
666674
Sk, Skold = zero(W), zero(W)
667-
vk, vkold = zero(V), zero(V)
675+
vk, vkold = zero(v1), zero(v1)
668676
k = 0
669677

670678
w1 = vw[1][2]
@@ -693,19 +701,19 @@ function quantile(v::AbstractVector{V}, w::AbstractWeights{W}, p::AbstractVector
693701
return out
694702
end
695703

696-
function quantile(v::AbstractVector{<:Real}, w::UnitWeights, p::AbstractVector{<:Real})
704+
function quantile(v::AbstractVector, w::UnitWeights, p::AbstractVector{<:Real})
697705
length(v) != length(w) && throw(DimensionMismatch("Inconsistent array dimension."))
698706
return quantile(v, p)
699707
end
700708

701-
quantile(v::AbstractVector{<:Real}, w::AbstractWeights{<:Real}, p::Number) = quantile(v, w, [p])[1]
709+
quantile(v::AbstractVector, w::AbstractWeights, p::Real) = quantile(v, w, [p])[1]
702710

703711
##### Weighted median #####
704712

705713
"""
706-
median(v::AbstractVector{<:Real}, w::AbstractWeights)
714+
median(v::AbstractVector, w::AbstractWeights)
707715
708716
Compute the weighted median of `v` with weights `w`
709717
(of type `AbstractWeights`). See the documentation for [`quantile`](@ref) for more details.
710718
"""
711-
median(v::AbstractVector{<:Real}, w::AbstractWeights{<:Real}) = quantile(v, w, 0.5)
719+
median(v::AbstractVector, w::AbstractWeights) = quantile(v, w, 0.5)

test/weights.jl

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -459,6 +459,14 @@ end
459459
w = [1, 1/3, 1/3, 1/3, 1]
460460
answer = 6.0
461461
@test quantile(data[1], f(w), 0.5) ≈ answer atol = 1e-5
462+
463+
# Test non-Real eltype
464+
@test_throws ArgumentError quantile([missing, 1], f([1, 2]), 0.5)
465+
@test quantile(Union{Float64, Missing}[1, 2, 3, 4], f([1, 2, 2, 1]), 0.5) ==
466+
quantile(Any[1, 2, 3, 4], f([1, 2, 2, 1]), 0.5) ==
467+
quantile([1, 2, 3, 4], f([1, 2, 2, 1]), 0.5)
468+
@test quantile([Date(2005, 01, 01), Date(2005, 01, 01)], f([1, 1]), 0.5) ==
469+
Date(2005, 01, 01)
462470
end
463471

464472
@testset "Median $f" for f in weight_funcs

0 commit comments

Comments
 (0)