Skip to content

No method matching _vstore_unroll! on ARM #543

@benegee

Description

@benegee

We are using @turbo extensively in Trixi.jl.
Recently, we have started running our code on ARM-based machines and encountered the following error:

LoadError: MethodError: no method matching _vstore_unroll!(::LayoutPointers.StridedPointer{Float64, 4, 1, 0, (1, 2, 3, 4), Tuple{Static.StaticInt{8}, Static.StaticInt{8}, Static.StaticInt{40}, Static.StaticInt{200}}, NTuple{4, Static.StaticInt{0}}}, ::VectorizationBase.VecUnroll{4, 1, Float64, VectorizationBase.VecUnroll{4, 1, Float64, Float64}}, ::VectorizationBase.Unroll{2, 1, 5, 1, 1, 0x0000000000000000, 1, VectorizationBase.Unroll{4, 1, 5, 1, 1, 0x0000000000000000, 1, Static.StaticInt{0}}}, ::Static.False, ::Static.False, ::Static.False, ::Static.StaticInt{16}, ::Static.StaticInt{8})
  
  Closest candidates are:
    _vstore_unroll!(::LayoutPointers.AbstractStridedPointer{T1, D, C, B, R, X, O} where {B, R, X<:Tuple{Vararg{Union{Int16, Int32, Int64, Int8, UInt16, UInt32, UInt64, UInt8, Static.StaticInt}, D}}, O<:Tuple{Vararg{Union{Int16, Int32, Int64, Int8, UInt16, UInt32, UInt64, UInt8, Static.StaticInt}, D}}}, ::VectorizationBase.VecUnroll{<:Any, W, T2, <:VectorizationBase.VecUnroll{<:Any, W, T2, VectorizationBase.Vec{W, T2}}}, ::UU, ::M, ::A, ::S, ::NT, ::Static.StaticInt{RS}, ::SVUS) where {T1, D, C, W, T2, UU, A, S, NT, RS, SVUS, M}
     @ VectorizationBase ~/.julia/packages/VectorizationBase/LqJbS/src/vecunroll/memory.jl:2552
    _vstore_unroll!(::LayoutPointers.AbstractStridedPointer{T, D, C, B, R, X, O} where {B, R, X<:Tuple{Vararg{Union{Int16, Int32, Int64, Int8, UInt16, UInt32, UInt64, UInt8, Static.StaticInt}, D}}, O<:Tuple{Vararg{Union{Int16, Int32, Int64, Int8, UInt16, UInt32, UInt64, UInt8, Static.StaticInt}, D}}}, ::VectorizationBase.VecUnroll{<:Any, W, T, <:VectorizationBase.VecUnroll{<:Any, W, T, VectorizationBase.Vec{W, T}}}, ::UU, ::A, ::S, ::NT, ::Static.StaticInt{RS}, ::Static.StaticInt{SVUS}) where {W, T, A<:Static.StaticBool, S<:Static.StaticBool, NT<:Static.StaticBool, RS, D, C, SVUS, UU<:(VectorizationBase.Unroll{AUO, FO, NO, AV, W, MO, X, VectorizationBase.Unroll{AUI, FI, NI, AV, W, MI, X, I}} where {AV, X, I, AUO, FO, NO, MO, AUI, FI, NI, MI})}
     @ VectorizationBase ~/.julia/packages/VectorizationBase/LqJbS/src/vecunroll/memory.jl:2575
    _vstore_unroll!(::LayoutPointers.AbstractStridedPointer{T1, D, C, B, R, X, O} where {B, R, X<:Tuple{Vararg{Union{Int16, Int32, Int64, Int8, UInt16, UInt32, UInt64, UInt8, Static.StaticInt}, D}}, O<:Tuple{Vararg{Union{Int16, Int32, Int64, Int8, UInt16, UInt32, UInt64, UInt8, Static.StaticInt}, D}}}, ::VectorizationBase.VecUnroll{<:Any, W, T2, <:VectorizationBase.VecUnroll{<:Any, W, T2, VectorizationBase.Vec{W, T2}}}, ::UU, ::A, ::S, ::NT, ::Static.StaticInt{RS}, ::SVUS) where {T1, D, C, W, T2, UU, A, S, NT, RS, SVUS}
     @ VectorizationBase ~/.julia/packages/VectorizationBase/LqJbS/src/vecunroll/memory.jl:2531
    ...

The error originates in one of our helper functions, which essentially performs a specialized matrix-vector multiplication. We were able to reproduce the issue with the following minimal working example:

MWE
using StaticArrays
using StrideArrays: PtrArray, StaticInt
using LoopVectorization: @turbo

# Reproducer kernel: for every index (v, i, j, k) of `data_out`, accumulate
# `matrix[k, kk] * tmp[v, i, j, kk]` over `kk` and store the sum in
# `data_out[v, i, j, k]`.
#
# Arguments:
# - `data_out`: array indexed with four indices; overwritten in place.
# - `matrix`:   two-dimensional operator, indexed as `matrix[k, kk]`.
#
# Returns `nothing`; the only visible effect is the write into `data_out`.
function multiply_dimensionwise!(data_out, matrix)

    # Scratch input: zero-filled and never written to in this function.
    # Its shape is built from `size(data_out, 1)` and the sizes of `matrix`, not from
    # the remaining dimensions of `data_out`.
    # NOTE(review): the loops below index `tmp` with the axes of `data_out`, so this
    # presumably relies on `matrix` being square with a side length equal to
    # `size(data_out, 2)`, `size(data_out, 3)` and `size(data_out, 4)` — confirm.
    tmp = zeros(eltype(data_out), size(data_out, 1), size(matrix, 1), size(matrix, 2), size(matrix, 2))

    # `@turbo` (LoopVectorization) wraps the whole nest, including the inner `kk` loop.
    # Loop order as written: `k` outermost, `v` innermost.
    @turbo for k in axes(data_out, 4), j in axes(data_out, 3), i in axes(data_out, 2), v in axes(data_out, 1)

        # Accumulator starts at zero in the element type of `data_out`.
        res = zero(eltype(data_out))
        # Contraction over the second index of `matrix` and the fourth index of `tmp`.
        for kk in axes(matrix, 2)
            res += matrix[k, kk] * tmp[v, i, j, kk]
        end
        # Store of the accumulated value; the reported error names `_vstore_unroll!`,
        # so this store is presumably where the failing method lookup happens — confirm.
        data_out[v, i, j, k] = res
    end

    return nothing
end

# Problem sizes for the reproducer.
dims = 3
# NOTE(review): flagged as important by the reporter; the strides in the reported
# pointer type (8, 8, 40, 200) are consistent with 5 Float64 entries per dimension.
# Whether other values also fail is not shown here.
nodes = 5 # important!
els = 1

# Backing storage: a flat vector with nodes^dims * els entries, all set to 2.0.
test_u = fill(2.0, nodes^dims * els)
# Wrap the memory of `test_u` as an array of size (1, nodes, nodes, nodes, els).
# The leading 1 and the `nodes` sizes are passed as `StaticInt`; `els` is a plain integer.
test_ptr = PtrArray(pointer(test_u), (StaticInt(1), ntuple(_ -> StaticInt(nodes), dims)..., els))
# nodes × nodes matrix of ones, then converted to an `SMatrix` of the same size.
test_mat = fill(1.0, nodes, nodes)
test_smat = SMatrix{nodes, nodes}(test_mat)

# Call the kernel on the slice at index 1 of the fifth dimension (four dimensions remain).
multiply_dimensionwise!(view(test_ptr, :, :, :, :, 1), test_smat)

Xref: trixi-framework/Trixi.jl#2075

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type
    No fields configured for issues without a type.

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions