Skip to content

Commit 87e9ff2

Browse files
committed
gh-231: add riscv64 and img gpu support
1 parent 74658a9 commit 87e9ff2

8 files changed

Lines changed: 52 additions & 17 deletions

CMakeLists.txt

Lines changed: 9 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -35,8 +35,15 @@ if (${CMAKE_SYSTEM_NAME} MATCHES "Windows")
3535
list(APPEND SPLA_DEFINES SPLA_TARGET_WINDOWS)
3636
elseif (${CMAKE_SYSTEM_NAME} MATCHES "Linux")
3737
set(SPLA_TARGET_LINUX YES)
38-
set(SPLA_ARCH "x64")
3938
set(SPLA_EXT "so")
39+
if (NOT SPLA_ARCH)
40+
if (CMAKE_SYSTEM_PROCESSOR MATCHES riscv)
41+
set(SPLA_ARCH "riscv")
42+
else ()
43+
set(SPLA_ARCH "x64")
44+
endif ()
45+
endif()
46+
message(STATUS "Build Linux binaries for ${SPLA_ARCH} architecture")
4047
list(APPEND SPLA_DEFINES SPLA_TARGET_LINUX)
4148
elseif (${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
4249
set(SPLA_TARGET_MACOSX YES)
@@ -417,4 +424,4 @@ if (SPLA_BUILD_TESTS)
417424
COMMENT "Copy ${TARGET_FILE} into test directory")
418425
endforeach ()
419426
endif ()
420-
endif ()
427+
endif ()

src/opencl/cl_accelerator.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,7 @@ namespace spla {
108108
m_is_nvidia = false;
109109
m_is_amd = false;
110110
m_is_intel = false;
111+
m_is_img = false;
111112

112113
if (m_vendor_name.find("Intel") != std::string::npos ||
113114
m_vendor_name.find("intel") != std::string::npos ||
@@ -141,6 +142,15 @@ namespace spla {
141142
// Likely, it is an integrated amd device
142143
if (m_max_wgs <= 256 || m_max_cu == 1) m_wave_size = 16;
143144
}
145+
if (m_vendor_name.find("Imagination Technologies") != std::string::npos ||
146+
m_vendor_name.find("IMG") != std::string::npos ||
147+
m_vendor_name.find("img") != std::string::npos ||
148+
m_vendor_id == 0x1010) {
149+
m_vendor_code = VENDOR_CODE_IMG;
150+
m_default_wgs = 32;
151+
m_wave_size = 32;
152+
m_is_img = true;
153+
}
144154

145155
if (m_vendor_code.empty()) {
146156
LOG_MSG(Status::Error, "failed to match one of the pre-defined vendors");

src/opencl/cl_accelerator.hpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@
4949
#define VENDOR_CODE_NVIDIA "nvidia"
5050
#define VENDOR_CODE_INTEL "intel"
5151
#define VENDOR_CODE_AMD "amd"
52+
#define VENDOR_CODE_IMG "img"
5253

5354
namespace spla {
5455

@@ -96,6 +97,7 @@ namespace spla {
9697
[[nodiscard]] bool is_nvidia() const { return m_is_nvidia; }
9798
[[nodiscard]] bool is_amd() const { return m_is_amd; }
9899
[[nodiscard]] bool is_intel() const { return m_is_intel; }
100+
[[nodiscard]] bool is_img() const { return m_is_img; }
99101

100102
private:
101103
cl::Platform m_platform;
@@ -123,6 +125,7 @@ namespace spla {
123125
bool m_is_nvidia = false;
124126
bool m_is_amd = false;
125127
bool m_is_intel = false;
128+
bool m_is_img = false;
126129

127130
ankerl::svector<cl::CommandQueue, 2> m_queues;
128131
};

src/opencl/cl_format_coo_vec.hpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -105,18 +105,18 @@ namespace spla {
105105
T* Ax,
106106
const CLCooVec<T>& storage,
107107
cl::CommandQueue& queue,
108-
bool blocking = true) {
108+
cl_mem_flags staging_flags = CL_MEM_READ_ONLY | CL_MEM_HOST_READ_ONLY | CL_MEM_ALLOC_HOST_PTR,
109+
bool blocking = true) {
109110
if (n_values == 0) {
110111
LOG_MSG(Status::Ok, "nothing to do");
111112
return;
112113
}
113114

114115
const std::size_t buffer_size_Ai = n_values * sizeof(uint);
115116
const std::size_t buffer_size_Ax = n_values * sizeof(T);
116-
const auto flags = CL_MEM_READ_ONLY | CL_MEM_HOST_READ_ONLY | CL_MEM_ALLOC_HOST_PTR;
117117

118-
cl::Buffer staging_Ai(get_acc_cl()->get_context(), flags, buffer_size_Ai);
119-
cl::Buffer staging_Ax(get_acc_cl()->get_context(), flags, buffer_size_Ax);
118+
cl::Buffer staging_Ai(get_acc_cl()->get_context(), staging_flags, buffer_size_Ai);
119+
cl::Buffer staging_Ax(get_acc_cl()->get_context(), staging_flags, buffer_size_Ax);
120120

121121
queue.enqueueCopyBuffer(storage.Ai, staging_Ai, 0, 0, buffer_size_Ai);
122122
queue.enqueueCopyBuffer(storage.Ax, staging_Ax, 0, 0, buffer_size_Ax);

src/opencl/cl_format_csr.hpp

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -84,16 +84,15 @@ namespace spla {
8484
T* Ax,
8585
CLCsr<T>& storage,
8686
cl::CommandQueue& queue,
87-
bool blocking = true) {
87+
cl_mem_flags staging_flags = CL_MEM_READ_ONLY | CL_MEM_HOST_READ_ONLY | CL_MEM_ALLOC_HOST_PTR,
88+
bool blocking = true) {
8889
const std::size_t buffer_size_Ap = (n_rows + 1) * sizeof(uint);
8990
const std::size_t buffer_size_Aj = n_values * sizeof(uint);
9091
const std::size_t buffer_size_Ax = n_values * sizeof(T);
9192

92-
const auto flags = CL_MEM_READ_ONLY | CL_MEM_HOST_READ_ONLY | CL_MEM_ALLOC_HOST_PTR;
93-
94-
cl::Buffer staging_Ap(get_acc_cl()->get_context(), flags, buffer_size_Ap);
95-
cl::Buffer staging_Aj(get_acc_cl()->get_context(), flags, buffer_size_Aj);
96-
cl::Buffer staging_Ax(get_acc_cl()->get_context(), flags, buffer_size_Ax);
93+
cl::Buffer staging_Ap(get_acc_cl()->get_context(), staging_flags, buffer_size_Ap);
94+
cl::Buffer staging_Aj(get_acc_cl()->get_context(), staging_flags, buffer_size_Aj);
95+
cl::Buffer staging_Ax(get_acc_cl()->get_context(), staging_flags, buffer_size_Ax);
9796

9897
queue.enqueueCopyBuffer(storage.Ap, staging_Ap, 0, 0, buffer_size_Ap);
9998
queue.enqueueCopyBuffer(storage.Aj, staging_Aj, 0, 0, buffer_size_Aj);

src/opencl/cl_format_dense_vec.hpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -77,9 +77,10 @@ namespace spla {
7777
T* values,
7878
CLDenseVec<T>& storage,
7979
cl::CommandQueue& queue,
80-
bool blocking = true) {
80+
cl_mem_flags staging_flags = CL_MEM_READ_ONLY | CL_MEM_HOST_READ_ONLY | CL_MEM_ALLOC_HOST_PTR,
81+
bool blocking = true) {
8182
const std::size_t buffer_size = n_rows * sizeof(T);
82-
cl::Buffer staging(get_acc_cl()->get_context(), CL_MEM_READ_ONLY | CL_MEM_HOST_READ_ONLY | CL_MEM_ALLOC_HOST_PTR, buffer_size);
83+
cl::Buffer staging(get_acc_cl()->get_context(), staging_flags, buffer_size);
8384

8485
queue.enqueueCopyBuffer(storage.Ax, staging, 0, 0, buffer_size);
8586
queue.enqueueReadBuffer(staging, blocking, 0, buffer_size, values);

src/storage/storage_manager_matrix.hpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -146,7 +146,12 @@ namespace spla {
146146
auto* cl_csr = s.template get<CLCsr<T>>();
147147
auto* cpu_csr = s.template get<CpuCsr<T>>();
148148
cpu_csr_resize(s.get_n_rows(), cl_csr->values, *cpu_csr);
149-
cl_csr_read(s.get_n_rows(), cl_csr->values, cpu_csr->Ap.data(), cpu_csr->Aj.data(), cpu_csr->Ax.data(), *cl_csr, cl_acc->get_queue_default());
149+
if (!cl_acc->is_img()) {
150+
cl_csr_read(s.get_n_rows(), cl_csr->values, cpu_csr->Ap.data(), cpu_csr->Aj.data(), cpu_csr->Ax.data(), *cl_csr, cl_acc->get_queue_default());
151+
} else {
152+
cl_csr_read(s.get_n_rows(), cl_csr->values, cpu_csr->Ap.data(), cpu_csr->Aj.data(), cpu_csr->Ax.data(), *cl_csr, cl_acc->get_queue_default(),
153+
CL_MEM_HOST_READ_ONLY | CL_MEM_ALLOC_HOST_PTR);
154+
}
150155
});
151156
#endif
152157
}

src/storage/storage_manager_vector.hpp

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,12 @@ namespace spla {
126126
auto* cl_acc = get_acc_cl();
127127
auto* cl_dense = s.template get<CLDenseVec<T>>();
128128
auto* cpu_dense = s.template get<CpuDenseVec<T>>();
129-
cl_dense_vec_read(s.get_n_rows(), cpu_dense->Ax.data(), *cl_dense, cl_acc->get_queue_default());
129+
if (!cl_acc->is_img()) {
130+
cl_dense_vec_read(s.get_n_rows(), cpu_dense->Ax.data(), *cl_dense, cl_acc->get_queue_default());
131+
} else {
132+
cl_dense_vec_read(s.get_n_rows(), cpu_dense->Ax.data(), *cl_dense, cl_acc->get_queue_default(),
133+
CL_MEM_HOST_READ_ONLY | CL_MEM_ALLOC_HOST_PTR);
134+
}
130135
});
131136
manager.register_converter(FormatVector::CpuCoo, FormatVector::AccCoo, [](Storage& s) {
132137
auto* cpu_coo = s.template get<CpuCooVec<T>>();
@@ -138,7 +143,12 @@ namespace spla {
138143
auto* cl_coo = s.template get<CLCooVec<T>>();
139144
auto* cpu_coo = s.template get<CpuCooVec<T>>();
140145
cpu_coo_vec_resize(cl_coo->values, *cpu_coo);
141-
cl_coo_vec_read(cl_coo->values, cpu_coo->Ai.data(), cpu_coo->Ax.data(), *cl_coo, cl_acc->get_queue_default());
146+
if (!cl_acc->is_img()) {
147+
cl_coo_vec_read(cl_coo->values, cpu_coo->Ai.data(), cpu_coo->Ax.data(), *cl_coo, cl_acc->get_queue_default());
148+
} else {
149+
cl_coo_vec_read(cl_coo->values, cpu_coo->Ai.data(), cpu_coo->Ax.data(), *cl_coo, cl_acc->get_queue_default(),
150+
CL_MEM_HOST_READ_ONLY | CL_MEM_ALLOC_HOST_PTR);
151+
}
142152
});
143153
manager.register_converter(FormatVector::AccCoo, FormatVector::AccDense, [](Storage& s) {
144154
auto* cl_acc = get_acc_cl();

0 commit comments

Comments
 (0)