# WebGPU Backend

Run ExecuTorch models on the GPU via [WebGPU](https://www.w3.org/TR/webgpu/). The backend compiles delegated subgraphs into WGSL compute shaders executed natively through [wgpu-native](https://github.com/gfx-rs/wgpu-native) (Metal on macOS, Vulkan on Linux/Windows).

> **Status: Prototype.** The backend supports a single operator today and is under active development. See [TODO.md](TODO.md) for the roadmap.

## Architecture

```
PyTorch model
      │ torch.export
      ▼
Exported Program
      │ VulkanPartitioner (tags supported fp32 ops)
      ▼
Edge Dialect IR
      │ VulkanBackend.preprocess (builds Vulkan FlatBuffer, buffer-only storage)
      ▼
.pte file (with VH00/VK00 delegate blob)
      │
      ▼
Native runtime (wgpu-native → Metal / Vulkan)
      │ WebGPUGraph::build → creates GPU buffers, pipelines, bind groups
      │ WebGPUGraph::execute → encodes + submits compute passes
      ▼
GPU output (mapped back to CPU via wgpuDevicePoll)
```
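
The last stage of the diagram copies GPU results back to host memory, where the mapped buffer is just raw little-endian bytes. A stdlib-only sketch of decoding such a buffer into floats (illustrative; the runtime performs this readback in C++ after `wgpuDevicePoll`):

```python
import struct

def decode_f32_buffer(raw: bytes) -> list[float]:
    """Decode a mapped GPU buffer holding little-endian float32 values."""
    count = len(raw) // 4
    return list(struct.unpack(f"<{count}f", raw))

# Round trip: pack four floats the way a GPU output buffer would hold them.
raw = struct.pack("<4f", 1.0, 2.0, 3.0, 4.0)
print(decode_f32_buffer(raw))  # → [1.0, 2.0, 3.0, 4.0]
```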

Key design choices:
- **Reuses Vulkan serialization** — the delegate blob is a Vulkan FlatBuffer (`VK00`) with a `VH00` header. All tensor storage is forced to `BUFFER` (WebGPU has no 3D storage textures).
- **Built-in WGSL shaders** — shader source is compiled in as C++ string constants. Future work will embed fused shaders in the FlatBuffer for compile-time mega-kernel fusion.
- **No Python AOT code** — directly consumes .pte files exported via `VulkanPartitioner`.

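For the built-in elementwise shaders, dispatch sizing follows the standard compute-shader pattern: cover every tensor element with fixed-size workgroups. A sketch of that arithmetic (the workgroup size of 64 is a hypothetical value, not taken from this backend's WGSL source):

```python
import math

# Hypothetical workgroup size; the real value is declared in the WGSL shader
# via @workgroup_size(...) and may differ.
WORKGROUP_SIZE = 64

def num_workgroups_1d(numel: int, workgroup_size: int = WORKGROUP_SIZE) -> int:
    """1D dispatch: how many workgroups are needed to cover `numel` elements."""
    return math.ceil(numel / workgroup_size)

print(num_workgroups_1d(4 * 4))  # 4x4 add example → 1 workgroup
print(num_workgroups_1d(1000))   # → 16 workgroups (the last one partially idle)
```

Out-of-range invocations in the final workgroup are typically masked with a bounds check inside the shader.
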
## Operator Support

| Operator | WGSL Shader | Notes |
|---|---|---|
| `aten.add.Tensor` | `binary_add.wgsl` | Element-wise with alpha: `out = in1 + alpha * in2` |

**Planned:** `sub`, `mul`, `relu`, `linear` (matmul), `softmax`, `layer_norm`

## Quick Start

### 1. Setup

```bash
bash backends/webgpu/scripts/setup-wgpu-native.sh
```

This downloads prebuilt wgpu-native binaries for your platform.
### 2. Export a model

```python
import torch
from executorch.backends.vulkan import VulkanPartitioner
from executorch.exir import to_edge_transform_and_lower

class AddModule(torch.nn.Module):
    def forward(self, a: torch.Tensor, b: torch.Tensor) -> torch.Tensor:
        return a + b

ep = torch.export.export(AddModule(), (torch.randn(4, 4), torch.randn(4, 4)))
et_program = to_edge_transform_and_lower(
    ep, partitioner=[VulkanPartitioner()]
).to_executorch()

with open("add.pte", "wb") as f:
    f.write(et_program.buffer)
```

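After exporting, a quick sanity check is to confirm the delegate blob actually landed in the file. Since the blob is a Vulkan FlatBuffer, its `VK00` file identifier should appear verbatim in the serialized program (a heuristic sketch, not an API the runtime exposes):

```python
def contains_vulkan_blob(pte_bytes: bytes) -> bool:
    """Heuristic: look for the Vulkan FlatBuffer file identifier in the program bytes."""
    return b"VK00" in pte_bytes

# Synthetic demonstration; on a real export, read the bytes of add.pte instead.
print(contains_vulkan_blob(b"...VK00..."))        # → True
print(contains_vulkan_blob(b"no delegate here"))  # → False
```

On a real export, `contains_vulkan_blob(open("add.pte", "rb").read())` should return `True` when partitioning delegated the add.
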
### 3. Build and run

```bash
bash backends/webgpu/test/test_build_webgpu.sh
```

This runs the Python export tests, exports a .pte, builds the native runtime, and validates GPU output.

## Directory Structure

```
backends/webgpu/
├── CMakeLists.txt
├── README.md
├── TODO.md
├── runtime/
│   ├── WebGPUBackend.h/cpp          # BackendInterface (init/execute)
│   ├── WebGPUGraph.h/cpp            # GPU graph: buffers, pipelines, dispatch
│   ├── WebGPUDelegateHeader.h/cpp   # VH00 header parser
│   ├── WebGPUDevice.h/cpp           # wgpu-native device abstraction
│   └── ops/
│       ├── OperatorRegistry.h/cpp   # Op dispatch table
│       └── add/
│           ├── BinaryOp.cpp         # aten.add.Tensor implementation
│           ├── binary_add.wgsl      # WGSL shader source
│           └── binary_add_wgsl.h    # Shader as C++ string constant
├── scripts/
│   └── setup-wgpu-native.sh         # Download wgpu-native binaries
└── test/
    ├── conftest.py
    ├── test_build_webgpu.sh         # End-to-end build + test
    ├── test_webgpu_native.cpp       # C++ native test runner
    └── ops/
        └── add/
            └── test_add.py          # Python export tests
```

## Requirements

- **macOS**: Metal-capable GPU
- **Linux**: Vulkan-capable GPU + drivers
- **Build**: CMake 3.19+, conda environment with ExecuTorch installed