Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions backends/webgpu/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ set(WEBGPU_SRCS
runtime/ops/quantized_linear/QuantizedLinear.cpp
runtime/ops/embedding_q4gsw/EmbeddingQ4gsw.cpp
runtime/ops/rope/RotaryEmbedding.cpp
runtime/ops/prepack/Prepack.cpp
)

add_library(webgpu_backend ${WEBGPU_SRCS})
Expand Down
207 changes: 161 additions & 46 deletions backends/webgpu/runtime/WebGPUGraph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,10 @@ void WebGPUGraph::build(

const auto* graph = vkgraph::GetVkGraph(flatbuffer_data);

// .pte byte sources for prepack-time constant materialization (build-only).
constant_data_ = constant_data;
named_data_map_ = named_data_map;

// Phase 1: Create all values
const auto* values = graph->values();
const int num_vals = values ? values->size() : 0;
Expand All @@ -241,6 +245,41 @@ void WebGPUGraph::build(
bools_.resize(num_vals, false);
value_lists_.resize(num_vals);

// Pre-scan the op chain: a constant may be DEFERRED (no eager GPU buffer; the
// prepack node materializes it once) only if it is a prepack source AND never
// a direct arg of a non-prepack op. ValueList args are expanded so a constant
// reached through a list still counts as a direct use.
std::unordered_set<int> prepack_src_ids;
std::unordered_set<int> direct_use_ids;
const auto* chain_prescan = graph->chain();
if (chain_prescan) {
for (unsigned ci = 0; ci < chain_prescan->size(); ci++) {
const auto* oc = chain_prescan->Get(ci);
const bool is_prepack = oc->name()->str() == "et_vk.prepack.default";
const auto* a = oc->args();
if (!a) {
continue;
}
for (unsigned j = 0; j < a->size(); j++) {
int id = static_cast<int>(a->Get(j));
if (is_prepack && j == 0) {
prepack_src_ids.insert(id);
} else if (!is_prepack) {
direct_use_ids.insert(id);
const auto* v = values ? values->Get(id) : nullptr;
if (v && v->value_type() == vkgraph::GraphTypes::ValueList) {
const auto* items = v->value_as_ValueList()->items();
if (items) {
for (unsigned k = 0; k < items->size(); k++) {
direct_use_ids.insert(static_cast<int>(items->Get(k)));
}
}
}
}
}
}
}

for (int i = 0; i < num_vals; i++) {
const auto* val = values->Get(i);
if (!val || val->value_type() == vkgraph::GraphTypes::NONE) {
Expand Down Expand Up @@ -269,60 +308,94 @@ void WebGPUGraph::build(
int constant_id = vk_tensor->constant_id();
int mem_obj_id = vk_tensor->mem_obj_id();

// Constants always get dedicated buffers regardless of mem_obj_id
// Constants are dedicated. A prepack-routed constant is recorded as a
// ConstantSource (materialized once by its prepack node); if it is used
// ONLY via prepack it is deferred (no eager GPU buffer).
if (constant_id >= 0 || mem_obj_id < 0) {
tensor_mem_obj_ids_[i] = -1;
WGPUBufferDescriptor buf_desc = {};
buf_desc.size = std::max(tensor.nbytes, size_t(4));
buf_desc.usage = WGPUBufferUsage_Storage | WGPUBufferUsage_CopyDst |
WGPUBufferUsage_CopySrc;
buf_desc.mappedAtCreation = false;
tensor.buffer = wgpuDeviceCreateBuffer(device_, &buf_desc);

if (constant_id >= 0 && constant_data && tensor.nbytes > 0) {

// Record the source for every prepack-routed constant (metadata only,
// resolved straight from the constants table) so materialize_constant
// always has a source -- independent of the defer decision below.
const bool is_prepack_src =
constant_id >= 0 && prepack_src_ids.count(i) != 0;
if (is_prepack_src) {
const auto* constants = graph->constants();
if (constants &&
constant_id < static_cast<int>(constants->size())) {
const auto* vk_bytes = constants->Get(constant_id);
if (vk_bytes->offset() != UINT64_MAX) {
const uint8_t* src = constant_data + vk_bytes->offset();
wgpuQueueWriteBuffer(
queue_, tensor.buffer, 0, src, tensor.nbytes);
} else if (
vk_bytes->named_key() != nullptr &&
named_data_map != nullptr) {
// Constant stored in the PTE named-data map.
auto buf =
named_data_map->get_data(vk_bytes->named_key()->c_str());
if (!buf.ok()) {
throw std::runtime_error(
std::string("WebGPU: named constant '") +
vk_bytes->named_key()->c_str() +
"' not found in NamedDataMap");
}
if (buf->size() < tensor.nbytes) {
if (!constants ||
constant_id >= static_cast<int>(constants->size())) {
throw std::runtime_error(
"WebGPU: constant_id set but the constants table is missing "
"or the id is out of range");
}
const auto* vk_bytes = constants->Get(constant_id);
ConstantSource cs;
cs.nbytes = tensor.nbytes;
if (vk_bytes->offset() != UINT64_MAX) {
cs.inline_offset = vk_bytes->offset();
} else if (vk_bytes->named_key() != nullptr) {
cs.named_key = vk_bytes->named_key()->str();
} else {
throw std::runtime_error(
"WebGPU: constant has no inline offset and no named-data key");
}
constant_sources_[i] = std::move(cs);
}

// Defer constants consumed solely via prepack: skip the eager buffer.
const bool defer = is_prepack_src && direct_use_ids.count(i) == 0;
if (!defer) {
WGPUBufferDescriptor buf_desc = {};
buf_desc.size = std::max(tensor.nbytes, size_t(4));
buf_desc.usage = WGPUBufferUsage_Storage | WGPUBufferUsage_CopyDst |
WGPUBufferUsage_CopySrc;
buf_desc.mappedAtCreation = false;
tensor.buffer = wgpuDeviceCreateBuffer(device_, &buf_desc);

if (constant_id >= 0 && constant_data && tensor.nbytes > 0) {
const auto* constants = graph->constants();
if (constants &&
constant_id < static_cast<int>(constants->size())) {
const auto* vk_bytes = constants->Get(constant_id);
if (vk_bytes->offset() != UINT64_MAX) {
const uint8_t* src = constant_data + vk_bytes->offset();
wgpuQueueWriteBuffer(
queue_, tensor.buffer, 0, src, tensor.nbytes);
} else if (
vk_bytes->named_key() != nullptr &&
named_data_map != nullptr) {
// Constant stored in the PTE named-data map.
auto buf =
named_data_map->get_data(vk_bytes->named_key()->c_str());
if (!buf.ok()) {
throw std::runtime_error(
std::string("WebGPU: named constant '") +
vk_bytes->named_key()->c_str() +
"' not found in NamedDataMap");
}
if (buf->size() < tensor.nbytes) {
throw std::runtime_error(
std::string("WebGPU: named constant '") +
vk_bytes->named_key()->c_str() + "' undersized: have " +
std::to_string(buf->size()) + " bytes, need " +
std::to_string(tensor.nbytes));
}
wgpuQueueWriteBuffer(
queue_, tensor.buffer, 0, buf->data(), tensor.nbytes);
buf->Free();
} else {
throw std::runtime_error(
std::string("WebGPU: named constant '") +
vk_bytes->named_key()->c_str() + "' undersized: have " +
std::to_string(buf->size()) + " bytes, need " +
std::to_string(tensor.nbytes));
"WebGPU: constant has no inline offset and no named-data key");
}
wgpuQueueWriteBuffer(
queue_, tensor.buffer, 0, buf->data(), tensor.nbytes);
buf->Free();
} else {
throw std::runtime_error(
"WebGPU: constant has no inline offset and no named-data key");
"WebGPU: constant_id set but the constants table is missing "
"or the id is out of range");
}
} else {
} else if (constant_id >= 0 && tensor.nbytes > 0) {
// constant_id set but constant_data null -> fail loud.
throw std::runtime_error(
"WebGPU: constant_id set but the constants table is missing "
"or the id is out of range");
"WebGPU: constant_id set but constant_data is null");
}
} else if (constant_id >= 0 && tensor.nbytes > 0) {
// constant_id set but constant_data null -> fail loud.
throw std::runtime_error(
"WebGPU: constant_id set but constant_data is null");
}
} else {
// Shared buffer: track required size, defer allocation to pass 2
Expand Down Expand Up @@ -458,6 +531,47 @@ void WebGPUGraph::build(
webgpu_operator_registry().get_op_fn(op_name)(*this, args);
}
}

// Prepack nodes (Phase 3) materialized their constants directly into the
// consumer buffers via materialize_constant; no separate copy pass needed.
// The .pte bytes are freed right after build() returns (WebGPUBackend
// processed->Free()), so clear the build-only source pointers.
constant_data_ = nullptr;
named_data_map_ = nullptr;
}

void WebGPUGraph::materialize_constant(int const_value_id, WGPUBuffer dst) {
auto it = constant_sources_.find(const_value_id);
if (it == constant_sources_.end()) {
throw std::runtime_error(
"WebGPU prepack: no source recorded for constant id " +
std::to_string(const_value_id));
}
const ConstantSource& cs = it->second;
if (cs.nbytes == 0) {
return;
}
if (cs.inline_offset != UINT64_MAX) {
if (constant_data_ == nullptr) {
throw std::runtime_error("WebGPU prepack: inline constant data is null");
}
wgpuQueueWriteBuffer(
queue_, dst, 0, constant_data_ + cs.inline_offset, cs.nbytes);
} else if (!cs.named_key.empty() && named_data_map_ != nullptr) {
auto buf = named_data_map_->get_data(cs.named_key.c_str());
if (!buf.ok()) {
throw std::runtime_error(
"WebGPU prepack: named constant '" + cs.named_key + "' not found");
}
if (buf->size() < cs.nbytes) {
throw std::runtime_error(
"WebGPU prepack: named constant '" + cs.named_key + "' undersized");
}
wgpuQueueWriteBuffer(queue_, dst, 0, buf->data(), cs.nbytes);
buf->Free();
} else {
throw std::runtime_error("WebGPU prepack: constant has no source");
}
}

WGPUShaderModule WebGPUGraph::get_or_create_shader(
Expand Down Expand Up @@ -773,10 +887,11 @@ WebGPUMemoryStats WebGPUGraph::memory_stats() const {
for (size_t i = 0; i < value_types_.size(); i++) {
if (value_types_[i] == ValueType::Tensor && tensors_[i].nbytes > 0) {
stats.num_tensors++;
// Shared tensors are tracked via shared_buffer_sizes_
// Shared tensors are tracked via shared_buffer_sizes_; a deferred
// prepack-routed constant has no buffer (no GPU memory) -> not counted.
bool is_shared =
i < tensor_mem_obj_ids_.size() && tensor_mem_obj_ids_[i] >= 0;
if (!is_shared) {
if (!is_shared && tensors_[i].buffer != nullptr) {
stats.unshared_tensor_buffer_bytes += tensors_[i].nbytes;
}
}
Expand Down
21 changes: 21 additions & 0 deletions backends/webgpu/runtime/WebGPUGraph.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,15 @@ struct OutputCopy {
size_t nbytes = 0;
};

// Describes where the bytes of a prepack-routed constant come from. It holds
// only a location and a size, never a GPU resource; the bytes are written to
// a buffer later by WebGPUGraph::materialize_constant.
struct ConstantSource {
  // Byte offset into constant_data_. UINT64_MAX means the constant is not
  // stored inline and named_key should be consulted instead.
  uint64_t inline_offset{UINT64_MAX};
  // Key used to fetch the bytes from named_data_map_ when non-empty.
  std::string named_key{};
  // Number of bytes to transfer for this constant.
  size_t nbytes{0};
};

struct ExecuteConfig {
size_t chunk_size = 0;
size_t initial_chunk_size = 0;
Expand Down Expand Up @@ -180,6 +189,11 @@ class WebGPUGraph {
dispatches_.push_back(dispatch);
}

// Materialize a recorded prepack-routed constant into dst via one CPU->GPU
// transfer. Build-time only (the .pte bytes are freed after build()).
// Mirrors Vulkan prepack_standard.
void materialize_constant(int const_value_id, WGPUBuffer dst);

void add_uniform_buffer_bytes(size_t bytes) {
uniform_buffer_bytes_ += bytes;
}
Expand Down Expand Up @@ -286,6 +300,13 @@ class WebGPUGraph {

std::vector<WebGPUDispatch> dispatches_;

// Prepack-routed constant sources (offset/named-key + size); the prepack node
// materializes these once. constant_data_/named_data_map_ point at the .pte
// bytes and are valid only during build().
const uint8_t* constant_data_ = nullptr;
const executorch::runtime::NamedDataMap* named_data_map_ = nullptr;
std::unordered_map<int, ConstantSource> constant_sources_;

ExecuteConfig execute_config_;

// Caches for reusing GPU objects across dispatches.
Expand Down
52 changes: 52 additions & 0 deletions backends/webgpu/runtime/ops/prepack/Prepack.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

#include <executorch/backends/webgpu/runtime/WebGPUGraph.h>
#include <executorch/backends/webgpu/runtime/ops/OperatorRegistry.h>

#include <stdexcept>

namespace executorch::backends::webgpu {

namespace {

// Handler for et_vk.prepack.default: validates that the source constant and
// the output tensor agree on shape, element size and byte size, then asks the
// graph to write the constant's bytes into the output tensor's buffer.
//
// args must be exactly [src value id, out value id]. Any violation is
// reported as std::runtime_error.
void prepack_impl(WebGPUGraph& graph, const std::vector<int>& args) {
  // Single place that raises validation failures.
  const auto fail = [](const char* message) {
    throw std::runtime_error(message);
  };

  if (args.size() != 2) {
    fail("WebGPU prepack: expected 2 args (src, out)");
  }
  const int src_id = args[0];
  const int out_id = args[1];

  const auto& src_tensor = graph.get_tensor(src_id);
  const auto& out_tensor = graph.get_tensor(out_id);

  // The transfer is a raw byte copy, so the layout must match exactly.
  if (src_tensor.dims != out_tensor.dims) {
    fail("WebGPU prepack: src/out shape mismatch");
  }
  if (src_tensor.elem_size != out_tensor.elem_size) {
    fail("WebGPU prepack: src/out dtype mismatch (cast unsupported)");
  }
  if (src_tensor.nbytes != out_tensor.nbytes) {
    fail("WebGPU prepack: src/out byte-size mismatch");
  }
  if (out_tensor.buffer == nullptr) {
    fail("WebGPU prepack: null out buffer binding");
  }

  // The constant's bytes go directly into the output buffer; the source
  // tensor's own buffer is not read here.
  graph.materialize_constant(src_id, out_tensor.buffer);
}

} // namespace

// Associates the op name et_vk.prepack.default with prepack_impl in the
// WebGPU operator registry.
// NOTE(review): the WEBGPU_REGISTER_* macros are defined elsewhere;
// presumably this runs during static initialization -- confirm.
WEBGPU_REGISTER_OPERATORS {
WEBGPU_REGISTER_OP(et_vk.prepack.default, prepack_impl);
}

} // namespace executorch::backends::webgpu
Loading