Skip to content
Open
Show file tree
Hide file tree
Changes from 16 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
77 changes: 67 additions & 10 deletions codegen/hparser.py
Original file line number Diff line number Diff line change
@@ -1,28 +1,66 @@
from cffi import FFI
from cffi.cparser import _preprocess

from codegen.utils import print, remove_c_comments
from codegen.files import read_file

from wgpu._coreutils import get_header_filename

# from wgpu.backends.wgpu_native._ffi import _get_wgpu_header # turns out this import has plenty of side effects -.-
_parser = None


def _get_wgpu_header():
# TODO: likely remove this duplicated code as the import above is the smarter way to stay in sync
def _get_wgpu_header(*filenames):
"""Func written so we can use this in both wgpu_native/_ffi.py and codegen/hparser.py"""
# Read files
# TODO: maybe this should be per file instead of concat at the beginning? so it's a bit easier to debug step just wgpu.h for exmaple
lines1 = []
lines1.extend(read_file("resources", "webgpu.h").splitlines())
lines1.extend(read_file("resources", "wgpu.h").splitlines())
for filename in filenames:
with open(filename, "rb") as f:
lines1.extend(
f.read()
.decode()
.replace("\r\n", "\n")
.replace("\\\n", "")
.splitlines(True)
)
# Deal with pre-processor commands, because cffi cannot handle them.
# Just removing them, plus a few extra lines, seems to do the trick.
lines2 = []
in_ifdef = False
for line in lines1:
if line.startswith("#define ") and len(line.split()) > 2 and "0x" in line:
# skip #ifdef blocks, which cffi doesn't support. In both headers they are used for `#ifdef __cplusplus` which we were skipping anyway.
if line.startswith("#ifdef "):
in_ifdef = True
continue
if line.startswith("#endif"):
in_ifdef = False
continue
if in_ifdef:
continue
if (
line.startswith("#define ")
and len(line.split()) > 2
and ("0x" in line or "_MAX" in line)
):
# pattern to find: #define WGPU_CONSTANT (0x1234)
# we use ffi.sizeof() to hopefully get the correct max sizes per platform
# we don't have ffi in this namespace, so I just put the hardcoded values for now, we could use ctypes.sizeof(ctypes.c_size_t)
max_size = hex((1 << (8 * 8)) - 1) # sizeof(size_t)
max_32 = hex((1 << (4 * 8)) - 1) # sizeof(uint32_t)
max_64 = hex((1 << (8 * 8)) - 1) # sizeof(uint64_t)
line = (
line.replace("SIZE_MAX", max_size)
.replace("UINT32_MAX", max_32)
.replace("UINT64_MAX", max_64)
)
# cffi seems to struggle with these macros, so we can just skip them I hope, the idl spec alreay contains defaults.
if line.startswith("#define") and "_INIT" in line:
# print("Dropping line from header:", line.strip())
continue
line = line.replace("(", "").replace(")", "")
elif line.startswith("#"):
continue
elif 'extern "C"' in line:
continue
for define_to_drop in [
"WGPU_EXPORT ",
"WGPU_NULLABLE ",
Expand All @@ -44,10 +82,27 @@ def get_h_parser(*, allow_cache=True):
if _parser and allow_cache:
return _parser

source = _get_wgpu_header()
source = _get_wgpu_header(
get_header_filename("webgpu.h"),
get_header_filename("wgpu.h"),
)

# TODO: we have file management utils, so perhaps we can just use them.
# just simply implementation to test the idea.

cleaned_source, macros = _preprocess(source)
cleaned_source += "\n".join(
f"#define {k} {v}" for k, v in macros.items()
) # add them back?
cleaned_source = "\n".join(
line.strip() for line in cleaned_source.splitlines() if line.strip()
)
combined_header_file = get_header_filename("combined_header.h")
with open(combined_header_file, "w") as f:
f.write(cleaned_source)

# Create parser
hp = HParser(source)
hp = HParser(cleaned_source)
hp.parse()
_parser = hp
return hp
Expand All @@ -74,8 +129,10 @@ def parse(self, verbose=True):
stats = ", ".join(f"{len(getattr(self, key))} {key}" for key in keys)
print("webgpu.h/wgpu.h define " + stats)

# TODO: maybe we should use pycparser as it's used by cffi anyway and we have that.
def _parse_from_h(self):
code = self.source
# code, _ = _preprocess(code) # private method from _cffi, even the pycparser cffi uses relies on an external preprocessor to remove comments

# Collect enums and flags. This is easy.
# Note that flags are first defined as enums and then redefined as flags later.
Expand All @@ -91,7 +148,7 @@ def _parse_from_h(self):
# Decompose "typedef enum XX {...} XX;"
name1 = code[i1 + 13 : i2].strip()
name2 = code[i3 + 1 : i4].strip()
assert name1 == name2
assert name1 == name2, f"mismatch in enum name: {name1} vs {name2}"
assert name1.startswith("WGPU")
name = name1[4:]
self.enums[name] = enum = {}
Expand Down
99 changes: 36 additions & 63 deletions docs/backends.rst
Original file line number Diff line number Diff line change
Expand Up @@ -61,100 +61,73 @@ The wgpu_native backend provides a few extra functionalities:
:return: Device
:rtype: wgpu.GPUDevice

The wgpu_native backend provides support for push constants.
Since WebGPU does not support this feature, documentation on its use is hard to find.
A full explanation of push constants and its use in Vulkan can be found
`here <https://vkguide.dev/docs/chapter-3/push_constants/>`_.
Using push constants in WGPU closely follows the Vulkan model.
The wgpu_native backend provides support for immediates.
Immediates are not yet part of the WebGPU spec, but the native webgpu headers have officially converged on them.
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it looks like this is rather close to becoming official spec gpuweb/gpuweb#5423


The advantage of push constants is that they are typically faster to update than uniform buffers.
Modifications to push constants are included in the command encoder; updating a uniform
buffer involves sending a separate command to the GPU.
The disadvantage of push constants is that their size limit is much smaller. The limit
is guaranteed to be at least 128 bytes, and 256 bytes is typical.
Immediates offer a way to send a small amount of data to the GPU directly in the command encoder, with no need for uniform buffer uploads.
They are restricted to rather small sizes, usually 128 or 256 bytes.

Given an adapter, first determine if it supports push constants::
Given an adapter, first determine if it supports immediates::

>> "push-constants" in adapter.features
>> "immediates" in adapter.features
True

If push constants are supported, determine the maximum number of bytes that can
be allocated for push constants::
If immediates are supported, determine the maximum number of bytes that can
be allocated for immediates::

>> adapter.limits["max-push-constant-size"]
>> adapter.limits["max-immediate-size"]
256

You must tell the adapter to create a device that supports push constants,
and you must tell it the number of bytes of push constants that you are using.
You must tell the adapter to create a device that supports immediates,
and you must tell it the number of bytes of immediates that you are using.
Overestimating is okay::

device = adapter.request_device_sync(
required_features=["push-constants"],
required_limits={"max-push-constant-size": 256},
required_features=["immediates"],
required_limits={"max-immediate-size": 256},
)

Creating a push constant in your shader code is similar to the way you would create
Creating an immediate data struct in your shader code is similar to the way you would create
a uniform buffer.
The fields that are only used in the ``@vertex`` shader should be separated from the fields
that are only used in the ``@fragment`` shader which should be separated from the fields
used in both shaders::
The same data can be accessed across all shader stages: vertex, fragment and compute.

struct PushConstants {
// vertex shader
struct Immediates {
vertex_transform: vec4x4f,
// fragment shader
fragment_transform: vec4x4f,
// used in both
generic_transform: vec4x4f,
fragment_color: vec4f,
pick_position: vec2f,
frame_counter: u32,
}
var<push_constant> push_constants: PushConstants;
var<immediate> immediate_data: Immediates;

To the pipeline layout for this shader, use
``wgpu.backends.wpgu_native.create_pipeline_layout`` instead of
``device.create_pipelinelayout``. It takes an additional argument,
``push_constant_layouts``, describing
the layout of the push constants. For example, in the above example::
``device.create_pipeline_layout``. It takes an additional argument,
``immediate_size``, which is simply the number of bytes of immediate data you are using.

push_constant_layouts = [
{"visibility": ShaderState.VERTEX, "start": 0, "end": 64},
{"visibility": ShaderStage.FRAGMENT, "start": 64, "end": 128},
{"visibility": ShaderState.VERTEX + ShaderStage.FRAGMENT , "start": 128, "end": 192},
],
Finally, you set the value of the immediates by using
``wgpu.backends.wgpu_native.set_immediates``::

Finally, you set the value of the push constant by using
``wgpu.backends.wpgu_native.set_push_constants``::
set_immediates(pass_encoder, offset=0, size_in_bytes=64, data=<64 bytes>, data_offset=0)

set_push_constants(this_pass, ShaderStage.VERTEX, 0, 64, <64 bytes>)
set_push_constants(this_pass, ShaderStage.FRAGMENT, 64, 128, <64 bytes>)
set_push_constants(this_pass, ShaderStage.VERTEX + ShaderStage.FRAGMENT, 128, 192, <64 bytes>)

Bytes must be set separately for each of the three shader stages. If the push constant has
already been set, on the next use you only need to call ``set_push_constants`` on those
bytes you wish to change.

.. py:function:: wgpu.backends.wpgu_native.create_pipeline_layout(device, *, label="", bind_group_layouts, push_constant_layouts=[])
.. py:function:: wgpu.backends.wgpu_native.create_pipeline_layout(device, *, label="", bind_group_layouts, immediate_size=0)

This method provides the same functionality as :func:`wgpu.GPUDevice.create_pipeline_layout`,
but provides an extra `push_constant_layouts` argument.
When using push constants, this argument is a list of dictionaries, where each item
in the dictionary has three fields: `visibility`, `start`, and `end`.
but provides an extra `immediate_size` argument.
When using immediates, this argument is the number of bytes of immediate data you are using.

:param device: The device on which we are creating the pipeline layout
:param label: An optional label
:param bind_group_layouts:
:param push_constant_layouts: Described above.
:param bind_group_layouts:
:param immediate_size: The number of bytes of immediate data.

.. py:function:: wgpu.backends.wgpu_native.set_push_constants(render_pass_encoder, visibility, offset, size_in_bytes, data, data_offset=0)
.. py:function:: wgpu.backends.wgpu_native.set_immediates(render_pass_encoder, offset, size_in_bytes, data, data_offset=0)

This function requires that the underlying GPU implement `push_constants`.
These push constants are a buffer of bytes available to the `fragment` and `vertex`
shaders. They are similar to a bound buffer, but the buffer is set using this
function call.
This function requires that the underlying GPU implement `immediates`.
These immediates are a buffer of bytes available to all shader stages.

:param render_pass_encoder: The render pass encoder to which we are pushing constants.
:param visibility: The stages (vertex, fragment, or both) to which these constants are visible
:param offset: The offset into the push constants at which the bytes are to be written
:param size_in_bytes: The number of bytes to copy from the ata
:param render_pass_encoder: The render pass encoder to which we are providing immediates.
:param offset: The offset into the immediate data at which the bytes are to be written
:param size_in_bytes: The number of bytes to copy from the data
:param data: The data to copy to the buffer
:param data_offset: The starting offset in the data at which to begin copying.

Expand Down
Loading
Loading