Skip to content

Commit 7397dda

Browse files
authored
feat: add webm support (#1391)
1 parent 9369ab7 commit 7397dda

12 files changed

Lines changed: 286 additions & 11 deletions

File tree

.github/workflows/build.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -239,6 +239,7 @@ jobs:
239239
id: build-push
240240
uses: docker/build-push-action@v6
241241
with:
242+
context: .
242243
platforms: linux/amd64
243244
push: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
244245
file: Dockerfile.${{ matrix.variant }}

.gitmodules

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,3 +7,6 @@
77
[submodule "thirdparty/libwebp"]
88
path = thirdparty/libwebp
99
url = https://github.com/webmproject/libwebp.git
10+
[submodule "thirdparty/libwebm"]
11+
path = thirdparty/libwebm
12+
url = https://github.com/webmproject/libwebm.git

CMakeLists.txt

Lines changed: 37 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,16 @@ else()
3232
set(SD_WEBP_DEFAULT ${SD_USE_SYSTEM_WEBP})
3333
endif()
3434

35+
set(SD_SUBMODULE_WEBM FALSE)
36+
if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/thirdparty/libwebm/CMakeLists.txt")
37+
set(SD_SUBMODULE_WEBM TRUE)
38+
endif()
39+
if(SD_SUBMODULE_WEBM)
40+
set(SD_WEBM_DEFAULT ON)
41+
else()
42+
set(SD_WEBM_DEFAULT ${SD_USE_SYSTEM_WEBM})
43+
endif()
44+
3545
#
3646
# Option list
3747
#
@@ -41,6 +51,8 @@ endif()
4151
option(SD_BUILD_EXAMPLES "sd: build examples" ${SD_STANDALONE})
4252
option(SD_WEBP "sd: enable WebP image I/O support" ${SD_WEBP_DEFAULT})
4353
option(SD_USE_SYSTEM_WEBP "sd: link against system libwebp" OFF)
54+
option(SD_WEBM "sd: enable WebM video output support" ${SD_WEBM_DEFAULT})
55+
option(SD_USE_SYSTEM_WEBM "sd: link against system libwebm" OFF)
4456
option(SD_CUDA "sd: cuda backend" OFF)
4557
option(SD_HIPBLAS "sd: rocm backend" OFF)
4658
option(SD_METAL "sd: metal backend" OFF)
@@ -111,7 +123,31 @@ if(SD_WEBP)
111123
)
112124
endif()
113125
endif()
114-
add_compile_definitions(SD_USE_WEBP)
126+
endif()
127+
128+
if(SD_WEBM)
129+
if(NOT SD_WEBP)
130+
message(FATAL_ERROR "SD_WEBM requires SD_WEBP because WebM output reuses libwebp VP8 encoding.")
131+
endif()
132+
if(NOT SD_SUBMODULE_WEBM AND NOT SD_USE_SYSTEM_WEBM)
133+
message(FATAL_ERROR "WebM support enabled but no source found.
134+
Either initialize the submodule:\n git submodule update --init thirdparty/libwebm\n\n"
135+
"Or link against system library:\n cmake (...) -DSD_USE_SYSTEM_WEBM=ON")
136+
endif()
137+
if(SD_USE_SYSTEM_WEBM)
138+
find_path(WEBM_INCLUDE_DIR
139+
NAMES mkvmuxer/mkvmuxer.h mkvparser/mkvparser.h common/webmids.h
140+
PATH_SUFFIXES webm
141+
REQUIRED)
142+
find_library(WEBM_LIBRARY
143+
NAMES webm libwebm
144+
REQUIRED)
145+
146+
add_library(webm UNKNOWN IMPORTED)
147+
set_target_properties(webm PROPERTIES
148+
IMPORTED_LOCATION "${WEBM_LIBRARY}"
149+
INTERFACE_INCLUDE_DIRECTORIES "${WEBM_INCLUDE_DIR}")
150+
endif()
115151
endif()
116152

117153
set(SD_LIB stable-diffusion)

docs/build.md

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,15 +16,23 @@ git submodule init
1616
git submodule update
1717
```
1818

19-
## WebP Support in Examples
19+
## WebP and WebM Support in Examples
2020

21-
The example applications (`examples/cli` and `examples/server`) use `libwebp` to support WebP image I/O. This is enabled by default.
21+
The example applications (`examples/cli` and `examples/server`) use `libwebp` to support WebP image I/O, and `examples/cli` can also use `libwebm` for `.webm` video output. Both are enabled by default when the corresponding submodules are present. WebM output currently reuses `libwebp` to encode each frame as VP8 before muxing with `libwebm`, so `SD_WEBM` requires `SD_WEBP` to be enabled as well.
2222

23-
If you do not want WebP support, you can disable it at configure time:
23+
If you do not want WebP/WebM support, you can disable them at configure time:
2424

2525
```shell
2626
mkdir build && cd build
27-
cmake .. -DSD_WEBP=OFF
27+
cmake .. -DSD_WEBP=OFF -DSD_WEBM=OFF
28+
cmake --build . --config Release
29+
```
30+
31+
If the submodules are not available, you can also link against system packages instead:
32+
33+
```shell
34+
mkdir build && cd build
35+
cmake .. -DSD_USE_SYSTEM_WEBP=ON -DSD_USE_SYSTEM_WEBM=ON
2836
cmake --build . --config Release
2937
```
3038

examples/cli/CMakeLists.txt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,11 @@ add_executable(${TARGET}
99
install(TARGETS ${TARGET} RUNTIME)
1010
target_link_libraries(${TARGET} PRIVATE stable-diffusion zip ${CMAKE_THREAD_LIBS_INIT})
1111
if(SD_WEBP)
12+
target_compile_definitions(${TARGET} PRIVATE SD_USE_WEBP)
1213
target_link_libraries(${TARGET} PRIVATE webp libwebpmux)
1314
endif()
15+
if(SD_WEBM)
16+
target_compile_definitions(${TARGET} PRIVATE SD_USE_WEBM)
17+
target_link_libraries(${TARGET} PRIVATE webm)
18+
endif()
1419
target_compile_features(${TARGET} PUBLIC c_std_11 cxx_std_17)

examples/cli/README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@ usage: ./bin/sd-cli [options]
55
66
CLI Options:
77
-o, --output <string> path to write result image to. you can use printf-style %d format specifiers for image sequences (default:
8-
./output.png) (eg. output_%03d.png). For video generation, single-file outputs support .avi and animated .webp
9-
--preview-path <string> path to write preview image to (default: ./preview.png). Multi-frame previews support .avi and animated .webp
8+
./output.png) (eg. output_%03d.png). For video generation, single-file outputs support .avi, .webm, and animated .webp
9+
--preview-path <string> path to write preview image to (default: ./preview.png). Multi-frame previews support .avi, .webm, and animated .webp
1010
--preview-interval <int> interval in denoising steps between consecutive updates of the image preview file (default is 1, meaning updating at
1111
every step)
1212
--output-begin-idx <int> starting index for output image sequence, must be non-negative (default 0 if specified %d in output path, 1 otherwise)

examples/cli/main.cpp

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ struct SDCliParams {
5858
options.string_options = {
5959
{"-o",
6060
"--output",
61-
"path to write result image to. you can use printf-style %d format specifiers for image sequences (default: ./output.png) (eg. output_%03d.png)",
61+
"path to write result image to. you can use printf-style %d format specifiers for image sequences (default: ./output.png) (eg. output_%03d.png). Single-file video outputs support .avi, .webm, and animated .webp",
6262
&output_path},
6363
{"",
6464
"--image",
@@ -70,7 +70,7 @@ struct SDCliParams {
7070
&metadata_format},
7171
{"",
7272
"--preview-path",
73-
"path to write preview image to (default: ./preview.png)",
73+
"path to write preview image to (default: ./preview.png). Multi-frame previews support .avi, .webm, and animated .webp",
7474
&preview_path},
7575
};
7676

@@ -396,7 +396,9 @@ bool save_results(const SDCliParams& cli_params,
396396
if (!ext.empty()) {
397397
if (output_format == EncodedImageFormat::JPEG ||
398398
output_format == EncodedImageFormat::PNG ||
399-
output_format == EncodedImageFormat::WEBP) {
399+
output_format == EncodedImageFormat::WEBP ||
400+
ext_lower == ".avi" ||
401+
ext_lower == ".webm") {
400402
base_path.replace_extension();
401403
}
402404
}
@@ -438,7 +440,7 @@ bool save_results(const SDCliParams& cli_params,
438440
}
439441

440442
if (cli_params.mode == VID_GEN && num_results > 1) {
441-
if (ext_lower != ".avi" && ext_lower != ".webp")
443+
if (ext_lower != ".avi" && ext_lower != ".webp" && ext_lower != ".webm")
442444
ext = ".avi";
443445
fs::path video_path = base_path;
444446
video_path += ext;

examples/common/media_io.cpp

Lines changed: 181 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,11 @@
3030
#include "webp/mux.h"
3131
#endif
3232

33+
#ifdef SD_USE_WEBM
34+
#include "mkvmuxer/mkvmuxer.h"
35+
#include "mkvmuxer/mkvwriter.h"
36+
#endif
37+
3338
namespace fs = std::filesystem;
3439

3540
namespace {
@@ -71,6 +76,13 @@ bool write_binary_file_bytes(const std::string& path, const std::vector<uint8_t>
7176
return true;
7277
}
7378

79+
// Decodes the four bytes starting at `data` as an unsigned 32-bit little-endian
// integer: data[0] is the least significant byte and data[3] the most significant.
// The caller must guarantee that at least four bytes are readable.
uint32_t read_u32_le_bytes(const uint8_t* data) {
    uint32_t value = 0;
    // Start at the most significant byte; each shift makes room for the next lower one.
    for (int idx = 3; idx >= 0; --idx) {
        value = (value << 8) | static_cast<uint32_t>(data[idx]);
    }
    return value;
}
85+
7486
int stbi_ext_write_png_to_func(stbi_write_func* func,
7587
void* context,
7688
int x,
@@ -289,6 +301,76 @@ bool encode_webp_image_to_vector(const uint8_t* image,
289301
WebPMuxDelete(mux);
290302
return ok;
291303
}
304+
305+
#ifdef SD_USE_WEBM
306+
bool extract_vp8_frame_from_webp(const std::vector<uint8_t>& webp_data, std::vector<uint8_t>& vp8_frame) {
307+
if (!is_webp_signature(webp_data.data(), webp_data.size())) {
308+
return false;
309+
}
310+
311+
size_t offset = 12;
312+
while (offset + 8 <= webp_data.size()) {
313+
const uint8_t* chunk = webp_data.data() + offset;
314+
const uint32_t chunk_len = read_u32_le_bytes(chunk + 4);
315+
const size_t chunk_start = offset + 8;
316+
const size_t padded_len = static_cast<size_t>(chunk_len) + (chunk_len & 1u);
317+
318+
if (chunk_start + chunk_len > webp_data.size()) {
319+
return false;
320+
}
321+
322+
if (memcmp(chunk, "VP8 ", 4) == 0) {
323+
vp8_frame.assign(webp_data.data() + chunk_start,
324+
webp_data.data() + chunk_start + chunk_len);
325+
return !vp8_frame.empty();
326+
}
327+
328+
offset = chunk_start + padded_len;
329+
}
330+
331+
return false;
332+
}
333+
334+
bool encode_sd_image_to_vp8_frame(const sd_image_t& image, int quality, std::vector<uint8_t>& vp8_frame) {
335+
if (image.data == nullptr || image.width == 0 || image.height == 0) {
336+
return false;
337+
}
338+
339+
const int width = static_cast<int>(image.width);
340+
const int height = static_cast<int>(image.height);
341+
const int input_channel = static_cast<int>(image.channel);
342+
if (input_channel != 1 && input_channel != 3 && input_channel != 4) {
343+
return false;
344+
}
345+
346+
std::vector<uint8_t> rgb_buffer;
347+
const uint8_t* rgb_data = image.data;
348+
if (input_channel == 1) {
349+
rgb_buffer.resize(static_cast<size_t>(width) * static_cast<size_t>(height) * 3);
350+
for (int i = 0; i < width * height; ++i) {
351+
rgb_buffer[i * 3 + 0] = image.data[i];
352+
rgb_buffer[i * 3 + 1] = image.data[i];
353+
rgb_buffer[i * 3 + 2] = image.data[i];
354+
}
355+
rgb_data = rgb_buffer.data();
356+
} else if (input_channel == 4) {
357+
rgb_buffer.resize(static_cast<size_t>(width) * static_cast<size_t>(height) * 3);
358+
for (int i = 0; i < width * height; ++i) {
359+
rgb_buffer[i * 3 + 0] = image.data[i * 4 + 0];
360+
rgb_buffer[i * 3 + 1] = image.data[i * 4 + 1];
361+
rgb_buffer[i * 3 + 2] = image.data[i * 4 + 2];
362+
}
363+
rgb_data = rgb_buffer.data();
364+
}
365+
366+
std::vector<uint8_t> encoded_webp;
367+
if (!encode_webp_image_to_vector(rgb_data, width, height, 3, "", quality, encoded_webp)) {
368+
return false;
369+
}
370+
371+
return extract_vp8_frame_from_webp(encoded_webp, vp8_frame);
372+
}
373+
#endif
292374
#endif
293375

294376
uint8_t* load_image_common(bool from_memory,
@@ -861,6 +943,99 @@ int create_animated_webp_from_sd_images(const char* filename, sd_image_t* images
861943
}
862944
#endif
863945

946+
#ifdef SD_USE_WEBM
947+
int create_webm_from_sd_images(const char* filename, sd_image_t* images, int num_images, int fps, int quality) {
948+
if (num_images == 0) {
949+
fprintf(stderr, "Error: Image array is empty.\n");
950+
return -1;
951+
}
952+
if (fps <= 0) {
953+
fprintf(stderr, "Error: FPS must be positive.\n");
954+
return -1;
955+
}
956+
957+
const int width = static_cast<int>(images[0].width);
958+
const int height = static_cast<int>(images[0].height);
959+
if (width <= 0 || height <= 0) {
960+
fprintf(stderr, "Error: Invalid frame dimensions.\n");
961+
return -1;
962+
}
963+
964+
mkvmuxer::MkvWriter writer;
965+
if (!writer.Open(filename)) {
966+
fprintf(stderr, "Error: Could not open WebM file for writing.\n");
967+
return -1;
968+
}
969+
970+
const int ret = [&]() -> int {
971+
mkvmuxer::Segment segment;
972+
if (!segment.Init(&writer)) {
973+
fprintf(stderr, "Error: Failed to initialize WebM muxer.\n");
974+
return -1;
975+
}
976+
977+
segment.set_mode(mkvmuxer::Segment::kFile);
978+
segment.OutputCues(true);
979+
980+
const uint64_t track_number = segment.AddVideoTrack(width, height, 0);
981+
if (track_number == 0) {
982+
fprintf(stderr, "Error: Failed to add VP8 video track.\n");
983+
return -1;
984+
}
985+
if (!segment.CuesTrack(track_number)) {
986+
fprintf(stderr, "Error: Failed to set WebM cues track.\n");
987+
return -1;
988+
}
989+
990+
mkvmuxer::VideoTrack* video_track = static_cast<mkvmuxer::VideoTrack*>(segment.GetTrackByNumber(track_number));
991+
if (video_track != nullptr) {
992+
video_track->set_display_width(static_cast<uint64_t>(width));
993+
video_track->set_display_height(static_cast<uint64_t>(height));
994+
video_track->set_frame_rate(static_cast<double>(fps));
995+
}
996+
segment.GetSegmentInfo()->set_writing_app("stable-diffusion.cpp");
997+
segment.GetSegmentInfo()->set_muxing_app("stable-diffusion.cpp");
998+
999+
const uint64_t frame_duration_ns = std::max<uint64_t>(
1000+
1, static_cast<uint64_t>(std::llround(1000000000.0 / static_cast<double>(fps))));
1001+
uint64_t timestamp_ns = 0;
1002+
1003+
for (int i = 0; i < num_images; ++i) {
1004+
const sd_image_t& image = images[i];
1005+
if (static_cast<int>(image.width) != width || static_cast<int>(image.height) != height) {
1006+
fprintf(stderr, "Error: Frame dimensions do not match.\n");
1007+
return -1;
1008+
}
1009+
1010+
std::vector<uint8_t> vp8_frame;
1011+
if (!encode_sd_image_to_vp8_frame(image, quality, vp8_frame)) {
1012+
fprintf(stderr, "Error: Failed to encode frame %d as VP8.\n", i);
1013+
return -1;
1014+
}
1015+
1016+
if (!segment.AddFrame(vp8_frame.data(),
1017+
static_cast<uint64_t>(vp8_frame.size()),
1018+
track_number,
1019+
timestamp_ns,
1020+
true)) {
1021+
fprintf(stderr, "Error: Failed to mux frame %d into WebM.\n", i);
1022+
return -1;
1023+
}
1024+
1025+
timestamp_ns += frame_duration_ns;
1026+
}
1027+
1028+
if (!segment.Finalize()) {
1029+
fprintf(stderr, "Error: Failed to finalize WebM output.\n");
1030+
return -1;
1031+
}
1032+
return 0;
1033+
}();
1034+
writer.Close();
1035+
return ret;
1036+
}
1037+
#endif
1038+
8641039
int create_video_from_sd_images(const char* filename, sd_image_t* images, int num_images, int fps, int quality) {
8651040
std::string path = filename ? filename : "";
8661041
auto pos = path.find_last_of('.');
@@ -869,6 +1044,12 @@ int create_video_from_sd_images(const char* filename, sd_image_t* images, int nu
8691044
ch = static_cast<char>(tolower(static_cast<unsigned char>(ch)));
8701045
}
8711046

1047+
#ifdef SD_USE_WEBM
1048+
if (ext == ".webm") {
1049+
return create_webm_from_sd_images(filename, images, num_images, fps, quality);
1050+
}
1051+
#endif
1052+
8721053
#ifdef SD_USE_WEBP
8731054
if (ext == ".webp") {
8741055
return create_animated_webp_from_sd_images(filename, images, num_images, fps, quality);

examples/common/media_io.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,14 @@ int create_animated_webp_from_sd_images(const char* filename,
6767
int quality = 90);
6868
#endif
6969

70+
#ifdef SD_USE_WEBM
71+
// Writes `num_images` frames from `images` to a WebM file at `filename`.
// Each frame is encoded as VP8 (with `quality` forwarded to the encoder) and
// timestamps are spaced according to `fps`, which must be positive. All frames
// must share the dimensions of the first frame.
// Returns 0 on success and -1 on failure. Only declared when SD_USE_WEBM is defined.
int create_webm_from_sd_images(const char* filename,
72+
sd_image_t* images,
73+
int num_images,
74+
int fps,
75+
int quality = 90);
76+
#endif
77+
7078
int create_video_from_sd_images(const char* filename,
7179
sd_image_t* images,
7280
int num_images,

0 commit comments

Comments
 (0)