Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 58 additions & 0 deletions crates/bevy_post_process/src/bloom/bloom.wgsl
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,52 @@ fn karis_average(color: vec3<f32>) -> f32 {
return 1.0 / (1.0 + luma);
}

// BloomDownKernel4 https://www.shadertoy.com/view/mdsyDf
#ifdef FAST_BLUR
// Fast 4-tap downsample used when `FAST_BLUR` is defined.
//
// Reads `input_texture` at the four diagonal neighbours of `uv`, each 0.75 input
// texels (stretched by `uniforms.scale`) away along both axes, and averages them
// with equal weights of 0.25.
//
// When `FIRST_DOWNSAMPLE` is defined, the averaged color is additionally multiplied
// by `karis_average` of itself before being returned.
fn bloom_down_kernel4(uv: vec2<f32>) -> vec3<f32> {
    // Size of one input texel in UV space, multiplied by the configured bloom scale.
    let texel = uniforms.scale / vec2<f32>(textureDimensions(input_texture));
    // Distance from `uv` to each tap along x and y.
    let d = 0.75 * texel;

    let tap_nn = textureSample(input_texture, s, uv + vec2<f32>(-d.x, -d.y)).rgb;
    let tap_pn = textureSample(input_texture, s, uv + vec2<f32>(d.x, -d.y)).rgb;
    let tap_np = textureSample(input_texture, s, uv + vec2<f32>(-d.x, d.y)).rgb;
    let tap_pp = textureSample(input_texture, s, uv + vec2<f32>(d.x, d.y)).rgb;

    // Equal-weight average of the four taps.
    let average = tap_nn * 0.25 + tap_pn * 0.25 + tap_np * 0.25 + tap_pp * 0.25;

#ifdef FIRST_DOWNSAMPLE
    return average * karis_average(average);
#else
    return average;
#endif
}

// Modified BloomUpKernel4B, see https://www.shadertoy.com/view/mdsyDf
//
// Fast 4-tap upsample used when `FAST_BLUR` is defined. Returns the weighted sum of
// four samples of `input_texture` taken around `uv`.
fn bloom_up_kernel4b(uv: vec2<f32>) -> vec3<f32> {
    // Size of one input texel in UV space, multiplied by the configured bloom scale.
    let texel = uniforms.scale / vec2<f32>(textureDimensions(input_texture));

    // Per-tap sub-texel offsets.
    let l00 = vec2<f32>(0.347209, 0.526425);
    let l10 = vec2<f32>(0.109840, 0.334045);
    let l01 = vec2<f32>(0.334045, 0.109840);
    let l11 = vec2<f32>(0.526425, 0.347209);

    // Unlike the reference BloomUpKernel4B, the weights are flipped (swapped pairwise),
    // the positions are never flipped, and a constant 0.1 offset is added to every tap.
    // This eliminates grid-like artifacts and branching, at the cost of slightly less
    // radial symmetry.
    let reference_weights = vec4<f32>(0.288971, 0.211029, 0.211029, 0.288971);
    let w = reference_weights.yxwz;
    let ofs = 0.1;

    // Tap positions relative to `uv`, in UV units.
    let p0 = (vec2<f32>(-0.5, -1.5) + ofs + l00) * texel;
    let p1 = (vec2<f32>(0.5, -0.5) + ofs + l10) * texel;
    let p2 = (vec2<f32>(-0.5, 0.5) + ofs + l01) * texel;
    let p3 = (vec2<f32>(-1.5, -0.5) + ofs + l11) * texel;

    let t0 = textureSample(input_texture, s, uv + p0).rgb;
    let t1 = textureSample(input_texture, s, uv + p1).rgb;
    let t2 = textureSample(input_texture, s, uv + p2).rgb;
    let t3 = textureSample(input_texture, s, uv + p3).rgb;

    return t0 * w.x + t1 * w.y + t2 * w.z + t3 * w.w;
}
#endif

// [COD] slide 153
fn sample_input_13_tap(uv: vec2<f32>) -> vec3<f32> {
#ifdef UNIFORM_SCALE
Expand Down Expand Up @@ -161,7 +207,11 @@ fn sample_input_3x3_tent(uv: vec2<f32>) -> vec3<f32> {
@fragment
fn downsample_first(@location(0) output_uv: vec2<f32>) -> @location(0) vec4<f32> {
let sample_uv = uniforms.viewport.xy + output_uv * uniforms.viewport.zw;
#ifdef FAST_BLUR
var sample = bloom_down_kernel4(sample_uv);
#else
var sample = sample_input_13_tap(sample_uv);
#endif
// Lower bound of 0.0001 is to avoid propagating multiplying by 0.0 through the
// downscaling and upscaling which would result in black boxes.
// The upper bound is to prevent NaNs.
Expand All @@ -178,10 +228,18 @@ fn downsample_first(@location(0) output_uv: vec2<f32>) -> @location(0) vec4<f32>

// Fragment entry point for a downsampling pass.
// Uses the 4-tap kernel when `FAST_BLUR` is defined and the 13-tap filter otherwise;
// the output alpha is always 1.0.
@fragment
fn downsample(@location(0) uv: vec2<f32>) -> @location(0) vec4<f32> {
#ifdef FAST_BLUR
    let rgb = bloom_down_kernel4(uv);
#else
    let rgb = sample_input_13_tap(uv);
#endif
    return vec4<f32>(rgb, 1.0);
}

// Fragment entry point for an upsampling pass.
// Uses the 4-tap kernel when `FAST_BLUR` is defined and the 3x3 tent filter otherwise;
// the output alpha is always 1.0.
@fragment
fn upsample(@location(0) uv: vec2<f32>) -> @location(0) vec4<f32> {
#ifdef FAST_BLUR
    let rgb = bloom_up_kernel4b(uv);
#else
    let rgb = sample_input_3x3_tent(uv);
#endif
    return vec4<f32>(rgb, 1.0);
}
7 changes: 7 additions & 0 deletions crates/bevy_post_process/src/bloom/downsampling_pipeline.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ pub struct BloomDownsamplingPipelineKeys {
prefilter: bool,
first_downsample: bool,
uniform_scale: bool,
high_quality: bool,
}

/// The uniform struct extracted from [`Bloom`] attached to a Camera.
Expand Down Expand Up @@ -110,6 +111,10 @@ impl SpecializedRenderPipeline for BloomDownsamplingPipeline {
shader_defs.push("FIRST_DOWNSAMPLE".into());
}

if !key.high_quality {
shader_defs.push("FAST_BLUR".into());
}

if key.prefilter {
shader_defs.push("USE_THRESHOLD".into());
}
Expand Down Expand Up @@ -161,6 +166,7 @@ pub fn prepare_downsampling_pipeline(
prefilter,
first_downsample: false,
uniform_scale: bloom.scale == Vec2::ONE,
high_quality: bloom.high_quality,
},
);

Expand All @@ -171,6 +177,7 @@ pub fn prepare_downsampling_pipeline(
prefilter,
first_downsample: true,
uniform_scale: bloom.scale == Vec2::ONE,
high_quality: bloom.high_quality,
},
);

Expand Down
23 changes: 12 additions & 11 deletions crates/bevy_post_process/src/bloom/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@ impl ViewNode for BloomNode {
&BindGroupEntries::sequential((
// Read from main texture directly
view_texture,
&bind_groups.sampler,
&downsampling_pipeline_res.sampler,
uniforms.clone(),
)),
);
Expand Down Expand Up @@ -362,16 +362,21 @@ fn prepare_bloom_textures(
for (entity, camera, bloom) in &views {
if let Some(viewport) = camera.physical_viewport_size {
// How many times we can halve the resolution minus one so we don't go unnecessarily low
let mip_count = bloom.max_mip_dimension.ilog2().max(2) - 1;
let mip_height_ratio = if viewport.y != 0 {
bloom.max_mip_dimension as f32 / viewport.y as f32
let mip_count = bloom
.max_mip_dimension
.ilog2()
.clamp(2, bloom.max_mip_count)
- 1;
let mip_dim_ratio = if viewport.y != 0 && viewport.x != 0 {
// To better predict the mip count, `bloom.max_mip_dimension` actually controls the maximum size of the short side.
bloom.max_mip_dimension as f32 / viewport.as_vec2().min_element()
} else {
0.
};

let texture_descriptor = TextureDescriptor {
label: Some("bloom_texture"),
size: (viewport.as_vec2() * mip_height_ratio)
size: (viewport.as_vec2() * mip_dim_ratio)
.round()
.as_uvec2()
.max(UVec2::ONE)
Expand Down Expand Up @@ -420,7 +425,6 @@ struct BloomBindGroups {
cache_key: (TextureId, BufferId),
downsampling_bind_groups: Box<[BindGroup]>,
upsampling_bind_groups: Box<[BindGroup]>,
sampler: Sampler,
}

fn prepare_bloom_bind_groups(
Expand All @@ -432,8 +436,6 @@ fn prepare_bloom_bind_groups(
uniforms: Res<ComponentUniforms<BloomUniforms>>,
pipeline_cache: Res<PipelineCache>,
) {
let sampler = &downsampling_pipeline.sampler;

for (entity, bloom_texture, bloom_bind_groups) in &views {
if let Some(b) = bloom_bind_groups
&& b.cache_key
Expand All @@ -454,7 +456,7 @@ fn prepare_bloom_bind_groups(
&pipeline_cache.get_bind_group_layout(&downsampling_pipeline.bind_group_layout),
&BindGroupEntries::sequential((
&bloom_texture.view(mip - 1),
sampler,
&downsampling_pipeline.sampler,
uniforms.binding().unwrap(),
)),
));
Expand All @@ -467,7 +469,7 @@ fn prepare_bloom_bind_groups(
&pipeline_cache.get_bind_group_layout(&upsampling_pipeline.bind_group_layout),
&BindGroupEntries::sequential((
&bloom_texture.view(mip),
sampler,
&upsampling_pipeline.sampler,
uniforms.binding().unwrap(),
)),
));
Expand All @@ -480,7 +482,6 @@ fn prepare_bloom_bind_groups(
),
downsampling_bind_groups: downsampling_bind_groups.into_boxed_slice(),
upsampling_bind_groups: upsampling_bind_groups.into_boxed_slice(),
sampler: sampler.clone(),
});
}
}
Expand Down
18 changes: 16 additions & 2 deletions crates/bevy_post_process/src/bloom/settings.rs
Original file line number Diff line number Diff line change
Expand Up @@ -115,14 +115,22 @@ pub struct Bloom {
/// otherwise set to [`BloomCompositeMode::EnergyConserving`].
pub composite_mode: BloomCompositeMode,

/// Maximum size of each dimension for the largest mipchain texture used in downscaling/upscaling.
/// Only tweak if you are seeing visual artifacts.
/// Maximum size of the short side for the largest mipchain texture used in downscaling/upscaling.
/// Lower values can improve performance but result in more aliasing.
pub max_mip_dimension: u32,

/// Maximum number of mipmaps to use in downscaling/upscaling (default: [`u32::MAX`]).
/// Lower values can improve performance but lose some low frequency contributions.
pub max_mip_count: u32,

/// Amount to stretch the bloom on each axis. Artistic control, can be used to emulate
/// anamorphic blur by using a large x-value. For large values, you may need to increase
/// [`Bloom::max_mip_dimension`] to reduce sampling artifacts.
pub scale: Vec2,

/// Whether to use a high quality bloom implementation (default: true).
/// If false, bloom will use an implementation that significantly reduces the number of texture samples and improves performance, but at the cost of lower quality.
pub high_quality: bool,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we make this an enum to allow adding more possible quality settings / configuration in the future?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure if we should add an enum. The two bloom implementations seem sufficient, and there are other quality configuration. The performance difference of using moderate samples may not be significant.

}

impl Bloom {
Expand All @@ -143,6 +151,8 @@ impl Bloom {
composite_mode: BloomCompositeMode::EnergyConserving,
max_mip_dimension: Self::DEFAULT_MAX_MIP_DIMENSION,
scale: Vec2::ONE,
high_quality: true,
max_mip_count: u32::MAX,
};

/// Emulates the look of stylized anamorphic bloom, stretched horizontally.
Expand All @@ -166,6 +176,8 @@ impl Bloom {
composite_mode: BloomCompositeMode::Additive,
max_mip_dimension: Self::DEFAULT_MAX_MIP_DIMENSION,
scale: Vec2::ONE,
high_quality: true,
max_mip_count: u32::MAX,
};

/// A preset that applies a very strong bloom, and blurs the whole screen.
Expand All @@ -181,6 +193,8 @@ impl Bloom {
composite_mode: BloomCompositeMode::EnergyConserving,
max_mip_dimension: Self::DEFAULT_MAX_MIP_DIMENSION,
scale: Vec2::ONE,
high_quality: true,
max_mip_count: u32::MAX,
};
}

Expand Down
23 changes: 22 additions & 1 deletion crates/bevy_post_process/src/bloom/upsampling_pipeline.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ use bevy_render::{
binding_types::{sampler, texture_2d, uniform_buffer},
*,
},
renderer::RenderDevice,
view::ViewTarget,
};
use bevy_shader::Shader;
Expand All @@ -28,6 +29,7 @@ pub struct UpsamplingPipelineIds {
#[derive(Resource)]
pub struct BloomUpsamplingPipeline {
pub bind_group_layout: BindGroupLayoutDescriptor,
pub sampler: Sampler,
/// The asset handle for the fullscreen vertex shader.
pub fullscreen_shader: FullscreenShader,
/// The fragment shader asset handle.
Expand All @@ -38,10 +40,12 @@ pub struct BloomUpsamplingPipeline {
/// Key used to specialize [`BloomUpsamplingPipeline`]; each distinct combination of
/// these fields is requested as its own specialized pipeline.
pub struct BloomUpsamplingPipelineKeys {
/// Bloom composite mode; the specialization matches on it to pick the color blend.
composite_mode: BloomCompositeMode,
/// NOTE(review): presumably marks the last upsampling pass, which targets the view
/// texture instead of a bloom mip (and so picks the output texture format) — confirm
/// against `specialize`.
final_pipeline: bool,
/// When `false`, the `FAST_BLUR` shader def is pushed so the shader uses the cheaper
/// 4-tap upsample kernel.
high_quality: bool,
}

pub fn init_bloom_upscaling_pipeline(
mut commands: Commands,
render_device: Res<RenderDevice>,
fullscreen_shader: Res<FullscreenShader>,
asset_server: Res<AssetServer>,
) {
Expand All @@ -60,8 +64,18 @@ pub fn init_bloom_upscaling_pipeline(
),
);

// Sampler
let sampler = render_device.create_sampler(&SamplerDescriptor {
min_filter: FilterMode::Linear,
mag_filter: FilterMode::Linear,
address_mode_u: AddressMode::ClampToEdge,
address_mode_v: AddressMode::ClampToEdge,
..Default::default()
});

commands.insert_resource(BloomUpsamplingPipeline {
bind_group_layout,
sampler,
fullscreen_shader: fullscreen_shader.clone(),
fragment_shader: load_embedded_asset!(asset_server.as_ref(), "bloom.wgsl"),
});
Expand All @@ -77,6 +91,11 @@ impl SpecializedRenderPipeline for BloomUpsamplingPipeline {
BLOOM_TEXTURE_FORMAT
};

let mut shader_defs = vec![];
if !key.high_quality {
shader_defs.push("FAST_BLUR".into());
}

let color_blend = match key.composite_mode {
BloomCompositeMode::EnergyConserving => {
// At the time of developing this we decided to blend our
Expand Down Expand Up @@ -115,6 +134,7 @@ impl SpecializedRenderPipeline for BloomUpsamplingPipeline {
vertex: self.fullscreen_shader.to_vertex_state(),
fragment: Some(FragmentState {
shader: self.fragment_shader.clone(),
shader_defs,
entry_point: Some("upsample".into()),
targets: vec![Some(ColorTargetState {
format: texture_format,
Expand All @@ -128,7 +148,6 @@ impl SpecializedRenderPipeline for BloomUpsamplingPipeline {
}),
write_mask: ColorWrites::ALL,
})],
..default()
}),
..default()
}
Expand All @@ -149,6 +168,7 @@ pub fn prepare_upsampling_pipeline(
BloomUpsamplingPipelineKeys {
composite_mode: bloom.composite_mode,
final_pipeline: false,
high_quality: bloom.high_quality,
},
);

Expand All @@ -158,6 +178,7 @@ pub fn prepare_upsampling_pipeline(
BloomUpsamplingPipelineKeys {
composite_mode: bloom.composite_mode,
final_pipeline: true,
high_quality: bloom.high_quality,
},
);

Expand Down
5 changes: 5 additions & 0 deletions examples/2d/bloom_2d.rs
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@ fn update_bloom_settings(
bloom.prefilter.threshold_softness
));
text.push_str(&format!("(I/K) Horizontal Scale: {:.2}\n", bloom.scale.x));
text.push_str(&format!("(P) High quality: {}\n", bloom.high_quality));

if keycode.just_pressed(KeyCode::Space) {
commands.entity(camera_entity).remove::<Bloom>();
Expand Down Expand Up @@ -180,6 +181,10 @@ fn update_bloom_settings(
bloom.scale.x += dt * 2.0;
}
bloom.scale.x = bloom.scale.x.clamp(0.0, 16.0);

if keycode.just_pressed(KeyCode::KeyP) {
bloom.high_quality = !bloom.high_quality;
}
}

None => {
Expand Down
7 changes: 6 additions & 1 deletion examples/3d/bloom_3d.rs
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,8 @@ fn update_bloom_settings(
"(U/J) Threshold softness: {:.2}\n",
bloom.prefilter.threshold_softness
));
text.push_str(&format!("(I/K) Horizontal Scale: {:.2}\n", bloom.scale.x));
text.push_str(&format!("(I/K) Horizontal scale: {:.2}\n", bloom.scale.x));
text.push_str(&format!("(P) High quality: {}\n", bloom.high_quality));

if keycode.just_pressed(KeyCode::Space) {
commands.entity(entity).remove::<Bloom>();
Expand Down Expand Up @@ -205,6 +206,10 @@ fn update_bloom_settings(
bloom.scale.x += dt * 2.0;
}
bloom.scale.x = bloom.scale.x.clamp(0.0, 8.0);

if keycode.just_pressed(KeyCode::KeyP) {
bloom.high_quality = !bloom.high_quality;
}
}

(entity, None) => {
Expand Down
11 changes: 11 additions & 0 deletions release-content/release-notes/bloom_performance_options.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
---
title: Add performance options to Bloom.
authors: ["@beicause"]
pull_requests: [21340]
---

Bloom is a relatively expensive post-processing effect on low-end devices, as it requires multiple render passes for downsampling and upsampling. To make its performance more configurable, we added the `high_quality` (default: true) and `max_mip_count` (default: unlimited) options to Bloom, in addition to the existing `max_mip_dimension`.

If `high_quality` is false, Bloom will use a faster but lower quality implementation, which significantly reduces texture sampling but still maintains reasonable visual quality. For low-end devices, this could potentially reduce frame time by a few milliseconds.

You can also set `max_mip_count` and/or `max_mip_dimension` to a lower value for a significant performance gain. By default the bloom texture has a maximum short-side size of 512 and uses all 8 mipmaps. You may be able to cut the Bloom frame time in half by reducing the mipmap count to a smaller value (such as 3 or 4). However, please note that these two options impact the bloom quality and need to be balanced for your needs.