Vulkan中的实时软阴影与硬件优化
ShadowMap算法需要解决阴影失真(Shadow Acne)和阴影抗锯齿问题,前者可以通过 shadow bias 解决,后者可以使用自定义滤波器如pcf、vsm、esm、evsm等解决,而 shadow bias和pcf滤波,vulkan都提供了对应的硬件能力来加速计算。
对于shadow bias,如果不使用硬件方法,而在shader中计算偏移量的话,常用的计算方法(learnopengl给出来的)是:
float bias = max(0.05f * (1.0 - dot(normal, lightDir)), 0.005f);
当然,渲染时把Cull Mode改为Cull Front,可以进一步弥补深度的比较精度。
Vulkan光栅化器内置了一个Pipeline State:depthBiasState,开启后会将光栅化得到的深度值偏移一段距离。
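可以配置的参数有depthBiasSlopeFactor、depthBiasConstantFactor以及depthBiasClamp,偏移的计算公式如下:
$$
o = \mathrm{dbclamp}\left(m \times \text{depthBiasSlopeFactor} + r \times \text{depthBiasConstantFactor}\right)
$$
$$
\mathrm{dbclamp}(x) =
\begin{cases}
x & \text{depthBiasClamp} = 0 \ \text{或为 NaN} \\
\min(x,\ \text{depthBiasClamp}) & \text{depthBiasClamp} > 0 \\
\max(x,\ \text{depthBiasClamp}) & \text{depthBiasClamp} < 0
\end{cases}
$$
其中 $m$ 是多边形的最大深度斜率,$r$ 是当前深度附件格式下的最小可分辨深度差。
更详细的介绍见spec.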
- depthBiasSlopeFactor:根据几何体表面梯度大小来缩放的因子。
- depthBiasConstantFactor:恒定偏移因子,与深度缓冲的最小可分辨深度差相乘后得到恒定偏移量。
- depthBiasClamp:偏移量钳制值。
开启步骤:
- 填写VkPipelineRasterizationStateCreateInfo时把depthBiasEnable设为VK_TRUE。
- (可选)填写VkPipelineRasterizationStateCreateInfo时把cullMode设为VK_CULL_MODE_FRONT_BIT。
- (可选)填写VkPipelineDynamicStateCreateInfo时把VK_DYNAMIC_STATE_DEPTH_BIAS也加进去。
- 如果选用了第三步,则在VkCommandBuffer录制时使用vkCmdSetDepthBias设置动态的深度偏移值,否则直接在创建Pipeline时把这个值填好。
整体的代码流程如下:
// 1. Turn on depth bias in the rasterization state of the shadow-map pipeline.
pipe_info.rasterization_state.depthBiasEnable = VK_TRUE;
// ...
// 2. Cull front faces while rendering the shadow map.
pipe_info.rasterization_state.cullMode = VK_CULL_MODE_FRONT_BIT;
pipe_info.depth_stencil_state.depthCompareOp = VK_COMPARE_OP_LESS_OR_EQUAL;
// ...
// 3. Register depth bias as a dynamic state (next to viewport and scissor),
//    so its values are supplied while recording instead of at pipeline creation.
std::vector<VkDynamicState> dynamicStateEnables{
    VK_DYNAMIC_STATE_VIEWPORT,
    VK_DYNAMIC_STATE_SCISSOR,
    VK_DYNAMIC_STATE_DEPTH_BIAS,
};
// ...
// 4. Command recording for the shadow-depth pass.
pass_shadowdepth->cmd_buf->begin(VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT);
{
    // ...
    vkCmdSetViewport(*pass_shadowdepth->cmd_buf, 0, 1, &viewport);
    vkCmdSetScissor(*pass_shadowdepth->cmd_buf, 0, 1, &scissor);

    // vkCmdSetDepthBias takes (constant factor, clamp, slope factor) in that order;
    // a clamp of 0.0f leaves the computed bias unclamped.
    constexpr float kDepthBiasConstantFactor = 1.25f;
    constexpr float kDepthBiasClamp = 0.0f;
    constexpr float kDepthBiasSlopeFactor = 1.75f;
    vkCmdSetDepthBias(*pass_shadowdepth->cmd_buf,
                      kDepthBiasConstantFactor,
                      kDepthBiasClamp,
                      kDepthBiasSlopeFactor);
    // ...
    draw();
}
pass_shadowdepth->cmd_buf->end();
这样,便无需再在着色器中计算一次偏移了。
接下来是硬件PCF,更准确地说是带深度比较的双线性纹理滤波。
开启步骤如下:
- shadowdepth纹理的采样方式设为Bilinear。
- 采样器的compareEnable设为VK_TRUE。
- 采样器的compareOp设为VK_COMPARE_OP_LESS。
- shader中采样器设为sampler2DShadow。
- 采样时把ShadowCoord的xyz都传入。
代码流程如下:
// 1. C++ side: create-info for a sampler that performs the depth comparison in hardware.
VkSamplerCreateInfo sampler_info{};
sampler_info.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO;
// Linear filtering combined with compareEnable is what yields the filtered
// (PCF-like) comparison result when the shader samples through a shadow sampler.
sampler_info.magFilter = VK_FILTER_LINEAR;
sampler_info.minFilter = VK_FILTER_LINEAR;
sampler_info.compareEnable = VK_TRUE;
sampler_info.compareOp = VK_COMPARE_OP_LESS;
// Border handling: lookups outside [0, 1] read the border colour instead of
// repeating or stretching edge texels. An opaque-white border corresponds to the
// maximum depth value, so such lookups are not reported as occluded.
// NOTE: the members are addressModeU/V/W and borderColor; VkSamplerCreateInfo has
// no address_mode_* or bordercolor members.
sampler_info.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER;
sampler_info.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER;
sampler_info.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER;
sampler_info.borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE;
// 2. shader...
// Earlier declaration, kept for reference (plain sampler2D, no depth comparison):
// layout (binding = 5) uniform sampler2D directional_light_shadowdepth;
// Shadow sampler: texture() takes the reference depth as the third coordinate
// component and returns the comparison result instead of the stored depth.
layout (binding = 5) uniform sampler2DShadow directional_light_shadowdepth;
// Single hardware-filtered shadow lookup.
//   shadow_tex   - shadow map bound through a comparison (shadow) sampler.
//   NoL          - not used by this function.
//   fragworldpos - world-space position of the fragment being shaded.
//   screen_uv    - not used by this function.
// Returns the comparison result replicated into all three channels.
vec3 hardware_shadow_pcf(sampler2DShadow shadow_tex,float NoL,vec3 fragworldpos,vec2 screen_uv)
{
    // World space -> light clip space, then perspective divide.
    vec4 light_clip = ub_directional_light_vp.proj * ub_directional_light_vp.view * vec4(fragworldpos, 1.0);
    vec4 light_ndc = light_clip / light_clip.w;

    // Remap xy from [-1, 1] to [0, 1] texture coordinates; z is passed through
    // unchanged as the reference depth.
    vec2 shadow_uv = light_ndc.xy * 0.5f + 0.5f;

    float lit = texture(shadow_tex, vec3(shadow_uv, light_ndc.z));
    return vec3(lit);
}
得出的结果和双线性纹理滤波差不多,因为硬件只对相邻的2×2共4个纹素分别做深度比较,再对比较结果做双线性插值:
可以配合泊松盘采样多次,得到一个还不错的软阴影:
// Table of 25 two-dimensional sample offsets (named as a Poisson-disk pattern).
// Every component lies within [-1, 1]; the 25-tap PCF loop scales each entry by
// the texel size and a dilation factor before adding it to the shadow UV.
const vec2 poisson_disk_25[25] = vec2[](
vec2(-0.978698, -0.0884121),
vec2(-0.841121, 0.521165),
vec2(-0.71746, -0.50322),
vec2(-0.702933, 0.903134),
vec2(-0.663198, 0.15482),
vec2(-0.495102, -0.232887),
vec2(-0.364238, -0.961791),
vec2(-0.345866, -0.564379),
vec2(-0.325663, 0.64037),
vec2(-0.182714, 0.321329),
vec2(-0.142613, -0.0227363),
vec2(-0.0564287, -0.36729),
vec2(-0.0185858, 0.918882),
vec2(0.0381787, -0.728996),
vec2(0.16599, 0.093112),
vec2(0.253639, 0.719535),
vec2(0.369549, -0.655019),
vec2(0.423627, 0.429975),
vec2(0.530747, -0.364971),
vec2(0.566027, -0.940489),
vec2(0.639332, 0.0284127),
vec2(0.652089, 0.669668),
vec2(0.773797, 0.345012),
vec2(0.968871, 0.840449),
vec2(0.991882, -0.657338)
);
// 25-tap PCF: averages hardware-filtered shadow comparisons taken at the
// poisson_disk_25 offsets around the projected shadow coordinate.
//   shadow_tex   - shadow map bound through a comparison (shadow) sampler.
//   NoL          - not used by this function.
//   fragworldpos - world-space position of the fragment being shaded.
//   screen_uv    - not used by this function.
//   dilation     - multiplier applied to every tap offset (in texels).
// Returns the mean comparison result replicated into all three channels.
vec3 hardware_shadow_pcf(sampler2DShadow shadow_tex,float NoL,vec3 fragworldpos,vec2 screen_uv,float dilation)
{
    // UV-space size of one shadow-map texel, per axis.
    vec2 texel_size = 1.0 / vec2(textureSize(shadow_tex, 0).xy);

    // World space -> light clip space -> perspective divide -> [0, 1] UV.
    vec4 light_clip = ub_directional_light_vp.proj * ub_directional_light_vp.view * vec4(fragworldpos, 1.0);
    vec4 shadow_coord = light_clip / light_clip.w;
    shadow_coord.xy = shadow_coord.xy * 0.5f + 0.5f;

    float lit_sum = 0.0;
    for (int tap = 0; tap < 25; ++tap)
    {
        vec2 tap_uv = shadow_coord.xy + texel_size * poisson_disk_25[tap] * dilation;
        lit_sum += texture(shadow_tex, vec3(tap_uv, shadow_coord.z));
    }
    return vec3(lit_sum / 25.0f);
}
下图是25次采样的pcf软阴影,其中dilation = 1.0f:
微软则设计了一种类似双线性过滤、滤波核可缩放的算法,只需9次采样就能得到很不错的效果,效果如下:
// 9-tap PCF with a rotated-grid tap pattern: one centre tap with weight 2 plus
// eight surrounding taps with weight 1, normalised by 10.
//   shadow_tex   - shadow map bound through a comparison (shadow) sampler.
//   NoL          - not used by this function.
//   fragworldpos - world-space position of the fragment being shaded.
//   screen_uv    - not used by this function.
// Returns the squared weighted average replicated into all three channels.
vec3 hardware_shadow_pcf_microsoft(sampler2DShadow shadow_tex,float NoL,vec3 fragworldpos,vec2 screen_uv)
{
    // UV-space size of one texel, per axis: 1 / resolution
    // (e.g. 1/2048 for a 2048x2048 shadow map).
    ivec2 tex_dim = textureSize(shadow_tex, 0).xy;
    vec2 texel_size = vec2(1.0 / float(tex_dim.x), 1.0 / float(tex_dim.y));

    // World space -> light clip space -> perspective divide -> [0, 1] UV.
    vec4 shadow_clip_pos = ub_directional_light_vp.proj * ub_directional_light_vp.view * vec4(fragworldpos, 1.0);
    vec4 shadow_coord = shadow_clip_pos / shadow_clip_pos.w;
    shadow_coord.st = shadow_coord.st * 0.5f + 0.5f;

    // Tap distances expressed in texels and scaled per axis, so the pattern keeps
    // its shape on non-square shadow maps (identical to the x-only scaling when
    // the map is square).
    const float dilation = 1.0f;
    vec2 tap_scale = dilation * texel_size;
    vec2 d1 = tap_scale * 0.125;
    vec2 d2 = tap_scale * 0.875;
    vec2 d3 = tap_scale * 0.625;
    vec2 d4 = tap_scale * 0.375;

    // Small constant offset on the reference depth against shadow acne.
    shadow_coord.z -= 0.001f;

    vec2 uv = shadow_coord.xy;
    float ref_depth = shadow_coord.z;
    float result = (
        2.0 * texture(shadow_tex, vec3(uv, ref_depth)) +
        texture(shadow_tex, vec3(uv + vec2(-d2.x,  d1.y), ref_depth)) +
        texture(shadow_tex, vec3(uv + vec2(-d1.x, -d2.y), ref_depth)) +
        texture(shadow_tex, vec3(uv + vec2( d2.x, -d1.y), ref_depth)) +
        texture(shadow_tex, vec3(uv + vec2( d1.x,  d2.y), ref_depth)) +
        texture(shadow_tex, vec3(uv + vec2(-d4.x,  d3.y), ref_depth)) +
        texture(shadow_tex, vec3(uv + vec2(-d3.x, -d4.y), ref_depth)) +
        texture(shadow_tex, vec3(uv + vec2( d4.x, -d3.y), ref_depth)) +
        texture(shadow_tex, vec3(uv + vec2( d3.x,  d4.y), ref_depth))
    ) / 10.0f;

    // The average is squared before being returned.
    return vec3(result * result);
}
下图为9次采样的软阴影:
当然如果不考虑性能还可以结合PCSS一起使用:
// Hash-style pseudo-random value built from a 3D seed and an integer index:
// dot the 4-component seed with fixed weights, take the sine, scale it, and
// keep only the fractional part.
float random(vec3 seed, int i)
{
    const vec4 hash_weights = vec4(12.9898, 78.233, 45.164, 94.673);
    float phase = dot(vec4(seed, float(i)), hash_weights);
    return fract(sin(phase) * 43758.5453);
}
// PCSS blocker search.
// Takes 32 taps around shadow_coord.st, each at a pseudo-randomly chosen entry
// of poisson_disk_32 (declared outside this block) scaled by radius and
// texel_size, and averages the values that fall below (shadow_coord.z - bias).
//   NoL - not used by this function.
// Returns vec2(average of accepted values, 1.0) when at least one tap was
// accepted, otherwise vec2(1.0, 0.0).
// NOTE(review): shadowdepth_tex is a sampler2DShadow, so texture() yields the
// depth-comparison result rather than the stored depth; the averaged value is
// then not a blocker depth. A real blocker search presumably needs the depth map
// bound through a non-comparison sampler2D - confirm and adjust the signature.
// NOTE(review): the only caller in this listing passes a coordinate that was
// already divided by w, so the shadow_coord.w > 0 test looks always true there.
vec2 pcss_search_blocker(float bias,vec4 shadow_coord,float NoL,float radius,sampler2DShadow shadowdepth_tex,vec2 texel_size)
{
    float accepted_sum = 0.0;
    float accepted_count = 0.0;
    float threshold = shadow_coord.z - bias;

    for (int tap = 0; tap < 32; ++tap)
    {
        int index = int(32.0 * random(shadow_coord.xyy, tap)) % 32;
        vec2 sample_uv = vec2(radius) * poisson_disk_32[index] * texel_size + shadow_coord.st;
        float dist = texture(shadowdepth_tex, vec3(sample_uv, shadow_coord.z)).r;

        bool accepted = (shadow_coord.w > 0.0f) && (dist < threshold);
        if (accepted)
        {
            accepted_sum += dist;
            accepted_count += 1.0;
        }
    }

    if (accepted_count > 0.0)
    {
        return vec2(accepted_sum / accepted_count, 1.0);
    }
    return vec2(1.0f, 0.0f);
}
// PCSS soft shadow.
//   sample_clip_pos - fragment position in light clip space (before the w divide).
//   NoL             - N.L term; drives the slope-scaled bias.
//   shadowdepth_tex - shadow map sampler, forwarded to the blocker search and PCF.
//   penumbra_width  - scale applied to the estimated penumbra size.
//   radius          - search radius handed to pcss_search_blocker.
// Returns 1.0 when the blocker search reports no blocker; otherwise returns the
// result of shadow_pcf (defined outside this block) run with the estimated
// penumbra size.
float shadow_pcss(vec4 sample_clip_pos,float NoL,sampler2DShadow shadowdepth_tex,float penumbra_width,float radius)
{
    // UV-space size of one shadow-map texel, per axis.
    ivec2 tex_dim = textureSize(shadowdepth_tex, 0).xy;
    vec2 texel_size = vec2(1.0 / float(tex_dim.x), 1.0 / float(tex_dim.y));

    // Bias grows as the surface turns away from the light, with a floor of 0.005.
    float bias = max(0.05 * (1.0 - NoL), 0.005);

    // Perspective divide, then remap xy from [-1, 1] to [0, 1].
    vec4 shadow_coord = sample_clip_pos / sample_clip_pos.w;
    shadow_coord.st = shadow_coord.st * 0.5f + 0.5f;

    vec2 ret = pcss_search_blocker(bias, shadow_coord, NoL, radius, shadowdepth_tex, texel_size);
    float avg_blocker_depth = ret.x;
    if (ret.y < 0.5f)
    {
        // Early out: no blocker found, so skip the PCF cost for unshadowed fragments.
        return 1.0f;
    }

    // Penumbra estimate: (receiver - blocker) / blocker, with the numerator
    // floored at 0.1. The divisor is floored at a small epsilon so an average
    // blocker depth of 0 cannot produce inf/NaN.
    float safe_blocker_depth = max(avg_blocker_depth, 1e-5);
    float penumbra_size = max(shadow_coord.z - avg_blocker_depth, 0.1f) / safe_blocker_depth * penumbra_width;

    return shadow_pcf(bias, shadow_coord, NoL, penumbra_size, shadowdepth_tex, texel_size);
}
画外音:
PCSS效果很好,但我以后都不会考虑了,因为实在是太慢了。