Vulkan中的实时软阴影与硬件优化

Image

ShadowMap算法需要解决阴影失真(Shadow Acne)和阴影锯齿这两个问题:前者可以通过 shadow bias 解决,后者可以使用 PCF、VSM、ESM、EVSM 等自定义滤波器解决。而对于 shadow bias 和 PCF 滤波,Vulkan 都提供了对应的硬件能力来加速计算。

对于shadow bias,如果不使用硬件方法,而在shader中计算偏移量的话,常用的计算方法(learnopengl给出来的)是:

// Slope-dependent bias: grows as the surface turns away from the light,
// and never drops below 0.005.
float facing = dot(normal, lightDir);
float bias = max(0.05f * (1.0 - facing), 0.005f);

当然,渲染时把Cull Mode改为Cull Front,可以进一步弥补深度的比较精度。

Vulkan光栅化器内置了一个Pipeline State:depthBiasState,开启后会将光栅化得到的深度值偏移一段距离。

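可以配置的参数有depthBiasSlopeFactor、depthBiasConstantFactor以及depthBiasClamp。按照Vulkan规范,偏移量为 $o = \mathrm{dbclamp}(m \times \mathrm{depthBiasSlopeFactor} + r \times \mathrm{depthBiasConstantFactor})$,其中 $m$ 是片元深度的最大斜率,$r$ 是深度附件格式的最小可分辨差值,$\mathrm{dbclamp}$ 表示按depthBiasClamp对结果进行钳制。偏移的计算公式如下: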

Image

更详细的介绍见spec.

  1. depthBiasSlopeFactor:根据几何体表面梯度大小来缩放的因子。
  2. depthBiasConstantFactor: 恒定偏移量。
  3. depthBiasClamp:偏移量钳制值。

开启步骤:

  1. 填写VkPipelineRasterizationStateCreateInfo时把depthBiasEnable设为VK_TRUE。
  2. (可选)填写VkPipelineRasterizationStateCreateInfo时把cullMode设为VK_CULL_MODE_FRONT_BIT。
  3. (可选)填写VkPipelineDynamicStateCreateInfo时把VK_DYNAMIC_STATE_DEPTH_BIAS也加进去。
  4. 如果选用了第三步,则在VkCommandBuffer录制时使用vkCmdSetDepthBias设置动态的深度偏移值,否则直接在创建Pipeline时把这个值填好。

整体的代码流程如下:

// 1. Enable depth bias on the shadow-map pipeline so the rasterizer offsets
//    the generated depth values.
pipe_info.rasterization_state.depthBiasEnable = VK_TRUE;

// ...

// 2. Shadow map pass culls front faces (optional step that further reduces
//    depth-comparison artifacts); depth test passes on less-or-equal.
pipe_info.rasterization_state.cullMode = VK_CULL_MODE_FRONT_BIT;
pipe_info.depth_stencil_state.depthCompareOp = VK_COMPARE_OP_LESS_OR_EQUAL;

// ...

// 3. Register depth bias as a dynamic state (next to viewport and scissor) so
//    its values are supplied at record time through vkCmdSetDepthBias.
std::vector<VkDynamicState> dynamicStateEnables{
	VK_DYNAMIC_STATE_VIEWPORT,
	VK_DYNAMIC_STATE_SCISSOR,
	VK_DYNAMIC_STATE_DEPTH_BIAS,
};

// ...

// 4. 绘制记录阶段
pass_shadowdepth->cmd_buf->begin(VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT);
{
    // ...
    vkCmdSetViewport(*pass_shadowdepth->cmd_buf, 0, 1, &viewport);
	vkCmdSetScissor(*pass_shadowdepth->cmd_buf, 0, 1, &scissor);
    
    static const float depthBiasConstant = 1.25f;
	static const float depthBiasSlope = 1.75f;

	vkCmdSetDepthBias(
		*pass_shadowdepth->cmd_buf,
		depthBiasConstant,
		0.0f,
		depthBiasSlope
	);
    
    // ...
    draw();
}
pass_shadowdepth->cmd_buf->end();

这样,便无需再在着色器中计算一次偏移了。

Image

接下来是硬件PCF,更准确地说,是带深度比较的双线性纹理滤波。

开启步骤如下:

  1. shadowdepth纹理的采样方式设为Bilinear。
  2. compareEnable设为True。
  3. compareOp设为VK_COMPARE_OP_LESS。
  4. shader中采样器设为sampler2DShadow。
  5. 采样时把ShadowCoord的xyz都传入。

代码流程如下:

// 1. C++ side: sampler used to read the shadow depth map with hardware
//    depth comparison (hardware PCF).
VkSamplerCreateInfo sampler_info{};
sampler_info.sType         = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO;
// Linear filtering combined with compareEnable gives the filtered
// depth-comparison result instead of a raw depth value.
sampler_info.magFilter     = VK_FILTER_LINEAR;
sampler_info.minFilter     = VK_FILTER_LINEAR;
sampler_info.compareEnable = VK_TRUE;
sampler_info.compareOp     = VK_COMPARE_OP_LESS;
// Border handling: coordinates outside the shadow map read an opaque white
// border instead of wrapping or repeating edge texels.
// NOTE: the VkSamplerCreateInfo members are addressModeU/V/W and borderColor;
// the previous snake_case spellings are not members of the Vulkan struct.
sampler_info.addressModeU  = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER;
sampler_info.addressModeV  = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER;
sampler_info.addressModeW  = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER;
sampler_info.borderColor   = VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE;

// 2. Shader side
// Previous declaration with a plain sampler2D, kept for comparison:
// layout (binding = 5) uniform sampler2D directional_light_shadowdepth;
// Declared as sampler2DShadow so texture() is called with a vec3
// (uv plus the reference depth to compare against).
layout (binding = 5) uniform sampler2DShadow directional_light_shadowdepth;
// Single-tap hardware shadow lookup.
//   shadow_tex   - shadow depth map bound as a comparison sampler
//   NoL          - unused here
//   fragworldpos - world-space position of the shaded fragment
//   screen_uv    - unused here
// Returns the comparison result of one texture() fetch, replicated to rgb.
vec3 hardware_shadow_pcf(sampler2DShadow shadow_tex,float NoL,vec3 fragworldpos,vec2 screen_uv)
{
	// World space -> light clip space -> normalized device coordinates.
	mat4 light_view_proj = ub_directional_light_vp.proj * ub_directional_light_vp.view;
	vec4 light_clip = light_view_proj * vec4(fragworldpos, 1.0);
	vec3 light_ndc = light_clip.xyz / light_clip.w;

	// Remap xy from [-1, 1] to [0, 1] texture space; z is used as-is as the
	// reference depth.
	vec2 shadow_uv = light_ndc.xy * 0.5f + 0.5f;

	float lit = texture(shadow_tex, vec3(shadow_uv, light_ndc.z));
	return vec3(lit);
}

得出的结果和双线性纹理滤波差不多,因为它只对相邻的2×2共4个纹素做深度比较,再对比较结果做双线性插值:

Image

可以配合泊松盘采样多次,得到一个还不错的软阴影:

// 25 two-dimensional sample offsets; every component lies within [-1, 1].
// The multi-tap PCF loop scales each entry by the texel size and a dilation
// factor to offset its shadow-map lookups.
const vec2 poisson_disk_25[25] = vec2[](
    vec2(-0.978698, -0.0884121),
    vec2(-0.841121, 0.521165),
    vec2(-0.71746, -0.50322),
    vec2(-0.702933, 0.903134),
    vec2(-0.663198, 0.15482),
    vec2(-0.495102, -0.232887),
    vec2(-0.364238, -0.961791),
    vec2(-0.345866, -0.564379),
    vec2(-0.325663, 0.64037),
    vec2(-0.182714, 0.321329),
    vec2(-0.142613, -0.0227363),
    vec2(-0.0564287, -0.36729),
    vec2(-0.0185858, 0.918882),
    vec2(0.0381787, -0.728996),
    vec2(0.16599, 0.093112),
    vec2(0.253639, 0.719535),
    vec2(0.369549, -0.655019),
    vec2(0.423627, 0.429975),
    vec2(0.530747, -0.364971),
    vec2(0.566027, -0.940489),
    vec2(0.639332, 0.0284127),
    vec2(0.652089, 0.669668),
    vec2(0.773797, 0.345012),
    vec2(0.968871, 0.840449),
    vec2(0.991882, -0.657338)
);

// 25-tap hardware PCF using the poisson_disk_25 offsets.
//   shadow_tex   - shadow depth map bound as a comparison sampler
//   NoL          - unused here
//   fragworldpos - world-space position of the shaded fragment
//   screen_uv    - unused here
//   dilation     - scale applied to every offset, in texels
// Returns the average of the 25 comparison results, replicated to rgb.
vec3 hardware_shadow_pcf(sampler2DShadow shadow_tex,float NoL,vec3 fragworldpos,vec2 screen_uv,float dilation)
{
	// Size of one shadow-map texel in normalized uv units.
	vec2 texel_size = 1.0 / vec2(textureSize(shadow_tex, 0));

	// World space -> light clip space -> normalized device coordinates.
	mat4 light_view_proj = ub_directional_light_vp.proj * ub_directional_light_vp.view;
	vec4 light_clip = light_view_proj * vec4(fragworldpos, 1.0);
	vec3 light_ndc = light_clip.xyz / light_clip.w;
	vec2 shadow_uv = light_ndc.xy * 0.5f + 0.5f;

	float lit_sum = 0.0;
	for (int tap = 0; tap < 25; tap++)
	{
		vec2 tap_offset = texel_size * poisson_disk_25[tap] * dilation;
		lit_sum += texture(shadow_tex, vec3(shadow_uv + tap_offset, light_ndc.z));
	}

	return vec3(lit_sum / 25.0f);
}

下图是25次采样的pcf软阴影,其中dilation = 1.0f:

Image

微软则设计了一种类似双线性过滤可缩放滤波核的算法,9次采样就能得到很不错的效果, 效果如下:

Image

// 9-tap hardware PCF with a scalable kernel: one centre tap weighted 2 plus
// eight rotated taps weighted 1, normalized by 10.
//   shadow_tex   - shadow depth map bound as a comparison sampler
//   NoL          - unused here
//   fragworldpos - world-space position of the shaded fragment
//   screen_uv    - unused here
// Returns the squared weighted average of the comparison results, replicated
// to rgb.
vec3 hardware_shadow_pcf_microsoft(sampler2DShadow shadow_tex,float NoL,vec3 fragworldpos,vec2 screen_uv)
{
	// Size of one shadow-map texel in normalized uv units, per axis
	// (for a 2048x2048 map this is 1/2048 on both axes).
	vec2 texel_size = 1.0 / vec2(textureSize(shadow_tex, 0));

	vec4 shadow_clip_pos = ub_directional_light_vp.proj * ub_directional_light_vp.view * vec4(fragworldpos, 1.0);
	vec4 shadow_coord = shadow_clip_pos / shadow_clip_pos.w;
	shadow_coord.st = shadow_coord.st * 0.5f + 0.5f;

	// Kernel radii as fractions of a texel. They are kept per axis so the
	// y offsets use the texel height; previously every offset was scaled by
	// texel_size.x, which is only right for square shadow maps.
	const float dilation = 1.0f;
	vec2 d1 = dilation * texel_size * 0.125;
	vec2 d2 = dilation * texel_size * 0.875;
	vec2 d3 = dilation * texel_size * 0.625;
	vec2 d4 = dilation * texel_size * 0.375;

	// Small constant bias against shadow acne.
	shadow_coord.z -= 0.001f;

	float result = (
		2.0 * texture(shadow_tex, vec3(shadow_coord.xy, shadow_coord.z)) +
		texture(shadow_tex, vec3(shadow_coord.xy + vec2(-d2.x,  d1.y), shadow_coord.z)) +
		texture(shadow_tex, vec3(shadow_coord.xy + vec2(-d1.x, -d2.y), shadow_coord.z)) +
		texture(shadow_tex, vec3(shadow_coord.xy + vec2( d2.x, -d1.y), shadow_coord.z)) +
		texture(shadow_tex, vec3(shadow_coord.xy + vec2( d1.x,  d2.y), shadow_coord.z)) +
		texture(shadow_tex, vec3(shadow_coord.xy + vec2(-d4.x,  d3.y), shadow_coord.z)) +
		texture(shadow_tex, vec3(shadow_coord.xy + vec2(-d3.x, -d4.y), shadow_coord.z)) +
		texture(shadow_tex, vec3(shadow_coord.xy + vec2( d4.x, -d3.y), shadow_coord.z)) +
		texture(shadow_tex, vec3(shadow_coord.xy + vec2( d3.x,  d4.y), shadow_coord.z))
	) / 10.0f;

	return vec3(result*result);
}

下图为9次采样的软阴影:

Image

当然如果不考虑性能还可以结合PCSS一起使用:

Image

// Hash-style pseudo-random value: the seed and index are packed into a vec4,
// dotted with fixed constants, and the fractional part of a scaled sine is
// returned.
float random(vec3 seed, int i)
{
	const vec4 weights = vec4(12.9898,78.233,45.164,94.673);
	float phase = dot(vec4(seed,i), weights);
	return fract(sin(phase) * 43758.5453);
}

// PCSS blocker search.
// Takes 32 randomly picked poisson_disk_32 taps around shadow_coord.st (scaled
// by radius and texel_size) and averages the fetched values that are smaller
// than shadow_coord.z - bias.
// Returns vec2(average, 1.0) when at least one tap qualified, otherwise
// vec2(1.0, 0.0).
// NOTE(review): per the GLSL spec, texture() on a sampler2DShadow yields the
// depth-comparison result rather than the stored depth - confirm whether a
// non-comparison sampler was intended for this search.
vec2 pcss_search_blocker(float bias,vec4 shadow_coord,float NoL,float radius,sampler2DShadow shadowdepth_tex,vec2 texel_size)
{
	float depth_sum = 0.0;
	float hits = 0.0;

	for (int tap = 0; tap < 32; tap++)
	{
		int pick = int( 32.0 * random(shadow_coord.xyy,tap) ) % 32;
		vec2 tap_uv = vec2(radius) * poisson_disk_32[pick] * texel_size + shadow_coord.st;
		float fetched = texture(shadowdepth_tex, vec3(tap_uv,shadow_coord.z)).r;

		bool occludes = shadow_coord.w > 0.0f && fetched < shadow_coord.z - bias;
		if (!occludes)
		{
			continue;
		}

		depth_sum += fetched;
		hits += 1.0;
	}

	if (hits > 0.0)
	{
		return vec2(depth_sum / hits, 1.0f);
	}

	return vec2(1.0f,0.0f);
}

// PCSS soft shadow.
//   sample_clip_pos - fragment position in the light's clip space
//   NoL             - N dot L, used for the slope-dependent bias
//   shadowdepth_tex - shadow depth map
//   penumbra_width  - scale applied to the estimated penumbra size
//   radius          - blocker search radius, in texels
// Returns 1.0 when the blocker search finds nothing, otherwise the result of
// shadow_pcf evaluated with the estimated penumbra size.
float shadow_pcss(vec4 sample_clip_pos,float NoL,sampler2DShadow shadowdepth_tex,float penumbra_width,float radius)
{
	// Size of one shadow-map texel in normalized uv units.
	vec2 texel_size = 1.0 / vec2(textureSize(shadowdepth_tex, 0));
	float bias = max(0.05 * (1.0 - NoL), 0.005);

	// Perspective divide, then remap xy from [-1, 1] to [0, 1].
	vec4 shadow_coord = sample_clip_pos / sample_clip_pos.w;
	shadow_coord.st = shadow_coord.st * 0.5f + 0.5f;

	vec2 blocker = pcss_search_blocker(bias,shadow_coord, NoL, radius,shadowdepth_tex,texel_size);
	bool has_blocker = !(blocker.y < 0.5f);
	if (!has_blocker)
	{
		// Early out: skips the PCF cost for regions without any blocker.
		return 1.0f;
	}

	// Penumbra estimate: receiver-to-blocker gap (at least 0.1) relative to
	// the average blocker depth.
	// NOTE(review): nothing here guards against blocker.x being 0 - confirm.
	float avg_blocker_depth = blocker.x;
	float receiver_gap = max(shadow_coord.z - avg_blocker_depth,0.1f);
	float penumbra_size = receiver_gap / avg_blocker_depth * penumbra_width;
	return shadow_pcf(bias,shadow_coord, NoL, penumbra_size,shadowdepth_tex,texel_size);
}

画外音:

PCSS效果很好,但我以后都不会考虑了,因为实在是太慢了。

© - 2024 · 月光下的旅行。 禁止转载