radv: export SampleMask from pixel shaders at full rate
author Samuel Pitoiset <samuel.pitoiset@gmail.com>
Thu, 14 Dec 2017 12:51:47 +0000 (13:51 +0100)
committer Samuel Pitoiset <samuel.pitoiset@gmail.com>
Thu, 14 Dec 2017 21:23:28 +0000 (22:23 +0100)
Use 16_ABGR instead of 32_ABGR if Z isn't written.

Ported from RadeonSI.

No CTS regressions on Polaris.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
src/amd/common/ac_nir_to_llvm.c
src/amd/vulkan/radv_pipeline.c

index 2fe346b012e06b2a809b222bf975e57ef2beeab2..63803f146444d67a44417b5328b380c90a9b0ec5 100644 (file)
@@ -32,6 +32,7 @@
 #include <llvm-c/Transforms/Scalar.h>
 #include "ac_shader_abi.h"
 #include "ac_shader_info.h"
+#include "ac_shader_util.h"
 #include "ac_exp_param.h"
 
 enum radeon_llvm_calling_convention {
@@ -6211,19 +6212,42 @@ si_export_mrt_z(struct nir_to_llvm_context *ctx,
        args.out[2] = LLVMGetUndef(ctx->ac.f32); /* B, sample mask */
        args.out[3] = LLVMGetUndef(ctx->ac.f32); /* A, alpha to mask */
 
-       if (depth) {
-               args.out[0] = depth;
-               args.enabled_channels |= 0x1;
-       }
+       unsigned format = ac_get_spi_shader_z_format(depth != NULL,
+                                                    stencil != NULL,
+                                                    samplemask != NULL);
+
+       if (format == V_028710_SPI_SHADER_UINT16_ABGR) {
+               assert(!depth);
+               args.compr = 1; /* COMPR flag */
+
+               if (stencil) {
+                       /* Stencil should be in X[23:16]. */
+                       stencil = ac_to_integer(&ctx->ac, stencil);
+                       stencil = LLVMBuildShl(ctx->builder, stencil,
+                                              LLVMConstInt(ctx->ac.i32, 16, 0), "");
+                       args.out[0] = ac_to_float(&ctx->ac, stencil);
+                       args.enabled_channels |= 0x3;
+               }
+               if (samplemask) {
+                       /* SampleMask should be in Y[15:0]. */
+                       args.out[1] = samplemask;
+                       args.enabled_channels |= 0xc;
+               }
+       } else {
+               if (depth) {
+                       args.out[0] = depth;
+                       args.enabled_channels |= 0x1;
+               }
 
-       if (stencil) {
-               args.out[1] = stencil;
-               args.enabled_channels |= 0x2;
-       }
+               if (stencil) {
+                       args.out[1] = stencil;
+                       args.enabled_channels |= 0x2;
+               }
 
-       if (samplemask) {
-               args.out[2] = samplemask;
-               args.enabled_channels |= 0x4;
+               if (samplemask) {
+                       args.out[2] = samplemask;
+                       args.enabled_channels |= 0x4;
+               }
        }
 
        /* SI (except OLAND and HAINAN) has a bug that it only looks
index 0146d6935e00f47b87b36143f0b52a783387d5c0..1ada69d92f8274f7dc555fb1fdb57e893bba8ca6 100644 (file)
@@ -46,6 +46,7 @@
 #include "vk_format.h"
 #include "util/debug.h"
 #include "ac_exp_param.h"
+#include "ac_shader_util.h"
 
 static void
 radv_pipeline_destroy(struct radv_device *device,
@@ -2108,11 +2109,11 @@ radv_pipeline_init(struct radv_pipeline *pipeline,
        if (pipeline->device->physical_device->has_rbplus)
                pipeline->graphics.db_shader_control |= S_02880C_DUAL_QUAD_DISABLE(1);
 
-       pipeline->graphics.shader_z_format =
-               ps->info.fs.writes_sample_mask ? V_028710_SPI_SHADER_32_ABGR :
-               ps->info.fs.writes_stencil ? V_028710_SPI_SHADER_32_GR :
-               ps->info.fs.writes_z ? V_028710_SPI_SHADER_32_R :
-               V_028710_SPI_SHADER_ZERO;
+       unsigned shader_z_format =
+               ac_get_spi_shader_z_format(ps->info.fs.writes_z,
+                                          ps->info.fs.writes_stencil,
+                                          ps->info.fs.writes_sample_mask);
+       pipeline->graphics.shader_z_format = shader_z_format;
 
        calculate_vgt_gs_mode(pipeline);
        calculate_vs_outinfo(pipeline);