radeonsi: export SampleMask from pixel shaders at full rate
author: Marek Olšák <marek.olsak@amd.com>
Fri, 9 Sep 2016 23:21:11 +0000 (01:21 +0200)
committer: Marek Olšák <marek.olsak@amd.com>
Tue, 13 Sep 2016 18:38:25 +0000 (20:38 +0200)
Heaven and Valley write gl_SampleMask and not Z.
Use 16_ABGR instead of 32_ABGR if Z isn't written.

Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
src/gallium/drivers/radeonsi/si_shader.c
src/gallium/drivers/radeonsi/si_shader.h
src/gallium/drivers/radeonsi/si_state_shaders.c

index 68017221391d1bd5b64815ac9eb6bb26fef6f43c..6d30d1c9eb30c429e6a0950fd24d478c5826b099 100644 (file)
@@ -2928,6 +2928,25 @@ struct si_ps_exports {
        LLVMValueRef args[10][9];
 };
 
+unsigned si_get_spi_shader_z_format(bool writes_z, bool writes_stencil,
+                                   bool writes_samplemask)
+{
+       if (writes_z) {
+               /* Z needs 32 bits, so every case in this branch returns a
+                * 32-bit format. The checks run from the widest format to
+                * the narrowest: a sample mask selects 32_ABGR (whether or
+                * not stencil is also written), otherwise stencil selects
+                * 32_GR, and Z alone selects 32_R.
+                */
+               if (writes_samplemask)
+                       return V_028710_SPI_SHADER_32_ABGR;
+               else if (writes_stencil)
+                       return V_028710_SPI_SHADER_32_GR;
+               else
+                       return V_028710_SPI_SHADER_32_R;
+       } else if (writes_stencil || writes_samplemask) {
+               /* Both stencil and sample mask need only 16 bits, so when Z
+                * is not written the 16-bit UINT16_ABGR format is returned
+                * for stencil, sample mask, or both.
+                */
+               return V_028710_SPI_SHADER_UINT16_ABGR;
+       } else {
+               return V_028710_SPI_SHADER_ZERO; /* none of Z, stencil, sample mask is written */
+       }
+}
+
 static void si_export_mrt_z(struct lp_build_tgsi_context *bld_base,
                            LLVMValueRef depth, LLVMValueRef stencil,
                            LLVMValueRef samplemask, struct si_ps_exports *exp)
@@ -2937,6 +2956,9 @@ static void si_export_mrt_z(struct lp_build_tgsi_context *bld_base,
        struct lp_build_context *uint = &bld_base->uint_bld;
        LLVMValueRef args[9];
        unsigned mask = 0;
+       unsigned format = si_get_spi_shader_z_format(depth != NULL,
+                                                    stencil != NULL,
+                                                    samplemask != NULL);
 
        assert(depth || stencil || samplemask);
 
@@ -2952,19 +2974,36 @@ static void si_export_mrt_z(struct lp_build_tgsi_context *bld_base,
        args[7] = base->undef; /* B, sample mask */
        args[8] = base->undef; /* A, alpha to mask */
 
-       if (depth) {
-               args[5] = depth;
-               mask |= 0x1;
-       }
-
-       if (stencil) {
-               args[6] = stencil;
-               mask |= 0x2;
-       }
+       if (format == V_028710_SPI_SHADER_UINT16_ABGR) {
+               assert(!depth);
+               args[4] = uint->one; /* COMPR flag */
 
-       if (samplemask) {
-               args[7] = samplemask;
-               mask |= 0x4;
+               if (stencil) {
+                       /* Stencil should be in X[23:16]. */
+                       stencil = bitcast(bld_base, TGSI_TYPE_UNSIGNED, stencil);
+                       stencil = LLVMBuildShl(base->gallivm->builder, stencil,
+                                              LLVMConstInt(ctx->i32, 16, 0), "");
+                       args[5] = bitcast(bld_base, TGSI_TYPE_FLOAT, stencil);
+                       mask |= 0x3;
+               }
+               if (samplemask) {
+                       /* SampleMask should be in Y[15:0]. */
+                       args[6] = samplemask;
+                       mask |= 0xc;
+               }
+       } else {
+               if (depth) {
+                       args[5] = depth;
+                       mask |= 0x1;
+               }
+               if (stencil) {
+                       args[6] = stencil;
+                       mask |= 0x2;
+               }
+               if (samplemask) {
+                       args[7] = samplemask;
+                       mask |= 0x4;
+               }
        }
 
        /* SI (except OLAND) has a bug that it only looks
index fc1b22d7383b8caf453655dec963503261201016..de4705dd513a412fb05eae44e59527fc926ef61a 100644 (file)
@@ -518,5 +518,7 @@ void si_shader_apply_scratch_relocs(struct si_context *sctx,
 void si_shader_binary_read_config(struct radeon_shader_binary *binary,
                                  struct si_shader_config *conf,
                                  unsigned symbol_offset);
+unsigned si_get_spi_shader_z_format(bool writes_z, bool writes_stencil,
+                                   bool writes_samplemask);
 
 #endif
index b4f19fea2d52de73752d826fc7339fafcb03d25c..816aadcc4ff3375633c5f3842e22fa9d469c0dab 100644 (file)
@@ -748,10 +748,9 @@ static void si_shader_ps(struct si_shader *shader)
        si_pm4_set_reg(pm4, R_0286D8_SPI_PS_IN_CONTROL, spi_ps_in_control);
 
        si_pm4_set_reg(pm4, R_028710_SPI_SHADER_Z_FORMAT,
-                      info->writes_samplemask ? V_028710_SPI_SHADER_32_ABGR :
-                      info->writes_stencil ? V_028710_SPI_SHADER_32_GR :
-                      info->writes_z ? V_028710_SPI_SHADER_32_R :
-                      V_028710_SPI_SHADER_ZERO);
+                      si_get_spi_shader_z_format(info->writes_z,
+                                                 info->writes_stencil,
+                                                 info->writes_samplemask));
 
        si_pm4_set_reg(pm4, R_028714_SPI_SHADER_COL_FORMAT, spi_shader_col_format);
        si_pm4_set_reg(pm4, R_02823C_CB_SHADER_MASK, cb_shader_mask);