radeonsi/compute: Fix LDS size calculation
author Tom Stellard <thomas.stellard@amd.com>
Thu, 22 Aug 2013 15:22:58 +0000 (11:22 -0400)
committer Tom Stellard <thomas.stellard@amd.com>
Fri, 22 Nov 2013 00:14:58 +0000 (16:14 -0800)
We need to include the number of LDS bytes allocated by the state tracker.

CC: "10.0" <mesa-stable@lists.freedesktop.org>
src/gallium/drivers/radeonsi/radeonsi_compute.c

index 5df972fb20f23cb4d633bc3412c7f66f3968d2c3..2d53f2d9864d85f6790c4d4c02a730c607bc7d6e 100644 (file)
@@ -103,6 +103,7 @@ static void radeonsi_launch_grid(
        unsigned arg_user_sgpr_count = 2;
        unsigned i;
        struct si_pipe_shader *shader = &program->kernels[pc];
+       unsigned lds_blocks;
 
        pm4->compute_pkt = true;
        si_cmd_context_control(pm4);
@@ -194,6 +195,20 @@ static void radeonsi_launch_grid(
                                        shader->num_sgprs)) - 1) / 8))
                ;
 
+       lds_blocks = shader->lds_size;
+       /* XXX: We are over allocating LDS.  For SI, the shader reports LDS in
+        * blocks of 256 bytes, so if there are 4 bytes lds allocated in
+        * the shader and 4 bytes allocated by the state tracker, then
+        * we will set LDS_SIZE to 512 bytes rather than 256.
+        */
+       if (rctx->b.chip_class <= SI) {
+               lds_blocks += align(program->local_size, 256) >> 8;
+       } else {
+               lds_blocks += align(program->local_size, 512) >> 9;
+       }
+
+       assert(lds_blocks <= 0xFF);
+
        si_pm4_set_reg(pm4, R_00B84C_COMPUTE_PGM_RSRC2,
                S_00B84C_SCRATCH_EN(0)
                | S_00B84C_USER_SGPR(arg_user_sgpr_count)
@@ -202,7 +217,7 @@ static void radeonsi_launch_grid(
                | S_00B84C_TGID_Z_EN(1)
                | S_00B84C_TG_SIZE_EN(1)
                | S_00B84C_TIDIG_COMP_CNT(2)
-               | S_00B84C_LDS_SIZE(shader->lds_size)
+               | S_00B84C_LDS_SIZE(lds_blocks)
                | S_00B84C_EXCP_EN(0))
                ;
        si_pm4_set_reg(pm4, R_00B854_COMPUTE_RESOURCE_LIMITS, 0);