radeonsi: enable TGSI support cap for compute shaders
author Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Sat, 19 Mar 2016 14:16:50 +0000 (15:16 +0100)
committer Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Tue, 19 Apr 2016 16:31:23 +0000 (18:31 +0200)
v2: Use chip_class instead of family.

v3: Check kernel version for SI.

v4: Preemptively allow amdgpu winsys for SI.

Signed-off-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
docs/GL3.txt
docs/relnotes/11.3.0.html
src/gallium/drivers/radeon/r600_pipe_common.c
src/gallium/drivers/radeonsi/si_pipe.c

index 3febd6e45f0435a5f7a2eda666f6868d1f628768..6214f8d307303b90cd6357c9e867f09ce865e1f3 100644 (file)
@@ -167,7 +167,7 @@ GL 4.3, GLSL 4.30:
   GL_ARB_arrays_of_arrays                               DONE (all drivers that support GLSL 1.30)
   GL_ARB_ES3_compatibility                              DONE (all drivers that support GLSL 3.30)
   GL_ARB_clear_buffer_object                            DONE (all drivers)
-  GL_ARB_compute_shader                                 DONE (i965)
+  GL_ARB_compute_shader                                 DONE (i965, radeonsi)
   GL_ARB_copy_image                                     DONE (i965, nv50, nvc0, r600, radeonsi)
   GL_KHR_debug                                          DONE (all drivers)
   GL_ARB_explicit_uniform_location                      DONE (all drivers that support GLSL)
@@ -225,7 +225,7 @@ GL 4.5, GLSL 4.50:
 These are the extensions cherry-picked to make GLES 3.1
 GLES3.1, GLSL ES 3.1
   GL_ARB_arrays_of_arrays                               DONE (all drivers that support GLSL 1.30)
-  GL_ARB_compute_shader                                 DONE (i965)
+  GL_ARB_compute_shader                                 DONE (i965, radeonsi)
   GL_ARB_draw_indirect                                  DONE (i965, nvc0, r600, radeonsi, llvmpipe, softpipe)
   GL_ARB_explicit_uniform_location                      DONE (all drivers that support GLSL)
   GL_ARB_framebuffer_no_attachments                     DONE (i965, nvc0, r600, radeonsi, softpipe)
index 0f9aed8fd1e26ecfd7d18c4fe35986683fd7d5ee..5a7083c5ba4287529e8fff76a1b70bb9b8da3804 100644 (file)
@@ -45,6 +45,7 @@ Note: some of the new features are only available with certain drivers.
 
 <ul>
 <li>OpenGL 4.2 on radeonsi</li>
+<li>GL_ARB_compute_shader on radeonsi</li>
 <li>GL_ARB_framebuffer_no_attachments on nvc0, r600, radeonsi, softpipe</li>
 <li>GL_ARB_internalformat_query2 on all drivers</li>
 <li>GL_ARB_robust_buffer_access_behavior on radeonsi</li>
index a8660f20c86287cbd8fd6f29d71fae5f9de2c56e..9ed6da6a82becae0d3e2fba8bb4025fef49e89bc 100644 (file)
@@ -646,23 +646,34 @@ static int r600_get_compute_param(struct pipe_screen *screen,
                        uint64_t *grid_size = ret;
                        grid_size[0] = 65535;
                        grid_size[1] = 65535;
-                       grid_size[2] = 1;
+                       grid_size[2] = 65535;
                }
                return 3 * sizeof(uint64_t) ;
 
        case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE:
                if (ret) {
                        uint64_t *block_size = ret;
-                       block_size[0] = 256;
-                       block_size[1] = 256;
-                       block_size[2] = 256;
+                       if (rscreen->chip_class >= SI && HAVE_LLVM >= 0x309 &&
+                           ir_type == PIPE_SHADER_IR_TGSI) {
+                               block_size[0] = 2048;
+                               block_size[1] = 2048;
+                               block_size[2] = 2048;
+                       } else {
+                               block_size[0] = 256;
+                               block_size[1] = 256;
+                               block_size[2] = 256;
+                       }
                }
                return 3 * sizeof(uint64_t);
 
        case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK:
                if (ret) {
                        uint64_t *max_threads_per_block = ret;
-                       *max_threads_per_block = 256;
+                       if (rscreen->chip_class >= SI && HAVE_LLVM >= 0x309 &&
+                           ir_type == PIPE_SHADER_IR_TGSI)
+                               *max_threads_per_block = 2048;
+                       else
+                               *max_threads_per_block = 256;
                }
                return sizeof(uint64_t);
 
index dabd28a4bc223b0f07c4fdd97f198b7686a2a1cb..17d59b60d06eba884d223f26a8cb2777a59ffd18 100644 (file)
@@ -473,6 +473,8 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
 
 static int si_get_shader_param(struct pipe_screen* pscreen, unsigned shader, enum pipe_shader_cap param)
 {
+       struct si_screen *sscreen = (struct si_screen *)pscreen;
+
        switch(shader)
        {
        case PIPE_SHADER_FRAGMENT:
@@ -490,9 +492,19 @@ static int si_get_shader_param(struct pipe_screen* pscreen, unsigned shader, enu
                case PIPE_SHADER_CAP_PREFERRED_IR:
                        return PIPE_SHADER_IR_NATIVE;
 
-               case PIPE_SHADER_CAP_SUPPORTED_IRS:
-                       return 0;
+               case PIPE_SHADER_CAP_SUPPORTED_IRS: {
+                       int ir = 1 << PIPE_SHADER_IR_NATIVE;
 
+                       /* Old kernels disallowed some register writes for SI
+                        * that are used for indirect dispatches. */
+                       if (HAVE_LLVM >= 0x309 && (sscreen->b.chip_class >= CIK ||
+                                                  sscreen->b.info.drm_major == 3 ||
+                                                  (sscreen->b.info.drm_major == 2 &&
+                                                   sscreen->b.info.drm_minor >= 45)))
+                               ir |= 1 << PIPE_SHADER_IR_TGSI;
+
+                       return ir;
+               }
                case PIPE_SHADER_CAP_DOUBLES:
                        return HAVE_LLVM >= 0x0307;