i965/compute: Skip SIMD8 generation if it can't be used
authorJordan Justen <jordan.l.justen@intel.com>
Mon, 22 Feb 2016 18:42:07 +0000 (10:42 -0800)
committerJordan Justen <jordan.l.justen@intel.com>
Tue, 8 Mar 2016 22:27:18 +0000 (14:27 -0800)
If the local workgroup size is sufficiently large, then the SIMD8
program can't be used. In this case we can skip generating the SIMD8
program. For complex programs this can save a significant amount of
time.

Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
Reviewed-by: Matt Turner <mattst88@gmail.com>
src/mesa/drivers/dri/i965/brw_fs.cpp

index 27f2123b7a0fa74595c4c83a79e9a731bdd1a359..b5404e1df5089df66bb8ab84a48751d2e3602683 100644 (file)
@@ -1933,8 +1933,8 @@ fs_visitor::compact_virtual_grfs()
 void
 fs_visitor::assign_constant_locations()
 {
-   /* Only the first compile (SIMD8 mode) gets to decide on locations. */
-   if (dispatch_width != 8)
+   /* Only the first compile gets to decide on locations. */
+   if (dispatch_width != min_dispatch_width)
       return;
 
    unsigned int num_pull_constants = 0;
@@ -5733,6 +5733,7 @@ brw_compile_cs(const struct brw_compiler *compiler, void *log_data,
       shader->info.cs.local_size[2];
 
    unsigned max_cs_threads = compiler->devinfo->max_cs_threads;
+   unsigned simd_required = DIV_ROUND_UP(local_workgroup_size, max_cs_threads);
 
    cfg_t *cfg = NULL;
    const char *fail_msg = NULL;
@@ -5742,11 +5743,13 @@ brw_compile_cs(const struct brw_compiler *compiler, void *log_data,
    fs_visitor v8(compiler, log_data, mem_ctx, key, &prog_data->base,
                  NULL, /* Never used in core profile */
                  shader, 8, shader_time_index);
-   if (!v8.run_cs()) {
-      fail_msg = v8.fail_msg;
-   } else if (local_workgroup_size <= 8 * max_cs_threads) {
-      cfg = v8.cfg;
-      prog_data->simd_size = 8;
+   if (simd_required <= 8) {
+      if (!v8.run_cs()) {
+         fail_msg = v8.fail_msg;
+      } else {
+         cfg = v8.cfg;
+         prog_data->simd_size = 8;
+      }
    }
 
    fs_visitor v16(compiler, log_data, mem_ctx, key, &prog_data->base,
@@ -5756,7 +5759,8 @@ brw_compile_cs(const struct brw_compiler *compiler, void *log_data,
        !fail_msg && !v8.simd16_unsupported &&
        local_workgroup_size <= 16 * max_cs_threads) {
       /* Try a SIMD16 compile */
-      v16.import_uniforms(&v8);
+      if (simd_required <= 8)
+         v16.import_uniforms(&v8);
       if (!v16.run_cs()) {
          compiler->shader_perf_log(log_data,
                                    "SIMD16 shader failed to compile: %s",