iris: Enable nir_opt_large_constants
author: Jason Ekstrand <jason@jlekstrand.net>
Tue, 28 May 2019 22:33:58 +0000 (17:33 -0500)
committer: Jason Ekstrand <jason@jlekstrand.net>
Wed, 29 May 2019 21:09:16 +0000 (21:09 +0000)
Shader-db results on Kaby Lake:

    total instructions in shared programs: 15306230 -> 15304726 (<.01%)
    instructions in affected programs: 4570 -> 3066 (-32.91%)
    helped: 16
    HURT: 0

    total cycles in shared programs: 361703436 -> 361680041 (<.01%)
    cycles in affected programs: 129388 -> 105993 (-18.08%)
    helped: 16
    HURT: 0

    LOST:   0
    GAINED: 2

The helped programs were in XCom 2, Deus Ex: Mankind Divided, and Kerbal
Space Program.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
src/gallium/drivers/iris/iris_context.h
src/gallium/drivers/iris/iris_program.c
src/gallium/drivers/iris/iris_screen.c
src/gallium/drivers/iris/iris_state.c

index 3009c8d5627f2c172dc305e7cbbb3f5e508b50f4..d54f348debe59329d6d375d84e97850e706a2d09 100644 (file)
@@ -273,6 +273,12 @@ struct iris_uncompiled_shader {
 
    /** Should we use ALT mode for math?  Useful for ARB programs. */
    bool use_alt_mode;
+
+   /** Constant data scraped from the shader by nir_opt_large_constants */
+   struct pipe_resource *const_data;
+
+   /** Surface state for const_data */
+   struct iris_state_ref const_data_state;
 };
 
 /**
index e0a6a5c1edd5a13a32a647e0e066007b6261d9ee..87b887aa3b6fe881f9544125aa508e7d127d2c2a 100644 (file)
@@ -273,6 +273,14 @@ assign_common_binding_table_offsets(const struct gen_device_info *devinfo,
       prog_data->binding_table.image_start = 0xd0d0d0d0;
    }
 
+   /* Allocate a slot in the UBO section for NIR constants if present.
+    * We don't include them in iris_compiled_shader::num_cbufs because
+    * they are uploaded separately from shs->constbuf[], but from a shader
+    * point of view, they're another UBO (at the end of the section).
+    */
+   if (nir->constant_data_size > 0)
+      num_cbufs++;
+
    if (num_cbufs) {
       //assert(info->num_ubos <= BRW_MAX_UBO);
       prog_data->binding_table.ubo_start = next_binding_table_offset;
@@ -361,6 +369,7 @@ iris_setup_uniforms(const struct brw_compiler *compiler,
 
    b.cursor = nir_before_block(nir_start_block(impl));
    nir_ssa_def *temp_ubo_name = nir_ssa_undef(&b, 1, 32);
+   nir_ssa_def *temp_const_ubo_name = NULL;
 
    /* Turn system value intrinsics into uniforms */
    nir_foreach_block(block, impl) {
@@ -372,6 +381,34 @@ iris_setup_uniforms(const struct brw_compiler *compiler,
          nir_ssa_def *offset;
 
          switch (intrin->intrinsic) {
+         case nir_intrinsic_load_constant: {
+            /* This one is special because it reads from the shader constant
+             * data and not cbuf0 which gallium uploads for us.
+             */
+            b.cursor = nir_before_instr(instr);
+            nir_ssa_def *offset =
+               nir_iadd_imm(&b, nir_ssa_for_src(&b, intrin->src[0], 1),
+                                nir_intrinsic_base(intrin));
+
+            if (temp_const_ubo_name == NULL)
+               temp_const_ubo_name = nir_imm_int(&b, 0);
+
+            nir_intrinsic_instr *load_ubo =
+               nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_ubo);
+            load_ubo->num_components = intrin->num_components;
+            load_ubo->src[0] = nir_src_for_ssa(temp_const_ubo_name);
+            load_ubo->src[1] = nir_src_for_ssa(offset);
+            nir_ssa_dest_init(&load_ubo->instr, &load_ubo->dest,
+                              intrin->dest.ssa.num_components,
+                              intrin->dest.ssa.bit_size,
+                              intrin->dest.ssa.name);
+            nir_builder_instr_insert(&b, &load_ubo->instr);
+
+            nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
+                                     nir_src_for_ssa(&load_ubo->dest.ssa));
+            nir_instr_remove(&intrin->instr);
+            continue;
+         }
          case nir_intrinsic_load_user_clip_plane: {
             unsigned ucp = nir_intrinsic_ucp_id(intrin);
 
@@ -529,6 +566,16 @@ iris_setup_uniforms(const struct brw_compiler *compiler,
    if (num_cbufs || num_system_values || nir->num_uniforms)
       num_cbufs++;
 
+   /* Constant loads (if any) need to go at the end of the constant buffers so
+    * we need to know num_cbufs before we can lower to them.
+    */
+   if (temp_const_ubo_name != NULL) {
+      nir_load_const_instr *const_ubo_index =
+         nir_instr_as_load_const(temp_const_ubo_name->parent_instr);
+      assert(const_ubo_index->def.bit_size == 32);
+      const_ubo_index->value[0].u32 = num_cbufs;
+   }
+
    *out_system_values = system_values;
    *out_num_system_values = num_system_values;
    *out_num_cbufs = num_cbufs;
@@ -1514,6 +1561,7 @@ iris_create_uncompiled_shader(struct pipe_context *ctx,
                               nir_shader *nir,
                               const struct pipe_stream_output_info *so_info)
 {
+   struct iris_context *ice = (void *)ctx;
    struct iris_screen *screen = (struct iris_screen *)ctx->screen;
    const struct gen_device_info *devinfo = &screen->devinfo;
 
@@ -1527,6 +1575,19 @@ iris_create_uncompiled_shader(struct pipe_context *ctx,
    NIR_PASS_V(nir, brw_nir_lower_image_load_store, devinfo);
    NIR_PASS_V(nir, iris_lower_storage_image_derefs);
 
+   if (nir->constant_data_size > 0) {
+      unsigned data_offset;
+      u_upload_data(ice->shaders.uploader, 0, nir->constant_data_size,
+                    32, nir->constant_data, &data_offset, &ish->const_data);
+
+      struct pipe_shader_buffer psb = {
+         .buffer = ish->const_data,
+         .buffer_offset = data_offset,
+         .buffer_size = nir->constant_data_size,
+      };
+      iris_upload_ubo_ssbo_surf_state(ice, &psb, &ish->const_data_state, false);
+   }
+
    ish->program_id = get_new_program_id(screen);
    ish->nir = nir;
    if (so_info) {
@@ -1771,6 +1832,11 @@ iris_delete_shader_state(struct pipe_context *ctx, void *state, gl_shader_stage
       ice->state.dirty |= IRIS_DIRTY_UNCOMPILED_VS << stage;
    }
 
+   if (ish->const_data) {
+      pipe_resource_reference(&ish->const_data, NULL);
+      pipe_resource_reference(&ish->const_data_state.res, NULL);
+   }
+
    ralloc_free(ish->nir);
    free(ish);
 }
index 611718e90df1ed271aaf8f8c7f866e6b615b252e..b3c4466805e0f3ebd8deddc2f21a7f17b96ca35e 100644 (file)
@@ -647,6 +647,7 @@ iris_screen_create(int fd, const struct pipe_screen_config *config)
    screen->compiler->shader_debug_log = iris_shader_debug_log;
    screen->compiler->shader_perf_log = iris_shader_perf_log;
    screen->compiler->supports_pull_constants = false;
+   screen->compiler->supports_shader_constants = true;
 
    iris_disk_cache_init(screen);
 
index 0f6290ae8dcabc65bf6407e434b3b1d270b628ac..fb72c4b5b7dfe34d03dcdce873d44e5aa78ad7e6 100644 (file)
@@ -4120,6 +4120,7 @@ iris_populate_binding_table(struct iris_context *ice,
                             bool pin_only)
 {
    const struct iris_binder *binder = &ice->state.binder;
+   struct iris_uncompiled_shader *ish = ice->shaders.uncompiled[stage];
    struct iris_compiled_shader *shader = ice->shaders.prog[stage];
    if (!shader)
       return;
@@ -4194,6 +4195,14 @@ iris_populate_binding_table(struct iris_context *ice,
       push_bt_entry(addr);
    }
 
+   if (ish->const_data) {
+      iris_use_pinned_bo(batch, iris_resource_bo(ish->const_data), false);
+      iris_use_pinned_bo(batch, iris_resource_bo(ish->const_data_state.res),
+                         false);
+      uint32_t addr = ish->const_data_state.offset;
+      push_bt_entry(addr);
+   }
+
    bt_assert(ssbo_start, info->num_abos + info->num_ssbos > 0);
 
    /* XXX: st is wasting 16 binding table slots for ABOs.  Should add a cap