Merge remote-tracking branch 'jekstrand/wip/i965-uniforms' into vulkan

[mesa.git] / src / mesa / drivers / dri / i965 / brw_fs_generator.cpp
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp

index 68c6443306e5eaa994b249ce12290ae546e0736e..b1134cff3c86f0a788dfd8006c19d10190e42fc3 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -111,14 +111,14 @@ fs_generator::fs_generator(const struct brw_compiler *compiler, void *log_data,
                             struct brw_stage_prog_data *prog_data,
                             unsigned promoted_constants,
                             bool runtime_check_aads_emit,
-                           const char *stage_abbrev)
+                           gl_shader_stage stage)
  
     : compiler(compiler), log_data(log_data),
       devinfo(compiler->devinfo), key(key),
       prog_data(prog_data),
       promoted_constants(promoted_constants),
       runtime_check_aads_emit(runtime_check_aads_emit), debug_flag(false),
-     stage_abbrev(stage_abbrev), mem_ctx(mem_ctx)
+     stage(stage), mem_ctx(mem_ctx)
  {
     p = rzalloc(mem_ctx, struct brw_codegen);
     brw_init_codegen(devinfo, p, mem_ctx);
@@ -351,23 +351,47 @@ fs_generator::generate_mov_indirect(fs_inst *inst,
  
     unsigned imm_byte_offset = reg.nr * REG_SIZE + reg.subnr;
  
-   /* We use VxH indirect addressing, clobbering a0.0 through a0.7. */
-   struct brw_reg addr = vec8(brw_address_reg(0));
+   if (indirect_byte_offset.file == BRW_IMMEDIATE_VALUE) {
+      imm_byte_offset += indirect_byte_offset.ud;
  
-   /* The destination stride of an instruction (in bytes) must be greater
-    * than or equal to the size of the rest of the instruction.  Since the
-    * address register is of type UW, we can't use a D-type instruction.
-    * In order to get around this, re re-type to UW and use a stride.
-    */
-   indirect_byte_offset =
-      retype(spread(indirect_byte_offset, 2), BRW_REGISTER_TYPE_UW);
+      reg.nr = imm_byte_offset / REG_SIZE;
+      reg.subnr = imm_byte_offset % REG_SIZE;
+      brw_MOV(p, dst, reg);
+   } else {
+      /* Prior to Broadwell, there are only 8 address registers. */
+      assert(inst->exec_size == 8 || devinfo->gen >= 8);
  
-   /* Prior to Broadwell, there are only 8 address registers. */
-   assert(inst->exec_size == 8 || devinfo->gen >= 8);
+      /* We use VxH indirect addressing, clobbering a0.0 through a0.7. */
+      struct brw_reg addr = vec8(brw_address_reg(0));
  
-   brw_MOV(p, addr, indirect_byte_offset);
-   brw_inst_set_mask_control(devinfo, brw_last_inst, BRW_MASK_DISABLE);
-   brw_MOV(p, dst, retype(brw_VxH_indirect(0, imm_byte_offset), dst.type));
+      /* The destination stride of an instruction (in bytes) must be greater
+       * than or equal to the size of the rest of the instruction.  Since the
+       * address register is of type UW, we can't use a D-type instruction.
+       * In order to get around this, re re-type to UW and use a stride.
+       */
+      indirect_byte_offset =
+         retype(spread(indirect_byte_offset, 2), BRW_REGISTER_TYPE_UW);
+
+      if (devinfo->gen < 8) {
+         /* Prior to broadwell, we have a restriction that the bottom 5 bits
+          * of the base offset and the bottom 5 bits of the indirect must add
+          * to less than 32.  In other words, the hardware needs to be able to
+          * add the bottom five bits of the two to get the subnumber and add
+          * the next 7 bits of each to get the actual register number.  Since
+          * the indirect may cause us to cross a register boundary, this makes
+          * it almost useless.  We could try and do something clever where we
+          * use a actual base offset if base_offset % 32 == 0 but that would
+          * mean we were generating different code depending on the base
+          * offset.  Instead, for the sake of consistency, we'll just do the
+          * add ourselves.
+          */
+         brw_ADD(p, addr, indirect_byte_offset, brw_imm_uw(imm_byte_offset));
+         brw_MOV(p, dst, retype(brw_VxH_indirect(0, 0), dst.type));
+      } else {
+         brw_MOV(p, addr, indirect_byte_offset);
+         brw_MOV(p, dst, retype(brw_VxH_indirect(0, imm_byte_offset), dst.type));
+      }
+   }
  }
  
  void
@@ -914,6 +938,14 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src
              /* Set the offset bits in DWord 2. */
              brw_MOV(p, get_element_ud(header_reg, 2),
                         brw_imm_ud(inst->offset));
+         } else if (stage != MESA_SHADER_VERTEX &&
+                    stage != MESA_SHADER_FRAGMENT) {
+            /* The vertex and fragment stages have g0.2 set to 0, so
+             * header0.2 is 0 when g0 is copied. Other stages may not, so we
+             * must set it to 0 to avoid setting undesirable bits in the
+             * message.
+             */
+            brw_MOV(p, get_element_ud(header_reg, 2), brw_imm_ud(0));
           }
  
           brw_adjust_sampler_state_pointer(p, header_reg, sampler_index);
@@ -2313,8 +2345,9 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
     compiler->shader_debug_log(log_data,
                                "%s SIMD%d shader: %d inst, %d loops, %u cycles, "
                                "%d:%d spills:fills, Promoted %u constants, "
-                              "compacted %d to %d bytes.\n",
-                              stage_abbrev, dispatch_width, before_size / 16,
+                              "compacted %d to %d bytes.",
+                              _mesa_shader_stage_to_abbrev(stage),
+                              dispatch_width, before_size / 16,
                                loop_count, cfg->cycle_count, spill_count,
                                fill_count, promoted_constants, before_size,
                                after_size);