i965/fs: Split pull parameter decision making from mechanical demoting.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_fs.cpp
index 65f2c808626c22c30a7357d15caf024ad6b6c7bc..3c8237a9d544eef8d223db2dde31fb57fa493f14 100644 (file)
@@ -55,7 +55,6 @@ void
 fs_inst::init()
 {
    memset(this, 0, sizeof(*this));
-   this->opcode = BRW_OPCODE_NOP;
    this->conditional_mod = BRW_CONDITIONAL_NONE;
 
    this->dst = reg_undef;
@@ -70,6 +69,7 @@ fs_inst::init()
 fs_inst::fs_inst()
 {
    init();
+   this->opcode = BRW_OPCODE_NOP;
 }
 
 fs_inst::fs_inst(enum opcode opcode)
@@ -244,8 +244,9 @@ fs_visitor::CMP(fs_reg dst, fs_reg src0, fs_reg src1, uint32_t condition)
 }
 
 exec_list
-fs_visitor::VARYING_PULL_CONSTANT_LOAD(fs_reg dst, fs_reg surf_index,
-                                       fs_reg varying_offset,
+fs_visitor::VARYING_PULL_CONSTANT_LOAD(const fs_reg &dst,
+                                       const fs_reg &surf_index,
+                                       const fs_reg &varying_offset,
                                        uint32_t const_offset)
 {
    exec_list instructions;
@@ -641,8 +642,8 @@ fs_visitor::emit_shader_time_write(enum shader_time_shader_type type,
    else
       payload = fs_reg(this, glsl_type::uint_type);
 
-   emit(fs_inst(SHADER_OPCODE_SHADER_TIME_ADD,
-                fs_reg(), payload, offset, value));
+   emit(new(mem_ctx) fs_inst(SHADER_OPCODE_SHADER_TIME_ADD,
+                             fs_reg(), payload, offset, value));
 }
 
 void
@@ -671,32 +672,32 @@ fs_visitor::fail(const char *format, ...)
 fs_inst *
 fs_visitor::emit(enum opcode opcode)
 {
-   return emit(fs_inst(opcode));
+   return emit(new(mem_ctx) fs_inst(opcode));
 }
 
 fs_inst *
 fs_visitor::emit(enum opcode opcode, fs_reg dst)
 {
-   return emit(fs_inst(opcode, dst));
+   return emit(new(mem_ctx) fs_inst(opcode, dst));
 }
 
 fs_inst *
 fs_visitor::emit(enum opcode opcode, fs_reg dst, fs_reg src0)
 {
-   return emit(fs_inst(opcode, dst, src0));
+   return emit(new(mem_ctx) fs_inst(opcode, dst, src0));
 }
 
 fs_inst *
 fs_visitor::emit(enum opcode opcode, fs_reg dst, fs_reg src0, fs_reg src1)
 {
-   return emit(fs_inst(opcode, dst, src0, src1));
+   return emit(new(mem_ctx) fs_inst(opcode, dst, src0, src1));
 }
 
 fs_inst *
 fs_visitor::emit(enum opcode opcode, fs_reg dst,
                  fs_reg src0, fs_reg src1, fs_reg src2)
 {
-   return emit(fs_inst(opcode, dst, src0, src1, src2));
+   return emit(new(mem_ctx) fs_inst(opcode, dst, src0, src1, src2));
 }
 
 void
@@ -1826,7 +1827,7 @@ fs_visitor::remove_dead_constants()
 void
 fs_visitor::move_uniform_array_access_to_pull_constants()
 {
-   int pull_constant_loc[uniforms];
+   pull_constant_loc = ralloc_array(mem_ctx, int, uniforms);
 
    for (unsigned int i = 0; i < uniforms; i++) {
       pull_constant_loc[i] = -1;
@@ -1853,35 +1854,21 @@ fs_visitor::move_uniform_array_access_to_pull_constants()
          if (pull_constant_loc[uniform] == -1) {
             const float **values = &stage_prog_data->param[uniform];
 
-            pull_constant_loc[uniform] = stage_prog_data->nr_pull_params;
-
             assert(param_size[uniform]);
 
             for (int j = 0; j < param_size[uniform]; j++) {
+               pull_constant_loc[uniform + j] = stage_prog_data->nr_pull_params;
+
                stage_prog_data->pull_param[stage_prog_data->nr_pull_params++] =
                   values[j];
             }
          }
-
-         /* Set up the annotation tracking for new generated instructions. */
-         base_ir = inst->ir;
-         current_annotation = inst->annotation;
-
-         fs_reg surf_index(stage_prog_data->binding_table.pull_constants_start);
-         fs_reg temp = fs_reg(this, glsl_type::float_type);
-         exec_list list = VARYING_PULL_CONSTANT_LOAD(temp,
-                                                     surf_index,
-                                                     *inst->src[i].reladdr,
-                                                     pull_constant_loc[uniform] +
-                                                     inst->src[i].reg_offset);
-         inst->insert_before(&list);
-
-         inst->src[i].file = temp.file;
-         inst->src[i].reg = temp.reg;
-         inst->src[i].reg_offset = temp.reg_offset;
-         inst->src[i].reladdr = NULL;
       }
    }
+   demote_pull_constants(true);
+
+   ralloc_free(pull_constant_loc);
+   pull_constant_loc = NULL;
 }
 
 /**
@@ -1902,17 +1889,12 @@ fs_visitor::setup_pull_constants()
    if (uniforms <= max_uniform_components)
       return;
 
-   if (dispatch_width == 16) {
-      fail("Pull constants not supported in SIMD16\n");
-      return;
-   }
-
    /* Just demote the end of the list.  We could probably do better
     * here, demoting things that are rarely used in the program first.
     */
    unsigned int pull_uniform_base = max_uniform_components;
 
-   int pull_constant_loc[uniforms];
+   pull_constant_loc = ralloc_array(mem_ctx, int, uniforms);
    for (unsigned int i = 0; i < uniforms; i++) {
       if (i < pull_uniform_base) {
          pull_constant_loc[i] = -1;
@@ -1936,6 +1918,16 @@ fs_visitor::setup_pull_constants()
    }
    uniforms = pull_uniform_base;
 
+   demote_pull_constants(false);
+}
+
+/**
+ * Replace UNIFORM register file access with either UNIFORM_PULL_CONSTANT_LOAD
+ * or VARYING_PULL_CONSTANT_LOAD instructions which load values into VGRFs.
+ */
+void
+fs_visitor::demote_pull_constants(bool reladdr_only)
+{
    foreach_list(node, &this->instructions) {
       fs_inst *inst = (fs_inst *)node;
 
@@ -1948,25 +1940,40 @@ fs_visitor::setup_pull_constants()
          if (pull_index == -1)
            continue;
 
-         assert(!inst->src[i].reladdr);
+         if (reladdr_only != (inst->src[i].reladdr != NULL))
+            continue;
 
-        fs_reg dst = fs_reg(this, glsl_type::float_type);
-        fs_reg index(stage_prog_data->binding_table.pull_constants_start);
-        fs_reg offset = fs_reg((unsigned)(pull_index * 4) & ~15);
-        fs_inst *pull =
-            new(mem_ctx) fs_inst(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
-                                 dst, index, offset);
-        pull->ir = inst->ir;
-        pull->annotation = inst->annotation;
+         /* Set up the annotation tracking for new generated instructions. */
+         base_ir = inst->ir;
+         current_annotation = inst->annotation;
 
-        inst->insert_before(pull);
+         fs_reg surf_index(stage_prog_data->binding_table.pull_constants_start);
+         fs_reg dst = fs_reg(this, glsl_type::float_type);
 
-        inst->src[i].file = GRF;
-        inst->src[i].reg = dst.reg;
-        inst->src[i].reg_offset = 0;
-        inst->src[i].set_smear(pull_index & 3);
+         /* Generate a pull load into dst. */
+         if (inst->src[i].reladdr) {
+            exec_list list = VARYING_PULL_CONSTANT_LOAD(dst,
+                                                        surf_index,
+                                                        *inst->src[i].reladdr,
+                                                        pull_index);
+            inst->insert_before(&list);
+            inst->src[i].reladdr = NULL;
+         } else {
+            fs_reg offset = fs_reg((unsigned)(pull_index * 4) & ~15);
+            fs_inst *pull =
+               new(mem_ctx) fs_inst(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
+                                    dst, surf_index, offset);
+            inst->insert_before(pull);
+            inst->src[i].set_smear(pull_index & 3);
+         }
+
+         /* Rewrite the instruction to use the temporary VGRF. */
+         inst->src[i].file = GRF;
+         inst->src[i].reg = dst.reg;
+         inst->src[i].reg_offset = 0;
       }
    }
+   invalidate_live_intervals();
 }
 
 bool
@@ -2986,11 +2993,11 @@ fs_visitor::dump_instructions()
    foreach_list(node, &this->instructions) {
       backend_instruction *inst = (backend_instruction *)node;
       max_pressure = MAX2(max_pressure, regs_live_at_ip[ip]);
-      printf("{%3d} %4d: ", regs_live_at_ip[ip], ip);
+      fprintf(stderr, "{%3d} %4d: ", regs_live_at_ip[ip], ip);
       dump_instruction(inst);
       ++ip;
    }
-   printf("Maximum %3d registers live at once.\n", max_pressure);
+   fprintf(stderr, "Maximum %3d registers live at once.\n", max_pressure);
 }
 
 void
@@ -2999,174 +3006,178 @@ fs_visitor::dump_instruction(backend_instruction *be_inst)
    fs_inst *inst = (fs_inst *)be_inst;
 
    if (inst->predicate) {
-      printf("(%cf0.%d) ",
+      fprintf(stderr, "(%cf0.%d) ",
              inst->predicate_inverse ? '-' : '+',
              inst->flag_subreg);
    }
 
-   printf("%s", brw_instruction_name(inst->opcode));
+   fprintf(stderr, "%s", brw_instruction_name(inst->opcode));
    if (inst->saturate)
-      printf(".sat");
+      fprintf(stderr, ".sat");
    if (inst->conditional_mod) {
-      printf("%s", conditional_modifier[inst->conditional_mod]);
+      fprintf(stderr, "%s", conditional_modifier[inst->conditional_mod]);
       if (!inst->predicate &&
           (brw->gen < 5 || (inst->opcode != BRW_OPCODE_SEL &&
                               inst->opcode != BRW_OPCODE_IF &&
                               inst->opcode != BRW_OPCODE_WHILE))) {
-         printf(".f0.%d", inst->flag_subreg);
+         fprintf(stderr, ".f0.%d", inst->flag_subreg);
       }
    }
-   printf(" ");
+   fprintf(stderr, " ");
 
 
    switch (inst->dst.file) {
    case GRF:
-      printf("vgrf%d", inst->dst.reg);
+      fprintf(stderr, "vgrf%d", inst->dst.reg);
       if (virtual_grf_sizes[inst->dst.reg] != 1 ||
           inst->dst.subreg_offset)
-         printf("+%d.%d", inst->dst.reg_offset, inst->dst.subreg_offset);
+         fprintf(stderr, "+%d.%d",
+                 inst->dst.reg_offset, inst->dst.subreg_offset);
       break;
    case MRF:
-      printf("m%d", inst->dst.reg);
+      fprintf(stderr, "m%d", inst->dst.reg);
       break;
    case BAD_FILE:
-      printf("(null)");
+      fprintf(stderr, "(null)");
       break;
    case UNIFORM:
-      printf("***u%d***", inst->dst.reg);
+      fprintf(stderr, "***u%d***", inst->dst.reg);
       break;
    case HW_REG:
       if (inst->dst.fixed_hw_reg.file == BRW_ARCHITECTURE_REGISTER_FILE) {
          switch (inst->dst.fixed_hw_reg.nr) {
          case BRW_ARF_NULL:
-            printf("null");
+            fprintf(stderr, "null");
             break;
          case BRW_ARF_ADDRESS:
-            printf("a0.%d", inst->dst.fixed_hw_reg.subnr);
+            fprintf(stderr, "a0.%d", inst->dst.fixed_hw_reg.subnr);
             break;
          case BRW_ARF_ACCUMULATOR:
-            printf("acc%d", inst->dst.fixed_hw_reg.subnr);
+            fprintf(stderr, "acc%d", inst->dst.fixed_hw_reg.subnr);
             break;
          case BRW_ARF_FLAG:
-            printf("f%d.%d", inst->dst.fixed_hw_reg.nr & 0xf,
+            fprintf(stderr, "f%d.%d", inst->dst.fixed_hw_reg.nr & 0xf,
                              inst->dst.fixed_hw_reg.subnr);
             break;
          default:
-            printf("arf%d.%d", inst->dst.fixed_hw_reg.nr & 0xf,
+            fprintf(stderr, "arf%d.%d", inst->dst.fixed_hw_reg.nr & 0xf,
                                inst->dst.fixed_hw_reg.subnr);
             break;
          }
       } else {
-         printf("hw_reg%d", inst->dst.fixed_hw_reg.nr);
+         fprintf(stderr, "hw_reg%d", inst->dst.fixed_hw_reg.nr);
       }
       if (inst->dst.fixed_hw_reg.subnr)
-         printf("+%d", inst->dst.fixed_hw_reg.subnr);
+         fprintf(stderr, "+%d", inst->dst.fixed_hw_reg.subnr);
       break;
    default:
-      printf("???");
+      fprintf(stderr, "???");
       break;
    }
-   printf(":%s, ", reg_encoding[inst->dst.type]);
+   fprintf(stderr, ":%s, ", brw_reg_type_letters(inst->dst.type));
 
    for (int i = 0; i < 3 && inst->src[i].file != BAD_FILE; i++) {
       if (inst->src[i].negate)
-         printf("-");
+         fprintf(stderr, "-");
       if (inst->src[i].abs)
-         printf("|");
+         fprintf(stderr, "|");
       switch (inst->src[i].file) {
       case GRF:
-         printf("vgrf%d", inst->src[i].reg);
+         fprintf(stderr, "vgrf%d", inst->src[i].reg);
          if (virtual_grf_sizes[inst->src[i].reg] != 1 ||
              inst->src[i].subreg_offset)
-            printf("+%d.%d", inst->src[i].reg_offset,
-                   inst->src[i].subreg_offset);
+            fprintf(stderr, "+%d.%d", inst->src[i].reg_offset,
+                    inst->src[i].subreg_offset);
          break;
       case MRF:
-         printf("***m%d***", inst->src[i].reg);
+         fprintf(stderr, "***m%d***", inst->src[i].reg);
          break;
       case UNIFORM:
-         printf("u%d", inst->src[i].reg);
-         if (virtual_grf_sizes[inst->src[i].reg] != 1 ||
-             inst->src[i].subreg_offset)
-            printf("+%d.%d", inst->src[i].reg_offset,
-                   inst->src[i].subreg_offset);
+         fprintf(stderr, "u%d", inst->src[i].reg);
+         if (inst->src[i].reladdr) {
+            fprintf(stderr, "+reladdr");
+         } else if (inst->src[i].reg_offset != 0 ||
+                    inst->src[i].subreg_offset != 0) {
+            fprintf(stderr, "+%d.%d", inst->src[i].reg_offset,
+                    inst->src[i].subreg_offset);
+         }
          break;
       case BAD_FILE:
-         printf("(null)");
+         fprintf(stderr, "(null)");
          break;
       case IMM:
          switch (inst->src[i].type) {
          case BRW_REGISTER_TYPE_F:
-            printf("%ff", inst->src[i].imm.f);
+            fprintf(stderr, "%ff", inst->src[i].imm.f);
             break;
          case BRW_REGISTER_TYPE_D:
-            printf("%dd", inst->src[i].imm.i);
+            fprintf(stderr, "%dd", inst->src[i].imm.i);
             break;
          case BRW_REGISTER_TYPE_UD:
-            printf("%uu", inst->src[i].imm.u);
+            fprintf(stderr, "%uu", inst->src[i].imm.u);
             break;
          default:
-            printf("???");
+            fprintf(stderr, "???");
             break;
          }
          break;
       case HW_REG:
          if (inst->src[i].fixed_hw_reg.negate)
-            printf("-");
+            fprintf(stderr, "-");
          if (inst->src[i].fixed_hw_reg.abs)
-            printf("|");
+            fprintf(stderr, "|");
          if (inst->src[i].fixed_hw_reg.file == BRW_ARCHITECTURE_REGISTER_FILE) {
             switch (inst->src[i].fixed_hw_reg.nr) {
             case BRW_ARF_NULL:
-               printf("null");
+               fprintf(stderr, "null");
                break;
             case BRW_ARF_ADDRESS:
-               printf("a0.%d", inst->src[i].fixed_hw_reg.subnr);
+               fprintf(stderr, "a0.%d", inst->src[i].fixed_hw_reg.subnr);
                break;
             case BRW_ARF_ACCUMULATOR:
-               printf("acc%d", inst->src[i].fixed_hw_reg.subnr);
+               fprintf(stderr, "acc%d", inst->src[i].fixed_hw_reg.subnr);
                break;
             case BRW_ARF_FLAG:
-               printf("f%d.%d", inst->src[i].fixed_hw_reg.nr & 0xf,
+               fprintf(stderr, "f%d.%d", inst->src[i].fixed_hw_reg.nr & 0xf,
                                 inst->src[i].fixed_hw_reg.subnr);
                break;
             default:
-               printf("arf%d.%d", inst->src[i].fixed_hw_reg.nr & 0xf,
+               fprintf(stderr, "arf%d.%d", inst->src[i].fixed_hw_reg.nr & 0xf,
                                   inst->src[i].fixed_hw_reg.subnr);
                break;
             }
          } else {
-            printf("hw_reg%d", inst->src[i].fixed_hw_reg.nr);
+            fprintf(stderr, "hw_reg%d", inst->src[i].fixed_hw_reg.nr);
          }
          if (inst->src[i].fixed_hw_reg.subnr)
-            printf("+%d", inst->src[i].fixed_hw_reg.subnr);
+            fprintf(stderr, "+%d", inst->src[i].fixed_hw_reg.subnr);
          if (inst->src[i].fixed_hw_reg.abs)
-            printf("|");
+            fprintf(stderr, "|");
          break;
       default:
-         printf("???");
+         fprintf(stderr, "???");
          break;
       }
       if (inst->src[i].abs)
-         printf("|");
+         fprintf(stderr, "|");
 
       if (inst->src[i].file != IMM) {
-         printf(":%s", brw_reg_type_letters(inst->src[i].type));
+         fprintf(stderr, ":%s", brw_reg_type_letters(inst->src[i].type));
       }
 
       if (i < 2 && inst->src[i + 1].file != BAD_FILE)
-         printf(", ");
+         fprintf(stderr, ", ");
    }
 
-   printf(" ");
+   fprintf(stderr, " ");
 
    if (inst->force_uncompressed)
-      printf("1sthalf ");
+      fprintf(stderr, "1sthalf ");
 
    if (inst->force_sechalf)
-      printf("2ndhalf ");
+      fprintf(stderr, "2ndhalf ");
 
-   printf("\n");
+   fprintf(stderr, "\n");
 }
 
 /**
@@ -3185,7 +3196,7 @@ fs_visitor::dump_instruction(backend_instruction *be_inst)
 fs_inst *
 fs_visitor::get_instruction_generating_reg(fs_inst *start,
                                           fs_inst *end,
-                                          fs_reg reg)
+                                          const fs_reg &reg)
 {
    if (end == start ||
        end->is_partial_write() ||
@@ -3506,7 +3517,7 @@ brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c,
                unsigned *final_assembly_size)
 {
    bool start_busy = false;
-   float start_time = 0;
+   double start_time = 0;
 
    if (unlikely(brw->perf_debug)) {
       start_busy = (brw->batch.last_bo &&
@@ -3518,17 +3529,8 @@ brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c,
    if (prog)
       shader = (brw_shader *) prog->_LinkedShaders[MESA_SHADER_FRAGMENT];
 
-   if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
-      if (prog) {
-         printf("GLSL IR for native fragment shader %d:\n", prog->Name);
-         _mesa_print_ir(shader->base.ir, NULL);
-         printf("\n\n");
-      } else {
-         printf("ARB_fragment_program %d ir for native fragment shader\n",
-                fp->Base.Id);
-         _mesa_print_program(&fp->Base);
-      }
-   }
+   if (unlikely(INTEL_DEBUG & DEBUG_WM))
+      brw_dump_ir(brw, "fragment", prog, &shader->base, &fp->Base);
 
    /* Now the main event: Visit the shader IR and generate our FS IR for it.
     */