i965/nir: Sort uniforms direct-first and use two different uniform registers
[mesa.git] / src / mesa / drivers / dri / i965 / brw_vec4_vp.cpp
index c9dc797d5d68d5ce86328b9e963b9e2f4054c6a7..c3b0233eba223e8c65414abb2c8588aeb95d4bec 100644 (file)
@@ -29,6 +29,7 @@
 
 #include "brw_context.h"
 #include "brw_vec4.h"
+#include "brw_vs.h"
 extern "C" {
 #include "program/prog_parameter.h"
 #include "program/prog_print.h"
@@ -36,36 +37,20 @@ extern "C" {
 using namespace brw;
 
 void
-vec4_visitor::emit_vp_sop(uint32_t conditional_mod,
+vec4_visitor::emit_vp_sop(enum brw_conditional_mod conditional_mod,
                           dst_reg dst, src_reg src0, src_reg src1,
                           src_reg one)
 {
    vec4_instruction *inst;
 
-   inst = emit(BRW_OPCODE_CMP, dst_null_d(), src0, src1);
-   inst->conditional_mod = conditional_mod;
+   inst = emit(CMP(dst_null_f(), src0, src1, conditional_mod));
 
    inst = emit(BRW_OPCODE_SEL, dst, one, src_reg(0.0f));
    inst->predicate = BRW_PREDICATE_NORMAL;
 }
 
-/**
- * Reswizzle a given source register.
- * \sa brw_swizzle().
- */
-static inline src_reg
-reswizzle(src_reg orig, unsigned x, unsigned y, unsigned z, unsigned w)
-{
-   src_reg t = orig;
-   t.swizzle = BRW_SWIZZLE4(BRW_GET_SWZ(orig.swizzle, x),
-                            BRW_GET_SWZ(orig.swizzle, y),
-                            BRW_GET_SWZ(orig.swizzle, z),
-                            BRW_GET_SWZ(orig.swizzle, w));
-   return t;
-}
-
 void
-vec4_visitor::emit_vertex_program_code()
+vec4_vs_visitor::emit_program_code()
 {
    this->need_all_constants_in_pull_buffer = false;
 
@@ -84,8 +69,8 @@ vec4_visitor::emit_vertex_program_code()
    src_reg one = src_reg(this, glsl_type::float_type);
    emit(MOV(dst_reg(one), src_reg(1.0f)));
 
-   for (unsigned int insn = 0; insn < vp->Base.NumInstructions; insn++) {
-      const struct prog_instruction *vpi = &vp->Base.Instructions[insn];
+   for (unsigned int insn = 0; insn < prog->NumInstructions; insn++) {
+      const struct prog_instruction *vpi = &prog->Instructions[insn];
       base_ir = vpi;
 
       dst_reg dst;
@@ -111,7 +96,7 @@ vec4_visitor::emit_vertex_program_code()
          break;
 
       case OPCODE_ARL:
-         if (intel->gen >= 6) {
+         if (brw->gen >= 6) {
             dst.writemask = WRITEMASK_X;
             dst_reg dst_f = dst;
             dst_f.type = BRW_REGISTER_TYPE_F;
@@ -160,7 +145,7 @@ vec4_visitor::emit_vertex_program_code()
             /* tmp_d = floor(src[0].x) */
             src_reg tmp_d = src_reg(this, glsl_type::ivec4_type);
             assert(tmp_d.type == BRW_REGISTER_TYPE_D);
-            emit(RNDD(dst_reg(tmp_d), reswizzle(src[0], 0, 0, 0, 0)));
+            emit(RNDD(dst_reg(tmp_d), swizzle(src[0], BRW_SWIZZLE_XXXX)));
 
             /* result[0] = 2.0 ^ tmp */
             /* Adjust exponent for floating point: exp += 127 */
@@ -227,7 +212,7 @@ vec4_visitor::emit_vertex_program_code()
             result.writemask = WRITEMASK_YZ;
             emit(MOV(result, src_reg(0.0f)));
 
-            src_reg tmp_x = reswizzle(src[0], 0, 0, 0, 0);
+            src_reg tmp_x = swizzle(src[0], BRW_SWIZZLE_XXXX);
 
             emit(CMP(dst_null_d(), tmp_x, src_reg(0.0f), BRW_CONDITIONAL_G));
             emit(IF(BRW_PREDICATE_NORMAL));
@@ -239,14 +224,14 @@ vec4_visitor::emit_vertex_program_code()
 
             if (vpi->DstReg.WriteMask & WRITEMASK_Z) {
                /* if (tmp.y < 0) tmp.y = 0; */
-               src_reg tmp_y = reswizzle(src[0], 1, 1, 1, 1);
+               src_reg tmp_y = swizzle(src[0], BRW_SWIZZLE_YYYY);
                result.writemask = WRITEMASK_Z;
-               emit_minmax(BRW_CONDITIONAL_G, result, tmp_y, src_reg(0.0f));
+               emit_minmax(BRW_CONDITIONAL_GE, result, tmp_y, src_reg(0.0f));
 
                src_reg clamped_y(result);
                clamped_y.swizzle = BRW_SWIZZLE_ZZZZ;
 
-               src_reg tmp_w = reswizzle(src[0], 3, 3, 3, 3);
+               src_reg tmp_w = swizzle(src[0], BRW_SWIZZLE_WWWW);
 
                emit_math(SHADER_OPCODE_POW, result, clamped_y, tmp_w);
             }
@@ -260,7 +245,7 @@ vec4_visitor::emit_vertex_program_code()
          result.type = BRW_REGISTER_TYPE_UD;
          src_reg result_src = src_reg(result);
 
-         src_reg arg0_ud = reswizzle(src[0], 0, 0, 0, 0);
+         src_reg arg0_ud = swizzle(src[0], BRW_SWIZZLE_XXXX);
          arg0_ud.type = BRW_REGISTER_TYPE_UD;
 
          /* Perform mant = frexpf(fabsf(x), &exp), adjust exp and mnt
@@ -328,7 +313,7 @@ vec4_visitor::emit_vertex_program_code()
       }
 
       case OPCODE_MAX:
-         emit_minmax(BRW_CONDITIONAL_G, dst, src[0], src[1]);
+         emit_minmax(BRW_CONDITIONAL_GE, dst, src[0], src[1]);
          break;
 
       case OPCODE_MIN:
@@ -382,11 +367,11 @@ vec4_visitor::emit_vertex_program_code()
          src_reg t2 = src_reg(this, glsl_type::vec4_type);
 
          emit(MUL(dst_reg(t1),
-                  reswizzle(src[0], 1, 2, 0, 3),
-                  reswizzle(src[1], 2, 0, 1, 3)));
+                  swizzle(src[0], BRW_SWIZZLE_YZXW),
+                  swizzle(src[1], BRW_SWIZZLE_ZXYW)));
          emit(MUL(dst_reg(t2),
-                  reswizzle(src[0], 2, 0, 1, 3),
-                  reswizzle(src[1], 1, 2, 0, 3)));
+                  swizzle(src[0], BRW_SWIZZLE_ZXYW),
+                  swizzle(src[1], BRW_SWIZZLE_YZXW)));
          t2.negate = true;
          emit(ADD(dst, t1, t2));
          break;
@@ -401,7 +386,7 @@ vec4_visitor::emit_vertex_program_code()
       }
 
       /* Copy the temporary back into the actual destination register. */
-      if (vpi->Opcode != OPCODE_END) {
+      if (_mesa_num_inst_dst_regs(vpi->Opcode) != 0) {
          emit(MOV(get_vp_dst_reg(vpi->DstReg), src_reg(dst)));
       }
    }
@@ -410,26 +395,29 @@ vec4_visitor::emit_vertex_program_code()
     * pull constants.  Do that now.
     */
    if (this->need_all_constants_in_pull_buffer) {
-      const struct gl_program_parameter_list *params = c->vp->program.Base.Parameters;
+      const struct gl_program_parameter_list *params =
+         vs_compile->vp->program.Base.Parameters;
       unsigned i;
       for (i = 0; i < params->NumParameters * 4; i++) {
-         c->prog_data.pull_param[i] = &params->ParameterValues[i / 4][i % 4].f;
+         stage_prog_data->pull_param[i] =
+            &params->ParameterValues[i / 4][i % 4];
       }
-      c->prog_data.nr_pull_params = i;
+      stage_prog_data->nr_pull_params = i;
    }
 }
 
 void
-vec4_visitor::setup_vp_regs()
+vec4_vs_visitor::setup_vp_regs()
 {
    /* PROGRAM_TEMPORARY */
-   int num_temp = vp->Base.NumTemporaries;
+   int num_temp = prog->NumTemporaries;
    vp_temp_regs = rzalloc_array(mem_ctx, src_reg, num_temp);
    for (int i = 0; i < num_temp; i++)
       vp_temp_regs[i] = src_reg(this, glsl_type::vec4_type);
 
    /* PROGRAM_STATE_VAR etc. */
-   struct gl_program_parameter_list *plist = c->vp->program.Base.Parameters;
+   struct gl_program_parameter_list *plist =
+      vs_compile->vp->program.Base.Parameters;
    for (unsigned p = 0; p < plist->NumParameters; p++) {
       unsigned components = plist->Parameters[p].Size;
 
@@ -442,20 +430,20 @@ vec4_visitor::setup_vp_regs()
       this->uniform_size[this->uniforms] = 1; /* 1 vec4 */
       this->uniform_vector_size[this->uniforms] = components;
       for (unsigned i = 0; i < 4; i++) {
-         c->prog_data.param[this->uniforms * 4 + i] = i >= components ? 0 :
-            &plist->ParameterValues[p][i].f;
+         stage_prog_data->param[this->uniforms * 4 + i] = i >= components
+            ? 0 : &plist->ParameterValues[p][i];
       }
       this->uniforms++; /* counted in vec4 units */
    }
 
    /* PROGRAM_OUTPUT */
-   for (int slot = 0; slot < c->prog_data.vue_map.num_slots; slot++) {
-      int vert_result = c->prog_data.vue_map.slot_to_vert_result[slot];
-      if (vert_result == VARYING_SLOT_PSIZ)
-         output_reg[vert_result] = dst_reg(this, glsl_type::float_type);
+   for (int slot = 0; slot < prog_data->vue_map.num_slots; slot++) {
+      int varying = prog_data->vue_map.slot_to_varying[slot];
+      if (varying == VARYING_SLOT_PSIZ)
+         output_reg[varying] = dst_reg(this, glsl_type::float_type);
       else
-         output_reg[vert_result] = dst_reg(this, glsl_type::vec4_type);
-      assert(output_reg[vert_result].type == BRW_REGISTER_TYPE_F);
+         output_reg[varying] = dst_reg(this, glsl_type::vec4_type);
+      assert(output_reg[varying].type == BRW_REGISTER_TYPE_F);
    }
 
    /* PROGRAM_ADDRESS */
@@ -464,7 +452,7 @@ vec4_visitor::setup_vp_regs()
 }
 
 dst_reg
-vec4_visitor::get_vp_dst_reg(const prog_dst_register &dst)
+vec4_vs_visitor::get_vp_dst_reg(const prog_dst_register &dst)
 {
    dst_reg result;
 
@@ -489,8 +477,7 @@ vec4_visitor::get_vp_dst_reg(const prog_dst_register &dst)
       return dst_null_f();
 
    default:
-      assert("vec4_vp: bad destination register file");
-      return dst_reg(this, glsl_type::vec4_type);
+      unreachable("vec4_vp: bad destination register file");
    }
 
    result.writemask = dst.WriteMask;
@@ -498,9 +485,10 @@ vec4_visitor::get_vp_dst_reg(const prog_dst_register &dst)
 }
 
 src_reg
-vec4_visitor::get_vp_src_reg(const prog_src_register &src)
+vec4_vs_visitor::get_vp_src_reg(const prog_src_register &src)
 {
-   struct gl_program_parameter_list *plist = c->vp->program.Base.Parameters;
+   struct gl_program_parameter_list *plist =
+      vs_compile->vp->program.Base.Parameters;
 
    src_reg result;
 
@@ -539,11 +527,20 @@ vec4_visitor::get_vp_src_reg(const prog_src_register &src)
 
          /* Add the small constant index to the address register */
          src_reg reladdr = src_reg(this, glsl_type::int_type);
+
+         /* We have to use a message header on Skylake to get SIMD4x2 mode.
+          * Reserve space for the register.
+          */
+         if (brw->gen >= 9) {
+            reladdr.reg_offset++;
+            alloc.sizes[reladdr.reg] = 2;
+         }
+
          dst_reg dst_reladdr = dst_reg(reladdr);
          dst_reladdr.writemask = WRITEMASK_X;
          emit(ADD(dst_reladdr, this->vp_addr_reg, src_reg(src.Index)));
 
-         if (intel->gen < 6)
+         if (brw->gen < 6)
             emit(MUL(dst_reladdr, reladdr, src_reg(16)));
 
       #if 0
@@ -555,12 +552,20 @@ vec4_visitor::get_vp_src_reg(const prog_src_register &src)
       #endif
 
          result = src_reg(this, glsl_type::vec4_type);
-         src_reg surf_index = src_reg(unsigned(SURF_INDEX_VERT_CONST_BUFFER));
-         vec4_instruction *load =
-            new(mem_ctx) vec4_instruction(this, VS_OPCODE_PULL_CONSTANT_LOAD,
-                                          dst_reg(result), surf_index, reladdr);
-         load->base_mrf = 14;
-         load->mlen = 1;
+         src_reg surf_index = src_reg(unsigned(prog_data->base.binding_table.pull_constants_start));
+         vec4_instruction *load;
+         if (brw->gen >= 7) {
+            load = new(mem_ctx)
+               vec4_instruction(VS_OPCODE_PULL_CONSTANT_LOAD_GEN7,
+                                dst_reg(result), surf_index, reladdr);
+            load->mlen = 1;
+         } else {
+            load = new(mem_ctx)
+               vec4_instruction(VS_OPCODE_PULL_CONSTANT_LOAD,
+                                dst_reg(result), surf_index, reladdr);
+            load->base_mrf = 14;
+            load->mlen = 1;
+         }
          emit(load);
          break;
       }