i965/eu: Take into account the target cache argument in brw_set_dp_read_message.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_vec4_tes.cpp
index ce5fefc75a920834b4065ed9bef62cdd0665334d..226dcb4f6facfc0c3768e547c63fa763cd89176f 100644 (file)
@@ -28,6 +28,7 @@
  */
 
 #include "brw_vec4_tes.h"
+#include "brw_cfg.h"
 
 namespace brw {
 
@@ -45,7 +46,7 @@ vec4_tes_visitor::vec4_tes_visitor(const struct brw_compiler *compiler,
 
 
 dst_reg *
-vec4_tes_visitor::make_reg_for_system_value(int location, const glsl_type *type)
+vec4_tes_visitor::make_reg_for_system_value(int location)
 {
    return NULL;
 }
@@ -53,39 +54,10 @@ vec4_tes_visitor::make_reg_for_system_value(int location, const glsl_type *type)
 void
 vec4_tes_visitor::nir_setup_system_value_intrinsic(nir_intrinsic_instr *instr)
 {
-   const struct brw_tes_prog_data *tes_prog_data =
-      (const struct brw_tes_prog_data *) prog_data;
-
    switch (instr->intrinsic) {
-   case nir_intrinsic_load_tess_level_outer: {
-      dst_reg dst(this, glsl_type::vec4_type);
-      nir_system_values[SYSTEM_VALUE_TESS_LEVEL_OUTER] = dst;
-
-      dst_reg temp(this, glsl_type::vec4_type);
-      vec4_instruction *read =
-         emit(VEC4_OPCODE_URB_READ, temp, input_read_header);
-      read->offset = 1;
-      read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET;
-      emit(MOV(dst, swizzle(src_reg(temp), BRW_SWIZZLE_WZYX)));
-      break;
-   }
-   case nir_intrinsic_load_tess_level_inner: {
-      dst_reg dst(this, glsl_type::vec2_type);
-      nir_system_values[SYSTEM_VALUE_TESS_LEVEL_INNER] = dst;
-
-      /* Set up the message header to reference the proper parts of the URB */
-      dst_reg temp(this, glsl_type::vec4_type);
-      vec4_instruction *read =
-         emit(VEC4_OPCODE_URB_READ, temp, input_read_header);
-      read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET;
-      if (tes_prog_data->domain == BRW_TESS_DOMAIN_QUAD) {
-         emit(MOV(dst, swizzle(src_reg(temp), BRW_SWIZZLE_WZYX)));
-      } else {
-         read->offset = 1;
-         emit(MOV(dst, src_reg(temp)));
-      }
+   case nir_intrinsic_load_tess_level_outer:
+   case nir_intrinsic_load_tess_level_inner:
       break;
-   }
    default:
       vec4_visitor::nir_setup_system_value_intrinsic(instr);
    }
@@ -105,6 +77,25 @@ vec4_tes_visitor::setup_payload()
 
    reg = setup_uniforms(reg);
 
+   foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
+      for (int i = 0; i < 3; i++) {
+         if (inst->src[i].file != ATTR)
+            continue;
+
+         struct brw_reg grf =
+            brw_vec4_grf(reg + inst->src[i].nr / 2, 4 * (inst->src[i].nr % 2));
+         grf = stride(grf, 0, 4, 1);
+         grf.swizzle = inst->src[i].swizzle;
+         grf.type = inst->src[i].type;
+         grf.abs = inst->src[i].abs;
+         grf.negate = inst->src[i].negate;
+
+         inst->src[i] = grf;
+      }
+   }
+
+   reg += 8 * prog_data->urb_read_length;
+
    this->first_non_payload_grf = reg;
 }
 
@@ -148,12 +139,36 @@ vec4_tes_visitor::emit_urb_write_opcode(bool complete)
 void
 vec4_tes_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
 {
+   const struct brw_tes_prog_data *tes_prog_data =
+      (const struct brw_tes_prog_data *) prog_data;
+
    switch (instr->intrinsic) {
    case nir_intrinsic_load_tess_coord:
       /* gl_TessCoord is part of the payload in g1 channels 0-2 and 4-6. */
       emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F),
                src_reg(brw_vec8_grf(1, 0))));
       break;
+   case nir_intrinsic_load_tess_level_outer:
+      if (tes_prog_data->domain == BRW_TESS_DOMAIN_ISOLINE) {
+         emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F),
+                  swizzle(src_reg(ATTR, 1, glsl_type::vec4_type),
+                          BRW_SWIZZLE_ZWZW)));
+      } else {
+         emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F),
+                  swizzle(src_reg(ATTR, 1, glsl_type::vec4_type),
+                          BRW_SWIZZLE_WZYX)));
+      }
+      break;
+   case nir_intrinsic_load_tess_level_inner:
+      if (tes_prog_data->domain == BRW_TESS_DOMAIN_QUAD) {
+         emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F),
+                  swizzle(src_reg(ATTR, 0, glsl_type::vec4_type),
+                          BRW_SWIZZLE_WZYX)));
+      } else {
+         emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F),
+                  src_reg(ATTR, 1, glsl_type::float_type)));
+      }
+      break;
    case nir_intrinsic_load_primitive_id:
       emit(TES_OPCODE_GET_PRIMITIVE_ID,
            get_nir_dest(instr->dest, BRW_REGISTER_TYPE_UD));
@@ -162,13 +177,30 @@ vec4_tes_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
    case nir_intrinsic_load_input:
    case nir_intrinsic_load_per_vertex_input: {
       src_reg indirect_offset = get_indirect_offset(instr);
+      dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D);
       unsigned imm_offset = instr->const_index[0];
+      unsigned first_component = nir_intrinsic_component(instr);
       src_reg header = input_read_header;
 
       if (indirect_offset.file != BAD_FILE) {
          header = src_reg(this, glsl_type::uvec4_type);
          emit(TES_OPCODE_ADD_INDIRECT_URB_OFFSET, dst_reg(header),
               input_read_header, indirect_offset);
+      } else {
+         /* Push at most 24 vec4 slots of input data (an arbitrary cap),
+          * i.e. 12 registers, since each register holds 2 vec4 slots.
+          */
+         const unsigned max_push_slots = 24;
+         if (imm_offset < max_push_slots) {
+            src_reg src = src_reg(ATTR, imm_offset, glsl_type::ivec4_type);
+            src.swizzle = BRW_SWZ_COMP_INPUT(first_component);
+
+            emit(MOV(dst, src));
+            prog_data->urb_read_length =
+               MAX2(prog_data->urb_read_length,
+                    DIV_ROUND_UP(imm_offset + 1, 2));
+            break;
+         }
       }
 
       dst_reg temp(this, glsl_type::ivec4_type);
@@ -177,12 +209,14 @@ vec4_tes_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
       read->offset = imm_offset;
       read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET;
 
+      src_reg src = src_reg(temp);
+      src.swizzle = BRW_SWZ_COMP_INPUT(first_component);
+
       /* Copy to target.  We might end up with some funky writemasks landing
        * in here, but we really don't want them in the above pseudo-ops.
        */
-      dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D);
       dst.writemask = brw_writemask_for_size(instr->num_components);
-      emit(MOV(dst, src_reg(temp)));
+      emit(MOV(dst, src));
       break;
    }
    default: