i965: Move the back-end compiler to src/intel/compiler
[mesa.git] / src / intel / compiler / brw_vec4_tes.cpp
1 /*
2 * Copyright © 2013 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24 /**
25 * \file brw_vec4_tes.cpp
26 *
27 * Tessellaton evaluation shader specific code derived from the vec4_visitor class.
28 */
29
30 #include "brw_vec4_tes.h"
31 #include "brw_cfg.h"
32 #include "common/gen_debug.h"
33
34 namespace brw {
35
/**
 * Construct a tessellation evaluation shader (TES) visitor.
 *
 * All state lives in the base vec4_visitor; this constructor only forwards
 * the TES-specific key/prog_data (their embedded texture key and base
 * prog_data) to it.  The hard-coded `false` argument's meaning is defined
 * by vec4_visitor's constructor, which is not visible in this file.
 */
vec4_tes_visitor::vec4_tes_visitor(const struct brw_compiler *compiler,
                                  void *log_data,
                                  const struct brw_tes_prog_key *key,
                                  struct brw_tes_prog_data *prog_data,
                                  const nir_shader *shader,
                                  void *mem_ctx,
                                  int shader_time_index)
   : vec4_visitor(compiler, log_data, &key->tex, &prog_data->base,
                  shader, mem_ctx, false, shader_time_index)
{
}
47
48
49 dst_reg *
50 vec4_tes_visitor::make_reg_for_system_value(int location)
51 {
52 return NULL;
53 }
54
55 void
56 vec4_tes_visitor::nir_setup_system_value_intrinsic(nir_intrinsic_instr *instr)
57 {
58 switch (instr->intrinsic) {
59 case nir_intrinsic_load_tess_level_outer:
60 case nir_intrinsic_load_tess_level_inner:
61 break;
62 default:
63 vec4_visitor::nir_setup_system_value_intrinsic(instr);
64 }
65 }
66
67
/**
 * Lay out the TES thread payload and rewrite ATTR sources to point at
 * the GRFs where the pushed input data actually lands.
 *
 * Payload layout built here: r0-r1 (fixed data including URB handles),
 * then push constants (setup_uniforms), then pushed URB input slots —
 * two vec4 slots per register.  first_non_payload_grf is set to the
 * first register past all of that.
 */
void
vec4_tes_visitor::setup_payload()
{
   int reg = 0;

   /* The payload always contains important data in r0 and r1, which contains
    * the URB handles that are passed on to the URB write at the end
    * of the thread.
    */
   reg += 2;

   /* Push constants come next; setup_uniforms returns the first free reg. */
   reg = setup_uniforms(reg);

   /* Rewrite every ATTR source into a direct GRF reference into the
    * pushed-input region of the payload.
    */
   foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
      for (int i = 0; i < 3; i++) {
         if (inst->src[i].file != ATTR)
            continue;

         bool is_64bit = type_sz(inst->src[i].type) == 8;

         /* Each vec4 slot is 16 bytes; two slots share one register, so
          * even slots start at subnr 0 and odd slots at subnr 4.
          */
         unsigned slot = inst->src[i].nr + inst->src[i].offset / 16;
         struct brw_reg grf = brw_vec4_grf(reg + slot / 2, 4 * (slot % 2));
         /* 64-bit elements are twice as wide, so only 2 fit per slot. */
         grf = stride(grf, 0, is_64bit ? 2 : 4, 1);
         grf.swizzle = inst->src[i].swizzle;
         grf.type = inst->src[i].type;
         grf.abs = inst->src[i].abs;
         grf.negate = inst->src[i].negate;

         /* For 64-bit attributes we can end up with components XY in the
          * second half of a register and components ZW in the first half
          * of the next. Fix it up here.
          */
         if (is_64bit && grf.subnr > 0) {
            /* We can't do swizzles that mix XY and ZW channels in this case.
             * Such cases should have been handled by the scalarization pass.
             */
            assert((brw_mask_for_swizzle(grf.swizzle) & 0x3) ^
                   (brw_mask_for_swizzle(grf.swizzle) & 0xc));
            if (brw_mask_for_swizzle(grf.swizzle) & 0xc) {
               /* ZW live at the start of the next register; rebase the
                * swizzle so Z/W select from that register's first half.
                */
               grf.subnr = 0;
               grf.nr++;
               grf.swizzle -= BRW_SWIZZLE_ZZZZ;
            }
         }

         inst->src[i] = grf;
      }
   }

   /* urb_read_length is in pairs of vec4 slots, i.e. whole registers... 
    * NOTE(review): the 8x multiplier presumably accounts for SIMD width /
    * per-vertex replication defined elsewhere — confirm against the URB
    * setup code before changing.
    */
   reg += 8 * prog_data->urb_read_length;

   this->first_non_payload_grf = reg;
}
121
122
123 void
124 vec4_tes_visitor::emit_prolog()
125 {
126 input_read_header = src_reg(this, glsl_type::uvec4_type);
127 emit(TES_OPCODE_CREATE_INPUT_READ_HEADER, dst_reg(input_read_header));
128
129 this->current_annotation = NULL;
130 }
131
132
133 void
134 vec4_tes_visitor::emit_urb_write_header(int mrf)
135 {
136 /* No need to do anything for DS; an implied write to this MRF will be
137 * performed by VS_OPCODE_URB_WRITE.
138 */
139 (void) mrf;
140 }
141
142
143 vec4_instruction *
144 vec4_tes_visitor::emit_urb_write_opcode(bool complete)
145 {
146 /* For DS, the URB writes end the thread. */
147 if (complete) {
148 if (INTEL_DEBUG & DEBUG_SHADER_TIME)
149 emit_shader_time_end();
150 }
151
152 vec4_instruction *inst = emit(VS_OPCODE_URB_WRITE);
153 inst->urb_write_flags = complete ?
154 BRW_URB_WRITE_EOT_COMPLETE : BRW_URB_WRITE_NO_FLAGS;
155
156 return inst;
157 }
158
/**
 * Emit vec4 IR for a NIR intrinsic, handling the TES-specific ones here
 * and deferring everything else to the base vec4_visitor.
 *
 * TES-specific cases: tess coordinates (read straight from the payload),
 * inner/outer tess levels (read from pushed patch-header attributes with
 * domain-dependent swizzles), primitive ID, and input loads — which are
 * either serviced from pushed attribute registers or turned into URB
 * read messages.
 */
void
vec4_tes_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
{
   const struct brw_tes_prog_data *tes_prog_data =
      (const struct brw_tes_prog_data *) prog_data;

   switch (instr->intrinsic) {
   case nir_intrinsic_load_tess_coord:
      /* gl_TessCoord is part of the payload in g1 channels 0-2 and 4-6. */
      emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F),
               src_reg(brw_vec8_grf(1, 0))));
      break;
   case nir_intrinsic_load_tess_level_outer:
      /* The patch header layout differs per domain, hence the different
       * attribute slots/swizzles below — presumably matching the layout
       * the TCS/fixed-function wrote; confirm against the URB layout docs.
       */
      if (tes_prog_data->domain == BRW_TESS_DOMAIN_ISOLINE) {
         emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F),
                  swizzle(src_reg(ATTR, 1, glsl_type::vec4_type),
                          BRW_SWIZZLE_ZWZW)));
      } else {
         emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F),
                  swizzle(src_reg(ATTR, 1, glsl_type::vec4_type),
                          BRW_SWIZZLE_WZYX)));
      }
      break;
   case nir_intrinsic_load_tess_level_inner:
      /* Inner levels only exist for quads (vec2) and triangles (scalar). */
      if (tes_prog_data->domain == BRW_TESS_DOMAIN_QUAD) {
         emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F),
                  swizzle(src_reg(ATTR, 0, glsl_type::vec4_type),
                          BRW_SWIZZLE_WZYX)));
      } else {
         emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F),
                  src_reg(ATTR, 1, glsl_type::float_type)));
      }
      break;
   case nir_intrinsic_load_primitive_id:
      emit(TES_OPCODE_GET_PRIMITIVE_ID,
           get_nir_dest(instr->dest, BRW_REGISTER_TYPE_UD));
      break;

   case nir_intrinsic_load_input:
   case nir_intrinsic_load_per_vertex_input: {
      src_reg indirect_offset = get_indirect_offset(instr);
      unsigned imm_offset = instr->const_index[0];
      src_reg header = input_read_header;
      bool is_64bit = nir_dest_bit_size(instr->dest) == 64;
      unsigned first_component = nir_intrinsic_component(instr);
      /* 64-bit components occupy two 32-bit component slots each. */
      if (is_64bit)
         first_component /= 2;

      if (indirect_offset.file != BAD_FILE) {
         /* Indirect addressing: fold the dynamic offset into a fresh copy
          * of the read header rather than clobbering the shared one.
          */
         header = src_reg(this, glsl_type::uvec4_type);
         emit(TES_OPCODE_ADD_INDIRECT_URB_OFFSET, dst_reg(header),
              input_read_header, indirect_offset);
      } else {
         /* Arbitrarily only push up to 24 vec4 slots worth of data,
          * which is 12 registers (since each holds 2 vec4 slots).
          */
         const unsigned max_push_slots = 24;
         if (imm_offset < max_push_slots) {
            /* Input is already pushed into the payload — read it as an
             * ATTR and grow urb_read_length to cover this slot (two slots
             * per register; 64-bit loads span two slots).
             */
            const glsl_type *src_glsl_type =
               is_64bit ? glsl_type::dvec4_type : glsl_type::ivec4_type;
            src_reg src = src_reg(ATTR, imm_offset, src_glsl_type);
            src.swizzle = BRW_SWZ_COMP_INPUT(first_component);

            const brw_reg_type dst_reg_type =
               is_64bit ? BRW_REGISTER_TYPE_DF : BRW_REGISTER_TYPE_D;
            emit(MOV(get_nir_dest(instr->dest, dst_reg_type), src));

            prog_data->urb_read_length =
               MAX2(prog_data->urb_read_length,
                    DIV_ROUND_UP(imm_offset + (is_64bit ? 2 : 1), 2));
            break;
         }
      }

      if (!is_64bit) {
         /* Pull path: one URB read into a temp, then a writemasked MOV
          * into the real destination.
          */
         dst_reg temp(this, glsl_type::ivec4_type);
         vec4_instruction *read =
            emit(VEC4_OPCODE_URB_READ, temp, src_reg(header));
         read->offset = imm_offset;
         read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET;

         src_reg src = src_reg(temp);
         src.swizzle = BRW_SWZ_COMP_INPUT(first_component);

         /* Copy to target. We might end up with some funky writemasks landing
          * in here, but we really don't want them in the above pseudo-ops.
          */
         dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D);
         dst.writemask = brw_writemask_for_size(instr->num_components);
         emit(MOV(dst, src));
      } else {
         /* For 64-bit we need to load twice as many 32-bit components, and for
          * dvec3/4 we need to emit 2 URB Read messages
          */
         dst_reg temp(this, glsl_type::dvec4_type);
         dst_reg temp_d = retype(temp, BRW_REGISTER_TYPE_D);

         vec4_instruction *read =
            emit(VEC4_OPCODE_URB_READ, temp_d, src_reg(header));
         read->offset = imm_offset;
         read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET;

         if (instr->num_components > 2) {
            /* Second read fills the upper register with the next slot. */
            read = emit(VEC4_OPCODE_URB_READ, byte_offset(temp_d, REG_SIZE),
                        src_reg(header));
            read->offset = imm_offset + 1;
            read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET;
         }

         src_reg temp_as_src = src_reg(temp);
         temp_as_src.swizzle = BRW_SWZ_COMP_INPUT(first_component);

         /* Reassemble the hi/lo 32-bit halves into proper doubles. */
         dst_reg shuffled(this, glsl_type::dvec4_type);
         shuffle_64bit_data(shuffled, temp_as_src, false);

         dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_DF);
         dst.writemask = brw_writemask_for_size(instr->num_components);
         emit(MOV(dst, src_reg(shuffled)));
      }
      break;
   }
   default:
      vec4_visitor::nir_emit_intrinsic(instr);
   }
}
284
285
286 void
287 vec4_tes_visitor::emit_thread_end()
288 {
289 /* For DS, we always end the thread by emitting a single vertex.
290 * emit_urb_write_opcode() will take care of setting the eot flag on the
291 * SEND instruction.
292 */
293 emit_vertex();
294 }
295
296 } /* namespace brw */