src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp

   1 /*
   2  * Copyright © 2013 Intel Corporation
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  21  * DEALINGS IN THE SOFTWARE.
  22  */
  23
  24 /**
  25  * \file brw_vec4_tcs.cpp
  26  *
  27  * Tessellation control shader specific code derived from the vec4_visitor class.
  28  */
  29
  30 #include "brw_nir.h"
  31 #include "brw_vec4_tcs.h"
  32
  33 namespace brw {
  34
  35 vec4_tcs_visitor::vec4_tcs_visitor(const struct brw_compiler *compiler,
  36                                    void *log_data,
  37                                    const struct brw_tcs_prog_key *key,
  38                                    struct brw_tcs_prog_data *prog_data,
  39                                    const nir_shader *nir,
  40                                    void *mem_ctx,
  41                                    int shader_time_index)
  42    : vec4_visitor(compiler, log_data, &key->tex, &prog_data->base,
  43                   nir, mem_ctx, false, shader_time_index),
  44      key(key)
  45 {
  46 }
  47
  48
  49 void
  50 vec4_tcs_visitor::nir_setup_system_value_intrinsic(nir_intrinsic_instr *instr)
  51 {
  52 }
  53
  54 dst_reg *
  55 vec4_tcs_visitor::make_reg_for_system_value(int location, const glsl_type *type)
  56 {
  57    return NULL;
  58 }
  59
  60
  61 void
  62 vec4_tcs_visitor::setup_payload()
  63 {
  64    int reg = 0;
  65
  66    /* The payload always contains important data in r0, which contains
  67     * the URB handles that are passed on to the URB write at the end
  68     * of the thread.
  69     */
  70    reg++;
  71
  72    /* r1.0 - r4.7 may contain the input control point URB handles,
  73     * which we use to pull vertex data.
  74     */
  75    reg += 4;
  76
  77    /* Push constants may start at r5.0 */
  78    reg = setup_uniforms(reg);
  79
  80    this->first_non_payload_grf = reg;
  81 }
  82
  83
  84 void
  85 vec4_tcs_visitor::emit_prolog()
  86 {
  87    invocation_id = src_reg(this, glsl_type::uint_type);
  88    emit(TCS_OPCODE_GET_INSTANCE_ID, dst_reg(invocation_id));
  89
  90    /* HS threads are dispatched with the dispatch mask set to 0xFF.
  91     * If there are an odd number of output vertices, then the final
  92     * HS instance dispatched will only have its bottom half doing real
  93     * work, and so we need to disable the upper half:
  94     */
  95    if (nir->info.tcs.vertices_out % 2) {
  96       emit(CMP(dst_null_d(), invocation_id,
  97                brw_imm_ud(nir->info.tcs.vertices_out), BRW_CONDITIONAL_L));
  98
  99       /* Matching ENDIF is in emit_thread_end() */
 100       emit(IF(BRW_PREDICATE_NORMAL));
 101    }
 102 }
 103
 104
 105 void
 106 vec4_tcs_visitor::emit_thread_end()
 107 {
 108    current_annotation = "thread end";
 109
 110    if (nir->info.tcs.vertices_out % 2) {
 111       emit(BRW_OPCODE_ENDIF);
 112    }
 113
 114    if (unlikely(INTEL_DEBUG & DEBUG_SHADER_TIME))
 115       emit_shader_time_end();
 116
 117    vec4_instruction *inst = emit(VS_OPCODE_URB_WRITE);
 118    inst->mlen = 1;   /* just the header, no data. */
 119    inst->urb_write_flags = BRW_URB_WRITE_EOT_COMPLETE;
 120 }
 121
 122
 123 void
 124 vec4_tcs_visitor::emit_input_urb_read(const dst_reg &dst,
 125                                       const src_reg &vertex_index,
 126                                       unsigned base_offset,
 127                                       const src_reg &indirect_offset)
 128 {
 129    vec4_instruction *inst;
 130    dst_reg temp(this, glsl_type::ivec4_type);
 131    temp.type = dst.type;
 132
 133    /* Set up the message header to reference the proper parts of the URB */
 134    dst_reg header = dst_reg(this, glsl_type::uvec4_type);
 135    inst = emit(TCS_OPCODE_SET_INPUT_URB_OFFSETS, header, vertex_index,
 136                indirect_offset);
 137    inst->force_writemask_all = true;
 138
 139    /* Read into a temporary, ignoring writemasking. */
 140    inst = emit(VEC4_OPCODE_URB_READ, temp, src_reg(header));
 141    inst->offset = base_offset;
 142    inst->mlen = 1;
 143    inst->base_mrf = -1;
 144
 145    /* Copy the temporary to the destination to deal with writemasking.
 146     *
 147     * Also attempt to deal with gl_PointSize being in the .w component.
 148     */
 149    if (inst->offset == 0 && indirect_offset.file == BAD_FILE) {
 150       emit(MOV(dst, swizzle(src_reg(temp), BRW_SWIZZLE_WWWW)));
 151    } else {
 152       emit(MOV(dst, src_reg(temp)));
 153    }
 154 }
 155
 156 void
 157 vec4_tcs_visitor::emit_output_urb_read(const dst_reg &dst,
 158                                        unsigned base_offset,
 159                                        const src_reg &indirect_offset)
 160 {
 161    vec4_instruction *inst;
 162
 163    /* Set up the message header to reference the proper parts of the URB */
 164    dst_reg header = dst_reg(this, glsl_type::uvec4_type);
 165    inst = emit(TCS_OPCODE_SET_OUTPUT_URB_OFFSETS, header,
 166                brw_imm_ud(dst.writemask), indirect_offset);
 167    inst->force_writemask_all = true;
 168
 169    /* Read into a temporary, ignoring writemasking. */
 170    vec4_instruction *read = emit(VEC4_OPCODE_URB_READ, dst, src_reg(header));
 171    read->offset = base_offset;
 172    read->mlen = 1;
 173    read->base_mrf = -1;
 174 }
 175
 176 void
 177 vec4_tcs_visitor::emit_urb_write(const src_reg &value,
 178                                  unsigned writemask,
 179                                  unsigned base_offset,
 180                                  const src_reg &indirect_offset)
 181 {
 182    if (writemask == 0)
 183       return;
 184
 185    src_reg message(this, glsl_type::uvec4_type, 2);
 186    vec4_instruction *inst;
 187
 188    inst = emit(TCS_OPCODE_SET_OUTPUT_URB_OFFSETS, dst_reg(message),
 189                brw_imm_ud(writemask), indirect_offset);
 190    inst->force_writemask_all = true;
 191    inst = emit(MOV(offset(dst_reg(retype(message, value.type)), 1), value));
 192    inst->force_writemask_all = true;
 193
 194    inst = emit(TCS_OPCODE_URB_WRITE, dst_null_f(), message);
 195    inst->offset = base_offset;
 196    inst->mlen = 2;
 197    inst->base_mrf = -1;
 198 }
 199
 200 static unsigned
 201 tesslevel_outer_components(GLenum tes_primitive_mode)
 202 {
 203    switch (tes_primitive_mode) {
 204    case GL_QUADS:
 205       return 4;
 206    case GL_TRIANGLES:
 207       return 3;
 208    case GL_ISOLINES:
 209       return 2;
 210    default:
 211       unreachable("Bogus tessellation domain");
 212    }
 213    return 0;
 214 }
 215
 216 static unsigned
 217 tesslevel_inner_components(GLenum tes_primitive_mode)
 218 {
 219    switch (tes_primitive_mode) {
 220    case GL_QUADS:
 221       return 2;
 222    case GL_TRIANGLES:
 223       return 1;
 224    case GL_ISOLINES:
 225       return 0;
 226    default:
 227       unreachable("Bogus tessellation domain");
 228    }
 229    return 0;
 230 }
 231
 232 /**
 233  * Given a normal .xyzw writemask, convert it to a writemask for a vector
 234  * that's stored backwards, i.e. .wzyx.
 235  */
 236 static unsigned
 237 writemask_for_backwards_vector(unsigned mask)
 238 {
 239    unsigned new_mask = 0;
 240
 241    for (int i = 0; i < 4; i++)
 242       new_mask |= ((mask >> i) & 1) << (3 - i);
 243
 244    return new_mask;
 245 }
 246
 247 void
 248 vec4_tcs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
 249 {
 250    switch (instr->intrinsic) {
 251    case nir_intrinsic_load_invocation_id:
 252       emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_UD),
 253                invocation_id));
 254       break;
 255    case nir_intrinsic_load_primitive_id:
 256       emit(TCS_OPCODE_GET_PRIMITIVE_ID,
 257            get_nir_dest(instr->dest, BRW_REGISTER_TYPE_UD));
 258       break;
 259    case nir_intrinsic_load_patch_vertices_in:
 260       emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D),
 261                brw_imm_d(key->input_vertices)));
 262       break;
 263    case nir_intrinsic_load_per_vertex_input: {
 264       src_reg indirect_offset = get_indirect_offset(instr);
 265       unsigned imm_offset = instr->const_index[0];
 266
 267       nir_const_value *vertex_const = nir_src_as_const_value(instr->src[0]);
 268       src_reg vertex_index =
 269          vertex_const ? src_reg(brw_imm_ud(vertex_const->u[0]))
 270                       : get_nir_src(instr->src[0], BRW_REGISTER_TYPE_UD, 1);
 271
 272       dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D);
 273       dst.writemask = brw_writemask_for_size(instr->num_components);
 274
 275       emit_input_urb_read(dst, vertex_index, imm_offset, indirect_offset);
 276       break;
 277    }
 278    case nir_intrinsic_load_input:
 279       unreachable("nir_lower_io should use load_per_vertex_input intrinsics");
 280       break;
 281    case nir_intrinsic_load_output:
 282    case nir_intrinsic_load_per_vertex_output: {
 283       src_reg indirect_offset = get_indirect_offset(instr);
 284       unsigned imm_offset = instr->const_index[0];;
 285
 286       dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D);
 287       dst.writemask = brw_writemask_for_size(instr->num_components);
 288
 289       if (imm_offset == 0 && indirect_offset.file == BAD_FILE) {
 290          dst.type = BRW_REGISTER_TYPE_F;
 291
 292          /* This is a read of gl_TessLevelInner[], which lives in the
 293           * Patch URB header.  The layout depends on the domain.
 294           */
 295          switch (key->tes_primitive_mode) {
 296          case GL_QUADS: {
 297             /* DWords 3-2 (reversed); use offset 0 and WZYX swizzle. */
 298             dst_reg tmp(this, glsl_type::vec4_type);
 299             emit_output_urb_read(tmp, 0, src_reg());
 300             emit(MOV(writemask(dst, WRITEMASK_XY),
 301                      swizzle(src_reg(tmp), BRW_SWIZZLE_WZYX)));
 302             break;
 303          }
 304          case GL_TRIANGLES:
 305             /* DWord 4; use offset 1 but normal swizzle/writemask. */
 306             emit_output_urb_read(writemask(dst, WRITEMASK_X), 1, src_reg());
 307             break;
 308          case GL_ISOLINES:
 309             /* All channels are undefined. */
 310             return;
 311          default:
 312             unreachable("Bogus tessellation domain");
 313          }
 314       } else if (imm_offset == 1 && indirect_offset.file == BAD_FILE) {
 315          dst.type = BRW_REGISTER_TYPE_F;
 316
 317          /* This is a read of gl_TessLevelOuter[], which lives in the
 318           * high 4 DWords of the Patch URB header, in reverse order.
 319           */
 320          switch (key->tes_primitive_mode) {
 321          case GL_QUADS:
 322             dst.writemask = WRITEMASK_XYZW;
 323             break;
 324          case GL_TRIANGLES:
 325             dst.writemask = WRITEMASK_XYZ;
 326             break;
 327          case GL_ISOLINES:
 328             dst.writemask = WRITEMASK_XY;
 329             return;
 330          default:
 331             unreachable("Bogus tessellation domain");
 332          }
 333
 334          dst_reg tmp(this, glsl_type::vec4_type);
 335          emit_output_urb_read(tmp, 1, src_reg());
 336          emit(MOV(dst, swizzle(src_reg(tmp), BRW_SWIZZLE_WZYX)));
 337       } else {
 338          emit_output_urb_read(dst, imm_offset, indirect_offset);
 339       }
 340       break;
 341    }
 342    case nir_intrinsic_store_output:
 343    case nir_intrinsic_store_per_vertex_output: {
 344       src_reg value = get_nir_src(instr->src[0]);
 345       unsigned mask = instr->const_index[1];
 346       unsigned swiz = BRW_SWIZZLE_XYZW;
 347
 348       src_reg indirect_offset = get_indirect_offset(instr);
 349       unsigned imm_offset = instr->const_index[0];
 350
 351       if (imm_offset == 0 && indirect_offset.file == BAD_FILE) {
 352          value.type = BRW_REGISTER_TYPE_F;
 353
 354          mask &= (1 << tesslevel_inner_components(key->tes_primitive_mode)) - 1;
 355
 356          /* This is a write to gl_TessLevelInner[], which lives in the
 357           * Patch URB header.  The layout depends on the domain.
 358           */
 359          switch (key->tes_primitive_mode) {
 360          case GL_QUADS:
 361             /* gl_TessLevelInner[].xy lives at DWords 3-2 (reversed).
 362              * We use an XXYX swizzle to reverse put .xy in the .wz
 363              * channels, and use a .zw writemask.
 364              */
 365             swiz = BRW_SWIZZLE4(0, 0, 1, 0);
 366             mask = writemask_for_backwards_vector(mask);
 367             break;
 368          case GL_TRIANGLES:
 369             /* gl_TessLevelInner[].x lives at DWord 4, so we set the
 370              * writemask to X and bump the URB offset by 1.
 371              */
 372             imm_offset = 1;
 373             break;
 374          case GL_ISOLINES:
 375             /* Skip; gl_TessLevelInner[] doesn't exist for isolines. */
 376             return;
 377          default:
 378             unreachable("Bogus tessellation domain");
 379          }
 380       } else if (imm_offset == 1 && indirect_offset.file == BAD_FILE) {
 381          value.type = BRW_REGISTER_TYPE_F;
 382
 383          mask &= (1 << tesslevel_outer_components(key->tes_primitive_mode)) - 1;
 384
 385          /* This is a write to gl_TessLevelOuter[] which lives in the
 386           * Patch URB Header at DWords 4-7.  However, it's reversed, so
 387           * instead of .xyzw we have .wzyx.
 388           */
 389          swiz = BRW_SWIZZLE_WZYX;
 390          mask = writemask_for_backwards_vector(mask);
 391       }
 392
 393       emit_urb_write(swizzle(value, swiz), mask,
 394                      imm_offset, indirect_offset);
 395       break;
 396    }
 397
 398    case nir_intrinsic_barrier: {
 399       dst_reg header = dst_reg(this, glsl_type::uvec4_type);
 400       emit(TCS_OPCODE_CREATE_BARRIER_HEADER, header);
 401       emit(SHADER_OPCODE_BARRIER, dst_null_ud(), src_reg(header));
 402       break;
 403    }
 404
 405    default:
 406       vec4_visitor::nir_emit_intrinsic(instr);
 407    }
 408 }
 409
 410
 411 extern "C" const unsigned *
 412 brw_compile_tcs(const struct brw_compiler *compiler,
 413                 void *log_data,
 414                 void *mem_ctx,
 415                 const struct brw_tcs_prog_key *key,
 416                 struct brw_tcs_prog_data *prog_data,
 417                 const nir_shader *src_shader,
 418                 int shader_time_index,
 419                 unsigned *final_assembly_size,
 420                 char **error_str)
 421 {
 422    const struct brw_device_info *devinfo = compiler->devinfo;
 423    struct brw_vue_prog_data *vue_prog_data = &prog_data->base;
 424    const bool is_scalar = compiler->scalar_stage[MESA_SHADER_TESS_CTRL];
 425
 426    nir_shader *nir = nir_shader_clone(mem_ctx, src_shader);
 427    nir = brw_nir_apply_sampler_key(nir, devinfo, &key->tex, is_scalar);
 428    nir = brw_postprocess_nir(nir, compiler->devinfo, is_scalar);
 429
 430    prog_data->instances = DIV_ROUND_UP(nir->info.tcs.vertices_out, 2);
 431
 432    brw_compute_tess_vue_map(&vue_prog_data->vue_map,
 433                             nir->info.outputs_written,
 434                             nir->info.patch_outputs_written);
 435
 436    /* Compute URB entry size.  The maximum allowed URB entry size is 32k.
 437     * That divides up as follows:
 438     *
 439     *     32 bytes for the patch header (tessellation factors)
 440     *    480 bytes for per-patch varyings (a varying component is 4 bytes and
 441     *              gl_MaxTessPatchComponents = 120)
 442     *  16384 bytes for per-vertex varyings (a varying component is 4 bytes,
 443     *              gl_MaxPatchVertices = 32 and
 444     *              gl_MaxTessControlOutputComponents = 128)
 445     *
 446     *  15808 bytes left for varying packing overhead
 447     */
 448    const int num_per_patch_slots = vue_prog_data->vue_map.num_per_patch_slots;
 449    const int num_per_vertex_slots = vue_prog_data->vue_map.num_per_vertex_slots;
 450    unsigned output_size_bytes = 0;
 451    /* Note that the patch header is counted in num_per_patch_slots. */
 452    output_size_bytes += num_per_patch_slots * 16;
 453    output_size_bytes += nir->info.tcs.vertices_out * num_per_vertex_slots * 16;
 454
 455    assert(output_size_bytes >= 1);
 456    if (output_size_bytes > GEN7_MAX_HS_URB_ENTRY_SIZE_BYTES)
 457       return false;
 458
 459    /* URB entry sizes are stored as a multiple of 64 bytes. */
 460    vue_prog_data->urb_entry_size = ALIGN(output_size_bytes, 64) / 64;
 461
 462    struct brw_vue_map input_vue_map;
 463    brw_compute_vue_map(devinfo, &input_vue_map,
 464                        nir->info.inputs_read & ~VARYING_BIT_PRIMITIVE_ID,
 465                        true);
 466
 467    /* HS does not use the usual payload pushing from URB to GRFs,
 468     * because we don't have enough registers for a full-size payload, and
 469     * the hardware is broken on Haswell anyway.
 470     */
 471    vue_prog_data->urb_read_length = 0;
 472
 473    if (unlikely(INTEL_DEBUG & DEBUG_TCS)) {
 474       fprintf(stderr, "TCS Input ");
 475       brw_print_vue_map(stderr, &input_vue_map);
 476       fprintf(stderr, "TCS Output ");
 477       brw_print_vue_map(stderr, &vue_prog_data->vue_map);
 478    }
 479
 480    vec4_tcs_visitor v(compiler, log_data, key, prog_data,
 481                       nir, mem_ctx, shader_time_index);
 482    if (!v.run()) {
 483       if (error_str)
 484          *error_str = ralloc_strdup(mem_ctx, v.fail_msg);
 485       return NULL;
 486    }
 487
 488    if (unlikely(INTEL_DEBUG & DEBUG_TCS))
 489       v.dump_instructions();
 490
 491    return brw_vec4_generate_assembly(compiler, log_data, mem_ctx, nir,
 492                                      &prog_data->base, v.cfg,
 493                                      final_assembly_size);
 494 }
 495
 496
 497 } /* namespace brw */