src/mesa/drivers/dri/i965/brw_gs_emit.c

   1 /*
   2  Copyright (C) Intel Corp.  2006.  All Rights Reserved.
   3  Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
   4  develop this 3D driver.
   5
   6  Permission is hereby granted, free of charge, to any person obtaining
   7  a copy of this software and associated documentation files (the
   8  "Software"), to deal in the Software without restriction, including
   9  without limitation the rights to use, copy, modify, merge, publish,
  10  distribute, sublicense, and/or sell copies of the Software, and to
  11  permit persons to whom the Software is furnished to do so, subject to
  12  the following conditions:
  13
  14  The above copyright notice and this permission notice (including the
  15  next paragraph) shall be included in all copies or substantial
  16  portions of the Software.
  17
  18  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  19  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  21  IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
  22  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  23  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  24  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25
  26  **********************************************************************/
  27  /*
  28   * Authors:
  29   *   Keith Whitwell <keith@tungstengraphics.com>
  30   */
  31
  32
  33 #include "main/glheader.h"
  34 #include "main/macros.h"
  35 #include "main/enums.h"
  36
  37 #include "program/program.h"
  38 #include "intel_batchbuffer.h"
  39
  40 #include "brw_defines.h"
  41 #include "brw_context.h"
  42 #include "brw_eu.h"
  43 #include "brw_gs.h"
  44
  45 /**
  46  * Allocate registers for GS.
  47  *
  48  * If svbi_payload_enable is true, then the thread will be spawned with the
  49  * "SVBI Payload Enable" bit set, so GRF 1 needs to be set aside to hold the
  50  * streamed vertex buffer indices.
  51  */
  52 static void brw_gs_alloc_regs( struct brw_gs_compile *c,
  53                                GLuint nr_verts,
  54                                bool svbi_payload_enable )
  55 {
  56    GLuint i = 0,j;
  57
  58    /* Register usage is static, precompute here:
  59     */
  60    c->reg.R0 = retype(brw_vec8_grf(i, 0), BRW_REGISTER_TYPE_UD); i++;
  61
  62    /* Streamed vertex buffer indices */
  63    if (svbi_payload_enable)
  64       c->reg.SVBI = retype(brw_vec8_grf(i++, 0), BRW_REGISTER_TYPE_UD);
  65
  66    /* Payload vertices plus space for more generated vertices:
  67     */
  68    for (j = 0; j < nr_verts; j++) {
  69       c->reg.vertex[j] = brw_vec4_grf(i, 0);
  70       i += c->nr_regs;
  71    }
  72
  73    c->reg.header = retype(brw_vec8_grf(i++, 0), BRW_REGISTER_TYPE_UD);
  74    c->reg.temp = retype(brw_vec8_grf(i++, 0), BRW_REGISTER_TYPE_UD);
  75
  76    c->prog_data.urb_read_length = c->nr_regs;
  77    c->prog_data.total_grf = i;
  78 }
  79
  80
  81 /**
  82  * Set up the initial value of c->reg.header register based on c->reg.R0.
  83  *
  84  * The following information is passed to the GS thread in R0, and needs to be
  85  * included in the first URB_WRITE or FF_SYNC message sent by the GS:
  86  *
  87  * - DWORD 0 [31:0] handle info (Gen4 only)
  88  * - DWORD 5 [7:0] FFTID
  89  * - DWORD 6 [31:0] Debug info
  90  * - DWORD 7 [31:0] Debug info
  91  *
  92  * This function sets up the above data by copying by copying the contents of
  93  * R0 to the header register.
  94  */
  95 static void brw_gs_initialize_header(struct brw_gs_compile *c)
  96 {
  97    struct brw_compile *p = &c->func;
  98    brw_MOV(p, c->reg.header, c->reg.R0);
  99 }
 100
 101 /**
 102  * Overwrite DWORD 2 of c->reg.header with the given immediate unsigned value.
 103  *
 104  * In URB_WRITE messages, DWORD 2 contains the fields PrimType, PrimStart,
 105  * PrimEnd, Increment CL_INVOCATIONS, and SONumPrimsWritten, many of which we
 106  * need to be able to update on a per-vertex basis.
 107  */
 108 static void brw_gs_overwrite_header_dw2(struct brw_gs_compile *c,
 109                                         unsigned dw2)
 110 {
 111    struct brw_compile *p = &c->func;
 112    brw_MOV(p, get_element_ud(c->reg.header, 2), brw_imm_ud(dw2));
 113 }
 114
 115 /**
 116  * Overwrite DWORD 2 of c->reg.header with the primitive type from c->reg.R0.
 117  *
 118  * When the thread is spawned, GRF 0 contains the primitive type in bits 4:0
 119  * of DWORD 2.  URB_WRITE messages need the primitive type in bits 6:2 of
 120  * DWORD 2.  So this function extracts the primitive type field, bitshifts it
 121  * appropriately, and stores it in c->reg.header.
 122  */
 123 static void brw_gs_overwrite_header_dw2_from_r0(struct brw_gs_compile *c)
 124 {
 125    struct brw_compile *p = &c->func;
 126    brw_AND(p, get_element_ud(c->reg.header, 2), get_element_ud(c->reg.R0, 2),
 127            brw_imm_ud(0x1f));
 128    brw_SHL(p, get_element_ud(c->reg.header, 2),
 129            get_element_ud(c->reg.header, 2), brw_imm_ud(2));
 130 }
 131
 132 /**
 133  * Apply an additive offset to DWORD 2 of c->reg.header.
 134  *
 135  * This is used to set/unset the "PrimStart" and "PrimEnd" flags appropriately
 136  * for each vertex.
 137  */
 138 static void brw_gs_offset_header_dw2(struct brw_gs_compile *c, int offset)
 139 {
 140    struct brw_compile *p = &c->func;
 141    brw_ADD(p, get_element_d(c->reg.header, 2), get_element_d(c->reg.header, 2),
 142            brw_imm_d(offset));
 143 }
 144
 145
 146 /**
 147  * Emit a vertex using the URB_WRITE message.  Use the contents of
 148  * c->reg.header for the message header, and the registers starting at \c vert
 149  * for the vertex data.
 150  *
 151  * If \c last is true, then this is the last vertex, so no further URB space
 152  * should be allocated, and this message should end the thread.
 153  *
 154  * If \c last is false, then a new URB entry will be allocated, and its handle
 155  * will be stored in DWORD 0 of c->reg.header for use in the next URB_WRITE
 156  * message.
 157  */
 158 static void brw_gs_emit_vue(struct brw_gs_compile *c,
 159                             struct brw_reg vert,
 160                             bool last)
 161 {
 162    struct brw_compile *p = &c->func;
 163    bool allocate = !last;
 164
 165    /* Copy the vertex from vertn into m1..mN+1:
 166     */
 167    brw_copy8(p, brw_message_reg(1), vert, c->nr_regs);
 168
 169    /* Send each vertex as a seperate write to the urb.  This is
 170     * different to the concept in brw_sf_emit.c, where subsequent
 171     * writes are used to build up a single urb entry.  Each of these
 172     * writes instantiates a seperate urb entry, and a new one must be
 173     * allocated each time.
 174     */
 175    brw_urb_WRITE(p,
 176                  allocate ? c->reg.temp
 177                           : retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
 178                  0,
 179                  c->reg.header,
 180                  allocate,
 181                  1,             /* used */
 182                  c->nr_regs + 1, /* msg length */
 183                  allocate ? 1 : 0, /* response length */
 184                  allocate ? 0 : 1, /* eot */
 185                  1,             /* writes_complete */
 186                  0,             /* urb offset */
 187                  BRW_URB_SWIZZLE_NONE);
 188
 189    if (allocate) {
 190       brw_MOV(p, get_element_ud(c->reg.header, 0),
 191               get_element_ud(c->reg.temp, 0));
 192    }
 193 }
 194
 195 /**
 196  * Send an FF_SYNC message to ensure that all previously spawned GS threads
 197  * have finished sending primitives down the pipeline, and to allocate a URB
 198  * entry for the first output vertex.  Only needed when intel->needs_ff_sync
 199  * is true.
 200  *
 201  * This function modifies c->reg.header: in DWORD 1, it stores num_prim (which
 202  * is needed by the FF_SYNC message), and in DWORD 0, it stores the handle to
 203  * the allocated URB entry (which will be needed by the URB_WRITE meesage that
 204  * follows).
 205  */
 206 static void brw_gs_ff_sync(struct brw_gs_compile *c, int num_prim)
 207 {
 208    struct brw_compile *p = &c->func;
 209
 210    brw_MOV(p, get_element_ud(c->reg.header, 1), brw_imm_ud(num_prim));
 211    brw_ff_sync(p,
 212                c->reg.temp,
 213                0,
 214                c->reg.header,
 215                1, /* allocate */
 216                1, /* response length */
 217                0 /* eot */);
 218    brw_MOV(p, get_element_ud(c->reg.header, 0),
 219            get_element_ud(c->reg.temp, 0));
 220 }
 221
 222
 223 void brw_gs_quads( struct brw_gs_compile *c, struct brw_gs_prog_key *key )
 224 {
 225    struct intel_context *intel = &c->func.brw->intel;
 226
 227    brw_gs_alloc_regs(c, 4, false);
 228    brw_gs_initialize_header(c);
 229    /* Use polygons for correct edgeflag behaviour. Note that vertex 3
 230     * is the PV for quads, but vertex 0 for polygons:
 231     */
 232    if (intel->needs_ff_sync)
 233       brw_gs_ff_sync(c, 1);
 234    brw_gs_overwrite_header_dw2(
 235       c, ((_3DPRIM_POLYGON << URB_WRITE_PRIM_TYPE_SHIFT)
 236           | URB_WRITE_PRIM_START));
 237    if (key->pv_first) {
 238       brw_gs_emit_vue(c, c->reg.vertex[0], 0);
 239       brw_gs_overwrite_header_dw2(
 240          c, _3DPRIM_POLYGON << URB_WRITE_PRIM_TYPE_SHIFT);
 241       brw_gs_emit_vue(c, c->reg.vertex[1], 0);
 242       brw_gs_emit_vue(c, c->reg.vertex[2], 0);
 243       brw_gs_overwrite_header_dw2(
 244          c, ((_3DPRIM_POLYGON << URB_WRITE_PRIM_TYPE_SHIFT)
 245              | URB_WRITE_PRIM_END));
 246       brw_gs_emit_vue(c, c->reg.vertex[3], 1);
 247    }
 248    else {
 249       brw_gs_emit_vue(c, c->reg.vertex[3], 0);
 250       brw_gs_overwrite_header_dw2(
 251          c, _3DPRIM_POLYGON << URB_WRITE_PRIM_TYPE_SHIFT);
 252       brw_gs_emit_vue(c, c->reg.vertex[0], 0);
 253       brw_gs_emit_vue(c, c->reg.vertex[1], 0);
 254       brw_gs_overwrite_header_dw2(
 255          c, ((_3DPRIM_POLYGON << URB_WRITE_PRIM_TYPE_SHIFT)
 256              | URB_WRITE_PRIM_END));
 257       brw_gs_emit_vue(c, c->reg.vertex[2], 1);
 258    }
 259 }
 260
 261 void brw_gs_quad_strip( struct brw_gs_compile *c, struct brw_gs_prog_key *key )
 262 {
 263    struct intel_context *intel = &c->func.brw->intel;
 264
 265    brw_gs_alloc_regs(c, 4, false);
 266    brw_gs_initialize_header(c);
 267
 268    if (intel->needs_ff_sync)
 269       brw_gs_ff_sync(c, 1);
 270    brw_gs_overwrite_header_dw2(
 271       c, ((_3DPRIM_POLYGON << URB_WRITE_PRIM_TYPE_SHIFT)
 272           | URB_WRITE_PRIM_START));
 273    if (key->pv_first) {
 274       brw_gs_emit_vue(c, c->reg.vertex[0], 0);
 275       brw_gs_overwrite_header_dw2(
 276          c, _3DPRIM_POLYGON << URB_WRITE_PRIM_TYPE_SHIFT);
 277       brw_gs_emit_vue(c, c->reg.vertex[1], 0);
 278       brw_gs_emit_vue(c, c->reg.vertex[2], 0);
 279       brw_gs_overwrite_header_dw2(
 280          c, ((_3DPRIM_POLYGON << URB_WRITE_PRIM_TYPE_SHIFT)
 281              | URB_WRITE_PRIM_END));
 282       brw_gs_emit_vue(c, c->reg.vertex[3], 1);
 283    }
 284    else {
 285       brw_gs_emit_vue(c, c->reg.vertex[2], 0);
 286       brw_gs_overwrite_header_dw2(
 287          c, _3DPRIM_POLYGON << URB_WRITE_PRIM_TYPE_SHIFT);
 288       brw_gs_emit_vue(c, c->reg.vertex[3], 0);
 289       brw_gs_emit_vue(c, c->reg.vertex[0], 0);
 290       brw_gs_overwrite_header_dw2(
 291          c, ((_3DPRIM_POLYGON << URB_WRITE_PRIM_TYPE_SHIFT)
 292              | URB_WRITE_PRIM_END));
 293       brw_gs_emit_vue(c, c->reg.vertex[1], 1);
 294    }
 295 }
 296
 297 void brw_gs_lines( struct brw_gs_compile *c )
 298 {
 299    struct intel_context *intel = &c->func.brw->intel;
 300
 301    brw_gs_alloc_regs(c, 2, false);
 302    brw_gs_initialize_header(c);
 303
 304    if (intel->needs_ff_sync)
 305       brw_gs_ff_sync(c, 1);
 306    brw_gs_overwrite_header_dw2(
 307       c, ((_3DPRIM_LINESTRIP << URB_WRITE_PRIM_TYPE_SHIFT)
 308           | URB_WRITE_PRIM_START));
 309    brw_gs_emit_vue(c, c->reg.vertex[0], 0);
 310    brw_gs_overwrite_header_dw2(
 311       c, ((_3DPRIM_LINESTRIP << URB_WRITE_PRIM_TYPE_SHIFT)
 312           | URB_WRITE_PRIM_END));
 313    brw_gs_emit_vue(c, c->reg.vertex[1], 1);
 314 }
 315
 316 /**
 317  * Generate the geometry shader program used on Gen6 to perform stream output
 318  * (transform feedback).
 319  */
 320 void
 321 gen6_sol_program(struct brw_gs_compile *c, struct brw_gs_prog_key *key,
 322                  unsigned num_verts, bool check_edge_flags)
 323 {
 324    struct brw_compile *p = &c->func;
 325    c->prog_data.svbi_postincrement_value = num_verts;
 326
 327    brw_gs_alloc_regs(c, num_verts, true);
 328    brw_gs_initialize_header(c);
 329
 330    if (key->num_transform_feedback_bindings > 0) {
 331       unsigned vertex, binding;
 332       /* Note: since we use the binding table to keep track of buffer offsets
 333        * and stride, the GS doesn't need to keep track of a separate pointer
 334        * into each buffer; it uses a single pointer which increments by 1 for
 335        * each vertex.  So we use SVBI0 for this pointer, regardless of whether
 336        * transform feedback is in interleaved or separate attribs mode.
 337        */
 338       brw_MOV(p, get_element_ud(c->reg.header, 5),
 339               get_element_ud(c->reg.SVBI, 0));
 340
 341       /* Make sure that the buffers have enough room for all the vertices. */
 342       brw_ADD(p, get_element_ud(c->reg.temp, 0),
 343                  get_element_ud(c->reg.SVBI, 0), brw_imm_ud(num_verts));
 344       brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_LE,
 345                  get_element_ud(c->reg.temp, 0),
 346                  get_element_ud(c->reg.SVBI, 4));
 347       brw_IF(p, BRW_EXECUTE_1);
 348
 349       /* For each vertex, generate code to output each varying using the
 350        * appropriate binding table entry.
 351        */
 352       for (vertex = 0; vertex < num_verts; ++vertex) {
 353          for (binding = 0; binding < key->num_transform_feedback_bindings;
 354               ++binding) {
 355             unsigned char vert_result =
 356                key->transform_feedback_bindings[binding];
 357             unsigned char slot = c->vue_map.vert_result_to_slot[vert_result];
 358             /* From the Sandybridge PRM, Volume 2, Part 1, Section 4.5.1:
 359              *
 360              *   "Prior to End of Thread with a URB_WRITE, the kernel must
 361              *   ensure that all writes are complete by sending the final
 362              *   write as a committed write."
 363              */
 364             bool final_write =
 365                binding == key->num_transform_feedback_bindings - 1 &&
 366                vertex == num_verts - 1;
 367             struct brw_reg vertex_slot = c->reg.vertex[vertex];
 368             vertex_slot.nr += slot / 2;
 369             vertex_slot.subnr = (slot % 2) * 16;
 370             brw_MOV(p, stride(c->reg.header, 4, 4, 1),
 371                     retype(vertex_slot, BRW_REGISTER_TYPE_UD));
 372             brw_svb_write(p,
 373                           final_write ? c->reg.temp : brw_null_reg(), /* dest */
 374                           1, /* msg_reg_nr */
 375                           c->reg.header, /* src0 */
 376                           SURF_INDEX_SOL_BINDING(binding), /* binding_table_index */
 377                           final_write); /* send_commit_msg */
 378          }
 379
 380          /* If there are more vertices to output, increment the pointer so
 381           * that we will start outputting to the next location in the
 382           * transform feedback buffers.
 383           */
 384          if (vertex != num_verts - 1) {
 385             brw_ADD(p, get_element_ud(c->reg.header, 5),
 386                     get_element_ud(c->reg.header, 5), brw_imm_ud(1));
 387          }
 388       }
 389       brw_ENDIF(p);
 390
 391       /* Now, reinitialize the header register from R0 to restore the parts of
 392        * the register that we overwrote while streaming out transform feedback
 393        * data.
 394        */
 395       brw_gs_initialize_header(c);
 396
 397       /* Finally, wait for the write commit to occur so that we can proceed to
 398        * other things safely.
 399        *
 400        * From the Sandybridge PRM, Volume 4, Part 1, Section 3.3:
 401        *
 402        *   The write commit does not modify the destination register, but
 403        *   merely clears the dependency associated with the destination
 404        *   register. Thus, a simple “mov” instruction using the register as a
 405        *   source is sufficient to wait for the write commit to occur.
 406        */
 407       brw_MOV(p, c->reg.temp, c->reg.temp);
 408    }
 409
 410    brw_gs_ff_sync(c, 1);
 411
 412    brw_gs_overwrite_header_dw2_from_r0(c);
 413    switch (num_verts) {
 414    case 1:
 415       brw_gs_offset_header_dw2(c, URB_WRITE_PRIM_START | URB_WRITE_PRIM_END);
 416       brw_gs_emit_vue(c, c->reg.vertex[0], true);
 417       break;
 418    case 2:
 419       brw_gs_offset_header_dw2(c, URB_WRITE_PRIM_START);
 420       brw_gs_emit_vue(c, c->reg.vertex[0], false);
 421       brw_gs_offset_header_dw2(c, URB_WRITE_PRIM_END - URB_WRITE_PRIM_START);
 422       brw_gs_emit_vue(c, c->reg.vertex[1], true);
 423       break;
 424    case 3:
 425       if (check_edge_flags) {
 426          /* Only emit vertices 0 and 1 if this is the first triangle of the
 427           * polygon.  Otherwise they are redundant.
 428           */
 429          brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
 430          brw_AND(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
 431                  get_element_ud(c->reg.R0, 2),
 432                  brw_imm_ud(BRW_GS_EDGE_INDICATOR_0));
 433          brw_IF(p, BRW_EXECUTE_1);
 434       }
 435       brw_gs_offset_header_dw2(c, URB_WRITE_PRIM_START);
 436       brw_gs_emit_vue(c, c->reg.vertex[0], false);
 437       brw_gs_offset_header_dw2(c, -URB_WRITE_PRIM_START);
 438       brw_gs_emit_vue(c, c->reg.vertex[1], false);
 439       if (check_edge_flags) {
 440          brw_ENDIF(p);
 441          /* Only emit vertex 2 in PRIM_END mode if this is the last triangle
 442           * of the polygon.  Otherwise leave the primitive incomplete because
 443           * there are more polygon vertices coming.
 444           */
 445          brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
 446          brw_AND(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
 447                  get_element_ud(c->reg.R0, 2),
 448                  brw_imm_ud(BRW_GS_EDGE_INDICATOR_1));
 449          brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
 450       }
 451       brw_gs_offset_header_dw2(c, URB_WRITE_PRIM_END);
 452       brw_set_predicate_control(p, BRW_PREDICATE_NONE);
 453       brw_gs_emit_vue(c, c->reg.vertex[2], true);
 454       break;
 455    }
 456 }