src/gallium/drivers/i965/brw_clip_util.c

   1 /*
   2  Copyright (C) Intel Corp.  2006.  All Rights Reserved.
   3  Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
   4  develop this 3D driver.
   5
   6  Permission is hereby granted, free of charge, to any person obtaining
   7  a copy of this software and associated documentation files (the
   8  "Software"), to deal in the Software without restriction, including
   9  without limitation the rights to use, copy, modify, merge, publish,
  10  distribute, sublicense, and/or sell copies of the Software, and to
  11  permit persons to whom the Software is furnished to do so, subject to
  12  the following conditions:
  13
  14  The above copyright notice and this permission notice (including the
  15  next paragraph) shall be included in all copies or substantial
  16  portions of the Software.
  17
  18  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  19  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  21  IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
  22  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  23  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  24  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25
  26  **********************************************************************/
  27  /*
  28   * Authors:
  29   *   Keith Whitwell <keith@tungstengraphics.com>
  30   */
  31
  32
  33 #include "brw_defines.h"
  34 #include "brw_eu.h"
  35 #include "brw_clip.h"
  36
  37
  38
  39
  40 struct brw_reg get_tmp( struct brw_clip_compile *c )
  41 {
  42    struct brw_reg tmp = brw_vec4_grf(c->last_tmp, 0);
  43
  44    if (++c->last_tmp > c->prog_data.total_grf)
  45       c->prog_data.total_grf = c->last_tmp;
  46
  47    return tmp;
  48 }
  49
  50 static void release_tmp( struct brw_clip_compile *c, struct brw_reg tmp )
  51 {
  52    if (tmp.nr == c->last_tmp-1)
  53       c->last_tmp--;
  54 }
  55
  56
  57 static struct brw_reg make_plane_ud(GLuint x, GLuint y, GLuint z, GLuint w)
  58 {
  59    return brw_imm_ud((w<<24) | (z<<16) | (y<<8) | x);
  60 }
  61
  62
  63 void brw_clip_init_planes( struct brw_clip_compile *c )
  64 {
  65    struct brw_compile *p = &c->func;
  66
  67    if (!c->key.nr_userclip) {
  68       brw_MOV(p, get_element_ud(c->reg.fixed_planes, 0), make_plane_ud( 0,    0, 0xff, 1));
  69       brw_MOV(p, get_element_ud(c->reg.fixed_planes, 1), make_plane_ud( 0,    0,    1, 1));
  70       brw_MOV(p, get_element_ud(c->reg.fixed_planes, 2), make_plane_ud( 0, 0xff,    0, 1));
  71       brw_MOV(p, get_element_ud(c->reg.fixed_planes, 3), make_plane_ud( 0,    1,    0, 1));
  72       brw_MOV(p, get_element_ud(c->reg.fixed_planes, 4), make_plane_ud(0xff,  0,    0, 1));
  73       brw_MOV(p, get_element_ud(c->reg.fixed_planes, 5), make_plane_ud( 1,    0,    0, 1));
  74    }
  75 }
  76
  77
  78
  79 #define W 3
  80
  81 /* Project 'pos' to screen space (or back again), overwrite with results:
  82  */
  83 void brw_clip_project_position(struct brw_clip_compile *c, struct brw_reg pos )
  84 {
  85    struct brw_compile *p = &c->func;
  86
  87    /* calc rhw
  88     */
  89    brw_math_invert(p, get_element(pos, W), get_element(pos, W));
  90
  91    /* value.xyz *= value.rhw
  92     */
  93    brw_set_access_mode(p, BRW_ALIGN_16);
  94    brw_MUL(p, brw_writemask(pos, BRW_WRITEMASK_XYZ), pos, brw_swizzle1(pos, W));
  95    brw_set_access_mode(p, BRW_ALIGN_1);
  96 }
  97
  98
  99 static void brw_clip_project_vertex( struct brw_clip_compile *c,
 100                                      struct brw_indirect vert_addr )
 101 {
 102    struct brw_compile *p = &c->func;
 103    struct brw_reg tmp = get_tmp(c);
 104
 105    /* Fixup position.  Extract from the original vertex and re-project
 106     * to screen space:
 107     */
 108    brw_MOV(p, tmp, deref_4f(vert_addr, c->offset_hpos));
 109    brw_clip_project_position(c, tmp);
 110    brw_MOV(p, deref_4f(vert_addr, c->header_position_offset), tmp);
 111
 112    release_tmp(c, tmp);
 113 }
 114
 115
 116
 117
 118 /* Interpolate between two vertices and put the result into a0.0.
 119  * Increment a0.0 accordingly.
 120  */
 121 void brw_clip_interp_vertex( struct brw_clip_compile *c,
 122                              struct brw_indirect dest_ptr,
 123                              struct brw_indirect v0_ptr, /* from */
 124                              struct brw_indirect v1_ptr, /* to */
 125                              struct brw_reg t0,
 126                              GLboolean force_edgeflag)
 127 {
 128    struct brw_compile *p = &c->func;
 129    struct brw_reg tmp = get_tmp(c);
 130    GLuint i;
 131
 132    /* Just copy the vertex header:
 133     */
 134    /*
 135     * After CLIP stage, only first 256 bits of the VUE are read
 136     * back on IGDNG, so needn't change it
 137     */
 138    brw_copy_indirect_to_indirect(p, dest_ptr, v0_ptr, 1);
 139
 140    /* Iterate over each attribute (could be done in pairs?)
 141     */
 142    for (i = 0; i < c->key.nr_attrs; i++) {
 143       GLuint delta = i*16 + 32;
 144
 145       if (c->chipset.is_igdng)
 146           delta = i * 16 + 32 * 3;
 147
 148       if (delta == c->offset_edgeflag) {
 149          if (force_edgeflag)
 150             brw_MOV(p, deref_4f(dest_ptr, delta), brw_imm_f(1));
 151          else
 152             brw_MOV(p, deref_4f(dest_ptr, delta), deref_4f(v0_ptr, delta));
 153       }
 154       else {
 155          /* Interpolate:
 156           *
 157           *        New = attr0 + t*attr1 - t*attr0
 158           */
 159          brw_MUL(p,
 160                  vec4(brw_null_reg()),
 161                  deref_4f(v1_ptr, delta),
 162                  t0);
 163
 164          brw_MAC(p,
 165                  tmp,
 166                  negate(deref_4f(v0_ptr, delta)),
 167                  t0);
 168
 169          brw_ADD(p,
 170                  deref_4f(dest_ptr, delta),
 171                  deref_4f(v0_ptr, delta),
 172                  tmp);
 173       }
 174    }
 175
 176    if (i & 1) {
 177       GLuint delta = i*16 + 32;
 178
 179       if (c->chipset.is_igdng)
 180           delta = i * 16 + 32 * 3;
 181
 182       brw_MOV(p, deref_4f(dest_ptr, delta), brw_imm_f(0));
 183    }
 184
 185    release_tmp(c, tmp);
 186
 187    /* Recreate the projected (NDC) coordinate in the new vertex
 188     * header:
 189     */
 190    brw_clip_project_vertex(c, dest_ptr );
 191 }
 192
 193
 194
 195
 196 #define MAX_MRF 16
 197
 198 void brw_clip_emit_vue(struct brw_clip_compile *c,
 199                        struct brw_indirect vert,
 200                        GLboolean allocate,
 201                        GLboolean eot,
 202                        GLuint header)
 203 {
 204    struct brw_compile *p = &c->func;
 205    GLuint start = c->last_mrf;
 206
 207    brw_clip_ff_sync(c);
 208
 209    assert(!(allocate && eot));
 210
 211    /* Cycle through mrf regs - probably futile as we have to wait for
 212     * the allocation response anyway.  Also, the order this function
 213     * is invoked doesn't correspond to the order the instructions will
 214     * be executed, so it won't have any effect in many cases.
 215     */
 216 #if 0
 217    if (start + c->nr_regs + 1 >= MAX_MRF)
 218       start = 0;
 219
 220    c->last_mrf = start + c->nr_regs + 1;
 221 #endif
 222
 223    /* Copy the vertex from vertn into m1..mN+1:
 224     */
 225    brw_copy_from_indirect(p, brw_message_reg(start+1), vert, c->nr_regs);
 226
 227    /* Overwrite PrimType and PrimStart in the message header, for
 228     * each vertex in turn:
 229     */
 230    brw_MOV(p, get_element_ud(c->reg.R0, 2), brw_imm_ud(header));
 231
 232
 233    /* Send each vertex as a seperate write to the urb.  This
 234     * is different to the concept in brw_sf_emit.c, where
 235     * subsequent writes are used to build up a single urb
 236     * entry.  Each of these writes instantiates a seperate
 237     * urb entry - (I think... what about 'allocate'?)
 238     */
 239    brw_urb_WRITE(p,
 240                  allocate ? c->reg.R0 : retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
 241                  start,
 242                  c->reg.R0,
 243                  allocate,
 244                  1,             /* used */
 245                  c->nr_regs + 1, /* msg length */
 246                  allocate ? 1 : 0, /* response_length */
 247                  eot,           /* eot */
 248                  1,             /* writes_complete */
 249                  0,             /* urb offset */
 250                  BRW_URB_SWIZZLE_NONE);
 251 }
 252
 253
 254
 255 void brw_clip_kill_thread(struct brw_clip_compile *c)
 256 {
 257    struct brw_compile *p = &c->func;
 258
 259    brw_clip_ff_sync(c);
 260    /* Send an empty message to kill the thread and release any
 261     * allocated urb entry:
 262     */
 263    brw_urb_WRITE(p,
 264                  retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
 265                  0,
 266                  c->reg.R0,
 267                  0,             /* allocate */
 268                  0,             /* used */
 269                  1,             /* msg len */
 270                  0,             /* response len */
 271                  1,             /* eot */
 272                  1,             /* writes complete */
 273                  0,
 274                  BRW_URB_SWIZZLE_NONE);
 275 }
 276
 277
 278
 279
 280 struct brw_reg brw_clip_plane0_address( struct brw_clip_compile *c )
 281 {
 282    return brw_address(c->reg.fixed_planes);
 283 }
 284
 285
 286 struct brw_reg brw_clip_plane_stride( struct brw_clip_compile *c )
 287 {
 288    if (c->key.nr_userclip) {
 289       return brw_imm_uw(16);
 290    }
 291    else {
 292       return brw_imm_uw(4);
 293    }
 294 }
 295
 296
 297 /* If flatshading, distribute color from provoking vertex prior to
 298  * clipping.
 299  */
 300 void brw_clip_copy_colors( struct brw_clip_compile *c,
 301                            GLuint to, GLuint from )
 302 {
 303    struct brw_compile *p = &c->func;
 304
 305    if (c->offset_color0)
 306       brw_MOV(p,
 307               byte_offset(c->reg.vertex[to], c->offset_color0),
 308               byte_offset(c->reg.vertex[from], c->offset_color0));
 309
 310    if (c->offset_color1)
 311       brw_MOV(p,
 312               byte_offset(c->reg.vertex[to], c->offset_color1),
 313               byte_offset(c->reg.vertex[from], c->offset_color1));
 314
 315    if (c->offset_bfc0)
 316       brw_MOV(p,
 317               byte_offset(c->reg.vertex[to], c->offset_bfc0),
 318               byte_offset(c->reg.vertex[from], c->offset_bfc0));
 319
 320    if (c->offset_bfc1)
 321       brw_MOV(p,
 322               byte_offset(c->reg.vertex[to], c->offset_bfc1),
 323               byte_offset(c->reg.vertex[from], c->offset_bfc1));
 324 }
 325
 326
 327
 328 void brw_clip_init_clipmask( struct brw_clip_compile *c )
 329 {
 330    struct brw_compile *p = &c->func;
 331    struct brw_reg incoming = get_element_ud(c->reg.R0, 2);
 332
 333    /* Shift so that lowest outcode bit is rightmost:
 334     */
 335    brw_SHR(p, c->reg.planemask, incoming, brw_imm_ud(26));
 336
 337    if (c->key.nr_userclip) {
 338       struct brw_reg tmp = retype(vec1(get_tmp(c)), BRW_REGISTER_TYPE_UD);
 339
 340       /* Rearrange userclip outcodes so that they come directly after
 341        * the fixed plane bits.
 342        */
 343       brw_AND(p, tmp, incoming, brw_imm_ud(0x3f<<14));
 344       brw_SHR(p, tmp, tmp, brw_imm_ud(8));
 345       brw_OR(p, c->reg.planemask, c->reg.planemask, tmp);
 346
 347       release_tmp(c, tmp);
 348    }
 349 }
 350
 351 void brw_clip_ff_sync(struct brw_clip_compile *c)
 352 {
 353     if (c->need_ff_sync) {
 354         struct brw_compile *p = &c->func;
 355         struct brw_instruction *need_ff_sync;
 356
 357         brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
 358         brw_AND(p, brw_null_reg(), c->reg.ff_sync, brw_imm_ud(0x1));
 359         need_ff_sync = brw_IF(p, BRW_EXECUTE_1);
 360         {
 361             brw_OR(p, c->reg.ff_sync, c->reg.ff_sync, brw_imm_ud(0x1));
 362             brw_ff_sync(p,
 363                     c->reg.R0,
 364                     0,
 365                     c->reg.R0,
 366                     1,
 367                     1,          /* used */
 368                     1,          /* msg length */
 369                     1,          /* response length */
 370                     0,          /* eot */
 371                     1,          /* write compelete */
 372                     0,          /* urb offset */
 373                     BRW_URB_SWIZZLE_NONE);
 374         }
 375         brw_ENDIF(p, need_ff_sync);
 376         brw_set_predicate_control(p, BRW_PREDICATE_NONE);
 377     }
 378 }
 379
 380 void brw_clip_init_ff_sync(struct brw_clip_compile *c)
 381 {
 382     if (c->need_ff_sync) {
 383         struct brw_compile *p = &c->func;
 384
 385         brw_MOV(p, c->reg.ff_sync, brw_imm_ud(0));
 386     }
 387 }