src/mesa/drivers/dri/i965/brw_clip_util.c

   1 /*
   2  Copyright (C) Intel Corp.  2006.  All Rights Reserved.
   3  Intel funded Tungsten Graphics to
   4  develop this 3D driver.
   5
   6  Permission is hereby granted, free of charge, to any person obtaining
   7  a copy of this software and associated documentation files (the
   8  "Software"), to deal in the Software without restriction, including
   9  without limitation the rights to use, copy, modify, merge, publish,
  10  distribute, sublicense, and/or sell copies of the Software, and to
  11  permit persons to whom the Software is furnished to do so, subject to
  12  the following conditions:
  13
  14  The above copyright notice and this permission notice (including the
  15  next paragraph) shall be included in all copies or substantial
  16  portions of the Software.
  17
  18  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  19  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  21  IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
  22  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  23  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  24  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25
  26  **********************************************************************/
  27  /*
  28   * Authors:
  29   *   Keith Whitwell <keithw@vmware.com>
  30   */
  31
  32
  33 #include "main/macros.h"
  34 #include "main/enums.h"
  35 #include "program/program.h"
  36
  37 #include "intel_batchbuffer.h"
  38
  39 #include "brw_defines.h"
  40 #include "brw_context.h"
  41 #include "brw_eu.h"
  42 #include "brw_clip.h"
  43
  44
  45
  46
  47 struct brw_reg get_tmp( struct brw_clip_compile *c )
  48 {
  49    struct brw_reg tmp = brw_vec4_grf(c->last_tmp, 0);
  50
  51    if (++c->last_tmp > c->prog_data.total_grf)
  52       c->prog_data.total_grf = c->last_tmp;
  53
  54    return tmp;
  55 }
  56
  57 static void release_tmp( struct brw_clip_compile *c, struct brw_reg tmp )
  58 {
  59    if (tmp.nr == c->last_tmp-1)
  60       c->last_tmp--;
  61 }
  62
  63
  64 static struct brw_reg make_plane_ud(GLuint x, GLuint y, GLuint z, GLuint w)
  65 {
  66    return brw_imm_ud((w<<24) | (z<<16) | (y<<8) | x);
  67 }
  68
  69
  70 void brw_clip_init_planes( struct brw_clip_compile *c )
  71 {
  72    struct brw_codegen *p = &c->func;
  73
  74    if (!c->key.nr_userclip) {
  75       brw_MOV(p, get_element_ud(c->reg.fixed_planes, 0), make_plane_ud( 0,    0, 0xff, 1));
  76       brw_MOV(p, get_element_ud(c->reg.fixed_planes, 1), make_plane_ud( 0,    0,    1, 1));
  77       brw_MOV(p, get_element_ud(c->reg.fixed_planes, 2), make_plane_ud( 0, 0xff,    0, 1));
  78       brw_MOV(p, get_element_ud(c->reg.fixed_planes, 3), make_plane_ud( 0,    1,    0, 1));
  79       brw_MOV(p, get_element_ud(c->reg.fixed_planes, 4), make_plane_ud(0xff,  0,    0, 1));
  80       brw_MOV(p, get_element_ud(c->reg.fixed_planes, 5), make_plane_ud( 1,    0,    0, 1));
  81    }
  82 }
  83
  84
  85
  86 #define W 3
  87
  88 /* Project 'pos' to screen space (or back again), overwrite with results:
  89  */
  90 void brw_clip_project_position(struct brw_clip_compile *c, struct brw_reg pos )
  91 {
  92    struct brw_codegen *p = &c->func;
  93
  94    /* calc rhw
  95     */
  96    brw_math_invert(p, get_element(pos, W), get_element(pos, W));
  97
  98    /* value.xyz *= value.rhw
  99     */
 100    brw_set_default_access_mode(p, BRW_ALIGN_16);
 101    brw_MUL(p, brw_writemask(pos, WRITEMASK_XYZ), pos, brw_swizzle1(pos, W));
 102    brw_set_default_access_mode(p, BRW_ALIGN_1);
 103 }
 104
 105
 106 static void brw_clip_project_vertex( struct brw_clip_compile *c,
 107                                      struct brw_indirect vert_addr )
 108 {
 109    struct brw_codegen *p = &c->func;
 110    struct brw_reg tmp = get_tmp(c);
 111    GLuint hpos_offset = brw_varying_to_offset(&c->vue_map, VARYING_SLOT_POS);
 112    GLuint ndc_offset = brw_varying_to_offset(&c->vue_map,
 113                                              BRW_VARYING_SLOT_NDC);
 114
 115    /* Fixup position.  Extract from the original vertex and re-project
 116     * to screen space:
 117     */
 118    brw_MOV(p, tmp, deref_4f(vert_addr, hpos_offset));
 119    brw_clip_project_position(c, tmp);
 120    brw_MOV(p, deref_4f(vert_addr, ndc_offset), tmp);
 121
 122    release_tmp(c, tmp);
 123 }
 124
 125
 126
 127
 128 /* Interpolate between two vertices and put the result into a0.0.
 129  * Increment a0.0 accordingly.
 130  *
 131  * Beware that dest_ptr can be equal to v0_ptr!
 132  */
 133 void brw_clip_interp_vertex( struct brw_clip_compile *c,
 134                              struct brw_indirect dest_ptr,
 135                              struct brw_indirect v0_ptr, /* from */
 136                              struct brw_indirect v1_ptr, /* to */
 137                              struct brw_reg t0,
 138                              bool force_edgeflag)
 139 {
 140    struct brw_codegen *p = &c->func;
 141    struct brw_reg t_nopersp, v0_ndc_copy;
 142    GLuint slot;
 143
 144    /* Just copy the vertex header:
 145     */
 146    /*
 147     * After CLIP stage, only first 256 bits of the VUE are read
 148     * back on Ironlake, so needn't change it
 149     */
 150    brw_copy_indirect_to_indirect(p, dest_ptr, v0_ptr, 1);
 151
 152
 153    /* First handle the 3D and NDC interpolation, in case we
 154     * need noperspective interpolation. Doing it early has no
 155     * performance impact in any case.
 156     */
 157
 158    /* Take a copy of the v0 NDC coordinates, in case dest == v0. */
 159    if (c->has_noperspective_shading) {
 160       GLuint offset = brw_varying_to_offset(&c->vue_map,
 161                                                  BRW_VARYING_SLOT_NDC);
 162       v0_ndc_copy = get_tmp(c);
 163       brw_MOV(p, v0_ndc_copy, deref_4f(v0_ptr, offset));
 164    }
 165
 166    /* Compute the new 3D position
 167     *
 168     * dest_hpos = v0_hpos * (1 - t0) + v1_hpos * t0
 169     */
 170    {
 171       GLuint delta = brw_varying_to_offset(&c->vue_map, VARYING_SLOT_POS);
 172       struct brw_reg tmp = get_tmp(c);
 173       brw_MUL(p, vec4(brw_null_reg()), deref_4f(v1_ptr, delta), t0);
 174       brw_MAC(p, tmp, negate(deref_4f(v0_ptr, delta)), t0);
 175       brw_ADD(p, deref_4f(dest_ptr, delta), deref_4f(v0_ptr, delta), tmp);
 176       release_tmp(c, tmp);
 177    }
 178
 179    /* Recreate the projected (NDC) coordinate in the new vertex header */
 180    brw_clip_project_vertex(c, dest_ptr);
 181
 182    /* If we have noperspective attributes,
 183     * we need to compute the screen-space t
 184     */
 185    if (c->has_noperspective_shading) {
 186       GLuint delta = brw_varying_to_offset(&c->vue_map,
 187                                                 BRW_VARYING_SLOT_NDC);
 188       struct brw_reg tmp = get_tmp(c);
 189       t_nopersp = get_tmp(c);
 190
 191       /* t_nopersp = vec4(v1.xy, dest.xy) */
 192       brw_MOV(p, t_nopersp, deref_4f(v1_ptr, delta));
 193       brw_MOV(p, tmp, deref_4f(dest_ptr, delta));
 194       brw_set_default_access_mode(p, BRW_ALIGN_16);
 195       brw_MOV(p,
 196               brw_writemask(t_nopersp, WRITEMASK_ZW),
 197               brw_swizzle(tmp, 0, 1, 0, 1));
 198
 199       /* t_nopersp = vec4(v1.xy, dest.xy) - v0.xyxy */
 200       brw_ADD(p, t_nopersp, t_nopersp,
 201               negate(brw_swizzle(v0_ndc_copy, 0, 1, 0, 1)));
 202
 203       /* Add the absolute values of the X and Y deltas so that if
 204        * the points aren't in the same place on the screen we get
 205        * nonzero values to divide.
 206        *
 207        * After that, we have vert1 - vert0 in t_nopersp.x and
 208        * vertnew - vert0 in t_nopersp.y
 209        *
 210        * t_nopersp = vec2(|v1.x  -v0.x| + |v1.y  -v0.y|,
 211        *                  |dest.x-v0.x| + |dest.y-v0.y|)
 212        */
 213       brw_ADD(p,
 214               brw_writemask(t_nopersp, WRITEMASK_XY),
 215               brw_abs(brw_swizzle(t_nopersp, 0, 2, 0, 0)),
 216               brw_abs(brw_swizzle(t_nopersp, 1, 3, 0, 0)));
 217       brw_set_default_access_mode(p, BRW_ALIGN_1);
 218
 219       /* If the points are in the same place, just substitute a
 220        * value to avoid divide-by-zero
 221        */
 222       brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_EQ,
 223               vec1(t_nopersp),
 224               brw_imm_f(0));
 225       brw_IF(p, BRW_EXECUTE_1);
 226       brw_MOV(p, t_nopersp, brw_imm_vf4(brw_float_to_vf(1.0),
 227                                         brw_float_to_vf(0.0),
 228                                         brw_float_to_vf(0.0),
 229                                         brw_float_to_vf(0.0)));
 230       brw_ENDIF(p);
 231
 232       /* Now compute t_nopersp = t_nopersp.y/t_nopersp.x and broadcast it. */
 233       brw_math_invert(p, get_element(t_nopersp, 0), get_element(t_nopersp, 0));
 234       brw_MUL(p, vec1(t_nopersp), vec1(t_nopersp),
 235             vec1(suboffset(t_nopersp, 1)));
 236       brw_set_default_access_mode(p, BRW_ALIGN_16);
 237       brw_MOV(p, t_nopersp, brw_swizzle(t_nopersp, 0, 0, 0, 0));
 238       brw_set_default_access_mode(p, BRW_ALIGN_1);
 239
 240       release_tmp(c, tmp);
 241       release_tmp(c, v0_ndc_copy);
 242    }
 243
 244    /* Now we can iterate over each attribute
 245     * (could be done in pairs?)
 246     */
 247    for (slot = 0; slot < c->vue_map.num_slots; slot++) {
 248       int varying = c->vue_map.slot_to_varying[slot];
 249       GLuint delta = brw_vue_slot_to_offset(slot);
 250
 251       /* HPOS, NDC already handled above */
 252       if (varying == VARYING_SLOT_POS || varying == BRW_VARYING_SLOT_NDC)
 253          continue;
 254
 255
 256       if (varying == VARYING_SLOT_EDGE) {
 257          if (force_edgeflag)
 258             brw_MOV(p, deref_4f(dest_ptr, delta), brw_imm_f(1));
 259          else
 260             brw_MOV(p, deref_4f(dest_ptr, delta), deref_4f(v0_ptr, delta));
 261       } else if (varying == VARYING_SLOT_PSIZ) {
 262          /* PSIZ doesn't need interpolation because it isn't used by the
 263           * fragment shader.
 264           */
 265       } else if (varying < VARYING_SLOT_MAX) {
 266          /* This is a true vertex result (and not a special value for the VUE
 267           * header), so interpolate:
 268           *
 269           *        New = attr0 + t*attr1 - t*attr0
 270           *
 271           * Unless the attribute is flat shaded -- in which case just copy
 272           * from one of the sources (doesn't matter which; already copied from pv)
 273           */
 274          GLuint interp = c->key.interpolation_mode.mode[slot];
 275
 276          if (interp != INTERP_QUALIFIER_FLAT) {
 277             struct brw_reg tmp = get_tmp(c);
 278             struct brw_reg t =
 279                interp == INTERP_QUALIFIER_NOPERSPECTIVE ? t_nopersp : t0;
 280
 281             brw_MUL(p,
 282                   vec4(brw_null_reg()),
 283                   deref_4f(v1_ptr, delta),
 284                   t);
 285
 286             brw_MAC(p,
 287                   tmp,
 288                   negate(deref_4f(v0_ptr, delta)),
 289                   t);
 290
 291             brw_ADD(p,
 292                   deref_4f(dest_ptr, delta),
 293                   deref_4f(v0_ptr, delta),
 294                   tmp);
 295
 296             release_tmp(c, tmp);
 297          }
 298          else {
 299             brw_MOV(p,
 300                   deref_4f(dest_ptr, delta),
 301                   deref_4f(v0_ptr, delta));
 302          }
 303       }
 304    }
 305
 306    if (c->vue_map.num_slots % 2) {
 307       GLuint delta = brw_vue_slot_to_offset(c->vue_map.num_slots);
 308
 309       brw_MOV(p, deref_4f(dest_ptr, delta), brw_imm_f(0));
 310    }
 311
 312    if (c->has_noperspective_shading)
 313       release_tmp(c, t_nopersp);
 314 }
 315
 316 void brw_clip_emit_vue(struct brw_clip_compile *c,
 317                        struct brw_indirect vert,
 318                        enum brw_urb_write_flags flags,
 319                        GLuint header)
 320 {
 321    struct brw_codegen *p = &c->func;
 322    bool allocate = flags & BRW_URB_WRITE_ALLOCATE;
 323
 324    brw_clip_ff_sync(c);
 325
 326    /* Any URB entry that is allocated must subsequently be used or discarded,
 327     * so it doesn't make sense to mark EOT and ALLOCATE at the same time.
 328     */
 329    assert(!(allocate && (flags & BRW_URB_WRITE_EOT)));
 330
 331    /* Copy the vertex from vertn into m1..mN+1:
 332     */
 333    brw_copy_from_indirect(p, brw_message_reg(1), vert, c->nr_regs);
 334
 335    /* Overwrite PrimType and PrimStart in the message header, for
 336     * each vertex in turn:
 337     */
 338    brw_MOV(p, get_element_ud(c->reg.R0, 2), brw_imm_ud(header));
 339
 340
 341    /* Send each vertex as a separate write to the urb.  This
 342     * is different to the concept in brw_sf_emit.c, where
 343     * subsequent writes are used to build up a single urb
 344     * entry.  Each of these writes instantiates a separate
 345     * urb entry - (I think... what about 'allocate'?)
 346     */
 347    brw_urb_WRITE(p,
 348                  allocate ? c->reg.R0 : retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
 349                  0,
 350                  c->reg.R0,
 351                  flags,
 352                  c->nr_regs + 1, /* msg length */
 353                  allocate ? 1 : 0, /* response_length */
 354                  0,             /* urb offset */
 355                  BRW_URB_SWIZZLE_NONE);
 356 }
 357
 358
 359
 360 void brw_clip_kill_thread(struct brw_clip_compile *c)
 361 {
 362    struct brw_codegen *p = &c->func;
 363
 364    brw_clip_ff_sync(c);
 365    /* Send an empty message to kill the thread and release any
 366     * allocated urb entry:
 367     */
 368    brw_urb_WRITE(p,
 369                  retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
 370                  0,
 371                  c->reg.R0,
 372                  BRW_URB_WRITE_UNUSED | BRW_URB_WRITE_EOT_COMPLETE,
 373                  1,             /* msg len */
 374                  0,             /* response len */
 375                  0,
 376                  BRW_URB_SWIZZLE_NONE);
 377 }
 378
 379
 380
 381
 382 struct brw_reg brw_clip_plane0_address( struct brw_clip_compile *c )
 383 {
 384    return brw_address(c->reg.fixed_planes);
 385 }
 386
 387
 388 struct brw_reg brw_clip_plane_stride( struct brw_clip_compile *c )
 389 {
 390    if (c->key.nr_userclip) {
 391       return brw_imm_uw(16);
 392    }
 393    else {
 394       return brw_imm_uw(4);
 395    }
 396 }
 397
 398
 399 /* Distribute flatshaded attributes from provoking vertex prior to
 400  * clipping.
 401  */
 402 void brw_clip_copy_flatshaded_attributes( struct brw_clip_compile *c,
 403                            GLuint to, GLuint from )
 404 {
 405    struct brw_codegen *p = &c->func;
 406
 407    for (int i = 0; i < c->vue_map.num_slots; i++) {
 408       if (c->key.interpolation_mode.mode[i] == INTERP_QUALIFIER_FLAT) {
 409          brw_MOV(p,
 410                  byte_offset(c->reg.vertex[to], brw_vue_slot_to_offset(i)),
 411                  byte_offset(c->reg.vertex[from], brw_vue_slot_to_offset(i)));
 412       }
 413    }
 414 }
 415
 416
 417
 418 void brw_clip_init_clipmask( struct brw_clip_compile *c )
 419 {
 420    struct brw_codegen *p = &c->func;
 421    struct brw_reg incoming = get_element_ud(c->reg.R0, 2);
 422
 423    /* Shift so that lowest outcode bit is rightmost:
 424     */
 425    brw_SHR(p, c->reg.planemask, incoming, brw_imm_ud(26));
 426
 427    if (c->key.nr_userclip) {
 428       struct brw_reg tmp = retype(vec1(get_tmp(c)), BRW_REGISTER_TYPE_UD);
 429
 430       /* Rearrange userclip outcodes so that they come directly after
 431        * the fixed plane bits.
 432        */
 433       if (p->devinfo->gen == 5 || p->devinfo->is_g4x)
 434          brw_AND(p, tmp, incoming, brw_imm_ud(0xff<<14));
 435       else
 436          brw_AND(p, tmp, incoming, brw_imm_ud(0x3f<<14));
 437
 438       brw_SHR(p, tmp, tmp, brw_imm_ud(8));
 439       brw_OR(p, c->reg.planemask, c->reg.planemask, tmp);
 440
 441       release_tmp(c, tmp);
 442    }
 443 }
 444
 445 void brw_clip_ff_sync(struct brw_clip_compile *c)
 446 {
 447     struct brw_codegen *p = &c->func;
 448
 449     if (p->devinfo->gen == 5) {
 450         brw_AND(p, brw_null_reg(), c->reg.ff_sync, brw_imm_ud(0x1));
 451         brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_Z);
 452         brw_IF(p, BRW_EXECUTE_1);
 453         {
 454             brw_OR(p, c->reg.ff_sync, c->reg.ff_sync, brw_imm_ud(0x1));
 455             brw_ff_sync(p,
 456                         c->reg.R0,
 457                         0,
 458                         c->reg.R0,
 459                         1, /* allocate */
 460                         1, /* response length */
 461                         0 /* eot */);
 462         }
 463         brw_ENDIF(p);
 464         brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
 465     }
 466 }
 467
 468 void brw_clip_init_ff_sync(struct brw_clip_compile *c)
 469 {
 470     struct brw_codegen *p = &c->func;
 471
 472     if (p->devinfo->gen == 5) {
 473         brw_MOV(p, c->reg.ff_sync, brw_imm_ud(0));
 474     }
 475 }