src/gallium/drivers/i965simple/brw_vs_emit.c

   1 /*
   2  Copyright (C) Intel Corp.  2006.  All Rights Reserved.
   3  Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
   4  develop this 3D driver.
   5
   6  Permission is hereby granted, free of charge, to any person obtaining
   7  a copy of this software and associated documentation files (the
   8  "Software"), to deal in the Software without restriction, including
   9  without limitation the rights to use, copy, modify, merge, publish,
  10  distribute, sublicense, and/or sell copies of the Software, and to
  11  permit persons to whom the Software is furnished to do so, subject to
  12  the following conditions:
  13
  14  The above copyright notice and this permission notice (including the
  15  next paragraph) shall be included in all copies or substantial
  16  portions of the Software.
  17
  18  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  19  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  21  IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
  22  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  23  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  24  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25
  26  **********************************************************************/
  27  /*
  28   * Authors:
  29   *   Keith Whitwell <keith@tungstengraphics.com>
  30   */
  31
  32 #include "brw_context.h"
  33 #include "brw_vs.h"
  34
  35 #include "pipe/p_shader_tokens.h"
  36 #include "tgsi/tgsi_parse.h"
  37
  38 struct brw_prog_info {
  39    unsigned num_temps;
  40    unsigned num_addrs;
  41    unsigned num_consts;
  42
  43    unsigned writes_psize;
  44
  45    unsigned pos_idx;
  46    unsigned result_edge_idx;
  47    unsigned edge_flag_idx;
  48    unsigned psize_idx;
  49 };
  50
  51 /* Do things as simply as possible.  Allocate and populate all regs
  52  * ahead of time.
  53  */
  54 static void brw_vs_alloc_regs( struct brw_vs_compile *c,
  55                                struct brw_prog_info *info )
  56 {
  57    unsigned i, reg = 0, mrf;
  58    unsigned nr_params;
  59
  60    /* r0 -- reserved as usual
  61     */
  62    c->r0 = brw_vec8_grf(reg, 0); reg++;
  63
  64    /* User clip planes from curbe:
  65     */
  66    if (c->key.nr_userclip) {
  67       for (i = 0; i < c->key.nr_userclip; i++) {
  68          c->userplane[i] = stride( brw_vec4_grf(reg+3+i/2, (i%2) * 4), 0, 4, 1);
  69       }
  70
  71       /* Deal with curbe alignment:
  72        */
  73       reg += ((6+c->key.nr_userclip+3)/4)*2;
  74    }
  75
  76    /* Vertex program parameters from curbe:
  77     */
  78    nr_params = c->prog_data.max_const;
  79    for (i = 0; i < nr_params; i++) {
  80       c->regs[TGSI_FILE_CONSTANT][i] = stride(brw_vec4_grf(reg+i/2, (i%2) * 4), 0, 4, 1);
  81    }
  82    reg += (nr_params+1)/2;
  83    c->prog_data.curb_read_length = reg - 1;
  84
  85
  86
  87    /* Allocate input regs:
  88     */
  89    c->nr_inputs = c->vp->info.num_inputs;
  90    for (i = 0; i < c->nr_inputs; i++) {
  91          c->regs[TGSI_FILE_INPUT][i] = brw_vec8_grf(reg, 0);
  92          reg++;
  93    }
  94
  95
  96    /* Allocate outputs: TODO: could organize the non-position outputs
  97     * to go straight into message regs.
  98     */
  99    c->nr_outputs = 0;
 100    c->first_output = reg;
 101    mrf = 4;
 102    for (i = 0; i < c->vp->info.num_outputs; i++) {
 103       c->nr_outputs++;
 104 #if 0
 105       if (i == VERT_RESULT_HPOS) {
 106          c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(reg, 0);
 107          reg++;
 108       }
 109       else if (i == VERT_RESULT_PSIZ) {
 110          c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(reg, 0);
 111          reg++;
 112          mrf++;         /* just a placeholder?  XXX fix later stages & remove this */
 113       }
 114       else {
 115          c->regs[TGSI_FILE_OUTPUT][i] = brw_message_reg(mrf);
 116          mrf++;
 117       }
 118 #else
 119       /*treat pos differently for now */
 120       if (i == info->pos_idx) {
 121          c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(reg, 0);
 122          reg++;
 123       } else {
 124          c->regs[TGSI_FILE_OUTPUT][i] = brw_message_reg(mrf);
 125          mrf++;
 126       }
 127 #endif
 128    }
 129
 130    /* Allocate program temporaries:
 131     */
 132    for (i = 0; i < info->num_temps; i++) {
 133       c->regs[TGSI_FILE_TEMPORARY][i] = brw_vec8_grf(reg, 0);
 134       reg++;
 135    }
 136
 137    /* Address reg(s).  Don't try to use the internal address reg until
 138     * deref time.
 139     */
 140    for (i = 0; i < info->num_addrs; i++) {
 141       c->regs[TGSI_FILE_ADDRESS][i] =  brw_reg(BRW_GENERAL_REGISTER_FILE,
 142                                                reg,
 143                                                0,
 144                                                BRW_REGISTER_TYPE_D,
 145                                                BRW_VERTICAL_STRIDE_8,
 146                                                BRW_WIDTH_8,
 147                                                BRW_HORIZONTAL_STRIDE_1,
 148                                                BRW_SWIZZLE_XXXX,
 149                                                TGSI_WRITEMASK_X);
 150       reg++;
 151    }
 152
 153    for (i = 0; i < 128; i++) {
 154       if (c->output_regs[i].used_in_src) {
 155          c->output_regs[i].reg = brw_vec8_grf(reg, 0);
 156          reg++;
 157       }
 158    }
 159
 160    c->stack =  brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, reg, 0);
 161    reg += 2;
 162
 163
 164    /* Some opcodes need an internal temporary:
 165     */
 166    c->first_tmp = reg;
 167    c->last_tmp = reg;           /* for allocation purposes */
 168
 169    /* Each input reg holds data from two vertices.  The
 170     * urb_read_length is the number of registers read from *each*
 171     * vertex urb, so is half the amount:
 172     */
 173    c->prog_data.urb_read_length = (c->nr_inputs+1)/2;
 174
 175    c->prog_data.urb_entry_size = (c->nr_outputs+2+3)/4;
 176    c->prog_data.total_grf = reg;
 177 }
 178
 179
 180 static struct brw_reg get_tmp( struct brw_vs_compile *c )
 181 {
 182    struct brw_reg tmp = brw_vec8_grf(c->last_tmp, 0);
 183
 184    if (++c->last_tmp > c->prog_data.total_grf)
 185       c->prog_data.total_grf = c->last_tmp;
 186
 187    return tmp;
 188 }
 189
 190 static void release_tmp( struct brw_vs_compile *c, struct brw_reg tmp )
 191 {
 192    if (tmp.nr == c->last_tmp-1)
 193       c->last_tmp--;
 194 }
 195
 196 static void release_tmps( struct brw_vs_compile *c )
 197 {
 198    c->last_tmp = c->first_tmp;
 199 }
 200
 201
 202 static void unalias1( struct brw_vs_compile *c,
 203                       struct brw_reg dst,
 204                       struct brw_reg arg0,
 205                       void (*func)( struct brw_vs_compile *,
 206                                     struct brw_reg,
 207                                     struct brw_reg ))
 208 {
 209    if (dst.file == arg0.file && dst.nr == arg0.nr) {
 210       struct brw_compile *p = &c->func;
 211       struct brw_reg tmp = brw_writemask(get_tmp(c), dst.dw1.bits.writemask);
 212       func(c, tmp, arg0);
 213       brw_MOV(p, dst, tmp);
 214    }
 215    else {
 216       func(c, dst, arg0);
 217    }
 218 }
 219
 220 static void unalias2( struct brw_vs_compile *c,
 221                       struct brw_reg dst,
 222                       struct brw_reg arg0,
 223                       struct brw_reg arg1,
 224                       void (*func)( struct brw_vs_compile *,
 225                                     struct brw_reg,
 226                                     struct brw_reg,
 227                                     struct brw_reg ))
 228 {
 229    if ((dst.file == arg0.file && dst.nr == arg0.nr) ||
 230        (dst.file == arg1.file && dst.nr == arg1.nr)) {
 231       struct brw_compile *p = &c->func;
 232       struct brw_reg tmp = brw_writemask(get_tmp(c), dst.dw1.bits.writemask);
 233       func(c, tmp, arg0, arg1);
 234       brw_MOV(p, dst, tmp);
 235    }
 236    else {
 237       func(c, dst, arg0, arg1);
 238    }
 239 }
 240
 241 static void emit_sop( struct brw_compile *p,
 242                       struct brw_reg dst,
 243                       struct brw_reg arg0,
 244                       struct brw_reg arg1,
 245                       unsigned cond)
 246 {
 247    brw_push_insn_state(p);
 248    brw_CMP(p, brw_null_reg(), cond, arg0, arg1);
 249    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
 250    brw_MOV(p, dst, brw_imm_f(1.0f));
 251    brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
 252    brw_MOV(p, dst, brw_imm_f(0.0f));
 253    brw_pop_insn_state(p);
 254 }
 255
 256 static void emit_seq( struct brw_compile *p,
 257                       struct brw_reg dst,
 258                       struct brw_reg arg0,
 259                       struct brw_reg arg1 )
 260 {
 261    emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_EQ);
 262 }
 263
 264 static void emit_sne( struct brw_compile *p,
 265                       struct brw_reg dst,
 266                       struct brw_reg arg0,
 267                       struct brw_reg arg1 )
 268 {
 269    emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_NEQ);
 270 }
 271 static void emit_slt( struct brw_compile *p,
 272                       struct brw_reg dst,
 273                       struct brw_reg arg0,
 274                       struct brw_reg arg1 )
 275 {
 276    emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_L);
 277 }
 278
 279 static void emit_sle( struct brw_compile *p,
 280                       struct brw_reg dst,
 281                       struct brw_reg arg0,
 282                       struct brw_reg arg1 )
 283 {
 284    emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_LE);
 285 }
 286
 287 static void emit_sgt( struct brw_compile *p,
 288                       struct brw_reg dst,
 289                       struct brw_reg arg0,
 290                       struct brw_reg arg1 )
 291 {
 292    emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_G);
 293 }
 294
 295 static void emit_sge( struct brw_compile *p,
 296                       struct brw_reg dst,
 297                       struct brw_reg arg0,
 298                       struct brw_reg arg1 )
 299 {
 300   emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_GE);
 301 }
 302
 303 static void emit_max( struct brw_compile *p,
 304                       struct brw_reg dst,
 305                       struct brw_reg arg0,
 306                       struct brw_reg arg1 )
 307 {
 308    brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0, arg1);
 309    brw_SEL(p, dst, arg1, arg0);
 310    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
 311 }
 312
 313 static void emit_min( struct brw_compile *p,
 314                       struct brw_reg dst,
 315                       struct brw_reg arg0,
 316                       struct brw_reg arg1 )
 317 {
 318    brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0, arg1);
 319    brw_SEL(p, dst, arg0, arg1);
 320    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
 321 }
 322
 323
 324 static void emit_math1( struct brw_vs_compile *c,
 325                         unsigned function,
 326                         struct brw_reg dst,
 327                         struct brw_reg arg0,
 328                         unsigned precision)
 329 {
 330    /* There are various odd behaviours with SEND on the simulator.  In
 331     * addition there are documented issues with the fact that the GEN4
 332     * processor doesn't do dependency control properly on SEND
 333     * results.  So, on balance, this kludge to get around failures
 334     * with writemasked math results looks like it might be necessary
 335     * whether that turns out to be a simulator bug or not:
 336     */
 337    struct brw_compile *p = &c->func;
 338    struct brw_reg tmp = dst;
 339    boolean need_tmp = (dst.dw1.bits.writemask != 0xf ||
 340                          dst.file != BRW_GENERAL_REGISTER_FILE);
 341
 342    if (need_tmp)
 343       tmp = get_tmp(c);
 344
 345    brw_math(p,
 346             tmp,
 347             function,
 348             BRW_MATH_SATURATE_NONE,
 349             2,
 350             arg0,
 351             BRW_MATH_DATA_SCALAR,
 352             precision);
 353
 354    if (need_tmp) {
 355       brw_MOV(p, dst, tmp);
 356       release_tmp(c, tmp);
 357    }
 358 }
 359
 360 static void emit_math2( struct brw_vs_compile *c,
 361                         unsigned function,
 362                         struct brw_reg dst,
 363                         struct brw_reg arg0,
 364                         struct brw_reg arg1,
 365                         unsigned precision)
 366 {
 367    struct brw_compile *p = &c->func;
 368    struct brw_reg tmp = dst;
 369    boolean need_tmp = (dst.dw1.bits.writemask != 0xf ||
 370                          dst.file != BRW_GENERAL_REGISTER_FILE);
 371
 372    if (need_tmp)
 373       tmp = get_tmp(c);
 374
 375    brw_MOV(p, brw_message_reg(3), arg1);
 376
 377    brw_math(p,
 378             tmp,
 379             function,
 380             BRW_MATH_SATURATE_NONE,
 381             2,
 382             arg0,
 383             BRW_MATH_DATA_SCALAR,
 384             precision);
 385
 386    if (need_tmp) {
 387       brw_MOV(p, dst, tmp);
 388       release_tmp(c, tmp);
 389    }
 390 }
 391
 392
 393
 394 static void emit_exp_noalias( struct brw_vs_compile *c,
 395                               struct brw_reg dst,
 396                               struct brw_reg arg0 )
 397 {
 398    struct brw_compile *p = &c->func;
 399
 400
 401    if (dst.dw1.bits.writemask & TGSI_WRITEMASK_X) {
 402       struct brw_reg tmp = get_tmp(c);
 403       struct brw_reg tmp_d = retype(tmp, BRW_REGISTER_TYPE_D);
 404
 405       /* tmp_d = floor(arg0.x) */
 406       brw_RNDD(p, tmp_d, brw_swizzle1(arg0, 0));
 407
 408       /* result[0] = 2.0 ^ tmp */
 409
 410       /* Adjust exponent for floating point:
 411        * exp += 127
 412        */
 413       brw_ADD(p, brw_writemask(tmp_d, TGSI_WRITEMASK_X), tmp_d, brw_imm_d(127));
 414
 415       /* Install exponent and sign.
 416        * Excess drops off the edge:
 417        */
 418       brw_SHL(p, brw_writemask(retype(dst, BRW_REGISTER_TYPE_D), TGSI_WRITEMASK_X),
 419               tmp_d, brw_imm_d(23));
 420
 421       release_tmp(c, tmp);
 422    }
 423
 424    if (dst.dw1.bits.writemask & TGSI_WRITEMASK_Y) {
 425       /* result[1] = arg0.x - floor(arg0.x) */
 426       brw_FRC(p, brw_writemask(dst, TGSI_WRITEMASK_Y), brw_swizzle1(arg0, 0));
 427    }
 428
 429    if (dst.dw1.bits.writemask & TGSI_WRITEMASK_Z) {
 430       /* As with the LOG instruction, we might be better off just
 431        * doing a taylor expansion here, seeing as we have to do all
 432        * the prep work.
 433        *
 434        * If mathbox partial precision is too low, consider also:
 435        * result[3] = result[0] * EXP(result[1])
 436        */
 437       emit_math1(c,
 438                  BRW_MATH_FUNCTION_EXP,
 439                  brw_writemask(dst, TGSI_WRITEMASK_Z),
 440                  brw_swizzle1(arg0, 0),
 441                  BRW_MATH_PRECISION_PARTIAL);
 442    }
 443
 444    if (dst.dw1.bits.writemask & TGSI_WRITEMASK_W) {
 445       /* result[3] = 1.0; */
 446       brw_MOV(p, brw_writemask(dst, TGSI_WRITEMASK_W), brw_imm_f(1));
 447    }
 448 }
 449
 450
 451 static void emit_log_noalias( struct brw_vs_compile *c,
 452                               struct brw_reg dst,
 453                               struct brw_reg arg0 )
 454 {
 455    struct brw_compile *p = &c->func;
 456    struct brw_reg tmp = dst;
 457    struct brw_reg tmp_ud = retype(tmp, BRW_REGISTER_TYPE_UD);
 458    struct brw_reg arg0_ud = retype(arg0, BRW_REGISTER_TYPE_UD);
 459    boolean need_tmp = (dst.dw1.bits.writemask != 0xf ||
 460                          dst.file != BRW_GENERAL_REGISTER_FILE);
 461
 462    if (need_tmp) {
 463       tmp = get_tmp(c);
 464       tmp_ud = retype(tmp, BRW_REGISTER_TYPE_UD);
 465    }
 466
 467    /* Perform mant = frexpf(fabsf(x), &exp), adjust exp and mnt
 468     * according to spec:
 469     *
 470     * These almost look likey they could be joined up, but not really
 471     * practical:
 472     *
 473     * result[0].f = (x.i & ((1<<31)-1) >> 23) - 127
 474     * result[1].i = (x.i & ((1<<23)-1)        + (127<<23)
 475     */
 476    if (dst.dw1.bits.writemask & TGSI_WRITEMASK_XZ) {
 477       brw_AND(p,
 478               brw_writemask(tmp_ud, TGSI_WRITEMASK_X),
 479               brw_swizzle1(arg0_ud, 0),
 480               brw_imm_ud((1U<<31)-1));
 481
 482       brw_SHR(p,
 483               brw_writemask(tmp_ud, TGSI_WRITEMASK_X),
 484               tmp_ud,
 485               brw_imm_ud(23));
 486
 487       brw_ADD(p,
 488               brw_writemask(tmp, TGSI_WRITEMASK_X),
 489               retype(tmp_ud, BRW_REGISTER_TYPE_D),      /* does it matter? */
 490               brw_imm_d(-127));
 491    }
 492
 493    if (dst.dw1.bits.writemask & TGSI_WRITEMASK_YZ) {
 494       brw_AND(p,
 495               brw_writemask(tmp_ud, TGSI_WRITEMASK_Y),
 496               brw_swizzle1(arg0_ud, 0),
 497               brw_imm_ud((1<<23)-1));
 498
 499       brw_OR(p,
 500              brw_writemask(tmp_ud, TGSI_WRITEMASK_Y),
 501              tmp_ud,
 502              brw_imm_ud(127<<23));
 503    }
 504
 505    if (dst.dw1.bits.writemask & TGSI_WRITEMASK_Z) {
 506       /* result[2] = result[0] + LOG2(result[1]); */
 507
 508       /* Why bother?  The above is just a hint how to do this with a
 509        * taylor series.  Maybe we *should* use a taylor series as by
 510        * the time all the above has been done it's almost certainly
 511        * quicker than calling the mathbox, even with low precision.
 512        *
 513        * Options are:
 514        *    - result[0] + mathbox.LOG2(result[1])
 515        *    - mathbox.LOG2(arg0.x)
 516        *    - result[0] + inline_taylor_approx(result[1])
 517        */
 518       emit_math1(c,
 519                  BRW_MATH_FUNCTION_LOG,
 520                  brw_writemask(tmp, TGSI_WRITEMASK_Z),
 521                  brw_swizzle1(tmp, 1),
 522                  BRW_MATH_PRECISION_FULL);
 523
 524       brw_ADD(p,
 525               brw_writemask(tmp, TGSI_WRITEMASK_Z),
 526               brw_swizzle1(tmp, 2),
 527               brw_swizzle1(tmp, 0));
 528    }
 529
 530    if (dst.dw1.bits.writemask & TGSI_WRITEMASK_W) {
 531       /* result[3] = 1.0; */
 532       brw_MOV(p, brw_writemask(tmp, TGSI_WRITEMASK_W), brw_imm_f(1));
 533    }
 534
 535    if (need_tmp) {
 536       brw_MOV(p, dst, tmp);
 537       release_tmp(c, tmp);
 538    }
 539 }
 540
 541
 542
 543
 544 /* Need to unalias - consider swizzles:   r0 = DST r0.xxxx r1
 545  */
 546 static void emit_dst_noalias( struct brw_vs_compile *c,
 547                               struct brw_reg dst,
 548                               struct brw_reg arg0,
 549                               struct brw_reg arg1)
 550 {
 551    struct brw_compile *p = &c->func;
 552
 553    /* There must be a better way to do this:
 554     */
 555    if (dst.dw1.bits.writemask & TGSI_WRITEMASK_X)
 556       brw_MOV(p, brw_writemask(dst, TGSI_WRITEMASK_X), brw_imm_f(1.0));
 557    if (dst.dw1.bits.writemask & TGSI_WRITEMASK_Y)
 558       brw_MUL(p, brw_writemask(dst, TGSI_WRITEMASK_Y), arg0, arg1);
 559    if (dst.dw1.bits.writemask & TGSI_WRITEMASK_Z)
 560       brw_MOV(p, brw_writemask(dst, TGSI_WRITEMASK_Z), arg0);
 561    if (dst.dw1.bits.writemask & TGSI_WRITEMASK_W)
 562       brw_MOV(p, brw_writemask(dst, TGSI_WRITEMASK_W), arg1);
 563 }
 564
 565 static void emit_xpd( struct brw_compile *p,
 566                       struct brw_reg dst,
 567                       struct brw_reg t,
 568                       struct brw_reg u)
 569 {
 570    brw_MUL(p, brw_null_reg(), brw_swizzle(t, 1,2,0,3),  brw_swizzle(u,2,0,1,3));
 571    brw_MAC(p, dst,     negate(brw_swizzle(t, 2,0,1,3)), brw_swizzle(u,1,2,0,3));
 572 }
 573
 574
 575
 576 static void emit_lit_noalias( struct brw_vs_compile *c,
 577                               struct brw_reg dst,
 578                               struct brw_reg arg0 )
 579 {
 580    struct brw_compile *p = &c->func;
 581    struct brw_instruction *if_insn;
 582    struct brw_reg tmp = dst;
 583    boolean need_tmp = (dst.file != BRW_GENERAL_REGISTER_FILE);
 584
 585    if (need_tmp)
 586       tmp = get_tmp(c);
 587
 588    brw_MOV(p, brw_writemask(dst, TGSI_WRITEMASK_YZ), brw_imm_f(0));
 589    brw_MOV(p, brw_writemask(dst, TGSI_WRITEMASK_XW), brw_imm_f(1));
 590
 591    /* Need to use BRW_EXECUTE_8 and also do an 8-wide compare in order
 592     * to get all channels active inside the IF.  In the clipping code
 593     * we run with NoMask, so it's not an option and we can use
 594     * BRW_EXECUTE_1 for all comparisions.
 595     */
 596    brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_G, brw_swizzle1(arg0,0), brw_imm_f(0));
 597    if_insn = brw_IF(p, BRW_EXECUTE_8);
 598    {
 599       brw_MOV(p, brw_writemask(dst, TGSI_WRITEMASK_Y), brw_swizzle1(arg0,0));
 600
 601       brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_G, brw_swizzle1(arg0,1), brw_imm_f(0));
 602       brw_MOV(p, brw_writemask(tmp, TGSI_WRITEMASK_Z),  brw_swizzle1(arg0,1));
 603       brw_set_predicate_control(p, BRW_PREDICATE_NONE);
 604
 605       emit_math2(c,
 606                  BRW_MATH_FUNCTION_POW,
 607                  brw_writemask(dst, TGSI_WRITEMASK_Z),
 608                  brw_swizzle1(tmp, 2),
 609                  brw_swizzle1(arg0, 3),
 610                  BRW_MATH_PRECISION_PARTIAL);
 611    }
 612
 613    brw_ENDIF(p, if_insn);
 614 }
 615
 616
 617
 618
 619
 620 /* TODO: relative addressing!
 621  */
 622 static struct brw_reg get_reg( struct brw_vs_compile *c,
 623                                unsigned file,
 624                                unsigned index )
 625 {
 626    switch (file) {
 627    case TGSI_FILE_TEMPORARY:
 628    case TGSI_FILE_INPUT:
 629    case TGSI_FILE_OUTPUT:
 630       assert(c->regs[file][index].nr != 0);
 631       return c->regs[file][index];
 632    case TGSI_FILE_CONSTANT:
 633       assert(c->regs[TGSI_FILE_CONSTANT][index + c->prog_data.num_imm].nr != 0);
 634       return c->regs[TGSI_FILE_CONSTANT][index + c->prog_data.num_imm];
 635    case TGSI_FILE_IMMEDIATE:
 636       assert(c->regs[TGSI_FILE_CONSTANT][index].nr != 0);
 637       return c->regs[TGSI_FILE_CONSTANT][index];
 638    case TGSI_FILE_ADDRESS:
 639       assert(index == 0);
 640       return c->regs[file][index];
 641
 642    case TGSI_FILE_NULL:                 /* undef values */
 643       return brw_null_reg();
 644
 645    default:
 646       assert(0);
 647       return brw_null_reg();
 648    }
 649 }
 650
 651
 652
 653 static struct brw_reg deref( struct brw_vs_compile *c,
 654                              struct brw_reg arg,
 655                              int offset)
 656 {
 657    struct brw_compile *p = &c->func;
 658    struct brw_reg tmp = vec4(get_tmp(c));
 659    struct brw_reg vp_address = retype(vec1(get_reg(c, TGSI_FILE_ADDRESS, 0)), BRW_REGISTER_TYPE_UW);
 660    unsigned byte_offset = arg.nr * 32 + arg.subnr + offset * 16;
 661    struct brw_reg indirect = brw_vec4_indirect(0,0);
 662
 663    {
 664       brw_push_insn_state(p);
 665       brw_set_access_mode(p, BRW_ALIGN_1);
 666
 667       /* This is pretty clunky - load the address register twice and
 668        * fetch each 4-dword value in turn.  There must be a way to do
 669        * this in a single pass, but I couldn't get it to work.
 670        */
 671       brw_ADD(p, brw_address_reg(0), vp_address, brw_imm_d(byte_offset));
 672       brw_MOV(p, tmp, indirect);
 673
 674       brw_ADD(p, brw_address_reg(0), suboffset(vp_address, 8), brw_imm_d(byte_offset));
 675       brw_MOV(p, suboffset(tmp, 4), indirect);
 676
 677       brw_pop_insn_state(p);
 678    }
 679
 680    return vec8(tmp);
 681 }
 682
 683
 684 static void emit_arl( struct brw_vs_compile *c,
 685                       struct brw_reg dst,
 686                       struct brw_reg arg0 )
 687 {
 688    struct brw_compile *p = &c->func;
 689    struct brw_reg tmp = dst;
 690    boolean need_tmp = (dst.file != BRW_GENERAL_REGISTER_FILE);
 691
 692    if (need_tmp)
 693       tmp = get_tmp(c);
 694
 695    brw_RNDD(p, tmp, arg0);
 696    brw_MUL(p, dst, tmp, brw_imm_d(16));
 697
 698    if (need_tmp)
 699       release_tmp(c, tmp);
 700 }
 701
 702
 703 /* Will return mangled results for SWZ op.  The emit_swz() function
 704  * ignores this result and recalculates taking extended swizzles into
 705  * account.
 706  */
 707 static struct brw_reg get_arg( struct brw_vs_compile *c,
 708                                struct tgsi_src_register *src )
 709 {
 710    struct brw_reg reg;
 711
 712    if (src->File == TGSI_FILE_NULL)
 713       return brw_null_reg();
 714
 715 #if 0
 716    if (src->RelAddr)
 717       reg = deref(c, c->regs[PROGRAM_STATE_VAR][0], src->Index);
 718    else
 719 #endif
 720       reg = get_reg(c, src->File, src->Index);
 721
 722    /* Convert 3-bit swizzle to 2-bit.
 723     */
 724    reg.dw1.bits.swizzle = BRW_SWIZZLE4(src->SwizzleX,
 725                                        src->SwizzleY,
 726                                        src->SwizzleZ,
 727                                        src->SwizzleW);
 728
 729    /* Note this is ok for non-swizzle instructions:
 730     */
 731    reg.negate = src->Negate ? 1 : 0;
 732
 733    return reg;
 734 }
 735
 736
 737 static struct brw_reg get_dst( struct brw_vs_compile *c,
 738                                const struct tgsi_dst_register *dst )
 739 {
 740    struct brw_reg reg = get_reg(c, dst->File, dst->Index);
 741
 742    reg.dw1.bits.writemask = dst->WriteMask;
 743
 744    return reg;
 745 }
 746
 747
 748
 749
 750 static void emit_swz( struct brw_vs_compile *c,
 751                       struct brw_reg dst,
 752                       struct tgsi_src_register src )
 753 {
 754    struct brw_compile *p = &c->func;
 755    unsigned zeros_mask = 0;
 756    unsigned ones_mask = 0;
 757    unsigned src_mask = 0;
 758    ubyte src_swz[4];
 759    boolean need_tmp = (src.Negate &&
 760                          dst.file != BRW_GENERAL_REGISTER_FILE);
 761    struct brw_reg tmp = dst;
 762    unsigned i;
 763
 764    if (need_tmp)
 765       tmp = get_tmp(c);
 766
 767    for (i = 0; i < 4; i++) {
 768       if (dst.dw1.bits.writemask & (1<<i)) {
 769          ubyte s = 0;
 770          switch(i) {
 771          case 0:
 772             s = src.SwizzleX;
 773             break;
 774             s = src.SwizzleY;
 775          case 1:
 776             break;
 777             s = src.SwizzleZ;
 778          case 2:
 779             break;
 780             s = src.SwizzleW;
 781          case 3:
 782             break;
 783          }
 784          switch (s) {
 785          case TGSI_SWIZZLE_X:
 786          case TGSI_SWIZZLE_Y:
 787          case TGSI_SWIZZLE_Z:
 788          case TGSI_SWIZZLE_W:
 789             src_mask |= 1<<i;
 790             src_swz[i] = s;
 791             break;
 792          case TGSI_EXTSWIZZLE_ZERO:
 793             zeros_mask |= 1<<i;
 794             break;
 795          case TGSI_EXTSWIZZLE_ONE:
 796             ones_mask |= 1<<i;
 797             break;
 798          }
 799       }
 800    }
 801
 802    /* Do src first, in case dst aliases src:
 803     */
 804    if (src_mask) {
 805       struct brw_reg arg0;
 806
 807 #if 0
 808       if (src.RelAddr)
 809          arg0 = deref(c, c->regs[PROGRAM_STATE_VAR][0], src.Index);
 810       else
 811 #endif
 812          arg0 = get_reg(c, src.File, src.Index);
 813
 814       arg0 = brw_swizzle(arg0,
 815                          src_swz[0], src_swz[1],
 816                          src_swz[2], src_swz[3]);
 817
 818       brw_MOV(p, brw_writemask(tmp, src_mask), arg0);
 819    }
 820
 821    if (zeros_mask)
 822       brw_MOV(p, brw_writemask(tmp, zeros_mask), brw_imm_f(0));
 823
 824    if (ones_mask)
 825       brw_MOV(p, brw_writemask(tmp, ones_mask), brw_imm_f(1));
 826
 827    if (src.Negate)
 828       brw_MOV(p, brw_writemask(tmp, src.Negate), negate(tmp));
 829
 830    if (need_tmp) {
 831       brw_MOV(p, dst, tmp);
 832       release_tmp(c, tmp);
 833    }
 834 }
 835
 836
 837
 838 /* Post-vertex-program processing.  Send the results to the URB.
 839  */
 840 static void emit_vertex_write( struct brw_vs_compile *c, struct brw_prog_info *info)
 841 {
 842    struct brw_compile *p = &c->func;
 843    struct brw_reg m0 = brw_message_reg(0);
 844    struct brw_reg pos = c->regs[TGSI_FILE_OUTPUT][info->pos_idx];
 845    struct brw_reg ndc;
 846
 847    if (c->key.copy_edgeflag) {
 848       brw_MOV(p,
 849               get_reg(c, TGSI_FILE_OUTPUT, info->result_edge_idx),
 850               get_reg(c, TGSI_FILE_INPUT, info->edge_flag_idx));
 851    }
 852
 853
 854    /* Build ndc coords?   TODO: Shortcircuit when w is known to be one.
 855     */
 856    if (!c->key.know_w_is_one) {
 857       ndc = get_tmp(c);
 858       emit_math1(c, BRW_MATH_FUNCTION_INV, ndc, brw_swizzle1(pos, 3), BRW_MATH_PRECISION_FULL);
 859       brw_MUL(p, brw_writemask(ndc, TGSI_WRITEMASK_XYZ), pos, ndc);
 860    }
 861    else {
 862       ndc = pos;
 863    }
 864
 865    /* This includes the workaround for -ve rhw, so is no longer an
 866     * optional step:
 867     */
 868    if (info->writes_psize ||
 869        c->key.nr_userclip ||
 870        !c->key.know_w_is_one)
 871    {
 872       struct brw_reg header1 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);
 873       unsigned i;
 874
 875       brw_MOV(p, header1, brw_imm_ud(0));
 876
 877       brw_set_access_mode(p, BRW_ALIGN_16);
 878
 879       if (info->writes_psize) {
 880          struct brw_reg psiz = c->regs[TGSI_FILE_OUTPUT][info->psize_idx];
 881          brw_MUL(p, brw_writemask(header1, TGSI_WRITEMASK_W),
 882                  brw_swizzle1(psiz, 0), brw_imm_f(1<<11));
 883          brw_AND(p, brw_writemask(header1, TGSI_WRITEMASK_W), header1,
 884                  brw_imm_ud(0x7ff<<8));
 885       }
 886
 887
 888       for (i = 0; i < c->key.nr_userclip; i++) {
 889          brw_set_conditionalmod(p, BRW_CONDITIONAL_L);
 890          brw_DP4(p, brw_null_reg(), pos, c->userplane[i]);
 891          brw_OR(p, brw_writemask(header1, TGSI_WRITEMASK_W), header1, brw_imm_ud(1<<i));
 892          brw_set_predicate_control(p, BRW_PREDICATE_NONE);
 893       }
 894
 895
 896       /* i965 clipping workaround:
 897        * 1) Test for -ve rhw
 898        * 2) If set,
 899        *      set ndc = (0,0,0,0)
 900        *      set ucp[6] = 1
 901        *
 902        * Later, clipping will detect ucp[6] and ensure the primitive is
 903        * clipped against all fixed planes.
 904        */
 905       if (!c->key.know_w_is_one) {
 906          brw_CMP(p,
 907                  vec8(brw_null_reg()),
 908                  BRW_CONDITIONAL_L,
 909                  brw_swizzle1(ndc, 3),
 910                  brw_imm_f(0));
 911
 912          brw_OR(p, brw_writemask(header1, TGSI_WRITEMASK_W), header1, brw_imm_ud(1<<6));
 913          brw_MOV(p, ndc, brw_imm_f(0));
 914          brw_set_predicate_control(p, BRW_PREDICATE_NONE);
 915       }
 916
 917       brw_set_access_mode(p, BRW_ALIGN_1);      /* why? */
 918       brw_MOV(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_UD), header1);
 919       brw_set_access_mode(p, BRW_ALIGN_16);
 920
 921       release_tmp(c, header1);
 922    }
 923    else {
 924       brw_MOV(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_UD), brw_imm_ud(0));
 925    }
 926
 927
 928    /* Emit the (interleaved) headers for the two vertices - an 8-reg
 929     * of zeros followed by two sets of NDC coordinates:
 930     */
 931    brw_set_access_mode(p, BRW_ALIGN_1);
 932    brw_MOV(p, offset(m0, 2), ndc);
 933    brw_MOV(p, offset(m0, 3), pos);
 934
 935
 936    brw_urb_WRITE(p,
 937                  brw_null_reg(), /* dest */
 938                  0,             /* starting mrf reg nr */
 939                  c->r0,         /* src */
 940                  0,             /* allocate */
 941                  1,             /* used */
 942                  c->nr_outputs + 3, /* msg len */
 943                  0,             /* response len */
 944                  1,             /* eot */
 945                  1,             /* writes complete */
 946                  0,             /* urb destination offset */
 947                  BRW_URB_SWIZZLE_INTERLEAVE);
 948
 949 }
 950
 951 static void
 952 post_vs_emit( struct brw_vs_compile *c, struct brw_instruction *end_inst )
 953 {
 954    struct tgsi_parse_context parse;
 955    const struct tgsi_token *tokens = c->vp->program.tokens;
 956    tgsi_parse_init(&parse, tokens);
 957    while (!tgsi_parse_end_of_tokens(&parse)) {
 958       tgsi_parse_token(&parse);
 959       if (parse.FullToken.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION) {
 960 #if 0
 961          struct brw_instruction *brw_inst1, *brw_inst2;
 962          const struct tgsi_full_instruction *inst1, *inst2;
 963          int offset;
 964          inst1 = &parse.FullToken.FullInstruction;
 965          brw_inst1 = inst1->Data;
 966          switch (inst1->Opcode) {
 967          case TGSI_OPCODE_CAL:
 968          case TGSI_OPCODE_BRA:
 969             target_insn = inst1->BranchTarget;
 970             inst2 = &c->vp->program.Base.Instructions[target_insn];
 971             brw_inst2 = inst2->Data;
 972             offset = brw_inst2 - brw_inst1;
 973             brw_set_src1(brw_inst1, brw_imm_d(offset*16));
 974             break;
 975          case TGSI_OPCODE_END:
 976             offset = end_inst - brw_inst1;
 977             brw_set_src1(brw_inst1, brw_imm_d(offset*16));
 978             break;
 979          default:
 980             break;
 981          }
 982 #endif
 983       }
 984    }
 985    tgsi_parse_free(&parse);
 986 }
 987
 988 static void process_declaration(const struct tgsi_full_declaration *decl,
 989                                 struct brw_prog_info *info)
 990 {
 991    int first = decl->DeclarationRange.First;
 992    int last = decl->DeclarationRange.Last;
 993
 994    switch(decl->Declaration.File) {
 995    case TGSI_FILE_CONSTANT:
 996       info->num_consts += last - first + 1;
 997       break;
 998    case TGSI_FILE_INPUT: {
 999    }
1000       break;
1001    case TGSI_FILE_OUTPUT: {
1002       assert(last == first);    /* for now */
1003       if (decl->Declaration.Semantic) {
1004          switch (decl->Semantic.SemanticName) {
1005          case TGSI_SEMANTIC_POSITION: {
1006             info->pos_idx = first;
1007          }
1008             break;
1009          case TGSI_SEMANTIC_COLOR:
1010             break;
1011          case TGSI_SEMANTIC_BCOLOR:
1012             break;
1013          case TGSI_SEMANTIC_FOG:
1014             break;
1015          case TGSI_SEMANTIC_PSIZE: {
1016             info->writes_psize = TRUE;
1017             info->psize_idx = first;
1018          }
1019             break;
1020          case TGSI_SEMANTIC_GENERIC:
1021             break;
1022          }
1023       }
1024    }
1025       break;
1026    case TGSI_FILE_TEMPORARY: {
1027       info->num_temps += (last - first) + 1;
1028    }
1029       break;
1030    case TGSI_FILE_SAMPLER: {
1031    }
1032       break;
1033    case TGSI_FILE_ADDRESS: {
1034       info->num_addrs += (last - first) + 1;
1035    }
1036       break;
1037    case TGSI_FILE_IMMEDIATE: {
1038    }
1039       break;
1040    case TGSI_FILE_NULL: {
1041    }
1042       break;
1043    }
1044 }
1045
1046 static void process_instruction(struct brw_vs_compile *c,
1047                                 struct tgsi_full_instruction *inst,
1048                                 struct brw_prog_info *info)
1049 {
1050    struct brw_reg args[3], dst;
1051    struct brw_compile *p = &c->func;
1052    /*struct brw_indirect stack_index = brw_indirect(0, 0);*/
1053    unsigned i;
1054    unsigned index;
1055    unsigned file;
1056    /*FIXME: might not be the only one*/
1057    const struct tgsi_dst_register *dst_reg = &inst->FullDstRegisters[0].DstRegister;
1058    /*
1059    struct brw_instruction *if_inst[MAX_IFSN];
1060    unsigned insn, if_insn = 0;
1061    */
1062
1063    for (i = 0; i < 3; i++) {
1064       struct tgsi_full_src_register *src = &inst->FullSrcRegisters[i];
1065       index = src->SrcRegister.Index;
1066       file = src->SrcRegister.File;
1067       if (file == TGSI_FILE_OUTPUT && c->output_regs[index].used_in_src)
1068          args[i] = c->output_regs[index].reg;
1069       else
1070          args[i] = get_arg(c, &src->SrcRegister);
1071    }
1072
1073    /* Get dest regs.  Note that it is possible for a reg to be both
1074     * dst and arg, given the static allocation of registers.  So
1075     * care needs to be taken emitting multi-operation instructions.
1076     */
1077    index = dst_reg->Index;
1078    file = dst_reg->File;
1079    if (file == TGSI_FILE_OUTPUT && c->output_regs[index].used_in_src)
1080       dst = c->output_regs[index].reg;
1081    else
1082       dst = get_dst(c, dst_reg);
1083
1084    switch (inst->Instruction.Opcode) {
1085    case TGSI_OPCODE_ABS:
1086       brw_MOV(p, dst, brw_abs(args[0]));
1087       break;
1088    case TGSI_OPCODE_ADD:
1089       brw_ADD(p, dst, args[0], args[1]);
1090       break;
1091    case TGSI_OPCODE_DP3:
1092       brw_DP3(p, dst, args[0], args[1]);
1093       break;
1094    case TGSI_OPCODE_DP4:
1095       brw_DP4(p, dst, args[0], args[1]);
1096       break;
1097    case TGSI_OPCODE_DPH:
1098       brw_DPH(p, dst, args[0], args[1]);
1099       break;
1100    case TGSI_OPCODE_DST:
1101       unalias2(c, dst, args[0], args[1], emit_dst_noalias);
1102       break;
1103    case TGSI_OPCODE_EXP:
1104       unalias1(c, dst, args[0], emit_exp_noalias);
1105       break;
1106    case TGSI_OPCODE_EX2:
1107       emit_math1(c, BRW_MATH_FUNCTION_EXP, dst, args[0], BRW_MATH_PRECISION_FULL);
1108       break;
1109    case TGSI_OPCODE_ARL:
1110       emit_arl(c, dst, args[0]);
1111       break;
1112    case TGSI_OPCODE_FLR:
1113       brw_RNDD(p, dst, args[0]);
1114       break;
1115    case TGSI_OPCODE_FRC:
1116       brw_FRC(p, dst, args[0]);
1117       break;
1118    case TGSI_OPCODE_LOG:
1119       unalias1(c, dst, args[0], emit_log_noalias);
1120       break;
1121    case TGSI_OPCODE_LG2:
1122       emit_math1(c, BRW_MATH_FUNCTION_LOG, dst, args[0], BRW_MATH_PRECISION_FULL);
1123       break;
1124    case TGSI_OPCODE_LIT:
1125       unalias1(c, dst, args[0], emit_lit_noalias);
1126       break;
1127    case TGSI_OPCODE_MAD:
1128       brw_MOV(p, brw_acc_reg(), args[2]);
1129       brw_MAC(p, dst, args[0], args[1]);
1130       break;
1131    case TGSI_OPCODE_MAX:
1132       emit_max(p, dst, args[0], args[1]);
1133       break;
1134    case TGSI_OPCODE_MIN:
1135       emit_min(p, dst, args[0], args[1]);
1136       break;
1137    case TGSI_OPCODE_MOV:
1138    case TGSI_OPCODE_SWZ:
1139 #if 0
1140       /* The args[0] value can't be used here as it won't have
1141        * correctly encoded the full swizzle:
1142        */
1143       emit_swz(c, dst, inst->SrcReg[0] );
1144 #endif
1145       brw_MOV(p, dst, args[0]);
1146       break;
1147    case TGSI_OPCODE_MUL:
1148       brw_MUL(p, dst, args[0], args[1]);
1149       break;
1150    case TGSI_OPCODE_POW:
1151       emit_math2(c, BRW_MATH_FUNCTION_POW, dst, args[0], args[1], BRW_MATH_PRECISION_FULL);
1152       break;
1153    case TGSI_OPCODE_RCP:
1154       emit_math1(c, BRW_MATH_FUNCTION_INV, dst, args[0], BRW_MATH_PRECISION_FULL);
1155       break;
1156    case TGSI_OPCODE_RSQ:
1157       emit_math1(c, BRW_MATH_FUNCTION_RSQ, dst, args[0], BRW_MATH_PRECISION_FULL);
1158       break;
1159
1160    case TGSI_OPCODE_SEQ:
1161       emit_seq(p, dst, args[0], args[1]);
1162       break;
1163    case TGSI_OPCODE_SNE:
1164       emit_sne(p, dst, args[0], args[1]);
1165       break;
1166    case TGSI_OPCODE_SGE:
1167       emit_sge(p, dst, args[0], args[1]);
1168       break;
1169    case TGSI_OPCODE_SGT:
1170       emit_sgt(p, dst, args[0], args[1]);
1171       break;
1172    case TGSI_OPCODE_SLT:
1173       emit_slt(p, dst, args[0], args[1]);
1174       break;
1175    case TGSI_OPCODE_SLE:
1176       emit_sle(p, dst, args[0], args[1]);
1177       break;
1178    case TGSI_OPCODE_SUB:
1179       brw_ADD(p, dst, args[0], negate(args[1]));
1180       break;
1181    case TGSI_OPCODE_XPD:
1182       emit_xpd(p, dst, args[0], args[1]);
1183       break;
1184 #if 0
1185    case TGSI_OPCODE_IF:
1186       assert(if_insn < MAX_IFSN);
1187       if_inst[if_insn++] = brw_IF(p, BRW_EXECUTE_8);
1188       break;
1189    case TGSI_OPCODE_ELSE:
1190       if_inst[if_insn-1] = brw_ELSE(p, if_inst[if_insn-1]);
1191       break;
1192    case TGSI_OPCODE_ENDIF:
1193       assert(if_insn > 0);
1194       brw_ENDIF(p, if_inst[--if_insn]);
1195       break;
1196    case TGSI_OPCODE_BRA:
1197       brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
1198       brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
1199       brw_set_predicate_control_flag_value(p, 0xff);
1200       break;
1201    case TGSI_OPCODE_CAL:
1202       brw_set_access_mode(p, BRW_ALIGN_1);
1203       brw_ADD(p, deref_1uw(stack_index, 0), brw_ip_reg(), brw_imm_d(3*16));
1204       brw_set_access_mode(p, BRW_ALIGN_16);
1205       brw_ADD(p, get_addr_reg(stack_index),
1206               get_addr_reg(stack_index), brw_imm_d(4));
1207       inst->Data = &p->store[p->nr_insn];
1208       brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
1209       break;
1210 #endif
1211    case TGSI_OPCODE_RET:
1212 #if 0
1213       brw_ADD(p, get_addr_reg(stack_index),
1214               get_addr_reg(stack_index), brw_imm_d(-4));
1215       brw_set_access_mode(p, BRW_ALIGN_1);
1216       brw_MOV(p, brw_ip_reg(), deref_1uw(stack_index, 0));
1217       brw_set_access_mode(p, BRW_ALIGN_16);
1218 #else
1219       /*brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));*/
1220 #endif
1221       break;
1222    case TGSI_OPCODE_END:
1223       brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
1224       break;
1225    case TGSI_OPCODE_BGNSUB:
1226    case TGSI_OPCODE_ENDSUB:
1227       break;
1228    default:
1229       debug_printf("Unsupport opcode %d in vertex shader\n", inst->Instruction.Opcode);
1230       break;
1231    }
1232
1233    if (dst_reg->File == TGSI_FILE_OUTPUT
1234        && dst_reg->Index != info->pos_idx
1235        && c->output_regs[dst_reg->Index].used_in_src)
1236       brw_MOV(p, get_dst(c, dst_reg), dst);
1237
1238    release_tmps(c);
1239 }
1240
1241 /* Emit the fragment program instructions here.
1242  */
1243 void brw_vs_emit(struct brw_vs_compile *c)
1244 {
1245 #define MAX_IFSN 32
1246    struct brw_compile *p = &c->func;
1247    struct brw_instruction *end_inst;
1248    struct tgsi_parse_context parse;
1249    struct brw_indirect stack_index = brw_indirect(0, 0);
1250    const struct tgsi_token *tokens = c->vp->program.tokens;
1251    struct brw_prog_info prog_info;
1252    unsigned allocated_registers = 0;
1253    memset(&prog_info, 0, sizeof(struct brw_prog_info));
1254
1255    brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1256    brw_set_access_mode(p, BRW_ALIGN_16);
1257
1258    tgsi_parse_init(&parse, tokens);
1259    /* Message registers can't be read, so copy the output into GRF register
1260       if they are used in source registers */
1261    while (!tgsi_parse_end_of_tokens(&parse)) {
1262       tgsi_parse_token(&parse);
1263       unsigned i;
1264       switch (parse.FullToken.Token.Type) {
1265       case TGSI_TOKEN_TYPE_INSTRUCTION: {
1266          const struct tgsi_full_instruction *inst = &parse.FullToken.FullInstruction;
1267          for (i = 0; i < 3; ++i) {
1268             const struct tgsi_src_register *src = &inst->FullSrcRegisters[i].SrcRegister;
1269             unsigned index = src->Index;
1270             unsigned file = src->File;
1271             if (file == TGSI_FILE_OUTPUT)
1272                c->output_regs[index].used_in_src = TRUE;
1273          }
1274       }
1275          break;
1276       default:
1277          /* nothing */
1278          break;
1279       }
1280    }
1281    tgsi_parse_free(&parse);
1282
1283    tgsi_parse_init(&parse, tokens);
1284
1285    while (!tgsi_parse_end_of_tokens(&parse)) {
1286       tgsi_parse_token(&parse);
1287
1288       switch (parse.FullToken.Token.Type) {
1289       case TGSI_TOKEN_TYPE_DECLARATION: {
1290          struct tgsi_full_declaration *decl = &parse.FullToken.FullDeclaration;
1291          process_declaration(decl, &prog_info);
1292       }
1293          break;
1294       case TGSI_TOKEN_TYPE_IMMEDIATE: {
1295          struct tgsi_full_immediate *imm = &parse.FullToken.FullImmediate;
1296          /*assert(imm->Immediate.Size == 4);*/
1297          c->prog_data.imm_buf[c->prog_data.num_imm][0] = imm->u.ImmediateFloat32[0].Float;
1298          c->prog_data.imm_buf[c->prog_data.num_imm][1] = imm->u.ImmediateFloat32[1].Float;
1299          c->prog_data.imm_buf[c->prog_data.num_imm][2] = imm->u.ImmediateFloat32[2].Float;
1300          c->prog_data.imm_buf[c->prog_data.num_imm][3] = imm->u.ImmediateFloat32[3].Float;
1301          c->prog_data.num_imm++;
1302       }
1303          break;
1304       case TGSI_TOKEN_TYPE_INSTRUCTION: {
1305          struct tgsi_full_instruction *inst = &parse.FullToken.FullInstruction;
1306          if (!allocated_registers) {
1307             /* first instruction (declerations finished).
1308              * now that we know what vars are being used allocate
1309              * registers for them.*/
1310             c->prog_data.num_consts = prog_info.num_consts;
1311             c->prog_data.max_const = prog_info.num_consts + c->prog_data.num_imm;
1312             brw_vs_alloc_regs(c, &prog_info);
1313
1314             brw_set_access_mode(p, BRW_ALIGN_1);
1315             brw_MOV(p, get_addr_reg(stack_index), brw_address(c->stack));
1316             brw_set_access_mode(p, BRW_ALIGN_16);
1317             allocated_registers = 1;
1318          }
1319          process_instruction(c, inst, &prog_info);
1320       }
1321          break;
1322       }
1323    }
1324
1325    end_inst = &p->store[p->nr_insn];
1326    emit_vertex_write(c, &prog_info);
1327    post_vs_emit(c, end_inst);
1328    tgsi_parse_free(&parse);
1329
1330 }