src/gallium/drivers/nv50/nv50_program.c

   1 #include "pipe/p_context.h"
   2 #include "pipe/p_defines.h"
   3 #include "pipe/p_state.h"
   4 #include "pipe/p_inlines.h"
   5
   6 #include "pipe/p_shader_tokens.h"
   7 #include "tgsi/util/tgsi_parse.h"
   8 #include "tgsi/util/tgsi_util.h"
   9
  10 #include "nv50_context.h"
  11 #include "nv50_state.h"
  12
  13 #define NV50_SU_MAX_TEMP 64
  14
  15 /* ARL - gallium craps itself on progs/vp/arl.txt
  16  *
  17  * MSB - Like MAD, but MUL+SUB
  18  *      - Drop the dedicated op; instead introduce a way to negate
  19  *        args for ops that support it.
  20  *
  21  * Look into inlining IMMD for ops other than MOV (make it general?)
  22  *      - Maybe even relax restrictions a bit, can't do P_RESULT + P_IMMD,
  23  *        but can emit to P_TEMP first - then MOV later. NVIDIA does this
  24  *
  25  * Verify half-insns work where expected - and force disable them where they
  26  * don't work - MUL has it forcibly disabled atm as it fixes POW..
  27  *
  28  * Watch out for dst==src vectors: writing one component can overwrite a
  29  * component that is still needed as a source, e.g. SUB R0, R0.yzxw, R0
  30  *
  31  * MOV dst, -src
  32  *      "delta" tmp, -src (0xa0000204,0xe4004780 - delta r0, -r0)
  33  *      mov dst, tmp
  34  *
  35  * Things to check with renouveau:
  36  *      FP attr/result assignment - how?
  37  *              attrib
  38  *                      - 0x16bc maps vp output onto fp hpos
  39  *                      - 0x16c0 maps vp output onto fp col0
  40  *              result
  41  *                      - colr always 0-3
  42  *                      - depr always 4
  43  * 0x16bc->0x16e8 --> some binding between vp/fp regs
  44  * 0x16b8 --> VP output count
  45  *
  46  * 0x1298 --> "MOV rcol.x, fcol.y" "MOV depr, fcol.y" = 0x00000005
  47  *            "MOV rcol.x, fcol.y" = 0x00000004
  48  * 0x19a8 --> as above but 0x00000100 and 0x00000000
  49  *      - 0x00100000 used when KIL used
  50  * 0x196c --> as above but 0x00000011 and 0x00000000
  51  *
  52  * 0x1988 --> 0xXXNNNNNN
  53  *      - XX == FP high something
  54  */
  55 struct nv50_reg {
  56         enum {
  57                 P_TEMP,
  58                 P_ATTR,
  59                 P_RESULT,
  60                 P_CONST,
  61                 P_IMMD
  62         } type;
  63         int index;
  64
  65         int hw;
  66         int neg;
  67 };
  68
  69 struct nv50_pc {
  70         struct nv50_program *p;
  71
  72         /* hw resources */
  73         struct nv50_reg *r_temp[NV50_SU_MAX_TEMP];
  74
  75         /* tgsi resources */
  76         struct nv50_reg *temp;
  77         int temp_nr;
  78         struct nv50_reg *attr;
  79         int attr_nr;
  80         struct nv50_reg *result;
  81         int result_nr;
  82         struct nv50_reg *param;
  83         int param_nr;
  84         struct nv50_reg *immd;
  85         float *immd_buf;
  86         int immd_nr;
  87
  88         struct nv50_reg *temp_temp[16];
  89         unsigned temp_temp_nr;
  90 };
  91
  92 static void
  93 alloc_reg(struct nv50_pc *pc, struct nv50_reg *reg)
  94 {
  95         int i;
  96
  97         if (reg->type != P_TEMP)
  98                 return;
  99
 100         if (reg->hw >= 0) {
 101                 /*XXX: do this here too to catch FP temp-as-attr usage..
 102                  *     not clean, but works */
 103                 if (pc->p->cfg.high_temp < (reg->hw + 1))
 104                         pc->p->cfg.high_temp = reg->hw + 1;
 105                 return;
 106         }
 107
 108         for (i = 0; i < NV50_SU_MAX_TEMP; i++) {
 109                 if (!(pc->r_temp[i])) {
 110                         pc->r_temp[i] = reg;
 111                         reg->hw = i;
 112                         if (pc->p->cfg.high_temp < (i + 1))
 113                                 pc->p->cfg.high_temp = i + 1;
 114                         return;
 115                 }
 116         }
 117
 118         assert(0);
 119 }
 120
 121 static struct nv50_reg *
 122 alloc_temp(struct nv50_pc *pc, struct nv50_reg *dst)
 123 {
 124         struct nv50_reg *r;
 125         int i;
 126
 127         if (dst && dst->type == P_TEMP && dst->hw == -1)
 128                 return dst;
 129
 130         for (i = 0; i < NV50_SU_MAX_TEMP; i++) {
 131                 if (!pc->r_temp[i]) {
 132                         r = CALLOC_STRUCT(nv50_reg);
 133                         r->type = P_TEMP;
 134                         r->index = -1;
 135                         r->hw = i;
 136                         pc->r_temp[i] = r;
 137                         return r;
 138                 }
 139         }
 140
 141         assert(0);
 142         return NULL;
 143 }
 144
 145 static void
 146 free_temp(struct nv50_pc *pc, struct nv50_reg *r)
 147 {
 148         if (r->index == -1) {
 149                 FREE(pc->r_temp[r->hw]);
 150                 pc->r_temp[r->hw] = NULL;
 151         }
 152 }
 153
 154 static struct nv50_reg *
 155 temp_temp(struct nv50_pc *pc)
 156 {
 157         if (pc->temp_temp_nr >= 16)
 158                 assert(0);
 159
 160         pc->temp_temp[pc->temp_temp_nr] = alloc_temp(pc, NULL);
 161         return pc->temp_temp[pc->temp_temp_nr++];
 162 }
 163
 164 static void
 165 kill_temp_temp(struct nv50_pc *pc)
 166 {
 167         int i;
 168
 169         for (i = 0; i < pc->temp_temp_nr; i++)
 170                 free_temp(pc, pc->temp_temp[i]);
 171         pc->temp_temp_nr = 0;
 172 }
 173
 174 static int
 175 ctor_immd(struct nv50_pc *pc, float x, float y, float z, float w)
 176 {
 177         pc->immd_buf = realloc(pc->immd_buf, (pc->immd_nr + 1) * 4 *
 178                                              sizeof(float));
 179         pc->immd_buf[(pc->immd_nr * 4) + 0] = x;
 180         pc->immd_buf[(pc->immd_nr * 4) + 1] = y;
 181         pc->immd_buf[(pc->immd_nr * 4) + 2] = z;
 182         pc->immd_buf[(pc->immd_nr * 4) + 3] = w;
 183
 184         return pc->immd_nr++;
 185 }
 186
 187 static struct nv50_reg *
 188 alloc_immd(struct nv50_pc *pc, float f)
 189 {
 190         struct nv50_reg *r = CALLOC_STRUCT(nv50_reg);
 191         unsigned hw;
 192
 193         hw = ctor_immd(pc, f, 0, 0, 0) * 4;
 194         r->type = P_IMMD;
 195         r->hw = hw;
 196         r->index = -1;
 197         return r;
 198 }
 199
 200 static void
 201 emit(struct nv50_pc *pc, unsigned *inst)
 202 {
 203         struct nv50_program *p = pc->p;
 204
 205         if (inst[0] & 1) {
 206                 p->insns_nr += 2;
 207                 p->insns = realloc(p->insns, sizeof(unsigned) * p->insns_nr);
 208                 memcpy(p->insns + (p->insns_nr - 2), inst, sizeof(unsigned)*2);
 209         } else {
 210                 p->insns_nr += 1;
 211                 p->insns = realloc(p->insns, sizeof(unsigned) * p->insns_nr);
 212                 memcpy(p->insns + (p->insns_nr - 1), inst, sizeof(unsigned));
 213         }
 214 }
 215
 216 static INLINE void set_long(struct nv50_pc *, unsigned *);
 217
 218 static boolean
 219 is_long(unsigned *inst)
 220 {
 221         if (inst[0] & 1)
 222                 return TRUE;
 223         return FALSE;
 224 }
 225
 226 static boolean
 227 is_immd(unsigned *inst)
 228 {
 229         if (is_long(inst) && (inst[1] & 3) == 3)
 230                 return TRUE;
 231         return FALSE;
 232 }
 233
 234 static INLINE void
 235 set_pred(struct nv50_pc *pc, unsigned pred, unsigned idx, unsigned *inst)
 236 {
 237         set_long(pc, inst);
 238         inst[1] &= ~((0x1f << 7) | (0x3 << 12));
 239         inst[1] |= (pred << 7) | (idx << 12);
 240 }
 241
 242 static INLINE void
 243 set_pred_wr(struct nv50_pc *pc, unsigned on, unsigned idx, unsigned *inst)
 244 {
 245         set_long(pc, inst);
 246         inst[1] &= ~((0x3 << 4) | (1 << 6));
 247         inst[1] |= (idx << 4) | (on << 6);
 248 }
 249
 250 static INLINE void
 251 set_long(struct nv50_pc *pc, unsigned *inst)
 252 {
 253         if (is_long(inst))
 254                 return;
 255
 256         inst[0] |= 1;
 257         set_pred(pc, 0xf, 0, inst);
 258         set_pred_wr(pc, 0, 0, inst);
 259 }
 260
 261 static INLINE void
 262 set_dst(struct nv50_pc *pc, struct nv50_reg *dst, unsigned *inst)
 263 {
 264         if (dst->type == P_RESULT) {
 265                 set_long(pc, inst);
 266                 inst[1] |= 0x00000008;
 267         }
 268
 269         alloc_reg(pc, dst);
 270         inst[0] |= (dst->hw << 2);
 271 }
 272
 273 static INLINE void
 274 set_immd(struct nv50_pc *pc, struct nv50_reg *imm, unsigned *inst)
 275 {
 276         unsigned val = fui(pc->immd_buf[imm->hw]); /* XXX */
 277
 278         set_long(pc, inst);
 279         /*XXX: can't be predicated - bits overlap.. catch cases where both
 280          *     are required and avoid them. */
 281         set_pred(pc, 0, 0, inst);
 282         set_pred_wr(pc, 0, 0, inst);
 283
 284         inst[1] |= 0x00000002 | 0x00000001;
 285         inst[0] |= (val & 0x3f) << 16;
 286         inst[1] |= (val >> 6) << 2;
 287 }
 288
 289 static void
 290 emit_interp(struct nv50_pc *pc, struct nv50_reg *dst,
 291             struct nv50_reg *src, struct nv50_reg *iv, boolean noperspective)
 292 {
 293         unsigned inst[2] = { 0, 0 };
 294
 295         inst[0] |= 0x80000000;
 296         set_dst(pc, dst, inst);
 297         alloc_reg(pc, iv);
 298         inst[0] |= (iv->hw << 9);
 299         alloc_reg(pc, src);
 300         inst[0] |= (src->hw << 16);
 301         if (noperspective)
 302                 inst[0] |= (1 << 25);
 303
 304         emit(pc, inst);
 305 }
 306
 307 static void
 308 set_cseg(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst)
 309 {
 310         set_long(pc, inst);
 311         if (src->type == P_IMMD) {
 312                 inst[1] |= (NV50_CB_PMISC << 22);
 313         } else {
 314                 if (pc->p->type == PIPE_SHADER_VERTEX)
 315                         inst[1] |= (NV50_CB_PVP << 22);
 316                 else
 317                         inst[1] |= (NV50_CB_PFP << 22);
 318         }
 319 }
 320
 321 static void
 322 emit_mov(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
 323 {
 324         unsigned inst[2] = { 0, 0 };
 325
 326         inst[0] |= 0x10000000;
 327
 328         set_dst(pc, dst, inst);
 329
 330         if (dst->type != P_RESULT && src->type == P_IMMD) {
 331                 set_immd(pc, src, inst);
 332                 /*XXX: 32-bit, but steals part of "half" reg space - need to
 333                  *     catch and handle this case if/when we do half-regs
 334                  */
 335                 inst[0] |= 0x00008000;
 336         } else
 337         if (src->type == P_IMMD || src->type == P_CONST) {
 338                 set_long(pc, inst);
 339                 set_cseg(pc, src, inst);
 340                 inst[0] |= (src->hw << 9);
 341                 inst[1] |= 0x20000000; /* src0 const? */
 342         } else {
 343                 if (src->type == P_ATTR) {
 344                         set_long(pc, inst);
 345                         inst[1] |= 0x00200000;
 346                 }
 347
 348                 alloc_reg(pc, src);
 349                 inst[0] |= (src->hw << 9);
 350         }
 351
 352         /* We really should support "half" instructions here at some point,
 353          * but I don't feel confident enough about them yet.
 354          */
 355         set_long(pc, inst);
 356         if (is_long(inst) && !is_immd(inst)) {
 357                 inst[1] |= 0x04000000; /* 32-bit */
 358                 inst[1] |= 0x0003c000; /* "subsubop" 0xf == mov */
 359         }
 360
 361         emit(pc, inst);
 362 }
 363
 364 static boolean
 365 check_swap_src_0_1(struct nv50_pc *pc,
 366                    struct nv50_reg **s0, struct nv50_reg **s1)
 367 {
 368         struct nv50_reg *src0 = *s0, *src1 = *s1;
 369
 370         if (src0->type == P_CONST) {
 371                 if (src1->type != P_CONST) {
 372                         *s0 = src1;
 373                         *s1 = src0;
 374                         return TRUE;
 375                 }
 376         } else
 377         if (src1->type == P_ATTR) {
 378                 if (src0->type != P_ATTR) {
 379                         *s0 = src1;
 380                         *s1 = src0;
 381                         return TRUE;
 382                 }
 383         }
 384
 385         return FALSE;
 386 }
 387
 388 static void
 389 set_src_0(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst)
 390 {
 391         if (src->type == P_ATTR) {
 392                 set_long(pc, inst);
 393                 inst[1] |= 0x00200000;
 394         } else
 395         if (src->type == P_CONST || src->type == P_IMMD) {
 396                 struct nv50_reg *temp = temp_temp(pc);
 397
 398                 emit_mov(pc, temp, src);
 399                 src = temp;
 400         }
 401
 402         alloc_reg(pc, src);
 403         inst[0] |= (src->hw << 9);
 404 }
 405
 406 static void
 407 set_src_1(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst)
 408 {
 409         if (src->type == P_ATTR) {
 410                 struct nv50_reg *temp = temp_temp(pc);
 411
 412                 emit_mov(pc, temp, src);
 413                 src = temp;
 414         } else
 415         if (src->type == P_CONST || src->type == P_IMMD) {
 416                 assert(!(inst[0] & 0x00800000));
 417                 if (inst[0] & 0x01000000) {
 418                         struct nv50_reg *temp = temp_temp(pc);
 419
 420                         emit_mov(pc, temp, src);
 421                         src = temp;
 422                 } else {
 423                         set_cseg(pc, src, inst);
 424                         inst[0] |= 0x00800000;
 425                 }
 426         }
 427
 428         alloc_reg(pc, src);
 429         inst[0] |= (src->hw << 16);
 430 }
 431
 432 static void
 433 set_src_2(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst)
 434 {
 435         set_long(pc, inst);
 436
 437         if (src->type == P_ATTR) {
 438                 struct nv50_reg *temp = temp_temp(pc);
 439
 440                 emit_mov(pc, temp, src);
 441                 src = temp;
 442         } else
 443         if (src->type == P_CONST || src->type == P_IMMD) {
 444                 assert(!(inst[0] & 0x01000000));
 445                 if (inst[0] & 0x00800000) {
 446                         struct nv50_reg *temp = temp_temp(pc);
 447
 448                         emit_mov(pc, temp, src);
 449                         src = temp;
 450                 } else {
 451                         set_cseg(pc, src, inst);
 452                         inst[0] |= 0x01000000;
 453                 }
 454         }
 455
 456         alloc_reg(pc, src);
 457         inst[1] |= (src->hw << 14);
 458 }
 459
 460 static void
 461 emit_mul(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
 462          struct nv50_reg *src1)
 463 {
 464         unsigned inst[2] = { 0, 0 };
 465
 466         inst[0] |= 0xc0000000;
 467         set_long(pc, inst);
 468
 469         check_swap_src_0_1(pc, &src0, &src1);
 470         set_dst(pc, dst, inst);
 471         set_src_0(pc, src0, inst);
 472         set_src_1(pc, src1, inst);
 473
 474         emit(pc, inst);
 475 }
 476
 477 static void
 478 emit_add(struct nv50_pc *pc, struct nv50_reg *dst,
 479          struct nv50_reg *src0, struct nv50_reg *src1)
 480 {
 481         unsigned inst[2] = { 0, 0 };
 482
 483         inst[0] |= 0xb0000000;
 484
 485         check_swap_src_0_1(pc, &src0, &src1);
 486         set_dst(pc, dst, inst);
 487         set_src_0(pc, src0, inst);
 488         if (is_long(inst))
 489                 set_src_2(pc, src1, inst);
 490         else
 491                 set_src_1(pc, src1, inst);
 492
 493         emit(pc, inst);
 494 }
 495
 496 static void
 497 emit_minmax(struct nv50_pc *pc, unsigned sub, struct nv50_reg *dst,
 498             struct nv50_reg *src0, struct nv50_reg *src1)
 499 {
 500         unsigned inst[2] = { 0, 0 };
 501
 502         set_long(pc, inst);
 503         inst[0] |= 0xb0000000;
 504         inst[1] |= (sub << 29);
 505
 506         check_swap_src_0_1(pc, &src0, &src1);
 507         set_dst(pc, dst, inst);
 508         set_src_0(pc, src0, inst);
 509         set_src_1(pc, src1, inst);
 510
 511         emit(pc, inst);
 512 }
 513
 514 static void
 515 emit_sub(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
 516          struct nv50_reg *src1)
 517 {
 518         unsigned inst[2] = { 0, 0 };
 519
 520         inst[0] |= 0xb0000000;
 521
 522         set_long(pc, inst);
 523         if (check_swap_src_0_1(pc, &src0, &src1))
 524                 inst[1] |= 0x04000000;
 525         else
 526                 inst[1] |= 0x08000000;
 527
 528         set_dst(pc, dst, inst);
 529         set_src_0(pc, src0, inst);
 530         set_src_2(pc, src1, inst);
 531
 532         emit(pc, inst);
 533 }
 534
 535 static void
 536 emit_mad(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
 537          struct nv50_reg *src1, struct nv50_reg *src2)
 538 {
 539         unsigned inst[2] = { 0, 0 };
 540
 541         inst[0] |= 0xe0000000;
 542
 543         check_swap_src_0_1(pc, &src0, &src1);
 544         set_dst(pc, dst, inst);
 545         set_src_0(pc, src0, inst);
 546         set_src_1(pc, src1, inst);
 547         set_src_2(pc, src2, inst);
 548
 549         emit(pc, inst);
 550 }
 551
 552 static void
 553 emit_msb(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
 554          struct nv50_reg *src1, struct nv50_reg *src2)
 555 {
 556         unsigned inst[2] = { 0, 0 };
 557
 558         inst[0] |= 0xe0000000;
 559         set_long(pc, inst);
 560         inst[1] |= 0x08000000; /* src0 * src1 - src2 */
 561
 562         check_swap_src_0_1(pc, &src0, &src1);
 563         set_dst(pc, dst, inst);
 564         set_src_0(pc, src0, inst);
 565         set_src_1(pc, src1, inst);
 566         set_src_2(pc, src2, inst);
 567
 568         emit(pc, inst);
 569 }
 570
 571 static void
 572 emit_flop(struct nv50_pc *pc, unsigned sub,
 573           struct nv50_reg *dst, struct nv50_reg *src)
 574 {
 575         unsigned inst[2] = { 0, 0 };
 576
 577         inst[0] |= 0x90000000;
 578         if (sub) {
 579                 set_long(pc, inst);
 580                 inst[1] |= (sub << 29);
 581         }
 582
 583         set_dst(pc, dst, inst);
 584         set_src_0(pc, src, inst);
 585
 586         emit(pc, inst);
 587 }
 588
 589 static void
 590 emit_preex2(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
 591 {
 592         unsigned inst[2] = { 0, 0 };
 593
 594         inst[0] |= 0xb0000000;
 595
 596         set_dst(pc, dst, inst);
 597         set_src_0(pc, src, inst);
 598         set_long(pc, inst);
 599         inst[1] |= (6 << 29) | 0x00004000;
 600
 601         emit(pc, inst);
 602 }
 603
 604 static void
 605 emit_precossin(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
 606 {
 607         unsigned inst[2] = { 0, 0 };
 608
 609         inst[0] |= 0xb0000000;
 610
 611         set_dst(pc, dst, inst);
 612         set_src_0(pc, src, inst);
 613         set_long(pc, inst);
 614         inst[1] |= (6 << 29);
 615
 616         emit(pc, inst);
 617 }
 618
 619 static void
 620 emit_set(struct nv50_pc *pc, unsigned c_op, struct nv50_reg *dst,
 621          struct nv50_reg *src0, struct nv50_reg *src1)
 622 {
 623         unsigned inst[2] = { 0, 0 };
 624         unsigned inv_cop[8] = { 0, 4, 2, 6, 1, 5, 3, 7 };
 625         struct nv50_reg *rdst;
 626
 627         assert(c_op <= 7);
 628         if (check_swap_src_0_1(pc, &src0, &src1))
 629                 c_op = inv_cop[c_op];
 630
 631         rdst = dst;
 632         if (dst->type != P_TEMP)
 633                 dst = alloc_temp(pc, NULL);
 634
 635         /* set.u32 */
 636         set_long(pc, inst);
 637         inst[0] |= 0xb0000000;
 638         inst[1] |= (3 << 29);
 639         inst[1] |= (c_op << 14);
 640         /*XXX: breaks things, .u32 by default?
 641          *     decuda will disasm as .u16 and use .lo/.hi regs, but this
 642          *     doesn't seem to match what the hw actually does.
 643         inst[1] |= 0x04000000; << breaks things.. .u32 by default?
 644          */
 645         set_dst(pc, dst, inst);
 646         set_src_0(pc, src0, inst);
 647         set_src_1(pc, src1, inst);
 648         emit(pc, inst);
 649
 650         /* cvt.f32.u32 */
 651         inst[0] = 0xa0000001;
 652         inst[1] = 0x64014780;
 653         set_dst(pc, rdst, inst);
 654         set_src_0(pc, dst, inst);
 655         emit(pc, inst);
 656
 657         if (dst != rdst)
 658                 free_temp(pc, dst);
 659 }
 660
 661 static void
 662 emit_flr(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
 663 {
 664         unsigned inst[2] = { 0, 0 };
 665
 666         inst[0] = 0xa0000000; /* cvt */
 667         set_long(pc, inst);
 668         inst[1] |= (6 << 29); /* cvt */
 669         inst[1] |= 0x08000000; /* integer mode */
 670         inst[1] |= 0x04000000; /* 32 bit */
 671         inst[1] |= ((0x1 << 3)) << 14; /* .rn */
 672         inst[1] |= (1 << 14); /* src .f32 */
 673         set_dst(pc, dst, inst);
 674         set_src_0(pc, src, inst);
 675
 676         emit(pc, inst);
 677 }
 678
 679 static void
 680 emit_pow(struct nv50_pc *pc, struct nv50_reg *dst,
 681          struct nv50_reg *v, struct nv50_reg *e)
 682 {
 683         struct nv50_reg *temp = alloc_temp(pc, NULL);
 684
 685         emit_flop(pc, 3, temp, v);
 686         emit_mul(pc, temp, temp, e);
 687         emit_preex2(pc, temp, temp);
 688         emit_flop(pc, 6, dst, temp);
 689
 690         free_temp(pc, temp);
 691 }
 692
 693 static void
 694 emit_abs(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
 695 {
 696         unsigned inst[2] = { 0, 0 };
 697
 698         inst[0] = 0xa0000000; /* cvt */
 699         set_long(pc, inst);
 700         inst[1] |= (6 << 29); /* cvt */
 701         inst[1] |= 0x04000000; /* 32 bit */
 702         inst[1] |= (1 << 14); /* src .f32 */
 703         inst[1] |= ((1 << 6) << 14); /* .abs */
 704         set_dst(pc, dst, inst);
 705         set_src_0(pc, src, inst);
 706
 707         emit(pc, inst);
 708 }
 709
 710 static void
 711 emit_lit(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask,
 712          struct nv50_reg **src)
 713 {
 714         struct nv50_reg *one = alloc_immd(pc, 1.0);
 715         struct nv50_reg *zero = alloc_immd(pc, 0.0);
 716         struct nv50_reg *neg128 = alloc_immd(pc, -127.999999);
 717         struct nv50_reg *pos128 = alloc_immd(pc,  127.999999);
 718         struct nv50_reg *tmp[4];
 719
 720         if (mask & (1 << 0))
 721                 emit_mov(pc, dst[0], one);
 722
 723         if (mask & (1 << 3))
 724                 emit_mov(pc, dst[3], one);
 725
 726         if (mask & (3 << 1)) {
 727                 if (mask & (1 << 1))
 728                         tmp[0] = dst[1];
 729                 else
 730                         tmp[0] = temp_temp(pc);
 731                 emit_minmax(pc, 4, tmp[0], src[0], zero);
 732         }
 733
 734         if (mask & (1 << 2)) {
 735                 set_pred_wr(pc, 1, 0, &pc->p->insns[pc->p->insns_nr - 2]);
 736
 737                 tmp[1] = temp_temp(pc);
 738                 emit_minmax(pc, 4, tmp[1], src[1], zero);
 739
 740                 tmp[3] = temp_temp(pc);
 741                 emit_minmax(pc, 4, tmp[3], src[3], neg128);
 742                 emit_minmax(pc, 5, tmp[3], tmp[3], pos128);
 743
 744                 emit_pow(pc, dst[2], tmp[1], tmp[3]);
 745                 emit_mov(pc, dst[2], zero);
 746                 set_pred(pc, 3, 0, &pc->p->insns[pc->p->insns_nr - 2]);
 747         }
 748 }
 749
 750 static void
 751 emit_neg(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
 752 {
 753         unsigned inst[2] = { 0, 0 };
 754
 755         set_long(pc, inst);
 756         inst[0] |= 0xa0000000; /* delta */
 757         inst[1] |= (7 << 29); /* delta */
 758         inst[1] |= 0x04000000; /* negate arg0? probably not */
 759         inst[1] |= (1 << 14); /* src .f32 */
 760         set_dst(pc, dst, inst);
 761         set_src_0(pc, src, inst);
 762
 763         emit(pc, inst);
 764 }
 765
 766 static struct nv50_reg *
 767 tgsi_dst(struct nv50_pc *pc, int c, const struct tgsi_full_dst_register *dst)
 768 {
 769         switch (dst->DstRegister.File) {
 770         case TGSI_FILE_TEMPORARY:
 771                 return &pc->temp[dst->DstRegister.Index * 4 + c];
 772         case TGSI_FILE_OUTPUT:
 773                 return &pc->result[dst->DstRegister.Index * 4 + c];
 774         case TGSI_FILE_NULL:
 775                 return NULL;
 776         default:
 777                 break;
 778         }
 779
 780         return NULL;
 781 }
 782
 783 static struct nv50_reg *
 784 tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src)
 785 {
 786         struct nv50_reg *r = NULL;
 787         struct nv50_reg *temp;
 788         unsigned c;
 789
 790         c = tgsi_util_get_full_src_register_extswizzle(src, chan);
 791         switch (c) {
 792         case TGSI_EXTSWIZZLE_X:
 793         case TGSI_EXTSWIZZLE_Y:
 794         case TGSI_EXTSWIZZLE_Z:
 795         case TGSI_EXTSWIZZLE_W:
 796                 switch (src->SrcRegister.File) {
 797                 case TGSI_FILE_INPUT:
 798                         r = &pc->attr[src->SrcRegister.Index * 4 + c];
 799                         break;
 800                 case TGSI_FILE_TEMPORARY:
 801                         r = &pc->temp[src->SrcRegister.Index * 4 + c];
 802                         break;
 803                 case TGSI_FILE_CONSTANT:
 804                         r = &pc->param[src->SrcRegister.Index * 4 + c];
 805                         break;
 806                 case TGSI_FILE_IMMEDIATE:
 807                         r = &pc->immd[src->SrcRegister.Index * 4 + c];
 808                         break;
 809                 default:
 810                         assert(0);
 811                         break;
 812                 }
 813                 break;
 814         case TGSI_EXTSWIZZLE_ZERO:
 815                 r = alloc_immd(pc, 0.0);
 816                 break;
 817         case TGSI_EXTSWIZZLE_ONE:
 818                 r = alloc_immd(pc, 1.0);
 819                 break;
 820         default:
 821                 assert(0);
 822                 break;
 823         }
 824
 825         switch (tgsi_util_get_full_src_register_sign_mode(src, chan)) {
 826         case TGSI_UTIL_SIGN_KEEP:
 827                 break;
 828         case TGSI_UTIL_SIGN_CLEAR:
 829                 temp = temp_temp(pc);
 830                 emit_abs(pc, temp, r);
 831                 r = temp;
 832                 break;
 833         case TGSI_UTIL_SIGN_TOGGLE:
 834                 temp = temp_temp(pc);
 835                 emit_neg(pc, temp, r);
 836                 r = temp;
 837                 break;
 838         case TGSI_UTIL_SIGN_SET:
 839                 temp = temp_temp(pc);
 840                 emit_abs(pc, temp, r);
 841                 emit_neg(pc, temp, r);
 842                 r = temp;
 843                 break;
 844         default:
 845                 assert(0);
 846                 break;
 847         }
 848
 849         return r;
 850 }
 851
 852 static boolean
 853 nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok)
 854 {
 855         const struct tgsi_full_instruction *inst = &tok->FullInstruction;
 856         struct nv50_reg *rdst[4], *dst[4], *src[3][4], *temp;
 857         unsigned mask, sat;
 858         int i, c;
 859
 860         NOUVEAU_ERR("insn %p\n", tok);
 861
 862         mask = inst->FullDstRegisters[0].DstRegister.WriteMask;
 863         sat = inst->Instruction.Saturate == TGSI_SAT_ZERO_ONE;
 864
 865         for (c = 0; c < 4; c++) {
 866                 if (mask & (1 << c))
 867                         dst[c] = tgsi_dst(pc, c, &inst->FullDstRegisters[0]);
 868                 else
 869                         dst[c] = NULL;
 870         }
 871
 872         for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
 873                 for (c = 0; c < 4; c++)
 874                         src[i][c] = tgsi_src(pc, c, &inst->FullSrcRegisters[i]);
 875         }
 876
 877         if (sat) {
 878                 for (c = 0; c < 4; c++) {
 879                         rdst[c] = dst[c];
 880                         dst[c] = temp_temp(pc);
 881                 }
 882         }
 883
 884         switch (inst->Instruction.Opcode) {
 885         case TGSI_OPCODE_ABS:
 886                 for (c = 0; c < 4; c++) {
 887                         if (!(mask & (1 << c)))
 888                                 continue;
 889                         emit_abs(pc, dst[c], src[0][c]);
 890                 }
 891                 break;
 892         case TGSI_OPCODE_ADD:
 893                 for (c = 0; c < 4; c++) {
 894                         if (!(mask & (1 << c)))
 895                                 continue;
 896                         emit_add(pc, dst[c], src[0][c], src[1][c]);
 897                 }
 898                 break;
 899         case TGSI_OPCODE_COS:
 900                 temp = alloc_temp(pc, NULL);
 901                 emit_precossin(pc, temp, src[0][0]);
 902                 emit_flop(pc, 5, temp, temp);
 903                 for (c = 0; c < 4; c++) {
 904                         if (!(mask & (1 << c)))
 905                                 continue;
 906                         emit_mov(pc, dst[c], temp);
 907                 }
 908                 break;
 909         case TGSI_OPCODE_DP3:
 910                 temp = alloc_temp(pc, NULL);
 911                 emit_mul(pc, temp, src[0][0], src[1][0]);
 912                 emit_mad(pc, temp, src[0][1], src[1][1], temp);
 913                 emit_mad(pc, temp, src[0][2], src[1][2], temp);
 914                 for (c = 0; c < 4; c++) {
 915                         if (!(mask & (1 << c)))
 916                                 continue;
 917                         emit_mov(pc, dst[c], temp);
 918                 }
 919                 free_temp(pc, temp);
 920                 break;
 921         case TGSI_OPCODE_DP4:
 922                 temp = alloc_temp(pc, NULL);
 923                 emit_mul(pc, temp, src[0][0], src[1][0]);
 924                 emit_mad(pc, temp, src[0][1], src[1][1], temp);
 925                 emit_mad(pc, temp, src[0][2], src[1][2], temp);
 926                 emit_mad(pc, temp, src[0][3], src[1][3], temp);
 927                 for (c = 0; c < 4; c++) {
 928                         if (!(mask & (1 << c)))
 929                                 continue;
 930                         emit_mov(pc, dst[c], temp);
 931                 }
 932                 free_temp(pc, temp);
 933                 break;
 934         case TGSI_OPCODE_DPH:
 935                 temp = alloc_temp(pc, NULL);
 936                 emit_mul(pc, temp, src[0][0], src[1][0]);
 937                 emit_mad(pc, temp, src[0][1], src[1][1], temp);
 938                 emit_mad(pc, temp, src[0][2], src[1][2], temp);
 939                 emit_add(pc, temp, src[1][3], temp);
 940                 for (c = 0; c < 4; c++) {
 941                         if (!(mask & (1 << c)))
 942                                 continue;
 943                         emit_mov(pc, dst[c], temp);
 944                 }
 945                 free_temp(pc, temp);
 946                 break;
 947         case TGSI_OPCODE_DST:
 948         {
 949                 struct nv50_reg *one = alloc_immd(pc, 1.0);
 950                 if (mask & (1 << 0))
 951                         emit_mov(pc, dst[0], one);
 952                 if (mask & (1 << 1))
 953                         emit_mul(pc, dst[1], src[0][1], src[1][1]);
 954                 if (mask & (1 << 2))
 955                         emit_mov(pc, dst[2], src[0][2]);
 956                 if (mask & (1 << 3))
 957                         emit_mov(pc, dst[3], src[1][3]);
 958                 FREE(one);
 959         }
 960                 break;
 961         case TGSI_OPCODE_EX2:
 962                 temp = alloc_temp(pc, NULL);
 963                 emit_preex2(pc, temp, src[0][0]);
 964                 emit_flop(pc, 6, temp, temp);
 965                 for (c = 0; c < 4; c++) {
 966                         if (!(mask & (1 << c)))
 967                                 continue;
 968                         emit_mov(pc, dst[c], temp);
 969                 }
 970                 free_temp(pc, temp);
 971                 break;
 972         case TGSI_OPCODE_FLR:
 973                 for (c = 0; c < 4; c++) {
 974                         if (!(mask & (1 << c)))
 975                                 continue;
 976                         emit_flr(pc, dst[c], src[0][c]);
 977                 }
 978                 break;
 979         case TGSI_OPCODE_FRC:
 980                 temp = alloc_temp(pc, NULL);
 981                 for (c = 0; c < 4; c++) {
 982                         if (!(mask & (1 << c)))
 983                                 continue;
 984                         emit_flr(pc, temp, src[0][c]);
 985                         emit_sub(pc, dst[c], src[0][c], temp);
 986                 }
 987                 free_temp(pc, temp);
 988                 break;
 989         case TGSI_OPCODE_LIT:
 990                 emit_lit(pc, &dst[0], mask, &src[0][0]);
 991                 break;
 992         case TGSI_OPCODE_LG2:
 993                 temp = alloc_temp(pc, NULL);
 994                 emit_flop(pc, 3, temp, src[0][0]);
 995                 for (c = 0; c < 4; c++) {
 996                         if (!(mask & (1 << c)))
 997                                 continue;
 998                         emit_mov(pc, dst[c], temp);
 999                 }
1000                 break;
1001         case TGSI_OPCODE_LRP:
1002                 for (c = 0; c < 4; c++) {
1003                         if (!(mask & (1 << c)))
1004                                 continue;
1005                         /*XXX: we can do better than this */
1006                         temp = alloc_temp(pc, NULL);
1007                         emit_neg(pc, temp, src[0][c]);
1008                         emit_mad(pc, temp, temp, src[2][c], src[2][c]);
1009                         emit_mad(pc, dst[c], src[0][c], src[1][c], temp);
1010                         free_temp(pc, temp);
1011                 }
1012                 break;
1013         case TGSI_OPCODE_MAD:
1014                 for (c = 0; c < 4; c++) {
1015                         if (!(mask & (1 << c)))
1016                                 continue;
1017                         emit_mad(pc, dst[c], src[0][c], src[1][c], src[2][c]);
1018                 }
1019                 break;
1020         case TGSI_OPCODE_MAX:
1021                 for (c = 0; c < 4; c++) {
1022                         if (!(mask & (1 << c)))
1023                                 continue;
1024                         emit_minmax(pc, 4, dst[c], src[0][c], src[1][c]);
1025                 }
1026                 break;
1027         case TGSI_OPCODE_MIN:
1028                 for (c = 0; c < 4; c++) {
1029                         if (!(mask & (1 << c)))
1030                                 continue;
1031                         emit_minmax(pc, 5, dst[c], src[0][c], src[1][c]);
1032                 }
1033                 break;
1034         case TGSI_OPCODE_MOV:
1035                 for (c = 0; c < 4; c++) {
1036                         if (!(mask & (1 << c)))
1037                                 continue;
1038                         emit_mov(pc, dst[c], src[0][c]);
1039                 }
1040                 break;
1041         case TGSI_OPCODE_MUL:
1042                 for (c = 0; c < 4; c++) {
1043                         if (!(mask & (1 << c)))
1044                                 continue;
1045                         emit_mul(pc, dst[c], src[0][c], src[1][c]);
1046                 }
1047                 break;
1048         case TGSI_OPCODE_POW:
1049                 temp = alloc_temp(pc, NULL);
1050                 emit_pow(pc, temp, src[0][0], src[1][0]);
1051                 for (c = 0; c < 4; c++) {
1052                         if (!(mask & (1 << c)))
1053                                 continue;
1054                         emit_mov(pc, dst[c], temp);
1055                 }
1056                 free_temp(pc, temp);
1057                 break;
1058         case TGSI_OPCODE_RCP:
1059                 for (c = 0; c < 4; c++) {
1060                         if (!(mask & (1 << c)))
1061                                 continue;
1062                         emit_flop(pc, 0, dst[c], src[0][0]);
1063                 }
1064                 break;
1065         case TGSI_OPCODE_RSQ:
1066                 for (c = 0; c < 4; c++) {
1067                         if (!(mask & (1 << c)))
1068                                 continue;
1069                         emit_flop(pc, 2, dst[c], src[0][0]);
1070                 }
1071                 break;
1072         case TGSI_OPCODE_SCS:
1073                 temp = alloc_temp(pc, NULL);
1074                 emit_precossin(pc, temp, src[0][0]);
1075                 if (mask & (1 << 0))
1076                         emit_flop(pc, 5, dst[0], temp);
1077                 if (mask & (1 << 1))
1078                         emit_flop(pc, 4, dst[1], temp);
1079                 break;
1080         case TGSI_OPCODE_SGE:
1081                 for (c = 0; c < 4; c++) {
1082                         if (!(mask & (1 << c)))
1083                                 continue;
1084                         emit_set(pc, 6, dst[c], src[0][c], src[1][c]);
1085                 }
1086                 break;
1087         case TGSI_OPCODE_SIN:
1088                 temp = alloc_temp(pc, NULL);
1089                 emit_precossin(pc, temp, src[0][0]);
1090                 emit_flop(pc, 4, temp, temp);
1091                 for (c = 0; c < 4; c++) {
1092                         if (!(mask & (1 << c)))
1093                                 continue;
1094                         emit_mov(pc, dst[c], temp);
1095                 }
1096                 break;
1097         case TGSI_OPCODE_SLT:
1098                 for (c = 0; c < 4; c++) {
1099                         if (!(mask & (1 << c)))
1100                                 continue;
1101                         emit_set(pc, 1, dst[c], src[0][c], src[1][c]);
1102                 }
1103                 break;
1104         case TGSI_OPCODE_SUB:
1105                 for (c = 0; c < 4; c++) {
1106                         if (!(mask & (1 << c)))
1107                                 continue;
1108                         emit_sub(pc, dst[c], src[0][c], src[1][c]);
1109                 }
1110                 break;
1111         case TGSI_OPCODE_XPD:
1112                 temp = alloc_temp(pc, NULL);
1113                 if (mask & (1 << 0)) {
1114                         emit_mul(pc, temp, src[0][2], src[1][1]);
1115                         emit_msb(pc, dst[0], src[0][1], src[1][2], temp);
1116                 }
1117                 if (mask & (1 << 1)) {
1118                         emit_mul(pc, temp, src[0][0], src[1][2]);
1119                         emit_msb(pc, dst[1], src[0][2], src[1][0], temp);
1120                 }
1121                 if (mask & (1 << 2)) {
1122                         emit_mul(pc, temp, src[0][1], src[1][0]);
1123                         emit_msb(pc, dst[2], src[0][0], src[1][1], temp);
1124                 }
1125                 free_temp(pc, temp);
1126                 break;
1127         case TGSI_OPCODE_END:
1128                 break;
1129         default:
1130                 NOUVEAU_ERR("invalid opcode %d\n", inst->Instruction.Opcode);
1131                 return FALSE;
1132         }
1133
1134         if (sat) {
1135                 for (c = 0; c < 4; c++) {
1136                         unsigned inst[2] = { 0, 0 };
1137
1138                         if (!(mask & (1 << c)))
1139                                 continue;
1140
1141                         inst[0] = 0xa0000000; /* cvt */
1142                         set_long(pc, inst);
1143                         inst[1] |= (6 << 29); /* cvt */
1144                         inst[1] |= 0x04000000; /* 32 bit */
1145                         inst[1] |= (1 << 14); /* src .f32 */
1146                         inst[1] |= ((1 << 5) << 14); /* .sat */
1147                         set_dst(pc, rdst[c], inst);
1148                         set_src_0(pc, dst[c], inst);
1149                         emit(pc, inst);
1150                 }
1151         }
1152
1153         kill_temp_temp(pc);
1154         return TRUE;
1155 }
1156
1157 static boolean
1158 nv50_program_tx_prep(struct nv50_pc *pc)
1159 {
1160         struct tgsi_parse_context p;
1161         boolean ret = FALSE;
1162         unsigned i, c;
1163
1164         tgsi_parse_init(&p, pc->p->pipe.tokens);
1165         while (!tgsi_parse_end_of_tokens(&p)) {
1166                 const union tgsi_full_token *tok = &p.FullToken;
1167
1168                 tgsi_parse_token(&p);
1169                 switch (tok->Token.Type) {
1170                 case TGSI_TOKEN_TYPE_IMMEDIATE:
1171                 {
1172                         const struct tgsi_full_immediate *imm =
1173                                 &p.FullToken.FullImmediate;
1174
1175                         ctor_immd(pc, imm->u.ImmediateFloat32[0].Float,
1176                                       imm->u.ImmediateFloat32[1].Float,
1177                                       imm->u.ImmediateFloat32[2].Float,
1178                                       imm->u.ImmediateFloat32[3].Float);
1179                 }
1180                         break;
1181                 case TGSI_TOKEN_TYPE_DECLARATION:
1182                 {
1183                         const struct tgsi_full_declaration *d;
1184                         unsigned last;
1185
1186                         d = &p.FullToken.FullDeclaration;
1187                         last = d->u.DeclarationRange.Last;
1188
1189                         switch (d->Declaration.File) {
1190                         case TGSI_FILE_TEMPORARY:
1191                                 if (pc->temp_nr < (last + 1))
1192                                         pc->temp_nr = last + 1;
1193                                 break;
1194                         case TGSI_FILE_OUTPUT:
1195                                 if (pc->result_nr < (last + 1))
1196                                         pc->result_nr = last + 1;
1197                                 break;
1198                         case TGSI_FILE_INPUT:
1199                                 if (pc->attr_nr < (last + 1))
1200                                         pc->attr_nr = last + 1;
1201                                 break;
1202                         case TGSI_FILE_CONSTANT:
1203                                 if (pc->param_nr < (last + 1))
1204                                         pc->param_nr = last + 1;
1205                                 break;
1206                         default:
1207                                 NOUVEAU_ERR("bad decl file %d\n",
1208                                             d->Declaration.File);
1209                                 goto out_err;
1210                         }
1211                 }
1212                         break;
1213                 case TGSI_TOKEN_TYPE_INSTRUCTION:
1214                         break;
1215                 default:
1216                         break;
1217                 }
1218         }
1219
1220         NOUVEAU_ERR("%d temps\n", pc->temp_nr);
1221         if (pc->temp_nr) {
1222                 pc->temp = calloc(pc->temp_nr * 4, sizeof(struct nv50_reg));
1223                 if (!pc->temp)
1224                         goto out_err;
1225
1226                 for (i = 0; i < pc->temp_nr; i++) {
1227                         for (c = 0; c < 4; c++) {
1228                                 pc->temp[i*4+c].type = P_TEMP;
1229                                 pc->temp[i*4+c].hw = -1;
1230                                 pc->temp[i*4+c].index = i;
1231                         }
1232                 }
1233         }
1234
1235         NOUVEAU_ERR("%d attrib regs\n", pc->attr_nr);
1236         if (pc->attr_nr) {
1237                 struct nv50_reg *iv = NULL, *tmp = NULL;
1238                 int aid = 0;
1239
1240                 pc->attr = calloc(pc->attr_nr * 4, sizeof(struct nv50_reg));
1241                 if (!pc->attr)
1242                         goto out_err;
1243
1244                 if (pc->p->type == PIPE_SHADER_FRAGMENT) {
1245                         iv = alloc_temp(pc, NULL);
1246                         aid++;
1247                 }
1248
1249                 for (i = 0; i < pc->attr_nr; i++) {
1250                         struct nv50_reg *a = &pc->attr[i*4];
1251
1252                         for (c = 0; c < 4; c++) {
1253                                 if (pc->p->type == PIPE_SHADER_FRAGMENT) {
1254                                         struct nv50_reg *at =
1255                                                 alloc_temp(pc, NULL);
1256                                         pc->attr[i*4+c].type = at->type;
1257                                         pc->attr[i*4+c].hw = at->hw;
1258                                         pc->attr[i*4+c].index = at->index;
1259                                 } else {
1260                                         pc->p->cfg.vp.attr[aid/32] |=
1261                                                 (1 << (aid % 32));
1262                                         pc->attr[i*4+c].type = P_ATTR;
1263                                         pc->attr[i*4+c].hw = aid++;
1264                                         pc->attr[i*4+c].index = i;
1265                                 }
1266                         }
1267
1268                         if (pc->p->type != PIPE_SHADER_FRAGMENT)
1269                                 continue;
1270
1271                         emit_interp(pc, iv, iv, iv, FALSE);
1272                         tmp = alloc_temp(pc, NULL);
1273                         emit_flop(pc, 0, tmp, iv);
1274                         emit_interp(pc, &a[0], &a[0], tmp, TRUE);
1275                         emit_interp(pc, &a[1], &a[1], tmp, TRUE);
1276                         emit_interp(pc, &a[2], &a[2], tmp, TRUE);
1277                         emit_interp(pc, &a[3], &a[3], tmp, TRUE);
1278                         free_temp(pc, tmp);
1279                 }
1280
1281                 if (iv)
1282                         free_temp(pc, iv);
1283         }
1284
1285         NOUVEAU_ERR("%d result regs\n", pc->result_nr);
1286         if (pc->result_nr) {
1287                 int rid = 0;
1288
1289                 pc->result = calloc(pc->result_nr * 4, sizeof(struct nv50_reg));
1290                 if (!pc->result)
1291                         goto out_err;
1292
1293                 for (i = 0; i < pc->result_nr; i++) {
1294                         for (c = 0; c < 4; c++) {
1295                                 if (pc->p->type == PIPE_SHADER_FRAGMENT) {
1296                                         pc->result[i*4+c].type = P_TEMP;
1297                                         pc->result[i*4+c].hw = -1;
1298                                 } else {
1299                                         pc->result[i*4+c].type = P_RESULT;
1300                                         pc->result[i*4+c].hw = rid++;
1301                                 }
1302                                 pc->result[i*4+c].index = i;
1303                         }
1304                 }
1305         }
1306
1307         NOUVEAU_ERR("%d param regs\n", pc->param_nr);
1308         if (pc->param_nr) {
1309                 int rid = 0;
1310
1311                 pc->param = calloc(pc->param_nr * 4, sizeof(struct nv50_reg));
1312                 if (!pc->param)
1313                         goto out_err;
1314
1315                 for (i = 0; i < pc->param_nr; i++) {
1316                         for (c = 0; c < 4; c++) {
1317                                 pc->param[i*4+c].type = P_CONST;
1318                                 pc->param[i*4+c].hw = rid++;
1319                                 pc->param[i*4+c].index = i;
1320                         }
1321                 }
1322         }
1323
1324         if (pc->immd_nr) {
1325                 int rid = 0;
1326
1327                 pc->immd = calloc(pc->immd_nr * 4, sizeof(struct nv50_reg));
1328                 if (!pc->immd)
1329                         goto out_err;
1330
1331                 for (i = 0; i < pc->immd_nr; i++) {
1332                         for (c = 0; c < 4; c++) {
1333                                 pc->immd[i*4+c].type = P_IMMD;
1334                                 pc->immd[i*4+c].hw = rid++;
1335                                 pc->immd[i*4+c].index = i;
1336                         }
1337                 }
1338         }
1339
1340         ret = TRUE;
1341 out_err:
1342         tgsi_parse_free(&p);
1343         return ret;
1344 }
1345
1346 static boolean
1347 nv50_program_tx(struct nv50_program *p)
1348 {
1349         struct tgsi_parse_context parse;
1350         struct nv50_pc *pc;
1351         boolean ret;
1352
1353         pc = CALLOC_STRUCT(nv50_pc);
1354         if (!pc)
1355                 return FALSE;
1356         pc->p = p;
1357         pc->p->cfg.high_temp = 4;
1358
1359         ret = nv50_program_tx_prep(pc);
1360         if (ret == FALSE)
1361                 goto out_cleanup;
1362
1363         tgsi_parse_init(&parse, pc->p->pipe.tokens);
1364         while (!tgsi_parse_end_of_tokens(&parse)) {
1365                 const union tgsi_full_token *tok = &parse.FullToken;
1366
1367                 tgsi_parse_token(&parse);
1368
1369                 switch (tok->Token.Type) {
1370                 case TGSI_TOKEN_TYPE_INSTRUCTION:
1371                         ret = nv50_program_tx_insn(pc, tok);
1372                         if (ret == FALSE)
1373                                 goto out_err;
1374                         break;
1375                 default:
1376                         break;
1377                 }
1378         }
1379
1380         if (p->type == PIPE_SHADER_FRAGMENT) {
1381                 struct nv50_reg out;
1382
1383                 out.type = P_TEMP;
1384                 for (out.hw = 0; out.hw < pc->result_nr * 4; out.hw++)
1385                         emit_mov(pc, &out, &pc->result[out.hw]);
1386         }
1387
1388         p->immd_nr = pc->immd_nr * 4;
1389         p->immd = pc->immd_buf;
1390
1391 out_err:
1392         tgsi_parse_free(&parse);
1393
1394 out_cleanup:
1395         return ret;
1396 }
1397
1398 static void
1399 nv50_program_validate(struct nv50_context *nv50, struct nv50_program *p)
1400 {
1401         int i;
1402
1403         if (nv50_program_tx(p) == FALSE)
1404                 assert(0);
1405         /* *not* sufficient, it's fine if last inst is long and
1406          * NOT immd - otherwise it's fucked fucked fucked */
1407         p->insns[p->insns_nr - 1] |= 0x00000001;
1408
1409         if (p->type == PIPE_SHADER_VERTEX) {
1410         for (i = 0; i < p->insns_nr; i++)
1411                 NOUVEAU_ERR("VP0x%08x\n", p->insns[i]);
1412         } else {
1413         for (i = 0; i < p->insns_nr; i++)
1414                 NOUVEAU_ERR("FP0x%08x\n", p->insns[i]);
1415         }
1416
1417         p->translated = TRUE;
1418 }
1419
1420 static void
1421 nv50_program_validate_data(struct nv50_context *nv50, struct nv50_program *p)
1422 {
1423         int i;
1424
1425         for (i = 0; i < p->immd_nr; i++) {
1426                 BEGIN_RING(tesla, 0x0f00, 2);
1427                 OUT_RING  ((NV50_CB_PMISC << 0) | (i << 8));
1428                 OUT_RING  (fui(p->immd[i]));
1429         }
1430 }
1431
1432 static void
1433 nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p)
1434 {
1435         struct pipe_winsys *ws = nv50->pipe.winsys;
1436         void *map;
1437
1438         if (!p->buffer)
1439                 p->buffer = ws->buffer_create(ws, 0x100, 0, p->insns_nr * 4);
1440         map = ws->buffer_map(ws, p->buffer, PIPE_BUFFER_USAGE_CPU_WRITE);
1441         memcpy(map, p->insns, p->insns_nr * 4);
1442         ws->buffer_unmap(ws, p->buffer);
1443 }
1444
1445 void
1446 nv50_vertprog_validate(struct nv50_context *nv50)
1447 {
1448         struct nouveau_grobj *tesla = nv50->screen->tesla;
1449         struct nv50_program *p = nv50->vertprog;
1450         struct nouveau_stateobj *so;
1451
1452         if (!p->translated) {
1453                 nv50_program_validate(nv50, p);
1454                 if (!p->translated)
1455                         assert(0);
1456         }
1457
1458         nv50_program_validate_data(nv50, p);
1459         nv50_program_validate_code(nv50, p);
1460
1461         so = so_new(11, 2);
1462         so_method(so, tesla, NV50TCL_VP_ADDRESS_HIGH, 2);
1463         so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
1464                   NOUVEAU_BO_HIGH, 0, 0);
1465         so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
1466                   NOUVEAU_BO_LOW, 0, 0);
1467         so_method(so, tesla, 0x1650, 2);
1468         so_data  (so, p->cfg.vp.attr[0]);
1469         so_data  (so, p->cfg.vp.attr[1]);
1470         so_method(so, tesla, 0x16ac, 2);
1471         so_data  (so, 8);
1472         so_data  (so, p->cfg.high_temp);
1473         so_method(so, tesla, 0x140c, 1);
1474         so_data  (so, 0); /* program start offset */
1475         so_emit(nv50->screen->nvws, so);
1476         so_ref(NULL, &so);
1477 }
1478
1479 void
1480 nv50_fragprog_validate(struct nv50_context *nv50)
1481 {
1482         struct nouveau_grobj *tesla = nv50->screen->tesla;
1483         struct nv50_program *p = nv50->fragprog;
1484         struct nouveau_stateobj *so;
1485
1486         if (!p->translated) {
1487                 nv50_program_validate(nv50, p);
1488                 if (!p->translated)
1489                         assert(0);
1490         }
1491
1492         nv50_program_validate_data(nv50, p);
1493         nv50_program_validate_code(nv50, p);
1494
1495         so = so_new(64, 2);
1496         so_method(so, tesla, NV50TCL_FP_ADDRESS_HIGH, 2);
1497         so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
1498                   NOUVEAU_BO_HIGH, 0, 0);
1499         so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
1500                   NOUVEAU_BO_LOW, 0, 0);
1501         so_method(so, tesla, 0x1904, 4);
1502         so_data  (so, 0x01040404); /* p: 0x01000404 */
1503         so_data  (so, 0x00000004);
1504         so_data  (so, 0x00000000);
1505         so_data  (so, 0x00000000);
1506         so_method(so, tesla, 0x16bc, 2);
1507         so_data  (so, 0x03020100);
1508         so_data  (so, 0x07060504);
1509         so_method(so, tesla, 0x1988, 2);
1510         so_data  (so, 0x08040404); /* p: 0x0f000401 */
1511         so_data  (so, p->cfg.high_temp);
1512         so_method(so, tesla, 0x16ac, 2);
1513         so_data  (so, 0x00000008); /* p: 0x00000004 */
1514         so_data  (so, 0x00000004);
1515         so_method(so, tesla, 0x1414, 1);
1516         so_data  (so, 0); /* program start offset */
1517         so_emit(nv50->screen->nvws, so);
1518         so_ref(NULL, &so);
1519 }
1520
1521 void
1522 nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p)
1523 {
1524         struct pipe_winsys *ws = nv50->pipe.winsys;
1525
1526         if (p->insns_nr) {
1527                 if (p->insns)
1528                         FREE(p->insns);
1529                 p->insns_nr = 0;
1530         }
1531
1532         if (p->buffer)
1533                 pipe_buffer_reference(ws, &p->buffer, NULL);
1534
1535         p->translated = 0;
1536 }
1537