nir: Use _mesa_flsll(InputsRead) in prog->nir.
[mesa.git] / src / mesa / program / prog_to_nir.c
1 /*
2 * Copyright © 2015 Intel Corporation
3 * Copyright © 2014-2015 Broadcom
4 * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
15 * Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
23 * IN THE SOFTWARE.
24 */
25
26 #include "nir/nir.h"
27 #include "nir/nir_builder.h"
28 #include "glsl/list.h"
29 #include "main/imports.h"
30 #include "util/ralloc.h"
31
32 #include "prog_to_nir.h"
33 #include "prog_instruction.h"
34 #include "prog_parameter.h"
35 #include "prog_print.h"
36
37 /**
38 * \file prog_to_nir.c
39 *
40 * A translator from Mesa IR (prog_instruction.h) to NIR. This is primarily
41 * intended to support ARB_vertex_program, ARB_fragment_program, and fixed-function
42 * vertex processing. Full GLSL support should use glsl_to_nir instead.
43 */
44
/** Translation context threaded through the Mesa IR -> NIR conversion. */
struct ptn_compile {
   struct gl_program *prog;   /* Mesa IR program being translated */
   nir_builder build;         /* builder emitting into the NIR shader */
   bool error;                /* set on failure; checked by callers to bail out */

   /* Shader input/output variables, indexed by varying slot. */
   nir_variable *input_vars[VARYING_SLOT_MAX];
   nir_variable *output_vars[VARYING_SLOT_MAX];
   /* Local registers standing in for outputs and temporaries.  Outputs are
    * copied into the real output variables at the end of the shader (see
    * ptn_add_output_stores()).
    */
   nir_register **output_regs;
   nir_register **temp_regs;

   /* Single-component register backing the ARB_vertex_program ADDR register. */
   nir_register *addr_reg;
};
57
/* Builds an unnamed 4-entry swizzle array from channel letters (X/Y/Z/W,
 * or ZERO/ONE/NIL for extended swizzles).
 */
#define SWIZ(X, Y, Z, W) \
   (unsigned[4]){ SWIZZLE_##X, SWIZZLE_##Y, SWIZZLE_##Z, SWIZZLE_##W }
/* Emits a 4-component swizzle of src. */
#define ptn_swizzle(b, src, x, y, z, w) nir_swizzle(b, src, SWIZ(x, y, z, w), 4, true)
/* Extracts a single channel of src as a 1-component value. */
#define ptn_channel(b, src, ch) nir_swizzle(b, src, SWIZ(ch, ch, ch, ch), 1, true)
62
/**
 * Reads back the value just written to an ALU destination as a 4-wide SSA
 * value (an identity-swizzled fmov).  Used by ptn_emit_instruction() to
 * re-read a result so saturation can be applied after the fact.
 */
static nir_ssa_def *
ptn_src_for_dest(struct ptn_compile *c, nir_alu_dest *dest)
{
   nir_builder *b = &c->build;

   nir_alu_src src;
   memset(&src, 0, sizeof(src));

   if (dest->dest.is_ssa)
      src.src = nir_src_for_ssa(&dest->dest.ssa);
   else {
      /* This translator never emits indirect destinations. */
      assert(!dest->dest.reg.indirect);
      src.src = nir_src_for_reg(dest->dest.reg.reg);
      src.src.reg.base_offset = dest->dest.reg.base_offset;
   }

   /* Identity swizzle: read the channels back exactly as written. */
   for (int i = 0; i < 4; i++)
      src.swizzle[i] = i;

   return nir_fmov_alu(b, src, 4);
}
84
85 static nir_alu_dest
86 ptn_get_dest(struct ptn_compile *c, const struct prog_dst_register *prog_dst)
87 {
88 nir_alu_dest dest;
89
90 memset(&dest, 0, sizeof(dest));
91
92 switch (prog_dst->File) {
93 case PROGRAM_TEMPORARY:
94 dest.dest.reg.reg = c->temp_regs[prog_dst->Index];
95 break;
96 case PROGRAM_OUTPUT:
97 dest.dest.reg.reg = c->output_regs[prog_dst->Index];
98 break;
99 case PROGRAM_ADDRESS:
100 assert(prog_dst->Index == 0);
101 dest.dest.reg.reg = c->addr_reg;
102 break;
103 case PROGRAM_UNDEFINED:
104 break;
105 }
106
107 dest.write_mask = prog_dst->WriteMask;
108 dest.saturate = false;
109
110 assert(!prog_dst->RelAddr);
111
112 return dest;
113 }
114
115 /**
116 * Multiply the contents of the ADDR register by 4 to convert from the number
117 * of vec4s to the number of floating point components.
118 */
119 static nir_ssa_def *
120 ptn_addr_reg_value(struct ptn_compile *c)
121 {
122 nir_builder *b = &c->build;
123 nir_alu_src src;
124 memset(&src, 0, sizeof(src));
125 src.src = nir_src_for_reg(c->addr_reg);
126
127 return nir_imul(b, nir_fmov_alu(b, src, 1), nir_imm_int(b, 4));
128 }
129
/**
 * Translates a Mesa IR source register into an SSA value, emitting any
 * loads (inputs, uniforms) required, and applying swizzle, absolute
 * value and negation modifiers.
 */
static nir_ssa_def *
ptn_get_src(struct ptn_compile *c, const struct prog_src_register *prog_src)
{
   nir_builder *b = &c->build;
   nir_alu_src src;

   memset(&src, 0, sizeof(src));

   switch (prog_src->File) {
   case PROGRAM_UNDEFINED:
      return nir_imm_float(b, 0.0);
   case PROGRAM_TEMPORARY:
      assert(!prog_src->RelAddr && prog_src->Index >= 0);
      src.src.reg.reg = c->temp_regs[prog_src->Index];
      break;
   case PROGRAM_INPUT: {
      /* ARB_vertex_program doesn't allow relative addressing on vertex
       * attributes; ARB_fragment_program has no relative addressing at all.
       */
      assert(!prog_src->RelAddr);

      assert(prog_src->Index >= 0 && prog_src->Index < VARYING_SLOT_MAX);

      /* Load the whole vec4 input variable for this slot. */
      nir_intrinsic_instr *load =
         nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_var);
      load->num_components = 4;
      load->variables[0] =
         nir_deref_var_create(b->shader, c->input_vars[prog_src->Index]);

      nir_ssa_dest_init(&load->instr, &load->dest, 4, NULL);
      nir_instr_insert_after_cf_list(b->cf_node_list, &load->instr);

      src.src = nir_src_for_ssa(&load->dest.ssa);
      break;
   }
   case PROGRAM_STATE_VAR:
   case PROGRAM_CONSTANT: {
      /* We actually want to look at the type in the Parameters list for this,
       * because it lets us upload constant builtin uniforms as actual
       * constants.
       */
      struct gl_program_parameter_list *plist = c->prog->Parameters;
      gl_register_file file = prog_src->RelAddr ? prog_src->File :
         plist->Parameters[prog_src->Index].Type;

      switch (file) {
      case PROGRAM_CONSTANT:
         /* Only inline the constant when the file isn't also accessed
          * indirectly; otherwise it must stay in the uniform storage so
          * indirect loads see a consistent layout.
          */
         if ((c->prog->IndirectRegisterFiles & (1 << PROGRAM_CONSTANT)) == 0) {
            float *v = (float *) plist->ParameterValues[prog_src->Index];
            src.src = nir_src_for_ssa(nir_imm_vec4(b, v[0], v[1], v[2], v[3]));
            break;
         }
         /* FALLTHROUGH */
      case PROGRAM_STATE_VAR: {
         nir_intrinsic_op load_op =
            prog_src->RelAddr ? nir_intrinsic_load_uniform_indirect :
                                nir_intrinsic_load_uniform;
         nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, load_op);
         nir_ssa_dest_init(&load->instr, &load->dest, 4, NULL);
         load->num_components = 4;

         /* Multiply src->Index by 4 to scale from # of vec4s to components. */
         load->const_index[0] = 4 * prog_src->Index;
         load->const_index[1] = 1;

         if (prog_src->RelAddr) {
            nir_ssa_def *reladdr = ptn_addr_reg_value(c);
            if (prog_src->Index < 0) {
               /* This is a negative offset which should be added to the address
                * register's value.
                */
               reladdr = nir_iadd(b, reladdr, nir_imm_int(b, load->const_index[0]));
               load->const_index[0] = 0;
            }
            load->src[0] = nir_src_for_ssa(reladdr);
         }

         nir_instr_insert_after_cf_list(b->cf_node_list, &load->instr);

         src.src = nir_src_for_ssa(&load->dest.ssa);
         break;
      }
      default:
         fprintf(stderr, "bad uniform src register file: %s (%d)\n",
                 _mesa_register_file_name(file), file);
         abort();
      }
      break;
   }
   default:
      fprintf(stderr, "unknown src register file: %s (%d)\n",
              _mesa_register_file_name(prog_src->File), prog_src->File);
      abort();
   }

   nir_ssa_def *def;
   if (!HAS_EXTENDED_SWIZZLE(prog_src->Swizzle)) {
      /* Plain XYZW swizzle: express it directly on a single fmov. */
      for (int i = 0; i < 4; i++)
         src.swizzle[i] = GET_SWZ(prog_src->Swizzle, i);

      def = nir_fmov_alu(b, src, 4);
   } else {
      /* Extended (SWZ) swizzles can also select constant 0.0 or 1.0 per
       * channel, so build each channel separately and recombine.
       */
      nir_ssa_def *chans[4];
      for (int i = 0; i < 4; i++) {
         int swizzle = GET_SWZ(prog_src->Swizzle, i);
         if (swizzle == SWIZZLE_ZERO) {
            chans[i] = nir_imm_float(b, 0.0);
         } else if (swizzle == SWIZZLE_ONE) {
            chans[i] = nir_imm_float(b, 1.0);
         } else {
            assert(swizzle != SWIZZLE_NIL);
            nir_alu_instr *mov = nir_alu_instr_create(b->shader, nir_op_fmov);
            nir_ssa_dest_init(&mov->instr, &mov->dest.dest, 1, NULL);
            mov->dest.write_mask = 0x1;
            mov->src[0] = src;
            mov->src[0].swizzle[0] = swizzle;
            nir_instr_insert_after_cf_list(b->cf_node_list, &mov->instr);

            chans[i] = &mov->dest.dest.ssa;
         }
      }
      def = nir_vec4(b, chans[0], chans[1], chans[2], chans[3]);
   }

   /* Source modifiers: abs is applied before negate. */
   if (prog_src->Abs)
      def = nir_fabs(b, def);

   if (prog_src->Negate)
      def = nir_fneg(b, def);

   return def;
}
262
263 static void
264 ptn_alu(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
265 {
266 unsigned num_srcs = nir_op_infos[op].num_inputs;
267 nir_alu_instr *instr = nir_alu_instr_create(b->shader, op);
268 unsigned i;
269
270 for (i = 0; i < num_srcs; i++)
271 instr->src[i].src = nir_src_for_ssa(src[i]);
272
273 instr->dest = dest;
274 nir_instr_insert_after_cf_list(b->cf_node_list, &instr->instr);
275 }
276
/**
 * Moves \p def into the channels of \p dest selected by the intersection
 * of \p write_mask and the destination's own writemask.  If \p def has
 * fewer than 4 components, its last component is replicated into the
 * remaining swizzle slots.
 */
static void
ptn_move_dest_masked(nir_builder *b, nir_alu_dest dest,
                     nir_ssa_def *def, unsigned write_mask)
{
   /* Nothing to do if no requested channel is actually written. */
   if (!(dest.write_mask & write_mask))
      return;

   nir_alu_instr *mov = nir_alu_instr_create(b->shader, nir_op_fmov);
   /* NOTE(review): a NULL here silently drops the write; presumably
    * allocation failure is considered unrecoverable anyway — confirm.
    */
   if (!mov)
      return;

   mov->dest = dest;
   mov->dest.write_mask &= write_mask;
   mov->src[0].src = nir_src_for_ssa(def);
   /* Pad the swizzle by replicating def's last component. */
   for (unsigned i = def->num_components; i < 4; i++)
      mov->src[0].swizzle[i] = def->num_components - 1;
   nir_instr_insert_after_cf_list(b->cf_node_list, &mov->instr);
}
295
/* Moves def into dest on all channels allowed by dest's writemask. */
static void
ptn_move_dest(nir_builder *b, nir_alu_dest dest, nir_ssa_def *def)
{
   ptn_move_dest_masked(b, dest, def, WRITEMASK_XYZW);
}
301
302 static void
303 ptn_arl(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
304 {
305 ptn_move_dest(b, dest, nir_f2i(b, nir_ffloor(b, src[0])));
306 }
307
308 /* EXP - Approximate Exponential Base 2
309 * dst.x = 2^{\lfloor src.x\rfloor}
310 * dst.y = src.x - \lfloor src.x\rfloor
311 * dst.z = 2^{src.x}
312 * dst.w = 1.0
313 */
314 static void
315 ptn_exp(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
316 {
317 nir_ssa_def *srcx = ptn_channel(b, src[0], X);
318
319 ptn_move_dest_masked(b, dest, nir_fexp2(b, nir_ffloor(b, srcx)), WRITEMASK_X);
320 ptn_move_dest_masked(b, dest, nir_fsub(b, srcx, nir_ffloor(b, srcx)), WRITEMASK_Y);
321 ptn_move_dest_masked(b, dest, nir_fexp2(b, srcx), WRITEMASK_Z);
322 ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_W);
323 }
324
325 /* LOG - Approximate Logarithm Base 2
326 * dst.x = \lfloor\log_2{|src.x|}\rfloor
327 * dst.y = |src.x| * 2^{-\lfloor\log_2{|src.x|}\rfloor}}
328 * dst.z = \log_2{|src.x|}
329 * dst.w = 1.0
330 */
331 static void
332 ptn_log(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
333 {
334 nir_ssa_def *abs_srcx = nir_fabs(b, ptn_channel(b, src[0], X));
335 nir_ssa_def *log2 = nir_flog2(b, abs_srcx);
336 nir_ssa_def *floor_log2 = nir_ffloor(b, log2);
337
338 ptn_move_dest_masked(b, dest, floor_log2, WRITEMASK_X);
339 ptn_move_dest_masked(b, dest,
340 nir_fmul(b, abs_srcx,
341 nir_fexp2(b, nir_fneg(b, floor_log2))),
342 WRITEMASK_Y);
343 ptn_move_dest_masked(b, dest, log2, WRITEMASK_Z);
344 ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_W);
345 }
346
347 /* DST - Distance Vector
348 * dst.x = 1.0
349 * dst.y = src0.y \times src1.y
350 * dst.z = src0.z
351 * dst.w = src1.w
352 */
353 static void
354 ptn_dst(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
355 {
356 ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_X);
357 ptn_move_dest_masked(b, dest, nir_fmul(b, src[0], src[1]), WRITEMASK_Y);
358 ptn_move_dest_masked(b, dest, nir_fmov(b, src[0]), WRITEMASK_Z);
359 ptn_move_dest_masked(b, dest, nir_fmov(b, src[1]), WRITEMASK_W);
360 }
361
/* LIT - Light Coefficients
 * dst.x = 1.0
 * dst.y = max(src.x, 0.0)
 * dst.z = (src.x > 0.0) ? max(src.y, 0.0)^{clamp(src.w, -128.0, 128.0))} : 0
 * dst.w = 1.0
 */
static void
ptn_lit(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
{
   ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_XW);

   ptn_move_dest_masked(b, dest, nir_fmax(b, ptn_channel(b, src[0], X),
                                          nir_imm_float(b, 0.0)), WRITEMASK_Y);

   /* The z channel is the expensive one; skip it when not written. */
   if (dest.write_mask & WRITEMASK_Z) {
      nir_ssa_def *src0_y = ptn_channel(b, src[0], Y);
      /* Clamp the exponent to [-128, 128] per the LIT definition. */
      nir_ssa_def *wclamp = nir_fmax(b, nir_fmin(b, ptn_channel(b, src[0], W),
                                                 nir_imm_float(b, 128.0)),
                                     nir_imm_float(b, -128.0));
      nir_ssa_def *pow = nir_fpow(b, nir_fmax(b, src0_y, nir_imm_float(b, 0.0)),
                                  wclamp);

      /* Select 0 when src.x <= 0; use boolean or float select depending on
       * whether the target has native integer/boolean support.
       */
      nir_ssa_def *z;
      if (b->shader->options->native_integers) {
         z = nir_bcsel(b,
                       nir_fge(b, nir_imm_float(b, 0.0), ptn_channel(b, src[0], X)),
                       nir_imm_float(b, 0.0),
                       pow);
      } else {
         z = nir_fcsel(b,
                       nir_sge(b, nir_imm_float(b, 0.0), ptn_channel(b, src[0], X)),
                       nir_imm_float(b, 0.0),
                       pow);
      }

      ptn_move_dest_masked(b, dest, z, WRITEMASK_Z);
   }
}
400
401 /* SCS - Sine Cosine
402 * dst.x = \cos{src.x}
403 * dst.y = \sin{src.x}
404 * dst.z = 0.0
405 * dst.w = 1.0
406 */
407 static void
408 ptn_scs(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
409 {
410 ptn_move_dest_masked(b, dest, nir_fcos(b, ptn_channel(b, src[0], X)),
411 WRITEMASK_X);
412 ptn_move_dest_masked(b, dest, nir_fsin(b, ptn_channel(b, src[0], X)),
413 WRITEMASK_Y);
414 ptn_move_dest_masked(b, dest, nir_imm_float(b, 0.0), WRITEMASK_Z);
415 ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_W);
416 }
417
418 /**
419 * Emit SLT. For platforms with integers, prefer b2f(flt(...)).
420 */
421 static void
422 ptn_slt(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
423 {
424 if (b->shader->options->native_integers) {
425 ptn_move_dest(b, dest, nir_b2f(b, nir_flt(b, src[0], src[1])));
426 } else {
427 ptn_move_dest(b, dest, nir_slt(b, src[0], src[1]));
428 }
429 }
430
431 /**
432 * Emit SGE. For platforms with integers, prefer b2f(fge(...)).
433 */
434 static void
435 ptn_sge(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
436 {
437 if (b->shader->options->native_integers) {
438 ptn_move_dest(b, dest, nir_b2f(b, nir_fge(b, src[0], src[1])));
439 } else {
440 ptn_move_dest(b, dest, nir_sge(b, src[0], src[1]));
441 }
442 }
443
444 static void
445 ptn_sle(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
446 {
447 nir_ssa_def *commuted[] = { src[1], src[0] };
448 ptn_sge(b, dest, commuted);
449 }
450
451 static void
452 ptn_sgt(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
453 {
454 nir_ssa_def *commuted[] = { src[1], src[0] };
455 ptn_slt(b, dest, commuted);
456 }
457
458 /**
459 * Emit SEQ. For platforms with integers, prefer b2f(feq(...)).
460 */
461 static void
462 ptn_seq(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
463 {
464 if (b->shader->options->native_integers) {
465 ptn_move_dest(b, dest, nir_b2f(b, nir_feq(b, src[0], src[1])));
466 } else {
467 ptn_move_dest(b, dest, nir_seq(b, src[0], src[1]));
468 }
469 }
470
471 /**
472 * Emit SNE. For platforms with integers, prefer b2f(fne(...)).
473 */
474 static void
475 ptn_sne(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
476 {
477 if (b->shader->options->native_integers) {
478 ptn_move_dest(b, dest, nir_b2f(b, nir_fne(b, src[0], src[1])));
479 } else {
480 ptn_move_dest(b, dest, nir_sne(b, src[0], src[1]));
481 }
482 }
483
484 static void
485 ptn_xpd(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
486 {
487 ptn_move_dest_masked(b, dest,
488 nir_fsub(b,
489 nir_fmul(b,
490 ptn_swizzle(b, src[0], Y, Z, X, X),
491 ptn_swizzle(b, src[1], Z, X, Y, X)),
492 nir_fmul(b,
493 ptn_swizzle(b, src[1], Y, Z, X, X),
494 ptn_swizzle(b, src[0], Z, X, Y, X))),
495 WRITEMASK_XYZ);
496 ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_W);
497 }
498
499 static void
500 ptn_dp2(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
501 {
502 ptn_move_dest(b, dest, nir_fdot2(b, src[0], src[1]));
503 }
504
505 static void
506 ptn_dp3(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
507 {
508 ptn_move_dest(b, dest, nir_fdot3(b, src[0], src[1]));
509 }
510
511 static void
512 ptn_dp4(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
513 {
514 ptn_move_dest(b, dest, nir_fdot4(b, src[0], src[1]));
515 }
516
517 static void
518 ptn_dph(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
519 {
520 nir_ssa_def *dp3 = nir_fdot3(b, src[0], src[1]);
521 ptn_move_dest(b, dest, nir_fadd(b, dp3, ptn_channel(b, src[1], W)));
522 }
523
524 static void
525 ptn_cmp(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
526 {
527 if (b->shader->options->native_integers) {
528 ptn_move_dest(b, dest, nir_bcsel(b,
529 nir_flt(b, src[0], nir_imm_float(b, 0.0)),
530 src[1], src[2]));
531 } else {
532 ptn_move_dest(b, dest, nir_fcsel(b,
533 nir_slt(b, src[0], nir_imm_float(b, 0.0)),
534 src[1], src[2]));
535 }
536 }
537
538 static void
539 ptn_lrp(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
540 {
541 ptn_move_dest(b, dest, nir_flrp(b, src[2], src[1], src[0]));
542 }
543
544 static void
545 ptn_kil(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
546 {
547 nir_ssa_def *cmp = b->shader->options->native_integers ?
548 nir_bany4(b, nir_flt(b, src[0], nir_imm_float(b, 0.0))) :
549 nir_fany4(b, nir_slt(b, src[0], nir_imm_float(b, 0.0)));
550
551 nir_intrinsic_instr *discard =
552 nir_intrinsic_instr_create(b->shader, nir_intrinsic_discard_if);
553 discard->src[0] = nir_src_for_ssa(cmp);
554 nir_instr_insert_after_cf_list(b->cf_node_list, &discard->instr);
555 }
556
/**
 * Translates a Mesa IR texture instruction (TEX/TXB/TXD/TXL/TXP) into a
 * nir_tex_instr.  src[0] carries coordinate plus, depending on the
 * opcode, projector/bias/lod in .w and the shadow comparitor in .z or .w.
 */
static void
ptn_tex(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src,
        struct prog_instruction *prog_inst)
{
   nir_tex_instr *instr;
   nir_texop op;
   unsigned num_srcs;

   /* Pick the NIR texture op and the number of tex sources we'll fill in
    * below (coordinate, plus projector/bias/lod as applicable).
    */
   switch (prog_inst->Opcode) {
   case OPCODE_TEX:
      op = nir_texop_tex;
      num_srcs = 1;
      break;
   case OPCODE_TXB:
      op = nir_texop_txb;
      num_srcs = 2;
      break;
   case OPCODE_TXD:
      op = nir_texop_txd;
      num_srcs = 3;
      break;
   case OPCODE_TXL:
      op = nir_texop_txl;
      num_srcs = 2;
      break;
   case OPCODE_TXP:
      op = nir_texop_tex;
      num_srcs = 2;
      break;
   case OPCODE_TXP_NV:
      assert(!"not handled");
      op = nir_texop_tex;
      num_srcs = 2;
      break;
   default:
      fprintf(stderr, "unknown tex op %d\n", prog_inst->Opcode);
      abort();
   }

   /* Shadow sampling adds a comparitor source. */
   if (prog_inst->TexShadow)
      num_srcs++;

   instr = nir_tex_instr_create(b->shader, num_srcs);
   instr->op = op;
   instr->dest_type = nir_type_float;
   instr->is_shadow = prog_inst->TexShadow;
   instr->sampler_index = prog_inst->TexSrcUnit;

   switch (prog_inst->TexSrcTarget) {
   case TEXTURE_1D_INDEX:
      instr->sampler_dim = GLSL_SAMPLER_DIM_1D;
      break;
   case TEXTURE_2D_INDEX:
      instr->sampler_dim = GLSL_SAMPLER_DIM_2D;
      break;
   case TEXTURE_3D_INDEX:
      instr->sampler_dim = GLSL_SAMPLER_DIM_3D;
      break;
   case TEXTURE_CUBE_INDEX:
      instr->sampler_dim = GLSL_SAMPLER_DIM_CUBE;
      break;
   case TEXTURE_RECT_INDEX:
      instr->sampler_dim = GLSL_SAMPLER_DIM_RECT;
      break;
   default:
      fprintf(stderr, "Unknown texture target %d\n", prog_inst->TexSrcTarget);
      abort();
   }

   switch (instr->sampler_dim) {
   case GLSL_SAMPLER_DIM_1D:
   case GLSL_SAMPLER_DIM_BUF:
      instr->coord_components = 1;
      break;
   case GLSL_SAMPLER_DIM_2D:
   case GLSL_SAMPLER_DIM_RECT:
   case GLSL_SAMPLER_DIM_EXTERNAL:
   case GLSL_SAMPLER_DIM_MS:
      instr->coord_components = 2;
      break;
   case GLSL_SAMPLER_DIM_3D:
   case GLSL_SAMPLER_DIM_CUBE:
      instr->coord_components = 3;
      break;
   }

   /* Fill in the tex sources in order; src_number must end up equal to
    * the num_srcs we allocated above.
    */
   unsigned src_number = 0;

   instr->src[src_number].src =
      nir_src_for_ssa(ptn_swizzle(b, src[0], X, Y, Z, W));
   instr->src[src_number].src_type = nir_tex_src_coord;
   src_number++;

   if (prog_inst->Opcode == OPCODE_TXP) {
      instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], W));
      instr->src[src_number].src_type = nir_tex_src_projector;
      src_number++;
   }

   if (prog_inst->Opcode == OPCODE_TXB) {
      instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], W));
      instr->src[src_number].src_type = nir_tex_src_bias;
      src_number++;
   }

   if (prog_inst->Opcode == OPCODE_TXL) {
      instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], W));
      instr->src[src_number].src_type = nir_tex_src_lod;
      src_number++;
   }

   if (instr->is_shadow) {
      /* The comparitor lives in .z unless the coordinate already uses it. */
      if (instr->coord_components < 3)
         instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], Z));
      else
         instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], W));

      instr->src[src_number].src_type = nir_tex_src_comparitor;
      src_number++;
   }

   assert(src_number == num_srcs);

   nir_ssa_dest_init(&instr->instr, &instr->dest, 4, NULL);
   nir_instr_insert_after_cf_list(b->cf_node_list, &instr->instr);

   /* Resolve the writemask on the texture op. */
   ptn_move_dest(b, dest, &instr->dest.ssa);
}
686
/**
 * Mesa IR opcode -> NIR opcode table for instructions that translate 1:1
 * through ptn_alu().  Zero entries are either handled by a dedicated case
 * in ptn_emit_instruction()'s switch or unsupported.  Note that
 * nir_op_fmov itself compares equal to 0, which is why OPCODE_MOV gets a
 * special check in ptn_emit_instruction()'s default case.
 */
static const nir_op op_trans[MAX_OPCODE] = {
   [OPCODE_NOP] = 0,
   [OPCODE_ABS] = nir_op_fabs,
   [OPCODE_ADD] = nir_op_fadd,
   [OPCODE_ARL] = 0,
   [OPCODE_CMP] = 0,
   [OPCODE_COS] = nir_op_fcos,
   [OPCODE_DDX] = nir_op_fddx,
   [OPCODE_DDY] = nir_op_fddy,
   [OPCODE_DP2] = 0,
   [OPCODE_DP3] = 0,
   [OPCODE_DP4] = 0,
   [OPCODE_DPH] = 0,
   [OPCODE_DST] = 0,
   [OPCODE_END] = 0,
   [OPCODE_EX2] = nir_op_fexp2,
   [OPCODE_EXP] = nir_op_fexp, /* unused: OPCODE_EXP is special-cased */
   [OPCODE_FLR] = nir_op_ffloor,
   [OPCODE_FRC] = nir_op_ffract,
   [OPCODE_LG2] = nir_op_flog2,
   [OPCODE_LIT] = 0,
   [OPCODE_LOG] = 0,
   [OPCODE_LRP] = 0,
   [OPCODE_MAD] = nir_op_ffma,
   [OPCODE_MAX] = nir_op_fmax,
   [OPCODE_MIN] = nir_op_fmin,
   [OPCODE_MOV] = nir_op_fmov,
   [OPCODE_MUL] = nir_op_fmul,
   [OPCODE_POW] = nir_op_fpow,
   [OPCODE_RCP] = nir_op_frcp,

   [OPCODE_RSQ] = nir_op_frsq,
   [OPCODE_SCS] = 0,
   [OPCODE_SEQ] = 0,
   [OPCODE_SGE] = 0,
   [OPCODE_SGT] = 0,
   [OPCODE_SIN] = nir_op_fsin,
   [OPCODE_SLE] = 0,
   [OPCODE_SLT] = 0,
   [OPCODE_SNE] = 0,
   [OPCODE_SSG] = nir_op_fsign,
   [OPCODE_SUB] = nir_op_fsub,
   [OPCODE_SWZ] = 0,
   [OPCODE_TEX] = 0,
   [OPCODE_TRUNC] = nir_op_ftrunc,
   [OPCODE_TXB] = 0,
   [OPCODE_TXD] = 0,
   [OPCODE_TXL] = 0,
   [OPCODE_TXP] = 0,
   [OPCODE_TXP_NV] = 0,
   [OPCODE_XPD] = 0,
};
739
/**
 * Translates one Mesa IR instruction into NIR: gathers the (up to 3)
 * sources, resolves the destination, dispatches either to a dedicated
 * ptn_* emitter or through the op_trans table, and finally applies
 * saturation if requested.
 */
static void
ptn_emit_instruction(struct ptn_compile *c, struct prog_instruction *prog_inst)
{
   nir_builder *b = &c->build;
   unsigned i;
   const unsigned op = prog_inst->Opcode;

   if (op == OPCODE_END)
      return;

   nir_ssa_def *src[3];
   for (i = 0; i < 3; i++) {
      src[i] = ptn_get_src(c, &prog_inst->SrcReg[i]);
   }
   nir_alu_dest dest = ptn_get_dest(c, &prog_inst->DstReg);
   if (c->error)
      return;

   switch (op) {
   /* Scalar ops: operate on the x channel, result replicated by
    * ptn_move_dest().
    */
   case OPCODE_RSQ:
      ptn_move_dest(b, dest, nir_frsq(b, ptn_channel(b, src[0], X)));
      break;

   case OPCODE_RCP:
      ptn_move_dest(b, dest, nir_frcp(b, ptn_channel(b, src[0], X)));
      break;

   case OPCODE_EX2:
      ptn_move_dest(b, dest, nir_fexp2(b, ptn_channel(b, src[0], X)));
      break;

   case OPCODE_LG2:
      ptn_move_dest(b, dest, nir_flog2(b, ptn_channel(b, src[0], X)));
      break;

   case OPCODE_POW:
      ptn_move_dest(b, dest, nir_fpow(b,
                                      ptn_channel(b, src[0], X),
                                      ptn_channel(b, src[1], X)));
      break;

   case OPCODE_COS:
      ptn_move_dest(b, dest, nir_fcos(b, ptn_channel(b, src[0], X)));
      break;

   case OPCODE_SIN:
      ptn_move_dest(b, dest, nir_fsin(b, ptn_channel(b, src[0], X)));
      break;

   case OPCODE_ARL:
      ptn_arl(b, dest, src);
      break;

   case OPCODE_EXP:
      ptn_exp(b, dest, src);
      break;

   case OPCODE_LOG:
      ptn_log(b, dest, src);
      break;

   case OPCODE_LRP:
      ptn_lrp(b, dest, src);
      break;

   case OPCODE_DST:
      ptn_dst(b, dest, src);
      break;

   case OPCODE_LIT:
      ptn_lit(b, dest, src);
      break;

   case OPCODE_XPD:
      ptn_xpd(b, dest, src);
      break;

   case OPCODE_DP2:
      ptn_dp2(b, dest, src);
      break;

   case OPCODE_DP3:
      ptn_dp3(b, dest, src);
      break;

   case OPCODE_DP4:
      ptn_dp4(b, dest, src);
      break;

   case OPCODE_DPH:
      ptn_dph(b, dest, src);
      break;

   case OPCODE_KIL:
      ptn_kil(b, dest, src);
      break;

   case OPCODE_CMP:
      ptn_cmp(b, dest, src);
      break;

   case OPCODE_SCS:
      ptn_scs(b, dest, src);
      break;

   case OPCODE_SLT:
      ptn_slt(b, dest, src);
      break;

   case OPCODE_SGT:
      ptn_sgt(b, dest, src);
      break;

   case OPCODE_SLE:
      ptn_sle(b, dest, src);
      break;

   case OPCODE_SGE:
      ptn_sge(b, dest, src);
      break;

   case OPCODE_SEQ:
      ptn_seq(b, dest, src);
      break;

   case OPCODE_SNE:
      ptn_sne(b, dest, src);
      break;

   case OPCODE_TEX:
   case OPCODE_TXB:
   case OPCODE_TXD:
   case OPCODE_TXL:
   case OPCODE_TXP:
   case OPCODE_TXP_NV:
      ptn_tex(b, dest, src, prog_inst);
      break;

   case OPCODE_SWZ:
      /* Extended swizzles were already handled in ptn_get_src(). */
      ptn_alu(b, nir_op_fmov, dest, src);
      break;

   case OPCODE_NOP:
      break;

   default:
      /* OPCODE_MOV is checked explicitly because nir_op_fmov == 0, the
       * same value as an absent table entry.
       */
      if (op_trans[op] != 0 || op == OPCODE_MOV) {
         ptn_alu(b, op_trans[op], dest, src);
      } else {
         fprintf(stderr, "unknown opcode: %s\n", _mesa_opcode_string(op));
         abort();
      }
      break;
   }

   /* Saturation re-reads the just-written register and clamps in place. */
   if (prog_inst->SaturateMode) {
      assert(prog_inst->SaturateMode == SATURATE_ZERO_ONE);
      assert(!dest.dest.is_ssa);
      ptn_move_dest(b, dest, nir_fsat(b, ptn_src_for_dest(c, &dest)));
   }
}
902
903 /**
904 * Puts a NIR intrinsic to store of each PROGRAM_OUTPUT value to the output
905 * variables at the end of the shader.
906 *
907 * We don't generate these incrementally as the PROGRAM_OUTPUT values are
908 * written, because there's no output load intrinsic, which means we couldn't
909 * handle writemasks.
910 */
911 static void
912 ptn_add_output_stores(struct ptn_compile *c)
913 {
914 nir_builder *b = &c->build;
915
916 foreach_list_typed(nir_variable, var, node, &b->shader->outputs) {
917 nir_intrinsic_instr *store =
918 nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_var);
919 store->num_components = 4;
920 store->variables[0] =
921 nir_deref_var_create(b->shader, c->output_vars[var->data.location]);
922 store->src[0].reg.reg = c->output_regs[var->data.location];
923 nir_instr_insert_after_cf_list(c->build.cf_node_list, &store->instr);
924 }
925 }
926
927 static void
928 setup_registers_and_variables(struct ptn_compile *c)
929 {
930 nir_builder *b = &c->build;
931 struct nir_shader *shader = b->shader;
932
933 /* Create input variables. */
934 const int num_inputs = _mesa_flsll(c->prog->InputsRead);
935 for (int i = 0; i < num_inputs; i++) {
936 if (!(c->prog->InputsRead & BITFIELD64_BIT(i)))
937 continue;
938 nir_variable *var = rzalloc(shader, nir_variable);
939 var->type = glsl_vec4_type();
940 var->data.read_only = true;
941 var->data.mode = nir_var_shader_in;
942 var->name = ralloc_asprintf(var, "in_%d", i);
943 var->data.location = i;
944 var->data.index = 0;
945
946 if (c->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
947 struct gl_fragment_program *fp =
948 (struct gl_fragment_program *) c->prog;
949
950 var->data.interpolation = fp->InterpQualifier[i];
951
952 if (i == VARYING_SLOT_POS) {
953 var->data.origin_upper_left = fp->OriginUpperLeft;
954 var->data.pixel_center_integer = fp->PixelCenterInteger;
955 } else if (i == VARYING_SLOT_FOGC) {
956 /* fogcoord is defined as <f, 0.0, 0.0, 1.0>. Make the actual
957 * input variable a float, and create a local containing the
958 * full vec4 value.
959 */
960 var->type = glsl_float_type();
961
962 nir_intrinsic_instr *load_x =
963 nir_intrinsic_instr_create(shader, nir_intrinsic_load_var);
964 load_x->num_components = 1;
965 load_x->variables[0] = nir_deref_var_create(shader, var);
966 nir_ssa_dest_init(&load_x->instr, &load_x->dest, 1, NULL);
967 nir_instr_insert_after_cf_list(b->cf_node_list, &load_x->instr);
968
969 nir_ssa_def *f001 = nir_vec4(b, &load_x->dest.ssa, nir_imm_float(b, 0.0),
970 nir_imm_float(b, 0.0), nir_imm_float(b, 1.0));
971
972 nir_variable *fullvar = rzalloc(shader, nir_variable);
973 fullvar->type = glsl_vec4_type();
974 fullvar->data.mode = nir_var_local;
975 fullvar->name = "fogcoord_tmp";
976 exec_list_push_tail(&b->impl->locals, &fullvar->node);
977
978 nir_intrinsic_instr *store =
979 nir_intrinsic_instr_create(shader, nir_intrinsic_store_var);
980 store->num_components = 4;
981 store->variables[0] = nir_deref_var_create(shader, fullvar);
982 store->src[0] = nir_src_for_ssa(f001);
983 nir_instr_insert_after_cf_list(b->cf_node_list, &store->instr);
984
985 /* Insert the real input into the list so the driver has real
986 * inputs, but set c->input_vars[i] to the temporary so we use
987 * the splatted value.
988 */
989 exec_list_push_tail(&shader->inputs, &var->node);
990 c->input_vars[i] = fullvar;
991 continue;
992 }
993 }
994
995 exec_list_push_tail(&shader->inputs, &var->node);
996 c->input_vars[i] = var;
997 }
998
999 /* Create output registers and variables. */
1000 int max_outputs = _mesa_fls(c->prog->OutputsWritten);
1001 c->output_regs = rzalloc_array(c, nir_register *, max_outputs);
1002
1003 for (int i = 0; i < max_outputs; i++) {
1004 if (!(c->prog->OutputsWritten & BITFIELD64_BIT(i)))
1005 continue;
1006
1007 /* Since we can't load from outputs in the IR, we make temporaries
1008 * for the outputs and emit stores to the real outputs at the end of
1009 * the shader.
1010 */
1011 nir_register *reg = nir_local_reg_create(b->impl);
1012 reg->num_components = 4;
1013
1014 nir_variable *var = rzalloc(shader, nir_variable);
1015 var->type = glsl_vec4_type();
1016 var->data.mode = nir_var_shader_out;
1017 var->name = ralloc_asprintf(var, "out_%d", i);
1018
1019 var->data.location = i;
1020 var->data.index = 0;
1021
1022 c->output_regs[i] = reg;
1023
1024 exec_list_push_tail(&shader->outputs, &var->node);
1025 c->output_vars[i] = var;
1026 }
1027
1028 /* Create temporary registers. */
1029 c->temp_regs = rzalloc_array(c, nir_register *, c->prog->NumTemporaries);
1030
1031 nir_register *reg;
1032 for (int i = 0; i < c->prog->NumTemporaries; i++) {
1033 reg = nir_local_reg_create(b->impl);
1034 if (!reg) {
1035 c->error = true;
1036 return;
1037 }
1038 reg->num_components = 4;
1039 c->temp_regs[i] = reg;
1040 }
1041
1042 /* Create the address register (for ARB_vertex_program). */
1043 reg = nir_local_reg_create(b->impl);
1044 if (!reg) {
1045 c->error = true;
1046 return;
1047 }
1048 reg->num_components = 1;
1049 c->addr_reg = reg;
1050
1051 /* Set the number of uniforms */
1052 shader->num_uniforms = 4 * c->prog->Parameters->NumParameters;
1053 }
1054
/**
 * Translates a Mesa IR gl_program into a freshly-created nir_shader with
 * a single "main" function.  Returns NULL on allocation failure or if
 * translation sets c->error; ownership of the returned shader passes to
 * the caller (it is ralloc'd with a NULL parent).
 */
struct nir_shader *
prog_to_nir(struct gl_program *prog, const nir_shader_compiler_options *options)
{
   struct ptn_compile *c;
   struct nir_shader *s;

   c = rzalloc(NULL, struct ptn_compile);
   if (!c)
      return NULL;
   s = nir_shader_create(NULL, options);
   if (!s)
      goto fail;   /* s is NULL; the fail path returns it as-is */
   c->prog = prog;

   nir_function *func = nir_function_create(s, "main");
   nir_function_overload *overload = nir_function_overload_create(func);
   nir_function_impl *impl = nir_function_impl_create(overload);

   /* Point the builder at the end of main's body; all emission appends
    * there.
    */
   c->build.shader = s;
   c->build.impl = impl;
   c->build.cf_node_list = &impl->body;

   setup_registers_and_variables(c);
   if (unlikely(c->error))
      goto fail;

   for (unsigned int i = 0; i < prog->NumInstructions; i++) {
      ptn_emit_instruction(c, &prog->Instructions[i]);

      if (unlikely(c->error))
         break;
   }

   /* Runs even after an error break above; the shader is freed below in
    * that case, so the extra stores are harmless.
    */
   ptn_add_output_stores(c);

fail:
   if (c->error) {
      ralloc_free(s);
      s = NULL;
   }
   ralloc_free(c);
   return s;
}