src/mesa/program/prog_to_nir.c

   1 /*
   2  * Copyright © 2015 Intel Corporation
   3  * Copyright © 2014-2015 Broadcom
   4  * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the "Software"),
   8  * to deal in the Software without restriction, including without limitation
   9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  10  * and/or sell copies of the Software, and to permit persons to whom the
  11  * Software is furnished to do so, subject to the following conditions:
  12  *
  13  * The above copyright notice and this permission notice (including the next
  14  * paragraph) shall be included in all copies or substantial portions of the
  15  * Software.
  16  *
  17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  22  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  23  * IN THE SOFTWARE.
  24  */
  25
  26 #include "nir/nir.h"
  27 #include "nir/nir_builder.h"
  28 #include "glsl/list.h"
  29 #include "main/imports.h"
  30 #include "util/ralloc.h"
  31
  32 #include "prog_to_nir.h"
  33 #include "prog_instruction.h"
  34 #include "prog_parameter.h"
  35 #include "prog_print.h"
  36
  37 /**
  38  * \file prog_to_nir.c
  39  *
  40  * A translator from Mesa IR (prog_instruction.h) to NIR.  This is primarily
  41  * intended to support ARB_vertex_program, ARB_fragment_program, and fixed-function
  42  * vertex processing.  Full GLSL support should use glsl_to_nir instead.
  43  */
  44
/**
 * State shared by the translation helpers in this file while a single Mesa IR
 * program is being converted to NIR.
 */
struct ptn_compile {
   /* The Mesa IR program being translated. */
   const struct gl_program *prog;
   /* Builder through which all NIR instructions are emitted. */
   nir_builder build;
   /* Checked by ptn_emit_instruction() before it emits anything. */
   bool error;

   /* Input variables are looked up by source register Index; output
    * variables and output registers are looked up by variable location.
    */
   nir_variable *input_vars[VARYING_SLOT_MAX];
   nir_variable *output_vars[VARYING_SLOT_MAX];
   /* NIR registers backing PROGRAM_OUTPUT and PROGRAM_TEMPORARY, indexed by
    * the Mesa IR register Index.
    */
   nir_register **output_regs;
   nir_register **temp_regs;

   /* NIR register backing the PROGRAM_ADDRESS register (Index 0 only). */
   nir_register *addr_reg;
};
  57
/* Builds an unsigned[4] compound literal from four swizzle component names
 * by pasting each name onto the SWIZZLE_ prefix.
 */
#define SWIZ(X, Y, Z, W) \
   (unsigned[4]){ SWIZZLE_##X, SWIZZLE_##Y, SWIZZLE_##Z, SWIZZLE_##W }
/* Swizzles src into a four-component value using the named channels. */
#define ptn_swizzle(b, src, x, y, z, w) nir_swizzle(b, src, SWIZ(x, y, z, w), 4, true)
/* Extracts the single channel ch of src as a one-component value. */
#define ptn_channel(b, src, ch) nir_swizzle(b, src, SWIZ(ch, ch, ch, ch), 1, true)
  62
  63 static nir_ssa_def *
  64 ptn_src_for_dest(struct ptn_compile *c, nir_alu_dest *dest)
  65 {
  66    nir_builder *b = &c->build;
  67
  68    nir_alu_src src;
  69    memset(&src, 0, sizeof(src));
  70
  71    if (dest->dest.is_ssa)
  72       src.src = nir_src_for_ssa(&dest->dest.ssa);
  73    else {
  74       assert(!dest->dest.reg.indirect);
  75       src.src = nir_src_for_reg(dest->dest.reg.reg);
  76       src.src.reg.base_offset = dest->dest.reg.base_offset;
  77    }
  78
  79    for (int i = 0; i < 4; i++)
  80       src.swizzle[i] = i;
  81
  82    return nir_fmov_alu(b, src, 4);
  83 }
  84
  85 static nir_alu_dest
  86 ptn_get_dest(struct ptn_compile *c, const struct prog_dst_register *prog_dst)
  87 {
  88    nir_alu_dest dest;
  89
  90    memset(&dest, 0, sizeof(dest));
  91
  92    switch (prog_dst->File) {
  93    case PROGRAM_TEMPORARY:
  94       dest.dest.reg.reg = c->temp_regs[prog_dst->Index];
  95       break;
  96    case PROGRAM_OUTPUT:
  97       dest.dest.reg.reg = c->output_regs[prog_dst->Index];
  98       break;
  99    case PROGRAM_ADDRESS:
 100       assert(prog_dst->Index == 0);
 101       dest.dest.reg.reg = c->addr_reg;
 102       break;
 103    case PROGRAM_UNDEFINED:
 104       break;
 105    }
 106
 107    dest.write_mask = prog_dst->WriteMask;
 108    dest.saturate = false;
 109
 110    assert(!prog_dst->RelAddr);
 111
 112    return dest;
 113 }
 114
 115 /**
 116  * Multiply the contents of the ADDR register by 4 to convert from the number
 117  * of vec4s to the number of floating point components.
 118  */
 119 static nir_ssa_def *
 120 ptn_addr_reg_value(struct ptn_compile *c)
 121 {
 122    nir_builder *b = &c->build;
 123    nir_alu_src src;
 124    memset(&src, 0, sizeof(src));
 125    src.src = nir_src_for_reg(c->addr_reg);
 126
 127    return nir_imul(b, nir_fmov_alu(b, src, 1), nir_imm_int(b, 4));
 128 }
 129
 130 static nir_ssa_def *
 131 ptn_get_src(struct ptn_compile *c, const struct prog_src_register *prog_src)
 132 {
 133    nir_builder *b = &c->build;
 134    nir_alu_src src;
 135
 136    memset(&src, 0, sizeof(src));
 137
 138    switch (prog_src->File) {
 139    case PROGRAM_UNDEFINED:
 140       return nir_imm_float(b, 0.0);
 141    case PROGRAM_TEMPORARY:
 142       assert(!prog_src->RelAddr && prog_src->Index >= 0);
 143       src.src.reg.reg = c->temp_regs[prog_src->Index];
 144       break;
 145    case PROGRAM_INPUT: {
 146       /* ARB_vertex_program doesn't allow relative addressing on vertex
 147        * attributes; ARB_fragment_program has no relative addressing at all.
 148        */
 149       assert(!prog_src->RelAddr);
 150
 151       assert(prog_src->Index >= 0 && prog_src->Index < VARYING_SLOT_MAX);
 152
 153       nir_intrinsic_instr *load =
 154          nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_var);
 155       load->num_components = 4;
 156       load->variables[0] = nir_deref_var_create(load, c->input_vars[prog_src->Index]);
 157
 158       nir_ssa_dest_init(&load->instr, &load->dest, 4, NULL);
 159       nir_instr_insert_after_cf_list(b->cf_node_list, &load->instr);
 160
 161       src.src = nir_src_for_ssa(&load->dest.ssa);
 162       break;
 163    }
 164    case PROGRAM_STATE_VAR:
 165    case PROGRAM_CONSTANT: {
 166       /* We actually want to look at the type in the Parameters list for this,
 167        * because it lets us upload constant builtin uniforms as actual
 168        * constants.
 169        */
 170       struct gl_program_parameter_list *plist = c->prog->Parameters;
 171       gl_register_file file = prog_src->RelAddr ? prog_src->File :
 172          plist->Parameters[prog_src->Index].Type;
 173
 174       switch (file) {
 175       case PROGRAM_CONSTANT:
 176          if ((c->prog->IndirectRegisterFiles & (1 << PROGRAM_CONSTANT)) == 0) {
 177             float *v = (float *) plist->ParameterValues[prog_src->Index];
 178             src.src = nir_src_for_ssa(nir_imm_vec4(b, v[0], v[1], v[2], v[3]));
 179             break;
 180          }
 181          /* FALLTHROUGH */
 182       case PROGRAM_STATE_VAR: {
 183          nir_intrinsic_op load_op =
 184             prog_src->RelAddr ? nir_intrinsic_load_uniform_indirect :
 185                                 nir_intrinsic_load_uniform;
 186          nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, load_op);
 187          nir_ssa_dest_init(&load->instr, &load->dest, 4, NULL);
 188          load->num_components = 4;
 189
 190          /* Multiply src->Index by 4 to scale from # of vec4s to components. */
 191          load->const_index[0] = 4 * prog_src->Index;
 192          load->const_index[1] = 1;
 193
 194          if (prog_src->RelAddr) {
 195             nir_ssa_def *reladdr = ptn_addr_reg_value(c);
 196             if (prog_src->Index < 0) {
 197                /* This is a negative offset which should be added to the address
 198                 * register's value.
 199                 */
 200                reladdr = nir_iadd(b, reladdr, nir_imm_int(b, load->const_index[0]));
 201                load->const_index[0] = 0;
 202             }
 203             load->src[0] = nir_src_for_ssa(reladdr);
 204          }
 205
 206          nir_instr_insert_after_cf_list(b->cf_node_list, &load->instr);
 207
 208          src.src = nir_src_for_ssa(&load->dest.ssa);
 209          break;
 210       }
 211       default:
 212          fprintf(stderr, "bad uniform src register file: %s (%d)\n",
 213                  _mesa_register_file_name(file), file);
 214          abort();
 215       }
 216       break;
 217    }
 218    default:
 219       fprintf(stderr, "unknown src register file: %s (%d)\n",
 220               _mesa_register_file_name(prog_src->File), prog_src->File);
 221       abort();
 222    }
 223
 224    nir_ssa_def *def;
 225    if (!HAS_EXTENDED_SWIZZLE(prog_src->Swizzle) &&
 226        (prog_src->Negate == NEGATE_NONE || prog_src->Negate == NEGATE_XYZW)) {
 227       /* The simple non-SWZ case. */
 228       for (int i = 0; i < 4; i++)
 229          src.swizzle[i] = GET_SWZ(prog_src->Swizzle, i);
 230
 231       def = nir_fmov_alu(b, src, 4);
 232
 233       if (prog_src->Abs)
 234          def = nir_fabs(b, def);
 235
 236       if (prog_src->Negate)
 237          def = nir_fneg(b, def);
 238    } else {
 239       /* The SWZ instruction allows per-component zero/one swizzles, and also
 240        * per-component negation.
 241        */
 242       nir_ssa_def *chans[4];
 243       for (int i = 0; i < 4; i++) {
 244          int swizzle = GET_SWZ(prog_src->Swizzle, i);
 245          if (swizzle == SWIZZLE_ZERO) {
 246             chans[i] = nir_imm_float(b, 0.0);
 247          } else if (swizzle == SWIZZLE_ONE) {
 248             chans[i] = nir_imm_float(b, 1.0);
 249          } else {
 250             assert(swizzle != SWIZZLE_NIL);
 251             nir_alu_instr *mov = nir_alu_instr_create(b->shader, nir_op_fmov);
 252             nir_ssa_dest_init(&mov->instr, &mov->dest.dest, 1, NULL);
 253             mov->dest.write_mask = 0x1;
 254             mov->src[0] = src;
 255             mov->src[0].swizzle[0] = swizzle;
 256             nir_instr_insert_after_cf_list(b->cf_node_list, &mov->instr);
 257
 258             chans[i] = &mov->dest.dest.ssa;
 259          }
 260
 261          if (prog_src->Abs)
 262             chans[i] = nir_fabs(b, chans[i]);
 263
 264          if (prog_src->Negate & (1 << i))
 265             chans[i] = nir_fneg(b, chans[i]);
 266       }
 267       def = nir_vec4(b, chans[0], chans[1], chans[2], chans[3]);
 268    }
 269
 270    return def;
 271 }
 272
 273 static void
 274 ptn_alu(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
 275 {
 276    unsigned num_srcs = nir_op_infos[op].num_inputs;
 277    nir_alu_instr *instr = nir_alu_instr_create(b->shader, op);
 278    unsigned i;
 279
 280    for (i = 0; i < num_srcs; i++)
 281       instr->src[i].src = nir_src_for_ssa(src[i]);
 282
 283    instr->dest = dest;
 284    nir_instr_insert_after_cf_list(b->cf_node_list, &instr->instr);
 285 }
 286
 287 static void
 288 ptn_move_dest_masked(nir_builder *b, nir_alu_dest dest,
 289                      nir_ssa_def *def, unsigned write_mask)
 290 {
 291    if (!(dest.write_mask & write_mask))
 292       return;
 293
 294    nir_alu_instr *mov = nir_alu_instr_create(b->shader, nir_op_fmov);
 295    if (!mov)
 296       return;
 297
 298    mov->dest = dest;
 299    mov->dest.write_mask &= write_mask;
 300    mov->src[0].src = nir_src_for_ssa(def);
 301    for (unsigned i = def->num_components; i < 4; i++)
 302       mov->src[0].swizzle[i] = def->num_components - 1;
 303    nir_instr_insert_after_cf_list(b->cf_node_list, &mov->instr);
 304 }
 305
 306 static void
 307 ptn_move_dest(nir_builder *b, nir_alu_dest dest, nir_ssa_def *def)
 308 {
 309    ptn_move_dest_masked(b, dest, def, WRITEMASK_XYZW);
 310 }
 311
 312 static void
 313 ptn_arl(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
 314 {
 315    ptn_move_dest(b, dest, nir_f2i(b, nir_ffloor(b, src[0])));
 316 }
 317
 318 /* EXP - Approximate Exponential Base 2
 319  *  dst.x = 2^{\lfloor src.x\rfloor}
 320  *  dst.y = src.x - \lfloor src.x\rfloor
 321  *  dst.z = 2^{src.x}
 322  *  dst.w = 1.0
 323  */
 324 static void
 325 ptn_exp(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
 326 {
 327    nir_ssa_def *srcx = ptn_channel(b, src[0], X);
 328
 329    ptn_move_dest_masked(b, dest, nir_fexp2(b, nir_ffloor(b, srcx)), WRITEMASK_X);
 330    ptn_move_dest_masked(b, dest, nir_fsub(b, srcx, nir_ffloor(b, srcx)), WRITEMASK_Y);
 331    ptn_move_dest_masked(b, dest, nir_fexp2(b, srcx), WRITEMASK_Z);
 332    ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_W);
 333 }
 334
 335 /* LOG - Approximate Logarithm Base 2
 336  *  dst.x = \lfloor\log_2{|src.x|}\rfloor
 337  *  dst.y = |src.x| * 2^{-\lfloor\log_2{|src.x|}\rfloor}}
 338  *  dst.z = \log_2{|src.x|}
 339  *  dst.w = 1.0
 340  */
 341 static void
 342 ptn_log(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
 343 {
 344    nir_ssa_def *abs_srcx = nir_fabs(b, ptn_channel(b, src[0], X));
 345    nir_ssa_def *log2 = nir_flog2(b, abs_srcx);
 346    nir_ssa_def *floor_log2 = nir_ffloor(b, log2);
 347
 348    ptn_move_dest_masked(b, dest, floor_log2, WRITEMASK_X);
 349    ptn_move_dest_masked(b, dest,
 350                         nir_fmul(b, abs_srcx,
 351                                  nir_fexp2(b, nir_fneg(b, floor_log2))),
 352                         WRITEMASK_Y);
 353    ptn_move_dest_masked(b, dest, log2, WRITEMASK_Z);
 354    ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_W);
 355 }
 356
 357 /* DST - Distance Vector
 358  *   dst.x = 1.0
 359  *   dst.y = src0.y \times src1.y
 360  *   dst.z = src0.z
 361  *   dst.w = src1.w
 362  */
 363 static void
 364 ptn_dst(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
 365 {
 366    ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_X);
 367    ptn_move_dest_masked(b, dest, nir_fmul(b, src[0], src[1]), WRITEMASK_Y);
 368    ptn_move_dest_masked(b, dest, nir_fmov(b, src[0]), WRITEMASK_Z);
 369    ptn_move_dest_masked(b, dest, nir_fmov(b, src[1]), WRITEMASK_W);
 370 }
 371
 372 /* LIT - Light Coefficients
 373  *  dst.x = 1.0
 374  *  dst.y = max(src.x, 0.0)
 375  *  dst.z = (src.x > 0.0) ? max(src.y, 0.0)^{clamp(src.w, -128.0, 128.0))} : 0
 376  *  dst.w = 1.0
 377  */
 378 static void
 379 ptn_lit(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
 380 {
 381    ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_XW);
 382
 383    ptn_move_dest_masked(b, dest, nir_fmax(b, ptn_channel(b, src[0], X),
 384                                           nir_imm_float(b, 0.0)), WRITEMASK_Y);
 385
 386    if (dest.write_mask & WRITEMASK_Z) {
 387       nir_ssa_def *src0_y = ptn_channel(b, src[0], Y);
 388       nir_ssa_def *wclamp = nir_fmax(b, nir_fmin(b, ptn_channel(b, src[0], W),
 389                                                  nir_imm_float(b, 128.0)),
 390                                      nir_imm_float(b, -128.0));
 391       nir_ssa_def *pow = nir_fpow(b, nir_fmax(b, src0_y, nir_imm_float(b, 0.0)),
 392                                   wclamp);
 393
 394       nir_ssa_def *z;
 395       if (b->shader->options->native_integers) {
 396          z = nir_bcsel(b,
 397                        nir_fge(b, nir_imm_float(b, 0.0), ptn_channel(b, src[0], X)),
 398                        nir_imm_float(b, 0.0),
 399                        pow);
 400       } else {
 401          z = nir_fcsel(b,
 402                        nir_sge(b, nir_imm_float(b, 0.0), ptn_channel(b, src[0], X)),
 403                        nir_imm_float(b, 0.0),
 404                        pow);
 405       }
 406
 407       ptn_move_dest_masked(b, dest, z, WRITEMASK_Z);
 408    }
 409 }
 410
 411 /* SCS - Sine Cosine
 412  *   dst.x = \cos{src.x}
 413  *   dst.y = \sin{src.x}
 414  *   dst.z = 0.0
 415  *   dst.w = 1.0
 416  */
 417 static void
 418 ptn_scs(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
 419 {
 420    ptn_move_dest_masked(b, dest, nir_fcos(b, ptn_channel(b, src[0], X)),
 421                         WRITEMASK_X);
 422    ptn_move_dest_masked(b, dest, nir_fsin(b, ptn_channel(b, src[0], X)),
 423                         WRITEMASK_Y);
 424    ptn_move_dest_masked(b, dest, nir_imm_float(b, 0.0), WRITEMASK_Z);
 425    ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_W);
 426 }
 427
 428 /**
 429  * Emit SLT.  For platforms with integers, prefer b2f(flt(...)).
 430  */
 431 static void
 432 ptn_slt(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
 433 {
 434    if (b->shader->options->native_integers) {
 435       ptn_move_dest(b, dest, nir_b2f(b, nir_flt(b, src[0], src[1])));
 436    } else {
 437       ptn_move_dest(b, dest, nir_slt(b, src[0], src[1]));
 438    }
 439 }
 440
 441 /**
 442  * Emit SGE.  For platforms with integers, prefer b2f(fge(...)).
 443  */
 444 static void
 445 ptn_sge(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
 446 {
 447    if (b->shader->options->native_integers) {
 448       ptn_move_dest(b, dest, nir_b2f(b, nir_fge(b, src[0], src[1])));
 449    } else {
 450       ptn_move_dest(b, dest, nir_sge(b, src[0], src[1]));
 451    }
 452 }
 453
 454 static void
 455 ptn_sle(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
 456 {
 457    nir_ssa_def *commuted[] = { src[1], src[0] };
 458    ptn_sge(b, dest, commuted);
 459 }
 460
 461 static void
 462 ptn_sgt(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
 463 {
 464    nir_ssa_def *commuted[] = { src[1], src[0] };
 465    ptn_slt(b, dest, commuted);
 466 }
 467
 468 /**
 469  * Emit SEQ.  For platforms with integers, prefer b2f(feq(...)).
 470  */
 471 static void
 472 ptn_seq(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
 473 {
 474    if (b->shader->options->native_integers) {
 475       ptn_move_dest(b, dest, nir_b2f(b, nir_feq(b, src[0], src[1])));
 476    } else {
 477       ptn_move_dest(b, dest, nir_seq(b, src[0], src[1]));
 478    }
 479 }
 480
 481 /**
 482  * Emit SNE.  For platforms with integers, prefer b2f(fne(...)).
 483  */
 484 static void
 485 ptn_sne(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
 486 {
 487    if (b->shader->options->native_integers) {
 488       ptn_move_dest(b, dest, nir_b2f(b, nir_fne(b, src[0], src[1])));
 489    } else {
 490       ptn_move_dest(b, dest, nir_sne(b, src[0], src[1]));
 491    }
 492 }
 493
 494 static void
 495 ptn_xpd(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
 496 {
 497    ptn_move_dest_masked(b, dest,
 498                         nir_fsub(b,
 499                                  nir_fmul(b,
 500                                           ptn_swizzle(b, src[0], Y, Z, X, X),
 501                                           ptn_swizzle(b, src[1], Z, X, Y, X)),
 502                                  nir_fmul(b,
 503                                           ptn_swizzle(b, src[1], Y, Z, X, X),
 504                                           ptn_swizzle(b, src[0], Z, X, Y, X))),
 505                         WRITEMASK_XYZ);
 506    ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_W);
 507 }
 508
 509 static void
 510 ptn_dp2(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
 511 {
 512    ptn_move_dest(b, dest, nir_fdot2(b, src[0], src[1]));
 513 }
 514
 515 static void
 516 ptn_dp3(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
 517 {
 518    ptn_move_dest(b, dest, nir_fdot3(b, src[0], src[1]));
 519 }
 520
 521 static void
 522 ptn_dp4(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
 523 {
 524    ptn_move_dest(b, dest, nir_fdot4(b, src[0], src[1]));
 525 }
 526
 527 static void
 528 ptn_dph(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
 529 {
 530    nir_ssa_def *dp3 = nir_fdot3(b, src[0], src[1]);
 531    ptn_move_dest(b, dest, nir_fadd(b, dp3, ptn_channel(b, src[1], W)));
 532 }
 533
 534 static void
 535 ptn_cmp(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
 536 {
 537    if (b->shader->options->native_integers) {
 538       ptn_move_dest(b, dest, nir_bcsel(b,
 539                                        nir_flt(b, src[0], nir_imm_float(b, 0.0)),
 540                                        src[1], src[2]));
 541    } else {
 542       ptn_move_dest(b, dest, nir_fcsel(b,
 543                                        nir_slt(b, src[0], nir_imm_float(b, 0.0)),
 544                                        src[1], src[2]));
 545    }
 546 }
 547
 548 static void
 549 ptn_lrp(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
 550 {
 551    ptn_move_dest(b, dest, nir_flrp(b, src[2], src[1], src[0]));
 552 }
 553
 554 static void
 555 ptn_kil(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
 556 {
 557    nir_ssa_def *cmp = b->shader->options->native_integers ?
 558       nir_bany4(b, nir_flt(b, src[0], nir_imm_float(b, 0.0))) :
 559       nir_fany4(b, nir_slt(b, src[0], nir_imm_float(b, 0.0)));
 560
 561    nir_intrinsic_instr *discard =
 562       nir_intrinsic_instr_create(b->shader, nir_intrinsic_discard_if);
 563    discard->src[0] = nir_src_for_ssa(cmp);
 564    nir_instr_insert_after_cf_list(b->cf_node_list, &discard->instr);
 565 }
 566
 567 static void
 568 ptn_tex(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src,
 569         struct prog_instruction *prog_inst)
 570 {
 571    nir_tex_instr *instr;
 572    nir_texop op;
 573    unsigned num_srcs;
 574
 575    switch (prog_inst->Opcode) {
 576    case OPCODE_TEX:
 577       op = nir_texop_tex;
 578       num_srcs = 1;
 579       break;
 580    case OPCODE_TXB:
 581       op = nir_texop_txb;
 582       num_srcs = 2;
 583       break;
 584    case OPCODE_TXD:
 585       op = nir_texop_txd;
 586       num_srcs = 3;
 587       break;
 588    case OPCODE_TXL:
 589       op = nir_texop_txl;
 590       num_srcs = 2;
 591       break;
 592    case OPCODE_TXP:
 593       op = nir_texop_tex;
 594       num_srcs = 2;
 595       break;
 596    case OPCODE_TXP_NV:
 597       assert(!"not handled");
 598       op = nir_texop_tex;
 599       num_srcs = 2;
 600       break;
 601    default:
 602       fprintf(stderr, "unknown tex op %d\n", prog_inst->Opcode);
 603       abort();
 604    }
 605
 606    if (prog_inst->TexShadow)
 607       num_srcs++;
 608
 609    instr = nir_tex_instr_create(b->shader, num_srcs);
 610    instr->op = op;
 611    instr->dest_type = nir_type_float;
 612    instr->is_shadow = prog_inst->TexShadow;
 613    instr->sampler_index = prog_inst->TexSrcUnit;
 614
 615    switch (prog_inst->TexSrcTarget) {
 616    case TEXTURE_1D_INDEX:
 617       instr->sampler_dim = GLSL_SAMPLER_DIM_1D;
 618       break;
 619    case TEXTURE_2D_INDEX:
 620       instr->sampler_dim = GLSL_SAMPLER_DIM_2D;
 621       break;
 622    case TEXTURE_3D_INDEX:
 623       instr->sampler_dim = GLSL_SAMPLER_DIM_3D;
 624       break;
 625    case TEXTURE_CUBE_INDEX:
 626       instr->sampler_dim = GLSL_SAMPLER_DIM_CUBE;
 627       break;
 628    case TEXTURE_RECT_INDEX:
 629       instr->sampler_dim = GLSL_SAMPLER_DIM_RECT;
 630       break;
 631    default:
 632       fprintf(stderr, "Unknown texture target %d\n", prog_inst->TexSrcTarget);
 633       abort();
 634    }
 635
 636    switch (instr->sampler_dim) {
 637    case GLSL_SAMPLER_DIM_1D:
 638    case GLSL_SAMPLER_DIM_BUF:
 639       instr->coord_components = 1;
 640       break;
 641    case GLSL_SAMPLER_DIM_2D:
 642    case GLSL_SAMPLER_DIM_RECT:
 643    case GLSL_SAMPLER_DIM_EXTERNAL:
 644    case GLSL_SAMPLER_DIM_MS:
 645       instr->coord_components = 2;
 646       break;
 647    case GLSL_SAMPLER_DIM_3D:
 648    case GLSL_SAMPLER_DIM_CUBE:
 649       instr->coord_components = 3;
 650       break;
 651    }
 652
 653    unsigned src_number = 0;
 654
 655    instr->src[src_number].src =
 656       nir_src_for_ssa(ptn_swizzle(b, src[0], X, Y, Z, W));
 657    instr->src[src_number].src_type = nir_tex_src_coord;
 658    src_number++;
 659
 660    if (prog_inst->Opcode == OPCODE_TXP) {
 661       instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], W));
 662       instr->src[src_number].src_type = nir_tex_src_projector;
 663       src_number++;
 664    }
 665
 666    if (prog_inst->Opcode == OPCODE_TXB) {
 667       instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], W));
 668       instr->src[src_number].src_type = nir_tex_src_bias;
 669       src_number++;
 670    }
 671
 672    if (prog_inst->Opcode == OPCODE_TXL) {
 673       instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], W));
 674       instr->src[src_number].src_type = nir_tex_src_lod;
 675       src_number++;
 676    }
 677
 678    if (instr->is_shadow) {
 679       if (instr->coord_components < 3)
 680          instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], Z));
 681       else
 682          instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], W));
 683
 684       instr->src[src_number].src_type = nir_tex_src_comparitor;
 685       src_number++;
 686    }
 687
 688    assert(src_number == num_srcs);
 689
 690    nir_ssa_dest_init(&instr->instr, &instr->dest, 4, NULL);
 691    nir_instr_insert_after_cf_list(b->cf_node_list, &instr->instr);
 692
 693    /* Resolve the writemask on the texture op. */
 694    ptn_move_dest(b, dest, &instr->dest.ssa);
 695 }
 696
/**
 * Direct Mesa IR opcode to NIR ALU opcode mapping, consulted by the default
 * case of ptn_emit_instruction().
 *
 * A 0 entry means there is no one-to-one mapping.  Most such opcodes have an
 * explicit case in ptn_emit_instruction(); any that reach its default case
 * with a 0 entry are reported as unknown, with OPCODE_MOV explicitly
 * exempted from that check.
 */
static const nir_op op_trans[MAX_OPCODE] = {
   [OPCODE_NOP] = 0,
   [OPCODE_ABS] = nir_op_fabs,
   [OPCODE_ADD] = nir_op_fadd,
   [OPCODE_ARL] = 0,
   [OPCODE_CMP] = 0,
   [OPCODE_COS] = nir_op_fcos,
   [OPCODE_DDX] = nir_op_fddx,
   [OPCODE_DDY] = nir_op_fddy,
   [OPCODE_DP2] = 0,
   [OPCODE_DP3] = 0,
   [OPCODE_DP4] = 0,
   [OPCODE_DPH] = 0,
   [OPCODE_DST] = 0,
   [OPCODE_END] = 0,
   [OPCODE_EX2] = nir_op_fexp2,
   [OPCODE_EXP] = 0,
   [OPCODE_FLR] = nir_op_ffloor,
   [OPCODE_FRC] = nir_op_ffract,
   [OPCODE_LG2] = nir_op_flog2,
   [OPCODE_LIT] = 0,
   [OPCODE_LOG] = 0,
   [OPCODE_LRP] = 0,
   [OPCODE_MAD] = nir_op_ffma,
   [OPCODE_MAX] = nir_op_fmax,
   [OPCODE_MIN] = nir_op_fmin,
   [OPCODE_MOV] = nir_op_fmov,
   [OPCODE_MUL] = nir_op_fmul,
   [OPCODE_POW] = nir_op_fpow,
   [OPCODE_RCP] = nir_op_frcp,

   [OPCODE_RSQ] = nir_op_frsq,
   [OPCODE_SCS] = 0,
   [OPCODE_SEQ] = 0,
   [OPCODE_SGE] = 0,
   [OPCODE_SGT] = 0,
   [OPCODE_SIN] = nir_op_fsin,
   [OPCODE_SLE] = 0,
   [OPCODE_SLT] = 0,
   [OPCODE_SNE] = 0,
   [OPCODE_SSG] = nir_op_fsign,
   [OPCODE_SUB] = nir_op_fsub,
   [OPCODE_SWZ] = 0,
   [OPCODE_TEX] = 0,
   [OPCODE_TRUNC] = nir_op_ftrunc,
   [OPCODE_TXB] = 0,
   [OPCODE_TXD] = 0,
   [OPCODE_TXL] = 0,
   [OPCODE_TXP] = 0,
   [OPCODE_TXP_NV] = 0,
   [OPCODE_XPD] = 0,
};
 749
 750 static void
 751 ptn_emit_instruction(struct ptn_compile *c, struct prog_instruction *prog_inst)
 752 {
 753    nir_builder *b = &c->build;
 754    unsigned i;
 755    const unsigned op = prog_inst->Opcode;
 756
 757    if (op == OPCODE_END)
 758       return;
 759
 760    nir_ssa_def *src[3];
 761    for (i = 0; i < 3; i++) {
 762       src[i] = ptn_get_src(c, &prog_inst->SrcReg[i]);
 763    }
 764    nir_alu_dest dest = ptn_get_dest(c, &prog_inst->DstReg);
 765    if (c->error)
 766       return;
 767
 768    switch (op) {
 769    case OPCODE_RSQ:
 770       ptn_move_dest(b, dest, nir_frsq(b, ptn_channel(b, src[0], X)));
 771       break;
 772
 773    case OPCODE_RCP:
 774       ptn_move_dest(b, dest, nir_frcp(b, ptn_channel(b, src[0], X)));
 775       break;
 776
 777    case OPCODE_EX2:
 778       ptn_move_dest(b, dest, nir_fexp2(b, ptn_channel(b, src[0], X)));
 779       break;
 780
 781    case OPCODE_LG2:
 782       ptn_move_dest(b, dest, nir_flog2(b, ptn_channel(b, src[0], X)));
 783       break;
 784
 785    case OPCODE_POW:
 786       ptn_move_dest(b, dest, nir_fpow(b,
 787                                       ptn_channel(b, src[0], X),
 788                                       ptn_channel(b, src[1], X)));
 789       break;
 790
 791    case OPCODE_COS:
 792       ptn_move_dest(b, dest, nir_fcos(b, ptn_channel(b, src[0], X)));
 793       break;
 794
 795    case OPCODE_SIN:
 796       ptn_move_dest(b, dest, nir_fsin(b, ptn_channel(b, src[0], X)));
 797       break;
 798
 799    case OPCODE_ARL:
 800       ptn_arl(b, dest, src);
 801       break;
 802
 803    case OPCODE_EXP:
 804       ptn_exp(b, dest, src);
 805       break;
 806
 807    case OPCODE_LOG:
 808       ptn_log(b, dest, src);
 809       break;
 810
 811    case OPCODE_LRP:
 812       ptn_lrp(b, dest, src);
 813       break;
 814
 815    case OPCODE_DST:
 816       ptn_dst(b, dest, src);
 817       break;
 818
 819    case OPCODE_LIT:
 820       ptn_lit(b, dest, src);
 821       break;
 822
 823    case OPCODE_XPD:
 824       ptn_xpd(b, dest, src);
 825       break;
 826
 827    case OPCODE_DP2:
 828       ptn_dp2(b, dest, src);
 829       break;
 830
 831    case OPCODE_DP3:
 832       ptn_dp3(b, dest, src);
 833       break;
 834
 835    case OPCODE_DP4:
 836       ptn_dp4(b, dest, src);
 837       break;
 838
 839    case OPCODE_DPH:
 840       ptn_dph(b, dest, src);
 841       break;
 842
 843    case OPCODE_KIL:
 844       ptn_kil(b, dest, src);
 845       break;
 846
 847    case OPCODE_CMP:
 848       ptn_cmp(b, dest, src);
 849       break;
 850
 851    case OPCODE_SCS:
 852       ptn_scs(b, dest, src);
 853       break;
 854
 855    case OPCODE_SLT:
 856       ptn_slt(b, dest, src);
 857       break;
 858
 859    case OPCODE_SGT:
 860       ptn_sgt(b, dest, src);
 861       break;
 862
 863    case OPCODE_SLE:
 864       ptn_sle(b, dest, src);
 865       break;
 866
 867    case OPCODE_SGE:
 868       ptn_sge(b, dest, src);
 869       break;
 870
 871    case OPCODE_SEQ:
 872       ptn_seq(b, dest, src);
 873       break;
 874
 875    case OPCODE_SNE:
 876       ptn_sne(b, dest, src);
 877       break;
 878
 879    case OPCODE_TEX:
 880    case OPCODE_TXB:
 881    case OPCODE_TXD:
 882    case OPCODE_TXL:
 883    case OPCODE_TXP:
 884    case OPCODE_TXP_NV:
 885       ptn_tex(b, dest, src, prog_inst);
 886       break;
 887
 888    case OPCODE_SWZ:
 889       /* Extended swizzles were already handled in ptn_get_src(). */
 890       ptn_alu(b, nir_op_fmov, dest, src);
 891       break;
 892
 893    case OPCODE_NOP:
 894       break;
 895
 896    default:
 897       if (op_trans[op] != 0 || op == OPCODE_MOV) {
 898          ptn_alu(b, op_trans[op], dest, src);
 899       } else {
 900          fprintf(stderr, "unknown opcode: %s\n", _mesa_opcode_string(op));
 901          abort();
 902       }
 903       break;
 904    }
 905
 906    if (prog_inst->SaturateMode) {
 907       assert(prog_inst->SaturateMode == SATURATE_ZERO_ONE);
 908       assert(!dest.dest.is_ssa);
 909       ptn_move_dest(b, dest, nir_fsat(b, ptn_src_for_dest(c, &dest)));
 910    }
 911 }
 912
 913 /**
 914  * Puts a NIR intrinsic to store of each PROGRAM_OUTPUT value to the output
 915  * variables at the end of the shader.
 916  *
 917  * We don't generate these incrementally as the PROGRAM_OUTPUT values are
 918  * written, because there's no output load intrinsic, which means we couldn't
 919  * handle writemasks.
 920  */
 921 static void
 922 ptn_add_output_stores(struct ptn_compile *c)
 923 {
 924    nir_builder *b = &c->build;
 925
 926    foreach_list_typed(nir_variable, var, node, &b->shader->outputs) {
 927       nir_intrinsic_instr *store =
 928          nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_var);
 929       store->num_components = 4;
 930       store->variables[0] =
 931          nir_deref_var_create(store, c->output_vars[var->data.location]);
 932       store->src[0].reg.reg = c->output_regs[var->data.location];
 933       nir_instr_insert_after_cf_list(c->build.cf_node_list, &store->instr);
 934    }
 935 }
 936
 937 static void
 938 setup_registers_and_variables(struct ptn_compile *c)
 939 {
 940    nir_builder *b = &c->build;
 941    struct nir_shader *shader = b->shader;
 942
 943    /* Create input variables. */
 944    const int num_inputs = _mesa_flsll(c->prog->InputsRead);
 945    for (int i = 0; i < num_inputs; i++) {
 946       if (!(c->prog->InputsRead & BITFIELD64_BIT(i)))
 947          continue;
 948       nir_variable *var = rzalloc(shader, nir_variable);
 949       var->type = glsl_vec4_type();
 950       var->data.read_only = true;
 951       var->data.mode = nir_var_shader_in;
 952       var->name = ralloc_asprintf(var, "in_%d", i);
 953       var->data.location = i;
 954       var->data.index = 0;
 955
 956       if (c->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
 957          struct gl_fragment_program *fp =
 958             (struct gl_fragment_program *) c->prog;
 959
 960          var->data.interpolation = fp->InterpQualifier[i];
 961
 962          if (i == VARYING_SLOT_POS) {
 963             var->data.origin_upper_left = fp->OriginUpperLeft;
 964             var->data.pixel_center_integer = fp->PixelCenterInteger;
 965          } else if (i == VARYING_SLOT_FOGC) {
 966             /* fogcoord is defined as <f, 0.0, 0.0, 1.0>.  Make the actual
 967              * input variable a float, and create a local containing the
 968              * full vec4 value.
 969              */
 970             var->type = glsl_float_type();
 971
 972             nir_intrinsic_instr *load_x =
 973                nir_intrinsic_instr_create(shader, nir_intrinsic_load_var);
 974             load_x->num_components = 1;
 975             load_x->variables[0] = nir_deref_var_create(load_x, var);
 976             nir_ssa_dest_init(&load_x->instr, &load_x->dest, 1, NULL);
 977             nir_instr_insert_after_cf_list(b->cf_node_list, &load_x->instr);
 978
 979             nir_ssa_def *f001 = nir_vec4(b, &load_x->dest.ssa, nir_imm_float(b, 0.0),
 980                                          nir_imm_float(b, 0.0), nir_imm_float(b, 1.0));
 981
 982             nir_variable *fullvar = rzalloc(shader, nir_variable);
 983             fullvar->type = glsl_vec4_type();
 984             fullvar->data.mode = nir_var_local;
 985             fullvar->name = "fogcoord_tmp";
 986             exec_list_push_tail(&b->impl->locals, &fullvar->node);
 987
 988             nir_intrinsic_instr *store =
 989                nir_intrinsic_instr_create(shader, nir_intrinsic_store_var);
 990             store->num_components = 4;
 991             store->variables[0] = nir_deref_var_create(store, fullvar);
 992             store->src[0] = nir_src_for_ssa(f001);
 993             nir_instr_insert_after_cf_list(b->cf_node_list, &store->instr);
 994
 995             /* Insert the real input into the list so the driver has real
 996              * inputs, but set c->input_vars[i] to the temporary so we use
 997              * the splatted value.
 998              */
 999             exec_list_push_tail(&shader->inputs, &var->node);
1000             c->input_vars[i] = fullvar;
1001             continue;
1002          }
1003       }
1004
1005       exec_list_push_tail(&shader->inputs, &var->node);
1006       c->input_vars[i] = var;
1007    }
1008
1009    /* Create output registers and variables. */
1010    int max_outputs = _mesa_fls(c->prog->OutputsWritten);
1011    c->output_regs = rzalloc_array(c, nir_register *, max_outputs);
1012
1013    for (int i = 0; i < max_outputs; i++) {
1014       if (!(c->prog->OutputsWritten & BITFIELD64_BIT(i)))
1015          continue;
1016
1017       /* Since we can't load from outputs in the IR, we make temporaries
1018        * for the outputs and emit stores to the real outputs at the end of
1019        * the shader.
1020        */
1021       nir_register *reg = nir_local_reg_create(b->impl);
1022       reg->num_components = 4;
1023
1024       nir_variable *var = rzalloc(shader, nir_variable);
1025       var->type = glsl_vec4_type();
1026       var->data.mode = nir_var_shader_out;
1027       var->name = ralloc_asprintf(var, "out_%d", i);
1028
1029       var->data.location = i;
1030       var->data.index = 0;
1031
1032       c->output_regs[i] = reg;
1033
1034       exec_list_push_tail(&shader->outputs, &var->node);
1035       c->output_vars[i] = var;
1036    }
1037
1038    /* Create temporary registers. */
1039    c->temp_regs = rzalloc_array(c, nir_register *, c->prog->NumTemporaries);
1040
1041    nir_register *reg;
1042    for (int i = 0; i < c->prog->NumTemporaries; i++) {
1043       reg = nir_local_reg_create(b->impl);
1044       if (!reg) {
1045          c->error = true;
1046          return;
1047       }
1048       reg->num_components = 4;
1049       c->temp_regs[i] = reg;
1050    }
1051
1052    /* Create the address register (for ARB_vertex_program). */
1053    reg = nir_local_reg_create(b->impl);
1054    if (!reg) {
1055       c->error = true;
1056       return;
1057    }
1058    reg->num_components = 1;
1059    c->addr_reg = reg;
1060
1061    /* Set the number of uniforms */
1062    shader->num_uniforms = 4 * c->prog->Parameters->NumParameters;
1063 }
1064
1065 struct nir_shader *
1066 prog_to_nir(const struct gl_program *prog, const nir_shader_compiler_options *options)
1067 {
1068    struct ptn_compile *c;
1069    struct nir_shader *s;
1070
1071    c = rzalloc(NULL, struct ptn_compile);
1072    if (!c)
1073       return NULL;
1074    s = nir_shader_create(NULL, options);
1075    if (!s)
1076       goto fail;
1077    c->prog = prog;
1078
1079    nir_function *func = nir_function_create(s, "main");
1080    nir_function_overload *overload = nir_function_overload_create(func);
1081    nir_function_impl *impl = nir_function_impl_create(overload);
1082
1083    c->build.shader = s;
1084    c->build.impl = impl;
1085    c->build.cf_node_list = &impl->body;
1086
1087    setup_registers_and_variables(c);
1088    if (unlikely(c->error))
1089       goto fail;
1090
1091    for (unsigned int i = 0; i < prog->NumInstructions; i++) {
1092       ptn_emit_instruction(c, &prog->Instructions[i]);
1093
1094       if (unlikely(c->error))
1095          break;
1096    }
1097
1098    ptn_add_output_stores(c);
1099
1100 fail:
1101    if (c->error) {
1102       ralloc_free(s);
1103       s = NULL;
1104    }
1105    ralloc_free(c);
1106    return s;
1107 }