nir: nir_shader_compiler_options: drop native_integers
[mesa.git] / src / mesa / program / prog_to_nir.c
1 /*
2 * Copyright © 2015 Intel Corporation
3 * Copyright © 2014-2015 Broadcom
4 * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
15 * Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
23 * IN THE SOFTWARE.
24 */
25
26 #include "compiler/nir/nir.h"
27 #include "compiler/nir/nir_builder.h"
28 #include "compiler/glsl/list.h"
29 #include "main/imports.h"
30 #include "main/mtypes.h"
31 #include "util/ralloc.h"
32
33 #include "prog_to_nir.h"
34 #include "prog_instruction.h"
35 #include "prog_parameter.h"
36 #include "prog_print.h"
37 #include "program.h"
38
39 /**
40 * \file prog_to_nir.c
41 *
42 * A translator from Mesa IR (prog_instruction.h) to NIR. This is primarily
43 * intended to support ARB_vertex_program, ARB_fragment_program, and fixed-function
44 * vertex processing. Full GLSL support should use glsl_to_nir instead.
45 */
46
/**
 * State carried through the translation of a single gl_program to NIR.
 */
struct ptn_compile {
   /* Mesa IR program being translated. */
   const struct gl_program *prog;
   /* Builder used to emit every NIR instruction of the new shader. */
   nir_builder build;
   /* Set when register creation fails; prog_to_nir() then returns NULL. */
   bool error;

   /* Uniform vec4 array covering prog->Parameters; stays NULL when the
    * program has no parameters.
    */
   nir_variable *parameters;
   /* Variables read for PROGRAM_INPUT sources, indexed by input slot. */
   nir_variable *input_vars[VARYING_SLOT_MAX];
   /* Shader output variables, indexed by output slot. */
   nir_variable *output_vars[VARYING_SLOT_MAX];
   /* Variables read for PROGRAM_SYSTEM_VALUE sources. */
   nir_variable *sysval_vars[SYSTEM_VALUE_MAX];
   nir_variable *sampler_vars[32]; /* matches number of bits in TexSrcUnit */
   /* Registers standing in for PROGRAM_OUTPUT; copied to output_vars at the
    * end of the shader.
    */
   nir_register **output_regs;
   /* One register per PROGRAM_TEMPORARY. */
   nir_register **temp_regs;

   /* Single-component register used for PROGRAM_ADDRESS (index 0). */
   nir_register *addr_reg;
};
62
/* Expands to a temporary unsigned[4] holding the four named swizzle
 * selectors (each argument is pasted onto the SWIZZLE_ prefix).
 */
#define SWIZ(C0, C1, C2, C3) \
   (unsigned[4]){ SWIZZLE_##C0, SWIZZLE_##C1, SWIZZLE_##C2, SWIZZLE_##C3 }

/* Selects the single channel `ch` of `src` as a one-component value. */
#define ptn_channel(b, src, ch) \
   nir_swizzle(b, src, SWIZ(ch, ch, ch, ch), 1, true)
66
67 static nir_ssa_def *
68 ptn_src_for_dest(struct ptn_compile *c, nir_alu_dest *dest)
69 {
70 nir_builder *b = &c->build;
71
72 nir_alu_src src;
73 memset(&src, 0, sizeof(src));
74
75 if (dest->dest.is_ssa)
76 src.src = nir_src_for_ssa(&dest->dest.ssa);
77 else {
78 assert(!dest->dest.reg.indirect);
79 src.src = nir_src_for_reg(dest->dest.reg.reg);
80 src.src.reg.base_offset = dest->dest.reg.base_offset;
81 }
82
83 for (int i = 0; i < 4; i++)
84 src.swizzle[i] = i;
85
86 return nir_fmov_alu(b, src, 4);
87 }
88
89 static nir_alu_dest
90 ptn_get_dest(struct ptn_compile *c, const struct prog_dst_register *prog_dst)
91 {
92 nir_alu_dest dest;
93
94 memset(&dest, 0, sizeof(dest));
95
96 switch (prog_dst->File) {
97 case PROGRAM_TEMPORARY:
98 dest.dest.reg.reg = c->temp_regs[prog_dst->Index];
99 break;
100 case PROGRAM_OUTPUT:
101 dest.dest.reg.reg = c->output_regs[prog_dst->Index];
102 break;
103 case PROGRAM_ADDRESS:
104 assert(prog_dst->Index == 0);
105 dest.dest.reg.reg = c->addr_reg;
106 break;
107 case PROGRAM_UNDEFINED:
108 break;
109 }
110
111 dest.write_mask = prog_dst->WriteMask;
112 dest.saturate = false;
113
114 assert(!prog_dst->RelAddr);
115
116 return dest;
117 }
118
119 static nir_ssa_def *
120 ptn_get_src(struct ptn_compile *c, const struct prog_src_register *prog_src)
121 {
122 nir_builder *b = &c->build;
123 nir_alu_src src;
124
125 memset(&src, 0, sizeof(src));
126
127 switch (prog_src->File) {
128 case PROGRAM_UNDEFINED:
129 return nir_imm_float(b, 0.0);
130 case PROGRAM_TEMPORARY:
131 assert(!prog_src->RelAddr && prog_src->Index >= 0);
132 src.src.reg.reg = c->temp_regs[prog_src->Index];
133 break;
134 case PROGRAM_INPUT: {
135 /* ARB_vertex_program doesn't allow relative addressing on vertex
136 * attributes; ARB_fragment_program has no relative addressing at all.
137 */
138 assert(!prog_src->RelAddr);
139
140 assert(prog_src->Index >= 0 && prog_src->Index < VARYING_SLOT_MAX);
141
142 nir_variable *var = c->input_vars[prog_src->Index];
143 src.src = nir_src_for_ssa(nir_load_var(b, var));
144 break;
145 }
146 case PROGRAM_SYSTEM_VALUE: {
147 assert(!prog_src->RelAddr);
148
149 assert(prog_src->Index >= 0 && prog_src->Index < SYSTEM_VALUE_MAX);
150
151 nir_variable *var = c->sysval_vars[prog_src->Index];
152 src.src = nir_src_for_ssa(nir_load_var(b, var));
153 break;
154 }
155 case PROGRAM_STATE_VAR:
156 case PROGRAM_CONSTANT: {
157 /* We actually want to look at the type in the Parameters list for this,
158 * because it lets us upload constant builtin uniforms as actual
159 * constants.
160 */
161 struct gl_program_parameter_list *plist = c->prog->Parameters;
162 gl_register_file file = prog_src->RelAddr ? prog_src->File :
163 plist->Parameters[prog_src->Index].Type;
164
165 switch (file) {
166 case PROGRAM_CONSTANT:
167 if ((c->prog->arb.IndirectRegisterFiles &
168 (1 << PROGRAM_CONSTANT)) == 0) {
169 unsigned pvo = plist->ParameterValueOffset[prog_src->Index];
170 float *v = (float *) plist->ParameterValues + pvo;
171 src.src = nir_src_for_ssa(nir_imm_vec4(b, v[0], v[1], v[2], v[3]));
172 break;
173 }
174 /* FALLTHROUGH */
175 case PROGRAM_STATE_VAR: {
176 assert(c->parameters != NULL);
177
178 nir_deref_instr *deref = nir_build_deref_var(b, c->parameters);
179
180 nir_ssa_def *index = nir_imm_int(b, prog_src->Index);
181 if (prog_src->RelAddr)
182 index = nir_iadd(b, index, nir_load_reg(b, c->addr_reg));
183 deref = nir_build_deref_array(b, deref, nir_channel(b, index, 0));
184
185 src.src = nir_src_for_ssa(nir_load_deref(b, deref));
186 break;
187 }
188 default:
189 fprintf(stderr, "bad uniform src register file: %s (%d)\n",
190 _mesa_register_file_name(file), file);
191 abort();
192 }
193 break;
194 }
195 default:
196 fprintf(stderr, "unknown src register file: %s (%d)\n",
197 _mesa_register_file_name(prog_src->File), prog_src->File);
198 abort();
199 }
200
201 nir_ssa_def *def;
202 if (!HAS_EXTENDED_SWIZZLE(prog_src->Swizzle) &&
203 (prog_src->Negate == NEGATE_NONE || prog_src->Negate == NEGATE_XYZW)) {
204 /* The simple non-SWZ case. */
205 for (int i = 0; i < 4; i++)
206 src.swizzle[i] = GET_SWZ(prog_src->Swizzle, i);
207
208 def = nir_fmov_alu(b, src, 4);
209
210 if (prog_src->Negate)
211 def = nir_fneg(b, def);
212 } else {
213 /* The SWZ instruction allows per-component zero/one swizzles, and also
214 * per-component negation.
215 */
216 nir_ssa_def *chans[4];
217 for (int i = 0; i < 4; i++) {
218 int swizzle = GET_SWZ(prog_src->Swizzle, i);
219 if (swizzle == SWIZZLE_ZERO) {
220 chans[i] = nir_imm_float(b, 0.0);
221 } else if (swizzle == SWIZZLE_ONE) {
222 chans[i] = nir_imm_float(b, 1.0);
223 } else {
224 assert(swizzle != SWIZZLE_NIL);
225 nir_alu_instr *mov = nir_alu_instr_create(b->shader, nir_op_fmov);
226 nir_ssa_dest_init(&mov->instr, &mov->dest.dest, 1, 32, NULL);
227 mov->dest.write_mask = 0x1;
228 mov->src[0] = src;
229 mov->src[0].swizzle[0] = swizzle;
230 nir_builder_instr_insert(b, &mov->instr);
231
232 chans[i] = &mov->dest.dest.ssa;
233 }
234
235 if (prog_src->Negate & (1 << i))
236 chans[i] = nir_fneg(b, chans[i]);
237 }
238 def = nir_vec4(b, chans[0], chans[1], chans[2], chans[3]);
239 }
240
241 return def;
242 }
243
244 static void
245 ptn_alu(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
246 {
247 unsigned num_srcs = nir_op_infos[op].num_inputs;
248 nir_alu_instr *instr = nir_alu_instr_create(b->shader, op);
249 unsigned i;
250
251 for (i = 0; i < num_srcs; i++)
252 instr->src[i].src = nir_src_for_ssa(src[i]);
253
254 instr->dest = dest;
255 nir_builder_instr_insert(b, &instr->instr);
256 }
257
258 static void
259 ptn_move_dest_masked(nir_builder *b, nir_alu_dest dest,
260 nir_ssa_def *def, unsigned write_mask)
261 {
262 if (!(dest.write_mask & write_mask))
263 return;
264
265 nir_alu_instr *mov = nir_alu_instr_create(b->shader, nir_op_fmov);
266 if (!mov)
267 return;
268
269 mov->dest = dest;
270 mov->dest.write_mask &= write_mask;
271 mov->src[0].src = nir_src_for_ssa(def);
272 for (unsigned i = def->num_components; i < 4; i++)
273 mov->src[0].swizzle[i] = def->num_components - 1;
274 nir_builder_instr_insert(b, &mov->instr);
275 }
276
277 static void
278 ptn_move_dest(nir_builder *b, nir_alu_dest dest, nir_ssa_def *def)
279 {
280 ptn_move_dest_masked(b, dest, def, WRITEMASK_XYZW);
281 }
282
283 static void
284 ptn_arl(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
285 {
286 ptn_move_dest(b, dest, nir_f2i32(b, nir_ffloor(b, src[0])));
287 }
288
289 /* EXP - Approximate Exponential Base 2
290 * dst.x = 2^{\lfloor src.x\rfloor}
291 * dst.y = src.x - \lfloor src.x\rfloor
292 * dst.z = 2^{src.x}
293 * dst.w = 1.0
294 */
295 static void
296 ptn_exp(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
297 {
298 nir_ssa_def *srcx = ptn_channel(b, src[0], X);
299
300 ptn_move_dest_masked(b, dest, nir_fexp2(b, nir_ffloor(b, srcx)), WRITEMASK_X);
301 ptn_move_dest_masked(b, dest, nir_fsub(b, srcx, nir_ffloor(b, srcx)), WRITEMASK_Y);
302 ptn_move_dest_masked(b, dest, nir_fexp2(b, srcx), WRITEMASK_Z);
303 ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_W);
304 }
305
306 /* LOG - Approximate Logarithm Base 2
307 * dst.x = \lfloor\log_2{|src.x|}\rfloor
308 * dst.y = |src.x| * 2^{-\lfloor\log_2{|src.x|}\rfloor}}
309 * dst.z = \log_2{|src.x|}
310 * dst.w = 1.0
311 */
312 static void
313 ptn_log(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
314 {
315 nir_ssa_def *abs_srcx = nir_fabs(b, ptn_channel(b, src[0], X));
316 nir_ssa_def *log2 = nir_flog2(b, abs_srcx);
317 nir_ssa_def *floor_log2 = nir_ffloor(b, log2);
318
319 ptn_move_dest_masked(b, dest, floor_log2, WRITEMASK_X);
320 ptn_move_dest_masked(b, dest,
321 nir_fmul(b, abs_srcx,
322 nir_fexp2(b, nir_fneg(b, floor_log2))),
323 WRITEMASK_Y);
324 ptn_move_dest_masked(b, dest, log2, WRITEMASK_Z);
325 ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_W);
326 }
327
328 /* DST - Distance Vector
329 * dst.x = 1.0
330 * dst.y = src0.y \times src1.y
331 * dst.z = src0.z
332 * dst.w = src1.w
333 */
334 static void
335 ptn_dst(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
336 {
337 ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_X);
338 ptn_move_dest_masked(b, dest, nir_fmul(b, src[0], src[1]), WRITEMASK_Y);
339 ptn_move_dest_masked(b, dest, nir_fmov(b, src[0]), WRITEMASK_Z);
340 ptn_move_dest_masked(b, dest, nir_fmov(b, src[1]), WRITEMASK_W);
341 }
342
343 /* LIT - Light Coefficients
344 * dst.x = 1.0
345 * dst.y = max(src.x, 0.0)
346 * dst.z = (src.x > 0.0) ? max(src.y, 0.0)^{clamp(src.w, -128.0, 128.0))} : 0
347 * dst.w = 1.0
348 */
349 static void
350 ptn_lit(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
351 {
352 ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_XW);
353
354 ptn_move_dest_masked(b, dest, nir_fmax(b, ptn_channel(b, src[0], X),
355 nir_imm_float(b, 0.0)), WRITEMASK_Y);
356
357 if (dest.write_mask & WRITEMASK_Z) {
358 nir_ssa_def *src0_y = ptn_channel(b, src[0], Y);
359 nir_ssa_def *wclamp = nir_fmax(b, nir_fmin(b, ptn_channel(b, src[0], W),
360 nir_imm_float(b, 128.0)),
361 nir_imm_float(b, -128.0));
362 nir_ssa_def *pow = nir_fpow(b, nir_fmax(b, src0_y, nir_imm_float(b, 0.0)),
363 wclamp);
364
365 nir_ssa_def *z = nir_bcsel(b,
366 nir_fge(b, nir_imm_float(b, 0.0), ptn_channel(b, src[0], X)),
367 nir_imm_float(b, 0.0),
368 pow);
369
370 ptn_move_dest_masked(b, dest, z, WRITEMASK_Z);
371 }
372 }
373
374 /* SCS - Sine Cosine
375 * dst.x = \cos{src.x}
376 * dst.y = \sin{src.x}
377 * dst.z = 0.0
378 * dst.w = 1.0
379 */
380 static void
381 ptn_scs(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
382 {
383 ptn_move_dest_masked(b, dest, nir_fcos(b, ptn_channel(b, src[0], X)),
384 WRITEMASK_X);
385 ptn_move_dest_masked(b, dest, nir_fsin(b, ptn_channel(b, src[0], X)),
386 WRITEMASK_Y);
387 ptn_move_dest_masked(b, dest, nir_imm_float(b, 0.0), WRITEMASK_Z);
388 ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_W);
389 }
390
391 /**
392 * Emit SLT. For platforms with integers, prefer b2f(flt(...)).
393 */
394 static void
395 ptn_slt(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
396 {
397 ptn_move_dest(b, dest, nir_b2f32(b, nir_flt(b, src[0], src[1])));
398 }
399
400 /**
401 * Emit SGE. For platforms with integers, prefer b2f(fge(...)).
402 */
403 static void
404 ptn_sge(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
405 {
406 ptn_move_dest(b, dest, nir_b2f32(b, nir_fge(b, src[0], src[1])));
407 }
408
409 static void
410 ptn_xpd(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
411 {
412 ptn_move_dest_masked(b, dest,
413 nir_fsub(b,
414 nir_fmul(b,
415 nir_swizzle(b, src[0], SWIZ(Y, Z, X, W), 3, true),
416 nir_swizzle(b, src[1], SWIZ(Z, X, Y, W), 3, true)),
417 nir_fmul(b,
418 nir_swizzle(b, src[1], SWIZ(Y, Z, X, W), 3, true),
419 nir_swizzle(b, src[0], SWIZ(Z, X, Y, W), 3, true))),
420 WRITEMASK_XYZ);
421 ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_W);
422 }
423
424 static void
425 ptn_dp2(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
426 {
427 ptn_move_dest(b, dest, nir_fdot2(b, src[0], src[1]));
428 }
429
430 static void
431 ptn_dp3(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
432 {
433 ptn_move_dest(b, dest, nir_fdot3(b, src[0], src[1]));
434 }
435
436 static void
437 ptn_dp4(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
438 {
439 ptn_move_dest(b, dest, nir_fdot4(b, src[0], src[1]));
440 }
441
442 static void
443 ptn_dph(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
444 {
445 ptn_move_dest(b, dest, nir_fdph(b, src[0], src[1]));
446 }
447
448 static void
449 ptn_cmp(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
450 {
451 ptn_move_dest(b, dest, nir_bcsel(b,
452 nir_flt(b, src[0], nir_imm_float(b, 0.0)),
453 src[1], src[2]));
454 }
455
456 static void
457 ptn_lrp(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
458 {
459 ptn_move_dest(b, dest, nir_flrp(b, src[2], src[1], src[0]));
460 }
461
462 static void
463 ptn_kil(nir_builder *b, nir_ssa_def **src)
464 {
465 nir_ssa_def *cmp = nir_bany(b, nir_flt(b, src[0], nir_imm_float(b, 0.0)));
466
467 nir_intrinsic_instr *discard =
468 nir_intrinsic_instr_create(b->shader, nir_intrinsic_discard_if);
469 discard->src[0] = nir_src_for_ssa(cmp);
470 nir_builder_instr_insert(b, &discard->instr);
471 }
472
473 static void
474 ptn_tex(struct ptn_compile *c, nir_alu_dest dest, nir_ssa_def **src,
475 struct prog_instruction *prog_inst)
476 {
477 nir_builder *b = &c->build;
478 nir_tex_instr *instr;
479 nir_texop op;
480 unsigned num_srcs;
481
482 switch (prog_inst->Opcode) {
483 case OPCODE_TEX:
484 op = nir_texop_tex;
485 num_srcs = 1;
486 break;
487 case OPCODE_TXB:
488 op = nir_texop_txb;
489 num_srcs = 2;
490 break;
491 case OPCODE_TXD:
492 op = nir_texop_txd;
493 num_srcs = 3;
494 break;
495 case OPCODE_TXL:
496 op = nir_texop_txl;
497 num_srcs = 2;
498 break;
499 case OPCODE_TXP:
500 op = nir_texop_tex;
501 num_srcs = 2;
502 break;
503 default:
504 fprintf(stderr, "unknown tex op %d\n", prog_inst->Opcode);
505 abort();
506 }
507
508 /* Deref sources */
509 num_srcs += 2;
510
511 if (prog_inst->TexShadow)
512 num_srcs++;
513
514 instr = nir_tex_instr_create(b->shader, num_srcs);
515 instr->op = op;
516 instr->dest_type = nir_type_float;
517 instr->is_shadow = prog_inst->TexShadow;
518
519 switch (prog_inst->TexSrcTarget) {
520 case TEXTURE_1D_INDEX:
521 instr->sampler_dim = GLSL_SAMPLER_DIM_1D;
522 break;
523 case TEXTURE_2D_INDEX:
524 instr->sampler_dim = GLSL_SAMPLER_DIM_2D;
525 break;
526 case TEXTURE_3D_INDEX:
527 instr->sampler_dim = GLSL_SAMPLER_DIM_3D;
528 break;
529 case TEXTURE_CUBE_INDEX:
530 instr->sampler_dim = GLSL_SAMPLER_DIM_CUBE;
531 break;
532 case TEXTURE_RECT_INDEX:
533 instr->sampler_dim = GLSL_SAMPLER_DIM_RECT;
534 break;
535 default:
536 fprintf(stderr, "Unknown texture target %d\n", prog_inst->TexSrcTarget);
537 abort();
538 }
539
540 switch (instr->sampler_dim) {
541 case GLSL_SAMPLER_DIM_1D:
542 case GLSL_SAMPLER_DIM_BUF:
543 instr->coord_components = 1;
544 break;
545 case GLSL_SAMPLER_DIM_2D:
546 case GLSL_SAMPLER_DIM_RECT:
547 case GLSL_SAMPLER_DIM_EXTERNAL:
548 case GLSL_SAMPLER_DIM_MS:
549 instr->coord_components = 2;
550 break;
551 case GLSL_SAMPLER_DIM_3D:
552 case GLSL_SAMPLER_DIM_CUBE:
553 instr->coord_components = 3;
554 break;
555 case GLSL_SAMPLER_DIM_SUBPASS:
556 case GLSL_SAMPLER_DIM_SUBPASS_MS:
557 unreachable("can't reach");
558 }
559
560 nir_variable *var = c->sampler_vars[prog_inst->TexSrcUnit];
561 if (!var) {
562 const struct glsl_type *type =
563 glsl_sampler_type(instr->sampler_dim, false, false, GLSL_TYPE_FLOAT);
564 var = nir_variable_create(b->shader, nir_var_uniform, type, "sampler");
565 var->data.binding = prog_inst->TexSrcUnit;
566 var->data.explicit_binding = true;
567 c->sampler_vars[prog_inst->TexSrcUnit] = var;
568 }
569
570 nir_deref_instr *deref = nir_build_deref_var(b, var);
571
572 unsigned src_number = 0;
573
574 instr->src[src_number].src = nir_src_for_ssa(&deref->dest.ssa);
575 instr->src[src_number].src_type = nir_tex_src_texture_deref;
576 src_number++;
577 instr->src[src_number].src = nir_src_for_ssa(&deref->dest.ssa);
578 instr->src[src_number].src_type = nir_tex_src_sampler_deref;
579 src_number++;
580
581 instr->src[src_number].src =
582 nir_src_for_ssa(nir_swizzle(b, src[0], SWIZ(X, Y, Z, W),
583 instr->coord_components, true));
584 instr->src[src_number].src_type = nir_tex_src_coord;
585 src_number++;
586
587 if (prog_inst->Opcode == OPCODE_TXP) {
588 instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], W));
589 instr->src[src_number].src_type = nir_tex_src_projector;
590 src_number++;
591 }
592
593 if (prog_inst->Opcode == OPCODE_TXB) {
594 instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], W));
595 instr->src[src_number].src_type = nir_tex_src_bias;
596 src_number++;
597 }
598
599 if (prog_inst->Opcode == OPCODE_TXL) {
600 instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], W));
601 instr->src[src_number].src_type = nir_tex_src_lod;
602 src_number++;
603 }
604
605 if (instr->is_shadow) {
606 if (instr->coord_components < 3)
607 instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], Z));
608 else
609 instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], W));
610
611 instr->src[src_number].src_type = nir_tex_src_comparator;
612 src_number++;
613 }
614
615 assert(src_number == num_srcs);
616
617 nir_ssa_dest_init(&instr->instr, &instr->dest, 4, 32, NULL);
618 nir_builder_instr_insert(b, &instr->instr);
619
620 /* Resolve the writemask on the texture op. */
621 ptn_move_dest(b, dest, &instr->dest.ssa);
622 }
623
624 static const nir_op op_trans[MAX_OPCODE] = {
625 [OPCODE_NOP] = 0,
626 [OPCODE_ABS] = nir_op_fabs,
627 [OPCODE_ADD] = nir_op_fadd,
628 [OPCODE_ARL] = 0,
629 [OPCODE_CMP] = 0,
630 [OPCODE_COS] = 0,
631 [OPCODE_DDX] = nir_op_fddx,
632 [OPCODE_DDY] = nir_op_fddy,
633 [OPCODE_DP2] = 0,
634 [OPCODE_DP3] = 0,
635 [OPCODE_DP4] = 0,
636 [OPCODE_DPH] = 0,
637 [OPCODE_DST] = 0,
638 [OPCODE_END] = 0,
639 [OPCODE_EX2] = 0,
640 [OPCODE_EXP] = 0,
641 [OPCODE_FLR] = nir_op_ffloor,
642 [OPCODE_FRC] = nir_op_ffract,
643 [OPCODE_LG2] = 0,
644 [OPCODE_LIT] = 0,
645 [OPCODE_LOG] = 0,
646 [OPCODE_LRP] = 0,
647 [OPCODE_MAD] = 0,
648 [OPCODE_MAX] = nir_op_fmax,
649 [OPCODE_MIN] = nir_op_fmin,
650 [OPCODE_MOV] = nir_op_fmov,
651 [OPCODE_MUL] = nir_op_fmul,
652 [OPCODE_POW] = 0,
653 [OPCODE_RCP] = 0,
654
655 [OPCODE_RSQ] = 0,
656 [OPCODE_SCS] = 0,
657 [OPCODE_SGE] = 0,
658 [OPCODE_SIN] = 0,
659 [OPCODE_SLT] = 0,
660 [OPCODE_SSG] = nir_op_fsign,
661 [OPCODE_SUB] = nir_op_fsub,
662 [OPCODE_SWZ] = 0,
663 [OPCODE_TEX] = 0,
664 [OPCODE_TRUNC] = nir_op_ftrunc,
665 [OPCODE_TXB] = 0,
666 [OPCODE_TXD] = 0,
667 [OPCODE_TXL] = 0,
668 [OPCODE_TXP] = 0,
669 [OPCODE_XPD] = 0,
670 };
671
672 static void
673 ptn_emit_instruction(struct ptn_compile *c, struct prog_instruction *prog_inst)
674 {
675 nir_builder *b = &c->build;
676 unsigned i;
677 const unsigned op = prog_inst->Opcode;
678
679 if (op == OPCODE_END)
680 return;
681
682 nir_ssa_def *src[3];
683 for (i = 0; i < 3; i++) {
684 src[i] = ptn_get_src(c, &prog_inst->SrcReg[i]);
685 }
686 nir_alu_dest dest = ptn_get_dest(c, &prog_inst->DstReg);
687 if (c->error)
688 return;
689
690 switch (op) {
691 case OPCODE_RSQ:
692 ptn_move_dest(b, dest,
693 nir_frsq(b, nir_fabs(b, ptn_channel(b, src[0], X))));
694 break;
695
696 case OPCODE_RCP:
697 ptn_move_dest(b, dest, nir_frcp(b, ptn_channel(b, src[0], X)));
698 break;
699
700 case OPCODE_EX2:
701 ptn_move_dest(b, dest, nir_fexp2(b, ptn_channel(b, src[0], X)));
702 break;
703
704 case OPCODE_LG2:
705 ptn_move_dest(b, dest, nir_flog2(b, ptn_channel(b, src[0], X)));
706 break;
707
708 case OPCODE_POW:
709 ptn_move_dest(b, dest, nir_fpow(b,
710 ptn_channel(b, src[0], X),
711 ptn_channel(b, src[1], X)));
712 break;
713
714 case OPCODE_COS:
715 ptn_move_dest(b, dest, nir_fcos(b, ptn_channel(b, src[0], X)));
716 break;
717
718 case OPCODE_SIN:
719 ptn_move_dest(b, dest, nir_fsin(b, ptn_channel(b, src[0], X)));
720 break;
721
722 case OPCODE_ARL:
723 ptn_arl(b, dest, src);
724 break;
725
726 case OPCODE_EXP:
727 ptn_exp(b, dest, src);
728 break;
729
730 case OPCODE_LOG:
731 ptn_log(b, dest, src);
732 break;
733
734 case OPCODE_LRP:
735 ptn_lrp(b, dest, src);
736 break;
737
738 case OPCODE_MAD:
739 ptn_move_dest(b, dest, nir_fadd(b, nir_fmul(b, src[0], src[1]), src[2]));
740 break;
741
742 case OPCODE_DST:
743 ptn_dst(b, dest, src);
744 break;
745
746 case OPCODE_LIT:
747 ptn_lit(b, dest, src);
748 break;
749
750 case OPCODE_XPD:
751 ptn_xpd(b, dest, src);
752 break;
753
754 case OPCODE_DP2:
755 ptn_dp2(b, dest, src);
756 break;
757
758 case OPCODE_DP3:
759 ptn_dp3(b, dest, src);
760 break;
761
762 case OPCODE_DP4:
763 ptn_dp4(b, dest, src);
764 break;
765
766 case OPCODE_DPH:
767 ptn_dph(b, dest, src);
768 break;
769
770 case OPCODE_KIL:
771 ptn_kil(b, src);
772 break;
773
774 case OPCODE_CMP:
775 ptn_cmp(b, dest, src);
776 break;
777
778 case OPCODE_SCS:
779 ptn_scs(b, dest, src);
780 break;
781
782 case OPCODE_SLT:
783 ptn_slt(b, dest, src);
784 break;
785
786 case OPCODE_SGE:
787 ptn_sge(b, dest, src);
788 break;
789
790 case OPCODE_TEX:
791 case OPCODE_TXB:
792 case OPCODE_TXD:
793 case OPCODE_TXL:
794 case OPCODE_TXP:
795 ptn_tex(c, dest, src, prog_inst);
796 break;
797
798 case OPCODE_SWZ:
799 /* Extended swizzles were already handled in ptn_get_src(). */
800 ptn_alu(b, nir_op_fmov, dest, src);
801 break;
802
803 case OPCODE_NOP:
804 break;
805
806 default:
807 if (op_trans[op] != 0) {
808 ptn_alu(b, op_trans[op], dest, src);
809 } else {
810 fprintf(stderr, "unknown opcode: %s\n", _mesa_opcode_string(op));
811 abort();
812 }
813 break;
814 }
815
816 if (prog_inst->Saturate) {
817 assert(prog_inst->Saturate);
818 assert(!dest.dest.is_ssa);
819 ptn_move_dest(b, dest, nir_fsat(b, ptn_src_for_dest(c, &dest)));
820 }
821 }
822
823 /**
824 * Puts a NIR intrinsic to store of each PROGRAM_OUTPUT value to the output
825 * variables at the end of the shader.
826 *
827 * We don't generate these incrementally as the PROGRAM_OUTPUT values are
828 * written, because there's no output load intrinsic, which means we couldn't
829 * handle writemasks.
830 */
831 static void
832 ptn_add_output_stores(struct ptn_compile *c)
833 {
834 nir_builder *b = &c->build;
835
836 nir_foreach_variable(var, &b->shader->outputs) {
837 nir_ssa_def *src = nir_load_reg(b, c->output_regs[var->data.location]);
838 if (c->prog->Target == GL_FRAGMENT_PROGRAM_ARB &&
839 var->data.location == FRAG_RESULT_DEPTH) {
840 /* result.depth has this strange convention of being the .z component of
841 * a vec4 with undefined .xyw components. We resolve it to a scalar, to
842 * match GLSL's gl_FragDepth and the expectations of most backends.
843 */
844 src = nir_channel(b, src, 2);
845 }
846 if (c->prog->Target == GL_VERTEX_PROGRAM_ARB &&
847 var->data.location == VARYING_SLOT_FOGC) {
848 /* result.fogcoord is a single component value */
849 src = nir_channel(b, src, 0);
850 }
851 unsigned num_components = glsl_get_vector_elements(var->type);
852 nir_store_var(b, var, src, (1 << num_components) - 1);
853 }
854 }
855
856 static void
857 setup_registers_and_variables(struct ptn_compile *c)
858 {
859 nir_builder *b = &c->build;
860 struct nir_shader *shader = b->shader;
861
862 /* Create input variables. */
863 uint64_t inputs_read = c->prog->info.inputs_read;
864 while (inputs_read) {
865 const int i = u_bit_scan64(&inputs_read);
866
867 nir_variable *var =
868 nir_variable_create(shader, nir_var_shader_in, glsl_vec4_type(),
869 ralloc_asprintf(shader, "in_%d", i));
870 var->data.location = i;
871 var->data.index = 0;
872
873 if (c->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
874 if (i == VARYING_SLOT_FOGC) {
875 /* fogcoord is defined as <f, 0.0, 0.0, 1.0>. Make the actual
876 * input variable a float, and create a local containing the
877 * full vec4 value.
878 */
879 var->type = glsl_float_type();
880
881 nir_variable *fullvar =
882 nir_local_variable_create(b->impl, glsl_vec4_type(),
883 "fogcoord_tmp");
884
885 nir_store_var(b, fullvar,
886 nir_vec4(b, nir_load_var(b, var),
887 nir_imm_float(b, 0.0),
888 nir_imm_float(b, 0.0),
889 nir_imm_float(b, 1.0)),
890 WRITEMASK_XYZW);
891
892 /* We inserted the real input into the list so the driver has real
893 * inputs, but we set c->input_vars[i] to the temporary so we use
894 * the splatted value.
895 */
896 c->input_vars[i] = fullvar;
897 continue;
898 }
899 }
900
901 c->input_vars[i] = var;
902 }
903
904 /* Create system value variables */
905 uint64_t system_values_read = c->prog->info.system_values_read;
906 while (system_values_read) {
907 const int i = u_bit_scan64(&system_values_read);
908
909 nir_variable *var =
910 nir_variable_create(shader, nir_var_system_value, glsl_vec4_type(),
911 ralloc_asprintf(shader, "sv_%d", i));
912 var->data.location = i;
913 var->data.index = 0;
914
915 c->sysval_vars[i] = var;
916 }
917
918 /* Create output registers and variables. */
919 int max_outputs = util_last_bit(c->prog->info.outputs_written);
920 c->output_regs = rzalloc_array(c, nir_register *, max_outputs);
921
922 uint64_t outputs_written = c->prog->info.outputs_written;
923 while (outputs_written) {
924 const int i = u_bit_scan64(&outputs_written);
925
926 /* Since we can't load from outputs in the IR, we make temporaries
927 * for the outputs and emit stores to the real outputs at the end of
928 * the shader.
929 */
930 nir_register *reg = nir_local_reg_create(b->impl);
931 reg->num_components = 4;
932
933 nir_variable *var = rzalloc(shader, nir_variable);
934 if ((c->prog->Target == GL_FRAGMENT_PROGRAM_ARB && i == FRAG_RESULT_DEPTH) ||
935 (c->prog->Target == GL_VERTEX_PROGRAM_ARB && i == VARYING_SLOT_FOGC))
936 var->type = glsl_float_type();
937 else
938 var->type = glsl_vec4_type();
939 var->data.mode = nir_var_shader_out;
940 var->name = ralloc_asprintf(var, "out_%d", i);
941
942 var->data.location = i;
943 var->data.index = 0;
944
945 c->output_regs[i] = reg;
946
947 exec_list_push_tail(&shader->outputs, &var->node);
948 c->output_vars[i] = var;
949 }
950
951 /* Create temporary registers. */
952 c->temp_regs = rzalloc_array(c, nir_register *,
953 c->prog->arb.NumTemporaries);
954
955 nir_register *reg;
956 for (unsigned i = 0; i < c->prog->arb.NumTemporaries; i++) {
957 reg = nir_local_reg_create(b->impl);
958 if (!reg) {
959 c->error = true;
960 return;
961 }
962 reg->num_components = 4;
963 c->temp_regs[i] = reg;
964 }
965
966 /* Create the address register (for ARB_vertex_program). */
967 reg = nir_local_reg_create(b->impl);
968 if (!reg) {
969 c->error = true;
970 return;
971 }
972 reg->num_components = 1;
973 c->addr_reg = reg;
974 }
975
976 struct nir_shader *
977 prog_to_nir(const struct gl_program *prog,
978 const nir_shader_compiler_options *options)
979 {
980 struct ptn_compile *c;
981 struct nir_shader *s;
982 gl_shader_stage stage = _mesa_program_enum_to_shader_stage(prog->Target);
983
984 c = rzalloc(NULL, struct ptn_compile);
985 if (!c)
986 return NULL;
987 c->prog = prog;
988
989 nir_builder_init_simple_shader(&c->build, NULL, stage, options);
990
991 /* Copy the shader_info from the gl_program */
992 c->build.shader->info = prog->info;
993
994 s = c->build.shader;
995
996 if (prog->Parameters->NumParameters > 0) {
997 const struct glsl_type *type =
998 glsl_array_type(glsl_vec4_type(), prog->Parameters->NumParameters, 0);
999 c->parameters =
1000 nir_variable_create(s, nir_var_uniform, type,
1001 prog->Parameters->Parameters[0].Name);
1002 }
1003
1004 setup_registers_and_variables(c);
1005 if (unlikely(c->error))
1006 goto fail;
1007
1008 for (unsigned int i = 0; i < prog->arb.NumInstructions; i++) {
1009 ptn_emit_instruction(c, &prog->arb.Instructions[i]);
1010
1011 if (unlikely(c->error))
1012 break;
1013 }
1014
1015 ptn_add_output_stores(c);
1016
1017 s->info.name = ralloc_asprintf(s, "ARB%d", prog->Id);
1018 s->info.num_textures = util_last_bit(prog->SamplersUsed);
1019 s->info.num_ubos = 0;
1020 s->info.num_abos = 0;
1021 s->info.num_ssbos = 0;
1022 s->info.num_images = 0;
1023 s->info.uses_texture_gather = false;
1024 s->info.clip_distance_array_size = 0;
1025 s->info.cull_distance_array_size = 0;
1026 s->info.separate_shader = false;
1027
1028 fail:
1029 if (c->error) {
1030 ralloc_free(s);
1031 s = NULL;
1032 }
1033 ralloc_free(c);
1034 return s;
1035 }