Fix an unused variable warning
[mesa.git] / src / mesa / program / prog_to_nir.c
1 /*
2 * Copyright © 2015 Intel Corporation
3 * Copyright © 2014-2015 Broadcom
4 * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
15 * Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
23 * IN THE SOFTWARE.
24 */
25
26 #include "nir/nir.h"
27 #include "nir/nir_builder.h"
28 #include "glsl/list.h"
29 #include "main/imports.h"
30 #include "util/ralloc.h"
31
32 #include "prog_to_nir.h"
33 #include "prog_instruction.h"
34 #include "prog_parameter.h"
35 #include "prog_print.h"
36
37 /**
38 * \file prog_to_nir.c
39 *
40 * A translator from Mesa IR (prog_instruction.h) to NIR. This is primarily
41 * intended to support ARB_vertex_program, ARB_fragment_program, and fixed-function
42 * vertex processing. Full GLSL support should use glsl_to_nir instead.
43 */
44
45 struct ptn_compile {
46 const struct gl_program *prog;
47 nir_builder build;
48 bool error;
49
50 nir_variable *parameters;
51 nir_variable *input_vars[VARYING_SLOT_MAX];
52 nir_variable *output_vars[VARYING_SLOT_MAX];
53 nir_register **output_regs;
54 nir_register **temp_regs;
55
56 nir_register *addr_reg;
57 };
58
/* Anonymous unsigned[4] swizzle array built from four SWIZZLE_* suffixes. */
#define SWIZ(X, Y, Z, W) \
   (unsigned[4]){ SWIZZLE_##X, SWIZZLE_##Y, SWIZZLE_##Z, SWIZZLE_##W }

/* Four-component swizzle of src. */
#define ptn_swizzle(b, src, x, y, z, w) \
   nir_swizzle(b, src, SWIZ(x, y, z, w), 4, true)

/* One-component value taken from channel ch of src. */
#define ptn_channel(b, src, ch) \
   nir_swizzle(b, src, SWIZ(ch, ch, ch, ch), 1, true)
63
64 static nir_ssa_def *
65 ptn_src_for_dest(struct ptn_compile *c, nir_alu_dest *dest)
66 {
67 nir_builder *b = &c->build;
68
69 nir_alu_src src;
70 memset(&src, 0, sizeof(src));
71
72 if (dest->dest.is_ssa)
73 src.src = nir_src_for_ssa(&dest->dest.ssa);
74 else {
75 assert(!dest->dest.reg.indirect);
76 src.src = nir_src_for_reg(dest->dest.reg.reg);
77 src.src.reg.base_offset = dest->dest.reg.base_offset;
78 }
79
80 for (int i = 0; i < 4; i++)
81 src.swizzle[i] = i;
82
83 return nir_fmov_alu(b, src, 4);
84 }
85
86 static nir_alu_dest
87 ptn_get_dest(struct ptn_compile *c, const struct prog_dst_register *prog_dst)
88 {
89 nir_alu_dest dest;
90
91 memset(&dest, 0, sizeof(dest));
92
93 switch (prog_dst->File) {
94 case PROGRAM_TEMPORARY:
95 dest.dest.reg.reg = c->temp_regs[prog_dst->Index];
96 break;
97 case PROGRAM_OUTPUT:
98 dest.dest.reg.reg = c->output_regs[prog_dst->Index];
99 break;
100 case PROGRAM_ADDRESS:
101 assert(prog_dst->Index == 0);
102 dest.dest.reg.reg = c->addr_reg;
103 break;
104 case PROGRAM_UNDEFINED:
105 break;
106 }
107
108 dest.write_mask = prog_dst->WriteMask;
109 dest.saturate = false;
110
111 assert(!prog_dst->RelAddr);
112
113 return dest;
114 }
115
116 static nir_ssa_def *
117 ptn_get_src(struct ptn_compile *c, const struct prog_src_register *prog_src)
118 {
119 nir_builder *b = &c->build;
120 nir_alu_src src;
121
122 memset(&src, 0, sizeof(src));
123
124 switch (prog_src->File) {
125 case PROGRAM_UNDEFINED:
126 return nir_imm_float(b, 0.0);
127 case PROGRAM_TEMPORARY:
128 assert(!prog_src->RelAddr && prog_src->Index >= 0);
129 src.src.reg.reg = c->temp_regs[prog_src->Index];
130 break;
131 case PROGRAM_INPUT: {
132 /* ARB_vertex_program doesn't allow relative addressing on vertex
133 * attributes; ARB_fragment_program has no relative addressing at all.
134 */
135 assert(!prog_src->RelAddr);
136
137 assert(prog_src->Index >= 0 && prog_src->Index < VARYING_SLOT_MAX);
138
139 nir_intrinsic_instr *load =
140 nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_var);
141 load->num_components = 4;
142 load->variables[0] = nir_deref_var_create(load, c->input_vars[prog_src->Index]);
143
144 nir_ssa_dest_init(&load->instr, &load->dest, 4, NULL);
145 nir_instr_insert_after_cf_list(b->cf_node_list, &load->instr);
146
147 src.src = nir_src_for_ssa(&load->dest.ssa);
148 break;
149 }
150 case PROGRAM_STATE_VAR:
151 case PROGRAM_CONSTANT: {
152 /* We actually want to look at the type in the Parameters list for this,
153 * because it lets us upload constant builtin uniforms as actual
154 * constants.
155 */
156 struct gl_program_parameter_list *plist = c->prog->Parameters;
157 gl_register_file file = prog_src->RelAddr ? prog_src->File :
158 plist->Parameters[prog_src->Index].Type;
159
160 switch (file) {
161 case PROGRAM_CONSTANT:
162 if ((c->prog->IndirectRegisterFiles & (1 << PROGRAM_CONSTANT)) == 0) {
163 float *v = (float *) plist->ParameterValues[prog_src->Index];
164 src.src = nir_src_for_ssa(nir_imm_vec4(b, v[0], v[1], v[2], v[3]));
165 break;
166 }
167 /* FALLTHROUGH */
168 case PROGRAM_STATE_VAR: {
169 nir_intrinsic_instr *load =
170 nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_var);
171 nir_ssa_dest_init(&load->instr, &load->dest, 4, NULL);
172 load->num_components = 4;
173
174 load->variables[0] = nir_deref_var_create(load, c->parameters);
175 nir_deref_array *deref_arr =
176 nir_deref_array_create(load->variables[0]);
177 deref_arr->deref.type = glsl_vec4_type();
178 load->variables[0]->deref.child = &deref_arr->deref;
179
180 if (prog_src->RelAddr) {
181 deref_arr->deref_array_type = nir_deref_array_type_indirect;
182
183 nir_alu_src addr_src = { NIR_SRC_INIT };
184 addr_src.src = nir_src_for_reg(c->addr_reg);
185 nir_ssa_def *reladdr = nir_imov_alu(b, addr_src, 1);
186
187 if (prog_src->Index < 0) {
188 /* This is a negative offset which should be added to the address
189 * register's value.
190 */
191 reladdr = nir_iadd(b, reladdr, nir_imm_int(b, prog_src->Index));
192
193 deref_arr->base_offset = 0;
194 } else {
195 deref_arr->base_offset = prog_src->Index;
196 }
197 deref_arr->indirect = nir_src_for_ssa(reladdr);
198 } else {
199 deref_arr->deref_array_type = nir_deref_array_type_direct;
200 deref_arr->base_offset = prog_src->Index;
201 }
202
203 nir_instr_insert_after_cf_list(b->cf_node_list, &load->instr);
204
205 src.src = nir_src_for_ssa(&load->dest.ssa);
206 break;
207 }
208 default:
209 fprintf(stderr, "bad uniform src register file: %s (%d)\n",
210 _mesa_register_file_name(file), file);
211 abort();
212 }
213 break;
214 }
215 default:
216 fprintf(stderr, "unknown src register file: %s (%d)\n",
217 _mesa_register_file_name(prog_src->File), prog_src->File);
218 abort();
219 }
220
221 nir_ssa_def *def;
222 if (!HAS_EXTENDED_SWIZZLE(prog_src->Swizzle) &&
223 (prog_src->Negate == NEGATE_NONE || prog_src->Negate == NEGATE_XYZW)) {
224 /* The simple non-SWZ case. */
225 for (int i = 0; i < 4; i++)
226 src.swizzle[i] = GET_SWZ(prog_src->Swizzle, i);
227
228 def = nir_fmov_alu(b, src, 4);
229
230 if (prog_src->Abs)
231 def = nir_fabs(b, def);
232
233 if (prog_src->Negate)
234 def = nir_fneg(b, def);
235 } else {
236 /* The SWZ instruction allows per-component zero/one swizzles, and also
237 * per-component negation.
238 */
239 nir_ssa_def *chans[4];
240 for (int i = 0; i < 4; i++) {
241 int swizzle = GET_SWZ(prog_src->Swizzle, i);
242 if (swizzle == SWIZZLE_ZERO) {
243 chans[i] = nir_imm_float(b, 0.0);
244 } else if (swizzle == SWIZZLE_ONE) {
245 chans[i] = nir_imm_float(b, 1.0);
246 } else {
247 assert(swizzle != SWIZZLE_NIL);
248 nir_alu_instr *mov = nir_alu_instr_create(b->shader, nir_op_fmov);
249 nir_ssa_dest_init(&mov->instr, &mov->dest.dest, 1, NULL);
250 mov->dest.write_mask = 0x1;
251 mov->src[0] = src;
252 mov->src[0].swizzle[0] = swizzle;
253 nir_instr_insert_after_cf_list(b->cf_node_list, &mov->instr);
254
255 chans[i] = &mov->dest.dest.ssa;
256 }
257
258 if (prog_src->Abs)
259 chans[i] = nir_fabs(b, chans[i]);
260
261 if (prog_src->Negate & (1 << i))
262 chans[i] = nir_fneg(b, chans[i]);
263 }
264 def = nir_vec4(b, chans[0], chans[1], chans[2], chans[3]);
265 }
266
267 return def;
268 }
269
270 static void
271 ptn_alu(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
272 {
273 unsigned num_srcs = nir_op_infos[op].num_inputs;
274 nir_alu_instr *instr = nir_alu_instr_create(b->shader, op);
275 unsigned i;
276
277 for (i = 0; i < num_srcs; i++)
278 instr->src[i].src = nir_src_for_ssa(src[i]);
279
280 instr->dest = dest;
281 nir_instr_insert_after_cf_list(b->cf_node_list, &instr->instr);
282 }
283
284 static void
285 ptn_move_dest_masked(nir_builder *b, nir_alu_dest dest,
286 nir_ssa_def *def, unsigned write_mask)
287 {
288 if (!(dest.write_mask & write_mask))
289 return;
290
291 nir_alu_instr *mov = nir_alu_instr_create(b->shader, nir_op_fmov);
292 if (!mov)
293 return;
294
295 mov->dest = dest;
296 mov->dest.write_mask &= write_mask;
297 mov->src[0].src = nir_src_for_ssa(def);
298 for (unsigned i = def->num_components; i < 4; i++)
299 mov->src[0].swizzle[i] = def->num_components - 1;
300 nir_instr_insert_after_cf_list(b->cf_node_list, &mov->instr);
301 }
302
303 static void
304 ptn_move_dest(nir_builder *b, nir_alu_dest dest, nir_ssa_def *def)
305 {
306 ptn_move_dest_masked(b, dest, def, WRITEMASK_XYZW);
307 }
308
309 static void
310 ptn_arl(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
311 {
312 ptn_move_dest(b, dest, nir_f2i(b, nir_ffloor(b, src[0])));
313 }
314
315 /* EXP - Approximate Exponential Base 2
316 * dst.x = 2^{\lfloor src.x\rfloor}
317 * dst.y = src.x - \lfloor src.x\rfloor
318 * dst.z = 2^{src.x}
319 * dst.w = 1.0
320 */
321 static void
322 ptn_exp(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
323 {
324 nir_ssa_def *srcx = ptn_channel(b, src[0], X);
325
326 ptn_move_dest_masked(b, dest, nir_fexp2(b, nir_ffloor(b, srcx)), WRITEMASK_X);
327 ptn_move_dest_masked(b, dest, nir_fsub(b, srcx, nir_ffloor(b, srcx)), WRITEMASK_Y);
328 ptn_move_dest_masked(b, dest, nir_fexp2(b, srcx), WRITEMASK_Z);
329 ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_W);
330 }
331
332 /* LOG - Approximate Logarithm Base 2
333 * dst.x = \lfloor\log_2{|src.x|}\rfloor
334 * dst.y = |src.x| * 2^{-\lfloor\log_2{|src.x|}\rfloor}}
335 * dst.z = \log_2{|src.x|}
336 * dst.w = 1.0
337 */
338 static void
339 ptn_log(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
340 {
341 nir_ssa_def *abs_srcx = nir_fabs(b, ptn_channel(b, src[0], X));
342 nir_ssa_def *log2 = nir_flog2(b, abs_srcx);
343 nir_ssa_def *floor_log2 = nir_ffloor(b, log2);
344
345 ptn_move_dest_masked(b, dest, floor_log2, WRITEMASK_X);
346 ptn_move_dest_masked(b, dest,
347 nir_fmul(b, abs_srcx,
348 nir_fexp2(b, nir_fneg(b, floor_log2))),
349 WRITEMASK_Y);
350 ptn_move_dest_masked(b, dest, log2, WRITEMASK_Z);
351 ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_W);
352 }
353
354 /* DST - Distance Vector
355 * dst.x = 1.0
356 * dst.y = src0.y \times src1.y
357 * dst.z = src0.z
358 * dst.w = src1.w
359 */
360 static void
361 ptn_dst(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
362 {
363 ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_X);
364 ptn_move_dest_masked(b, dest, nir_fmul(b, src[0], src[1]), WRITEMASK_Y);
365 ptn_move_dest_masked(b, dest, nir_fmov(b, src[0]), WRITEMASK_Z);
366 ptn_move_dest_masked(b, dest, nir_fmov(b, src[1]), WRITEMASK_W);
367 }
368
369 /* LIT - Light Coefficients
370 * dst.x = 1.0
371 * dst.y = max(src.x, 0.0)
372 * dst.z = (src.x > 0.0) ? max(src.y, 0.0)^{clamp(src.w, -128.0, 128.0))} : 0
373 * dst.w = 1.0
374 */
375 static void
376 ptn_lit(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
377 {
378 ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_XW);
379
380 ptn_move_dest_masked(b, dest, nir_fmax(b, ptn_channel(b, src[0], X),
381 nir_imm_float(b, 0.0)), WRITEMASK_Y);
382
383 if (dest.write_mask & WRITEMASK_Z) {
384 nir_ssa_def *src0_y = ptn_channel(b, src[0], Y);
385 nir_ssa_def *wclamp = nir_fmax(b, nir_fmin(b, ptn_channel(b, src[0], W),
386 nir_imm_float(b, 128.0)),
387 nir_imm_float(b, -128.0));
388 nir_ssa_def *pow = nir_fpow(b, nir_fmax(b, src0_y, nir_imm_float(b, 0.0)),
389 wclamp);
390
391 nir_ssa_def *z;
392 if (b->shader->options->native_integers) {
393 z = nir_bcsel(b,
394 nir_fge(b, nir_imm_float(b, 0.0), ptn_channel(b, src[0], X)),
395 nir_imm_float(b, 0.0),
396 pow);
397 } else {
398 z = nir_fcsel(b,
399 nir_sge(b, nir_imm_float(b, 0.0), ptn_channel(b, src[0], X)),
400 nir_imm_float(b, 0.0),
401 pow);
402 }
403
404 ptn_move_dest_masked(b, dest, z, WRITEMASK_Z);
405 }
406 }
407
408 /* SCS - Sine Cosine
409 * dst.x = \cos{src.x}
410 * dst.y = \sin{src.x}
411 * dst.z = 0.0
412 * dst.w = 1.0
413 */
414 static void
415 ptn_scs(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
416 {
417 ptn_move_dest_masked(b, dest, nir_fcos(b, ptn_channel(b, src[0], X)),
418 WRITEMASK_X);
419 ptn_move_dest_masked(b, dest, nir_fsin(b, ptn_channel(b, src[0], X)),
420 WRITEMASK_Y);
421 ptn_move_dest_masked(b, dest, nir_imm_float(b, 0.0), WRITEMASK_Z);
422 ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_W);
423 }
424
425 /**
426 * Emit SLT. For platforms with integers, prefer b2f(flt(...)).
427 */
428 static void
429 ptn_slt(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
430 {
431 if (b->shader->options->native_integers) {
432 ptn_move_dest(b, dest, nir_b2f(b, nir_flt(b, src[0], src[1])));
433 } else {
434 ptn_move_dest(b, dest, nir_slt(b, src[0], src[1]));
435 }
436 }
437
438 /**
439 * Emit SGE. For platforms with integers, prefer b2f(fge(...)).
440 */
441 static void
442 ptn_sge(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
443 {
444 if (b->shader->options->native_integers) {
445 ptn_move_dest(b, dest, nir_b2f(b, nir_fge(b, src[0], src[1])));
446 } else {
447 ptn_move_dest(b, dest, nir_sge(b, src[0], src[1]));
448 }
449 }
450
451 static void
452 ptn_sle(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
453 {
454 nir_ssa_def *commuted[] = { src[1], src[0] };
455 ptn_sge(b, dest, commuted);
456 }
457
458 static void
459 ptn_sgt(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
460 {
461 nir_ssa_def *commuted[] = { src[1], src[0] };
462 ptn_slt(b, dest, commuted);
463 }
464
465 /**
466 * Emit SEQ. For platforms with integers, prefer b2f(feq(...)).
467 */
468 static void
469 ptn_seq(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
470 {
471 if (b->shader->options->native_integers) {
472 ptn_move_dest(b, dest, nir_b2f(b, nir_feq(b, src[0], src[1])));
473 } else {
474 ptn_move_dest(b, dest, nir_seq(b, src[0], src[1]));
475 }
476 }
477
478 /**
479 * Emit SNE. For platforms with integers, prefer b2f(fne(...)).
480 */
481 static void
482 ptn_sne(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
483 {
484 if (b->shader->options->native_integers) {
485 ptn_move_dest(b, dest, nir_b2f(b, nir_fne(b, src[0], src[1])));
486 } else {
487 ptn_move_dest(b, dest, nir_sne(b, src[0], src[1]));
488 }
489 }
490
491 static void
492 ptn_xpd(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
493 {
494 ptn_move_dest_masked(b, dest,
495 nir_fsub(b,
496 nir_fmul(b,
497 ptn_swizzle(b, src[0], Y, Z, X, X),
498 ptn_swizzle(b, src[1], Z, X, Y, X)),
499 nir_fmul(b,
500 ptn_swizzle(b, src[1], Y, Z, X, X),
501 ptn_swizzle(b, src[0], Z, X, Y, X))),
502 WRITEMASK_XYZ);
503 ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_W);
504 }
505
506 static void
507 ptn_dp2(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
508 {
509 ptn_move_dest(b, dest, nir_fdot2(b, src[0], src[1]));
510 }
511
512 static void
513 ptn_dp3(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
514 {
515 ptn_move_dest(b, dest, nir_fdot3(b, src[0], src[1]));
516 }
517
518 static void
519 ptn_dp4(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
520 {
521 ptn_move_dest(b, dest, nir_fdot4(b, src[0], src[1]));
522 }
523
524 static void
525 ptn_dph(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
526 {
527 nir_ssa_def *dp3 = nir_fdot3(b, src[0], src[1]);
528 ptn_move_dest(b, dest, nir_fadd(b, dp3, ptn_channel(b, src[1], W)));
529 }
530
531 static void
532 ptn_cmp(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
533 {
534 if (b->shader->options->native_integers) {
535 ptn_move_dest(b, dest, nir_bcsel(b,
536 nir_flt(b, src[0], nir_imm_float(b, 0.0)),
537 src[1], src[2]));
538 } else {
539 ptn_move_dest(b, dest, nir_fcsel(b,
540 nir_slt(b, src[0], nir_imm_float(b, 0.0)),
541 src[1], src[2]));
542 }
543 }
544
545 static void
546 ptn_lrp(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
547 {
548 ptn_move_dest(b, dest, nir_flrp(b, src[2], src[1], src[0]));
549 }
550
551 static void
552 ptn_kil(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
553 {
554 nir_ssa_def *cmp = b->shader->options->native_integers ?
555 nir_bany4(b, nir_flt(b, src[0], nir_imm_float(b, 0.0))) :
556 nir_fany4(b, nir_slt(b, src[0], nir_imm_float(b, 0.0)));
557
558 nir_intrinsic_instr *discard =
559 nir_intrinsic_instr_create(b->shader, nir_intrinsic_discard_if);
560 discard->src[0] = nir_src_for_ssa(cmp);
561 nir_instr_insert_after_cf_list(b->cf_node_list, &discard->instr);
562 }
563
564 static void
565 ptn_tex(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src,
566 struct prog_instruction *prog_inst)
567 {
568 nir_tex_instr *instr;
569 nir_texop op;
570 unsigned num_srcs;
571
572 switch (prog_inst->Opcode) {
573 case OPCODE_TEX:
574 op = nir_texop_tex;
575 num_srcs = 1;
576 break;
577 case OPCODE_TXB:
578 op = nir_texop_txb;
579 num_srcs = 2;
580 break;
581 case OPCODE_TXD:
582 op = nir_texop_txd;
583 num_srcs = 3;
584 break;
585 case OPCODE_TXL:
586 op = nir_texop_txl;
587 num_srcs = 2;
588 break;
589 case OPCODE_TXP:
590 op = nir_texop_tex;
591 num_srcs = 2;
592 break;
593 case OPCODE_TXP_NV:
594 assert(!"not handled");
595 op = nir_texop_tex;
596 num_srcs = 2;
597 break;
598 default:
599 fprintf(stderr, "unknown tex op %d\n", prog_inst->Opcode);
600 abort();
601 }
602
603 if (prog_inst->TexShadow)
604 num_srcs++;
605
606 instr = nir_tex_instr_create(b->shader, num_srcs);
607 instr->op = op;
608 instr->dest_type = nir_type_float;
609 instr->is_shadow = prog_inst->TexShadow;
610 instr->sampler_index = prog_inst->TexSrcUnit;
611
612 switch (prog_inst->TexSrcTarget) {
613 case TEXTURE_1D_INDEX:
614 instr->sampler_dim = GLSL_SAMPLER_DIM_1D;
615 break;
616 case TEXTURE_2D_INDEX:
617 instr->sampler_dim = GLSL_SAMPLER_DIM_2D;
618 break;
619 case TEXTURE_3D_INDEX:
620 instr->sampler_dim = GLSL_SAMPLER_DIM_3D;
621 break;
622 case TEXTURE_CUBE_INDEX:
623 instr->sampler_dim = GLSL_SAMPLER_DIM_CUBE;
624 break;
625 case TEXTURE_RECT_INDEX:
626 instr->sampler_dim = GLSL_SAMPLER_DIM_RECT;
627 break;
628 default:
629 fprintf(stderr, "Unknown texture target %d\n", prog_inst->TexSrcTarget);
630 abort();
631 }
632
633 switch (instr->sampler_dim) {
634 case GLSL_SAMPLER_DIM_1D:
635 case GLSL_SAMPLER_DIM_BUF:
636 instr->coord_components = 1;
637 break;
638 case GLSL_SAMPLER_DIM_2D:
639 case GLSL_SAMPLER_DIM_RECT:
640 case GLSL_SAMPLER_DIM_EXTERNAL:
641 case GLSL_SAMPLER_DIM_MS:
642 instr->coord_components = 2;
643 break;
644 case GLSL_SAMPLER_DIM_3D:
645 case GLSL_SAMPLER_DIM_CUBE:
646 instr->coord_components = 3;
647 break;
648 }
649
650 unsigned src_number = 0;
651
652 instr->src[src_number].src =
653 nir_src_for_ssa(ptn_swizzle(b, src[0], X, Y, Z, W));
654 instr->src[src_number].src_type = nir_tex_src_coord;
655 src_number++;
656
657 if (prog_inst->Opcode == OPCODE_TXP) {
658 instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], W));
659 instr->src[src_number].src_type = nir_tex_src_projector;
660 src_number++;
661 }
662
663 if (prog_inst->Opcode == OPCODE_TXB) {
664 instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], W));
665 instr->src[src_number].src_type = nir_tex_src_bias;
666 src_number++;
667 }
668
669 if (prog_inst->Opcode == OPCODE_TXL) {
670 instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], W));
671 instr->src[src_number].src_type = nir_tex_src_lod;
672 src_number++;
673 }
674
675 if (instr->is_shadow) {
676 if (instr->coord_components < 3)
677 instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], Z));
678 else
679 instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], W));
680
681 instr->src[src_number].src_type = nir_tex_src_comparitor;
682 src_number++;
683 }
684
685 assert(src_number == num_srcs);
686
687 nir_ssa_dest_init(&instr->instr, &instr->dest, 4, NULL);
688 nir_instr_insert_after_cf_list(b->cf_node_list, &instr->instr);
689
690 /* Resolve the writemask on the texture op. */
691 ptn_move_dest(b, dest, &instr->dest.ssa);
692 }
693
694 static const nir_op op_trans[MAX_OPCODE] = {
695 [OPCODE_NOP] = 0,
696 [OPCODE_ABS] = nir_op_fabs,
697 [OPCODE_ADD] = nir_op_fadd,
698 [OPCODE_ARL] = 0,
699 [OPCODE_CMP] = 0,
700 [OPCODE_COS] = nir_op_fcos,
701 [OPCODE_DDX] = nir_op_fddx,
702 [OPCODE_DDY] = nir_op_fddy,
703 [OPCODE_DP2] = 0,
704 [OPCODE_DP3] = 0,
705 [OPCODE_DP4] = 0,
706 [OPCODE_DPH] = 0,
707 [OPCODE_DST] = 0,
708 [OPCODE_END] = 0,
709 [OPCODE_EX2] = nir_op_fexp2,
710 [OPCODE_EXP] = 0,
711 [OPCODE_FLR] = nir_op_ffloor,
712 [OPCODE_FRC] = nir_op_ffract,
713 [OPCODE_LG2] = nir_op_flog2,
714 [OPCODE_LIT] = 0,
715 [OPCODE_LOG] = 0,
716 [OPCODE_LRP] = 0,
717 [OPCODE_MAD] = nir_op_ffma,
718 [OPCODE_MAX] = nir_op_fmax,
719 [OPCODE_MIN] = nir_op_fmin,
720 [OPCODE_MOV] = nir_op_fmov,
721 [OPCODE_MUL] = nir_op_fmul,
722 [OPCODE_POW] = nir_op_fpow,
723 [OPCODE_RCP] = nir_op_frcp,
724
725 [OPCODE_RSQ] = nir_op_frsq,
726 [OPCODE_SCS] = 0,
727 [OPCODE_SEQ] = 0,
728 [OPCODE_SGE] = 0,
729 [OPCODE_SGT] = 0,
730 [OPCODE_SIN] = nir_op_fsin,
731 [OPCODE_SLE] = 0,
732 [OPCODE_SLT] = 0,
733 [OPCODE_SNE] = 0,
734 [OPCODE_SSG] = nir_op_fsign,
735 [OPCODE_SUB] = nir_op_fsub,
736 [OPCODE_SWZ] = 0,
737 [OPCODE_TEX] = 0,
738 [OPCODE_TRUNC] = nir_op_ftrunc,
739 [OPCODE_TXB] = 0,
740 [OPCODE_TXD] = 0,
741 [OPCODE_TXL] = 0,
742 [OPCODE_TXP] = 0,
743 [OPCODE_TXP_NV] = 0,
744 [OPCODE_XPD] = 0,
745 };
746
747 static void
748 ptn_emit_instruction(struct ptn_compile *c, struct prog_instruction *prog_inst)
749 {
750 nir_builder *b = &c->build;
751 unsigned i;
752 const unsigned op = prog_inst->Opcode;
753
754 if (op == OPCODE_END)
755 return;
756
757 nir_ssa_def *src[3];
758 for (i = 0; i < 3; i++) {
759 src[i] = ptn_get_src(c, &prog_inst->SrcReg[i]);
760 }
761 nir_alu_dest dest = ptn_get_dest(c, &prog_inst->DstReg);
762 if (c->error)
763 return;
764
765 switch (op) {
766 case OPCODE_RSQ:
767 ptn_move_dest(b, dest, nir_frsq(b, ptn_channel(b, src[0], X)));
768 break;
769
770 case OPCODE_RCP:
771 ptn_move_dest(b, dest, nir_frcp(b, ptn_channel(b, src[0], X)));
772 break;
773
774 case OPCODE_EX2:
775 ptn_move_dest(b, dest, nir_fexp2(b, ptn_channel(b, src[0], X)));
776 break;
777
778 case OPCODE_LG2:
779 ptn_move_dest(b, dest, nir_flog2(b, ptn_channel(b, src[0], X)));
780 break;
781
782 case OPCODE_POW:
783 ptn_move_dest(b, dest, nir_fpow(b,
784 ptn_channel(b, src[0], X),
785 ptn_channel(b, src[1], X)));
786 break;
787
788 case OPCODE_COS:
789 ptn_move_dest(b, dest, nir_fcos(b, ptn_channel(b, src[0], X)));
790 break;
791
792 case OPCODE_SIN:
793 ptn_move_dest(b, dest, nir_fsin(b, ptn_channel(b, src[0], X)));
794 break;
795
796 case OPCODE_ARL:
797 ptn_arl(b, dest, src);
798 break;
799
800 case OPCODE_EXP:
801 ptn_exp(b, dest, src);
802 break;
803
804 case OPCODE_LOG:
805 ptn_log(b, dest, src);
806 break;
807
808 case OPCODE_LRP:
809 ptn_lrp(b, dest, src);
810 break;
811
812 case OPCODE_DST:
813 ptn_dst(b, dest, src);
814 break;
815
816 case OPCODE_LIT:
817 ptn_lit(b, dest, src);
818 break;
819
820 case OPCODE_XPD:
821 ptn_xpd(b, dest, src);
822 break;
823
824 case OPCODE_DP2:
825 ptn_dp2(b, dest, src);
826 break;
827
828 case OPCODE_DP3:
829 ptn_dp3(b, dest, src);
830 break;
831
832 case OPCODE_DP4:
833 ptn_dp4(b, dest, src);
834 break;
835
836 case OPCODE_DPH:
837 ptn_dph(b, dest, src);
838 break;
839
840 case OPCODE_KIL:
841 ptn_kil(b, dest, src);
842 break;
843
844 case OPCODE_CMP:
845 ptn_cmp(b, dest, src);
846 break;
847
848 case OPCODE_SCS:
849 ptn_scs(b, dest, src);
850 break;
851
852 case OPCODE_SLT:
853 ptn_slt(b, dest, src);
854 break;
855
856 case OPCODE_SGT:
857 ptn_sgt(b, dest, src);
858 break;
859
860 case OPCODE_SLE:
861 ptn_sle(b, dest, src);
862 break;
863
864 case OPCODE_SGE:
865 ptn_sge(b, dest, src);
866 break;
867
868 case OPCODE_SEQ:
869 ptn_seq(b, dest, src);
870 break;
871
872 case OPCODE_SNE:
873 ptn_sne(b, dest, src);
874 break;
875
876 case OPCODE_TEX:
877 case OPCODE_TXB:
878 case OPCODE_TXD:
879 case OPCODE_TXL:
880 case OPCODE_TXP:
881 case OPCODE_TXP_NV:
882 ptn_tex(b, dest, src, prog_inst);
883 break;
884
885 case OPCODE_SWZ:
886 /* Extended swizzles were already handled in ptn_get_src(). */
887 ptn_alu(b, nir_op_fmov, dest, src);
888 break;
889
890 case OPCODE_NOP:
891 break;
892
893 default:
894 if (op_trans[op] != 0 || op == OPCODE_MOV) {
895 ptn_alu(b, op_trans[op], dest, src);
896 } else {
897 fprintf(stderr, "unknown opcode: %s\n", _mesa_opcode_string(op));
898 abort();
899 }
900 break;
901 }
902
903 if (prog_inst->SaturateMode) {
904 assert(prog_inst->SaturateMode == SATURATE_ZERO_ONE);
905 assert(!dest.dest.is_ssa);
906 ptn_move_dest(b, dest, nir_fsat(b, ptn_src_for_dest(c, &dest)));
907 }
908 }
909
910 /**
911 * Puts a NIR intrinsic to store of each PROGRAM_OUTPUT value to the output
912 * variables at the end of the shader.
913 *
914 * We don't generate these incrementally as the PROGRAM_OUTPUT values are
915 * written, because there's no output load intrinsic, which means we couldn't
916 * handle writemasks.
917 */
918 static void
919 ptn_add_output_stores(struct ptn_compile *c)
920 {
921 nir_builder *b = &c->build;
922
923 foreach_list_typed(nir_variable, var, node, &b->shader->outputs) {
924 nir_intrinsic_instr *store =
925 nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_var);
926 store->num_components = 4;
927 store->variables[0] =
928 nir_deref_var_create(store, c->output_vars[var->data.location]);
929 store->src[0].reg.reg = c->output_regs[var->data.location];
930 nir_instr_insert_after_cf_list(c->build.cf_node_list, &store->instr);
931 }
932 }
933
934 static void
935 setup_registers_and_variables(struct ptn_compile *c)
936 {
937 nir_builder *b = &c->build;
938 struct nir_shader *shader = b->shader;
939
940 /* Create input variables. */
941 const int num_inputs = _mesa_flsll(c->prog->InputsRead);
942 for (int i = 0; i < num_inputs; i++) {
943 if (!(c->prog->InputsRead & BITFIELD64_BIT(i)))
944 continue;
945 nir_variable *var = rzalloc(shader, nir_variable);
946 var->type = glsl_vec4_type();
947 var->data.read_only = true;
948 var->data.mode = nir_var_shader_in;
949 var->name = ralloc_asprintf(var, "in_%d", i);
950 var->data.location = i;
951 var->data.index = 0;
952
953 if (c->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
954 struct gl_fragment_program *fp =
955 (struct gl_fragment_program *) c->prog;
956
957 var->data.interpolation = fp->InterpQualifier[i];
958
959 if (i == VARYING_SLOT_POS) {
960 var->data.origin_upper_left = fp->OriginUpperLeft;
961 var->data.pixel_center_integer = fp->PixelCenterInteger;
962 } else if (i == VARYING_SLOT_FOGC) {
963 /* fogcoord is defined as <f, 0.0, 0.0, 1.0>. Make the actual
964 * input variable a float, and create a local containing the
965 * full vec4 value.
966 */
967 var->type = glsl_float_type();
968
969 nir_intrinsic_instr *load_x =
970 nir_intrinsic_instr_create(shader, nir_intrinsic_load_var);
971 load_x->num_components = 1;
972 load_x->variables[0] = nir_deref_var_create(load_x, var);
973 nir_ssa_dest_init(&load_x->instr, &load_x->dest, 1, NULL);
974 nir_instr_insert_after_cf_list(b->cf_node_list, &load_x->instr);
975
976 nir_ssa_def *f001 = nir_vec4(b, &load_x->dest.ssa, nir_imm_float(b, 0.0),
977 nir_imm_float(b, 0.0), nir_imm_float(b, 1.0));
978
979 nir_variable *fullvar = rzalloc(shader, nir_variable);
980 fullvar->type = glsl_vec4_type();
981 fullvar->data.mode = nir_var_local;
982 fullvar->name = "fogcoord_tmp";
983 exec_list_push_tail(&b->impl->locals, &fullvar->node);
984
985 nir_intrinsic_instr *store =
986 nir_intrinsic_instr_create(shader, nir_intrinsic_store_var);
987 store->num_components = 4;
988 store->variables[0] = nir_deref_var_create(store, fullvar);
989 store->src[0] = nir_src_for_ssa(f001);
990 nir_instr_insert_after_cf_list(b->cf_node_list, &store->instr);
991
992 /* Insert the real input into the list so the driver has real
993 * inputs, but set c->input_vars[i] to the temporary so we use
994 * the splatted value.
995 */
996 exec_list_push_tail(&shader->inputs, &var->node);
997 c->input_vars[i] = fullvar;
998 continue;
999 }
1000 }
1001
1002 exec_list_push_tail(&shader->inputs, &var->node);
1003 c->input_vars[i] = var;
1004 }
1005
1006 /* Create output registers and variables. */
1007 int max_outputs = _mesa_fls(c->prog->OutputsWritten);
1008 c->output_regs = rzalloc_array(c, nir_register *, max_outputs);
1009
1010 for (int i = 0; i < max_outputs; i++) {
1011 if (!(c->prog->OutputsWritten & BITFIELD64_BIT(i)))
1012 continue;
1013
1014 /* Since we can't load from outputs in the IR, we make temporaries
1015 * for the outputs and emit stores to the real outputs at the end of
1016 * the shader.
1017 */
1018 nir_register *reg = nir_local_reg_create(b->impl);
1019 reg->num_components = 4;
1020
1021 nir_variable *var = rzalloc(shader, nir_variable);
1022 var->type = glsl_vec4_type();
1023 var->data.mode = nir_var_shader_out;
1024 var->name = ralloc_asprintf(var, "out_%d", i);
1025
1026 var->data.location = i;
1027 var->data.index = 0;
1028
1029 c->output_regs[i] = reg;
1030
1031 exec_list_push_tail(&shader->outputs, &var->node);
1032 c->output_vars[i] = var;
1033 }
1034
1035 /* Create temporary registers. */
1036 c->temp_regs = rzalloc_array(c, nir_register *, c->prog->NumTemporaries);
1037
1038 nir_register *reg;
1039 for (int i = 0; i < c->prog->NumTemporaries; i++) {
1040 reg = nir_local_reg_create(b->impl);
1041 if (!reg) {
1042 c->error = true;
1043 return;
1044 }
1045 reg->num_components = 4;
1046 c->temp_regs[i] = reg;
1047 }
1048
1049 /* Create the address register (for ARB_vertex_program). */
1050 reg = nir_local_reg_create(b->impl);
1051 if (!reg) {
1052 c->error = true;
1053 return;
1054 }
1055 reg->num_components = 1;
1056 c->addr_reg = reg;
1057 }
1058
1059 struct nir_shader *
1060 prog_to_nir(const struct gl_program *prog,
1061 const nir_shader_compiler_options *options)
1062 {
1063 struct ptn_compile *c;
1064 struct nir_shader *s;
1065
1066 c = rzalloc(NULL, struct ptn_compile);
1067 if (!c)
1068 return NULL;
1069 s = nir_shader_create(NULL, options);
1070 if (!s)
1071 goto fail;
1072 c->prog = prog;
1073
1074 c->parameters = rzalloc(s, nir_variable);
1075 c->parameters->type = glsl_array_type(glsl_vec4_type(),
1076 prog->Parameters->NumParameters);
1077 c->parameters->name = "parameters";
1078 c->parameters->data.read_only = true;
1079 c->parameters->data.mode = nir_var_uniform;
1080 exec_list_push_tail(&s->uniforms, &c->parameters->node);
1081
1082 nir_function *func = nir_function_create(s, "main");
1083 nir_function_overload *overload = nir_function_overload_create(func);
1084 nir_function_impl *impl = nir_function_impl_create(overload);
1085
1086 c->build.shader = s;
1087 c->build.impl = impl;
1088 c->build.cf_node_list = &impl->body;
1089
1090 setup_registers_and_variables(c);
1091 if (unlikely(c->error))
1092 goto fail;
1093
1094 for (unsigned int i = 0; i < prog->NumInstructions; i++) {
1095 ptn_emit_instruction(c, &prog->Instructions[i]);
1096
1097 if (unlikely(c->error))
1098 break;
1099 }
1100
1101 ptn_add_output_stores(c);
1102
1103 fail:
1104 if (c->error) {
1105 ralloc_free(s);
1106 s = NULL;
1107 }
1108 ralloc_free(c);
1109 return s;
1110 }