nir: Use _mesa_flsll(InputsRead) in prog->nir.
[mesa.git] / src / mesa / program / prog_to_nir.c
1 /*
2 * Copyright © 2015 Intel Corporation
3 * Copyright © 2014-2015 Broadcom
4 * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
15 * Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
23 * IN THE SOFTWARE.
24 */
25
26 #include "nir/nir.h"
27 #include "nir/nir_builder.h"
28 #include "glsl/list.h"
29 #include "main/imports.h"
30 #include "util/ralloc.h"
31
32 #include "prog_to_nir.h"
33 #include "prog_instruction.h"
34 #include "prog_parameter.h"
35 #include "prog_print.h"
36
37 /**
38 * \file prog_to_nir.c
39 *
40 * A translator from Mesa IR (prog_instruction.h) to NIR. This is primarily
41 * intended to support ARB_vertex_program, ARB_fragment_program, and fixed-function
42 * vertex processing. Full GLSL support should use glsl_to_nir instead.
43 */
44
/** Translation context threaded through the Mesa IR -> NIR conversion. */
struct ptn_compile {
   struct gl_program *prog;   /* Mesa IR program being translated */
   nir_builder build;         /* builder emitting into the NIR shader */
   bool error;                /* set on failure; checked by callers to bail out */

   /* Shader input/output variables, indexed by varying slot. */
   nir_variable *input_vars[VARYING_SLOT_MAX];
   nir_variable *output_vars[VARYING_SLOT_MAX];
   /* Local registers standing in for outputs and temporaries.  Outputs are
    * copied into the real output variables at the end of the shader (see
    * ptn_add_output_stores()).
    */
   nir_register **output_regs;
   nir_register **temp_regs;

   /* Single-component register backing the ARB_vertex_program ADDR register. */
   nir_register *addr_reg;
};
57
/* Builds an unnamed 4-entry swizzle array from channel letters (X/Y/Z/W,
 * or ZERO/ONE/NIL for extended swizzles).
 */
#define SWIZ(X, Y, Z, W) \
   (unsigned[4]){ SWIZZLE_##X, SWIZZLE_##Y, SWIZZLE_##Z, SWIZZLE_##W }
/* Emits a 4-component swizzle of src. */
#define ptn_swizzle(b, src, x, y, z, w) nir_swizzle(b, src, SWIZ(x, y, z, w), 4, true)
/* Extracts a single channel of src as a 1-component value. */
#define ptn_channel(b, src, ch) nir_swizzle(b, src, SWIZ(ch, ch, ch, ch), 1, true)
62
/**
 * Reads back the value just written to an ALU destination as a 4-wide SSA
 * value (an identity-swizzled fmov).  Used by ptn_emit_instruction() to
 * re-read a result so saturation can be applied after the fact.
 */
static nir_ssa_def *
ptn_src_for_dest(struct ptn_compile *c, nir_alu_dest *dest)
{
   nir_builder *b = &c->build;

   nir_alu_src src;
   memset(&src, 0, sizeof(src));

   if (dest->dest.is_ssa)
      src.src = nir_src_for_ssa(&dest->dest.ssa);
   else {
      /* This translator never emits indirect destinations. */
      assert(!dest->dest.reg.indirect);
      src.src = nir_src_for_reg(dest->dest.reg.reg);
      src.src.reg.base_offset = dest->dest.reg.base_offset;
   }

   /* Identity swizzle: read the channels back exactly as written. */
   for (int i = 0; i < 4; i++)
      src.swizzle[i] = i;

   return nir_fmov_alu(b, src, 4);
}
84
85 static nir_alu_dest
86 ptn_get_dest(struct ptn_compile *c, const struct prog_dst_register *prog_dst)
87 {
88 nir_alu_dest dest;
89
90 memset(&dest, 0, sizeof(dest));
91
92 switch (prog_dst->File) {
93 case PROGRAM_TEMPORARY:
94 dest.dest.reg.reg = c->temp_regs[prog_dst->Index];
95 break;
96 case PROGRAM_OUTPUT:
97 dest.dest.reg.reg = c->output_regs[prog_dst->Index];
98 break;
99 case PROGRAM_ADDRESS:
100 assert(prog_dst->Index == 0);
101 dest.dest.reg.reg = c->addr_reg;
102 break;
103 case PROGRAM_UNDEFINED:
104 break;
105 }
106
107 dest.write_mask = prog_dst->WriteMask;
108 dest.saturate = false;
109
110 assert(!prog_dst->RelAddr);
111
112 return dest;
113 }
114
115 /**
116 * Multiply the contents of the ADDR register by 4 to convert from the number
117 * of vec4s to the number of floating point components.
118 */
119 static nir_ssa_def *
120 ptn_addr_reg_value(struct ptn_compile *c)
121 {
122 nir_builder *b = &c->build;
123 nir_alu_src src;
124 memset(&src, 0, sizeof(src));
125 src.src = nir_src_for_reg(c->addr_reg);
126
127 return nir_imul(b, nir_fmov_alu(b, src, 1), nir_imm_int(b, 4));
128 }
129
/**
 * Translates a Mesa IR source register into an SSA value, emitting any
 * loads (inputs, uniforms) required, and applying swizzle, absolute
 * value and negation modifiers.
 */
static nir_ssa_def *
ptn_get_src(struct ptn_compile *c, const struct prog_src_register *prog_src)
{
   nir_builder *b = &c->build;
   nir_alu_src src;

   memset(&src, 0, sizeof(src));

   switch (prog_src->File) {
   case PROGRAM_UNDEFINED:
      return nir_imm_float(b, 0.0);
   case PROGRAM_TEMPORARY:
      assert(!prog_src->RelAddr && prog_src->Index >= 0);
      src.src.reg.reg = c->temp_regs[prog_src->Index];
      break;
   case PROGRAM_INPUT: {
      /* ARB_vertex_program doesn't allow relative addressing on vertex
       * attributes; ARB_fragment_program has no relative addressing at all.
       */
      assert(!prog_src->RelAddr);

      assert(prog_src->Index >= 0 && prog_src->Index < VARYING_SLOT_MAX);

      /* Load the whole vec4 input variable for this slot. */
      nir_intrinsic_instr *load =
         nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_var);
      load->num_components = 4;
      load->variables[0] =
         nir_deref_var_create(b->shader, c->input_vars[prog_src->Index]);

      nir_ssa_dest_init(&load->instr, &load->dest, 4, NULL);
      nir_instr_insert_after_cf_list(b->cf_node_list, &load->instr);

      src.src = nir_src_for_ssa(&load->dest.ssa);
      break;
   }
   case PROGRAM_STATE_VAR:
   case PROGRAM_CONSTANT: {
      /* We actually want to look at the type in the Parameters list for this,
       * because it lets us upload constant builtin uniforms as actual
       * constants.
       */
      struct gl_program_parameter_list *plist = c->prog->Parameters;
      gl_register_file file = prog_src->RelAddr ? prog_src->File :
         plist->Parameters[prog_src->Index].Type;

      switch (file) {
      case PROGRAM_CONSTANT:
         /* Only inline the constant when the file isn't also accessed
          * indirectly; otherwise it must stay in the uniform storage so
          * indirect loads see a consistent layout.
          */
         if ((c->prog->IndirectRegisterFiles & (1 << PROGRAM_CONSTANT)) == 0) {
            float *v = (float *) plist->ParameterValues[prog_src->Index];
            src.src = nir_src_for_ssa(nir_imm_vec4(b, v[0], v[1], v[2], v[3]));
            break;
         }
         /* FALLTHROUGH */
      case PROGRAM_STATE_VAR: {
         nir_intrinsic_op load_op =
            prog_src->RelAddr ? nir_intrinsic_load_uniform_indirect :
                                nir_intrinsic_load_uniform;
         nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, load_op);
         nir_ssa_dest_init(&load->instr, &load->dest, 4, NULL);
         load->num_components = 4;

         /* Multiply src->Index by 4 to scale from # of vec4s to components. */
         load->const_index[0] = 4 * prog_src->Index;
         load->const_index[1] = 1;

         if (prog_src->RelAddr) {
            nir_ssa_def *reladdr = ptn_addr_reg_value(c);
            if (prog_src->Index < 0) {
               /* This is a negative offset which should be added to the address
                * register's value.
                */
               reladdr = nir_iadd(b, reladdr, nir_imm_int(b, load->const_index[0]));
               load->const_index[0] = 0;
            }
            load->src[0] = nir_src_for_ssa(reladdr);
         }

         nir_instr_insert_after_cf_list(b->cf_node_list, &load->instr);

         src.src = nir_src_for_ssa(&load->dest.ssa);
         break;
      }
      default:
         fprintf(stderr, "bad uniform src register file: %s (%d)\n",
                 _mesa_register_file_name(file), file);
         abort();
      }
      break;
   }
   default:
      fprintf(stderr, "unknown src register file: %s (%d)\n",
              _mesa_register_file_name(prog_src->File), prog_src->File);
      abort();
   }

   nir_ssa_def *def;
   if (!HAS_EXTENDED_SWIZZLE(prog_src->Swizzle)) {
      /* Plain XYZW swizzle: express it directly on a single fmov. */
      for (int i = 0; i < 4; i++)
         src.swizzle[i] = GET_SWZ(prog_src->Swizzle, i);

      def = nir_fmov_alu(b, src, 4);
   } else {
      /* Extended (SWZ) swizzles can also select constant 0.0 or 1.0 per
       * channel, so build each channel separately and recombine.
       */
      nir_ssa_def *chans[4];
      for (int i = 0; i < 4; i++) {
         int swizzle = GET_SWZ(prog_src->Swizzle, i);
         if (swizzle == SWIZZLE_ZERO) {
            chans[i] = nir_imm_float(b, 0.0);
         } else if (swizzle == SWIZZLE_ONE) {
            chans[i] = nir_imm_float(b, 1.0);
         } else {
            assert(swizzle != SWIZZLE_NIL);
            nir_alu_instr *mov = nir_alu_instr_create(b->shader, nir_op_fmov);
            nir_ssa_dest_init(&mov->instr, &mov->dest.dest, 1, NULL);
            mov->dest.write_mask = 0x1;
            mov->src[0] = src;
            mov->src[0].swizzle[0] = swizzle;
            nir_instr_insert_after_cf_list(b->cf_node_list, &mov->instr);

            chans[i] = &mov->dest.dest.ssa;
         }
      }
      def = nir_vec4(b, chans[0], chans[1], chans[2], chans[3]);
   }

   /* Source modifiers: abs is applied before negate. */
   if (prog_src->Abs)
      def = nir_fabs(b, def);

   if (prog_src->Negate)
      def = nir_fneg(b, def);

   return def;
}
262
263 static void
264 ptn_alu(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
265 {
266 unsigned num_srcs = nir_op_infos[op].num_inputs;
267 nir_alu_instr *instr = nir_alu_instr_create(b->shader, op);
268 unsigned i;
269
270 for (i = 0; i < num_srcs; i++)
271 instr->src[i].src = nir_src_for_ssa(src[i]);
272
273 instr->dest = dest;
274 nir_instr_insert_after_cf_list(b->cf_node_list, &instr->instr);
275 }
276
/**
 * Moves \p def into the channels of \p dest selected by the intersection
 * of \p write_mask and the destination's own writemask.  If \p def has
 * fewer than 4 components, its last component is replicated into the
 * remaining swizzle slots.
 */
static void
ptn_move_dest_masked(nir_builder *b, nir_alu_dest dest,
                     nir_ssa_def *def, unsigned write_mask)
{
   /* Nothing to do if no requested channel is actually written. */
   if (!(dest.write_mask & write_mask))
      return;

   nir_alu_instr *mov = nir_alu_instr_create(b->shader, nir_op_fmov);
   /* NOTE(review): a NULL here silently drops the write; presumably
    * allocation failure is considered unrecoverable anyway — confirm.
    */
   if (!mov)
      return;

   mov->dest = dest;
   mov->dest.write_mask &= write_mask;
   mov->src[0].src = nir_src_for_ssa(def);
   /* Pad the swizzle by replicating def's last component. */
   for (unsigned i = def->num_components; i < 4; i++)
      mov->src[0].swizzle[i] = def->num_components - 1;
   nir_instr_insert_after_cf_list(b->cf_node_list, &mov->instr);
}
295
/* Moves def into dest on all channels allowed by dest's writemask. */
static void
ptn_move_dest(nir_builder *b, nir_alu_dest dest, nir_ssa_def *def)
{
   ptn_move_dest_masked(b, dest, def, WRITEMASK_XYZW);
}
301
302 static void
303 ptn_arl(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
304 {
305 ptn_move_dest(b, dest, nir_f2i(b, nir_ffloor(b, src[0])));
306 }
307
308 /* EXP - Approximate Exponential Base 2
309 * dst.x = 2^{\lfloor src.x\rfloor}
310 * dst.y = src.x - \lfloor src.x\rfloor
311 * dst.z = 2^{src.x}
312 * dst.w = 1.0
313 */
314 static void
315 ptn_exp(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
316 {
317 nir_ssa_def *srcx = ptn_channel(b, src[0], X);
318
319 ptn_move_dest_masked(b, dest, nir_fexp2(b, nir_ffloor(b, srcx)), WRITEMASK_X);
320 ptn_move_dest_masked(b, dest, nir_fsub(b, srcx, nir_ffloor(b, srcx)), WRITEMASK_Y);
321 ptn_move_dest_masked(b, dest, nir_fexp2(b, srcx), WRITEMASK_Z);
322 ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_W);
323 }
324
325 /* LOG - Approximate Logarithm Base 2
326 * dst.x = \lfloor\log_2{|src.x|}\rfloor
327 * dst.y = |src.x| * 2^{-\lfloor\log_2{|src.x|}\rfloor}}
328 * dst.z = \log_2{|src.x|}
329 * dst.w = 1.0
330 */
331 static void
332 ptn_log(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
333 {
334 nir_ssa_def *abs_srcx = nir_fabs(b, ptn_channel(b, src[0], X));
335 nir_ssa_def *log2 = nir_flog2(b, abs_srcx);
336 nir_ssa_def *floor_log2 = nir_ffloor(b, log2);
337
338 ptn_move_dest_masked(b, dest, floor_log2, WRITEMASK_X);
339 ptn_move_dest_masked(b, dest,
340 nir_fmul(b, abs_srcx,
341 nir_fexp2(b, nir_fneg(b, floor_log2))),
342 WRITEMASK_Y);
343 ptn_move_dest_masked(b, dest, log2, WRITEMASK_Z);
344 ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_W);
345 }
346
347 /* DST - Distance Vector
348 * dst.x = 1.0
349 * dst.y = src0.y \times src1.y
350 * dst.z = src0.z
351 * dst.w = src1.w
352 */
353 static void
354 ptn_dst(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
355 {
356 ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_X);
357 ptn_move_dest_masked(b, dest, nir_fmul(b, src[0], src[1]), WRITEMASK_Y);
358 ptn_move_dest_masked(b, dest, nir_fmov(b, src[0]), WRITEMASK_Z);
359 ptn_move_dest_masked(b, dest, nir_fmov(b, src[1]), WRITEMASK_W);
360 }
361
/* LIT - Light Coefficients
 * dst.x = 1.0
 * dst.y = max(src.x, 0.0)
 * dst.z = (src.x > 0.0) ? max(src.y, 0.0)^{clamp(src.w, -128.0, 128.0))} : 0
 * dst.w = 1.0
 */
static void
ptn_lit(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
{
   ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_XW);

   ptn_move_dest_masked(b, dest, nir_fmax(b, ptn_channel(b, src[0], X),
                                          nir_imm_float(b, 0.0)), WRITEMASK_Y);

   /* The z channel is the expensive one; skip it when not written. */
   if (dest.write_mask & WRITEMASK_Z) {
      nir_ssa_def *src0_y = ptn_channel(b, src[0], Y);
      /* Clamp the exponent to [-128, 128] per the LIT definition. */
      nir_ssa_def *wclamp = nir_fmax(b, nir_fmin(b, ptn_channel(b, src[0], W),
                                                 nir_imm_float(b, 128.0)),
                                     nir_imm_float(b, -128.0));
      nir_ssa_def *pow = nir_fpow(b, nir_fmax(b, src0_y, nir_imm_float(b, 0.0)),
                                  wclamp);

      /* Select 0 when src.x <= 0; use boolean or float select depending on
       * whether the target has native integer/boolean support.
       */
      nir_ssa_def *z;
      if (b->shader->options->native_integers) {
         z = nir_bcsel(b,
                       nir_fge(b, nir_imm_float(b, 0.0), ptn_channel(b, src[0], X)),
                       nir_imm_float(b, 0.0),
                       pow);
      } else {
         z = nir_fcsel(b,
                       nir_sge(b, nir_imm_float(b, 0.0), ptn_channel(b, src[0], X)),
                       nir_imm_float(b, 0.0),
                       pow);
      }

      ptn_move_dest_masked(b, dest, z, WRITEMASK_Z);
   }
}
400
401 /* SCS - Sine Cosine
402 * dst.x = \cos{src.x}
403 * dst.y = \sin{src.x}
404 * dst.z = 0.0
405 * dst.w = 1.0
406 */
407 static void
408 ptn_scs(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
409 {
410 ptn_move_dest_masked(b, dest, nir_fcos(b, ptn_channel(b, src[0], X)),
411 WRITEMASK_X);
412 ptn_move_dest_masked(b, dest, nir_fsin(b, ptn_channel(b, src[0], X)),
413 WRITEMASK_Y);
414 ptn_move_dest_masked(b, dest, nir_imm_float(b, 0.0), WRITEMASK_Z);
415 ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_W);
416 }
417
418 /**
419 * Emit SLT. For platforms with integers, prefer b2f(flt(...)).
420 */
421 static void
422 ptn_slt(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
423 {
424 if (b->shader->options->native_integers) {
425 ptn_move_dest(b, dest, nir_b2f(b, nir_flt(b, src[0], src[1])));
426 } else {
427 ptn_move_dest(b, dest, nir_slt(b, src[0], src[1]));
428 }
429 }
430
431 /**
432 * Emit SGE. For platforms with integers, prefer b2f(fge(...)).
433 */
434 static void
435 ptn_sge(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
436 {
437 if (b->shader->options->native_integers) {
438 ptn_move_dest(b, dest, nir_b2f(b, nir_fge(b, src[0], src[1])));
439 } else {
440 ptn_move_dest(b, dest, nir_sge(b, src[0], src[1]));
441 }
442 }
443
444 static void
445 ptn_sle(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
446 {
447 nir_ssa_def *commuted[] = { src[1], src[0] };
448 ptn_sge(b, dest, commuted);
449 }
450
451 static void
452 ptn_sgt(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
453 {
454 nir_ssa_def *commuted[] = { src[1], src[0] };
455 ptn_slt(b, dest, commuted);
456 }
457
458 /**
459 * Emit SEQ. For platforms with integers, prefer b2f(feq(...)).
460 */
461 static void
462 ptn_seq(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
463 {
464 if (b->shader->options->native_integers) {
465 ptn_move_dest(b, dest, nir_b2f(b, nir_feq(b, src[0], src[1])));
466 } else {
467 ptn_move_dest(b, dest, nir_seq(b, src[0], src[1]));
468 }
469 }
470
471 /**
472 * Emit SNE. For platforms with integers, prefer b2f(fne(...)).
473 */
474 static void
475 ptn_sne(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
476 {
477 if (b->shader->options->native_integers) {
478 ptn_move_dest(b, dest, nir_b2f(b, nir_fne(b, src[0], src[1])));
479 } else {
480 ptn_move_dest(b, dest, nir_sne(b, src[0], src[1]));
481 }
482 }
483
484 static void
485 ptn_xpd(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
486 {
487 ptn_move_dest_masked(b, dest,
488 nir_fsub(b,
489 nir_fmul(b,
490 ptn_swizzle(b, src[0], Y, Z, X, X),
491 ptn_swizzle(b, src[1], Z, X, Y, X)),
492 nir_fmul(b,
493 ptn_swizzle(b, src[1], Y, Z, X, X),
494 ptn_swizzle(b, src[0], Z, X, Y, X))),
495 WRITEMASK_XYZ);
496 ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_W);
497 }
498
499 static void
500 ptn_dp2(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
501 {
502 ptn_move_dest(b, dest, nir_fdot2(b, src[0], src[1]));
503 }
504
505 static void
506 ptn_dp3(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
507 {
508 ptn_move_dest(b, dest, nir_fdot3(b, src[0], src[1]));
509 }
510
511 static void
512 ptn_dp4(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
513 {
514 ptn_move_dest(b, dest, nir_fdot4(b, src[0], src[1]));
515 }
516
517 static void
518 ptn_dph(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
519 {
520 nir_ssa_def *dp3 = nir_fdot3(b, src[0], src[1]);
521 ptn_move_dest(b, dest, nir_fadd(b, dp3, ptn_channel(b, src[1], W)));
522 }
523
524 static void
525 ptn_cmp(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
526 {
527 if (b->shader->options->native_integers) {
528 ptn_move_dest(b, dest, nir_bcsel(b,
529 nir_flt(b, src[0], nir_imm_float(b, 0.0)),
530 src[1], src[2]));
531 } else {
532 ptn_move_dest(b, dest, nir_fcsel(b,
533 nir_slt(b, src[0], nir_imm_float(b, 0.0)),
534 src[1], src[2]));
535 }
536 }
537
538 static void
539 ptn_lrp(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
540 {
541 ptn_move_dest(b, dest, nir_flrp(b, src[2], src[1], src[0]));
542 }
543
544 static void
545 ptn_kil(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
546 {
547 nir_ssa_def *cmp = b->shader->options->native_integers ?
548 nir_bany4(b, nir_flt(b, src[0], nir_imm_float(b, 0.0))) :
549 nir_fany4(b, nir_slt(b, src[0], nir_imm_float(b, 0.0)));
550
551 nir_intrinsic_instr *discard =
552 nir_intrinsic_instr_create(b->shader, nir_intrinsic_discard_if);
553 discard->src[0] = nir_src_for_ssa(cmp);
554 nir_instr_insert_after_cf_list(b->cf_node_list, &discard->instr);
555 }
556
/**
 * Translates a Mesa IR texture instruction (TEX/TXB/TXD/TXL/TXP) into a
 * nir_tex_instr.  src[0] carries coordinate plus, depending on the
 * opcode, projector/bias/lod in .w and the shadow comparitor in .z or .w.
 */
static void
ptn_tex(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src,
        struct prog_instruction *prog_inst)
{
   nir_tex_instr *instr;
   nir_texop op;
   unsigned num_srcs;

   /* Pick the NIR texture op and the number of tex sources we'll fill in
    * below (coordinate, plus projector/bias/lod as applicable).
    */
   switch (prog_inst->Opcode) {
   case OPCODE_TEX:
      op = nir_texop_tex;
      num_srcs = 1;
      break;
   case OPCODE_TXB:
      op = nir_texop_txb;
      num_srcs = 2;
      break;
   case OPCODE_TXD:
      op = nir_texop_txd;
      num_srcs = 3;
      break;
   case OPCODE_TXL:
      op = nir_texop_txl;
      num_srcs = 2;
      break;
   case OPCODE_TXP:
      op = nir_texop_tex;
      num_srcs = 2;
      break;
   case OPCODE_TXP_NV:
      assert(!"not handled");
      op = nir_texop_tex;
      num_srcs = 2;
      break;
   default:
      fprintf(stderr, "unknown tex op %d\n", prog_inst->Opcode);
      abort();
   }

   /* Shadow sampling adds a comparitor source. */
   if (prog_inst->TexShadow)
      num_srcs++;

   instr = nir_tex_instr_create(b->shader, num_srcs);
   instr->op = op;
   instr->dest_type = nir_type_float;
   instr->is_shadow = prog_inst->TexShadow;
   instr->sampler_index = prog_inst->TexSrcUnit;

   switch (prog_inst->TexSrcTarget) {
   case TEXTURE_1D_INDEX:
      instr->sampler_dim = GLSL_SAMPLER_DIM_1D;
      break;
   case TEXTURE_2D_INDEX:
      instr->sampler_dim = GLSL_SAMPLER_DIM_2D;
      break;
   case TEXTURE_3D_INDEX:
      instr->sampler_dim = GLSL_SAMPLER_DIM_3D;
      break;
   case TEXTURE_CUBE_INDEX:
      instr->sampler_dim = GLSL_SAMPLER_DIM_CUBE;
      break;
   case TEXTURE_RECT_INDEX:
      instr->sampler_dim = GLSL_SAMPLER_DIM_RECT;
      break;
   default:
      fprintf(stderr, "Unknown texture target %d\n", prog_inst->TexSrcTarget);
      abort();
   }

   switch (instr->sampler_dim) {
   case GLSL_SAMPLER_DIM_1D:
   case GLSL_SAMPLER_DIM_BUF:
      instr->coord_components = 1;
      break;
   case GLSL_SAMPLER_DIM_2D:
   case GLSL_SAMPLER_DIM_RECT:
   case GLSL_SAMPLER_DIM_EXTERNAL:
   case GLSL_SAMPLER_DIM_MS:
      instr->coord_components = 2;
      break;
   case GLSL_SAMPLER_DIM_3D:
   case GLSL_SAMPLER_DIM_CUBE:
      instr->coord_components = 3;
      break;
   }

   /* Fill in the tex sources in order; src_number must end up equal to
    * the num_srcs we allocated above.
    */
   unsigned src_number = 0;

   instr->src[src_number].src =
      nir_src_for_ssa(ptn_swizzle(b, src[0], X, Y, Z, W));
   instr->src[src_number].src_type = nir_tex_src_coord;
   src_number++;

   if (prog_inst->Opcode == OPCODE_TXP) {
      instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], W));
      instr->src[src_number].src_type = nir_tex_src_projector;
      src_number++;
   }

   if (prog_inst->Opcode == OPCODE_TXB) {
      instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], W));
      instr->src[src_number].src_type = nir_tex_src_bias;
      src_number++;
   }

   if (prog_inst->Opcode == OPCODE_TXL) {
      instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], W));
      instr->src[src_number].src_type = nir_tex_src_lod;
      src_number++;
   }

   if (instr->is_shadow) {
      /* The comparitor lives in .z unless the coordinate already uses it. */
      if (instr->coord_components < 3)
         instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], Z));
      else
         instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], W));

      instr->src[src_number].src_type = nir_tex_src_comparitor;
      src_number++;
   }

   assert(src_number == num_srcs);

   nir_ssa_dest_init(&instr->instr, &instr->dest, 4, NULL);
   nir_instr_insert_after_cf_list(b->cf_node_list, &instr->instr);

   /* Resolve the writemask on the texture op. */
   ptn_move_dest(b, dest, &instr->dest.ssa);
}
686
/**
 * Mesa IR opcode -> NIR opcode table for instructions that translate 1:1
 * through ptn_alu().  Zero entries are either handled by a dedicated case
 * in ptn_emit_instruction()'s switch or unsupported.  Note that
 * nir_op_fmov itself compares equal to 0, which is why OPCODE_MOV gets a
 * special check in ptn_emit_instruction()'s default case.
 */
static const nir_op op_trans[MAX_OPCODE] = {
   [OPCODE_NOP] = 0,
   [OPCODE_ABS] = nir_op_fabs,
   [OPCODE_ADD] = nir_op_fadd,
   [OPCODE_ARL] = 0,
   [OPCODE_CMP] = 0,
   [OPCODE_COS] = nir_op_fcos,
   [OPCODE_DDX] = nir_op_fddx,
   [OPCODE_DDY] = nir_op_fddy,
   [OPCODE_DP2] = 0,
   [OPCODE_DP3] = 0,
   [OPCODE_DP4] = 0,
   [OPCODE_DPH] = 0,
   [OPCODE_DST] = 0,
   [OPCODE_END] = 0,
   [OPCODE_EX2] = nir_op_fexp2,
   [OPCODE_EXP] = nir_op_fexp, /* unused: OPCODE_EXP is special-cased */
   [OPCODE_FLR] = nir_op_ffloor,
   [OPCODE_FRC] = nir_op_ffract,
   [OPCODE_LG2] = nir_op_flog2,
   [OPCODE_LIT] = 0,
   [OPCODE_LOG] = 0,
   [OPCODE_LRP] = 0,
   [OPCODE_MAD] = nir_op_ffma,
   [OPCODE_MAX] = nir_op_fmax,
   [OPCODE_MIN] = nir_op_fmin,
   [OPCODE_MOV] = nir_op_fmov,
   [OPCODE_MUL] = nir_op_fmul,
   [OPCODE_POW] = nir_op_fpow,
   [OPCODE_RCP] = nir_op_frcp,

   [OPCODE_RSQ] = nir_op_frsq,
   [OPCODE_SCS] = 0,
   [OPCODE_SEQ] = 0,
   [OPCODE_SGE] = 0,
   [OPCODE_SGT] = 0,
   [OPCODE_SIN] = nir_op_fsin,
   [OPCODE_SLE] = 0,
   [OPCODE_SLT] = 0,
   [OPCODE_SNE] = 0,
   [OPCODE_SSG] = nir_op_fsign,
   [OPCODE_SUB] = nir_op_fsub,
   [OPCODE_SWZ] = 0,
   [OPCODE_TEX] = 0,
   [OPCODE_TRUNC] = nir_op_ftrunc,
   [OPCODE_TXB] = 0,
   [OPCODE_TXD] = 0,
   [OPCODE_TXL] = 0,
   [OPCODE_TXP] = 0,
   [OPCODE_TXP_NV] = 0,
   [OPCODE_XPD] = 0,
};
739
/**
 * Translates one Mesa IR instruction into NIR: gathers the (up to 3)
 * sources, resolves the destination, dispatches either to a dedicated
 * ptn_* emitter or through the op_trans table, and finally applies
 * saturation if requested.
 */
static void
ptn_emit_instruction(struct ptn_compile *c, struct prog_instruction *prog_inst)
{
   nir_builder *b = &c->build;
   unsigned i;
   const unsigned op = prog_inst->Opcode;

   if (op == OPCODE_END)
      return;

   nir_ssa_def *src[3];
   for (i = 0; i < 3; i++) {
      src[i] = ptn_get_src(c, &prog_inst->SrcReg[i]);
   }
   nir_alu_dest dest = ptn_get_dest(c, &prog_inst->DstReg);
   if (c->error)
      return;

   switch (op) {
   /* Scalar ops: operate on the x channel, result replicated by
    * ptn_move_dest().
    */
   case OPCODE_RSQ:
      ptn_move_dest(b, dest, nir_frsq(b, ptn_channel(b, src[0], X)));
      break;

   case OPCODE_RCP:
      ptn_move_dest(b, dest, nir_frcp(b, ptn_channel(b, src[0], X)));
      break;

   case OPCODE_EX2:
      ptn_move_dest(b, dest, nir_fexp2(b, ptn_channel(b, src[0], X)));
      break;

   case OPCODE_LG2:
      ptn_move_dest(b, dest, nir_flog2(b, ptn_channel(b, src[0], X)));
      break;

   case OPCODE_POW:
      ptn_move_dest(b, dest, nir_fpow(b,
                                      ptn_channel(b, src[0], X),
                                      ptn_channel(b, src[1], X)));
      break;

   case OPCODE_COS:
      ptn_move_dest(b, dest, nir_fcos(b, ptn_channel(b, src[0], X)));
      break;

   case OPCODE_SIN:
      ptn_move_dest(b, dest, nir_fsin(b, ptn_channel(b, src[0], X)));
      break;

   case OPCODE_ARL:
      ptn_arl(b, dest, src);
      break;

   case OPCODE_EXP:
      ptn_exp(b, dest, src);
      break;

   case OPCODE_LOG:
      ptn_log(b, dest, src);
      break;

   case OPCODE_LRP:
      ptn_lrp(b, dest, src);
      break;

   case OPCODE_DST:
      ptn_dst(b, dest, src);
      break;

   case OPCODE_LIT:
      ptn_lit(b, dest, src);
      break;

   case OPCODE_XPD:
      ptn_xpd(b, dest, src);
      break;

   case OPCODE_DP2:
      ptn_dp2(b, dest, src);
      break;

   case OPCODE_DP3:
      ptn_dp3(b, dest, src);
      break;

   case OPCODE_DP4:
      ptn_dp4(b, dest, src);
      break;

   case OPCODE_DPH:
      ptn_dph(b, dest, src);
      break;

   case OPCODE_KIL:
      ptn_kil(b, dest, src);
      break;

   case OPCODE_CMP:
      ptn_cmp(b, dest, src);
      break;

   case OPCODE_SCS:
      ptn_scs(b, dest, src);
      break;

   case OPCODE_SLT:
      ptn_slt(b, dest, src);
      break;

   case OPCODE_SGT:
      ptn_sgt(b, dest, src);
      break;

   case OPCODE_SLE:
      ptn_sle(b, dest, src);
      break;

   case OPCODE_SGE:
      ptn_sge(b, dest, src);
      break;

   case OPCODE_SEQ:
      ptn_seq(b, dest, src);
      break;

   case OPCODE_SNE:
      ptn_sne(b, dest, src);
      break;

   case OPCODE_TEX:
   case OPCODE_TXB:
   case OPCODE_TXD:
   case OPCODE_TXL:
   case OPCODE_TXP:
   case OPCODE_TXP_NV:
      ptn_tex(b, dest, src, prog_inst);
      break;

   case OPCODE_SWZ:
      /* Extended swizzles were already handled in ptn_get_src(). */
      ptn_alu(b, nir_op_fmov, dest, src);
      break;

   case OPCODE_NOP:
      break;

   default:
      /* OPCODE_MOV is checked explicitly because nir_op_fmov == 0, the
       * same value as an absent table entry.
       */
      if (op_trans[op] != 0 || op == OPCODE_MOV) {
         ptn_alu(b, op_trans[op], dest, src);
      } else {
         fprintf(stderr, "unknown opcode: %s\n", _mesa_opcode_string(op));
         abort();
      }
      break;
   }

   /* Saturation re-reads the just-written register and clamps in place. */
   if (prog_inst->SaturateMode) {
      assert(prog_inst->SaturateMode == SATURATE_ZERO_ONE);
      assert(!dest.dest.is_ssa);
      ptn_move_dest(b, dest, nir_fsat(b, ptn_src_for_dest(c, &dest)));
   }
}
902
903 /**
904 * Puts a NIR intrinsic to store of each PROGRAM_OUTPUT value to the output
905 * variables at the end of the shader.
906 *
907 * We don't generate these incrementally as the PROGRAM_OUTPUT values are
908 * written, because there's no output load intrinsic, which means we couldn't
909 * handle writemasks.
910 */
911 static void
912 ptn_add_output_stores(struct ptn_compile *c)
913 {
914 nir_builder *b = &c->build;
915
916 foreach_list_typed(nir_variable, var, node, &b->shader->outputs) {
917 nir_intrinsic_instr *store =
918 nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_var);
919 store->num_components = 4;
920 store->variables[0] =
921 nir_deref_var_create(b->shader, c->output_vars[var->data.location]);
922 store->src[0].reg.reg = c->output_regs[var->data.location];
923 nir_instr_insert_after_cf_list(c->build.cf_node_list, &store->instr);
924 }
925 }
926
927 static void
928 setup_registers_and_variables(struct ptn_compile *c)
929 {
930 nir_builder *b = &c->build;
931 struct nir_shader *shader = b->shader;
932
933 /* Create input variables. */
934 const int num_inputs = _mesa_flsll(c->prog->InputsRead);
935 for (int i = 0; i < num_inputs; i++) {
936 if (!(c->prog->InputsRead & BITFIELD64_BIT(i)))
937 continue;
938 nir_variable *var = rzalloc(shader, nir_variable);
939 var->type = glsl_vec4_type();
940 var->data.read_only = true;
941 var->data.mode = nir_var_shader_in;
942 var->name = ralloc_asprintf(var, "in_%d", i);
943 var->data.location = i;
944 var->data.index = 0;
945
946 if (c->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
947 struct gl_fragment_program *fp =
948 (struct gl_fragment_program *) c->prog;
949
950 var->data.interpolation = fp->InterpQualifier[i];
951
952 if (i == VARYING_SLOT_POS) {
953 var->data.origin_upper_left = fp->OriginUpperLeft;
954 var->data.pixel_center_integer = fp->PixelCenterInteger;
955 } else if (i == VARYING_SLOT_FOGC) {
956 /* fogcoord is defined as <f, 0.0, 0.0, 1.0>. Make the actual
957 * input variable a float, and create a local containing the
958 * full vec4 value.
959 */
960 var->type = glsl_float_type();
961
962 nir_intrinsic_instr *load_x =
963 nir_intrinsic_instr_create(shader, nir_intrinsic_load_var);
964 load_x->num_components = 1;
965 load_x->variables[0] = nir_deref_var_create(shader, var);
966 nir_ssa_dest_init(&load_x->instr, &load_x->dest, 1, NULL);
967 nir_instr_insert_after_cf_list(b->cf_node_list, &load_x->instr);
968
969 nir_ssa_def *f001 = nir_vec4(b, &load_x->dest.ssa, nir_imm_float(b, 0.0),
970 nir_imm_float(b, 0.0), nir_imm_float(b, 1.0));
971
972 nir_variable *fullvar = rzalloc(shader, nir_variable);
973 fullvar->type = glsl_vec4_type();
974 fullvar->data.mode = nir_var_local;
975 fullvar->name = "fogcoord_tmp";
976 exec_list_push_tail(&b->impl->locals, &fullvar->node);
977
978 nir_intrinsic_instr *store =
979 nir_intrinsic_instr_create(shader, nir_intrinsic_store_var);
980 store->num_components = 4;
981 store->variables[0] = nir_deref_var_create(shader, fullvar);
982 store->src[0] = nir_src_for_ssa(f001);
983 nir_instr_insert_after_cf_list(b->cf_node_list, &store->instr);
984
985 /* Insert the real input into the list so the driver has real
986 * inputs, but set c->input_vars[i] to the temporary so we use
987 * the splatted value.
988 */
989 exec_list_push_tail(&shader->inputs, &var->node);
990 c->input_vars[i] = fullvar;
991 continue;
992 }
993 }
994
995 exec_list_push_tail(&shader->inputs, &var->node);
996 c->input_vars[i] = var;
997 }
998
999 /* Create output registers and variables. */
1000 int max_outputs = _mesa_fls(c->prog->OutputsWritten);
1001 c->output_regs = rzalloc_array(c, nir_register *, max_outputs);
1002
1003 for (int i = 0; i < max_outputs; i++) {
1004 if (!(c->prog->OutputsWritten & BITFIELD64_BIT(i)))
1005 continue;
1006
1007 /* Since we can't load from outputs in the IR, we make temporaries
1008 * for the outputs and emit stores to the real outputs at the end of
1009 * the shader.
1010 */
1011 nir_register *reg = nir_local_reg_create(b->impl);
1012 reg->num_components = 4;
1013
1014 nir_variable *var = rzalloc(shader, nir_variable);
1015 var->type = glsl_vec4_type();
1016 var->data.mode = nir_var_shader_out;
1017 var->name = ralloc_asprintf(var, "out_%d", i);
1018
1019 var->data.location = i;
1020 var->data.index = 0;
1021
1022 c->output_regs[i] = reg;
1023
1024 exec_list_push_tail(&shader->outputs, &var->node);
1025 c->output_vars[i] = var;
1026 }
1027
1028 /* Create temporary registers. */
1029 c->temp_regs = rzalloc_array(c, nir_register *, c->prog->NumTemporaries);
1030
1031 nir_register *reg;
1032 for (int i = 0; i < c->prog->NumTemporaries; i++) {
1033 reg = nir_local_reg_create(b->impl);
1034 if (!reg) {
1035 c->error = true;
1036 return;
1037 }
1038 reg->num_components = 4;
1039 c->temp_regs[i] = reg;
1040 }
1041
1042 /* Create the address register (for ARB_vertex_program). */
1043 reg = nir_local_reg_create(b->impl);
1044 if (!reg) {
1045 c->error = true;
1046 return;
1047 }
1048 reg->num_components = 1;
1049 c->addr_reg = reg;
1050
1051 /* Set the number of uniforms */
1052 shader->num_uniforms = 4 * c->prog->Parameters->NumParameters;
1053 }
1054
/**
 * Translates a Mesa IR gl_program into a freshly-created nir_shader with
 * a single "main" function.  Returns NULL on allocation failure or if
 * translation sets c->error; ownership of the returned shader passes to
 * the caller (it is ralloc'd with a NULL parent).
 */
struct nir_shader *
prog_to_nir(struct gl_program *prog, const nir_shader_compiler_options *options)
{
   struct ptn_compile *c;
   struct nir_shader *s;

   c = rzalloc(NULL, struct ptn_compile);
   if (!c)
      return NULL;
   s = nir_shader_create(NULL, options);
   if (!s)
      goto fail;   /* s is NULL; the fail path returns it as-is */
   c->prog = prog;

   nir_function *func = nir_function_create(s, "main");
   nir_function_overload *overload = nir_function_overload_create(func);
   nir_function_impl *impl = nir_function_impl_create(overload);

   /* Point the builder at the end of main's body; all emission appends
    * there.
    */
   c->build.shader = s;
   c->build.impl = impl;
   c->build.cf_node_list = &impl->body;

   setup_registers_and_variables(c);
   if (unlikely(c->error))
      goto fail;

   for (unsigned int i = 0; i < prog->NumInstructions; i++) {
      ptn_emit_instruction(c, &prog->Instructions[i]);

      if (unlikely(c->error))
         break;
   }

   /* Runs even after an error break above; the shader is freed below in
    * that case, so the extra stores are harmless.
    */
   ptn_add_output_stores(c);

fail:
   if (c->error) {
      ralloc_free(s);
      s = NULL;
   }
   ralloc_free(c);
   return s;
}