glsl: Add GLSL_TYPE_FUNCTION to the base types enums
[mesa.git] / src / mesa / program / prog_to_nir.c
1 /*
2 * Copyright © 2015 Intel Corporation
3 * Copyright © 2014-2015 Broadcom
4 * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
15 * Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
23 * IN THE SOFTWARE.
24 */
25
26 #include "nir/nir.h"
27 #include "nir/nir_builder.h"
28 #include "glsl/list.h"
29 #include "main/imports.h"
30 #include "util/ralloc.h"
31
32 #include "prog_to_nir.h"
33 #include "prog_instruction.h"
34 #include "prog_parameter.h"
35 #include "prog_print.h"
36
37 /**
38 * \file prog_to_nir.c
39 *
40 * A translator from Mesa IR (prog_instruction.h) to NIR. This is primarily
41 * intended to support ARB_vertex_program, ARB_fragment_program, and fixed-function
42 * vertex processing. Full GLSL support should use glsl_to_nir instead.
43 */
44
/* Per-translation state for one Mesa IR program being converted to NIR. */
struct ptn_compile {
   const struct gl_program *prog;   /* the Mesa IR program being translated */
   nir_builder build;               /* builder emitting into the NIR shader */
   bool error;                      /* set on unrecoverable failure; callers bail */

   /* NIR variables for each used input/output varying slot. */
   nir_variable *input_vars[VARYING_SLOT_MAX];
   nir_variable *output_vars[VARYING_SLOT_MAX];
   /* Shadow registers for outputs (outputs can't be read back in NIR, so we
    * write these and store them to the real outputs at the end), and the
    * registers backing PROGRAM_TEMPORARY.
    */
   nir_register **output_regs;
   nir_register **temp_regs;

   /* The single ARB_vertex_program address register (A0). */
   nir_register *addr_reg;
};
57
/* Builds an unsigned[4] compound literal from SWIZZLE_* channel names. */
#define SWIZ(X, Y, Z, W) \
   (unsigned[4]){ SWIZZLE_##X, SWIZZLE_##Y, SWIZZLE_##Z, SWIZZLE_##W }
/* Emits a 4-component swizzle of src; channels are named (X, Y, Z, W, ...). */
#define ptn_swizzle(b, src, x, y, z, w) nir_swizzle(b, src, SWIZ(x, y, z, w), 4, true)
/* Extracts a single named channel of src as a 1-component SSA value. */
#define ptn_channel(b, src, ch) nir_swizzle(b, src, SWIZ(ch, ch, ch, ch), 1, true)
62
63 static nir_ssa_def *
64 ptn_src_for_dest(struct ptn_compile *c, nir_alu_dest *dest)
65 {
66 nir_builder *b = &c->build;
67
68 nir_alu_src src;
69 memset(&src, 0, sizeof(src));
70
71 if (dest->dest.is_ssa)
72 src.src = nir_src_for_ssa(&dest->dest.ssa);
73 else {
74 assert(!dest->dest.reg.indirect);
75 src.src = nir_src_for_reg(dest->dest.reg.reg);
76 src.src.reg.base_offset = dest->dest.reg.base_offset;
77 }
78
79 for (int i = 0; i < 4; i++)
80 src.swizzle[i] = i;
81
82 return nir_fmov_alu(b, src, 4);
83 }
84
85 static nir_alu_dest
86 ptn_get_dest(struct ptn_compile *c, const struct prog_dst_register *prog_dst)
87 {
88 nir_alu_dest dest;
89
90 memset(&dest, 0, sizeof(dest));
91
92 switch (prog_dst->File) {
93 case PROGRAM_TEMPORARY:
94 dest.dest.reg.reg = c->temp_regs[prog_dst->Index];
95 break;
96 case PROGRAM_OUTPUT:
97 dest.dest.reg.reg = c->output_regs[prog_dst->Index];
98 break;
99 case PROGRAM_ADDRESS:
100 assert(prog_dst->Index == 0);
101 dest.dest.reg.reg = c->addr_reg;
102 break;
103 case PROGRAM_UNDEFINED:
104 break;
105 }
106
107 dest.write_mask = prog_dst->WriteMask;
108 dest.saturate = false;
109
110 assert(!prog_dst->RelAddr);
111
112 return dest;
113 }
114
115 /**
116 * Multiply the contents of the ADDR register by 4 to convert from the number
117 * of vec4s to the number of floating point components.
118 */
119 static nir_ssa_def *
120 ptn_addr_reg_value(struct ptn_compile *c)
121 {
122 nir_builder *b = &c->build;
123 nir_alu_src src;
124 memset(&src, 0, sizeof(src));
125 src.src = nir_src_for_reg(c->addr_reg);
126
127 return nir_imul(b, nir_fmov_alu(b, src, 1), nir_imm_int(b, 4));
128 }
129
/**
 * Builds the NIR SSA value for one Mesa IR source operand, applying the
 * swizzle, absolute-value, and negate modifiers encoded in prog_src.
 *
 * The outer switch materializes the raw value (immediate, register read, or
 * input/uniform load intrinsic); the tail then applies the modifiers, with a
 * slow path for SWZ-style extended swizzles (per-component 0/1/negate).
 */
static nir_ssa_def *
ptn_get_src(struct ptn_compile *c, const struct prog_src_register *prog_src)
{
   nir_builder *b = &c->build;
   nir_alu_src src;

   memset(&src, 0, sizeof(src));

   switch (prog_src->File) {
   case PROGRAM_UNDEFINED:
      /* Unused source slot: any value will do, use 0.0. */
      return nir_imm_float(b, 0.0);
   case PROGRAM_TEMPORARY:
      assert(!prog_src->RelAddr && prog_src->Index >= 0);
      src.src.reg.reg = c->temp_regs[prog_src->Index];
      break;
   case PROGRAM_INPUT: {
      /* ARB_vertex_program doesn't allow relative addressing on vertex
       * attributes; ARB_fragment_program has no relative addressing at all.
       */
      assert(!prog_src->RelAddr);

      assert(prog_src->Index >= 0 && prog_src->Index < VARYING_SLOT_MAX);

      /* Emit a fresh load_var for every read of the input. */
      nir_intrinsic_instr *load =
         nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_var);
      load->num_components = 4;
      load->variables[0] = nir_deref_var_create(load, c->input_vars[prog_src->Index]);

      nir_ssa_dest_init(&load->instr, &load->dest, 4, NULL);
      nir_instr_insert_after_cf_list(b->cf_node_list, &load->instr);

      src.src = nir_src_for_ssa(&load->dest.ssa);
      break;
   }
   case PROGRAM_STATE_VAR:
   case PROGRAM_CONSTANT: {
      /* We actually want to look at the type in the Parameters list for this,
       * because it lets us upload constant builtin uniforms as actual
       * constants.
       */
      struct gl_program_parameter_list *plist = c->prog->Parameters;
      gl_register_file file = prog_src->RelAddr ? prog_src->File :
         plist->Parameters[prog_src->Index].Type;

      switch (file) {
      case PROGRAM_CONSTANT:
         /* If nothing indexes the constant file indirectly, the value can be
          * baked in as an immediate vec4.
          */
         if ((c->prog->IndirectRegisterFiles & (1 << PROGRAM_CONSTANT)) == 0) {
            float *v = (float *) plist->ParameterValues[prog_src->Index];
            src.src = nir_src_for_ssa(nir_imm_vec4(b, v[0], v[1], v[2], v[3]));
            break;
         }
         /* FALLTHROUGH */
      case PROGRAM_STATE_VAR: {
         nir_intrinsic_op load_op =
            prog_src->RelAddr ? nir_intrinsic_load_uniform_indirect :
                                nir_intrinsic_load_uniform;
         nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, load_op);
         nir_ssa_dest_init(&load->instr, &load->dest, 4, NULL);
         load->num_components = 4;

         /* Multiply src->Index by 4 to scale from # of vec4s to components. */
         load->const_index[0] = 4 * prog_src->Index;
         load->const_index[1] = 1;

         if (prog_src->RelAddr) {
            nir_ssa_def *reladdr = ptn_addr_reg_value(c);
            if (prog_src->Index < 0) {
               /* This is a negative offset which should be added to the address
                * register's value.
                */
               reladdr = nir_iadd(b, reladdr, nir_imm_int(b, load->const_index[0]));
               load->const_index[0] = 0;
            }
            load->src[0] = nir_src_for_ssa(reladdr);
         }

         nir_instr_insert_after_cf_list(b->cf_node_list, &load->instr);

         src.src = nir_src_for_ssa(&load->dest.ssa);
         break;
      }
      default:
         fprintf(stderr, "bad uniform src register file: %s (%d)\n",
                 _mesa_register_file_name(file), file);
         abort();
      }
      break;
   }
   default:
      fprintf(stderr, "unknown src register file: %s (%d)\n",
              _mesa_register_file_name(prog_src->File), prog_src->File);
      abort();
   }

   nir_ssa_def *def;
   if (!HAS_EXTENDED_SWIZZLE(prog_src->Swizzle) &&
       (prog_src->Negate == NEGATE_NONE || prog_src->Negate == NEGATE_XYZW)) {
      /* The simple non-SWZ case. */
      for (int i = 0; i < 4; i++)
         src.swizzle[i] = GET_SWZ(prog_src->Swizzle, i);

      def = nir_fmov_alu(b, src, 4);

      if (prog_src->Abs)
         def = nir_fabs(b, def);

      /* Negate is either all channels or none here (checked above). */
      if (prog_src->Negate)
         def = nir_fneg(b, def);
   } else {
      /* The SWZ instruction allows per-component zero/one swizzles, and also
       * per-component negation.
       */
      nir_ssa_def *chans[4];
      for (int i = 0; i < 4; i++) {
         int swizzle = GET_SWZ(prog_src->Swizzle, i);
         if (swizzle == SWIZZLE_ZERO) {
            chans[i] = nir_imm_float(b, 0.0);
         } else if (swizzle == SWIZZLE_ONE) {
            chans[i] = nir_imm_float(b, 1.0);
         } else {
            assert(swizzle != SWIZZLE_NIL);
            /* Extract one real channel with a single-component fmov. */
            nir_alu_instr *mov = nir_alu_instr_create(b->shader, nir_op_fmov);
            nir_ssa_dest_init(&mov->instr, &mov->dest.dest, 1, NULL);
            mov->dest.write_mask = 0x1;
            mov->src[0] = src;
            mov->src[0].swizzle[0] = swizzle;
            nir_instr_insert_after_cf_list(b->cf_node_list, &mov->instr);

            chans[i] = &mov->dest.dest.ssa;
         }

         if (prog_src->Abs)
            chans[i] = nir_fabs(b, chans[i]);

         if (prog_src->Negate & (1 << i))
            chans[i] = nir_fneg(b, chans[i]);
      }
      def = nir_vec4(b, chans[0], chans[1], chans[2], chans[3]);
   }

   return def;
}
272
273 static void
274 ptn_alu(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
275 {
276 unsigned num_srcs = nir_op_infos[op].num_inputs;
277 nir_alu_instr *instr = nir_alu_instr_create(b->shader, op);
278 unsigned i;
279
280 for (i = 0; i < num_srcs; i++)
281 instr->src[i].src = nir_src_for_ssa(src[i]);
282
283 instr->dest = dest;
284 nir_instr_insert_after_cf_list(b->cf_node_list, &instr->instr);
285 }
286
/**
 * Moves def into the channels of dest selected by write_mask, leaving the
 * other channels untouched.  Swizzle slots beyond def's width replicate
 * def's last component so the fmov source is always fully populated.
 */
static void
ptn_move_dest_masked(nir_builder *b, nir_alu_dest dest,
                     nir_ssa_def *def, unsigned write_mask)
{
   /* Nothing to do if the caller's mask doesn't overlap the dest's. */
   if (!(dest.write_mask & write_mask))
      return;

   nir_alu_instr *mov = nir_alu_instr_create(b->shader, nir_op_fmov);
   if (!mov)
      return; /* NOTE(review): allocation failure is silently dropped here,
               * while the rest of this file aborts or sets c->error --
               * confirm this is intentional. */

   mov->dest = dest;
   mov->dest.write_mask &= write_mask;
   mov->src[0].src = nir_src_for_ssa(def);
   /* Pad the swizzle by replicating def's last component. */
   for (unsigned i = def->num_components; i < 4; i++)
      mov->src[0].swizzle[i] = def->num_components - 1;
   nir_instr_insert_after_cf_list(b->cf_node_list, &mov->instr);
}
305
306 static void
307 ptn_move_dest(nir_builder *b, nir_alu_dest dest, nir_ssa_def *def)
308 {
309 ptn_move_dest_masked(b, dest, def, WRITEMASK_XYZW);
310 }
311
312 static void
313 ptn_arl(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
314 {
315 ptn_move_dest(b, dest, nir_f2i(b, nir_ffloor(b, src[0])));
316 }
317
318 /* EXP - Approximate Exponential Base 2
319 * dst.x = 2^{\lfloor src.x\rfloor}
320 * dst.y = src.x - \lfloor src.x\rfloor
321 * dst.z = 2^{src.x}
322 * dst.w = 1.0
323 */
324 static void
325 ptn_exp(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
326 {
327 nir_ssa_def *srcx = ptn_channel(b, src[0], X);
328
329 ptn_move_dest_masked(b, dest, nir_fexp2(b, nir_ffloor(b, srcx)), WRITEMASK_X);
330 ptn_move_dest_masked(b, dest, nir_fsub(b, srcx, nir_ffloor(b, srcx)), WRITEMASK_Y);
331 ptn_move_dest_masked(b, dest, nir_fexp2(b, srcx), WRITEMASK_Z);
332 ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_W);
333 }
334
335 /* LOG - Approximate Logarithm Base 2
336 * dst.x = \lfloor\log_2{|src.x|}\rfloor
337 * dst.y = |src.x| * 2^{-\lfloor\log_2{|src.x|}\rfloor}}
338 * dst.z = \log_2{|src.x|}
339 * dst.w = 1.0
340 */
341 static void
342 ptn_log(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
343 {
344 nir_ssa_def *abs_srcx = nir_fabs(b, ptn_channel(b, src[0], X));
345 nir_ssa_def *log2 = nir_flog2(b, abs_srcx);
346 nir_ssa_def *floor_log2 = nir_ffloor(b, log2);
347
348 ptn_move_dest_masked(b, dest, floor_log2, WRITEMASK_X);
349 ptn_move_dest_masked(b, dest,
350 nir_fmul(b, abs_srcx,
351 nir_fexp2(b, nir_fneg(b, floor_log2))),
352 WRITEMASK_Y);
353 ptn_move_dest_masked(b, dest, log2, WRITEMASK_Z);
354 ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_W);
355 }
356
357 /* DST - Distance Vector
358 * dst.x = 1.0
359 * dst.y = src0.y \times src1.y
360 * dst.z = src0.z
361 * dst.w = src1.w
362 */
363 static void
364 ptn_dst(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
365 {
366 ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_X);
367 ptn_move_dest_masked(b, dest, nir_fmul(b, src[0], src[1]), WRITEMASK_Y);
368 ptn_move_dest_masked(b, dest, nir_fmov(b, src[0]), WRITEMASK_Z);
369 ptn_move_dest_masked(b, dest, nir_fmov(b, src[1]), WRITEMASK_W);
370 }
371
/* LIT - Light Coefficients
 * dst.x = 1.0
 * dst.y = max(src.x, 0.0)
 * dst.z = (src.x > 0.0) ? max(src.y, 0.0)^{clamp(src.w, -128.0, 128.0))} : 0
 * dst.w = 1.0
 */
static void
ptn_lit(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
{
   /* .x and .w are both the constant 1.0. */
   ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_XW);

   ptn_move_dest_masked(b, dest, nir_fmax(b, ptn_channel(b, src[0], X),
                                          nir_imm_float(b, 0.0)), WRITEMASK_Y);

   /* Skip the expensive pow/select sequence when .z isn't written. */
   if (dest.write_mask & WRITEMASK_Z) {
      nir_ssa_def *src0_y = ptn_channel(b, src[0], Y);
      /* Clamp the exponent to [-128, 128] per the formula above. */
      nir_ssa_def *wclamp = nir_fmax(b, nir_fmin(b, ptn_channel(b, src[0], W),
                                                 nir_imm_float(b, 128.0)),
                                     nir_imm_float(b, -128.0));
      nir_ssa_def *pow = nir_fpow(b, nir_fmax(b, src0_y, nir_imm_float(b, 0.0)),
                                  wclamp);

      /* Select 0.0 when src.x <= 0.0, otherwise the pow result.  Use the
       * boolean compare + bcsel on integer-capable hardware, the float
       * set-on + fcsel pair otherwise.
       */
      nir_ssa_def *z;
      if (b->shader->options->native_integers) {
         z = nir_bcsel(b,
                       nir_fge(b, nir_imm_float(b, 0.0), ptn_channel(b, src[0], X)),
                       nir_imm_float(b, 0.0),
                       pow);
      } else {
         z = nir_fcsel(b,
                       nir_sge(b, nir_imm_float(b, 0.0), ptn_channel(b, src[0], X)),
                       nir_imm_float(b, 0.0),
                       pow);
      }

      ptn_move_dest_masked(b, dest, z, WRITEMASK_Z);
   }
}
410
411 /* SCS - Sine Cosine
412 * dst.x = \cos{src.x}
413 * dst.y = \sin{src.x}
414 * dst.z = 0.0
415 * dst.w = 1.0
416 */
417 static void
418 ptn_scs(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
419 {
420 ptn_move_dest_masked(b, dest, nir_fcos(b, ptn_channel(b, src[0], X)),
421 WRITEMASK_X);
422 ptn_move_dest_masked(b, dest, nir_fsin(b, ptn_channel(b, src[0], X)),
423 WRITEMASK_Y);
424 ptn_move_dest_masked(b, dest, nir_imm_float(b, 0.0), WRITEMASK_Z);
425 ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_W);
426 }
427
428 /**
429 * Emit SLT. For platforms with integers, prefer b2f(flt(...)).
430 */
431 static void
432 ptn_slt(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
433 {
434 if (b->shader->options->native_integers) {
435 ptn_move_dest(b, dest, nir_b2f(b, nir_flt(b, src[0], src[1])));
436 } else {
437 ptn_move_dest(b, dest, nir_slt(b, src[0], src[1]));
438 }
439 }
440
441 /**
442 * Emit SGE. For platforms with integers, prefer b2f(fge(...)).
443 */
444 static void
445 ptn_sge(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
446 {
447 if (b->shader->options->native_integers) {
448 ptn_move_dest(b, dest, nir_b2f(b, nir_fge(b, src[0], src[1])));
449 } else {
450 ptn_move_dest(b, dest, nir_sge(b, src[0], src[1]));
451 }
452 }
453
454 static void
455 ptn_sle(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
456 {
457 nir_ssa_def *commuted[] = { src[1], src[0] };
458 ptn_sge(b, dest, commuted);
459 }
460
461 static void
462 ptn_sgt(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
463 {
464 nir_ssa_def *commuted[] = { src[1], src[0] };
465 ptn_slt(b, dest, commuted);
466 }
467
468 /**
469 * Emit SEQ. For platforms with integers, prefer b2f(feq(...)).
470 */
471 static void
472 ptn_seq(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
473 {
474 if (b->shader->options->native_integers) {
475 ptn_move_dest(b, dest, nir_b2f(b, nir_feq(b, src[0], src[1])));
476 } else {
477 ptn_move_dest(b, dest, nir_seq(b, src[0], src[1]));
478 }
479 }
480
481 /**
482 * Emit SNE. For platforms with integers, prefer b2f(fne(...)).
483 */
484 static void
485 ptn_sne(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
486 {
487 if (b->shader->options->native_integers) {
488 ptn_move_dest(b, dest, nir_b2f(b, nir_fne(b, src[0], src[1])));
489 } else {
490 ptn_move_dest(b, dest, nir_sne(b, src[0], src[1]));
491 }
492 }
493
494 static void
495 ptn_xpd(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
496 {
497 ptn_move_dest_masked(b, dest,
498 nir_fsub(b,
499 nir_fmul(b,
500 ptn_swizzle(b, src[0], Y, Z, X, X),
501 ptn_swizzle(b, src[1], Z, X, Y, X)),
502 nir_fmul(b,
503 ptn_swizzle(b, src[1], Y, Z, X, X),
504 ptn_swizzle(b, src[0], Z, X, Y, X))),
505 WRITEMASK_XYZ);
506 ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_W);
507 }
508
509 static void
510 ptn_dp2(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
511 {
512 ptn_move_dest(b, dest, nir_fdot2(b, src[0], src[1]));
513 }
514
515 static void
516 ptn_dp3(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
517 {
518 ptn_move_dest(b, dest, nir_fdot3(b, src[0], src[1]));
519 }
520
521 static void
522 ptn_dp4(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
523 {
524 ptn_move_dest(b, dest, nir_fdot4(b, src[0], src[1]));
525 }
526
527 static void
528 ptn_dph(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
529 {
530 nir_ssa_def *dp3 = nir_fdot3(b, src[0], src[1]);
531 ptn_move_dest(b, dest, nir_fadd(b, dp3, ptn_channel(b, src[1], W)));
532 }
533
534 static void
535 ptn_cmp(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
536 {
537 if (b->shader->options->native_integers) {
538 ptn_move_dest(b, dest, nir_bcsel(b,
539 nir_flt(b, src[0], nir_imm_float(b, 0.0)),
540 src[1], src[2]));
541 } else {
542 ptn_move_dest(b, dest, nir_fcsel(b,
543 nir_slt(b, src[0], nir_imm_float(b, 0.0)),
544 src[1], src[2]));
545 }
546 }
547
548 static void
549 ptn_lrp(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
550 {
551 ptn_move_dest(b, dest, nir_flrp(b, src[2], src[1], src[0]));
552 }
553
554 static void
555 ptn_kil(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
556 {
557 nir_ssa_def *cmp = b->shader->options->native_integers ?
558 nir_bany4(b, nir_flt(b, src[0], nir_imm_float(b, 0.0))) :
559 nir_fany4(b, nir_slt(b, src[0], nir_imm_float(b, 0.0)));
560
561 nir_intrinsic_instr *discard =
562 nir_intrinsic_instr_create(b->shader, nir_intrinsic_discard_if);
563 discard->src[0] = nir_src_for_ssa(cmp);
564 nir_instr_insert_after_cf_list(b->cf_node_list, &discard->instr);
565 }
566
/**
 * Translates a Mesa IR texture instruction (TEX/TXB/TXD/TXL/TXP) into a
 * nir_tex_instr.  src[0] holds the coordinate; the extra operand for
 * projection, bias, or LOD is packed into its .w channel, and the shadow
 * comparator (when TexShadow is set) comes from .z or .w depending on the
 * coordinate size.
 */
static void
ptn_tex(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src,
        struct prog_instruction *prog_inst)
{
   nir_tex_instr *instr;
   nir_texop op;
   unsigned num_srcs;

   /* Pick the NIR texop and count the NIR sources we will fill in below. */
   switch (prog_inst->Opcode) {
   case OPCODE_TEX:
      op = nir_texop_tex;
      num_srcs = 1;
      break;
   case OPCODE_TXB:
      op = nir_texop_txb;
      num_srcs = 2;
      break;
   case OPCODE_TXD:
      op = nir_texop_txd;
      num_srcs = 3;
      break;
   case OPCODE_TXL:
      op = nir_texop_txl;
      num_srcs = 2;
      break;
   case OPCODE_TXP:
      /* Projection is a separate NIR source on a plain tex op. */
      op = nir_texop_tex;
      num_srcs = 2;
      break;
   case OPCODE_TXP_NV:
      assert(!"not handled");
      op = nir_texop_tex;
      num_srcs = 2;
      break;
   default:
      fprintf(stderr, "unknown tex op %d\n", prog_inst->Opcode);
      abort();
   }

   /* Shadow comparisons need one more source for the comparator. */
   if (prog_inst->TexShadow)
      num_srcs++;

   instr = nir_tex_instr_create(b->shader, num_srcs);
   instr->op = op;
   instr->dest_type = nir_type_float;
   instr->is_shadow = prog_inst->TexShadow;
   instr->sampler_index = prog_inst->TexSrcUnit;

   /* Map the Mesa texture target onto the NIR sampler dimensionality. */
   switch (prog_inst->TexSrcTarget) {
   case TEXTURE_1D_INDEX:
      instr->sampler_dim = GLSL_SAMPLER_DIM_1D;
      break;
   case TEXTURE_2D_INDEX:
      instr->sampler_dim = GLSL_SAMPLER_DIM_2D;
      break;
   case TEXTURE_3D_INDEX:
      instr->sampler_dim = GLSL_SAMPLER_DIM_3D;
      break;
   case TEXTURE_CUBE_INDEX:
      instr->sampler_dim = GLSL_SAMPLER_DIM_CUBE;
      break;
   case TEXTURE_RECT_INDEX:
      instr->sampler_dim = GLSL_SAMPLER_DIM_RECT;
      break;
   default:
      fprintf(stderr, "Unknown texture target %d\n", prog_inst->TexSrcTarget);
      abort();
   }

   /* Coordinate component count follows from the sampler dimensionality. */
   switch (instr->sampler_dim) {
   case GLSL_SAMPLER_DIM_1D:
   case GLSL_SAMPLER_DIM_BUF:
      instr->coord_components = 1;
      break;
   case GLSL_SAMPLER_DIM_2D:
   case GLSL_SAMPLER_DIM_RECT:
   case GLSL_SAMPLER_DIM_EXTERNAL:
   case GLSL_SAMPLER_DIM_MS:
      instr->coord_components = 2;
      break;
   case GLSL_SAMPLER_DIM_3D:
   case GLSL_SAMPLER_DIM_CUBE:
      instr->coord_components = 3;
      break;
   }

   unsigned src_number = 0;

   /* The coordinate is always source 0. */
   instr->src[src_number].src =
      nir_src_for_ssa(ptn_swizzle(b, src[0], X, Y, Z, W));
   instr->src[src_number].src_type = nir_tex_src_coord;
   src_number++;

   /* TXP's projector, TXB's bias, and TXL's LOD all live in coord.w. */
   if (prog_inst->Opcode == OPCODE_TXP) {
      instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], W));
      instr->src[src_number].src_type = nir_tex_src_projector;
      src_number++;
   }

   if (prog_inst->Opcode == OPCODE_TXB) {
      instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], W));
      instr->src[src_number].src_type = nir_tex_src_bias;
      src_number++;
   }

   if (prog_inst->Opcode == OPCODE_TXL) {
      instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], W));
      instr->src[src_number].src_type = nir_tex_src_lod;
      src_number++;
   }

   if (instr->is_shadow) {
      /* The comparator is the first channel after the coordinate. */
      if (instr->coord_components < 3)
         instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], Z));
      else
         instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], W));

      instr->src[src_number].src_type = nir_tex_src_comparitor;
      src_number++;
   }

   assert(src_number == num_srcs);

   nir_ssa_dest_init(&instr->instr, &instr->dest, 4, NULL);
   nir_instr_insert_after_cf_list(b->cf_node_list, &instr->instr);

   /* Resolve the writemask on the texture op. */
   ptn_move_dest(b, dest, &instr->dest.ssa);
}
696
/**
 * Mesa IR opcode -> NIR ALU opcode table for instructions that translate
 * directly to a single ALU op via ptn_alu().
 *
 * Entries left at 0 are handled by a dedicated case in
 * ptn_emit_instruction() instead (dot products, texturing, comparisons,
 * etc.).  OPCODE_MOV is special-cased there because nir_op_fmov may itself
 * be enum value 0, which is indistinguishable from "no entry" here.
 */
static const nir_op op_trans[MAX_OPCODE] = {
   [OPCODE_NOP] = 0,
   [OPCODE_ABS] = nir_op_fabs,
   [OPCODE_ADD] = nir_op_fadd,
   [OPCODE_ARL] = 0,
   [OPCODE_CMP] = 0,
   [OPCODE_COS] = nir_op_fcos,
   [OPCODE_DDX] = nir_op_fddx,
   [OPCODE_DDY] = nir_op_fddy,
   [OPCODE_DP2] = 0,
   [OPCODE_DP3] = 0,
   [OPCODE_DP4] = 0,
   [OPCODE_DPH] = 0,
   [OPCODE_DST] = 0,
   [OPCODE_END] = 0,
   [OPCODE_EX2] = nir_op_fexp2,
   [OPCODE_EXP] = 0,
   [OPCODE_FLR] = nir_op_ffloor,
   [OPCODE_FRC] = nir_op_ffract,
   [OPCODE_LG2] = nir_op_flog2,
   [OPCODE_LIT] = 0,
   [OPCODE_LOG] = 0,
   [OPCODE_LRP] = 0,
   [OPCODE_MAD] = nir_op_ffma,
   [OPCODE_MAX] = nir_op_fmax,
   [OPCODE_MIN] = nir_op_fmin,
   [OPCODE_MOV] = nir_op_fmov,
   [OPCODE_MUL] = nir_op_fmul,
   [OPCODE_POW] = nir_op_fpow,
   [OPCODE_RCP] = nir_op_frcp,

   [OPCODE_RSQ] = nir_op_frsq,
   [OPCODE_SCS] = 0,
   [OPCODE_SEQ] = 0,
   [OPCODE_SGE] = 0,
   [OPCODE_SGT] = 0,
   [OPCODE_SIN] = nir_op_fsin,
   [OPCODE_SLE] = 0,
   [OPCODE_SLT] = 0,
   [OPCODE_SNE] = 0,
   [OPCODE_SSG] = nir_op_fsign,
   [OPCODE_SUB] = nir_op_fsub,
   [OPCODE_SWZ] = 0,
   [OPCODE_TEX] = 0,
   [OPCODE_TRUNC] = nir_op_ftrunc,
   [OPCODE_TXB] = 0,
   [OPCODE_TXD] = 0,
   [OPCODE_TXL] = 0,
   [OPCODE_TXP] = 0,
   [OPCODE_TXP_NV] = 0,
   [OPCODE_XPD] = 0,
};
749
/**
 * Translates one Mesa IR instruction into NIR: fetches all three potential
 * sources, builds the destination, dispatches either to a dedicated emitter
 * or through the op_trans table, and finally applies saturation if the
 * instruction requests it.
 */
static void
ptn_emit_instruction(struct ptn_compile *c, struct prog_instruction *prog_inst)
{
   nir_builder *b = &c->build;
   unsigned i;
   const unsigned op = prog_inst->Opcode;

   if (op == OPCODE_END)
      return;

   /* Fetch all three source slots up front; unused slots are
    * PROGRAM_UNDEFINED and translate to a cheap immediate 0.0.
    */
   nir_ssa_def *src[3];
   for (i = 0; i < 3; i++) {
      src[i] = ptn_get_src(c, &prog_inst->SrcReg[i]);
   }
   nir_alu_dest dest = ptn_get_dest(c, &prog_inst->DstReg);
   if (c->error)
      return;

   switch (op) {
   /* Scalar ops operate on the .x channel and broadcast the result. */
   case OPCODE_RSQ:
      ptn_move_dest(b, dest, nir_frsq(b, ptn_channel(b, src[0], X)));
      break;

   case OPCODE_RCP:
      ptn_move_dest(b, dest, nir_frcp(b, ptn_channel(b, src[0], X)));
      break;

   case OPCODE_EX2:
      ptn_move_dest(b, dest, nir_fexp2(b, ptn_channel(b, src[0], X)));
      break;

   case OPCODE_LG2:
      ptn_move_dest(b, dest, nir_flog2(b, ptn_channel(b, src[0], X)));
      break;

   case OPCODE_POW:
      ptn_move_dest(b, dest, nir_fpow(b,
                                      ptn_channel(b, src[0], X),
                                      ptn_channel(b, src[1], X)));
      break;

   case OPCODE_COS:
      ptn_move_dest(b, dest, nir_fcos(b, ptn_channel(b, src[0], X)));
      break;

   case OPCODE_SIN:
      ptn_move_dest(b, dest, nir_fsin(b, ptn_channel(b, src[0], X)));
      break;

   /* Multi-step instructions with dedicated emitters. */
   case OPCODE_ARL:
      ptn_arl(b, dest, src);
      break;

   case OPCODE_EXP:
      ptn_exp(b, dest, src);
      break;

   case OPCODE_LOG:
      ptn_log(b, dest, src);
      break;

   case OPCODE_LRP:
      ptn_lrp(b, dest, src);
      break;

   case OPCODE_DST:
      ptn_dst(b, dest, src);
      break;

   case OPCODE_LIT:
      ptn_lit(b, dest, src);
      break;

   case OPCODE_XPD:
      ptn_xpd(b, dest, src);
      break;

   case OPCODE_DP2:
      ptn_dp2(b, dest, src);
      break;

   case OPCODE_DP3:
      ptn_dp3(b, dest, src);
      break;

   case OPCODE_DP4:
      ptn_dp4(b, dest, src);
      break;

   case OPCODE_DPH:
      ptn_dph(b, dest, src);
      break;

   case OPCODE_KIL:
      ptn_kil(b, dest, src);
      break;

   case OPCODE_CMP:
      ptn_cmp(b, dest, src);
      break;

   case OPCODE_SCS:
      ptn_scs(b, dest, src);
      break;

   case OPCODE_SLT:
      ptn_slt(b, dest, src);
      break;

   case OPCODE_SGT:
      ptn_sgt(b, dest, src);
      break;

   case OPCODE_SLE:
      ptn_sle(b, dest, src);
      break;

   case OPCODE_SGE:
      ptn_sge(b, dest, src);
      break;

   case OPCODE_SEQ:
      ptn_seq(b, dest, src);
      break;

   case OPCODE_SNE:
      ptn_sne(b, dest, src);
      break;

   case OPCODE_TEX:
   case OPCODE_TXB:
   case OPCODE_TXD:
   case OPCODE_TXL:
   case OPCODE_TXP:
   case OPCODE_TXP_NV:
      ptn_tex(b, dest, src, prog_inst);
      break;

   case OPCODE_SWZ:
      /* Extended swizzles were already handled in ptn_get_src(). */
      ptn_alu(b, nir_op_fmov, dest, src);
      break;

   case OPCODE_NOP:
      break;

   default:
      /* Single-ALU-op translation via the table.  MOV is checked explicitly
       * because nir_op_fmov may be 0 and thus look like a missing entry.
       */
      if (op_trans[op] != 0 || op == OPCODE_MOV) {
         ptn_alu(b, op_trans[op], dest, src);
      } else {
         fprintf(stderr, "unknown opcode: %s\n", _mesa_opcode_string(op));
         abort();
      }
      break;
   }

   /* Apply saturation by reading the written value back and fsat-ing it.
    * This only works for register (non-SSA) destinations.
    */
   if (prog_inst->SaturateMode) {
      assert(prog_inst->SaturateMode == SATURATE_ZERO_ONE);
      assert(!dest.dest.is_ssa);
      ptn_move_dest(b, dest, nir_fsat(b, ptn_src_for_dest(c, &dest)));
   }
}
912
/**
 * Puts a NIR intrinsic to store of each PROGRAM_OUTPUT value to the output
 * variables at the end of the shader.
 *
 * We don't generate these incrementally as the PROGRAM_OUTPUT values are
 * written, because there's no output load intrinsic, which means we couldn't
 * handle writemasks.
 */
static void
ptn_add_output_stores(struct ptn_compile *c)
{
   nir_builder *b = &c->build;

   foreach_list_typed(nir_variable, var, node, &b->shader->outputs) {
      nir_intrinsic_instr *store =
         nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_var);
      store->num_components = 4;
      store->variables[0] =
         nir_deref_var_create(store, c->output_vars[var->data.location]);
      /* Source the shadow register the translation wrote into. */
      store->src[0].reg.reg = c->output_regs[var->data.location];
      nir_instr_insert_after_cf_list(c->build.cf_node_list, &store->instr);
   }
}
936
/**
 * Creates the NIR variables and registers the translation needs: one input
 * variable per bit of InputsRead (with fragment-program special cases for
 * gl_FragCoord and fogcoord), shadow registers plus output variables per
 * bit of OutputsWritten, the temporary registers, and the address register.
 * Sets c->error on allocation failure.
 */
static void
setup_registers_and_variables(struct ptn_compile *c)
{
   nir_builder *b = &c->build;
   struct nir_shader *shader = b->shader;

   /* Create input variables. */
   const int num_inputs = _mesa_flsll(c->prog->InputsRead);
   for (int i = 0; i < num_inputs; i++) {
      if (!(c->prog->InputsRead & BITFIELD64_BIT(i)))
         continue;
      nir_variable *var = rzalloc(shader, nir_variable);
      var->type = glsl_vec4_type();
      var->data.read_only = true;
      var->data.mode = nir_var_shader_in;
      var->name = ralloc_asprintf(var, "in_%d", i);
      var->data.location = i;
      var->data.index = 0;

      if (c->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
         struct gl_fragment_program *fp =
            (struct gl_fragment_program *) c->prog;

         var->data.interpolation = fp->InterpQualifier[i];

         if (i == VARYING_SLOT_POS) {
            /* Carry the window-coordinate conventions through to the NIR
             * variable so drivers can apply them.
             */
            var->data.origin_upper_left = fp->OriginUpperLeft;
            var->data.pixel_center_integer = fp->PixelCenterInteger;
         } else if (i == VARYING_SLOT_FOGC) {
            /* fogcoord is defined as <f, 0.0, 0.0, 1.0>.  Make the actual
             * input variable a float, and create a local containing the
             * full vec4 value.
             */
            var->type = glsl_float_type();

            nir_intrinsic_instr *load_x =
               nir_intrinsic_instr_create(shader, nir_intrinsic_load_var);
            load_x->num_components = 1;
            load_x->variables[0] = nir_deref_var_create(load_x, var);
            nir_ssa_dest_init(&load_x->instr, &load_x->dest, 1, NULL);
            nir_instr_insert_after_cf_list(b->cf_node_list, &load_x->instr);

            /* Splat <f, 0, 0, 1> and store it to a local vec4 temporary. */
            nir_ssa_def *f001 = nir_vec4(b, &load_x->dest.ssa, nir_imm_float(b, 0.0),
                                         nir_imm_float(b, 0.0), nir_imm_float(b, 1.0));

            nir_variable *fullvar = rzalloc(shader, nir_variable);
            fullvar->type = glsl_vec4_type();
            fullvar->data.mode = nir_var_local;
            fullvar->name = "fogcoord_tmp";
            exec_list_push_tail(&b->impl->locals, &fullvar->node);

            nir_intrinsic_instr *store =
               nir_intrinsic_instr_create(shader, nir_intrinsic_store_var);
            store->num_components = 4;
            store->variables[0] = nir_deref_var_create(store, fullvar);
            store->src[0] = nir_src_for_ssa(f001);
            nir_instr_insert_after_cf_list(b->cf_node_list, &store->instr);

            /* Insert the real input into the list so the driver has real
             * inputs, but set c->input_vars[i] to the temporary so we use
             * the splatted value.
             */
            exec_list_push_tail(&shader->inputs, &var->node);
            c->input_vars[i] = fullvar;
            continue;
         }
      }

      exec_list_push_tail(&shader->inputs, &var->node);
      c->input_vars[i] = var;
   }

   /* Create output registers and variables. */
   int max_outputs = _mesa_fls(c->prog->OutputsWritten);
   c->output_regs = rzalloc_array(c, nir_register *, max_outputs);

   for (int i = 0; i < max_outputs; i++) {
      if (!(c->prog->OutputsWritten & BITFIELD64_BIT(i)))
         continue;

      /* Since we can't load from outputs in the IR, we make temporaries
       * for the outputs and emit stores to the real outputs at the end of
       * the shader.
       */
      nir_register *reg = nir_local_reg_create(b->impl);
      reg->num_components = 4;

      nir_variable *var = rzalloc(shader, nir_variable);
      var->type = glsl_vec4_type();
      var->data.mode = nir_var_shader_out;
      var->name = ralloc_asprintf(var, "out_%d", i);

      var->data.location = i;
      var->data.index = 0;

      c->output_regs[i] = reg;

      exec_list_push_tail(&shader->outputs, &var->node);
      c->output_vars[i] = var;
   }

   /* Create temporary registers. */
   c->temp_regs = rzalloc_array(c, nir_register *, c->prog->NumTemporaries);

   nir_register *reg;
   for (int i = 0; i < c->prog->NumTemporaries; i++) {
      reg = nir_local_reg_create(b->impl);
      if (!reg) {
         c->error = true;
         return;
      }
      reg->num_components = 4;
      c->temp_regs[i] = reg;
   }

   /* Create the address register (for ARB_vertex_program). */
   reg = nir_local_reg_create(b->impl);
   if (!reg) {
      c->error = true;
      return;
   }
   reg->num_components = 1;
   c->addr_reg = reg;

   /* Set the number of uniforms */
   shader->num_uniforms = 4 * c->prog->Parameters->NumParameters;
}
1064
/**
 * Translates a Mesa IR gl_program into a freshly-created nir_shader.
 *
 * Returns the new shader (ralloc'd with a NULL parent; caller owns it), or
 * NULL on allocation failure or translation error.  Uses the goto-cleanup
 * pattern: the success path also flows through the "fail" label, which only
 * frees the shader when c->error was set.
 */
struct nir_shader *
prog_to_nir(const struct gl_program *prog, const nir_shader_compiler_options *options)
{
   struct ptn_compile *c;
   struct nir_shader *s;

   c = rzalloc(NULL, struct ptn_compile);
   if (!c)
      return NULL;
   s = nir_shader_create(NULL, options);
   if (!s)
      goto fail;
   c->prog = prog;

   /* All code is emitted into the body of a single "main" impl. */
   nir_function *func = nir_function_create(s, "main");
   nir_function_overload *overload = nir_function_overload_create(func);
   nir_function_impl *impl = nir_function_impl_create(overload);

   c->build.shader = s;
   c->build.impl = impl;
   c->build.cf_node_list = &impl->body;

   setup_registers_and_variables(c);
   if (unlikely(c->error))
      goto fail;

   for (unsigned int i = 0; i < prog->NumInstructions; i++) {
      ptn_emit_instruction(c, &prog->Instructions[i]);

      if (unlikely(c->error))
         break;
   }

   /* Copy the output shadow registers into the real output variables. */
   ptn_add_output_stores(c);

fail:
   if (c->error) {
      ralloc_free(s);
      s = NULL;
   }
   ralloc_free(c);
   return s;
}