/* From mesa.git: src/mesa/program/prog_to_nir.c */
1 /*
2 * Copyright © 2015 Intel Corporation
3 * Copyright © 2014-2015 Broadcom
4 * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
15 * Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
23 * IN THE SOFTWARE.
24 */
25
26 #include "compiler/nir/nir.h"
27 #include "compiler/nir/nir_builder.h"
28 #include "compiler/glsl/list.h"
29 #include "main/imports.h"
30 #include "util/ralloc.h"
31
32 #include "prog_to_nir.h"
33 #include "prog_instruction.h"
34 #include "prog_parameter.h"
35 #include "prog_print.h"
36 #include "program.h"
37
38 /**
39 * \file prog_to_nir.c
40 *
41 * A translator from Mesa IR (prog_instruction.h) to NIR. This is primarily
42 * intended to support ARB_vertex_program, ARB_fragment_program, and fixed-function
43 * vertex processing. Full GLSL support should use glsl_to_nir instead.
44 */
45
/* Per-translation state carried through the prog -> NIR conversion. */
struct ptn_compile {
   const struct gl_program *prog;  /* Mesa IR program being translated */
   nir_builder build;              /* builder emitting into the new NIR shader */
   bool error;                     /* set on failure; callers bail out when true */

   /* vec4[] uniform wrapping the program's Parameters list; NULL when the
    * program has no parameters (see prog_to_nir()).
    */
   nir_variable *parameters;
   nir_variable *input_vars[VARYING_SLOT_MAX];   /* shader-in var per slot */
   nir_variable *output_vars[VARYING_SLOT_MAX];  /* shader-out var per slot */
   nir_register **output_regs;  /* local regs standing in for outputs (stored at end) */
   nir_register **temp_regs;    /* local regs for PROGRAM_TEMPORARY */

   nir_register *addr_reg;      /* the single address register written by ARL */
};
59
/* Build a compound-literal unsigned[4] swizzle from SWIZZLE_* channel names. */
#define SWIZ(X, Y, Z, W) \
   (unsigned[4]){ SWIZZLE_##X, SWIZZLE_##Y, SWIZZLE_##Z, SWIZZLE_##W }
/* 4-wide swizzle of src using named channels. */
#define ptn_swizzle(b, src, x, y, z, w) nir_swizzle(b, src, SWIZ(x, y, z, w), 4, true)
/* Extract one channel of src as a 1-component value. */
#define ptn_channel(b, src, ch) nir_swizzle(b, src, SWIZ(ch, ch, ch, ch), 1, true)
64
65 static nir_ssa_def *
66 ptn_src_for_dest(struct ptn_compile *c, nir_alu_dest *dest)
67 {
68 nir_builder *b = &c->build;
69
70 nir_alu_src src;
71 memset(&src, 0, sizeof(src));
72
73 if (dest->dest.is_ssa)
74 src.src = nir_src_for_ssa(&dest->dest.ssa);
75 else {
76 assert(!dest->dest.reg.indirect);
77 src.src = nir_src_for_reg(dest->dest.reg.reg);
78 src.src.reg.base_offset = dest->dest.reg.base_offset;
79 }
80
81 for (int i = 0; i < 4; i++)
82 src.swizzle[i] = i;
83
84 return nir_fmov_alu(b, src, 4);
85 }
86
87 static nir_alu_dest
88 ptn_get_dest(struct ptn_compile *c, const struct prog_dst_register *prog_dst)
89 {
90 nir_alu_dest dest;
91
92 memset(&dest, 0, sizeof(dest));
93
94 switch (prog_dst->File) {
95 case PROGRAM_TEMPORARY:
96 dest.dest.reg.reg = c->temp_regs[prog_dst->Index];
97 break;
98 case PROGRAM_OUTPUT:
99 dest.dest.reg.reg = c->output_regs[prog_dst->Index];
100 break;
101 case PROGRAM_ADDRESS:
102 assert(prog_dst->Index == 0);
103 dest.dest.reg.reg = c->addr_reg;
104 break;
105 case PROGRAM_UNDEFINED:
106 break;
107 }
108
109 dest.write_mask = prog_dst->WriteMask;
110 dest.saturate = false;
111
112 assert(!prog_dst->RelAddr);
113
114 return dest;
115 }
116
/**
 * Translate a Mesa IR source register into a 4-wide NIR SSA value,
 * applying swizzle, absolute value and negate.
 *
 * Inputs become load_var intrinsics; constants either fold to immediates
 * or index (possibly indirectly via the address register) into the
 * c->parameters uniform array.
 */
static nir_ssa_def *
ptn_get_src(struct ptn_compile *c, const struct prog_src_register *prog_src)
{
   nir_builder *b = &c->build;
   nir_alu_src src;

   memset(&src, 0, sizeof(src));

   switch (prog_src->File) {
   case PROGRAM_UNDEFINED:
      return nir_imm_float(b, 0.0);
   case PROGRAM_TEMPORARY:
      assert(!prog_src->RelAddr && prog_src->Index >= 0);
      src.src.reg.reg = c->temp_regs[prog_src->Index];
      break;
   case PROGRAM_INPUT: {
      /* ARB_vertex_program doesn't allow relative addressing on vertex
       * attributes; ARB_fragment_program has no relative addressing at all.
       */
      assert(!prog_src->RelAddr);

      assert(prog_src->Index >= 0 && prog_src->Index < VARYING_SLOT_MAX);

      nir_intrinsic_instr *load =
         nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_var);
      load->num_components = 4;
      load->variables[0] = nir_deref_var_create(load, c->input_vars[prog_src->Index]);

      nir_ssa_dest_init(&load->instr, &load->dest, 4, NULL);
      nir_builder_instr_insert(b, &load->instr);

      src.src = nir_src_for_ssa(&load->dest.ssa);
      break;
   }
   case PROGRAM_STATE_VAR:
   case PROGRAM_CONSTANT: {
      /* We actually want to look at the type in the Parameters list for this,
       * because it lets us upload constant builtin uniforms as actual
       * constants.
       */
      struct gl_program_parameter_list *plist = c->prog->Parameters;
      gl_register_file file = prog_src->RelAddr ? prog_src->File :
         plist->Parameters[prog_src->Index].Type;

      switch (file) {
      case PROGRAM_CONSTANT:
         /* Directly-addressed constants fold to an immediate vec4. */
         if ((c->prog->IndirectRegisterFiles & (1 << PROGRAM_CONSTANT)) == 0) {
            float *v = (float *) plist->ParameterValues[prog_src->Index];
            src.src = nir_src_for_ssa(nir_imm_vec4(b, v[0], v[1], v[2], v[3]));
            break;
         }
         /* FALLTHROUGH */
      case PROGRAM_STATE_VAR: {
         assert(c->parameters != NULL);

         /* Load parameters[Index] (or parameters[addr_reg + Index] for
          * relative addressing) via a deref chain on the uniform array.
          */
         nir_intrinsic_instr *load =
            nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_var);
         nir_ssa_dest_init(&load->instr, &load->dest, 4, NULL);
         load->num_components = 4;

         load->variables[0] = nir_deref_var_create(load, c->parameters);
         nir_deref_array *deref_arr =
            nir_deref_array_create(load->variables[0]);
         deref_arr->deref.type = glsl_vec4_type();
         load->variables[0]->deref.child = &deref_arr->deref;

         if (prog_src->RelAddr) {
            deref_arr->deref_array_type = nir_deref_array_type_indirect;

            nir_alu_src addr_src = { NIR_SRC_INIT };
            addr_src.src = nir_src_for_reg(c->addr_reg);
            nir_ssa_def *reladdr = nir_imov_alu(b, addr_src, 1);

            if (prog_src->Index < 0) {
               /* This is a negative offset which should be added to the address
                * register's value.
                */
               reladdr = nir_iadd(b, reladdr, nir_imm_int(b, prog_src->Index));

               deref_arr->base_offset = 0;
            } else {
               deref_arr->base_offset = prog_src->Index;
            }
            deref_arr->indirect = nir_src_for_ssa(reladdr);
         } else {
            deref_arr->deref_array_type = nir_deref_array_type_direct;
            deref_arr->base_offset = prog_src->Index;
         }

         nir_builder_instr_insert(b, &load->instr);

         src.src = nir_src_for_ssa(&load->dest.ssa);
         break;
      }
      default:
         fprintf(stderr, "bad uniform src register file: %s (%d)\n",
                 _mesa_register_file_name(file), file);
         abort();
      }
      break;
   }
   default:
      fprintf(stderr, "unknown src register file: %s (%d)\n",
              _mesa_register_file_name(prog_src->File), prog_src->File);
      abort();
   }

   nir_ssa_def *def;
   if (!HAS_EXTENDED_SWIZZLE(prog_src->Swizzle) &&
       (prog_src->Negate == NEGATE_NONE || prog_src->Negate == NEGATE_XYZW)) {
      /* The simple non-SWZ case. */
      for (int i = 0; i < 4; i++)
         src.swizzle[i] = GET_SWZ(prog_src->Swizzle, i);

      def = nir_fmov_alu(b, src, 4);

      if (prog_src->Abs)
         def = nir_fabs(b, def);

      if (prog_src->Negate)
         def = nir_fneg(b, def);
   } else {
      /* The SWZ instruction allows per-component zero/one swizzles, and also
       * per-component negation.
       */
      nir_ssa_def *chans[4];
      for (int i = 0; i < 4; i++) {
         int swizzle = GET_SWZ(prog_src->Swizzle, i);
         if (swizzle == SWIZZLE_ZERO) {
            chans[i] = nir_imm_float(b, 0.0);
         } else if (swizzle == SWIZZLE_ONE) {
            chans[i] = nir_imm_float(b, 1.0);
         } else {
            assert(swizzle != SWIZZLE_NIL);
            nir_alu_instr *mov = nir_alu_instr_create(b->shader, nir_op_fmov);
            nir_ssa_dest_init(&mov->instr, &mov->dest.dest, 1, NULL);
            mov->dest.write_mask = 0x1;
            mov->src[0] = src;
            mov->src[0].swizzle[0] = swizzle;
            nir_builder_instr_insert(b, &mov->instr);

            chans[i] = &mov->dest.dest.ssa;
         }

         if (prog_src->Abs)
            chans[i] = nir_fabs(b, chans[i]);

         /* Negate is a per-channel bitmask in the SWZ case. */
         if (prog_src->Negate & (1 << i))
            chans[i] = nir_fneg(b, chans[i]);
      }
      def = nir_vec4(b, chans[0], chans[1], chans[2], chans[3]);
   }

   return def;
}
272
273 static void
274 ptn_alu(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
275 {
276 unsigned num_srcs = nir_op_infos[op].num_inputs;
277 nir_alu_instr *instr = nir_alu_instr_create(b->shader, op);
278 unsigned i;
279
280 for (i = 0; i < num_srcs; i++)
281 instr->src[i].src = nir_src_for_ssa(src[i]);
282
283 instr->dest = dest;
284 nir_builder_instr_insert(b, &instr->instr);
285 }
286
287 static void
288 ptn_move_dest_masked(nir_builder *b, nir_alu_dest dest,
289 nir_ssa_def *def, unsigned write_mask)
290 {
291 if (!(dest.write_mask & write_mask))
292 return;
293
294 nir_alu_instr *mov = nir_alu_instr_create(b->shader, nir_op_fmov);
295 if (!mov)
296 return;
297
298 mov->dest = dest;
299 mov->dest.write_mask &= write_mask;
300 mov->src[0].src = nir_src_for_ssa(def);
301 for (unsigned i = def->num_components; i < 4; i++)
302 mov->src[0].swizzle[i] = def->num_components - 1;
303 nir_builder_instr_insert(b, &mov->instr);
304 }
305
/* Move def into all four channels of dest (subject to dest's writemask). */
static void
ptn_move_dest(nir_builder *b, nir_alu_dest dest, nir_ssa_def *def)
{
   ptn_move_dest_masked(b, dest, def, WRITEMASK_XYZW);
}
311
/* ARL - Address Register Load: dest = (int)floor(src.x) */
static void
ptn_arl(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
{
   ptn_move_dest(b, dest, nir_f2i(b, nir_ffloor(b, src[0])));
}
317
318 /* EXP - Approximate Exponential Base 2
319 * dst.x = 2^{\lfloor src.x\rfloor}
320 * dst.y = src.x - \lfloor src.x\rfloor
321 * dst.z = 2^{src.x}
322 * dst.w = 1.0
323 */
324 static void
325 ptn_exp(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
326 {
327 nir_ssa_def *srcx = ptn_channel(b, src[0], X);
328
329 ptn_move_dest_masked(b, dest, nir_fexp2(b, nir_ffloor(b, srcx)), WRITEMASK_X);
330 ptn_move_dest_masked(b, dest, nir_fsub(b, srcx, nir_ffloor(b, srcx)), WRITEMASK_Y);
331 ptn_move_dest_masked(b, dest, nir_fexp2(b, srcx), WRITEMASK_Z);
332 ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_W);
333 }
334
335 /* LOG - Approximate Logarithm Base 2
336 * dst.x = \lfloor\log_2{|src.x|}\rfloor
337 * dst.y = |src.x| * 2^{-\lfloor\log_2{|src.x|}\rfloor}}
338 * dst.z = \log_2{|src.x|}
339 * dst.w = 1.0
340 */
341 static void
342 ptn_log(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
343 {
344 nir_ssa_def *abs_srcx = nir_fabs(b, ptn_channel(b, src[0], X));
345 nir_ssa_def *log2 = nir_flog2(b, abs_srcx);
346 nir_ssa_def *floor_log2 = nir_ffloor(b, log2);
347
348 ptn_move_dest_masked(b, dest, floor_log2, WRITEMASK_X);
349 ptn_move_dest_masked(b, dest,
350 nir_fmul(b, abs_srcx,
351 nir_fexp2(b, nir_fneg(b, floor_log2))),
352 WRITEMASK_Y);
353 ptn_move_dest_masked(b, dest, log2, WRITEMASK_Z);
354 ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_W);
355 }
356
357 /* DST - Distance Vector
358 * dst.x = 1.0
359 * dst.y = src0.y \times src1.y
360 * dst.z = src0.z
361 * dst.w = src1.w
362 */
363 static void
364 ptn_dst(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
365 {
366 ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_X);
367 ptn_move_dest_masked(b, dest, nir_fmul(b, src[0], src[1]), WRITEMASK_Y);
368 ptn_move_dest_masked(b, dest, nir_fmov(b, src[0]), WRITEMASK_Z);
369 ptn_move_dest_masked(b, dest, nir_fmov(b, src[1]), WRITEMASK_W);
370 }
371
/* LIT - Light Coefficients
 * dst.x = 1.0
 * dst.y = max(src.x, 0.0)
 * dst.z = (src.x > 0.0) ? max(src.y, 0.0)^{clamp(src.w, -128.0, 128.0))} : 0
 * dst.w = 1.0
 */
static void
ptn_lit(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
{
   ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_XW);

   ptn_move_dest_masked(b, dest, nir_fmax(b, ptn_channel(b, src[0], X),
                                          nir_imm_float(b, 0.0)), WRITEMASK_Y);

   /* .z is the expensive part; skip the pow entirely if it isn't written. */
   if (dest.write_mask & WRITEMASK_Z) {
      nir_ssa_def *src0_y = ptn_channel(b, src[0], Y);
      /* Clamp the specular exponent to [-128, 128] per the LIT spec. */
      nir_ssa_def *wclamp = nir_fmax(b, nir_fmin(b, ptn_channel(b, src[0], W),
                                                 nir_imm_float(b, 128.0)),
                                     nir_imm_float(b, -128.0));
      nir_ssa_def *pow = nir_fpow(b, nir_fmax(b, src0_y, nir_imm_float(b, 0.0)),
                                  wclamp);

      /* Select 0.0 when src.x <= 0.0: boolean csel on integer-capable
       * hardware, float csel otherwise.
       */
      nir_ssa_def *z;
      if (b->shader->options->native_integers) {
         z = nir_bcsel(b,
                       nir_fge(b, nir_imm_float(b, 0.0), ptn_channel(b, src[0], X)),
                       nir_imm_float(b, 0.0),
                       pow);
      } else {
         z = nir_fcsel(b,
                       nir_sge(b, nir_imm_float(b, 0.0), ptn_channel(b, src[0], X)),
                       nir_imm_float(b, 0.0),
                       pow);
      }

      ptn_move_dest_masked(b, dest, z, WRITEMASK_Z);
   }
}
410
/* SCS - Sine Cosine
 * dst.x = \cos{src.x}
 * dst.y = \sin{src.x}
 * dst.z = 0.0
 * dst.w = 1.0
 */
static void
ptn_scs(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
{
   ptn_move_dest_masked(b, dest, nir_fcos(b, ptn_channel(b, src[0], X)),
                        WRITEMASK_X);
   ptn_move_dest_masked(b, dest, nir_fsin(b, ptn_channel(b, src[0], X)),
                        WRITEMASK_Y);
   ptn_move_dest_masked(b, dest, nir_imm_float(b, 0.0), WRITEMASK_Z);
   ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_W);
}
427
428 /**
429 * Emit SLT. For platforms with integers, prefer b2f(flt(...)).
430 */
431 static void
432 ptn_slt(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
433 {
434 if (b->shader->options->native_integers) {
435 ptn_move_dest(b, dest, nir_b2f(b, nir_flt(b, src[0], src[1])));
436 } else {
437 ptn_move_dest(b, dest, nir_slt(b, src[0], src[1]));
438 }
439 }
440
441 /**
442 * Emit SGE. For platforms with integers, prefer b2f(fge(...)).
443 */
444 static void
445 ptn_sge(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
446 {
447 if (b->shader->options->native_integers) {
448 ptn_move_dest(b, dest, nir_b2f(b, nir_fge(b, src[0], src[1])));
449 } else {
450 ptn_move_dest(b, dest, nir_sge(b, src[0], src[1]));
451 }
452 }
453
454 static void
455 ptn_sle(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
456 {
457 nir_ssa_def *commuted[] = { src[1], src[0] };
458 ptn_sge(b, dest, commuted);
459 }
460
461 static void
462 ptn_sgt(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
463 {
464 nir_ssa_def *commuted[] = { src[1], src[0] };
465 ptn_slt(b, dest, commuted);
466 }
467
468 /**
469 * Emit SEQ. For platforms with integers, prefer b2f(feq(...)).
470 */
471 static void
472 ptn_seq(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
473 {
474 if (b->shader->options->native_integers) {
475 ptn_move_dest(b, dest, nir_b2f(b, nir_feq(b, src[0], src[1])));
476 } else {
477 ptn_move_dest(b, dest, nir_seq(b, src[0], src[1]));
478 }
479 }
480
481 /**
482 * Emit SNE. For platforms with integers, prefer b2f(fne(...)).
483 */
484 static void
485 ptn_sne(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
486 {
487 if (b->shader->options->native_integers) {
488 ptn_move_dest(b, dest, nir_b2f(b, nir_fne(b, src[0], src[1])));
489 } else {
490 ptn_move_dest(b, dest, nir_sne(b, src[0], src[1]));
491 }
492 }
493
/* XPD - Cross Product
 * dst.xyz = src0 x src1 (computed as yzx*zxy - yzx*zxy of the operands)
 * dst.w = 1.0
 */
static void
ptn_xpd(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
{
   ptn_move_dest_masked(b, dest,
                        nir_fsub(b,
                                 nir_fmul(b,
                                          ptn_swizzle(b, src[0], Y, Z, X, X),
                                          ptn_swizzle(b, src[1], Z, X, Y, X)),
                                 nir_fmul(b,
                                          ptn_swizzle(b, src[1], Y, Z, X, X),
                                          ptn_swizzle(b, src[0], Z, X, Y, X))),
                        WRITEMASK_XYZ);
   ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_W);
}
508
/* DP2 - 2-component dot product, replicated to all written channels. */
static void
ptn_dp2(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
{
   ptn_move_dest(b, dest, nir_fdot2(b, src[0], src[1]));
}
514
/* DP3 - 3-component dot product, replicated to all written channels. */
static void
ptn_dp3(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
{
   ptn_move_dest(b, dest, nir_fdot3(b, src[0], src[1]));
}
520
/* DP4 - 4-component dot product, replicated to all written channels. */
static void
ptn_dp4(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
{
   ptn_move_dest(b, dest, nir_fdot4(b, src[0], src[1]));
}
526
/* DPH - homogeneous dot product: src0.xyz . src1.xyz + src1.w */
static void
ptn_dph(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
{
   ptn_move_dest(b, dest, nir_fdph(b, src[0], src[1]));
}
532
533 static void
534 ptn_cmp(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
535 {
536 if (b->shader->options->native_integers) {
537 ptn_move_dest(b, dest, nir_bcsel(b,
538 nir_flt(b, src[0], nir_imm_float(b, 0.0)),
539 src[1], src[2]));
540 } else {
541 ptn_move_dest(b, dest, nir_fcsel(b,
542 nir_slt(b, src[0], nir_imm_float(b, 0.0)),
543 src[1], src[2]));
544 }
545 }
546
/* LRP - linear interpolation: dst = src0 * src1 + (1 - src0) * src2.
 * Note nir_flrp's interpolant is the last argument, hence the reordering.
 */
static void
ptn_lrp(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
{
   ptn_move_dest(b, dest, nir_flrp(b, src[2], src[1], src[0]));
}
552
/* KIL - discard the fragment if any channel of src0 is negative.
 * The dest parameter is unused; KIL writes no register.
 */
static void
ptn_kil(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
{
   /* Reduce "any channel < 0" to a single condition, using boolean or
    * float comparison ops depending on native integer support.
    */
   nir_ssa_def *cmp = b->shader->options->native_integers ?
      nir_bany_inequal4(b, nir_flt(b, src[0], nir_imm_float(b, 0.0)), nir_imm_int(b, 0)) :
      nir_fany_nequal4(b, nir_slt(b, src[0], nir_imm_float(b, 0.0)), nir_imm_float(b, 0.0));

   nir_intrinsic_instr *discard =
      nir_intrinsic_instr_create(b->shader, nir_intrinsic_discard_if);
   discard->src[0] = nir_src_for_ssa(cmp);
   nir_builder_instr_insert(b, &discard->instr);
}
565
/**
 * Translate a Mesa IR texturing opcode (TEX/TXB/TXD/TXL/TXP) into a
 * nir_tex_instr, then resolve the destination writemask with a move.
 */
static void
ptn_tex(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src,
        struct prog_instruction *prog_inst)
{
   nir_tex_instr *instr;
   nir_texop op;
   unsigned num_srcs;

   /* Pick the NIR texop and how many tex sources it will carry. */
   switch (prog_inst->Opcode) {
   case OPCODE_TEX:
      op = nir_texop_tex;
      num_srcs = 1;
      break;
   case OPCODE_TXB:
      op = nir_texop_txb;
      num_srcs = 2;
      break;
   case OPCODE_TXD:
      /* NOTE(review): claims 3 sources but no ddx/ddy sources are appended
       * below, which would trip the src_number assert — presumably TXD
       * never reaches this path; confirm against callers.
       */
      op = nir_texop_txd;
      num_srcs = 3;
      break;
   case OPCODE_TXL:
      op = nir_texop_txl;
      num_srcs = 2;
      break;
   case OPCODE_TXP:
      /* Projective texturing: plain tex plus a projector source. */
      op = nir_texop_tex;
      num_srcs = 2;
      break;
   case OPCODE_TXP_NV:
      assert(!"not handled");
      op = nir_texop_tex;
      num_srcs = 2;
      break;
   default:
      fprintf(stderr, "unknown tex op %d\n", prog_inst->Opcode);
      abort();
   }

   /* Shadow comparisons carry one extra source (the comparator). */
   if (prog_inst->TexShadow)
      num_srcs++;

   instr = nir_tex_instr_create(b->shader, num_srcs);
   instr->op = op;
   instr->dest_type = nir_type_float;
   instr->is_shadow = prog_inst->TexShadow;
   instr->sampler_index = prog_inst->TexSrcUnit;

   /* Map the Mesa texture target to a GLSL sampler dim. */
   switch (prog_inst->TexSrcTarget) {
   case TEXTURE_1D_INDEX:
      instr->sampler_dim = GLSL_SAMPLER_DIM_1D;
      break;
   case TEXTURE_2D_INDEX:
      instr->sampler_dim = GLSL_SAMPLER_DIM_2D;
      break;
   case TEXTURE_3D_INDEX:
      instr->sampler_dim = GLSL_SAMPLER_DIM_3D;
      break;
   case TEXTURE_CUBE_INDEX:
      instr->sampler_dim = GLSL_SAMPLER_DIM_CUBE;
      break;
   case TEXTURE_RECT_INDEX:
      instr->sampler_dim = GLSL_SAMPLER_DIM_RECT;
      break;
   default:
      fprintf(stderr, "Unknown texture target %d\n", prog_inst->TexSrcTarget);
      abort();
   }

   switch (instr->sampler_dim) {
   case GLSL_SAMPLER_DIM_1D:
   case GLSL_SAMPLER_DIM_BUF:
      instr->coord_components = 1;
      break;
   case GLSL_SAMPLER_DIM_2D:
   case GLSL_SAMPLER_DIM_RECT:
   case GLSL_SAMPLER_DIM_EXTERNAL:
   case GLSL_SAMPLER_DIM_MS:
      instr->coord_components = 2;
      break;
   case GLSL_SAMPLER_DIM_3D:
   case GLSL_SAMPLER_DIM_CUBE:
      instr->coord_components = 3;
      break;
   }

   unsigned src_number = 0;

   /* Coordinate always comes first. */
   instr->src[src_number].src =
      nir_src_for_ssa(ptn_swizzle(b, src[0], X, Y, Z, W));
   instr->src[src_number].src_type = nir_tex_src_coord;
   src_number++;

   /* TXP/TXB/TXL all pack their extra operand into the .w channel. */
   if (prog_inst->Opcode == OPCODE_TXP) {
      instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], W));
      instr->src[src_number].src_type = nir_tex_src_projector;
      src_number++;
   }

   if (prog_inst->Opcode == OPCODE_TXB) {
      instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], W));
      instr->src[src_number].src_type = nir_tex_src_bias;
      src_number++;
   }

   if (prog_inst->Opcode == OPCODE_TXL) {
      instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], W));
      instr->src[src_number].src_type = nir_tex_src_lod;
      src_number++;
   }

   if (instr->is_shadow) {
      /* The comparator lives in .z for 1D/2D shadow, .w otherwise. */
      if (instr->coord_components < 3)
         instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], Z));
      else
         instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], W));

      instr->src[src_number].src_type = nir_tex_src_comparitor;
      src_number++;
   }

   assert(src_number == num_srcs);

   nir_ssa_dest_init(&instr->instr, &instr->dest, 4, NULL);
   nir_builder_instr_insert(b, &instr->instr);

   /* Resolve the writemask on the texture op. */
   ptn_move_dest(b, dest, &instr->dest.ssa);
}
695
/* Mesa IR opcodes that translate 1:1 to a NIR ALU op.  Zero entries are
 * either no-ops or handled by a dedicated case/helper in
 * ptn_emit_instruction().
 */
static const nir_op op_trans[MAX_OPCODE] = {
   [OPCODE_NOP] = 0,
   [OPCODE_ABS] = nir_op_fabs,
   [OPCODE_ADD] = nir_op_fadd,
   [OPCODE_ARL] = 0,
   [OPCODE_CMP] = 0,
   [OPCODE_COS] = 0,
   [OPCODE_DDX] = nir_op_fddx,
   [OPCODE_DDY] = nir_op_fddy,
   [OPCODE_DP2] = 0,
   [OPCODE_DP3] = 0,
   [OPCODE_DP4] = 0,
   [OPCODE_DPH] = 0,
   [OPCODE_DST] = 0,
   [OPCODE_END] = 0,
   [OPCODE_EX2] = 0,
   [OPCODE_EXP] = 0,
   [OPCODE_FLR] = nir_op_ffloor,
   [OPCODE_FRC] = nir_op_ffract,
   [OPCODE_LG2] = 0,
   [OPCODE_LIT] = 0,
   [OPCODE_LOG] = 0,
   [OPCODE_LRP] = 0,
   [OPCODE_MAD] = nir_op_ffma,
   [OPCODE_MAX] = nir_op_fmax,
   [OPCODE_MIN] = nir_op_fmin,
   [OPCODE_MOV] = nir_op_fmov,
   [OPCODE_MUL] = nir_op_fmul,
   [OPCODE_POW] = 0,
   [OPCODE_RCP] = 0,

   [OPCODE_RSQ] = 0,
   [OPCODE_SCS] = 0,
   [OPCODE_SEQ] = 0,
   [OPCODE_SGE] = 0,
   [OPCODE_SGT] = 0,
   [OPCODE_SIN] = 0,
   [OPCODE_SLE] = 0,
   [OPCODE_SLT] = 0,
   [OPCODE_SNE] = 0,
   [OPCODE_SSG] = nir_op_fsign,
   [OPCODE_SUB] = nir_op_fsub,
   [OPCODE_SWZ] = 0,
   [OPCODE_TEX] = 0,
   [OPCODE_TRUNC] = nir_op_ftrunc,
   [OPCODE_TXB] = 0,
   [OPCODE_TXD] = 0,
   [OPCODE_TXL] = 0,
   [OPCODE_TXP] = 0,
   [OPCODE_TXP_NV] = 0,
   [OPCODE_XPD] = 0,
};
748
/**
 * Emit the NIR equivalent of one Mesa IR instruction.  Scalar
 * transcendentals and "macro" opcodes dispatch to the ptn_* helpers above;
 * simple 1:1 ALU opcodes go through the op_trans[] table.
 */
static void
ptn_emit_instruction(struct ptn_compile *c, struct prog_instruction *prog_inst)
{
   nir_builder *b = &c->build;
   unsigned i;
   const unsigned op = prog_inst->Opcode;

   if (op == OPCODE_END)
      return;

   /* Mesa IR instructions carry up to three sources; unused ones are
    * PROGRAM_UNDEFINED and become immediate zeros in ptn_get_src().
    */
   nir_ssa_def *src[3];
   for (i = 0; i < 3; i++) {
      src[i] = ptn_get_src(c, &prog_inst->SrcReg[i]);
   }
   nir_alu_dest dest = ptn_get_dest(c, &prog_inst->DstReg);
   if (c->error)
      return;

   switch (op) {
   case OPCODE_RSQ:
      ptn_move_dest(b, dest,
                    nir_frsq(b, nir_fabs(b, ptn_channel(b, src[0], X))));
      break;

   case OPCODE_RCP:
      ptn_move_dest(b, dest, nir_frcp(b, ptn_channel(b, src[0], X)));
      break;

   case OPCODE_EX2:
      ptn_move_dest(b, dest, nir_fexp2(b, ptn_channel(b, src[0], X)));
      break;

   case OPCODE_LG2:
      ptn_move_dest(b, dest, nir_flog2(b, ptn_channel(b, src[0], X)));
      break;

   case OPCODE_POW:
      ptn_move_dest(b, dest, nir_fpow(b,
                                      ptn_channel(b, src[0], X),
                                      ptn_channel(b, src[1], X)));
      break;

   case OPCODE_COS:
      ptn_move_dest(b, dest, nir_fcos(b, ptn_channel(b, src[0], X)));
      break;

   case OPCODE_SIN:
      ptn_move_dest(b, dest, nir_fsin(b, ptn_channel(b, src[0], X)));
      break;

   case OPCODE_ARL:
      ptn_arl(b, dest, src);
      break;

   case OPCODE_EXP:
      ptn_exp(b, dest, src);
      break;

   case OPCODE_LOG:
      ptn_log(b, dest, src);
      break;

   case OPCODE_LRP:
      ptn_lrp(b, dest, src);
      break;

   case OPCODE_DST:
      ptn_dst(b, dest, src);
      break;

   case OPCODE_LIT:
      ptn_lit(b, dest, src);
      break;

   case OPCODE_XPD:
      ptn_xpd(b, dest, src);
      break;

   case OPCODE_DP2:
      ptn_dp2(b, dest, src);
      break;

   case OPCODE_DP3:
      ptn_dp3(b, dest, src);
      break;

   case OPCODE_DP4:
      ptn_dp4(b, dest, src);
      break;

   case OPCODE_DPH:
      ptn_dph(b, dest, src);
      break;

   case OPCODE_KIL:
      ptn_kil(b, dest, src);
      break;

   case OPCODE_CMP:
      ptn_cmp(b, dest, src);
      break;

   case OPCODE_SCS:
      ptn_scs(b, dest, src);
      break;

   case OPCODE_SLT:
      ptn_slt(b, dest, src);
      break;

   case OPCODE_SGT:
      ptn_sgt(b, dest, src);
      break;

   case OPCODE_SLE:
      ptn_sle(b, dest, src);
      break;

   case OPCODE_SGE:
      ptn_sge(b, dest, src);
      break;

   case OPCODE_SEQ:
      ptn_seq(b, dest, src);
      break;

   case OPCODE_SNE:
      ptn_sne(b, dest, src);
      break;

   case OPCODE_TEX:
   case OPCODE_TXB:
   case OPCODE_TXD:
   case OPCODE_TXL:
   case OPCODE_TXP:
   case OPCODE_TXP_NV:
      ptn_tex(b, dest, src, prog_inst);
      break;

   case OPCODE_SWZ:
      /* Extended swizzles were already handled in ptn_get_src(). */
      ptn_alu(b, nir_op_fmov, dest, src);
      break;

   case OPCODE_NOP:
      break;

   default:
      if (op_trans[op] != 0) {
         ptn_alu(b, op_trans[op], dest, src);
      } else {
         fprintf(stderr, "unknown opcode: %s\n", _mesa_opcode_string(op));
         abort();
      }
      break;
   }

   /* Saturate is applied afterwards by re-reading the written register
    * (which is why the dest must not be SSA) and fsat-ing it in place.
    */
   if (prog_inst->Saturate) {
      assert(prog_inst->Saturate);  /* NOTE(review): redundant re-assert of the guard */
      assert(!dest.dest.is_ssa);
      ptn_move_dest(b, dest, nir_fsat(b, ptn_src_for_dest(c, &dest)));
   }
}
912
/**
 * Puts a NIR intrinsic to store of each PROGRAM_OUTPUT value to the output
 * variables at the end of the shader.
 *
 * We don't generate these incrementally as the PROGRAM_OUTPUT values are
 * written, because there's no output load intrinsic, which means we couldn't
 * handle writemasks.
 */
static void
ptn_add_output_stores(struct ptn_compile *c)
{
   nir_builder *b = &c->build;

   nir_foreach_variable(var, &b->shader->outputs) {
      nir_intrinsic_instr *store =
         nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_var);
      store->num_components = glsl_get_vector_elements(var->type);
      /* Writemask covering every component of the variable. */
      store->const_index[0] = (1 << store->num_components) - 1;
      store->variables[0] =
         nir_deref_var_create(store, c->output_vars[var->data.location]);

      if (c->prog->Target == GL_FRAGMENT_PROGRAM_ARB &&
          var->data.location == FRAG_RESULT_DEPTH) {
         /* result.depth has this strange convention of being the .z component of
          * a vec4 with undefined .xyw components. We resolve it to a scalar, to
          * match GLSL's gl_FragDepth and the expectations of most backends.
          */
         nir_alu_src alu_src = { NIR_SRC_INIT };
         alu_src.src = nir_src_for_reg(c->output_regs[FRAG_RESULT_DEPTH]);
         alu_src.swizzle[0] = SWIZZLE_Z;
         store->src[0] = nir_src_for_ssa(nir_fmov_alu(b, alu_src, 1));
      } else {
         /* Store straight from the stand-in output register. */
         store->src[0].reg.reg = c->output_regs[var->data.location];
      }
      nir_builder_instr_insert(b, &store->instr);
   }
}
950
/**
 * Create the NIR input/output variables and local registers that the
 * translated instructions will reference: one shader-in per bit of
 * InputsRead, a stand-in register plus shader-out per bit of
 * OutputsWritten, one register per Mesa temporary, and the address
 * register.  Sets c->error on allocation failure.
 */
static void
setup_registers_and_variables(struct ptn_compile *c)
{
   nir_builder *b = &c->build;
   struct nir_shader *shader = b->shader;

   /* Create input variables. */
   const int num_inputs = _mesa_flsll(c->prog->InputsRead);
   for (int i = 0; i < num_inputs; i++) {
      if (!(c->prog->InputsRead & BITFIELD64_BIT(i)))
         continue;

      nir_variable *var =
         nir_variable_create(shader, nir_var_shader_in, glsl_vec4_type(),
                             ralloc_asprintf(shader, "in_%d", i));
      var->data.location = i;
      var->data.index = 0;

      if (c->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
         struct gl_fragment_program *fp =
            (struct gl_fragment_program *) c->prog;

         var->data.interpolation = fp->InterpQualifier[i];

         if (i == VARYING_SLOT_POS) {
            /* Fragment position needs the program's coordinate conventions. */
            var->data.origin_upper_left = fp->OriginUpperLeft;
            var->data.pixel_center_integer = fp->PixelCenterInteger;
         } else if (i == VARYING_SLOT_FOGC) {
            /* fogcoord is defined as <f, 0.0, 0.0, 1.0>. Make the actual
             * input variable a float, and create a local containing the
             * full vec4 value.
             */
            var->type = glsl_float_type();

            nir_intrinsic_instr *load_x =
               nir_intrinsic_instr_create(shader, nir_intrinsic_load_var);
            load_x->num_components = 1;
            load_x->variables[0] = nir_deref_var_create(load_x, var);
            nir_ssa_dest_init(&load_x->instr, &load_x->dest, 1, NULL);
            nir_builder_instr_insert(b, &load_x->instr);

            nir_ssa_def *f001 = nir_vec4(b, &load_x->dest.ssa, nir_imm_float(b, 0.0),
                                         nir_imm_float(b, 0.0), nir_imm_float(b, 1.0));

            nir_variable *fullvar =
               nir_local_variable_create(b->impl, glsl_vec4_type(),
                                         "fogcoord_tmp");
            nir_intrinsic_instr *store =
               nir_intrinsic_instr_create(shader, nir_intrinsic_store_var);
            store->num_components = 4;
            store->const_index[0] = WRITEMASK_XYZW;
            store->variables[0] = nir_deref_var_create(store, fullvar);
            store->src[0] = nir_src_for_ssa(f001);
            nir_builder_instr_insert(b, &store->instr);

            /* We inserted the real input into the list so the driver has real
             * inputs, but we set c->input_vars[i] to the temporary so we use
             * the splatted value.
             */
            c->input_vars[i] = fullvar;
            continue;
         }
      }

      c->input_vars[i] = var;
   }

   /* Create output registers and variables. */
   int max_outputs = _mesa_fls(c->prog->OutputsWritten);
   c->output_regs = rzalloc_array(c, nir_register *, max_outputs);

   for (int i = 0; i < max_outputs; i++) {
      if (!(c->prog->OutputsWritten & BITFIELD64_BIT(i)))
         continue;

      /* Since we can't load from outputs in the IR, we make temporaries
       * for the outputs and emit stores to the real outputs at the end of
       * the shader.
       */
      nir_register *reg = nir_local_reg_create(b->impl);
      reg->num_components = 4;

      nir_variable *var = rzalloc(shader, nir_variable);
      /* Depth is stored as a scalar (see ptn_add_output_stores). */
      if (c->prog->Target == GL_FRAGMENT_PROGRAM_ARB && i == FRAG_RESULT_DEPTH)
         var->type = glsl_float_type();
      else
         var->type = glsl_vec4_type();
      var->data.mode = nir_var_shader_out;
      var->name = ralloc_asprintf(var, "out_%d", i);

      var->data.location = i;
      var->data.index = 0;

      c->output_regs[i] = reg;

      exec_list_push_tail(&shader->outputs, &var->node);
      c->output_vars[i] = var;
   }

   /* Create temporary registers. */
   c->temp_regs = rzalloc_array(c, nir_register *, c->prog->NumTemporaries);

   nir_register *reg;
   for (unsigned i = 0; i < c->prog->NumTemporaries; i++) {
      reg = nir_local_reg_create(b->impl);
      if (!reg) {
         c->error = true;
         return;
      }
      reg->num_components = 4;
      c->temp_regs[i] = reg;
   }

   /* Create the address register (for ARB_vertex_program). */
   reg = nir_local_reg_create(b->impl);
   if (!reg) {
      c->error = true;
      return;
   }
   reg->num_components = 1;
   c->addr_reg = reg;
}
1073
/**
 * Translate a Mesa gl_program (ARB vp/fp-style IR) into a freshly
 * allocated nir_shader.  Returns NULL on failure; on success the caller
 * owns the returned shader.
 */
struct nir_shader *
prog_to_nir(const struct gl_program *prog,
            const nir_shader_compiler_options *options)
{
   struct ptn_compile *c;
   struct nir_shader *s;
   gl_shader_stage stage = _mesa_program_enum_to_shader_stage(prog->Target);

   c = rzalloc(NULL, struct ptn_compile);
   if (!c)
      return NULL;
   c->prog = prog;

   nir_builder_init_simple_shader(&c->build, NULL, stage, options);
   s = c->build.shader;

   /* Expose the whole parameter list as one vec4[] uniform; ptn_get_src()
    * indexes into it for state vars and indirectly-addressed constants.
    */
   if (prog->Parameters->NumParameters > 0) {
      c->parameters = rzalloc(s, nir_variable);
      c->parameters->type =
         glsl_array_type(glsl_vec4_type(), prog->Parameters->NumParameters);
      c->parameters->name = "parameters";
      c->parameters->data.read_only = true;
      c->parameters->data.mode = nir_var_uniform;
      exec_list_push_tail(&s->uniforms, &c->parameters->node);
   }

   setup_registers_and_variables(c);
   if (unlikely(c->error))
      goto fail;

   for (unsigned int i = 0; i < prog->NumInstructions; i++) {
      ptn_emit_instruction(c, &prog->Instructions[i]);

      if (unlikely(c->error))
         break;
   }

   ptn_add_output_stores(c);

   s->info.name = ralloc_asprintf(s, "ARB%d", prog->Id);
   s->info.num_textures = _mesa_fls(prog->SamplersUsed);
   s->info.num_ubos = 0;
   s->info.num_abos = 0;
   s->info.num_ssbos = 0;
   s->info.num_images = 0;
   s->info.inputs_read = prog->InputsRead;
   s->info.outputs_written = prog->OutputsWritten;
   s->info.system_values_read = prog->SystemValuesRead;
   s->info.uses_texture_gather = false;
   s->info.uses_clip_distance_out = false;
   s->info.separate_shader = false;

   if (stage == MESA_SHADER_FRAGMENT) {
      struct gl_fragment_program *fp = (struct gl_fragment_program *)prog;

      s->info.fs.uses_discard = fp->UsesKill;
   }

fail:
   /* The success path falls through to here too; only free on error. */
   if (c->error) {
      ralloc_free(s);
      s = NULL;
   }
   ralloc_free(c);
   return s;
}