/*
 * Copyright © 2015 Intel Corporation
 * Copyright © 2014-2015 Broadcom
 * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "compiler/nir/nir.h"
#include "compiler/nir/nir_builder.h"
#include "compiler/glsl/list.h"
#include "main/imports.h"
#include "util/ralloc.h"

#include "prog_to_nir.h"
#include "prog_instruction.h"
#include "prog_parameter.h"
#include "prog_print.h"
#include "program.h"

/**
 * \file prog_to_nir.c
 *
 * A translator from Mesa IR (prog_instruction.h) to NIR.  This is primarily
 * intended to support ARB_vertex_program, ARB_fragment_program, and
 * fixed-function vertex processing.  Full GLSL support should use
 * glsl_to_nir instead.
 */

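/* Typical use (a sketch; real callers pass the compiler options from their
 * driver and run their usual NIR lowering/optimization passes afterwards):
 *
 *    nir_shader *nir = prog_to_nir(prog, screen_nir_options);
 *
 * where "screen_nir_options" stands in for the driver's
 * nir_shader_compiler_options.
 */
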
struct ptn_compile {
   const struct gl_program *prog;
   nir_builder build;
   bool error;

   nir_variable *parameters;
   nir_variable *input_vars[VARYING_SLOT_MAX];
   nir_variable *output_vars[VARYING_SLOT_MAX];
   nir_register **output_regs;
   nir_register **temp_regs;

   nir_register *addr_reg;
};

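/* Helpers for building NIR swizzles out of Mesa SWIZZLE_* tokens.
 * ptn_channel() extracts a single channel of a source as a 1-component
 * SSA value.
 */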
#define SWIZ(X, Y, Z, W) \
   (unsigned[4]){ SWIZZLE_##X, SWIZZLE_##Y, SWIZZLE_##Z, SWIZZLE_##W }
#define ptn_channel(b, src, ch) nir_swizzle(b, src, SWIZ(ch, ch, ch, ch), 1, true)

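/* Read back the value just written to an ALU destination so it can be used
 * as a source again (e.g. to apply saturation after the instruction).
 */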
static nir_ssa_def *
ptn_src_for_dest(struct ptn_compile *c, nir_alu_dest *dest)
{
   nir_builder *b = &c->build;

   nir_alu_src src;
   memset(&src, 0, sizeof(src));

   if (dest->dest.is_ssa)
      src.src = nir_src_for_ssa(&dest->dest.ssa);
   else {
      assert(!dest->dest.reg.indirect);
      src.src = nir_src_for_reg(dest->dest.reg.reg);
      src.src.reg.base_offset = dest->dest.reg.base_offset;
   }

   for (int i = 0; i < 4; i++)
      src.swizzle[i] = i;

   return nir_fmov_alu(b, src, 4);
}

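/* Translate a Mesa IR destination register into a nir_alu_dest pointing at
 * the corresponding NIR register (temporary, output temporary, or the
 * address register).
 */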
static nir_alu_dest
ptn_get_dest(struct ptn_compile *c, const struct prog_dst_register *prog_dst)
{
   nir_alu_dest dest;

   memset(&dest, 0, sizeof(dest));

   switch (prog_dst->File) {
   case PROGRAM_TEMPORARY:
      dest.dest.reg.reg = c->temp_regs[prog_dst->Index];
      break;
   case PROGRAM_OUTPUT:
      dest.dest.reg.reg = c->output_regs[prog_dst->Index];
      break;
   case PROGRAM_ADDRESS:
      assert(prog_dst->Index == 0);
      dest.dest.reg.reg = c->addr_reg;
      break;
   case PROGRAM_UNDEFINED:
      break;
   }

   dest.write_mask = prog_dst->WriteMask;
   dest.saturate = false;

   assert(!prog_dst->RelAddr);

   return dest;
}

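/* Translate a Mesa IR source register into an SSA value.  Inputs become
 * load_var intrinsics, constants become immediates or loads from the
 * "parameters" uniform array, and the swizzle and negation (including SWZ's
 * per-component zero/one swizzles) are applied on top.
 */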
static nir_ssa_def *
ptn_get_src(struct ptn_compile *c, const struct prog_src_register *prog_src)
{
   nir_builder *b = &c->build;
   nir_alu_src src;

   memset(&src, 0, sizeof(src));

   switch (prog_src->File) {
   case PROGRAM_UNDEFINED:
      return nir_imm_float(b, 0.0);
   case PROGRAM_TEMPORARY:
      assert(!prog_src->RelAddr && prog_src->Index >= 0);
      src.src.reg.reg = c->temp_regs[prog_src->Index];
      break;
   case PROGRAM_INPUT: {
      /* ARB_vertex_program doesn't allow relative addressing on vertex
       * attributes; ARB_fragment_program has no relative addressing at all.
       */
      assert(!prog_src->RelAddr);

      assert(prog_src->Index >= 0 && prog_src->Index < VARYING_SLOT_MAX);

      nir_intrinsic_instr *load =
         nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_var);
      load->num_components = 4;
      load->variables[0] = nir_deref_var_create(load, c->input_vars[prog_src->Index]);

      nir_ssa_dest_init(&load->instr, &load->dest, 4, 32, NULL);
      nir_builder_instr_insert(b, &load->instr);

      src.src = nir_src_for_ssa(&load->dest.ssa);
      break;
   }
   case PROGRAM_STATE_VAR:
   case PROGRAM_CONSTANT: {
      /* We actually want to look at the type in the Parameters list for this,
       * because it lets us upload constant builtin uniforms as actual
       * constants.
       */
      struct gl_program_parameter_list *plist = c->prog->Parameters;
      gl_register_file file = prog_src->RelAddr ? prog_src->File :
         plist->Parameters[prog_src->Index].Type;

      switch (file) {
      case PROGRAM_CONSTANT:
         if ((c->prog->IndirectRegisterFiles & (1 << PROGRAM_CONSTANT)) == 0) {
            float *v = (float *) plist->ParameterValues[prog_src->Index];
            src.src = nir_src_for_ssa(nir_imm_vec4(b, v[0], v[1], v[2], v[3]));
            break;
         }
         /* FALLTHROUGH */
      case PROGRAM_STATE_VAR: {
         assert(c->parameters != NULL);

         nir_intrinsic_instr *load =
            nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_var);
         nir_ssa_dest_init(&load->instr, &load->dest, 4, 32, NULL);
         load->num_components = 4;

         load->variables[0] = nir_deref_var_create(load, c->parameters);
         nir_deref_array *deref_arr =
            nir_deref_array_create(load->variables[0]);
         deref_arr->deref.type = glsl_vec4_type();
         load->variables[0]->deref.child = &deref_arr->deref;

         if (prog_src->RelAddr) {
            deref_arr->deref_array_type = nir_deref_array_type_indirect;

            nir_alu_src addr_src = { NIR_SRC_INIT };
            addr_src.src = nir_src_for_reg(c->addr_reg);
            nir_ssa_def *reladdr = nir_imov_alu(b, addr_src, 1);

            if (prog_src->Index < 0) {
               /* This is a negative offset which should be added to the
                * address register's value.
                */
               reladdr = nir_iadd(b, reladdr, nir_imm_int(b, prog_src->Index));

               deref_arr->base_offset = 0;
            } else {
               deref_arr->base_offset = prog_src->Index;
            }
            deref_arr->indirect = nir_src_for_ssa(reladdr);
         } else {
            deref_arr->deref_array_type = nir_deref_array_type_direct;
            deref_arr->base_offset = prog_src->Index;
         }

         nir_builder_instr_insert(b, &load->instr);

         src.src = nir_src_for_ssa(&load->dest.ssa);
         break;
      }
      default:
         fprintf(stderr, "bad uniform src register file: %s (%d)\n",
                 _mesa_register_file_name(file), file);
         abort();
      }
      break;
   }
   default:
      fprintf(stderr, "unknown src register file: %s (%d)\n",
              _mesa_register_file_name(prog_src->File), prog_src->File);
      abort();
   }

   nir_ssa_def *def;
   if (!HAS_EXTENDED_SWIZZLE(prog_src->Swizzle) &&
       (prog_src->Negate == NEGATE_NONE || prog_src->Negate == NEGATE_XYZW)) {
      /* The simple non-SWZ case. */
      for (int i = 0; i < 4; i++)
         src.swizzle[i] = GET_SWZ(prog_src->Swizzle, i);

      def = nir_fmov_alu(b, src, 4);

      if (prog_src->Negate)
         def = nir_fneg(b, def);
   } else {
      /* The SWZ instruction allows per-component zero/one swizzles, and also
       * per-component negation.
       */
      nir_ssa_def *chans[4];
      for (int i = 0; i < 4; i++) {
         int swizzle = GET_SWZ(prog_src->Swizzle, i);
         if (swizzle == SWIZZLE_ZERO) {
            chans[i] = nir_imm_float(b, 0.0);
         } else if (swizzle == SWIZZLE_ONE) {
            chans[i] = nir_imm_float(b, 1.0);
         } else {
            assert(swizzle != SWIZZLE_NIL);
            nir_alu_instr *mov = nir_alu_instr_create(b->shader, nir_op_fmov);
            nir_ssa_dest_init(&mov->instr, &mov->dest.dest, 1, 32, NULL);
            mov->dest.write_mask = 0x1;
            mov->src[0] = src;
            mov->src[0].swizzle[0] = swizzle;
            nir_builder_instr_insert(b, &mov->instr);

            chans[i] = &mov->dest.dest.ssa;
         }

         if (prog_src->Negate & (1 << i))
            chans[i] = nir_fneg(b, chans[i]);
      }
      def = nir_vec4(b, chans[0], chans[1], chans[2], chans[3]);
   }

   return def;
}

static void
ptn_alu(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
{
   unsigned num_srcs = nir_op_infos[op].num_inputs;
   nir_alu_instr *instr = nir_alu_instr_create(b->shader, op);
   unsigned i;

   for (i = 0; i < num_srcs; i++)
      instr->src[i].src = nir_src_for_ssa(src[i]);

   instr->dest = dest;
   nir_builder_instr_insert(b, &instr->instr);
}

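/* Move "def" into the channels of "dest" selected by both the destination's
 * writemask and "write_mask".  Sources narrower than vec4 get their last
 * component replicated into the unused swizzle slots.
 */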
static void
ptn_move_dest_masked(nir_builder *b, nir_alu_dest dest,
                     nir_ssa_def *def, unsigned write_mask)
{
   if (!(dest.write_mask & write_mask))
      return;

   nir_alu_instr *mov = nir_alu_instr_create(b->shader, nir_op_fmov);
   if (!mov)
      return;

   mov->dest = dest;
   mov->dest.write_mask &= write_mask;
   mov->src[0].src = nir_src_for_ssa(def);
   for (unsigned i = def->num_components; i < 4; i++)
      mov->src[0].swizzle[i] = def->num_components - 1;
   nir_builder_instr_insert(b, &mov->instr);
}

static void
ptn_move_dest(nir_builder *b, nir_alu_dest dest, nir_ssa_def *def)
{
   ptn_move_dest_masked(b, dest, def, WRITEMASK_XYZW);
}

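/* ARL - Address Register Load: floor the source and convert it to an
 * integer for later relative addressing of the parameter array.
 */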
static void
ptn_arl(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
{
   ptn_move_dest(b, dest, nir_f2i(b, nir_ffloor(b, src[0])));
}

/* EXP - Approximate Exponential Base 2
 *  dst.x = 2^{\lfloor src.x\rfloor}
 *  dst.y = src.x - \lfloor src.x\rfloor
 *  dst.z = 2^{src.x}
 *  dst.w = 1.0
 */
static void
ptn_exp(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
{
   nir_ssa_def *srcx = ptn_channel(b, src[0], X);

   ptn_move_dest_masked(b, dest, nir_fexp2(b, nir_ffloor(b, srcx)), WRITEMASK_X);
   ptn_move_dest_masked(b, dest, nir_fsub(b, srcx, nir_ffloor(b, srcx)), WRITEMASK_Y);
   ptn_move_dest_masked(b, dest, nir_fexp2(b, srcx), WRITEMASK_Z);
   ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_W);
}

/* LOG - Approximate Logarithm Base 2
 *  dst.x = \lfloor\log_2{|src.x|}\rfloor
 *  dst.y = |src.x| * 2^{-\lfloor\log_2{|src.x|}\rfloor}
 *  dst.z = \log_2{|src.x|}
 *  dst.w = 1.0
 */
static void
ptn_log(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
{
   nir_ssa_def *abs_srcx = nir_fabs(b, ptn_channel(b, src[0], X));
   nir_ssa_def *log2 = nir_flog2(b, abs_srcx);
   nir_ssa_def *floor_log2 = nir_ffloor(b, log2);

   ptn_move_dest_masked(b, dest, floor_log2, WRITEMASK_X);
   ptn_move_dest_masked(b, dest,
                        nir_fmul(b, abs_srcx,
                                 nir_fexp2(b, nir_fneg(b, floor_log2))),
                        WRITEMASK_Y);
   ptn_move_dest_masked(b, dest, log2, WRITEMASK_Z);
   ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_W);
}

/* DST - Distance Vector
 *  dst.x = 1.0
 *  dst.y = src0.y \times src1.y
 *  dst.z = src0.z
 *  dst.w = src1.w
 */
static void
ptn_dst(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
{
   ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_X);
   ptn_move_dest_masked(b, dest, nir_fmul(b, src[0], src[1]), WRITEMASK_Y);
   ptn_move_dest_masked(b, dest, nir_fmov(b, src[0]), WRITEMASK_Z);
   ptn_move_dest_masked(b, dest, nir_fmov(b, src[1]), WRITEMASK_W);
}

/* LIT - Light Coefficients
 *  dst.x = 1.0
 *  dst.y = max(src.x, 0.0)
 *  dst.z = (src.x > 0.0) ? max(src.y, 0.0)^{clamp(src.w, -128.0, 128.0)} : 0.0
 *  dst.w = 1.0
 */
static void
ptn_lit(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
{
   ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_XW);

   ptn_move_dest_masked(b, dest, nir_fmax(b, ptn_channel(b, src[0], X),
                                          nir_imm_float(b, 0.0)), WRITEMASK_Y);

   if (dest.write_mask & WRITEMASK_Z) {
      nir_ssa_def *src0_y = ptn_channel(b, src[0], Y);
      nir_ssa_def *wclamp = nir_fmax(b, nir_fmin(b, ptn_channel(b, src[0], W),
                                                 nir_imm_float(b, 128.0)),
                                     nir_imm_float(b, -128.0));
      nir_ssa_def *pow = nir_fpow(b, nir_fmax(b, src0_y, nir_imm_float(b, 0.0)),
                                  wclamp);

      nir_ssa_def *z;
      if (b->shader->options->native_integers) {
         z = nir_bcsel(b,
                       nir_fge(b, nir_imm_float(b, 0.0), ptn_channel(b, src[0], X)),
                       nir_imm_float(b, 0.0),
                       pow);
      } else {
         z = nir_fcsel(b,
                       nir_sge(b, nir_imm_float(b, 0.0), ptn_channel(b, src[0], X)),
                       nir_imm_float(b, 0.0),
                       pow);
      }

      ptn_move_dest_masked(b, dest, z, WRITEMASK_Z);
   }
}

/* SCS - Sine Cosine
 *  dst.x = \cos{src.x}
 *  dst.y = \sin{src.x}
 *  dst.z = 0.0
 *  dst.w = 1.0
 */
static void
ptn_scs(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
{
   ptn_move_dest_masked(b, dest, nir_fcos(b, ptn_channel(b, src[0], X)),
                        WRITEMASK_X);
   ptn_move_dest_masked(b, dest, nir_fsin(b, ptn_channel(b, src[0], X)),
                        WRITEMASK_Y);
   ptn_move_dest_masked(b, dest, nir_imm_float(b, 0.0), WRITEMASK_Z);
   ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_W);
}

/**
 * Emit SLT. For platforms with integers, prefer b2f(flt(...)).
 */
static void
ptn_slt(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
{
   if (b->shader->options->native_integers) {
      ptn_move_dest(b, dest, nir_b2f(b, nir_flt(b, src[0], src[1])));
   } else {
      ptn_move_dest(b, dest, nir_slt(b, src[0], src[1]));
   }
}

/**
 * Emit SGE. For platforms with integers, prefer b2f(fge(...)).
 */
static void
ptn_sge(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
{
   if (b->shader->options->native_integers) {
      ptn_move_dest(b, dest, nir_b2f(b, nir_fge(b, src[0], src[1])));
   } else {
      ptn_move_dest(b, dest, nir_sge(b, src[0], src[1]));
   }
}

static void
ptn_sle(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
{
   nir_ssa_def *commuted[] = { src[1], src[0] };
   ptn_sge(b, dest, commuted);
}

static void
ptn_sgt(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
{
   nir_ssa_def *commuted[] = { src[1], src[0] };
   ptn_slt(b, dest, commuted);
}

/**
 * Emit SEQ. For platforms with integers, prefer b2f(feq(...)).
 */
static void
ptn_seq(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
{
   if (b->shader->options->native_integers) {
      ptn_move_dest(b, dest, nir_b2f(b, nir_feq(b, src[0], src[1])));
   } else {
      ptn_move_dest(b, dest, nir_seq(b, src[0], src[1]));
   }
}

/**
 * Emit SNE. For platforms with integers, prefer b2f(fne(...)).
 */
static void
ptn_sne(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
{
   if (b->shader->options->native_integers) {
      ptn_move_dest(b, dest, nir_b2f(b, nir_fne(b, src[0], src[1])));
   } else {
      ptn_move_dest(b, dest, nir_sne(b, src[0], src[1]));
   }
}

static void
ptn_xpd(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
{
   ptn_move_dest_masked(b, dest,
                        nir_fsub(b,
                                 nir_fmul(b,
                                          nir_swizzle(b, src[0], SWIZ(Y, Z, X, W), 3, true),
                                          nir_swizzle(b, src[1], SWIZ(Z, X, Y, W), 3, true)),
                                 nir_fmul(b,
                                          nir_swizzle(b, src[1], SWIZ(Y, Z, X, W), 3, true),
                                          nir_swizzle(b, src[0], SWIZ(Z, X, Y, W), 3, true))),
                        WRITEMASK_XYZ);
   ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_W);
}

static void
ptn_dp2(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
{
   ptn_move_dest(b, dest, nir_fdot2(b, src[0], src[1]));
}

static void
ptn_dp3(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
{
   ptn_move_dest(b, dest, nir_fdot3(b, src[0], src[1]));
}

static void
ptn_dp4(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
{
   ptn_move_dest(b, dest, nir_fdot4(b, src[0], src[1]));
}

static void
ptn_dph(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
{
   ptn_move_dest(b, dest, nir_fdph(b, src[0], src[1]));
}

static void
ptn_cmp(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
{
   if (b->shader->options->native_integers) {
      ptn_move_dest(b, dest, nir_bcsel(b,
                                       nir_flt(b, src[0], nir_imm_float(b, 0.0)),
                                       src[1], src[2]));
   } else {
      ptn_move_dest(b, dest, nir_fcsel(b,
                                       nir_slt(b, src[0], nir_imm_float(b, 0.0)),
                                       src[1], src[2]));
   }
}

static void
ptn_lrp(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
{
   ptn_move_dest(b, dest, nir_flrp(b, src[2], src[1], src[0]));
}

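/* KIL - discard the fragment if any component of the source is negative. */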
static void
ptn_kil(nir_builder *b, nir_ssa_def **src)
{
   nir_ssa_def *cmp = b->shader->options->native_integers ?
      nir_bany_inequal4(b, nir_flt(b, src[0], nir_imm_float(b, 0.0)), nir_imm_int(b, 0)) :
      nir_fany_nequal4(b, nir_slt(b, src[0], nir_imm_float(b, 0.0)), nir_imm_float(b, 0.0));

   nir_intrinsic_instr *discard =
      nir_intrinsic_instr_create(b->shader, nir_intrinsic_discard_if);
   discard->src[0] = nir_src_for_ssa(cmp);
   nir_builder_instr_insert(b, &discard->instr);
}

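/* Translate the texturing opcodes into a nir_tex_instr, wiring up the
 * coordinate and any projector, bias, LOD, or shadow comparison sources
 * from the vec4 Mesa IR source.
 */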
static void
ptn_tex(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src,
        struct prog_instruction *prog_inst)
{
   nir_tex_instr *instr;
   nir_texop op;
   unsigned num_srcs;

   switch (prog_inst->Opcode) {
   case OPCODE_TEX:
      op = nir_texop_tex;
      num_srcs = 1;
      break;
   case OPCODE_TXB:
      op = nir_texop_txb;
      num_srcs = 2;
      break;
   case OPCODE_TXD:
      op = nir_texop_txd;
      num_srcs = 3;
      break;
   case OPCODE_TXL:
      op = nir_texop_txl;
      num_srcs = 2;
      break;
   case OPCODE_TXP:
      op = nir_texop_tex;
      num_srcs = 2;
      break;
   default:
      fprintf(stderr, "unknown tex op %d\n", prog_inst->Opcode);
      abort();
   }

   if (prog_inst->TexShadow)
      num_srcs++;

   instr = nir_tex_instr_create(b->shader, num_srcs);
   instr->op = op;
   instr->dest_type = nir_type_float;
   instr->is_shadow = prog_inst->TexShadow;
   instr->texture_index = prog_inst->TexSrcUnit;
   instr->sampler_index = prog_inst->TexSrcUnit;

   switch (prog_inst->TexSrcTarget) {
   case TEXTURE_1D_INDEX:
      instr->sampler_dim = GLSL_SAMPLER_DIM_1D;
      break;
   case TEXTURE_2D_INDEX:
      instr->sampler_dim = GLSL_SAMPLER_DIM_2D;
      break;
   case TEXTURE_3D_INDEX:
      instr->sampler_dim = GLSL_SAMPLER_DIM_3D;
      break;
   case TEXTURE_CUBE_INDEX:
      instr->sampler_dim = GLSL_SAMPLER_DIM_CUBE;
      break;
   case TEXTURE_RECT_INDEX:
      instr->sampler_dim = GLSL_SAMPLER_DIM_RECT;
      break;
   default:
      fprintf(stderr, "Unknown texture target %d\n", prog_inst->TexSrcTarget);
      abort();
   }

   switch (instr->sampler_dim) {
   case GLSL_SAMPLER_DIM_1D:
   case GLSL_SAMPLER_DIM_BUF:
      instr->coord_components = 1;
      break;
   case GLSL_SAMPLER_DIM_2D:
   case GLSL_SAMPLER_DIM_RECT:
   case GLSL_SAMPLER_DIM_EXTERNAL:
   case GLSL_SAMPLER_DIM_MS:
      instr->coord_components = 2;
      break;
   case GLSL_SAMPLER_DIM_3D:
   case GLSL_SAMPLER_DIM_CUBE:
      instr->coord_components = 3;
      break;
   }

   unsigned src_number = 0;

   instr->src[src_number].src =
      nir_src_for_ssa(nir_swizzle(b, src[0], SWIZ(X, Y, Z, W),
                                  instr->coord_components, true));
   instr->src[src_number].src_type = nir_tex_src_coord;
   src_number++;

   if (prog_inst->Opcode == OPCODE_TXP) {
      instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], W));
      instr->src[src_number].src_type = nir_tex_src_projector;
      src_number++;
   }

   if (prog_inst->Opcode == OPCODE_TXB) {
      instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], W));
      instr->src[src_number].src_type = nir_tex_src_bias;
      src_number++;
   }

   if (prog_inst->Opcode == OPCODE_TXL) {
      instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], W));
      instr->src[src_number].src_type = nir_tex_src_lod;
      src_number++;
   }

   if (instr->is_shadow) {
      if (instr->coord_components < 3)
         instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], Z));
      else
         instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], W));

      instr->src[src_number].src_type = nir_tex_src_comparitor;
      src_number++;
   }

   assert(src_number == num_srcs);

   nir_ssa_dest_init(&instr->instr, &instr->dest, 4, 32, NULL);
   nir_builder_instr_insert(b, &instr->instr);

   /* Resolve the writemask on the texture op. */
   ptn_move_dest(b, dest, &instr->dest.ssa);
}

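/* Direct Mesa-opcode to NIR-opcode mapping for simple ALU instructions.
 * Entries left at 0 are either handled explicitly in ptn_emit_instruction()
 * or are unsupported.
 */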
static const nir_op op_trans[MAX_OPCODE] = {
   [OPCODE_NOP] = 0,
   [OPCODE_ABS] = nir_op_fabs,
   [OPCODE_ADD] = nir_op_fadd,
   [OPCODE_ARL] = 0,
   [OPCODE_CMP] = 0,
   [OPCODE_COS] = 0,
   [OPCODE_DDX] = nir_op_fddx,
   [OPCODE_DDY] = nir_op_fddy,
   [OPCODE_DP2] = 0,
   [OPCODE_DP3] = 0,
   [OPCODE_DP4] = 0,
   [OPCODE_DPH] = 0,
   [OPCODE_DST] = 0,
   [OPCODE_END] = 0,
   [OPCODE_EX2] = 0,
   [OPCODE_EXP] = 0,
   [OPCODE_FLR] = nir_op_ffloor,
   [OPCODE_FRC] = nir_op_ffract,
   [OPCODE_LG2] = 0,
   [OPCODE_LIT] = 0,
   [OPCODE_LOG] = 0,
   [OPCODE_LRP] = 0,
   [OPCODE_MAD] = nir_op_ffma,
   [OPCODE_MAX] = nir_op_fmax,
   [OPCODE_MIN] = nir_op_fmin,
   [OPCODE_MOV] = nir_op_fmov,
   [OPCODE_MUL] = nir_op_fmul,
   [OPCODE_POW] = 0,
   [OPCODE_RCP] = 0,
   [OPCODE_RSQ] = 0,
   [OPCODE_SCS] = 0,
   [OPCODE_SEQ] = 0,
   [OPCODE_SGE] = 0,
   [OPCODE_SGT] = 0,
   [OPCODE_SIN] = 0,
   [OPCODE_SLE] = 0,
   [OPCODE_SLT] = 0,
   [OPCODE_SNE] = 0,
   [OPCODE_SSG] = nir_op_fsign,
   [OPCODE_SUB] = nir_op_fsub,
   [OPCODE_SWZ] = 0,
   [OPCODE_TEX] = 0,
   [OPCODE_TRUNC] = nir_op_ftrunc,
   [OPCODE_TXB] = 0,
   [OPCODE_TXD] = 0,
   [OPCODE_TXL] = 0,
   [OPCODE_TXP] = 0,
   [OPCODE_XPD] = 0,
};

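/* Translate a single Mesa IR instruction: fetch its sources, emit the
 * equivalent NIR, and apply saturation by re-reading the destination.
 */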
static void
ptn_emit_instruction(struct ptn_compile *c, struct prog_instruction *prog_inst)
{
   nir_builder *b = &c->build;
   unsigned i;
   const unsigned op = prog_inst->Opcode;

   if (op == OPCODE_END)
      return;

   nir_ssa_def *src[3];
   for (i = 0; i < 3; i++) {
      src[i] = ptn_get_src(c, &prog_inst->SrcReg[i]);
   }
   nir_alu_dest dest = ptn_get_dest(c, &prog_inst->DstReg);
   if (c->error)
      return;

   switch (op) {
   case OPCODE_RSQ:
      ptn_move_dest(b, dest,
                    nir_frsq(b, nir_fabs(b, ptn_channel(b, src[0], X))));
      break;

   case OPCODE_RCP:
      ptn_move_dest(b, dest, nir_frcp(b, ptn_channel(b, src[0], X)));
      break;

   case OPCODE_EX2:
      ptn_move_dest(b, dest, nir_fexp2(b, ptn_channel(b, src[0], X)));
      break;

   case OPCODE_LG2:
      ptn_move_dest(b, dest, nir_flog2(b, ptn_channel(b, src[0], X)));
      break;

   case OPCODE_POW:
      ptn_move_dest(b, dest, nir_fpow(b,
                                      ptn_channel(b, src[0], X),
                                      ptn_channel(b, src[1], X)));
      break;

   case OPCODE_COS:
      ptn_move_dest(b, dest, nir_fcos(b, ptn_channel(b, src[0], X)));
      break;

   case OPCODE_SIN:
      ptn_move_dest(b, dest, nir_fsin(b, ptn_channel(b, src[0], X)));
      break;

   case OPCODE_ARL:
      ptn_arl(b, dest, src);
      break;

   case OPCODE_EXP:
      ptn_exp(b, dest, src);
      break;

   case OPCODE_LOG:
      ptn_log(b, dest, src);
      break;

   case OPCODE_LRP:
      ptn_lrp(b, dest, src);
      break;

   case OPCODE_DST:
      ptn_dst(b, dest, src);
      break;

   case OPCODE_LIT:
      ptn_lit(b, dest, src);
      break;

   case OPCODE_XPD:
      ptn_xpd(b, dest, src);
      break;

   case OPCODE_DP2:
      ptn_dp2(b, dest, src);
      break;

   case OPCODE_DP3:
      ptn_dp3(b, dest, src);
      break;

   case OPCODE_DP4:
      ptn_dp4(b, dest, src);
      break;

   case OPCODE_DPH:
      ptn_dph(b, dest, src);
      break;

   case OPCODE_KIL:
      ptn_kil(b, src);
      break;

   case OPCODE_CMP:
      ptn_cmp(b, dest, src);
      break;

   case OPCODE_SCS:
      ptn_scs(b, dest, src);
      break;

   case OPCODE_SLT:
      ptn_slt(b, dest, src);
      break;

   case OPCODE_SGT:
      ptn_sgt(b, dest, src);
      break;

   case OPCODE_SLE:
      ptn_sle(b, dest, src);
      break;

   case OPCODE_SGE:
      ptn_sge(b, dest, src);
      break;

   case OPCODE_SEQ:
      ptn_seq(b, dest, src);
      break;

   case OPCODE_SNE:
      ptn_sne(b, dest, src);
      break;

   case OPCODE_TEX:
   case OPCODE_TXB:
   case OPCODE_TXD:
   case OPCODE_TXL:
   case OPCODE_TXP:
      ptn_tex(b, dest, src, prog_inst);
      break;

   case OPCODE_SWZ:
      /* Extended swizzles were already handled in ptn_get_src(). */
      ptn_alu(b, nir_op_fmov, dest, src);
      break;

   case OPCODE_NOP:
      break;

   default:
      if (op_trans[op] != 0) {
         ptn_alu(b, op_trans[op], dest, src);
      } else {
         fprintf(stderr, "unknown opcode: %s\n", _mesa_opcode_string(op));
         abort();
      }
      break;
   }

   if (prog_inst->Saturate) {
      assert(!dest.dest.is_ssa);
      ptn_move_dest(b, dest, nir_fsat(b, ptn_src_for_dest(c, &dest)));
   }
}

/**
 * Emits a NIR store intrinsic for each PROGRAM_OUTPUT value to its output
 * variable at the end of the shader.
 *
 * We don't generate these stores incrementally as the PROGRAM_OUTPUT values
 * are written, because there is no output load intrinsic, which would leave
 * us unable to handle writemasks.
 */
static void
ptn_add_output_stores(struct ptn_compile *c)
{
   nir_builder *b = &c->build;

   nir_foreach_variable(var, &b->shader->outputs) {
      nir_intrinsic_instr *store =
         nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_var);
      store->num_components = glsl_get_vector_elements(var->type);
      nir_intrinsic_set_write_mask(store, (1 << store->num_components) - 1);
      store->variables[0] =
         nir_deref_var_create(store, c->output_vars[var->data.location]);

      if (c->prog->Target == GL_FRAGMENT_PROGRAM_ARB &&
          var->data.location == FRAG_RESULT_DEPTH) {
         /* result.depth has this strange convention of being the .z component
          * of a vec4 with undefined .xyw components.  We resolve it to a
          * scalar, to match GLSL's gl_FragDepth and the expectations of most
          * backends.
          */
         nir_alu_src alu_src = { NIR_SRC_INIT };
         alu_src.src = nir_src_for_reg(c->output_regs[FRAG_RESULT_DEPTH]);
         alu_src.swizzle[0] = SWIZZLE_Z;
         store->src[0] = nir_src_for_ssa(nir_fmov_alu(b, alu_src, 1));
      } else {
         store->src[0].reg.reg = c->output_regs[var->data.location];
      }
      nir_builder_instr_insert(b, &store->instr);
   }
}

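/* Create NIR variables and registers for the program's inputs, outputs and
 * temporaries.  Outputs get a backing register so they can be written with
 * arbitrary writemasks and then stored to the real output variables by
 * ptn_add_output_stores().
 */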
static void
setup_registers_and_variables(struct ptn_compile *c)
{
   nir_builder *b = &c->build;
   struct nir_shader *shader = b->shader;

   /* Create input variables. */
   const int num_inputs = _mesa_flsll(c->prog->InputsRead);
   for (int i = 0; i < num_inputs; i++) {
      if (!(c->prog->InputsRead & BITFIELD64_BIT(i)))
         continue;

      nir_variable *var =
         nir_variable_create(shader, nir_var_shader_in, glsl_vec4_type(),
                             ralloc_asprintf(shader, "in_%d", i));
      var->data.location = i;
      var->data.index = 0;

      if (c->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
         struct gl_fragment_program *fp =
            (struct gl_fragment_program *) c->prog;

         var->data.interpolation = fp->InterpQualifier[i];

         if (i == VARYING_SLOT_POS) {
            var->data.origin_upper_left = fp->OriginUpperLeft;
            var->data.pixel_center_integer = fp->PixelCenterInteger;
         } else if (i == VARYING_SLOT_FOGC) {
            /* fogcoord is defined as <f, 0.0, 0.0, 1.0>.  Make the actual
             * input variable a float, and create a local containing the
             * full vec4 value.
             */
            var->type = glsl_float_type();

            nir_intrinsic_instr *load_x =
               nir_intrinsic_instr_create(shader, nir_intrinsic_load_var);
            load_x->num_components = 1;
            load_x->variables[0] = nir_deref_var_create(load_x, var);
            nir_ssa_dest_init(&load_x->instr, &load_x->dest, 1, 32, NULL);
            nir_builder_instr_insert(b, &load_x->instr);

            nir_ssa_def *f001 = nir_vec4(b, &load_x->dest.ssa, nir_imm_float(b, 0.0),
                                         nir_imm_float(b, 0.0), nir_imm_float(b, 1.0));

            nir_variable *fullvar =
               nir_local_variable_create(b->impl, glsl_vec4_type(),
                                         "fogcoord_tmp");
            nir_intrinsic_instr *store =
               nir_intrinsic_instr_create(shader, nir_intrinsic_store_var);
            store->num_components = 4;
            nir_intrinsic_set_write_mask(store, WRITEMASK_XYZW);
            store->variables[0] = nir_deref_var_create(store, fullvar);
            store->src[0] = nir_src_for_ssa(f001);
            nir_builder_instr_insert(b, &store->instr);

            /* We inserted the real input into the list so the driver has real
             * inputs, but we set c->input_vars[i] to the temporary so we use
             * the splatted value.
             */
            c->input_vars[i] = fullvar;
            continue;
         }
      }

      c->input_vars[i] = var;
   }

   /* Create output registers and variables. */
   int max_outputs = _mesa_fls(c->prog->OutputsWritten);
   c->output_regs = rzalloc_array(c, nir_register *, max_outputs);

   for (int i = 0; i < max_outputs; i++) {
      if (!(c->prog->OutputsWritten & BITFIELD64_BIT(i)))
         continue;

      /* Since we can't load from outputs in the IR, we make temporaries
       * for the outputs and emit stores to the real outputs at the end of
       * the shader.
       */
      nir_register *reg = nir_local_reg_create(b->impl);
      reg->num_components = 4;

      nir_variable *var = rzalloc(shader, nir_variable);
      if (c->prog->Target == GL_FRAGMENT_PROGRAM_ARB && i == FRAG_RESULT_DEPTH)
         var->type = glsl_float_type();
      else
         var->type = glsl_vec4_type();
      var->data.mode = nir_var_shader_out;
      var->name = ralloc_asprintf(var, "out_%d", i);

      var->data.location = i;
      var->data.index = 0;

      c->output_regs[i] = reg;

      exec_list_push_tail(&shader->outputs, &var->node);
      c->output_vars[i] = var;
   }

   /* Create temporary registers. */
   c->temp_regs = rzalloc_array(c, nir_register *, c->prog->NumTemporaries);

   nir_register *reg;
   for (unsigned i = 0; i < c->prog->NumTemporaries; i++) {
      reg = nir_local_reg_create(b->impl);
      if (!reg) {
         c->error = true;
         return;
      }
      reg->num_components = 4;
      c->temp_regs[i] = reg;
   }

   /* Create the address register (for ARB_vertex_program). */
   reg = nir_local_reg_create(b->impl);
   if (!reg) {
      c->error = true;
      return;
   }
   reg->num_components = 1;
   c->addr_reg = reg;
}

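/**
 * Translate "prog" into a new nir_shader.  The program's Parameters list
 * becomes a single "parameters" uniform array.  Returns NULL on allocation
 * failure or if a translation error was flagged.
 */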
struct nir_shader *
prog_to_nir(const struct gl_program *prog,
            const nir_shader_compiler_options *options)
{
   struct ptn_compile *c;
   struct nir_shader *s;
   gl_shader_stage stage = _mesa_program_enum_to_shader_stage(prog->Target);

   c = rzalloc(NULL, struct ptn_compile);
   if (!c)
      return NULL;
   c->prog = prog;

   nir_builder_init_simple_shader(&c->build, NULL, stage, options);
   s = c->build.shader;

   if (prog->Parameters->NumParameters > 0) {
      c->parameters = rzalloc(s, nir_variable);
      c->parameters->type =
         glsl_array_type(glsl_vec4_type(), prog->Parameters->NumParameters);
      c->parameters->name = "parameters";
      c->parameters->data.read_only = true;
      c->parameters->data.mode = nir_var_uniform;
      exec_list_push_tail(&s->uniforms, &c->parameters->node);
   }

   setup_registers_and_variables(c);
   if (unlikely(c->error))
      goto fail;

   for (unsigned int i = 0; i < prog->NumInstructions; i++) {
      ptn_emit_instruction(c, &prog->Instructions[i]);

      if (unlikely(c->error))
         break;
   }

   ptn_add_output_stores(c);

   s->info.name = ralloc_asprintf(s, "ARB%d", prog->Id);
   s->info.num_textures = _mesa_fls(prog->SamplersUsed);
   s->info.num_ubos = 0;
   s->info.num_abos = 0;
   s->info.num_ssbos = 0;
   s->info.num_images = 0;
   s->info.inputs_read = prog->InputsRead;
   s->info.outputs_written = prog->OutputsWritten;
   s->info.system_values_read = prog->SystemValuesRead;
   s->info.uses_texture_gather = false;
   s->info.uses_clip_distance_out = false;
   s->info.separate_shader = false;

   if (stage == MESA_SHADER_FRAGMENT) {
      struct gl_fragment_program *fp = (struct gl_fragment_program *)prog;

      s->info.fs.uses_discard = fp->UsesKill;
   }

fail:
   if (c->error) {
      ralloc_free(s);
      s = NULL;
   }
   ralloc_free(c);
   return s;
}