prog/nir: Simplify some load/store operations
[mesa.git] / src / mesa / program / prog_to_nir.c
1 /*
2 * Copyright © 2015 Intel Corporation
3 * Copyright © 2014-2015 Broadcom
4 * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
15 * Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
23 * IN THE SOFTWARE.
24 */
25
26 #include "compiler/nir/nir.h"
27 #include "compiler/nir/nir_builder.h"
28 #include "compiler/glsl/list.h"
29 #include "main/imports.h"
30 #include "util/ralloc.h"
31
32 #include "prog_to_nir.h"
33 #include "prog_instruction.h"
34 #include "prog_parameter.h"
35 #include "prog_print.h"
36 #include "program.h"
37
38 /**
39 * \file prog_to_nir.c
40 *
41 * A translator from Mesa IR (prog_instruction.h) to NIR. This is primarily
42 * intended to support ARB_vertex_program, ARB_fragment_program, and fixed-function
43 * vertex processing. Full GLSL support should use glsl_to_nir instead.
44 */
45
46 struct ptn_compile {
47 const struct gl_program *prog;
48 nir_builder build;
49 bool error;
50
51 nir_variable *parameters;
52 nir_variable *input_vars[VARYING_SLOT_MAX];
53 nir_variable *output_vars[VARYING_SLOT_MAX];
54 nir_register **output_regs;
55 nir_register **temp_regs;
56
57 nir_register *addr_reg;
58 };
59
/* Builds a temporary unsigned[4] swizzle array from four component names
 * (X, Y, Z, W, ...), each pasted onto the SWIZZLE_ prefix.
 */
#define SWIZ(X, Y, Z, W) \
   (unsigned[4]){ SWIZZLE_##X, SWIZZLE_##Y, SWIZZLE_##Z, SWIZZLE_##W }

/* Extracts the single channel `ch` of `src` as a one-component value. */
#define ptn_channel(b, src, ch) nir_swizzle(b, src, SWIZ(ch, ch, ch, ch), 1, true)
63
64 static nir_ssa_def *
65 ptn_src_for_dest(struct ptn_compile *c, nir_alu_dest *dest)
66 {
67 nir_builder *b = &c->build;
68
69 nir_alu_src src;
70 memset(&src, 0, sizeof(src));
71
72 if (dest->dest.is_ssa)
73 src.src = nir_src_for_ssa(&dest->dest.ssa);
74 else {
75 assert(!dest->dest.reg.indirect);
76 src.src = nir_src_for_reg(dest->dest.reg.reg);
77 src.src.reg.base_offset = dest->dest.reg.base_offset;
78 }
79
80 for (int i = 0; i < 4; i++)
81 src.swizzle[i] = i;
82
83 return nir_fmov_alu(b, src, 4);
84 }
85
86 static nir_alu_dest
87 ptn_get_dest(struct ptn_compile *c, const struct prog_dst_register *prog_dst)
88 {
89 nir_alu_dest dest;
90
91 memset(&dest, 0, sizeof(dest));
92
93 switch (prog_dst->File) {
94 case PROGRAM_TEMPORARY:
95 dest.dest.reg.reg = c->temp_regs[prog_dst->Index];
96 break;
97 case PROGRAM_OUTPUT:
98 dest.dest.reg.reg = c->output_regs[prog_dst->Index];
99 break;
100 case PROGRAM_ADDRESS:
101 assert(prog_dst->Index == 0);
102 dest.dest.reg.reg = c->addr_reg;
103 break;
104 case PROGRAM_UNDEFINED:
105 break;
106 }
107
108 dest.write_mask = prog_dst->WriteMask;
109 dest.saturate = false;
110
111 assert(!prog_dst->RelAddr);
112
113 return dest;
114 }
115
116 static nir_ssa_def *
117 ptn_get_src(struct ptn_compile *c, const struct prog_src_register *prog_src)
118 {
119 nir_builder *b = &c->build;
120 nir_alu_src src;
121
122 memset(&src, 0, sizeof(src));
123
124 switch (prog_src->File) {
125 case PROGRAM_UNDEFINED:
126 return nir_imm_float(b, 0.0);
127 case PROGRAM_TEMPORARY:
128 assert(!prog_src->RelAddr && prog_src->Index >= 0);
129 src.src.reg.reg = c->temp_regs[prog_src->Index];
130 break;
131 case PROGRAM_INPUT: {
132 /* ARB_vertex_program doesn't allow relative addressing on vertex
133 * attributes; ARB_fragment_program has no relative addressing at all.
134 */
135 assert(!prog_src->RelAddr);
136
137 assert(prog_src->Index >= 0 && prog_src->Index < VARYING_SLOT_MAX);
138
139 nir_variable *var = c->input_vars[prog_src->Index];
140 src.src = nir_src_for_ssa(nir_load_var(b, var));
141 break;
142 }
143 case PROGRAM_STATE_VAR:
144 case PROGRAM_CONSTANT: {
145 /* We actually want to look at the type in the Parameters list for this,
146 * because it lets us upload constant builtin uniforms as actual
147 * constants.
148 */
149 struct gl_program_parameter_list *plist = c->prog->Parameters;
150 gl_register_file file = prog_src->RelAddr ? prog_src->File :
151 plist->Parameters[prog_src->Index].Type;
152
153 switch (file) {
154 case PROGRAM_CONSTANT:
155 if ((c->prog->arb.IndirectRegisterFiles &
156 (1 << PROGRAM_CONSTANT)) == 0) {
157 unsigned pvo = plist->ParameterValueOffset[prog_src->Index];
158 float *v = (float *) plist->ParameterValues + pvo;
159 src.src = nir_src_for_ssa(nir_imm_vec4(b, v[0], v[1], v[2], v[3]));
160 break;
161 }
162 /* FALLTHROUGH */
163 case PROGRAM_STATE_VAR: {
164 assert(c->parameters != NULL);
165
166 nir_intrinsic_instr *load =
167 nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_var);
168 nir_ssa_dest_init(&load->instr, &load->dest, 4, 32, NULL);
169 load->num_components = 4;
170
171 load->variables[0] = nir_deref_var_create(load, c->parameters);
172 nir_deref_array *deref_arr =
173 nir_deref_array_create(load->variables[0]);
174 deref_arr->deref.type = glsl_vec4_type();
175 load->variables[0]->deref.child = &deref_arr->deref;
176
177 if (prog_src->RelAddr) {
178 deref_arr->deref_array_type = nir_deref_array_type_indirect;
179
180 nir_alu_src addr_src = { NIR_SRC_INIT };
181 addr_src.src = nir_src_for_reg(c->addr_reg);
182 nir_ssa_def *reladdr = nir_imov_alu(b, addr_src, 1);
183
184 if (prog_src->Index < 0) {
185 /* This is a negative offset which should be added to the address
186 * register's value.
187 */
188 reladdr = nir_iadd(b, reladdr, nir_imm_int(b, prog_src->Index));
189
190 deref_arr->base_offset = 0;
191 } else {
192 deref_arr->base_offset = prog_src->Index;
193 }
194 deref_arr->indirect = nir_src_for_ssa(reladdr);
195 } else {
196 deref_arr->deref_array_type = nir_deref_array_type_direct;
197 deref_arr->base_offset = prog_src->Index;
198 }
199
200 nir_builder_instr_insert(b, &load->instr);
201
202 src.src = nir_src_for_ssa(&load->dest.ssa);
203 break;
204 }
205 default:
206 fprintf(stderr, "bad uniform src register file: %s (%d)\n",
207 _mesa_register_file_name(file), file);
208 abort();
209 }
210 break;
211 }
212 default:
213 fprintf(stderr, "unknown src register file: %s (%d)\n",
214 _mesa_register_file_name(prog_src->File), prog_src->File);
215 abort();
216 }
217
218 nir_ssa_def *def;
219 if (!HAS_EXTENDED_SWIZZLE(prog_src->Swizzle) &&
220 (prog_src->Negate == NEGATE_NONE || prog_src->Negate == NEGATE_XYZW)) {
221 /* The simple non-SWZ case. */
222 for (int i = 0; i < 4; i++)
223 src.swizzle[i] = GET_SWZ(prog_src->Swizzle, i);
224
225 def = nir_fmov_alu(b, src, 4);
226
227 if (prog_src->Negate)
228 def = nir_fneg(b, def);
229 } else {
230 /* The SWZ instruction allows per-component zero/one swizzles, and also
231 * per-component negation.
232 */
233 nir_ssa_def *chans[4];
234 for (int i = 0; i < 4; i++) {
235 int swizzle = GET_SWZ(prog_src->Swizzle, i);
236 if (swizzle == SWIZZLE_ZERO) {
237 chans[i] = nir_imm_float(b, 0.0);
238 } else if (swizzle == SWIZZLE_ONE) {
239 chans[i] = nir_imm_float(b, 1.0);
240 } else {
241 assert(swizzle != SWIZZLE_NIL);
242 nir_alu_instr *mov = nir_alu_instr_create(b->shader, nir_op_fmov);
243 nir_ssa_dest_init(&mov->instr, &mov->dest.dest, 1, 32, NULL);
244 mov->dest.write_mask = 0x1;
245 mov->src[0] = src;
246 mov->src[0].swizzle[0] = swizzle;
247 nir_builder_instr_insert(b, &mov->instr);
248
249 chans[i] = &mov->dest.dest.ssa;
250 }
251
252 if (prog_src->Negate & (1 << i))
253 chans[i] = nir_fneg(b, chans[i]);
254 }
255 def = nir_vec4(b, chans[0], chans[1], chans[2], chans[3]);
256 }
257
258 return def;
259 }
260
261 static void
262 ptn_alu(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
263 {
264 unsigned num_srcs = nir_op_infos[op].num_inputs;
265 nir_alu_instr *instr = nir_alu_instr_create(b->shader, op);
266 unsigned i;
267
268 for (i = 0; i < num_srcs; i++)
269 instr->src[i].src = nir_src_for_ssa(src[i]);
270
271 instr->dest = dest;
272 nir_builder_instr_insert(b, &instr->instr);
273 }
274
275 static void
276 ptn_move_dest_masked(nir_builder *b, nir_alu_dest dest,
277 nir_ssa_def *def, unsigned write_mask)
278 {
279 if (!(dest.write_mask & write_mask))
280 return;
281
282 nir_alu_instr *mov = nir_alu_instr_create(b->shader, nir_op_fmov);
283 if (!mov)
284 return;
285
286 mov->dest = dest;
287 mov->dest.write_mask &= write_mask;
288 mov->src[0].src = nir_src_for_ssa(def);
289 for (unsigned i = def->num_components; i < 4; i++)
290 mov->src[0].swizzle[i] = def->num_components - 1;
291 nir_builder_instr_insert(b, &mov->instr);
292 }
293
294 static void
295 ptn_move_dest(nir_builder *b, nir_alu_dest dest, nir_ssa_def *def)
296 {
297 ptn_move_dest_masked(b, dest, def, WRITEMASK_XYZW);
298 }
299
300 static void
301 ptn_arl(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
302 {
303 ptn_move_dest(b, dest, nir_f2i32(b, nir_ffloor(b, src[0])));
304 }
305
306 /* EXP - Approximate Exponential Base 2
307 * dst.x = 2^{\lfloor src.x\rfloor}
308 * dst.y = src.x - \lfloor src.x\rfloor
309 * dst.z = 2^{src.x}
310 * dst.w = 1.0
311 */
312 static void
313 ptn_exp(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
314 {
315 nir_ssa_def *srcx = ptn_channel(b, src[0], X);
316
317 ptn_move_dest_masked(b, dest, nir_fexp2(b, nir_ffloor(b, srcx)), WRITEMASK_X);
318 ptn_move_dest_masked(b, dest, nir_fsub(b, srcx, nir_ffloor(b, srcx)), WRITEMASK_Y);
319 ptn_move_dest_masked(b, dest, nir_fexp2(b, srcx), WRITEMASK_Z);
320 ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_W);
321 }
322
323 /* LOG - Approximate Logarithm Base 2
324 * dst.x = \lfloor\log_2{|src.x|}\rfloor
325 * dst.y = |src.x| * 2^{-\lfloor\log_2{|src.x|}\rfloor}}
326 * dst.z = \log_2{|src.x|}
327 * dst.w = 1.0
328 */
329 static void
330 ptn_log(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
331 {
332 nir_ssa_def *abs_srcx = nir_fabs(b, ptn_channel(b, src[0], X));
333 nir_ssa_def *log2 = nir_flog2(b, abs_srcx);
334 nir_ssa_def *floor_log2 = nir_ffloor(b, log2);
335
336 ptn_move_dest_masked(b, dest, floor_log2, WRITEMASK_X);
337 ptn_move_dest_masked(b, dest,
338 nir_fmul(b, abs_srcx,
339 nir_fexp2(b, nir_fneg(b, floor_log2))),
340 WRITEMASK_Y);
341 ptn_move_dest_masked(b, dest, log2, WRITEMASK_Z);
342 ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_W);
343 }
344
345 /* DST - Distance Vector
346 * dst.x = 1.0
347 * dst.y = src0.y \times src1.y
348 * dst.z = src0.z
349 * dst.w = src1.w
350 */
351 static void
352 ptn_dst(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
353 {
354 ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_X);
355 ptn_move_dest_masked(b, dest, nir_fmul(b, src[0], src[1]), WRITEMASK_Y);
356 ptn_move_dest_masked(b, dest, nir_fmov(b, src[0]), WRITEMASK_Z);
357 ptn_move_dest_masked(b, dest, nir_fmov(b, src[1]), WRITEMASK_W);
358 }
359
360 /* LIT - Light Coefficients
361 * dst.x = 1.0
362 * dst.y = max(src.x, 0.0)
363 * dst.z = (src.x > 0.0) ? max(src.y, 0.0)^{clamp(src.w, -128.0, 128.0))} : 0
364 * dst.w = 1.0
365 */
366 static void
367 ptn_lit(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
368 {
369 ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_XW);
370
371 ptn_move_dest_masked(b, dest, nir_fmax(b, ptn_channel(b, src[0], X),
372 nir_imm_float(b, 0.0)), WRITEMASK_Y);
373
374 if (dest.write_mask & WRITEMASK_Z) {
375 nir_ssa_def *src0_y = ptn_channel(b, src[0], Y);
376 nir_ssa_def *wclamp = nir_fmax(b, nir_fmin(b, ptn_channel(b, src[0], W),
377 nir_imm_float(b, 128.0)),
378 nir_imm_float(b, -128.0));
379 nir_ssa_def *pow = nir_fpow(b, nir_fmax(b, src0_y, nir_imm_float(b, 0.0)),
380 wclamp);
381
382 nir_ssa_def *z;
383 if (b->shader->options->native_integers) {
384 z = nir_bcsel(b,
385 nir_fge(b, nir_imm_float(b, 0.0), ptn_channel(b, src[0], X)),
386 nir_imm_float(b, 0.0),
387 pow);
388 } else {
389 z = nir_fcsel(b,
390 nir_sge(b, nir_imm_float(b, 0.0), ptn_channel(b, src[0], X)),
391 nir_imm_float(b, 0.0),
392 pow);
393 }
394
395 ptn_move_dest_masked(b, dest, z, WRITEMASK_Z);
396 }
397 }
398
399 /* SCS - Sine Cosine
400 * dst.x = \cos{src.x}
401 * dst.y = \sin{src.x}
402 * dst.z = 0.0
403 * dst.w = 1.0
404 */
405 static void
406 ptn_scs(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
407 {
408 ptn_move_dest_masked(b, dest, nir_fcos(b, ptn_channel(b, src[0], X)),
409 WRITEMASK_X);
410 ptn_move_dest_masked(b, dest, nir_fsin(b, ptn_channel(b, src[0], X)),
411 WRITEMASK_Y);
412 ptn_move_dest_masked(b, dest, nir_imm_float(b, 0.0), WRITEMASK_Z);
413 ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_W);
414 }
415
416 /**
417 * Emit SLT. For platforms with integers, prefer b2f(flt(...)).
418 */
419 static void
420 ptn_slt(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
421 {
422 if (b->shader->options->native_integers) {
423 ptn_move_dest(b, dest, nir_b2f(b, nir_flt(b, src[0], src[1])));
424 } else {
425 ptn_move_dest(b, dest, nir_slt(b, src[0], src[1]));
426 }
427 }
428
429 /**
430 * Emit SGE. For platforms with integers, prefer b2f(fge(...)).
431 */
432 static void
433 ptn_sge(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
434 {
435 if (b->shader->options->native_integers) {
436 ptn_move_dest(b, dest, nir_b2f(b, nir_fge(b, src[0], src[1])));
437 } else {
438 ptn_move_dest(b, dest, nir_sge(b, src[0], src[1]));
439 }
440 }
441
442 static void
443 ptn_xpd(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
444 {
445 ptn_move_dest_masked(b, dest,
446 nir_fsub(b,
447 nir_fmul(b,
448 nir_swizzle(b, src[0], SWIZ(Y, Z, X, W), 3, true),
449 nir_swizzle(b, src[1], SWIZ(Z, X, Y, W), 3, true)),
450 nir_fmul(b,
451 nir_swizzle(b, src[1], SWIZ(Y, Z, X, W), 3, true),
452 nir_swizzle(b, src[0], SWIZ(Z, X, Y, W), 3, true))),
453 WRITEMASK_XYZ);
454 ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_W);
455 }
456
457 static void
458 ptn_dp2(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
459 {
460 ptn_move_dest(b, dest, nir_fdot2(b, src[0], src[1]));
461 }
462
463 static void
464 ptn_dp3(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
465 {
466 ptn_move_dest(b, dest, nir_fdot3(b, src[0], src[1]));
467 }
468
469 static void
470 ptn_dp4(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
471 {
472 ptn_move_dest(b, dest, nir_fdot4(b, src[0], src[1]));
473 }
474
475 static void
476 ptn_dph(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
477 {
478 ptn_move_dest(b, dest, nir_fdph(b, src[0], src[1]));
479 }
480
481 static void
482 ptn_cmp(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
483 {
484 if (b->shader->options->native_integers) {
485 ptn_move_dest(b, dest, nir_bcsel(b,
486 nir_flt(b, src[0], nir_imm_float(b, 0.0)),
487 src[1], src[2]));
488 } else {
489 ptn_move_dest(b, dest, nir_fcsel(b,
490 nir_slt(b, src[0], nir_imm_float(b, 0.0)),
491 src[1], src[2]));
492 }
493 }
494
495 static void
496 ptn_lrp(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
497 {
498 ptn_move_dest(b, dest, nir_flrp(b, src[2], src[1], src[0]));
499 }
500
501 static void
502 ptn_kil(nir_builder *b, nir_ssa_def **src)
503 {
504 nir_ssa_def *cmp = b->shader->options->native_integers ?
505 nir_bany_inequal4(b, nir_flt(b, src[0], nir_imm_float(b, 0.0)), nir_imm_int(b, 0)) :
506 nir_fany_nequal4(b, nir_slt(b, src[0], nir_imm_float(b, 0.0)), nir_imm_float(b, 0.0));
507
508 nir_intrinsic_instr *discard =
509 nir_intrinsic_instr_create(b->shader, nir_intrinsic_discard_if);
510 discard->src[0] = nir_src_for_ssa(cmp);
511 nir_builder_instr_insert(b, &discard->instr);
512 }
513
514 static void
515 ptn_tex(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src,
516 struct prog_instruction *prog_inst)
517 {
518 nir_tex_instr *instr;
519 nir_texop op;
520 unsigned num_srcs;
521
522 switch (prog_inst->Opcode) {
523 case OPCODE_TEX:
524 op = nir_texop_tex;
525 num_srcs = 1;
526 break;
527 case OPCODE_TXB:
528 op = nir_texop_txb;
529 num_srcs = 2;
530 break;
531 case OPCODE_TXD:
532 op = nir_texop_txd;
533 num_srcs = 3;
534 break;
535 case OPCODE_TXL:
536 op = nir_texop_txl;
537 num_srcs = 2;
538 break;
539 case OPCODE_TXP:
540 op = nir_texop_tex;
541 num_srcs = 2;
542 break;
543 default:
544 fprintf(stderr, "unknown tex op %d\n", prog_inst->Opcode);
545 abort();
546 }
547
548 if (prog_inst->TexShadow)
549 num_srcs++;
550
551 instr = nir_tex_instr_create(b->shader, num_srcs);
552 instr->op = op;
553 instr->dest_type = nir_type_float;
554 instr->is_shadow = prog_inst->TexShadow;
555 instr->texture_index = prog_inst->TexSrcUnit;
556 instr->sampler_index = prog_inst->TexSrcUnit;
557
558 switch (prog_inst->TexSrcTarget) {
559 case TEXTURE_1D_INDEX:
560 instr->sampler_dim = GLSL_SAMPLER_DIM_1D;
561 break;
562 case TEXTURE_2D_INDEX:
563 instr->sampler_dim = GLSL_SAMPLER_DIM_2D;
564 break;
565 case TEXTURE_3D_INDEX:
566 instr->sampler_dim = GLSL_SAMPLER_DIM_3D;
567 break;
568 case TEXTURE_CUBE_INDEX:
569 instr->sampler_dim = GLSL_SAMPLER_DIM_CUBE;
570 break;
571 case TEXTURE_RECT_INDEX:
572 instr->sampler_dim = GLSL_SAMPLER_DIM_RECT;
573 break;
574 default:
575 fprintf(stderr, "Unknown texture target %d\n", prog_inst->TexSrcTarget);
576 abort();
577 }
578
579 switch (instr->sampler_dim) {
580 case GLSL_SAMPLER_DIM_1D:
581 case GLSL_SAMPLER_DIM_BUF:
582 instr->coord_components = 1;
583 break;
584 case GLSL_SAMPLER_DIM_2D:
585 case GLSL_SAMPLER_DIM_RECT:
586 case GLSL_SAMPLER_DIM_EXTERNAL:
587 case GLSL_SAMPLER_DIM_MS:
588 instr->coord_components = 2;
589 break;
590 case GLSL_SAMPLER_DIM_3D:
591 case GLSL_SAMPLER_DIM_CUBE:
592 instr->coord_components = 3;
593 break;
594 case GLSL_SAMPLER_DIM_SUBPASS:
595 case GLSL_SAMPLER_DIM_SUBPASS_MS:
596 unreachable("can't reach");
597 }
598
599 unsigned src_number = 0;
600
601 instr->src[src_number].src =
602 nir_src_for_ssa(nir_swizzle(b, src[0], SWIZ(X, Y, Z, W),
603 instr->coord_components, true));
604 instr->src[src_number].src_type = nir_tex_src_coord;
605 src_number++;
606
607 if (prog_inst->Opcode == OPCODE_TXP) {
608 instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], W));
609 instr->src[src_number].src_type = nir_tex_src_projector;
610 src_number++;
611 }
612
613 if (prog_inst->Opcode == OPCODE_TXB) {
614 instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], W));
615 instr->src[src_number].src_type = nir_tex_src_bias;
616 src_number++;
617 }
618
619 if (prog_inst->Opcode == OPCODE_TXL) {
620 instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], W));
621 instr->src[src_number].src_type = nir_tex_src_lod;
622 src_number++;
623 }
624
625 if (instr->is_shadow) {
626 if (instr->coord_components < 3)
627 instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], Z));
628 else
629 instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], W));
630
631 instr->src[src_number].src_type = nir_tex_src_comparator;
632 src_number++;
633 }
634
635 assert(src_number == num_srcs);
636
637 nir_ssa_dest_init(&instr->instr, &instr->dest, 4, 32, NULL);
638 nir_builder_instr_insert(b, &instr->instr);
639
640 /* Resolve the writemask on the texture op. */
641 ptn_move_dest(b, dest, &instr->dest.ssa);
642 }
643
644 static const nir_op op_trans[MAX_OPCODE] = {
645 [OPCODE_NOP] = 0,
646 [OPCODE_ABS] = nir_op_fabs,
647 [OPCODE_ADD] = nir_op_fadd,
648 [OPCODE_ARL] = 0,
649 [OPCODE_CMP] = 0,
650 [OPCODE_COS] = 0,
651 [OPCODE_DDX] = nir_op_fddx,
652 [OPCODE_DDY] = nir_op_fddy,
653 [OPCODE_DP2] = 0,
654 [OPCODE_DP3] = 0,
655 [OPCODE_DP4] = 0,
656 [OPCODE_DPH] = 0,
657 [OPCODE_DST] = 0,
658 [OPCODE_END] = 0,
659 [OPCODE_EX2] = 0,
660 [OPCODE_EXP] = 0,
661 [OPCODE_FLR] = nir_op_ffloor,
662 [OPCODE_FRC] = nir_op_ffract,
663 [OPCODE_LG2] = 0,
664 [OPCODE_LIT] = 0,
665 [OPCODE_LOG] = 0,
666 [OPCODE_LRP] = 0,
667 [OPCODE_MAD] = 0,
668 [OPCODE_MAX] = nir_op_fmax,
669 [OPCODE_MIN] = nir_op_fmin,
670 [OPCODE_MOV] = nir_op_fmov,
671 [OPCODE_MUL] = nir_op_fmul,
672 [OPCODE_POW] = 0,
673 [OPCODE_RCP] = 0,
674
675 [OPCODE_RSQ] = 0,
676 [OPCODE_SCS] = 0,
677 [OPCODE_SGE] = 0,
678 [OPCODE_SIN] = 0,
679 [OPCODE_SLT] = 0,
680 [OPCODE_SSG] = nir_op_fsign,
681 [OPCODE_SUB] = nir_op_fsub,
682 [OPCODE_SWZ] = 0,
683 [OPCODE_TEX] = 0,
684 [OPCODE_TRUNC] = nir_op_ftrunc,
685 [OPCODE_TXB] = 0,
686 [OPCODE_TXD] = 0,
687 [OPCODE_TXL] = 0,
688 [OPCODE_TXP] = 0,
689 [OPCODE_XPD] = 0,
690 };
691
692 static void
693 ptn_emit_instruction(struct ptn_compile *c, struct prog_instruction *prog_inst)
694 {
695 nir_builder *b = &c->build;
696 unsigned i;
697 const unsigned op = prog_inst->Opcode;
698
699 if (op == OPCODE_END)
700 return;
701
702 nir_ssa_def *src[3];
703 for (i = 0; i < 3; i++) {
704 src[i] = ptn_get_src(c, &prog_inst->SrcReg[i]);
705 }
706 nir_alu_dest dest = ptn_get_dest(c, &prog_inst->DstReg);
707 if (c->error)
708 return;
709
710 switch (op) {
711 case OPCODE_RSQ:
712 ptn_move_dest(b, dest,
713 nir_frsq(b, nir_fabs(b, ptn_channel(b, src[0], X))));
714 break;
715
716 case OPCODE_RCP:
717 ptn_move_dest(b, dest, nir_frcp(b, ptn_channel(b, src[0], X)));
718 break;
719
720 case OPCODE_EX2:
721 ptn_move_dest(b, dest, nir_fexp2(b, ptn_channel(b, src[0], X)));
722 break;
723
724 case OPCODE_LG2:
725 ptn_move_dest(b, dest, nir_flog2(b, ptn_channel(b, src[0], X)));
726 break;
727
728 case OPCODE_POW:
729 ptn_move_dest(b, dest, nir_fpow(b,
730 ptn_channel(b, src[0], X),
731 ptn_channel(b, src[1], X)));
732 break;
733
734 case OPCODE_COS:
735 ptn_move_dest(b, dest, nir_fcos(b, ptn_channel(b, src[0], X)));
736 break;
737
738 case OPCODE_SIN:
739 ptn_move_dest(b, dest, nir_fsin(b, ptn_channel(b, src[0], X)));
740 break;
741
742 case OPCODE_ARL:
743 ptn_arl(b, dest, src);
744 break;
745
746 case OPCODE_EXP:
747 ptn_exp(b, dest, src);
748 break;
749
750 case OPCODE_LOG:
751 ptn_log(b, dest, src);
752 break;
753
754 case OPCODE_LRP:
755 ptn_lrp(b, dest, src);
756 break;
757
758 case OPCODE_MAD:
759 ptn_move_dest(b, dest, nir_fadd(b, nir_fmul(b, src[0], src[1]), src[2]));
760 break;
761
762 case OPCODE_DST:
763 ptn_dst(b, dest, src);
764 break;
765
766 case OPCODE_LIT:
767 ptn_lit(b, dest, src);
768 break;
769
770 case OPCODE_XPD:
771 ptn_xpd(b, dest, src);
772 break;
773
774 case OPCODE_DP2:
775 ptn_dp2(b, dest, src);
776 break;
777
778 case OPCODE_DP3:
779 ptn_dp3(b, dest, src);
780 break;
781
782 case OPCODE_DP4:
783 ptn_dp4(b, dest, src);
784 break;
785
786 case OPCODE_DPH:
787 ptn_dph(b, dest, src);
788 break;
789
790 case OPCODE_KIL:
791 ptn_kil(b, src);
792 break;
793
794 case OPCODE_CMP:
795 ptn_cmp(b, dest, src);
796 break;
797
798 case OPCODE_SCS:
799 ptn_scs(b, dest, src);
800 break;
801
802 case OPCODE_SLT:
803 ptn_slt(b, dest, src);
804 break;
805
806 case OPCODE_SGE:
807 ptn_sge(b, dest, src);
808 break;
809
810 case OPCODE_TEX:
811 case OPCODE_TXB:
812 case OPCODE_TXD:
813 case OPCODE_TXL:
814 case OPCODE_TXP:
815 ptn_tex(b, dest, src, prog_inst);
816 break;
817
818 case OPCODE_SWZ:
819 /* Extended swizzles were already handled in ptn_get_src(). */
820 ptn_alu(b, nir_op_fmov, dest, src);
821 break;
822
823 case OPCODE_NOP:
824 break;
825
826 default:
827 if (op_trans[op] != 0) {
828 ptn_alu(b, op_trans[op], dest, src);
829 } else {
830 fprintf(stderr, "unknown opcode: %s\n", _mesa_opcode_string(op));
831 abort();
832 }
833 break;
834 }
835
836 if (prog_inst->Saturate) {
837 assert(prog_inst->Saturate);
838 assert(!dest.dest.is_ssa);
839 ptn_move_dest(b, dest, nir_fsat(b, ptn_src_for_dest(c, &dest)));
840 }
841 }
842
843 /**
844 * Puts a NIR intrinsic to store of each PROGRAM_OUTPUT value to the output
845 * variables at the end of the shader.
846 *
847 * We don't generate these incrementally as the PROGRAM_OUTPUT values are
848 * written, because there's no output load intrinsic, which means we couldn't
849 * handle writemasks.
850 */
851 static void
852 ptn_add_output_stores(struct ptn_compile *c)
853 {
854 nir_builder *b = &c->build;
855
856 nir_foreach_variable(var, &b->shader->outputs) {
857 nir_ssa_def *src = nir_load_reg(b, c->output_regs[var->data.location]);
858 if (c->prog->Target == GL_FRAGMENT_PROGRAM_ARB &&
859 var->data.location == FRAG_RESULT_DEPTH) {
860 /* result.depth has this strange convention of being the .z component of
861 * a vec4 with undefined .xyw components. We resolve it to a scalar, to
862 * match GLSL's gl_FragDepth and the expectations of most backends.
863 */
864 src = nir_channel(b, src, 2);
865 }
866 unsigned num_components = glsl_get_vector_elements(var->type);
867 nir_store_var(b, var, src, (1 << num_components) - 1);
868 }
869 }
870
871 static void
872 setup_registers_and_variables(struct ptn_compile *c)
873 {
874 nir_builder *b = &c->build;
875 struct nir_shader *shader = b->shader;
876
877 /* Create input variables. */
878 const int num_inputs = util_last_bit64(c->prog->info.inputs_read);
879 for (int i = 0; i < num_inputs; i++) {
880 if (!(c->prog->info.inputs_read & BITFIELD64_BIT(i)))
881 continue;
882
883 nir_variable *var =
884 nir_variable_create(shader, nir_var_shader_in, glsl_vec4_type(),
885 ralloc_asprintf(shader, "in_%d", i));
886 var->data.location = i;
887 var->data.index = 0;
888
889 if (c->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
890 if (i == VARYING_SLOT_POS) {
891 var->data.origin_upper_left = c->prog->OriginUpperLeft;
892 var->data.pixel_center_integer = c->prog->PixelCenterInteger;
893 } else if (i == VARYING_SLOT_FOGC) {
894 /* fogcoord is defined as <f, 0.0, 0.0, 1.0>. Make the actual
895 * input variable a float, and create a local containing the
896 * full vec4 value.
897 */
898 var->type = glsl_float_type();
899
900 nir_variable *fullvar =
901 nir_local_variable_create(b->impl, glsl_vec4_type(),
902 "fogcoord_tmp");
903
904 nir_store_var(b, fullvar,
905 nir_vec4(b, nir_load_var(b, var),
906 nir_imm_float(b, 0.0),
907 nir_imm_float(b, 0.0),
908 nir_imm_float(b, 1.0)),
909 WRITEMASK_XYZW);
910
911 /* We inserted the real input into the list so the driver has real
912 * inputs, but we set c->input_vars[i] to the temporary so we use
913 * the splatted value.
914 */
915 c->input_vars[i] = fullvar;
916 continue;
917 }
918 }
919
920 c->input_vars[i] = var;
921 }
922
923 /* Create output registers and variables. */
924 int max_outputs = util_last_bit(c->prog->info.outputs_written);
925 c->output_regs = rzalloc_array(c, nir_register *, max_outputs);
926
927 for (int i = 0; i < max_outputs; i++) {
928 if (!(c->prog->info.outputs_written & BITFIELD64_BIT(i)))
929 continue;
930
931 /* Since we can't load from outputs in the IR, we make temporaries
932 * for the outputs and emit stores to the real outputs at the end of
933 * the shader.
934 */
935 nir_register *reg = nir_local_reg_create(b->impl);
936 reg->num_components = 4;
937
938 nir_variable *var = rzalloc(shader, nir_variable);
939 if (c->prog->Target == GL_FRAGMENT_PROGRAM_ARB && i == FRAG_RESULT_DEPTH)
940 var->type = glsl_float_type();
941 else
942 var->type = glsl_vec4_type();
943 var->data.mode = nir_var_shader_out;
944 var->name = ralloc_asprintf(var, "out_%d", i);
945
946 var->data.location = i;
947 var->data.index = 0;
948
949 c->output_regs[i] = reg;
950
951 exec_list_push_tail(&shader->outputs, &var->node);
952 c->output_vars[i] = var;
953 }
954
955 /* Create temporary registers. */
956 c->temp_regs = rzalloc_array(c, nir_register *,
957 c->prog->arb.NumTemporaries);
958
959 nir_register *reg;
960 for (unsigned i = 0; i < c->prog->arb.NumTemporaries; i++) {
961 reg = nir_local_reg_create(b->impl);
962 if (!reg) {
963 c->error = true;
964 return;
965 }
966 reg->num_components = 4;
967 c->temp_regs[i] = reg;
968 }
969
970 /* Create the address register (for ARB_vertex_program). */
971 reg = nir_local_reg_create(b->impl);
972 if (!reg) {
973 c->error = true;
974 return;
975 }
976 reg->num_components = 1;
977 c->addr_reg = reg;
978 }
979
980 struct nir_shader *
981 prog_to_nir(const struct gl_program *prog,
982 const nir_shader_compiler_options *options)
983 {
984 struct ptn_compile *c;
985 struct nir_shader *s;
986 gl_shader_stage stage = _mesa_program_enum_to_shader_stage(prog->Target);
987
988 c = rzalloc(NULL, struct ptn_compile);
989 if (!c)
990 return NULL;
991 c->prog = prog;
992
993 nir_builder_init_simple_shader(&c->build, NULL, stage, options);
994
995 /* Copy the shader_info from the gl_program */
996 c->build.shader->info = prog->info;
997
998 s = c->build.shader;
999
1000 if (prog->Parameters->NumParameters > 0) {
1001 c->parameters = rzalloc(s, nir_variable);
1002 c->parameters->type =
1003 glsl_array_type(glsl_vec4_type(), prog->Parameters->NumParameters);
1004 c->parameters->name = "parameters";
1005 c->parameters->data.read_only = true;
1006 c->parameters->data.mode = nir_var_uniform;
1007 exec_list_push_tail(&s->uniforms, &c->parameters->node);
1008 }
1009
1010 setup_registers_and_variables(c);
1011 if (unlikely(c->error))
1012 goto fail;
1013
1014 for (unsigned int i = 0; i < prog->arb.NumInstructions; i++) {
1015 ptn_emit_instruction(c, &prog->arb.Instructions[i]);
1016
1017 if (unlikely(c->error))
1018 break;
1019 }
1020
1021 ptn_add_output_stores(c);
1022
1023 s->info.name = ralloc_asprintf(s, "ARB%d", prog->Id);
1024 s->info.num_textures = util_last_bit(prog->SamplersUsed);
1025 s->info.num_ubos = 0;
1026 s->info.num_abos = 0;
1027 s->info.num_ssbos = 0;
1028 s->info.num_images = 0;
1029 s->info.uses_texture_gather = false;
1030 s->info.clip_distance_array_size = 0;
1031 s->info.cull_distance_array_size = 0;
1032 s->info.separate_shader = false;
1033
1034 fail:
1035 if (c->error) {
1036 ralloc_free(s);
1037 s = NULL;
1038 }
1039 ralloc_free(c);
1040 return s;
1041 }