mesa: rework ParameterList to allow packing
[mesa.git] / src / mesa / program / prog_to_nir.c
1 /*
2 * Copyright © 2015 Intel Corporation
3 * Copyright © 2014-2015 Broadcom
4 * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
15 * Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
23 * IN THE SOFTWARE.
24 */
25
26 #include "compiler/nir/nir.h"
27 #include "compiler/nir/nir_builder.h"
28 #include "compiler/glsl/list.h"
29 #include "main/imports.h"
30 #include "util/ralloc.h"
31
32 #include "prog_to_nir.h"
33 #include "prog_instruction.h"
34 #include "prog_parameter.h"
35 #include "prog_print.h"
36 #include "program.h"
37
38 /**
39 * \file prog_to_nir.c
40 *
41 * A translator from Mesa IR (prog_instruction.h) to NIR. This is primarily
42 * intended to support ARB_vertex_program, ARB_fragment_program, and fixed-function
43 * vertex processing. Full GLSL support should use glsl_to_nir instead.
44 */
45
46 struct ptn_compile {
47 const struct gl_program *prog;
48 nir_builder build;
49 bool error;
50
51 nir_variable *parameters;
52 nir_variable *input_vars[VARYING_SLOT_MAX];
53 nir_variable *output_vars[VARYING_SLOT_MAX];
54 nir_register **output_regs;
55 nir_register **temp_regs;
56
57 nir_register *addr_reg;
58 };
59
/* Expands to a temporary unsigned[4] holding the four named SWIZZLE_* values. */
#define SWIZ(X, Y, Z, W)                                                   \
   (unsigned[4]){ SWIZZLE_##X, SWIZZLE_##Y, SWIZZLE_##Z, SWIZZLE_##W }

/* Extracts the single channel `ch` (X, Y, Z or W) of `src` as a scalar. */
#define ptn_channel(b, src, ch)                                            \
   nir_swizzle(b, src, SWIZ(ch, ch, ch, ch), 1, true)
63
64 static nir_ssa_def *
65 ptn_src_for_dest(struct ptn_compile *c, nir_alu_dest *dest)
66 {
67 nir_builder *b = &c->build;
68
69 nir_alu_src src;
70 memset(&src, 0, sizeof(src));
71
72 if (dest->dest.is_ssa)
73 src.src = nir_src_for_ssa(&dest->dest.ssa);
74 else {
75 assert(!dest->dest.reg.indirect);
76 src.src = nir_src_for_reg(dest->dest.reg.reg);
77 src.src.reg.base_offset = dest->dest.reg.base_offset;
78 }
79
80 for (int i = 0; i < 4; i++)
81 src.swizzle[i] = i;
82
83 return nir_fmov_alu(b, src, 4);
84 }
85
86 static nir_alu_dest
87 ptn_get_dest(struct ptn_compile *c, const struct prog_dst_register *prog_dst)
88 {
89 nir_alu_dest dest;
90
91 memset(&dest, 0, sizeof(dest));
92
93 switch (prog_dst->File) {
94 case PROGRAM_TEMPORARY:
95 dest.dest.reg.reg = c->temp_regs[prog_dst->Index];
96 break;
97 case PROGRAM_OUTPUT:
98 dest.dest.reg.reg = c->output_regs[prog_dst->Index];
99 break;
100 case PROGRAM_ADDRESS:
101 assert(prog_dst->Index == 0);
102 dest.dest.reg.reg = c->addr_reg;
103 break;
104 case PROGRAM_UNDEFINED:
105 break;
106 }
107
108 dest.write_mask = prog_dst->WriteMask;
109 dest.saturate = false;
110
111 assert(!prog_dst->RelAddr);
112
113 return dest;
114 }
115
116 static nir_ssa_def *
117 ptn_get_src(struct ptn_compile *c, const struct prog_src_register *prog_src)
118 {
119 nir_builder *b = &c->build;
120 nir_alu_src src;
121
122 memset(&src, 0, sizeof(src));
123
124 switch (prog_src->File) {
125 case PROGRAM_UNDEFINED:
126 return nir_imm_float(b, 0.0);
127 case PROGRAM_TEMPORARY:
128 assert(!prog_src->RelAddr && prog_src->Index >= 0);
129 src.src.reg.reg = c->temp_regs[prog_src->Index];
130 break;
131 case PROGRAM_INPUT: {
132 /* ARB_vertex_program doesn't allow relative addressing on vertex
133 * attributes; ARB_fragment_program has no relative addressing at all.
134 */
135 assert(!prog_src->RelAddr);
136
137 assert(prog_src->Index >= 0 && prog_src->Index < VARYING_SLOT_MAX);
138
139 nir_intrinsic_instr *load =
140 nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_var);
141 load->num_components = 4;
142 load->variables[0] = nir_deref_var_create(load, c->input_vars[prog_src->Index]);
143
144 nir_ssa_dest_init(&load->instr, &load->dest, 4, 32, NULL);
145 nir_builder_instr_insert(b, &load->instr);
146
147 src.src = nir_src_for_ssa(&load->dest.ssa);
148 break;
149 }
150 case PROGRAM_STATE_VAR:
151 case PROGRAM_CONSTANT: {
152 /* We actually want to look at the type in the Parameters list for this,
153 * because it lets us upload constant builtin uniforms as actual
154 * constants.
155 */
156 struct gl_program_parameter_list *plist = c->prog->Parameters;
157 gl_register_file file = prog_src->RelAddr ? prog_src->File :
158 plist->Parameters[prog_src->Index].Type;
159
160 switch (file) {
161 case PROGRAM_CONSTANT:
162 if ((c->prog->arb.IndirectRegisterFiles &
163 (1 << PROGRAM_CONSTANT)) == 0) {
164 unsigned pvo = plist->ParameterValueOffset[prog_src->Index];
165 float *v = (float *) plist->ParameterValues + pvo;
166 src.src = nir_src_for_ssa(nir_imm_vec4(b, v[0], v[1], v[2], v[3]));
167 break;
168 }
169 /* FALLTHROUGH */
170 case PROGRAM_STATE_VAR: {
171 assert(c->parameters != NULL);
172
173 nir_intrinsic_instr *load =
174 nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_var);
175 nir_ssa_dest_init(&load->instr, &load->dest, 4, 32, NULL);
176 load->num_components = 4;
177
178 load->variables[0] = nir_deref_var_create(load, c->parameters);
179 nir_deref_array *deref_arr =
180 nir_deref_array_create(load->variables[0]);
181 deref_arr->deref.type = glsl_vec4_type();
182 load->variables[0]->deref.child = &deref_arr->deref;
183
184 if (prog_src->RelAddr) {
185 deref_arr->deref_array_type = nir_deref_array_type_indirect;
186
187 nir_alu_src addr_src = { NIR_SRC_INIT };
188 addr_src.src = nir_src_for_reg(c->addr_reg);
189 nir_ssa_def *reladdr = nir_imov_alu(b, addr_src, 1);
190
191 if (prog_src->Index < 0) {
192 /* This is a negative offset which should be added to the address
193 * register's value.
194 */
195 reladdr = nir_iadd(b, reladdr, nir_imm_int(b, prog_src->Index));
196
197 deref_arr->base_offset = 0;
198 } else {
199 deref_arr->base_offset = prog_src->Index;
200 }
201 deref_arr->indirect = nir_src_for_ssa(reladdr);
202 } else {
203 deref_arr->deref_array_type = nir_deref_array_type_direct;
204 deref_arr->base_offset = prog_src->Index;
205 }
206
207 nir_builder_instr_insert(b, &load->instr);
208
209 src.src = nir_src_for_ssa(&load->dest.ssa);
210 break;
211 }
212 default:
213 fprintf(stderr, "bad uniform src register file: %s (%d)\n",
214 _mesa_register_file_name(file), file);
215 abort();
216 }
217 break;
218 }
219 default:
220 fprintf(stderr, "unknown src register file: %s (%d)\n",
221 _mesa_register_file_name(prog_src->File), prog_src->File);
222 abort();
223 }
224
225 nir_ssa_def *def;
226 if (!HAS_EXTENDED_SWIZZLE(prog_src->Swizzle) &&
227 (prog_src->Negate == NEGATE_NONE || prog_src->Negate == NEGATE_XYZW)) {
228 /* The simple non-SWZ case. */
229 for (int i = 0; i < 4; i++)
230 src.swizzle[i] = GET_SWZ(prog_src->Swizzle, i);
231
232 def = nir_fmov_alu(b, src, 4);
233
234 if (prog_src->Negate)
235 def = nir_fneg(b, def);
236 } else {
237 /* The SWZ instruction allows per-component zero/one swizzles, and also
238 * per-component negation.
239 */
240 nir_ssa_def *chans[4];
241 for (int i = 0; i < 4; i++) {
242 int swizzle = GET_SWZ(prog_src->Swizzle, i);
243 if (swizzle == SWIZZLE_ZERO) {
244 chans[i] = nir_imm_float(b, 0.0);
245 } else if (swizzle == SWIZZLE_ONE) {
246 chans[i] = nir_imm_float(b, 1.0);
247 } else {
248 assert(swizzle != SWIZZLE_NIL);
249 nir_alu_instr *mov = nir_alu_instr_create(b->shader, nir_op_fmov);
250 nir_ssa_dest_init(&mov->instr, &mov->dest.dest, 1, 32, NULL);
251 mov->dest.write_mask = 0x1;
252 mov->src[0] = src;
253 mov->src[0].swizzle[0] = swizzle;
254 nir_builder_instr_insert(b, &mov->instr);
255
256 chans[i] = &mov->dest.dest.ssa;
257 }
258
259 if (prog_src->Negate & (1 << i))
260 chans[i] = nir_fneg(b, chans[i]);
261 }
262 def = nir_vec4(b, chans[0], chans[1], chans[2], chans[3]);
263 }
264
265 return def;
266 }
267
268 static void
269 ptn_alu(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
270 {
271 unsigned num_srcs = nir_op_infos[op].num_inputs;
272 nir_alu_instr *instr = nir_alu_instr_create(b->shader, op);
273 unsigned i;
274
275 for (i = 0; i < num_srcs; i++)
276 instr->src[i].src = nir_src_for_ssa(src[i]);
277
278 instr->dest = dest;
279 nir_builder_instr_insert(b, &instr->instr);
280 }
281
282 static void
283 ptn_move_dest_masked(nir_builder *b, nir_alu_dest dest,
284 nir_ssa_def *def, unsigned write_mask)
285 {
286 if (!(dest.write_mask & write_mask))
287 return;
288
289 nir_alu_instr *mov = nir_alu_instr_create(b->shader, nir_op_fmov);
290 if (!mov)
291 return;
292
293 mov->dest = dest;
294 mov->dest.write_mask &= write_mask;
295 mov->src[0].src = nir_src_for_ssa(def);
296 for (unsigned i = def->num_components; i < 4; i++)
297 mov->src[0].swizzle[i] = def->num_components - 1;
298 nir_builder_instr_insert(b, &mov->instr);
299 }
300
301 static void
302 ptn_move_dest(nir_builder *b, nir_alu_dest dest, nir_ssa_def *def)
303 {
304 ptn_move_dest_masked(b, dest, def, WRITEMASK_XYZW);
305 }
306
307 static void
308 ptn_arl(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
309 {
310 ptn_move_dest(b, dest, nir_f2i32(b, nir_ffloor(b, src[0])));
311 }
312
313 /* EXP - Approximate Exponential Base 2
314 * dst.x = 2^{\lfloor src.x\rfloor}
315 * dst.y = src.x - \lfloor src.x\rfloor
316 * dst.z = 2^{src.x}
317 * dst.w = 1.0
318 */
319 static void
320 ptn_exp(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
321 {
322 nir_ssa_def *srcx = ptn_channel(b, src[0], X);
323
324 ptn_move_dest_masked(b, dest, nir_fexp2(b, nir_ffloor(b, srcx)), WRITEMASK_X);
325 ptn_move_dest_masked(b, dest, nir_fsub(b, srcx, nir_ffloor(b, srcx)), WRITEMASK_Y);
326 ptn_move_dest_masked(b, dest, nir_fexp2(b, srcx), WRITEMASK_Z);
327 ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_W);
328 }
329
330 /* LOG - Approximate Logarithm Base 2
331 * dst.x = \lfloor\log_2{|src.x|}\rfloor
332 * dst.y = |src.x| * 2^{-\lfloor\log_2{|src.x|}\rfloor}}
333 * dst.z = \log_2{|src.x|}
334 * dst.w = 1.0
335 */
336 static void
337 ptn_log(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
338 {
339 nir_ssa_def *abs_srcx = nir_fabs(b, ptn_channel(b, src[0], X));
340 nir_ssa_def *log2 = nir_flog2(b, abs_srcx);
341 nir_ssa_def *floor_log2 = nir_ffloor(b, log2);
342
343 ptn_move_dest_masked(b, dest, floor_log2, WRITEMASK_X);
344 ptn_move_dest_masked(b, dest,
345 nir_fmul(b, abs_srcx,
346 nir_fexp2(b, nir_fneg(b, floor_log2))),
347 WRITEMASK_Y);
348 ptn_move_dest_masked(b, dest, log2, WRITEMASK_Z);
349 ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_W);
350 }
351
352 /* DST - Distance Vector
353 * dst.x = 1.0
354 * dst.y = src0.y \times src1.y
355 * dst.z = src0.z
356 * dst.w = src1.w
357 */
358 static void
359 ptn_dst(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
360 {
361 ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_X);
362 ptn_move_dest_masked(b, dest, nir_fmul(b, src[0], src[1]), WRITEMASK_Y);
363 ptn_move_dest_masked(b, dest, nir_fmov(b, src[0]), WRITEMASK_Z);
364 ptn_move_dest_masked(b, dest, nir_fmov(b, src[1]), WRITEMASK_W);
365 }
366
367 /* LIT - Light Coefficients
368 * dst.x = 1.0
369 * dst.y = max(src.x, 0.0)
370 * dst.z = (src.x > 0.0) ? max(src.y, 0.0)^{clamp(src.w, -128.0, 128.0))} : 0
371 * dst.w = 1.0
372 */
373 static void
374 ptn_lit(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
375 {
376 ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_XW);
377
378 ptn_move_dest_masked(b, dest, nir_fmax(b, ptn_channel(b, src[0], X),
379 nir_imm_float(b, 0.0)), WRITEMASK_Y);
380
381 if (dest.write_mask & WRITEMASK_Z) {
382 nir_ssa_def *src0_y = ptn_channel(b, src[0], Y);
383 nir_ssa_def *wclamp = nir_fmax(b, nir_fmin(b, ptn_channel(b, src[0], W),
384 nir_imm_float(b, 128.0)),
385 nir_imm_float(b, -128.0));
386 nir_ssa_def *pow = nir_fpow(b, nir_fmax(b, src0_y, nir_imm_float(b, 0.0)),
387 wclamp);
388
389 nir_ssa_def *z;
390 if (b->shader->options->native_integers) {
391 z = nir_bcsel(b,
392 nir_fge(b, nir_imm_float(b, 0.0), ptn_channel(b, src[0], X)),
393 nir_imm_float(b, 0.0),
394 pow);
395 } else {
396 z = nir_fcsel(b,
397 nir_sge(b, nir_imm_float(b, 0.0), ptn_channel(b, src[0], X)),
398 nir_imm_float(b, 0.0),
399 pow);
400 }
401
402 ptn_move_dest_masked(b, dest, z, WRITEMASK_Z);
403 }
404 }
405
406 /* SCS - Sine Cosine
407 * dst.x = \cos{src.x}
408 * dst.y = \sin{src.x}
409 * dst.z = 0.0
410 * dst.w = 1.0
411 */
412 static void
413 ptn_scs(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
414 {
415 ptn_move_dest_masked(b, dest, nir_fcos(b, ptn_channel(b, src[0], X)),
416 WRITEMASK_X);
417 ptn_move_dest_masked(b, dest, nir_fsin(b, ptn_channel(b, src[0], X)),
418 WRITEMASK_Y);
419 ptn_move_dest_masked(b, dest, nir_imm_float(b, 0.0), WRITEMASK_Z);
420 ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_W);
421 }
422
423 /**
424 * Emit SLT. For platforms with integers, prefer b2f(flt(...)).
425 */
426 static void
427 ptn_slt(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
428 {
429 if (b->shader->options->native_integers) {
430 ptn_move_dest(b, dest, nir_b2f(b, nir_flt(b, src[0], src[1])));
431 } else {
432 ptn_move_dest(b, dest, nir_slt(b, src[0], src[1]));
433 }
434 }
435
436 /**
437 * Emit SGE. For platforms with integers, prefer b2f(fge(...)).
438 */
439 static void
440 ptn_sge(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
441 {
442 if (b->shader->options->native_integers) {
443 ptn_move_dest(b, dest, nir_b2f(b, nir_fge(b, src[0], src[1])));
444 } else {
445 ptn_move_dest(b, dest, nir_sge(b, src[0], src[1]));
446 }
447 }
448
449 static void
450 ptn_xpd(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
451 {
452 ptn_move_dest_masked(b, dest,
453 nir_fsub(b,
454 nir_fmul(b,
455 nir_swizzle(b, src[0], SWIZ(Y, Z, X, W), 3, true),
456 nir_swizzle(b, src[1], SWIZ(Z, X, Y, W), 3, true)),
457 nir_fmul(b,
458 nir_swizzle(b, src[1], SWIZ(Y, Z, X, W), 3, true),
459 nir_swizzle(b, src[0], SWIZ(Z, X, Y, W), 3, true))),
460 WRITEMASK_XYZ);
461 ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_W);
462 }
463
464 static void
465 ptn_dp2(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
466 {
467 ptn_move_dest(b, dest, nir_fdot2(b, src[0], src[1]));
468 }
469
470 static void
471 ptn_dp3(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
472 {
473 ptn_move_dest(b, dest, nir_fdot3(b, src[0], src[1]));
474 }
475
476 static void
477 ptn_dp4(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
478 {
479 ptn_move_dest(b, dest, nir_fdot4(b, src[0], src[1]));
480 }
481
482 static void
483 ptn_dph(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
484 {
485 ptn_move_dest(b, dest, nir_fdph(b, src[0], src[1]));
486 }
487
488 static void
489 ptn_cmp(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
490 {
491 if (b->shader->options->native_integers) {
492 ptn_move_dest(b, dest, nir_bcsel(b,
493 nir_flt(b, src[0], nir_imm_float(b, 0.0)),
494 src[1], src[2]));
495 } else {
496 ptn_move_dest(b, dest, nir_fcsel(b,
497 nir_slt(b, src[0], nir_imm_float(b, 0.0)),
498 src[1], src[2]));
499 }
500 }
501
502 static void
503 ptn_lrp(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
504 {
505 ptn_move_dest(b, dest, nir_flrp(b, src[2], src[1], src[0]));
506 }
507
508 static void
509 ptn_kil(nir_builder *b, nir_ssa_def **src)
510 {
511 nir_ssa_def *cmp = b->shader->options->native_integers ?
512 nir_bany_inequal4(b, nir_flt(b, src[0], nir_imm_float(b, 0.0)), nir_imm_int(b, 0)) :
513 nir_fany_nequal4(b, nir_slt(b, src[0], nir_imm_float(b, 0.0)), nir_imm_float(b, 0.0));
514
515 nir_intrinsic_instr *discard =
516 nir_intrinsic_instr_create(b->shader, nir_intrinsic_discard_if);
517 discard->src[0] = nir_src_for_ssa(cmp);
518 nir_builder_instr_insert(b, &discard->instr);
519 }
520
521 static void
522 ptn_tex(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src,
523 struct prog_instruction *prog_inst)
524 {
525 nir_tex_instr *instr;
526 nir_texop op;
527 unsigned num_srcs;
528
529 switch (prog_inst->Opcode) {
530 case OPCODE_TEX:
531 op = nir_texop_tex;
532 num_srcs = 1;
533 break;
534 case OPCODE_TXB:
535 op = nir_texop_txb;
536 num_srcs = 2;
537 break;
538 case OPCODE_TXD:
539 op = nir_texop_txd;
540 num_srcs = 3;
541 break;
542 case OPCODE_TXL:
543 op = nir_texop_txl;
544 num_srcs = 2;
545 break;
546 case OPCODE_TXP:
547 op = nir_texop_tex;
548 num_srcs = 2;
549 break;
550 default:
551 fprintf(stderr, "unknown tex op %d\n", prog_inst->Opcode);
552 abort();
553 }
554
555 if (prog_inst->TexShadow)
556 num_srcs++;
557
558 instr = nir_tex_instr_create(b->shader, num_srcs);
559 instr->op = op;
560 instr->dest_type = nir_type_float;
561 instr->is_shadow = prog_inst->TexShadow;
562 instr->texture_index = prog_inst->TexSrcUnit;
563 instr->sampler_index = prog_inst->TexSrcUnit;
564
565 switch (prog_inst->TexSrcTarget) {
566 case TEXTURE_1D_INDEX:
567 instr->sampler_dim = GLSL_SAMPLER_DIM_1D;
568 break;
569 case TEXTURE_2D_INDEX:
570 instr->sampler_dim = GLSL_SAMPLER_DIM_2D;
571 break;
572 case TEXTURE_3D_INDEX:
573 instr->sampler_dim = GLSL_SAMPLER_DIM_3D;
574 break;
575 case TEXTURE_CUBE_INDEX:
576 instr->sampler_dim = GLSL_SAMPLER_DIM_CUBE;
577 break;
578 case TEXTURE_RECT_INDEX:
579 instr->sampler_dim = GLSL_SAMPLER_DIM_RECT;
580 break;
581 default:
582 fprintf(stderr, "Unknown texture target %d\n", prog_inst->TexSrcTarget);
583 abort();
584 }
585
586 switch (instr->sampler_dim) {
587 case GLSL_SAMPLER_DIM_1D:
588 case GLSL_SAMPLER_DIM_BUF:
589 instr->coord_components = 1;
590 break;
591 case GLSL_SAMPLER_DIM_2D:
592 case GLSL_SAMPLER_DIM_RECT:
593 case GLSL_SAMPLER_DIM_EXTERNAL:
594 case GLSL_SAMPLER_DIM_MS:
595 instr->coord_components = 2;
596 break;
597 case GLSL_SAMPLER_DIM_3D:
598 case GLSL_SAMPLER_DIM_CUBE:
599 instr->coord_components = 3;
600 break;
601 case GLSL_SAMPLER_DIM_SUBPASS:
602 case GLSL_SAMPLER_DIM_SUBPASS_MS:
603 unreachable("can't reach");
604 }
605
606 unsigned src_number = 0;
607
608 instr->src[src_number].src =
609 nir_src_for_ssa(nir_swizzle(b, src[0], SWIZ(X, Y, Z, W),
610 instr->coord_components, true));
611 instr->src[src_number].src_type = nir_tex_src_coord;
612 src_number++;
613
614 if (prog_inst->Opcode == OPCODE_TXP) {
615 instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], W));
616 instr->src[src_number].src_type = nir_tex_src_projector;
617 src_number++;
618 }
619
620 if (prog_inst->Opcode == OPCODE_TXB) {
621 instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], W));
622 instr->src[src_number].src_type = nir_tex_src_bias;
623 src_number++;
624 }
625
626 if (prog_inst->Opcode == OPCODE_TXL) {
627 instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], W));
628 instr->src[src_number].src_type = nir_tex_src_lod;
629 src_number++;
630 }
631
632 if (instr->is_shadow) {
633 if (instr->coord_components < 3)
634 instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], Z));
635 else
636 instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], W));
637
638 instr->src[src_number].src_type = nir_tex_src_comparator;
639 src_number++;
640 }
641
642 assert(src_number == num_srcs);
643
644 nir_ssa_dest_init(&instr->instr, &instr->dest, 4, 32, NULL);
645 nir_builder_instr_insert(b, &instr->instr);
646
647 /* Resolve the writemask on the texture op. */
648 ptn_move_dest(b, dest, &instr->dest.ssa);
649 }
650
651 static const nir_op op_trans[MAX_OPCODE] = {
652 [OPCODE_NOP] = 0,
653 [OPCODE_ABS] = nir_op_fabs,
654 [OPCODE_ADD] = nir_op_fadd,
655 [OPCODE_ARL] = 0,
656 [OPCODE_CMP] = 0,
657 [OPCODE_COS] = 0,
658 [OPCODE_DDX] = nir_op_fddx,
659 [OPCODE_DDY] = nir_op_fddy,
660 [OPCODE_DP2] = 0,
661 [OPCODE_DP3] = 0,
662 [OPCODE_DP4] = 0,
663 [OPCODE_DPH] = 0,
664 [OPCODE_DST] = 0,
665 [OPCODE_END] = 0,
666 [OPCODE_EX2] = 0,
667 [OPCODE_EXP] = 0,
668 [OPCODE_FLR] = nir_op_ffloor,
669 [OPCODE_FRC] = nir_op_ffract,
670 [OPCODE_LG2] = 0,
671 [OPCODE_LIT] = 0,
672 [OPCODE_LOG] = 0,
673 [OPCODE_LRP] = 0,
674 [OPCODE_MAD] = 0,
675 [OPCODE_MAX] = nir_op_fmax,
676 [OPCODE_MIN] = nir_op_fmin,
677 [OPCODE_MOV] = nir_op_fmov,
678 [OPCODE_MUL] = nir_op_fmul,
679 [OPCODE_POW] = 0,
680 [OPCODE_RCP] = 0,
681
682 [OPCODE_RSQ] = 0,
683 [OPCODE_SCS] = 0,
684 [OPCODE_SGE] = 0,
685 [OPCODE_SIN] = 0,
686 [OPCODE_SLT] = 0,
687 [OPCODE_SSG] = nir_op_fsign,
688 [OPCODE_SUB] = nir_op_fsub,
689 [OPCODE_SWZ] = 0,
690 [OPCODE_TEX] = 0,
691 [OPCODE_TRUNC] = nir_op_ftrunc,
692 [OPCODE_TXB] = 0,
693 [OPCODE_TXD] = 0,
694 [OPCODE_TXL] = 0,
695 [OPCODE_TXP] = 0,
696 [OPCODE_XPD] = 0,
697 };
698
699 static void
700 ptn_emit_instruction(struct ptn_compile *c, struct prog_instruction *prog_inst)
701 {
702 nir_builder *b = &c->build;
703 unsigned i;
704 const unsigned op = prog_inst->Opcode;
705
706 if (op == OPCODE_END)
707 return;
708
709 nir_ssa_def *src[3];
710 for (i = 0; i < 3; i++) {
711 src[i] = ptn_get_src(c, &prog_inst->SrcReg[i]);
712 }
713 nir_alu_dest dest = ptn_get_dest(c, &prog_inst->DstReg);
714 if (c->error)
715 return;
716
717 switch (op) {
718 case OPCODE_RSQ:
719 ptn_move_dest(b, dest,
720 nir_frsq(b, nir_fabs(b, ptn_channel(b, src[0], X))));
721 break;
722
723 case OPCODE_RCP:
724 ptn_move_dest(b, dest, nir_frcp(b, ptn_channel(b, src[0], X)));
725 break;
726
727 case OPCODE_EX2:
728 ptn_move_dest(b, dest, nir_fexp2(b, ptn_channel(b, src[0], X)));
729 break;
730
731 case OPCODE_LG2:
732 ptn_move_dest(b, dest, nir_flog2(b, ptn_channel(b, src[0], X)));
733 break;
734
735 case OPCODE_POW:
736 ptn_move_dest(b, dest, nir_fpow(b,
737 ptn_channel(b, src[0], X),
738 ptn_channel(b, src[1], X)));
739 break;
740
741 case OPCODE_COS:
742 ptn_move_dest(b, dest, nir_fcos(b, ptn_channel(b, src[0], X)));
743 break;
744
745 case OPCODE_SIN:
746 ptn_move_dest(b, dest, nir_fsin(b, ptn_channel(b, src[0], X)));
747 break;
748
749 case OPCODE_ARL:
750 ptn_arl(b, dest, src);
751 break;
752
753 case OPCODE_EXP:
754 ptn_exp(b, dest, src);
755 break;
756
757 case OPCODE_LOG:
758 ptn_log(b, dest, src);
759 break;
760
761 case OPCODE_LRP:
762 ptn_lrp(b, dest, src);
763 break;
764
765 case OPCODE_MAD:
766 ptn_move_dest(b, dest, nir_fadd(b, nir_fmul(b, src[0], src[1]), src[2]));
767 break;
768
769 case OPCODE_DST:
770 ptn_dst(b, dest, src);
771 break;
772
773 case OPCODE_LIT:
774 ptn_lit(b, dest, src);
775 break;
776
777 case OPCODE_XPD:
778 ptn_xpd(b, dest, src);
779 break;
780
781 case OPCODE_DP2:
782 ptn_dp2(b, dest, src);
783 break;
784
785 case OPCODE_DP3:
786 ptn_dp3(b, dest, src);
787 break;
788
789 case OPCODE_DP4:
790 ptn_dp4(b, dest, src);
791 break;
792
793 case OPCODE_DPH:
794 ptn_dph(b, dest, src);
795 break;
796
797 case OPCODE_KIL:
798 ptn_kil(b, src);
799 break;
800
801 case OPCODE_CMP:
802 ptn_cmp(b, dest, src);
803 break;
804
805 case OPCODE_SCS:
806 ptn_scs(b, dest, src);
807 break;
808
809 case OPCODE_SLT:
810 ptn_slt(b, dest, src);
811 break;
812
813 case OPCODE_SGE:
814 ptn_sge(b, dest, src);
815 break;
816
817 case OPCODE_TEX:
818 case OPCODE_TXB:
819 case OPCODE_TXD:
820 case OPCODE_TXL:
821 case OPCODE_TXP:
822 ptn_tex(b, dest, src, prog_inst);
823 break;
824
825 case OPCODE_SWZ:
826 /* Extended swizzles were already handled in ptn_get_src(). */
827 ptn_alu(b, nir_op_fmov, dest, src);
828 break;
829
830 case OPCODE_NOP:
831 break;
832
833 default:
834 if (op_trans[op] != 0) {
835 ptn_alu(b, op_trans[op], dest, src);
836 } else {
837 fprintf(stderr, "unknown opcode: %s\n", _mesa_opcode_string(op));
838 abort();
839 }
840 break;
841 }
842
843 if (prog_inst->Saturate) {
844 assert(prog_inst->Saturate);
845 assert(!dest.dest.is_ssa);
846 ptn_move_dest(b, dest, nir_fsat(b, ptn_src_for_dest(c, &dest)));
847 }
848 }
849
850 /**
851 * Puts a NIR intrinsic to store of each PROGRAM_OUTPUT value to the output
852 * variables at the end of the shader.
853 *
854 * We don't generate these incrementally as the PROGRAM_OUTPUT values are
855 * written, because there's no output load intrinsic, which means we couldn't
856 * handle writemasks.
857 */
858 static void
859 ptn_add_output_stores(struct ptn_compile *c)
860 {
861 nir_builder *b = &c->build;
862
863 nir_foreach_variable(var, &b->shader->outputs) {
864 nir_intrinsic_instr *store =
865 nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_var);
866 store->num_components = glsl_get_vector_elements(var->type);
867 nir_intrinsic_set_write_mask(store, (1 << store->num_components) - 1);
868 store->variables[0] =
869 nir_deref_var_create(store, c->output_vars[var->data.location]);
870
871 if (c->prog->Target == GL_FRAGMENT_PROGRAM_ARB &&
872 var->data.location == FRAG_RESULT_DEPTH) {
873 /* result.depth has this strange convention of being the .z component of
874 * a vec4 with undefined .xyw components. We resolve it to a scalar, to
875 * match GLSL's gl_FragDepth and the expectations of most backends.
876 */
877 nir_alu_src alu_src = { NIR_SRC_INIT };
878 alu_src.src = nir_src_for_reg(c->output_regs[FRAG_RESULT_DEPTH]);
879 alu_src.swizzle[0] = SWIZZLE_Z;
880 store->src[0] = nir_src_for_ssa(nir_fmov_alu(b, alu_src, 1));
881 } else {
882 store->src[0].reg.reg = c->output_regs[var->data.location];
883 }
884 nir_builder_instr_insert(b, &store->instr);
885 }
886 }
887
888 static void
889 setup_registers_and_variables(struct ptn_compile *c)
890 {
891 nir_builder *b = &c->build;
892 struct nir_shader *shader = b->shader;
893
894 /* Create input variables. */
895 const int num_inputs = util_last_bit64(c->prog->info.inputs_read);
896 for (int i = 0; i < num_inputs; i++) {
897 if (!(c->prog->info.inputs_read & BITFIELD64_BIT(i)))
898 continue;
899
900 nir_variable *var =
901 nir_variable_create(shader, nir_var_shader_in, glsl_vec4_type(),
902 ralloc_asprintf(shader, "in_%d", i));
903 var->data.location = i;
904 var->data.index = 0;
905
906 if (c->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
907 if (i == VARYING_SLOT_POS) {
908 var->data.origin_upper_left = c->prog->OriginUpperLeft;
909 var->data.pixel_center_integer = c->prog->PixelCenterInteger;
910 } else if (i == VARYING_SLOT_FOGC) {
911 /* fogcoord is defined as <f, 0.0, 0.0, 1.0>. Make the actual
912 * input variable a float, and create a local containing the
913 * full vec4 value.
914 */
915 var->type = glsl_float_type();
916
917 nir_intrinsic_instr *load_x =
918 nir_intrinsic_instr_create(shader, nir_intrinsic_load_var);
919 load_x->num_components = 1;
920 load_x->variables[0] = nir_deref_var_create(load_x, var);
921 nir_ssa_dest_init(&load_x->instr, &load_x->dest, 1, 32, NULL);
922 nir_builder_instr_insert(b, &load_x->instr);
923
924 nir_ssa_def *f001 = nir_vec4(b, &load_x->dest.ssa, nir_imm_float(b, 0.0),
925 nir_imm_float(b, 0.0), nir_imm_float(b, 1.0));
926
927 nir_variable *fullvar =
928 nir_local_variable_create(b->impl, glsl_vec4_type(),
929 "fogcoord_tmp");
930 nir_intrinsic_instr *store =
931 nir_intrinsic_instr_create(shader, nir_intrinsic_store_var);
932 store->num_components = 4;
933 nir_intrinsic_set_write_mask(store, WRITEMASK_XYZW);
934 store->variables[0] = nir_deref_var_create(store, fullvar);
935 store->src[0] = nir_src_for_ssa(f001);
936 nir_builder_instr_insert(b, &store->instr);
937
938 /* We inserted the real input into the list so the driver has real
939 * inputs, but we set c->input_vars[i] to the temporary so we use
940 * the splatted value.
941 */
942 c->input_vars[i] = fullvar;
943 continue;
944 }
945 }
946
947 c->input_vars[i] = var;
948 }
949
950 /* Create output registers and variables. */
951 int max_outputs = util_last_bit(c->prog->info.outputs_written);
952 c->output_regs = rzalloc_array(c, nir_register *, max_outputs);
953
954 for (int i = 0; i < max_outputs; i++) {
955 if (!(c->prog->info.outputs_written & BITFIELD64_BIT(i)))
956 continue;
957
958 /* Since we can't load from outputs in the IR, we make temporaries
959 * for the outputs and emit stores to the real outputs at the end of
960 * the shader.
961 */
962 nir_register *reg = nir_local_reg_create(b->impl);
963 reg->num_components = 4;
964
965 nir_variable *var = rzalloc(shader, nir_variable);
966 if (c->prog->Target == GL_FRAGMENT_PROGRAM_ARB && i == FRAG_RESULT_DEPTH)
967 var->type = glsl_float_type();
968 else
969 var->type = glsl_vec4_type();
970 var->data.mode = nir_var_shader_out;
971 var->name = ralloc_asprintf(var, "out_%d", i);
972
973 var->data.location = i;
974 var->data.index = 0;
975
976 c->output_regs[i] = reg;
977
978 exec_list_push_tail(&shader->outputs, &var->node);
979 c->output_vars[i] = var;
980 }
981
982 /* Create temporary registers. */
983 c->temp_regs = rzalloc_array(c, nir_register *,
984 c->prog->arb.NumTemporaries);
985
986 nir_register *reg;
987 for (unsigned i = 0; i < c->prog->arb.NumTemporaries; i++) {
988 reg = nir_local_reg_create(b->impl);
989 if (!reg) {
990 c->error = true;
991 return;
992 }
993 reg->num_components = 4;
994 c->temp_regs[i] = reg;
995 }
996
997 /* Create the address register (for ARB_vertex_program). */
998 reg = nir_local_reg_create(b->impl);
999 if (!reg) {
1000 c->error = true;
1001 return;
1002 }
1003 reg->num_components = 1;
1004 c->addr_reg = reg;
1005 }
1006
1007 struct nir_shader *
1008 prog_to_nir(const struct gl_program *prog,
1009 const nir_shader_compiler_options *options)
1010 {
1011 struct ptn_compile *c;
1012 struct nir_shader *s;
1013 gl_shader_stage stage = _mesa_program_enum_to_shader_stage(prog->Target);
1014
1015 c = rzalloc(NULL, struct ptn_compile);
1016 if (!c)
1017 return NULL;
1018 c->prog = prog;
1019
1020 nir_builder_init_simple_shader(&c->build, NULL, stage, options);
1021
1022 /* Copy the shader_info from the gl_program */
1023 c->build.shader->info = prog->info;
1024
1025 s = c->build.shader;
1026
1027 if (prog->Parameters->NumParameters > 0) {
1028 c->parameters = rzalloc(s, nir_variable);
1029 c->parameters->type =
1030 glsl_array_type(glsl_vec4_type(), prog->Parameters->NumParameters);
1031 c->parameters->name = "parameters";
1032 c->parameters->data.read_only = true;
1033 c->parameters->data.mode = nir_var_uniform;
1034 exec_list_push_tail(&s->uniforms, &c->parameters->node);
1035 }
1036
1037 setup_registers_and_variables(c);
1038 if (unlikely(c->error))
1039 goto fail;
1040
1041 for (unsigned int i = 0; i < prog->arb.NumInstructions; i++) {
1042 ptn_emit_instruction(c, &prog->arb.Instructions[i]);
1043
1044 if (unlikely(c->error))
1045 break;
1046 }
1047
1048 ptn_add_output_stores(c);
1049
1050 s->info.name = ralloc_asprintf(s, "ARB%d", prog->Id);
1051 s->info.num_textures = util_last_bit(prog->SamplersUsed);
1052 s->info.num_ubos = 0;
1053 s->info.num_abos = 0;
1054 s->info.num_ssbos = 0;
1055 s->info.num_images = 0;
1056 s->info.uses_texture_gather = false;
1057 s->info.clip_distance_array_size = 0;
1058 s->info.cull_distance_array_size = 0;
1059 s->info.separate_shader = false;
1060
1061 fail:
1062 if (c->error) {
1063 ralloc_free(s);
1064 s = NULL;
1065 }
1066 ralloc_free(c);
1067 return s;
1068 }