mesa: include mtypes.h less
[mesa.git] / src / mesa / program / prog_to_nir.c
1 /*
2 * Copyright © 2015 Intel Corporation
3 * Copyright © 2014-2015 Broadcom
4 * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
15 * Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
23 * IN THE SOFTWARE.
24 */
25
26 #include "compiler/nir/nir.h"
27 #include "compiler/nir/nir_builder.h"
28 #include "compiler/glsl/list.h"
29 #include "main/imports.h"
30 #include "main/mtypes.h"
31 #include "util/ralloc.h"
32
33 #include "prog_to_nir.h"
34 #include "prog_instruction.h"
35 #include "prog_parameter.h"
36 #include "prog_print.h"
37 #include "program.h"
38
39 /**
40 * \file prog_to_nir.c
41 *
42 * A translator from Mesa IR (prog_instruction.h) to NIR. This is primarily
43 * intended to support ARB_vertex_program, ARB_fragment_program, and fixed-function
44 * vertex processing. Full GLSL support should use glsl_to_nir instead.
45 */
46
47 struct ptn_compile {
48 const struct gl_program *prog;
49 nir_builder build;
50 bool error;
51
52 nir_variable *parameters;
53 nir_variable *input_vars[VARYING_SLOT_MAX];
54 nir_variable *output_vars[VARYING_SLOT_MAX];
55 nir_register **output_regs;
56 nir_register **temp_regs;
57
58 nir_register *addr_reg;
59 };
60
/* Builds an anonymous unsigned[4] swizzle out of four component names, each
 * of which is pasted onto the SWIZZLE_ prefix (e.g. SWIZ(X, Y, Z, W)).
 */
#define SWIZ(X, Y, Z, W) \
   (unsigned[4]){ SWIZZLE_##X, SWIZZLE_##Y, SWIZZLE_##Z, SWIZZLE_##W }

/* Extracts the single channel `ch` of `src` as a one-component value. */
#define ptn_channel(b, src, ch) nir_swizzle(b, src, SWIZ(ch, ch, ch, ch), 1, true)
64
65 static nir_ssa_def *
66 ptn_src_for_dest(struct ptn_compile *c, nir_alu_dest *dest)
67 {
68 nir_builder *b = &c->build;
69
70 nir_alu_src src;
71 memset(&src, 0, sizeof(src));
72
73 if (dest->dest.is_ssa)
74 src.src = nir_src_for_ssa(&dest->dest.ssa);
75 else {
76 assert(!dest->dest.reg.indirect);
77 src.src = nir_src_for_reg(dest->dest.reg.reg);
78 src.src.reg.base_offset = dest->dest.reg.base_offset;
79 }
80
81 for (int i = 0; i < 4; i++)
82 src.swizzle[i] = i;
83
84 return nir_fmov_alu(b, src, 4);
85 }
86
87 static nir_alu_dest
88 ptn_get_dest(struct ptn_compile *c, const struct prog_dst_register *prog_dst)
89 {
90 nir_alu_dest dest;
91
92 memset(&dest, 0, sizeof(dest));
93
94 switch (prog_dst->File) {
95 case PROGRAM_TEMPORARY:
96 dest.dest.reg.reg = c->temp_regs[prog_dst->Index];
97 break;
98 case PROGRAM_OUTPUT:
99 dest.dest.reg.reg = c->output_regs[prog_dst->Index];
100 break;
101 case PROGRAM_ADDRESS:
102 assert(prog_dst->Index == 0);
103 dest.dest.reg.reg = c->addr_reg;
104 break;
105 case PROGRAM_UNDEFINED:
106 break;
107 }
108
109 dest.write_mask = prog_dst->WriteMask;
110 dest.saturate = false;
111
112 assert(!prog_dst->RelAddr);
113
114 return dest;
115 }
116
117 static nir_ssa_def *
118 ptn_get_src(struct ptn_compile *c, const struct prog_src_register *prog_src)
119 {
120 nir_builder *b = &c->build;
121 nir_alu_src src;
122
123 memset(&src, 0, sizeof(src));
124
125 switch (prog_src->File) {
126 case PROGRAM_UNDEFINED:
127 return nir_imm_float(b, 0.0);
128 case PROGRAM_TEMPORARY:
129 assert(!prog_src->RelAddr && prog_src->Index >= 0);
130 src.src.reg.reg = c->temp_regs[prog_src->Index];
131 break;
132 case PROGRAM_INPUT: {
133 /* ARB_vertex_program doesn't allow relative addressing on vertex
134 * attributes; ARB_fragment_program has no relative addressing at all.
135 */
136 assert(!prog_src->RelAddr);
137
138 assert(prog_src->Index >= 0 && prog_src->Index < VARYING_SLOT_MAX);
139
140 nir_variable *var = c->input_vars[prog_src->Index];
141 src.src = nir_src_for_ssa(nir_load_var(b, var));
142 break;
143 }
144 case PROGRAM_STATE_VAR:
145 case PROGRAM_CONSTANT: {
146 /* We actually want to look at the type in the Parameters list for this,
147 * because it lets us upload constant builtin uniforms as actual
148 * constants.
149 */
150 struct gl_program_parameter_list *plist = c->prog->Parameters;
151 gl_register_file file = prog_src->RelAddr ? prog_src->File :
152 plist->Parameters[prog_src->Index].Type;
153
154 switch (file) {
155 case PROGRAM_CONSTANT:
156 if ((c->prog->arb.IndirectRegisterFiles &
157 (1 << PROGRAM_CONSTANT)) == 0) {
158 unsigned pvo = plist->ParameterValueOffset[prog_src->Index];
159 float *v = (float *) plist->ParameterValues + pvo;
160 src.src = nir_src_for_ssa(nir_imm_vec4(b, v[0], v[1], v[2], v[3]));
161 break;
162 }
163 /* FALLTHROUGH */
164 case PROGRAM_STATE_VAR: {
165 assert(c->parameters != NULL);
166
167 nir_intrinsic_instr *load =
168 nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_var);
169 nir_ssa_dest_init(&load->instr, &load->dest, 4, 32, NULL);
170 load->num_components = 4;
171
172 load->variables[0] = nir_deref_var_create(load, c->parameters);
173 nir_deref_array *deref_arr =
174 nir_deref_array_create(load->variables[0]);
175 deref_arr->deref.type = glsl_vec4_type();
176 load->variables[0]->deref.child = &deref_arr->deref;
177
178 if (prog_src->RelAddr) {
179 deref_arr->deref_array_type = nir_deref_array_type_indirect;
180
181 nir_alu_src addr_src = { NIR_SRC_INIT };
182 addr_src.src = nir_src_for_reg(c->addr_reg);
183 nir_ssa_def *reladdr = nir_imov_alu(b, addr_src, 1);
184
185 if (prog_src->Index < 0) {
186 /* This is a negative offset which should be added to the address
187 * register's value.
188 */
189 reladdr = nir_iadd(b, reladdr, nir_imm_int(b, prog_src->Index));
190
191 deref_arr->base_offset = 0;
192 } else {
193 deref_arr->base_offset = prog_src->Index;
194 }
195 deref_arr->indirect = nir_src_for_ssa(reladdr);
196 } else {
197 deref_arr->deref_array_type = nir_deref_array_type_direct;
198 deref_arr->base_offset = prog_src->Index;
199 }
200
201 nir_builder_instr_insert(b, &load->instr);
202
203 src.src = nir_src_for_ssa(&load->dest.ssa);
204 break;
205 }
206 default:
207 fprintf(stderr, "bad uniform src register file: %s (%d)\n",
208 _mesa_register_file_name(file), file);
209 abort();
210 }
211 break;
212 }
213 default:
214 fprintf(stderr, "unknown src register file: %s (%d)\n",
215 _mesa_register_file_name(prog_src->File), prog_src->File);
216 abort();
217 }
218
219 nir_ssa_def *def;
220 if (!HAS_EXTENDED_SWIZZLE(prog_src->Swizzle) &&
221 (prog_src->Negate == NEGATE_NONE || prog_src->Negate == NEGATE_XYZW)) {
222 /* The simple non-SWZ case. */
223 for (int i = 0; i < 4; i++)
224 src.swizzle[i] = GET_SWZ(prog_src->Swizzle, i);
225
226 def = nir_fmov_alu(b, src, 4);
227
228 if (prog_src->Negate)
229 def = nir_fneg(b, def);
230 } else {
231 /* The SWZ instruction allows per-component zero/one swizzles, and also
232 * per-component negation.
233 */
234 nir_ssa_def *chans[4];
235 for (int i = 0; i < 4; i++) {
236 int swizzle = GET_SWZ(prog_src->Swizzle, i);
237 if (swizzle == SWIZZLE_ZERO) {
238 chans[i] = nir_imm_float(b, 0.0);
239 } else if (swizzle == SWIZZLE_ONE) {
240 chans[i] = nir_imm_float(b, 1.0);
241 } else {
242 assert(swizzle != SWIZZLE_NIL);
243 nir_alu_instr *mov = nir_alu_instr_create(b->shader, nir_op_fmov);
244 nir_ssa_dest_init(&mov->instr, &mov->dest.dest, 1, 32, NULL);
245 mov->dest.write_mask = 0x1;
246 mov->src[0] = src;
247 mov->src[0].swizzle[0] = swizzle;
248 nir_builder_instr_insert(b, &mov->instr);
249
250 chans[i] = &mov->dest.dest.ssa;
251 }
252
253 if (prog_src->Negate & (1 << i))
254 chans[i] = nir_fneg(b, chans[i]);
255 }
256 def = nir_vec4(b, chans[0], chans[1], chans[2], chans[3]);
257 }
258
259 return def;
260 }
261
262 static void
263 ptn_alu(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
264 {
265 unsigned num_srcs = nir_op_infos[op].num_inputs;
266 nir_alu_instr *instr = nir_alu_instr_create(b->shader, op);
267 unsigned i;
268
269 for (i = 0; i < num_srcs; i++)
270 instr->src[i].src = nir_src_for_ssa(src[i]);
271
272 instr->dest = dest;
273 nir_builder_instr_insert(b, &instr->instr);
274 }
275
276 static void
277 ptn_move_dest_masked(nir_builder *b, nir_alu_dest dest,
278 nir_ssa_def *def, unsigned write_mask)
279 {
280 if (!(dest.write_mask & write_mask))
281 return;
282
283 nir_alu_instr *mov = nir_alu_instr_create(b->shader, nir_op_fmov);
284 if (!mov)
285 return;
286
287 mov->dest = dest;
288 mov->dest.write_mask &= write_mask;
289 mov->src[0].src = nir_src_for_ssa(def);
290 for (unsigned i = def->num_components; i < 4; i++)
291 mov->src[0].swizzle[i] = def->num_components - 1;
292 nir_builder_instr_insert(b, &mov->instr);
293 }
294
295 static void
296 ptn_move_dest(nir_builder *b, nir_alu_dest dest, nir_ssa_def *def)
297 {
298 ptn_move_dest_masked(b, dest, def, WRITEMASK_XYZW);
299 }
300
301 static void
302 ptn_arl(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
303 {
304 ptn_move_dest(b, dest, nir_f2i32(b, nir_ffloor(b, src[0])));
305 }
306
307 /* EXP - Approximate Exponential Base 2
308 * dst.x = 2^{\lfloor src.x\rfloor}
309 * dst.y = src.x - \lfloor src.x\rfloor
310 * dst.z = 2^{src.x}
311 * dst.w = 1.0
312 */
313 static void
314 ptn_exp(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
315 {
316 nir_ssa_def *srcx = ptn_channel(b, src[0], X);
317
318 ptn_move_dest_masked(b, dest, nir_fexp2(b, nir_ffloor(b, srcx)), WRITEMASK_X);
319 ptn_move_dest_masked(b, dest, nir_fsub(b, srcx, nir_ffloor(b, srcx)), WRITEMASK_Y);
320 ptn_move_dest_masked(b, dest, nir_fexp2(b, srcx), WRITEMASK_Z);
321 ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_W);
322 }
323
324 /* LOG - Approximate Logarithm Base 2
325 * dst.x = \lfloor\log_2{|src.x|}\rfloor
326 * dst.y = |src.x| * 2^{-\lfloor\log_2{|src.x|}\rfloor}}
327 * dst.z = \log_2{|src.x|}
328 * dst.w = 1.0
329 */
330 static void
331 ptn_log(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
332 {
333 nir_ssa_def *abs_srcx = nir_fabs(b, ptn_channel(b, src[0], X));
334 nir_ssa_def *log2 = nir_flog2(b, abs_srcx);
335 nir_ssa_def *floor_log2 = nir_ffloor(b, log2);
336
337 ptn_move_dest_masked(b, dest, floor_log2, WRITEMASK_X);
338 ptn_move_dest_masked(b, dest,
339 nir_fmul(b, abs_srcx,
340 nir_fexp2(b, nir_fneg(b, floor_log2))),
341 WRITEMASK_Y);
342 ptn_move_dest_masked(b, dest, log2, WRITEMASK_Z);
343 ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_W);
344 }
345
346 /* DST - Distance Vector
347 * dst.x = 1.0
348 * dst.y = src0.y \times src1.y
349 * dst.z = src0.z
350 * dst.w = src1.w
351 */
352 static void
353 ptn_dst(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
354 {
355 ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_X);
356 ptn_move_dest_masked(b, dest, nir_fmul(b, src[0], src[1]), WRITEMASK_Y);
357 ptn_move_dest_masked(b, dest, nir_fmov(b, src[0]), WRITEMASK_Z);
358 ptn_move_dest_masked(b, dest, nir_fmov(b, src[1]), WRITEMASK_W);
359 }
360
361 /* LIT - Light Coefficients
362 * dst.x = 1.0
363 * dst.y = max(src.x, 0.0)
364 * dst.z = (src.x > 0.0) ? max(src.y, 0.0)^{clamp(src.w, -128.0, 128.0))} : 0
365 * dst.w = 1.0
366 */
367 static void
368 ptn_lit(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
369 {
370 ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_XW);
371
372 ptn_move_dest_masked(b, dest, nir_fmax(b, ptn_channel(b, src[0], X),
373 nir_imm_float(b, 0.0)), WRITEMASK_Y);
374
375 if (dest.write_mask & WRITEMASK_Z) {
376 nir_ssa_def *src0_y = ptn_channel(b, src[0], Y);
377 nir_ssa_def *wclamp = nir_fmax(b, nir_fmin(b, ptn_channel(b, src[0], W),
378 nir_imm_float(b, 128.0)),
379 nir_imm_float(b, -128.0));
380 nir_ssa_def *pow = nir_fpow(b, nir_fmax(b, src0_y, nir_imm_float(b, 0.0)),
381 wclamp);
382
383 nir_ssa_def *z;
384 if (b->shader->options->native_integers) {
385 z = nir_bcsel(b,
386 nir_fge(b, nir_imm_float(b, 0.0), ptn_channel(b, src[0], X)),
387 nir_imm_float(b, 0.0),
388 pow);
389 } else {
390 z = nir_fcsel(b,
391 nir_sge(b, nir_imm_float(b, 0.0), ptn_channel(b, src[0], X)),
392 nir_imm_float(b, 0.0),
393 pow);
394 }
395
396 ptn_move_dest_masked(b, dest, z, WRITEMASK_Z);
397 }
398 }
399
400 /* SCS - Sine Cosine
401 * dst.x = \cos{src.x}
402 * dst.y = \sin{src.x}
403 * dst.z = 0.0
404 * dst.w = 1.0
405 */
406 static void
407 ptn_scs(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
408 {
409 ptn_move_dest_masked(b, dest, nir_fcos(b, ptn_channel(b, src[0], X)),
410 WRITEMASK_X);
411 ptn_move_dest_masked(b, dest, nir_fsin(b, ptn_channel(b, src[0], X)),
412 WRITEMASK_Y);
413 ptn_move_dest_masked(b, dest, nir_imm_float(b, 0.0), WRITEMASK_Z);
414 ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_W);
415 }
416
417 /**
418 * Emit SLT. For platforms with integers, prefer b2f(flt(...)).
419 */
420 static void
421 ptn_slt(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
422 {
423 if (b->shader->options->native_integers) {
424 ptn_move_dest(b, dest, nir_b2f(b, nir_flt(b, src[0], src[1])));
425 } else {
426 ptn_move_dest(b, dest, nir_slt(b, src[0], src[1]));
427 }
428 }
429
430 /**
431 * Emit SGE. For platforms with integers, prefer b2f(fge(...)).
432 */
433 static void
434 ptn_sge(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
435 {
436 if (b->shader->options->native_integers) {
437 ptn_move_dest(b, dest, nir_b2f(b, nir_fge(b, src[0], src[1])));
438 } else {
439 ptn_move_dest(b, dest, nir_sge(b, src[0], src[1]));
440 }
441 }
442
443 static void
444 ptn_xpd(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
445 {
446 ptn_move_dest_masked(b, dest,
447 nir_fsub(b,
448 nir_fmul(b,
449 nir_swizzle(b, src[0], SWIZ(Y, Z, X, W), 3, true),
450 nir_swizzle(b, src[1], SWIZ(Z, X, Y, W), 3, true)),
451 nir_fmul(b,
452 nir_swizzle(b, src[1], SWIZ(Y, Z, X, W), 3, true),
453 nir_swizzle(b, src[0], SWIZ(Z, X, Y, W), 3, true))),
454 WRITEMASK_XYZ);
455 ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_W);
456 }
457
458 static void
459 ptn_dp2(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
460 {
461 ptn_move_dest(b, dest, nir_fdot2(b, src[0], src[1]));
462 }
463
464 static void
465 ptn_dp3(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
466 {
467 ptn_move_dest(b, dest, nir_fdot3(b, src[0], src[1]));
468 }
469
470 static void
471 ptn_dp4(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
472 {
473 ptn_move_dest(b, dest, nir_fdot4(b, src[0], src[1]));
474 }
475
476 static void
477 ptn_dph(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
478 {
479 ptn_move_dest(b, dest, nir_fdph(b, src[0], src[1]));
480 }
481
482 static void
483 ptn_cmp(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
484 {
485 if (b->shader->options->native_integers) {
486 ptn_move_dest(b, dest, nir_bcsel(b,
487 nir_flt(b, src[0], nir_imm_float(b, 0.0)),
488 src[1], src[2]));
489 } else {
490 ptn_move_dest(b, dest, nir_fcsel(b,
491 nir_slt(b, src[0], nir_imm_float(b, 0.0)),
492 src[1], src[2]));
493 }
494 }
495
496 static void
497 ptn_lrp(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
498 {
499 ptn_move_dest(b, dest, nir_flrp(b, src[2], src[1], src[0]));
500 }
501
502 static void
503 ptn_kil(nir_builder *b, nir_ssa_def **src)
504 {
505 nir_ssa_def *cmp = b->shader->options->native_integers ?
506 nir_bany_inequal4(b, nir_flt(b, src[0], nir_imm_float(b, 0.0)), nir_imm_int(b, 0)) :
507 nir_fany_nequal4(b, nir_slt(b, src[0], nir_imm_float(b, 0.0)), nir_imm_float(b, 0.0));
508
509 nir_intrinsic_instr *discard =
510 nir_intrinsic_instr_create(b->shader, nir_intrinsic_discard_if);
511 discard->src[0] = nir_src_for_ssa(cmp);
512 nir_builder_instr_insert(b, &discard->instr);
513 }
514
515 static void
516 ptn_tex(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src,
517 struct prog_instruction *prog_inst)
518 {
519 nir_tex_instr *instr;
520 nir_texop op;
521 unsigned num_srcs;
522
523 switch (prog_inst->Opcode) {
524 case OPCODE_TEX:
525 op = nir_texop_tex;
526 num_srcs = 1;
527 break;
528 case OPCODE_TXB:
529 op = nir_texop_txb;
530 num_srcs = 2;
531 break;
532 case OPCODE_TXD:
533 op = nir_texop_txd;
534 num_srcs = 3;
535 break;
536 case OPCODE_TXL:
537 op = nir_texop_txl;
538 num_srcs = 2;
539 break;
540 case OPCODE_TXP:
541 op = nir_texop_tex;
542 num_srcs = 2;
543 break;
544 default:
545 fprintf(stderr, "unknown tex op %d\n", prog_inst->Opcode);
546 abort();
547 }
548
549 if (prog_inst->TexShadow)
550 num_srcs++;
551
552 instr = nir_tex_instr_create(b->shader, num_srcs);
553 instr->op = op;
554 instr->dest_type = nir_type_float;
555 instr->is_shadow = prog_inst->TexShadow;
556 instr->texture_index = prog_inst->TexSrcUnit;
557 instr->sampler_index = prog_inst->TexSrcUnit;
558
559 switch (prog_inst->TexSrcTarget) {
560 case TEXTURE_1D_INDEX:
561 instr->sampler_dim = GLSL_SAMPLER_DIM_1D;
562 break;
563 case TEXTURE_2D_INDEX:
564 instr->sampler_dim = GLSL_SAMPLER_DIM_2D;
565 break;
566 case TEXTURE_3D_INDEX:
567 instr->sampler_dim = GLSL_SAMPLER_DIM_3D;
568 break;
569 case TEXTURE_CUBE_INDEX:
570 instr->sampler_dim = GLSL_SAMPLER_DIM_CUBE;
571 break;
572 case TEXTURE_RECT_INDEX:
573 instr->sampler_dim = GLSL_SAMPLER_DIM_RECT;
574 break;
575 default:
576 fprintf(stderr, "Unknown texture target %d\n", prog_inst->TexSrcTarget);
577 abort();
578 }
579
580 switch (instr->sampler_dim) {
581 case GLSL_SAMPLER_DIM_1D:
582 case GLSL_SAMPLER_DIM_BUF:
583 instr->coord_components = 1;
584 break;
585 case GLSL_SAMPLER_DIM_2D:
586 case GLSL_SAMPLER_DIM_RECT:
587 case GLSL_SAMPLER_DIM_EXTERNAL:
588 case GLSL_SAMPLER_DIM_MS:
589 instr->coord_components = 2;
590 break;
591 case GLSL_SAMPLER_DIM_3D:
592 case GLSL_SAMPLER_DIM_CUBE:
593 instr->coord_components = 3;
594 break;
595 case GLSL_SAMPLER_DIM_SUBPASS:
596 case GLSL_SAMPLER_DIM_SUBPASS_MS:
597 unreachable("can't reach");
598 }
599
600 unsigned src_number = 0;
601
602 instr->src[src_number].src =
603 nir_src_for_ssa(nir_swizzle(b, src[0], SWIZ(X, Y, Z, W),
604 instr->coord_components, true));
605 instr->src[src_number].src_type = nir_tex_src_coord;
606 src_number++;
607
608 if (prog_inst->Opcode == OPCODE_TXP) {
609 instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], W));
610 instr->src[src_number].src_type = nir_tex_src_projector;
611 src_number++;
612 }
613
614 if (prog_inst->Opcode == OPCODE_TXB) {
615 instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], W));
616 instr->src[src_number].src_type = nir_tex_src_bias;
617 src_number++;
618 }
619
620 if (prog_inst->Opcode == OPCODE_TXL) {
621 instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], W));
622 instr->src[src_number].src_type = nir_tex_src_lod;
623 src_number++;
624 }
625
626 if (instr->is_shadow) {
627 if (instr->coord_components < 3)
628 instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], Z));
629 else
630 instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], W));
631
632 instr->src[src_number].src_type = nir_tex_src_comparator;
633 src_number++;
634 }
635
636 assert(src_number == num_srcs);
637
638 nir_ssa_dest_init(&instr->instr, &instr->dest, 4, 32, NULL);
639 nir_builder_instr_insert(b, &instr->instr);
640
641 /* Resolve the writemask on the texture op. */
642 ptn_move_dest(b, dest, &instr->dest.ssa);
643 }
644
645 static const nir_op op_trans[MAX_OPCODE] = {
646 [OPCODE_NOP] = 0,
647 [OPCODE_ABS] = nir_op_fabs,
648 [OPCODE_ADD] = nir_op_fadd,
649 [OPCODE_ARL] = 0,
650 [OPCODE_CMP] = 0,
651 [OPCODE_COS] = 0,
652 [OPCODE_DDX] = nir_op_fddx,
653 [OPCODE_DDY] = nir_op_fddy,
654 [OPCODE_DP2] = 0,
655 [OPCODE_DP3] = 0,
656 [OPCODE_DP4] = 0,
657 [OPCODE_DPH] = 0,
658 [OPCODE_DST] = 0,
659 [OPCODE_END] = 0,
660 [OPCODE_EX2] = 0,
661 [OPCODE_EXP] = 0,
662 [OPCODE_FLR] = nir_op_ffloor,
663 [OPCODE_FRC] = nir_op_ffract,
664 [OPCODE_LG2] = 0,
665 [OPCODE_LIT] = 0,
666 [OPCODE_LOG] = 0,
667 [OPCODE_LRP] = 0,
668 [OPCODE_MAD] = 0,
669 [OPCODE_MAX] = nir_op_fmax,
670 [OPCODE_MIN] = nir_op_fmin,
671 [OPCODE_MOV] = nir_op_fmov,
672 [OPCODE_MUL] = nir_op_fmul,
673 [OPCODE_POW] = 0,
674 [OPCODE_RCP] = 0,
675
676 [OPCODE_RSQ] = 0,
677 [OPCODE_SCS] = 0,
678 [OPCODE_SGE] = 0,
679 [OPCODE_SIN] = 0,
680 [OPCODE_SLT] = 0,
681 [OPCODE_SSG] = nir_op_fsign,
682 [OPCODE_SUB] = nir_op_fsub,
683 [OPCODE_SWZ] = 0,
684 [OPCODE_TEX] = 0,
685 [OPCODE_TRUNC] = nir_op_ftrunc,
686 [OPCODE_TXB] = 0,
687 [OPCODE_TXD] = 0,
688 [OPCODE_TXL] = 0,
689 [OPCODE_TXP] = 0,
690 [OPCODE_XPD] = 0,
691 };
692
693 static void
694 ptn_emit_instruction(struct ptn_compile *c, struct prog_instruction *prog_inst)
695 {
696 nir_builder *b = &c->build;
697 unsigned i;
698 const unsigned op = prog_inst->Opcode;
699
700 if (op == OPCODE_END)
701 return;
702
703 nir_ssa_def *src[3];
704 for (i = 0; i < 3; i++) {
705 src[i] = ptn_get_src(c, &prog_inst->SrcReg[i]);
706 }
707 nir_alu_dest dest = ptn_get_dest(c, &prog_inst->DstReg);
708 if (c->error)
709 return;
710
711 switch (op) {
712 case OPCODE_RSQ:
713 ptn_move_dest(b, dest,
714 nir_frsq(b, nir_fabs(b, ptn_channel(b, src[0], X))));
715 break;
716
717 case OPCODE_RCP:
718 ptn_move_dest(b, dest, nir_frcp(b, ptn_channel(b, src[0], X)));
719 break;
720
721 case OPCODE_EX2:
722 ptn_move_dest(b, dest, nir_fexp2(b, ptn_channel(b, src[0], X)));
723 break;
724
725 case OPCODE_LG2:
726 ptn_move_dest(b, dest, nir_flog2(b, ptn_channel(b, src[0], X)));
727 break;
728
729 case OPCODE_POW:
730 ptn_move_dest(b, dest, nir_fpow(b,
731 ptn_channel(b, src[0], X),
732 ptn_channel(b, src[1], X)));
733 break;
734
735 case OPCODE_COS:
736 ptn_move_dest(b, dest, nir_fcos(b, ptn_channel(b, src[0], X)));
737 break;
738
739 case OPCODE_SIN:
740 ptn_move_dest(b, dest, nir_fsin(b, ptn_channel(b, src[0], X)));
741 break;
742
743 case OPCODE_ARL:
744 ptn_arl(b, dest, src);
745 break;
746
747 case OPCODE_EXP:
748 ptn_exp(b, dest, src);
749 break;
750
751 case OPCODE_LOG:
752 ptn_log(b, dest, src);
753 break;
754
755 case OPCODE_LRP:
756 ptn_lrp(b, dest, src);
757 break;
758
759 case OPCODE_MAD:
760 ptn_move_dest(b, dest, nir_fadd(b, nir_fmul(b, src[0], src[1]), src[2]));
761 break;
762
763 case OPCODE_DST:
764 ptn_dst(b, dest, src);
765 break;
766
767 case OPCODE_LIT:
768 ptn_lit(b, dest, src);
769 break;
770
771 case OPCODE_XPD:
772 ptn_xpd(b, dest, src);
773 break;
774
775 case OPCODE_DP2:
776 ptn_dp2(b, dest, src);
777 break;
778
779 case OPCODE_DP3:
780 ptn_dp3(b, dest, src);
781 break;
782
783 case OPCODE_DP4:
784 ptn_dp4(b, dest, src);
785 break;
786
787 case OPCODE_DPH:
788 ptn_dph(b, dest, src);
789 break;
790
791 case OPCODE_KIL:
792 ptn_kil(b, src);
793 break;
794
795 case OPCODE_CMP:
796 ptn_cmp(b, dest, src);
797 break;
798
799 case OPCODE_SCS:
800 ptn_scs(b, dest, src);
801 break;
802
803 case OPCODE_SLT:
804 ptn_slt(b, dest, src);
805 break;
806
807 case OPCODE_SGE:
808 ptn_sge(b, dest, src);
809 break;
810
811 case OPCODE_TEX:
812 case OPCODE_TXB:
813 case OPCODE_TXD:
814 case OPCODE_TXL:
815 case OPCODE_TXP:
816 ptn_tex(b, dest, src, prog_inst);
817 break;
818
819 case OPCODE_SWZ:
820 /* Extended swizzles were already handled in ptn_get_src(). */
821 ptn_alu(b, nir_op_fmov, dest, src);
822 break;
823
824 case OPCODE_NOP:
825 break;
826
827 default:
828 if (op_trans[op] != 0) {
829 ptn_alu(b, op_trans[op], dest, src);
830 } else {
831 fprintf(stderr, "unknown opcode: %s\n", _mesa_opcode_string(op));
832 abort();
833 }
834 break;
835 }
836
837 if (prog_inst->Saturate) {
838 assert(prog_inst->Saturate);
839 assert(!dest.dest.is_ssa);
840 ptn_move_dest(b, dest, nir_fsat(b, ptn_src_for_dest(c, &dest)));
841 }
842 }
843
844 /**
845 * Puts a NIR intrinsic to store of each PROGRAM_OUTPUT value to the output
846 * variables at the end of the shader.
847 *
848 * We don't generate these incrementally as the PROGRAM_OUTPUT values are
849 * written, because there's no output load intrinsic, which means we couldn't
850 * handle writemasks.
851 */
852 static void
853 ptn_add_output_stores(struct ptn_compile *c)
854 {
855 nir_builder *b = &c->build;
856
857 nir_foreach_variable(var, &b->shader->outputs) {
858 nir_ssa_def *src = nir_load_reg(b, c->output_regs[var->data.location]);
859 if (c->prog->Target == GL_FRAGMENT_PROGRAM_ARB &&
860 var->data.location == FRAG_RESULT_DEPTH) {
861 /* result.depth has this strange convention of being the .z component of
862 * a vec4 with undefined .xyw components. We resolve it to a scalar, to
863 * match GLSL's gl_FragDepth and the expectations of most backends.
864 */
865 src = nir_channel(b, src, 2);
866 }
867 unsigned num_components = glsl_get_vector_elements(var->type);
868 nir_store_var(b, var, src, (1 << num_components) - 1);
869 }
870 }
871
872 static void
873 setup_registers_and_variables(struct ptn_compile *c)
874 {
875 nir_builder *b = &c->build;
876 struct nir_shader *shader = b->shader;
877
878 /* Create input variables. */
879 const int num_inputs = util_last_bit64(c->prog->info.inputs_read);
880 for (int i = 0; i < num_inputs; i++) {
881 if (!(c->prog->info.inputs_read & BITFIELD64_BIT(i)))
882 continue;
883
884 nir_variable *var =
885 nir_variable_create(shader, nir_var_shader_in, glsl_vec4_type(),
886 ralloc_asprintf(shader, "in_%d", i));
887 var->data.location = i;
888 var->data.index = 0;
889
890 if (c->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
891 if (i == VARYING_SLOT_POS) {
892 var->data.origin_upper_left = c->prog->OriginUpperLeft;
893 var->data.pixel_center_integer = c->prog->PixelCenterInteger;
894 } else if (i == VARYING_SLOT_FOGC) {
895 /* fogcoord is defined as <f, 0.0, 0.0, 1.0>. Make the actual
896 * input variable a float, and create a local containing the
897 * full vec4 value.
898 */
899 var->type = glsl_float_type();
900
901 nir_variable *fullvar =
902 nir_local_variable_create(b->impl, glsl_vec4_type(),
903 "fogcoord_tmp");
904
905 nir_store_var(b, fullvar,
906 nir_vec4(b, nir_load_var(b, var),
907 nir_imm_float(b, 0.0),
908 nir_imm_float(b, 0.0),
909 nir_imm_float(b, 1.0)),
910 WRITEMASK_XYZW);
911
912 /* We inserted the real input into the list so the driver has real
913 * inputs, but we set c->input_vars[i] to the temporary so we use
914 * the splatted value.
915 */
916 c->input_vars[i] = fullvar;
917 continue;
918 }
919 }
920
921 c->input_vars[i] = var;
922 }
923
924 /* Create output registers and variables. */
925 int max_outputs = util_last_bit(c->prog->info.outputs_written);
926 c->output_regs = rzalloc_array(c, nir_register *, max_outputs);
927
928 for (int i = 0; i < max_outputs; i++) {
929 if (!(c->prog->info.outputs_written & BITFIELD64_BIT(i)))
930 continue;
931
932 /* Since we can't load from outputs in the IR, we make temporaries
933 * for the outputs and emit stores to the real outputs at the end of
934 * the shader.
935 */
936 nir_register *reg = nir_local_reg_create(b->impl);
937 reg->num_components = 4;
938
939 nir_variable *var = rzalloc(shader, nir_variable);
940 if (c->prog->Target == GL_FRAGMENT_PROGRAM_ARB && i == FRAG_RESULT_DEPTH)
941 var->type = glsl_float_type();
942 else
943 var->type = glsl_vec4_type();
944 var->data.mode = nir_var_shader_out;
945 var->name = ralloc_asprintf(var, "out_%d", i);
946
947 var->data.location = i;
948 var->data.index = 0;
949
950 c->output_regs[i] = reg;
951
952 exec_list_push_tail(&shader->outputs, &var->node);
953 c->output_vars[i] = var;
954 }
955
956 /* Create temporary registers. */
957 c->temp_regs = rzalloc_array(c, nir_register *,
958 c->prog->arb.NumTemporaries);
959
960 nir_register *reg;
961 for (unsigned i = 0; i < c->prog->arb.NumTemporaries; i++) {
962 reg = nir_local_reg_create(b->impl);
963 if (!reg) {
964 c->error = true;
965 return;
966 }
967 reg->num_components = 4;
968 c->temp_regs[i] = reg;
969 }
970
971 /* Create the address register (for ARB_vertex_program). */
972 reg = nir_local_reg_create(b->impl);
973 if (!reg) {
974 c->error = true;
975 return;
976 }
977 reg->num_components = 1;
978 c->addr_reg = reg;
979 }
980
981 struct nir_shader *
982 prog_to_nir(const struct gl_program *prog,
983 const nir_shader_compiler_options *options)
984 {
985 struct ptn_compile *c;
986 struct nir_shader *s;
987 gl_shader_stage stage = _mesa_program_enum_to_shader_stage(prog->Target);
988
989 c = rzalloc(NULL, struct ptn_compile);
990 if (!c)
991 return NULL;
992 c->prog = prog;
993
994 nir_builder_init_simple_shader(&c->build, NULL, stage, options);
995
996 /* Copy the shader_info from the gl_program */
997 c->build.shader->info = prog->info;
998
999 s = c->build.shader;
1000
1001 if (prog->Parameters->NumParameters > 0) {
1002 c->parameters = rzalloc(s, nir_variable);
1003 c->parameters->type =
1004 glsl_array_type(glsl_vec4_type(), prog->Parameters->NumParameters);
1005 c->parameters->name = "parameters";
1006 c->parameters->data.read_only = true;
1007 c->parameters->data.mode = nir_var_uniform;
1008 exec_list_push_tail(&s->uniforms, &c->parameters->node);
1009 }
1010
1011 setup_registers_and_variables(c);
1012 if (unlikely(c->error))
1013 goto fail;
1014
1015 for (unsigned int i = 0; i < prog->arb.NumInstructions; i++) {
1016 ptn_emit_instruction(c, &prog->arb.Instructions[i]);
1017
1018 if (unlikely(c->error))
1019 break;
1020 }
1021
1022 ptn_add_output_stores(c);
1023
1024 s->info.name = ralloc_asprintf(s, "ARB%d", prog->Id);
1025 s->info.num_textures = util_last_bit(prog->SamplersUsed);
1026 s->info.num_ubos = 0;
1027 s->info.num_abos = 0;
1028 s->info.num_ssbos = 0;
1029 s->info.num_images = 0;
1030 s->info.uses_texture_gather = false;
1031 s->info.clip_distance_array_size = 0;
1032 s->info.cull_distance_array_size = 0;
1033 s->info.separate_shader = false;
1034
1035 fail:
1036 if (c->error) {
1037 ralloc_free(s);
1038 s = NULL;
1039 }
1040 ralloc_free(c);
1041 return s;
1042 }