nir: Create sampler variables in prog_to_nir.
[mesa.git] / src / mesa / program / prog_to_nir.c
1 /*
2 * Copyright © 2015 Intel Corporation
3 * Copyright © 2014-2015 Broadcom
4 * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
15 * Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
23 * IN THE SOFTWARE.
24 */
25
26 #include "compiler/nir/nir.h"
27 #include "compiler/nir/nir_builder.h"
28 #include "compiler/glsl/list.h"
29 #include "main/imports.h"
30 #include "main/mtypes.h"
31 #include "util/ralloc.h"
32
33 #include "prog_to_nir.h"
34 #include "prog_instruction.h"
35 #include "prog_parameter.h"
36 #include "prog_print.h"
37 #include "program.h"
38
39 /**
40 * \file prog_to_nir.c
41 *
42 * A translator from Mesa IR (prog_instruction.h) to NIR. This is primarily
43 * intended to support ARB_vertex_program, ARB_fragment_program, and fixed-function
44 * vertex processing. Full GLSL support should use glsl_to_nir instead.
45 */
46
47 struct ptn_compile {
48 const struct gl_program *prog;
49 nir_builder build;
50 bool error;
51
52 nir_variable *parameters;
53 nir_variable *input_vars[VARYING_SLOT_MAX];
54 nir_variable *output_vars[VARYING_SLOT_MAX];
55 nir_variable *sampler_vars[32]; /* matches number of bits in TexSrcUnit */
56 nir_register **output_regs;
57 nir_register **temp_regs;
58
59 nir_register *addr_reg;
60 };
61
/* Builds a temporary unsigned[4] swizzle from four SWIZZLE_* name suffixes. */
#define SWIZ(X, Y, Z, W) \
   (unsigned[4]){ SWIZZLE_##X, SWIZZLE_##Y, SWIZZLE_##Z, SWIZZLE_##W }

/* Selects channel `ch` of `src` as a one-component value. */
#define ptn_channel(b, src, ch) \
   nir_swizzle(b, src, SWIZ(ch, ch, ch, ch), 1, true)
65
66 static nir_ssa_def *
67 ptn_src_for_dest(struct ptn_compile *c, nir_alu_dest *dest)
68 {
69 nir_builder *b = &c->build;
70
71 nir_alu_src src;
72 memset(&src, 0, sizeof(src));
73
74 if (dest->dest.is_ssa)
75 src.src = nir_src_for_ssa(&dest->dest.ssa);
76 else {
77 assert(!dest->dest.reg.indirect);
78 src.src = nir_src_for_reg(dest->dest.reg.reg);
79 src.src.reg.base_offset = dest->dest.reg.base_offset;
80 }
81
82 for (int i = 0; i < 4; i++)
83 src.swizzle[i] = i;
84
85 return nir_fmov_alu(b, src, 4);
86 }
87
88 static nir_alu_dest
89 ptn_get_dest(struct ptn_compile *c, const struct prog_dst_register *prog_dst)
90 {
91 nir_alu_dest dest;
92
93 memset(&dest, 0, sizeof(dest));
94
95 switch (prog_dst->File) {
96 case PROGRAM_TEMPORARY:
97 dest.dest.reg.reg = c->temp_regs[prog_dst->Index];
98 break;
99 case PROGRAM_OUTPUT:
100 dest.dest.reg.reg = c->output_regs[prog_dst->Index];
101 break;
102 case PROGRAM_ADDRESS:
103 assert(prog_dst->Index == 0);
104 dest.dest.reg.reg = c->addr_reg;
105 break;
106 case PROGRAM_UNDEFINED:
107 break;
108 }
109
110 dest.write_mask = prog_dst->WriteMask;
111 dest.saturate = false;
112
113 assert(!prog_dst->RelAddr);
114
115 return dest;
116 }
117
118 static nir_ssa_def *
119 ptn_get_src(struct ptn_compile *c, const struct prog_src_register *prog_src)
120 {
121 nir_builder *b = &c->build;
122 nir_alu_src src;
123
124 memset(&src, 0, sizeof(src));
125
126 switch (prog_src->File) {
127 case PROGRAM_UNDEFINED:
128 return nir_imm_float(b, 0.0);
129 case PROGRAM_TEMPORARY:
130 assert(!prog_src->RelAddr && prog_src->Index >= 0);
131 src.src.reg.reg = c->temp_regs[prog_src->Index];
132 break;
133 case PROGRAM_INPUT: {
134 /* ARB_vertex_program doesn't allow relative addressing on vertex
135 * attributes; ARB_fragment_program has no relative addressing at all.
136 */
137 assert(!prog_src->RelAddr);
138
139 assert(prog_src->Index >= 0 && prog_src->Index < VARYING_SLOT_MAX);
140
141 nir_variable *var = c->input_vars[prog_src->Index];
142 src.src = nir_src_for_ssa(nir_load_var(b, var));
143 break;
144 }
145 case PROGRAM_STATE_VAR:
146 case PROGRAM_CONSTANT: {
147 /* We actually want to look at the type in the Parameters list for this,
148 * because it lets us upload constant builtin uniforms as actual
149 * constants.
150 */
151 struct gl_program_parameter_list *plist = c->prog->Parameters;
152 gl_register_file file = prog_src->RelAddr ? prog_src->File :
153 plist->Parameters[prog_src->Index].Type;
154
155 switch (file) {
156 case PROGRAM_CONSTANT:
157 if ((c->prog->arb.IndirectRegisterFiles &
158 (1 << PROGRAM_CONSTANT)) == 0) {
159 unsigned pvo = plist->ParameterValueOffset[prog_src->Index];
160 float *v = (float *) plist->ParameterValues + pvo;
161 src.src = nir_src_for_ssa(nir_imm_vec4(b, v[0], v[1], v[2], v[3]));
162 break;
163 }
164 /* FALLTHROUGH */
165 case PROGRAM_STATE_VAR: {
166 assert(c->parameters != NULL);
167
168 nir_deref_instr *deref = nir_build_deref_var(b, c->parameters);
169
170 nir_ssa_def *index = nir_imm_int(b, prog_src->Index);
171 if (prog_src->RelAddr)
172 index = nir_iadd(b, index, nir_load_reg(b, c->addr_reg));
173 deref = nir_build_deref_array(b, deref, nir_channel(b, index, 0));
174
175 src.src = nir_src_for_ssa(nir_load_deref(b, deref));
176 break;
177 }
178 default:
179 fprintf(stderr, "bad uniform src register file: %s (%d)\n",
180 _mesa_register_file_name(file), file);
181 abort();
182 }
183 break;
184 }
185 default:
186 fprintf(stderr, "unknown src register file: %s (%d)\n",
187 _mesa_register_file_name(prog_src->File), prog_src->File);
188 abort();
189 }
190
191 nir_ssa_def *def;
192 if (!HAS_EXTENDED_SWIZZLE(prog_src->Swizzle) &&
193 (prog_src->Negate == NEGATE_NONE || prog_src->Negate == NEGATE_XYZW)) {
194 /* The simple non-SWZ case. */
195 for (int i = 0; i < 4; i++)
196 src.swizzle[i] = GET_SWZ(prog_src->Swizzle, i);
197
198 def = nir_fmov_alu(b, src, 4);
199
200 if (prog_src->Negate)
201 def = nir_fneg(b, def);
202 } else {
203 /* The SWZ instruction allows per-component zero/one swizzles, and also
204 * per-component negation.
205 */
206 nir_ssa_def *chans[4];
207 for (int i = 0; i < 4; i++) {
208 int swizzle = GET_SWZ(prog_src->Swizzle, i);
209 if (swizzle == SWIZZLE_ZERO) {
210 chans[i] = nir_imm_float(b, 0.0);
211 } else if (swizzle == SWIZZLE_ONE) {
212 chans[i] = nir_imm_float(b, 1.0);
213 } else {
214 assert(swizzle != SWIZZLE_NIL);
215 nir_alu_instr *mov = nir_alu_instr_create(b->shader, nir_op_fmov);
216 nir_ssa_dest_init(&mov->instr, &mov->dest.dest, 1, 32, NULL);
217 mov->dest.write_mask = 0x1;
218 mov->src[0] = src;
219 mov->src[0].swizzle[0] = swizzle;
220 nir_builder_instr_insert(b, &mov->instr);
221
222 chans[i] = &mov->dest.dest.ssa;
223 }
224
225 if (prog_src->Negate & (1 << i))
226 chans[i] = nir_fneg(b, chans[i]);
227 }
228 def = nir_vec4(b, chans[0], chans[1], chans[2], chans[3]);
229 }
230
231 return def;
232 }
233
234 static void
235 ptn_alu(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
236 {
237 unsigned num_srcs = nir_op_infos[op].num_inputs;
238 nir_alu_instr *instr = nir_alu_instr_create(b->shader, op);
239 unsigned i;
240
241 for (i = 0; i < num_srcs; i++)
242 instr->src[i].src = nir_src_for_ssa(src[i]);
243
244 instr->dest = dest;
245 nir_builder_instr_insert(b, &instr->instr);
246 }
247
248 static void
249 ptn_move_dest_masked(nir_builder *b, nir_alu_dest dest,
250 nir_ssa_def *def, unsigned write_mask)
251 {
252 if (!(dest.write_mask & write_mask))
253 return;
254
255 nir_alu_instr *mov = nir_alu_instr_create(b->shader, nir_op_fmov);
256 if (!mov)
257 return;
258
259 mov->dest = dest;
260 mov->dest.write_mask &= write_mask;
261 mov->src[0].src = nir_src_for_ssa(def);
262 for (unsigned i = def->num_components; i < 4; i++)
263 mov->src[0].swizzle[i] = def->num_components - 1;
264 nir_builder_instr_insert(b, &mov->instr);
265 }
266
267 static void
268 ptn_move_dest(nir_builder *b, nir_alu_dest dest, nir_ssa_def *def)
269 {
270 ptn_move_dest_masked(b, dest, def, WRITEMASK_XYZW);
271 }
272
273 static void
274 ptn_arl(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
275 {
276 ptn_move_dest(b, dest, nir_f2i32(b, nir_ffloor(b, src[0])));
277 }
278
279 /* EXP - Approximate Exponential Base 2
280 * dst.x = 2^{\lfloor src.x\rfloor}
281 * dst.y = src.x - \lfloor src.x\rfloor
282 * dst.z = 2^{src.x}
283 * dst.w = 1.0
284 */
285 static void
286 ptn_exp(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
287 {
288 nir_ssa_def *srcx = ptn_channel(b, src[0], X);
289
290 ptn_move_dest_masked(b, dest, nir_fexp2(b, nir_ffloor(b, srcx)), WRITEMASK_X);
291 ptn_move_dest_masked(b, dest, nir_fsub(b, srcx, nir_ffloor(b, srcx)), WRITEMASK_Y);
292 ptn_move_dest_masked(b, dest, nir_fexp2(b, srcx), WRITEMASK_Z);
293 ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_W);
294 }
295
296 /* LOG - Approximate Logarithm Base 2
297 * dst.x = \lfloor\log_2{|src.x|}\rfloor
298 * dst.y = |src.x| * 2^{-\lfloor\log_2{|src.x|}\rfloor}}
299 * dst.z = \log_2{|src.x|}
300 * dst.w = 1.0
301 */
302 static void
303 ptn_log(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
304 {
305 nir_ssa_def *abs_srcx = nir_fabs(b, ptn_channel(b, src[0], X));
306 nir_ssa_def *log2 = nir_flog2(b, abs_srcx);
307 nir_ssa_def *floor_log2 = nir_ffloor(b, log2);
308
309 ptn_move_dest_masked(b, dest, floor_log2, WRITEMASK_X);
310 ptn_move_dest_masked(b, dest,
311 nir_fmul(b, abs_srcx,
312 nir_fexp2(b, nir_fneg(b, floor_log2))),
313 WRITEMASK_Y);
314 ptn_move_dest_masked(b, dest, log2, WRITEMASK_Z);
315 ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_W);
316 }
317
318 /* DST - Distance Vector
319 * dst.x = 1.0
320 * dst.y = src0.y \times src1.y
321 * dst.z = src0.z
322 * dst.w = src1.w
323 */
324 static void
325 ptn_dst(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
326 {
327 ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_X);
328 ptn_move_dest_masked(b, dest, nir_fmul(b, src[0], src[1]), WRITEMASK_Y);
329 ptn_move_dest_masked(b, dest, nir_fmov(b, src[0]), WRITEMASK_Z);
330 ptn_move_dest_masked(b, dest, nir_fmov(b, src[1]), WRITEMASK_W);
331 }
332
333 /* LIT - Light Coefficients
334 * dst.x = 1.0
335 * dst.y = max(src.x, 0.0)
336 * dst.z = (src.x > 0.0) ? max(src.y, 0.0)^{clamp(src.w, -128.0, 128.0))} : 0
337 * dst.w = 1.0
338 */
339 static void
340 ptn_lit(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
341 {
342 ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_XW);
343
344 ptn_move_dest_masked(b, dest, nir_fmax(b, ptn_channel(b, src[0], X),
345 nir_imm_float(b, 0.0)), WRITEMASK_Y);
346
347 if (dest.write_mask & WRITEMASK_Z) {
348 nir_ssa_def *src0_y = ptn_channel(b, src[0], Y);
349 nir_ssa_def *wclamp = nir_fmax(b, nir_fmin(b, ptn_channel(b, src[0], W),
350 nir_imm_float(b, 128.0)),
351 nir_imm_float(b, -128.0));
352 nir_ssa_def *pow = nir_fpow(b, nir_fmax(b, src0_y, nir_imm_float(b, 0.0)),
353 wclamp);
354
355 nir_ssa_def *z;
356 if (b->shader->options->native_integers) {
357 z = nir_bcsel(b,
358 nir_fge(b, nir_imm_float(b, 0.0), ptn_channel(b, src[0], X)),
359 nir_imm_float(b, 0.0),
360 pow);
361 } else {
362 z = nir_fcsel(b,
363 nir_sge(b, nir_imm_float(b, 0.0), ptn_channel(b, src[0], X)),
364 nir_imm_float(b, 0.0),
365 pow);
366 }
367
368 ptn_move_dest_masked(b, dest, z, WRITEMASK_Z);
369 }
370 }
371
372 /* SCS - Sine Cosine
373 * dst.x = \cos{src.x}
374 * dst.y = \sin{src.x}
375 * dst.z = 0.0
376 * dst.w = 1.0
377 */
378 static void
379 ptn_scs(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
380 {
381 ptn_move_dest_masked(b, dest, nir_fcos(b, ptn_channel(b, src[0], X)),
382 WRITEMASK_X);
383 ptn_move_dest_masked(b, dest, nir_fsin(b, ptn_channel(b, src[0], X)),
384 WRITEMASK_Y);
385 ptn_move_dest_masked(b, dest, nir_imm_float(b, 0.0), WRITEMASK_Z);
386 ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_W);
387 }
388
389 /**
390 * Emit SLT. For platforms with integers, prefer b2f(flt(...)).
391 */
392 static void
393 ptn_slt(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
394 {
395 if (b->shader->options->native_integers) {
396 ptn_move_dest(b, dest, nir_b2f(b, nir_flt(b, src[0], src[1])));
397 } else {
398 ptn_move_dest(b, dest, nir_slt(b, src[0], src[1]));
399 }
400 }
401
402 /**
403 * Emit SGE. For platforms with integers, prefer b2f(fge(...)).
404 */
405 static void
406 ptn_sge(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
407 {
408 if (b->shader->options->native_integers) {
409 ptn_move_dest(b, dest, nir_b2f(b, nir_fge(b, src[0], src[1])));
410 } else {
411 ptn_move_dest(b, dest, nir_sge(b, src[0], src[1]));
412 }
413 }
414
415 static void
416 ptn_xpd(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
417 {
418 ptn_move_dest_masked(b, dest,
419 nir_fsub(b,
420 nir_fmul(b,
421 nir_swizzle(b, src[0], SWIZ(Y, Z, X, W), 3, true),
422 nir_swizzle(b, src[1], SWIZ(Z, X, Y, W), 3, true)),
423 nir_fmul(b,
424 nir_swizzle(b, src[1], SWIZ(Y, Z, X, W), 3, true),
425 nir_swizzle(b, src[0], SWIZ(Z, X, Y, W), 3, true))),
426 WRITEMASK_XYZ);
427 ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_W);
428 }
429
430 static void
431 ptn_dp2(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
432 {
433 ptn_move_dest(b, dest, nir_fdot2(b, src[0], src[1]));
434 }
435
436 static void
437 ptn_dp3(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
438 {
439 ptn_move_dest(b, dest, nir_fdot3(b, src[0], src[1]));
440 }
441
442 static void
443 ptn_dp4(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
444 {
445 ptn_move_dest(b, dest, nir_fdot4(b, src[0], src[1]));
446 }
447
448 static void
449 ptn_dph(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
450 {
451 ptn_move_dest(b, dest, nir_fdph(b, src[0], src[1]));
452 }
453
454 static void
455 ptn_cmp(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
456 {
457 if (b->shader->options->native_integers) {
458 ptn_move_dest(b, dest, nir_bcsel(b,
459 nir_flt(b, src[0], nir_imm_float(b, 0.0)),
460 src[1], src[2]));
461 } else {
462 ptn_move_dest(b, dest, nir_fcsel(b,
463 nir_slt(b, src[0], nir_imm_float(b, 0.0)),
464 src[1], src[2]));
465 }
466 }
467
468 static void
469 ptn_lrp(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
470 {
471 ptn_move_dest(b, dest, nir_flrp(b, src[2], src[1], src[0]));
472 }
473
474 static void
475 ptn_kil(nir_builder *b, nir_ssa_def **src)
476 {
477 nir_ssa_def *cmp = b->shader->options->native_integers ?
478 nir_bany_inequal4(b, nir_flt(b, src[0], nir_imm_float(b, 0.0)), nir_imm_int(b, 0)) :
479 nir_fany_nequal4(b, nir_slt(b, src[0], nir_imm_float(b, 0.0)), nir_imm_float(b, 0.0));
480
481 nir_intrinsic_instr *discard =
482 nir_intrinsic_instr_create(b->shader, nir_intrinsic_discard_if);
483 discard->src[0] = nir_src_for_ssa(cmp);
484 nir_builder_instr_insert(b, &discard->instr);
485 }
486
487 static void
488 ptn_tex(struct ptn_compile *c, nir_alu_dest dest, nir_ssa_def **src,
489 struct prog_instruction *prog_inst)
490 {
491 nir_builder *b = &c->build;
492 nir_tex_instr *instr;
493 nir_texop op;
494 unsigned num_srcs;
495
496 switch (prog_inst->Opcode) {
497 case OPCODE_TEX:
498 op = nir_texop_tex;
499 num_srcs = 1;
500 break;
501 case OPCODE_TXB:
502 op = nir_texop_txb;
503 num_srcs = 2;
504 break;
505 case OPCODE_TXD:
506 op = nir_texop_txd;
507 num_srcs = 3;
508 break;
509 case OPCODE_TXL:
510 op = nir_texop_txl;
511 num_srcs = 2;
512 break;
513 case OPCODE_TXP:
514 op = nir_texop_tex;
515 num_srcs = 2;
516 break;
517 default:
518 fprintf(stderr, "unknown tex op %d\n", prog_inst->Opcode);
519 abort();
520 }
521
522 if (prog_inst->TexShadow)
523 num_srcs++;
524
525 instr = nir_tex_instr_create(b->shader, num_srcs);
526 instr->op = op;
527 instr->dest_type = nir_type_float;
528 instr->is_shadow = prog_inst->TexShadow;
529 instr->texture_index = prog_inst->TexSrcUnit;
530 instr->sampler_index = prog_inst->TexSrcUnit;
531
532 switch (prog_inst->TexSrcTarget) {
533 case TEXTURE_1D_INDEX:
534 instr->sampler_dim = GLSL_SAMPLER_DIM_1D;
535 break;
536 case TEXTURE_2D_INDEX:
537 instr->sampler_dim = GLSL_SAMPLER_DIM_2D;
538 break;
539 case TEXTURE_3D_INDEX:
540 instr->sampler_dim = GLSL_SAMPLER_DIM_3D;
541 break;
542 case TEXTURE_CUBE_INDEX:
543 instr->sampler_dim = GLSL_SAMPLER_DIM_CUBE;
544 break;
545 case TEXTURE_RECT_INDEX:
546 instr->sampler_dim = GLSL_SAMPLER_DIM_RECT;
547 break;
548 default:
549 fprintf(stderr, "Unknown texture target %d\n", prog_inst->TexSrcTarget);
550 abort();
551 }
552
553 switch (instr->sampler_dim) {
554 case GLSL_SAMPLER_DIM_1D:
555 case GLSL_SAMPLER_DIM_BUF:
556 instr->coord_components = 1;
557 break;
558 case GLSL_SAMPLER_DIM_2D:
559 case GLSL_SAMPLER_DIM_RECT:
560 case GLSL_SAMPLER_DIM_EXTERNAL:
561 case GLSL_SAMPLER_DIM_MS:
562 instr->coord_components = 2;
563 break;
564 case GLSL_SAMPLER_DIM_3D:
565 case GLSL_SAMPLER_DIM_CUBE:
566 instr->coord_components = 3;
567 break;
568 case GLSL_SAMPLER_DIM_SUBPASS:
569 case GLSL_SAMPLER_DIM_SUBPASS_MS:
570 unreachable("can't reach");
571 }
572
573 if (!c->sampler_vars[prog_inst->TexSrcUnit]) {
574 const struct glsl_type *type =
575 glsl_sampler_type(instr->sampler_dim, false, false, GLSL_TYPE_FLOAT);
576 nir_variable *var =
577 nir_variable_create(b->shader, nir_var_uniform, type, "sampler");
578 var->data.binding = prog_inst->TexSrcUnit;
579 c->sampler_vars[prog_inst->TexSrcUnit] = var;
580 }
581
582 unsigned src_number = 0;
583
584 instr->src[src_number].src =
585 nir_src_for_ssa(nir_swizzle(b, src[0], SWIZ(X, Y, Z, W),
586 instr->coord_components, true));
587 instr->src[src_number].src_type = nir_tex_src_coord;
588 src_number++;
589
590 if (prog_inst->Opcode == OPCODE_TXP) {
591 instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], W));
592 instr->src[src_number].src_type = nir_tex_src_projector;
593 src_number++;
594 }
595
596 if (prog_inst->Opcode == OPCODE_TXB) {
597 instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], W));
598 instr->src[src_number].src_type = nir_tex_src_bias;
599 src_number++;
600 }
601
602 if (prog_inst->Opcode == OPCODE_TXL) {
603 instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], W));
604 instr->src[src_number].src_type = nir_tex_src_lod;
605 src_number++;
606 }
607
608 if (instr->is_shadow) {
609 if (instr->coord_components < 3)
610 instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], Z));
611 else
612 instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], W));
613
614 instr->src[src_number].src_type = nir_tex_src_comparator;
615 src_number++;
616 }
617
618 assert(src_number == num_srcs);
619
620 nir_ssa_dest_init(&instr->instr, &instr->dest, 4, 32, NULL);
621 nir_builder_instr_insert(b, &instr->instr);
622
623 /* Resolve the writemask on the texture op. */
624 ptn_move_dest(b, dest, &instr->dest.ssa);
625 }
626
627 static const nir_op op_trans[MAX_OPCODE] = {
628 [OPCODE_NOP] = 0,
629 [OPCODE_ABS] = nir_op_fabs,
630 [OPCODE_ADD] = nir_op_fadd,
631 [OPCODE_ARL] = 0,
632 [OPCODE_CMP] = 0,
633 [OPCODE_COS] = 0,
634 [OPCODE_DDX] = nir_op_fddx,
635 [OPCODE_DDY] = nir_op_fddy,
636 [OPCODE_DP2] = 0,
637 [OPCODE_DP3] = 0,
638 [OPCODE_DP4] = 0,
639 [OPCODE_DPH] = 0,
640 [OPCODE_DST] = 0,
641 [OPCODE_END] = 0,
642 [OPCODE_EX2] = 0,
643 [OPCODE_EXP] = 0,
644 [OPCODE_FLR] = nir_op_ffloor,
645 [OPCODE_FRC] = nir_op_ffract,
646 [OPCODE_LG2] = 0,
647 [OPCODE_LIT] = 0,
648 [OPCODE_LOG] = 0,
649 [OPCODE_LRP] = 0,
650 [OPCODE_MAD] = 0,
651 [OPCODE_MAX] = nir_op_fmax,
652 [OPCODE_MIN] = nir_op_fmin,
653 [OPCODE_MOV] = nir_op_fmov,
654 [OPCODE_MUL] = nir_op_fmul,
655 [OPCODE_POW] = 0,
656 [OPCODE_RCP] = 0,
657
658 [OPCODE_RSQ] = 0,
659 [OPCODE_SCS] = 0,
660 [OPCODE_SGE] = 0,
661 [OPCODE_SIN] = 0,
662 [OPCODE_SLT] = 0,
663 [OPCODE_SSG] = nir_op_fsign,
664 [OPCODE_SUB] = nir_op_fsub,
665 [OPCODE_SWZ] = 0,
666 [OPCODE_TEX] = 0,
667 [OPCODE_TRUNC] = nir_op_ftrunc,
668 [OPCODE_TXB] = 0,
669 [OPCODE_TXD] = 0,
670 [OPCODE_TXL] = 0,
671 [OPCODE_TXP] = 0,
672 [OPCODE_XPD] = 0,
673 };
674
675 static void
676 ptn_emit_instruction(struct ptn_compile *c, struct prog_instruction *prog_inst)
677 {
678 nir_builder *b = &c->build;
679 unsigned i;
680 const unsigned op = prog_inst->Opcode;
681
682 if (op == OPCODE_END)
683 return;
684
685 nir_ssa_def *src[3];
686 for (i = 0; i < 3; i++) {
687 src[i] = ptn_get_src(c, &prog_inst->SrcReg[i]);
688 }
689 nir_alu_dest dest = ptn_get_dest(c, &prog_inst->DstReg);
690 if (c->error)
691 return;
692
693 switch (op) {
694 case OPCODE_RSQ:
695 ptn_move_dest(b, dest,
696 nir_frsq(b, nir_fabs(b, ptn_channel(b, src[0], X))));
697 break;
698
699 case OPCODE_RCP:
700 ptn_move_dest(b, dest, nir_frcp(b, ptn_channel(b, src[0], X)));
701 break;
702
703 case OPCODE_EX2:
704 ptn_move_dest(b, dest, nir_fexp2(b, ptn_channel(b, src[0], X)));
705 break;
706
707 case OPCODE_LG2:
708 ptn_move_dest(b, dest, nir_flog2(b, ptn_channel(b, src[0], X)));
709 break;
710
711 case OPCODE_POW:
712 ptn_move_dest(b, dest, nir_fpow(b,
713 ptn_channel(b, src[0], X),
714 ptn_channel(b, src[1], X)));
715 break;
716
717 case OPCODE_COS:
718 ptn_move_dest(b, dest, nir_fcos(b, ptn_channel(b, src[0], X)));
719 break;
720
721 case OPCODE_SIN:
722 ptn_move_dest(b, dest, nir_fsin(b, ptn_channel(b, src[0], X)));
723 break;
724
725 case OPCODE_ARL:
726 ptn_arl(b, dest, src);
727 break;
728
729 case OPCODE_EXP:
730 ptn_exp(b, dest, src);
731 break;
732
733 case OPCODE_LOG:
734 ptn_log(b, dest, src);
735 break;
736
737 case OPCODE_LRP:
738 ptn_lrp(b, dest, src);
739 break;
740
741 case OPCODE_MAD:
742 ptn_move_dest(b, dest, nir_fadd(b, nir_fmul(b, src[0], src[1]), src[2]));
743 break;
744
745 case OPCODE_DST:
746 ptn_dst(b, dest, src);
747 break;
748
749 case OPCODE_LIT:
750 ptn_lit(b, dest, src);
751 break;
752
753 case OPCODE_XPD:
754 ptn_xpd(b, dest, src);
755 break;
756
757 case OPCODE_DP2:
758 ptn_dp2(b, dest, src);
759 break;
760
761 case OPCODE_DP3:
762 ptn_dp3(b, dest, src);
763 break;
764
765 case OPCODE_DP4:
766 ptn_dp4(b, dest, src);
767 break;
768
769 case OPCODE_DPH:
770 ptn_dph(b, dest, src);
771 break;
772
773 case OPCODE_KIL:
774 ptn_kil(b, src);
775 break;
776
777 case OPCODE_CMP:
778 ptn_cmp(b, dest, src);
779 break;
780
781 case OPCODE_SCS:
782 ptn_scs(b, dest, src);
783 break;
784
785 case OPCODE_SLT:
786 ptn_slt(b, dest, src);
787 break;
788
789 case OPCODE_SGE:
790 ptn_sge(b, dest, src);
791 break;
792
793 case OPCODE_TEX:
794 case OPCODE_TXB:
795 case OPCODE_TXD:
796 case OPCODE_TXL:
797 case OPCODE_TXP:
798 ptn_tex(c, dest, src, prog_inst);
799 break;
800
801 case OPCODE_SWZ:
802 /* Extended swizzles were already handled in ptn_get_src(). */
803 ptn_alu(b, nir_op_fmov, dest, src);
804 break;
805
806 case OPCODE_NOP:
807 break;
808
809 default:
810 if (op_trans[op] != 0) {
811 ptn_alu(b, op_trans[op], dest, src);
812 } else {
813 fprintf(stderr, "unknown opcode: %s\n", _mesa_opcode_string(op));
814 abort();
815 }
816 break;
817 }
818
819 if (prog_inst->Saturate) {
820 assert(prog_inst->Saturate);
821 assert(!dest.dest.is_ssa);
822 ptn_move_dest(b, dest, nir_fsat(b, ptn_src_for_dest(c, &dest)));
823 }
824 }
825
826 /**
827 * Puts a NIR intrinsic to store of each PROGRAM_OUTPUT value to the output
828 * variables at the end of the shader.
829 *
830 * We don't generate these incrementally as the PROGRAM_OUTPUT values are
831 * written, because there's no output load intrinsic, which means we couldn't
832 * handle writemasks.
833 */
834 static void
835 ptn_add_output_stores(struct ptn_compile *c)
836 {
837 nir_builder *b = &c->build;
838
839 nir_foreach_variable(var, &b->shader->outputs) {
840 nir_ssa_def *src = nir_load_reg(b, c->output_regs[var->data.location]);
841 if (c->prog->Target == GL_FRAGMENT_PROGRAM_ARB &&
842 var->data.location == FRAG_RESULT_DEPTH) {
843 /* result.depth has this strange convention of being the .z component of
844 * a vec4 with undefined .xyw components. We resolve it to a scalar, to
845 * match GLSL's gl_FragDepth and the expectations of most backends.
846 */
847 src = nir_channel(b, src, 2);
848 }
849 unsigned num_components = glsl_get_vector_elements(var->type);
850 nir_store_var(b, var, src, (1 << num_components) - 1);
851 }
852 }
853
854 static void
855 setup_registers_and_variables(struct ptn_compile *c)
856 {
857 nir_builder *b = &c->build;
858 struct nir_shader *shader = b->shader;
859
860 /* Create input variables. */
861 const int num_inputs = util_last_bit64(c->prog->info.inputs_read);
862 for (int i = 0; i < num_inputs; i++) {
863 if (!(c->prog->info.inputs_read & BITFIELD64_BIT(i)))
864 continue;
865
866 nir_variable *var =
867 nir_variable_create(shader, nir_var_shader_in, glsl_vec4_type(),
868 ralloc_asprintf(shader, "in_%d", i));
869 var->data.location = i;
870 var->data.index = 0;
871
872 if (c->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
873 if (i == VARYING_SLOT_POS) {
874 var->data.origin_upper_left = c->prog->OriginUpperLeft;
875 var->data.pixel_center_integer = c->prog->PixelCenterInteger;
876 } else if (i == VARYING_SLOT_FOGC) {
877 /* fogcoord is defined as <f, 0.0, 0.0, 1.0>. Make the actual
878 * input variable a float, and create a local containing the
879 * full vec4 value.
880 */
881 var->type = glsl_float_type();
882
883 nir_variable *fullvar =
884 nir_local_variable_create(b->impl, glsl_vec4_type(),
885 "fogcoord_tmp");
886
887 nir_store_var(b, fullvar,
888 nir_vec4(b, nir_load_var(b, var),
889 nir_imm_float(b, 0.0),
890 nir_imm_float(b, 0.0),
891 nir_imm_float(b, 1.0)),
892 WRITEMASK_XYZW);
893
894 /* We inserted the real input into the list so the driver has real
895 * inputs, but we set c->input_vars[i] to the temporary so we use
896 * the splatted value.
897 */
898 c->input_vars[i] = fullvar;
899 continue;
900 }
901 }
902
903 c->input_vars[i] = var;
904 }
905
906 /* Create output registers and variables. */
907 int max_outputs = util_last_bit(c->prog->info.outputs_written);
908 c->output_regs = rzalloc_array(c, nir_register *, max_outputs);
909
910 for (int i = 0; i < max_outputs; i++) {
911 if (!(c->prog->info.outputs_written & BITFIELD64_BIT(i)))
912 continue;
913
914 /* Since we can't load from outputs in the IR, we make temporaries
915 * for the outputs and emit stores to the real outputs at the end of
916 * the shader.
917 */
918 nir_register *reg = nir_local_reg_create(b->impl);
919 reg->num_components = 4;
920
921 nir_variable *var = rzalloc(shader, nir_variable);
922 if (c->prog->Target == GL_FRAGMENT_PROGRAM_ARB && i == FRAG_RESULT_DEPTH)
923 var->type = glsl_float_type();
924 else
925 var->type = glsl_vec4_type();
926 var->data.mode = nir_var_shader_out;
927 var->name = ralloc_asprintf(var, "out_%d", i);
928
929 var->data.location = i;
930 var->data.index = 0;
931
932 c->output_regs[i] = reg;
933
934 exec_list_push_tail(&shader->outputs, &var->node);
935 c->output_vars[i] = var;
936 }
937
938 /* Create temporary registers. */
939 c->temp_regs = rzalloc_array(c, nir_register *,
940 c->prog->arb.NumTemporaries);
941
942 nir_register *reg;
943 for (unsigned i = 0; i < c->prog->arb.NumTemporaries; i++) {
944 reg = nir_local_reg_create(b->impl);
945 if (!reg) {
946 c->error = true;
947 return;
948 }
949 reg->num_components = 4;
950 c->temp_regs[i] = reg;
951 }
952
953 /* Create the address register (for ARB_vertex_program). */
954 reg = nir_local_reg_create(b->impl);
955 if (!reg) {
956 c->error = true;
957 return;
958 }
959 reg->num_components = 1;
960 c->addr_reg = reg;
961 }
962
963 struct nir_shader *
964 prog_to_nir(const struct gl_program *prog,
965 const nir_shader_compiler_options *options)
966 {
967 struct ptn_compile *c;
968 struct nir_shader *s;
969 gl_shader_stage stage = _mesa_program_enum_to_shader_stage(prog->Target);
970
971 c = rzalloc(NULL, struct ptn_compile);
972 if (!c)
973 return NULL;
974 c->prog = prog;
975
976 nir_builder_init_simple_shader(&c->build, NULL, stage, options);
977
978 /* Copy the shader_info from the gl_program */
979 c->build.shader->info = prog->info;
980
981 s = c->build.shader;
982
983 if (prog->Parameters->NumParameters > 0) {
984 c->parameters = rzalloc(s, nir_variable);
985 c->parameters->type =
986 glsl_array_type(glsl_vec4_type(), prog->Parameters->NumParameters);
987 c->parameters->name = "parameters";
988 c->parameters->data.read_only = true;
989 c->parameters->data.mode = nir_var_uniform;
990 exec_list_push_tail(&s->uniforms, &c->parameters->node);
991 }
992
993 setup_registers_and_variables(c);
994 if (unlikely(c->error))
995 goto fail;
996
997 for (unsigned int i = 0; i < prog->arb.NumInstructions; i++) {
998 ptn_emit_instruction(c, &prog->arb.Instructions[i]);
999
1000 if (unlikely(c->error))
1001 break;
1002 }
1003
1004 ptn_add_output_stores(c);
1005
1006 s->info.name = ralloc_asprintf(s, "ARB%d", prog->Id);
1007 s->info.num_textures = util_last_bit(prog->SamplersUsed);
1008 s->info.num_ubos = 0;
1009 s->info.num_abos = 0;
1010 s->info.num_ssbos = 0;
1011 s->info.num_images = 0;
1012 s->info.uses_texture_gather = false;
1013 s->info.clip_distance_array_size = 0;
1014 s->info.cull_distance_array_size = 0;
1015 s->info.separate_shader = false;
1016
1017 fail:
1018 if (c->error) {
1019 ralloc_free(s);
1020 s = NULL;
1021 }
1022 ralloc_free(c);
1023 return s;
1024 }