Fix up varying pull constants
[mesa.git] / src / mesa / drivers / dri / i965 / brw_fs_nir.cpp
/*
 * Copyright © 2010 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "glsl/nir/glsl_to_nir.h"
#include "brw_fs.h"

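/* Resolve the effective interpolation mode for a fragment shader input: an
 * explicit qualifier on the variable wins; otherwise gl_Color/gl_SecondaryColor
 * follow the flat-shade state, and everything else interpolates smoothly.
 */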
static glsl_interp_qualifier
determine_interpolation_mode(nir_variable *var, bool flat_shade)
{
   if (var->data.interpolation != INTERP_QUALIFIER_NONE)
      return (glsl_interp_qualifier) var->data.interpolation;
   int location = var->data.location;
   bool is_gl_Color =
      location == VARYING_SLOT_COL0 || location == VARYING_SLOT_COL1;
   if (flat_shade && is_gl_Color)
      return INTERP_QUALIFIER_FLAT;
   else
      return INTERP_QUALIFIER_SMOOTH;
}

void
fs_visitor::emit_nir_code()
{
   /* first, lower the GLSL IR shader to NIR */
   nir_shader *nir = glsl_to_nir(shader->base.ir, NULL, true);
   nir_validate_shader(nir);

   /* lower some of the GLSL-isms into NIR-isms - after this point, we no
    * longer have to deal with variables inside the shader
    */

   nir_lower_variables_scalar(nir, true, true, true, true);
   nir_validate_shader(nir);

   nir_lower_samplers(nir, shader_prog, shader->base.Program);
   nir_validate_shader(nir);

   nir_lower_system_values(nir);
   nir_validate_shader(nir);

   nir_lower_atomics(nir);
   nir_validate_shader(nir);

   nir_remove_dead_variables(nir);
   nir_opt_global_to_local(nir);
   nir_validate_shader(nir);

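   /* Debug: dump the fully lowered NIR to stderr. */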
   if (1)
      nir_print_shader(nir, stderr);

   /* emit the arrays used for inputs and outputs - load/store intrinsics will
    * be converted to reads/writes of these arrays
    */

   if (nir->num_inputs > 0) {
      nir_inputs = fs_reg(GRF, virtual_grf_alloc(nir->num_inputs));
      nir_setup_inputs(nir);
   }

   if (nir->num_outputs > 0) {
      nir_outputs = fs_reg(GRF, virtual_grf_alloc(nir->num_outputs));
      nir_setup_outputs(nir);
   }

   if (nir->num_uniforms > 0) {
      nir_uniforms = fs_reg(UNIFORM, 0);
      nir_setup_uniforms(nir);
   }

   nir_setup_registers(&nir->registers);

   /* get the main function and emit it */
   nir_foreach_overload(nir, overload) {
      assert(strcmp(overload->function->name, "main") == 0);
      assert(overload->impl);
      nir_emit_impl(overload->impl);
   }

   ralloc_free(nir);
}

void
fs_visitor::nir_setup_inputs(nir_shader *shader)
{
   fs_reg varying = nir_inputs;

   struct hash_entry *entry;
   hash_table_foreach(shader->inputs, entry) {
      nir_variable *var = (nir_variable *) entry->data;
      varying.reg_offset = var->data.driver_location;

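      /* gl_FragCoord and gl_FrontFacing aren't interpolated varyings; they
       * are synthesized from payload data, so handle them specially.
       */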
      fs_reg reg;
      if (!strcmp(var->name, "gl_FragCoord")) {
         reg = *emit_fragcoord_interpolation(var->data.pixel_center_integer,
                                             var->data.origin_upper_left);
         emit_percomp(MOV(varying, reg), 0xF);
      } else if (!strcmp(var->name, "gl_FrontFacing")) {
         reg = *emit_frontfacing_interpolation();
         emit(MOV(retype(varying, BRW_REGISTER_TYPE_UD), reg));
      } else {
         nir_emit_interpolation(var, &varying);
      }
   }
}

void
fs_visitor::nir_emit_interpolation(nir_variable *var, fs_reg *varying)
{
   brw_wm_prog_data *prog_data = (brw_wm_prog_data*) this->prog_data;
   brw_wm_prog_key *key = (brw_wm_prog_key*) this->key;
   fs_reg reg = *varying;
   reg.type = brw_type_for_base_type(var->type->get_scalar_type());

   unsigned int array_elements;
   const glsl_type *type;

   if (var->type->is_array()) {
      array_elements = var->type->length;
      if (array_elements == 0) {
         fail("dereferenced array '%s' has length 0\n", var->name);
      }
      type = var->type->fields.array;
   } else {
      array_elements = 1;
      type = var->type;
   }

   glsl_interp_qualifier interpolation_mode =
      determine_interpolation_mode(var, key->flat_shade);

   int location = var->data.location;
   for (unsigned int i = 0; i < array_elements; i++) {
      for (unsigned int j = 0; j < type->matrix_columns; j++) {
         if (prog_data->urb_setup[location] == -1) {
            /* If there's no incoming setup data for this slot, don't
             * emit interpolation for it.
             */
            reg.reg_offset += type->vector_elements;
            location++;
            continue;
         }

         if (interpolation_mode == INTERP_QUALIFIER_FLAT) {
            /* Constant interpolation (flat shading) case. The SF has
             * handed us defined values in only the constant offset
             * field of the setup reg.
             */
            for (unsigned int k = 0; k < type->vector_elements; k++) {
               struct brw_reg interp = interp_reg(location, k);
               interp = suboffset(interp, 3);
               interp.type = reg.type;
               emit(FS_OPCODE_CINTERP, reg, fs_reg(interp));
               reg.reg_offset++;
            }
         } else {
            /* Smooth/noperspective interpolation case. */
            for (unsigned int k = 0; k < type->vector_elements; k++) {
               struct brw_reg interp = interp_reg(location, k);
               if (brw->needs_unlit_centroid_workaround && var->data.centroid) {
                  /* Get the pixel/sample mask into f0 so that we know
                   * which pixels are lit. Then, for each channel that is
                   * unlit, replace the centroid data with non-centroid
                   * data.
                   */
                  emit(FS_OPCODE_MOV_DISPATCH_TO_FLAGS);

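                  /* The two predicated LINTERPs below write the same
                   * destination; when the hardware has PLN, the no_dd_clear/
                   * no_dd_check bits tell the EU not to track the destination
                   * dependency between the two halves of the pair.
                   */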
                  fs_inst *inst;
                  inst = emit_linterp(reg, fs_reg(interp), interpolation_mode,
                                      false, false);
                  inst->predicate = BRW_PREDICATE_NORMAL;
                  inst->predicate_inverse = true;
                  if (brw->has_pln)
                     inst->no_dd_clear = true;

                  inst = emit_linterp(reg, fs_reg(interp), interpolation_mode,
                                      var->data.centroid && !key->persample_shading,
                                      var->data.sample || key->persample_shading);
                  inst->predicate = BRW_PREDICATE_NORMAL;
                  inst->predicate_inverse = false;
                  if (brw->has_pln)
                     inst->no_dd_check = true;

               } else {
                  emit_linterp(reg, fs_reg(interp), interpolation_mode,
                               var->data.centroid && !key->persample_shading,
                               var->data.sample || key->persample_shading);
               }
               if (brw->gen < 6 && interpolation_mode == INTERP_QUALIFIER_SMOOTH) {
                  emit(BRW_OPCODE_MUL, reg, reg, this->pixel_w);
               }
               reg.reg_offset++;
            }

         }
         location++;
      }
   }
}

void
fs_visitor::nir_setup_outputs(nir_shader *shader)
{
   brw_wm_prog_key *key = (brw_wm_prog_key*) this->key;
   fs_reg reg = nir_outputs;

   struct hash_entry *entry;
   hash_table_foreach(shader->outputs, entry) {
      nir_variable *var = (nir_variable *) entry->data;
      reg.reg_offset = var->data.driver_location;

      if (var->data.index > 0) {
         assert(var->data.location == FRAG_RESULT_DATA0);
         assert(var->data.index == 1);
         this->dual_src_output = reg;
         this->do_dual_src = true;
      } else if (var->data.location == FRAG_RESULT_COLOR) {
         /* Writing gl_FragColor outputs to all color regions. */
         for (unsigned int i = 0; i < MAX2(key->nr_color_regions, 1); i++) {
            this->outputs[i] = reg;
            this->output_components[i] = 4;
         }
      } else if (var->data.location == FRAG_RESULT_DEPTH) {
         this->frag_depth = reg;
      } else if (var->data.location == FRAG_RESULT_SAMPLE_MASK) {
         this->sample_mask = reg;
      } else {
         /* gl_FragData or a user-defined FS output */
         assert(var->data.location >= FRAG_RESULT_DATA0 &&
                var->data.location < FRAG_RESULT_DATA0 + BRW_MAX_DRAW_BUFFERS);

         int vector_elements =
            var->type->is_array() ? var->type->fields.array->vector_elements
                                  : var->type->vector_elements;

         /* General color output. */
         for (unsigned int i = 0; i < MAX2(1, var->type->length); i++) {
            int output = var->data.location - FRAG_RESULT_DATA0 + i;
            this->outputs[output] = reg;
            this->outputs[output].reg_offset += vector_elements * i;
            this->output_components[output] = vector_elements;
         }
      }
   }
}

void
fs_visitor::nir_setup_uniforms(nir_shader *shader)
{
   uniforms = shader->num_uniforms;
   param_size[0] = shader->num_uniforms;

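   /* The param[] layout is filled in only once, by the SIMD8 compile; a
    * SIMD16 re-run reuses the same uniform layout, so don't set it up twice.
    */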
   if (dispatch_width != 8)
      return;

   struct hash_entry *entry;
   hash_table_foreach(shader->uniforms, entry) {
      nir_variable *var = (nir_variable *) entry->data;

      /* UBO's and atomics don't take up space in the uniform file */

      if (var->interface_type != NULL || var->type->contains_atomic())
         continue;

      if (strncmp(var->name, "gl_", 3) == 0)
         nir_setup_builtin_uniform(var);
      else
         nir_setup_uniform(var);
   }
}

void
fs_visitor::nir_setup_uniform(nir_variable *var)
{
   int namelen = strlen(var->name);

   /* The data for our (non-builtin) uniforms is stored in a series of
    * gl_uniform_driver_storage structs for each subcomponent that
    * glGetUniformLocation() could name. We know it's been set up in the
    * same order we'd walk the type, so walk the list of storage and find
    * anything with our name, or the prefix of a component that starts with
    * our name.
    */
   unsigned index = var->data.driver_location;
   for (unsigned u = 0; u < shader_prog->NumUserUniformStorage; u++) {
      struct gl_uniform_storage *storage = &shader_prog->UniformStorage[u];

      if (strncmp(var->name, storage->name, namelen) != 0 ||
          (storage->name[namelen] != 0 &&
           storage->name[namelen] != '.' &&
           storage->name[namelen] != '[')) {
         continue;
      }

      unsigned slots = storage->type->component_slots();
      if (storage->array_elements)
         slots *= storage->array_elements;

      for (unsigned i = 0; i < slots; i++) {
         stage_prog_data->param[index++] = &storage->storage[i];
      }
   }

   /* Make sure we actually initialized the right amount of stuff here. */
   assert(var->data.driver_location + var->type->component_slots() == index);
}

void
fs_visitor::nir_setup_builtin_uniform(nir_variable *var)
{
   const nir_state_slot *const slots = var->state_slots;
   assert(var->state_slots != NULL);

   unsigned uniform_index = var->data.driver_location;
   for (unsigned int i = 0; i < var->num_state_slots; i++) {
      /* This state reference has already been setup by ir_to_mesa, but we'll
       * get the same index back here.
       */
      int index = _mesa_add_state_reference(this->prog->Parameters,
                                            (gl_state_index *)slots[i].tokens);

      /* Add each of the unique swizzles of the element as a parameter.
       * This'll end up matching the expected layout of the
       * array/matrix/structure we're trying to fill in.
       */
      int last_swiz = -1;
      for (unsigned int j = 0; j < 4; j++) {
         int swiz = GET_SWZ(slots[i].swizzle, j);
         if (swiz == last_swiz)
            break;
         last_swiz = swiz;

         stage_prog_data->param[uniform_index++] =
            &prog->Parameters->ParameterValues[index][swiz];
      }
   }
}

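/* Allocate one VGRF per NIR register, sized for its array elements times its
 * component count, and remember the mapping so get_nir_src()/get_nir_dest()
 * can look it up later.
 */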
void
fs_visitor::nir_setup_registers(exec_list *list)
{
   foreach_list_typed(nir_register, nir_reg, node, list) {
      unsigned array_elems =
         nir_reg->num_array_elems == 0 ? 1 : nir_reg->num_array_elems;
      unsigned size = array_elems * nir_reg->num_components;
      fs_reg *reg = new(mem_ctx) fs_reg(GRF, virtual_grf_alloc(size));
      _mesa_hash_table_insert(this->nir_reg_ht, nir_reg, reg);
   }
}

void
fs_visitor::nir_emit_impl(nir_function_impl *impl)
{
   nir_setup_registers(&impl->registers);
   nir_emit_cf_list(&impl->body);
}

void
fs_visitor::nir_emit_cf_list(exec_list *list)
{
   foreach_list_typed(nir_cf_node, node, node, list) {
      switch (node->type) {
      case nir_cf_node_if:
         nir_emit_if(nir_cf_node_as_if(node));
         break;

      case nir_cf_node_loop:
         nir_emit_loop(nir_cf_node_as_loop(node));
         break;

      case nir_cf_node_block:
         nir_emit_block(nir_cf_node_as_block(node));
         break;

      default:
         unreachable("Invalid CFG node block");
      }
   }
}

void
fs_visitor::nir_emit_if(nir_if *if_stmt)
{
   if (brw->gen < 6) {
      no16("Can't support (non-uniform) control flow on SIMD16\n");
   }

   /* first, put the condition into f0 */
   fs_inst *inst = emit(MOV(reg_null_d,
                            retype(get_nir_src(if_stmt->condition),
                                   BRW_REGISTER_TYPE_UD)));
   inst->conditional_mod = BRW_CONDITIONAL_NZ;

   emit(IF(BRW_PREDICATE_NORMAL));

   nir_emit_cf_list(&if_stmt->then_list);

   /* note: if the else is empty, dead CF elimination will remove it */
   emit(BRW_OPCODE_ELSE);

   nir_emit_cf_list(&if_stmt->else_list);

   emit(BRW_OPCODE_ENDIF);

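   /* If both branches boiled down to single MOVs of the same destination,
    * collapse the whole IF/ELSE/ENDIF into predicated SELs.
    */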
   try_replace_with_sel();
}

void
fs_visitor::nir_emit_loop(nir_loop *loop)
{
   if (brw->gen < 6) {
      no16("Can't support (non-uniform) control flow on SIMD16\n");
   }

   emit(BRW_OPCODE_DO);

   nir_emit_cf_list(&loop->body);

   emit(BRW_OPCODE_WHILE);
}

void
fs_visitor::nir_emit_block(nir_block *block)
{
   nir_foreach_instr(block, instr) {
      nir_emit_instr(instr);
   }
}

void
fs_visitor::nir_emit_instr(nir_instr *instr)
{
   switch (instr->type) {
   case nir_instr_type_alu:
      nir_emit_alu(nir_instr_as_alu(instr));
      break;

   case nir_instr_type_intrinsic:
      nir_emit_intrinsic(nir_instr_as_intrinsic(instr));
      break;

   case nir_instr_type_texture:
      nir_emit_texture(nir_instr_as_texture(instr));
      break;

   case nir_instr_type_load_const:
      nir_emit_load_const(nir_instr_as_load_const(instr));
      break;

   case nir_instr_type_jump:
      nir_emit_jump(nir_instr_as_jump(instr));
      break;

   default:
      unreachable("unknown instruction type");
   }
}

static brw_reg_type
brw_type_for_nir_type(nir_alu_type type)
{
   switch (type) {
   case nir_type_bool:
   case nir_type_unsigned:
      return BRW_REGISTER_TYPE_UD;
   case nir_type_int:
      return BRW_REGISTER_TYPE_D;
   case nir_type_float:
      return BRW_REGISTER_TYPE_F;
   default:
      unreachable("unknown type");
   }

   return BRW_REGISTER_TYPE_F;
}

void
fs_visitor::nir_emit_alu(nir_alu_instr *instr)
{
   struct brw_wm_prog_key *fs_key = (struct brw_wm_prog_key *) this->key;

   fs_reg op[3];
   fs_reg dest = retype(get_nir_dest(instr->dest.dest),
                        brw_type_for_nir_type(nir_op_infos[instr->op].output_type));

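   /* If the instruction is predicated, compute into a temporary and
    * predicate only the final copy into the real destination, so inactive
    * channels keep their previous values.
    */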
   fs_reg result;
   if (instr->has_predicate) {
      result = fs_reg(GRF, virtual_grf_alloc(4));
      result.type = dest.type;
   } else {
      result = dest;
   }

   for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
      op[i] = retype(get_nir_alu_src(instr, i),
                     brw_type_for_nir_type(nir_op_infos[instr->op].input_types[i]));
   }

   switch (instr->op) {
   case nir_op_fmov:
   case nir_op_i2f:
   case nir_op_u2f: {
      fs_inst *inst = MOV(result, op[0]);
      inst->saturate = instr->dest.saturate;
      emit_percomp(inst, instr->dest.write_mask);
   }
      break;

   case nir_op_imov:
   case nir_op_f2i:
   case nir_op_f2u:
      emit_percomp(MOV(result, op[0]), instr->dest.write_mask);
      break;

   case nir_op_fsign: {
      /* AND(val, 0x80000000) gives the sign bit.
       *
       * Predicated OR ORs 1.0 (0x3f800000) with the sign bit if val is not
       * zero.
       */
      emit_percomp(CMP(reg_null_f, op[0], fs_reg(0.0f), BRW_CONDITIONAL_NZ),
                   instr->dest.write_mask);

      fs_reg result_int = retype(result, BRW_REGISTER_TYPE_UD);
      op[0].type = BRW_REGISTER_TYPE_UD;
      result.type = BRW_REGISTER_TYPE_UD;
      emit_percomp(AND(result_int, op[0], fs_reg(0x80000000u)),
                   instr->dest.write_mask);

      fs_inst *inst = OR(result_int, result_int, fs_reg(0x3f800000u));
      inst->predicate = BRW_PREDICATE_NORMAL;
      emit_percomp(inst, instr->dest.write_mask);
      if (instr->dest.saturate) {
         fs_inst *inst = MOV(result, result);
         inst->saturate = true;
         emit_percomp(inst, instr->dest.write_mask);
      }
      break;
   }

   case nir_op_isign: {
      /* ASR(val, 31) -> negative val generates 0xffffffff (signed -1).
       *              -> non-negative val generates 0x00000000.
       * Predicated OR sets 1 if val is positive.
       */
      emit_percomp(CMP(reg_null_d, op[0], fs_reg(0), BRW_CONDITIONAL_G),
                   instr->dest.write_mask);

      emit_percomp(ASR(result, op[0], fs_reg(31)), instr->dest.write_mask);

      fs_inst *inst = OR(result, result, fs_reg(1));
      inst->predicate = BRW_PREDICATE_NORMAL;
      emit_percomp(inst, instr->dest.write_mask);
      break;
   }

   case nir_op_frcp:
      emit_math_percomp(SHADER_OPCODE_RCP, result, op[0],
                        instr->dest.write_mask, instr->dest.saturate);
      break;

   case nir_op_fexp2:
      emit_math_percomp(SHADER_OPCODE_EXP2, result, op[0],
                        instr->dest.write_mask, instr->dest.saturate);
      break;

   case nir_op_flog2:
      emit_math_percomp(SHADER_OPCODE_LOG2, result, op[0],
                        instr->dest.write_mask, instr->dest.saturate);
      break;

   case nir_op_fexp:
   case nir_op_flog:
      unreachable("not reached: should be handled by ir_explog_to_explog2");

   case nir_op_fsin:
   case nir_op_fsin_reduced:
      emit_math_percomp(SHADER_OPCODE_SIN, result, op[0],
                        instr->dest.write_mask, instr->dest.saturate);
      break;

   case nir_op_fcos:
   case nir_op_fcos_reduced:
      emit_math_percomp(SHADER_OPCODE_COS, result, op[0],
                        instr->dest.write_mask, instr->dest.saturate);
      break;

   case nir_op_fddx:
      if (fs_key->high_quality_derivatives)
         emit_percomp(FS_OPCODE_DDX_FINE, result, op[0],
                      instr->dest.write_mask, instr->dest.saturate);
      else
         emit_percomp(FS_OPCODE_DDX_COARSE, result, op[0],
                      instr->dest.write_mask, instr->dest.saturate);
      break;
   case nir_op_fddy:
      if (fs_key->high_quality_derivatives)
         emit_percomp(FS_OPCODE_DDY_FINE, result, op[0],
                      fs_reg(fs_key->render_to_fbo),
                      instr->dest.write_mask, instr->dest.saturate);
      else
         emit_percomp(FS_OPCODE_DDY_COARSE, result, op[0],
                      fs_reg(fs_key->render_to_fbo),
                      instr->dest.write_mask, instr->dest.saturate);
      break;

   case nir_op_fadd:
   case nir_op_iadd: {
      fs_inst *inst = ADD(result, op[0], op[1]);
      inst->saturate = instr->dest.saturate;
      emit_percomp(inst, instr->dest.write_mask);
      break;
   }

   case nir_op_fmul: {
      fs_inst *inst = MUL(result, op[0], op[1]);
      inst->saturate = instr->dest.saturate;
      emit_percomp(inst, instr->dest.write_mask);
      break;
   }

   case nir_op_imul: {
      /* TODO put in the 16-bit constant optimization once we have SSA */

      if (brw->gen >= 7)
         no16("SIMD16 explicit accumulator operands unsupported\n");

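      /* A 32x32 integer multiply needs the MUL/MACH accumulator sequence:
       * MACH's destination gets the high 32 bits of the product (discarded
       * here), while the low 32 bits are left in the accumulator and copied
       * out with the final MOV.
       */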
      struct brw_reg acc = retype(brw_acc_reg(dispatch_width), result.type);

      emit_percomp(MUL(acc, op[0], op[1]), instr->dest.write_mask);
      emit_percomp(MACH(reg_null_d, op[0], op[1]), instr->dest.write_mask);
      emit_percomp(MOV(result, fs_reg(acc)), instr->dest.write_mask);
      break;
   }

   case nir_op_imul_high:
   case nir_op_umul_high: {
      if (brw->gen >= 7)
         no16("SIMD16 explicit accumulator operands unsupported\n");

      struct brw_reg acc = retype(brw_acc_reg(dispatch_width), result.type);

      emit_percomp(MUL(acc, op[0], op[1]), instr->dest.write_mask);
      emit_percomp(MACH(result, op[0], op[1]), instr->dest.write_mask);
      break;
   }

   case nir_op_idiv:
   case nir_op_udiv:
      emit_math_percomp(SHADER_OPCODE_INT_QUOTIENT, result, op[0], op[1],
                        instr->dest.write_mask);
      break;

   case nir_op_uadd_carry: {
      if (brw->gen >= 7)
         no16("SIMD16 explicit accumulator operands unsupported\n");

      struct brw_reg acc = retype(brw_acc_reg(dispatch_width),
                                  BRW_REGISTER_TYPE_UD);

      emit_percomp(ADDC(reg_null_ud, op[0], op[1]), instr->dest.write_mask);
      emit_percomp(MOV(result, fs_reg(acc)), instr->dest.write_mask);
      break;
   }

   case nir_op_usub_borrow: {
      if (brw->gen >= 7)
         no16("SIMD16 explicit accumulator operands unsupported\n");

      struct brw_reg acc = retype(brw_acc_reg(dispatch_width),
                                  BRW_REGISTER_TYPE_UD);

      emit_percomp(SUBB(reg_null_ud, op[0], op[1]), instr->dest.write_mask);
      emit_percomp(MOV(result, fs_reg(acc)), instr->dest.write_mask);
      break;
   }

   case nir_op_umod:
      emit_math_percomp(SHADER_OPCODE_INT_REMAINDER, result, op[0],
                        op[1], instr->dest.write_mask);
      break;

   case nir_op_flt:
   case nir_op_ilt:
   case nir_op_ult:
      emit_percomp(CMP(result, op[0], op[1], BRW_CONDITIONAL_L),
                   instr->dest.write_mask);
      break;

   case nir_op_fge:
   case nir_op_ige:
   case nir_op_uge:
      emit_percomp(CMP(result, op[0], op[1], BRW_CONDITIONAL_GE),
                   instr->dest.write_mask);
      break;

   case nir_op_feq:
   case nir_op_ieq:
      emit_percomp(CMP(result, op[0], op[1], BRW_CONDITIONAL_Z),
                   instr->dest.write_mask);
      break;

   case nir_op_fne:
   case nir_op_ine:
      emit_percomp(CMP(result, op[0], op[1], BRW_CONDITIONAL_NZ),
                   instr->dest.write_mask);
      break;

   case nir_op_ball_fequal2:
   case nir_op_ball_iequal2:
   case nir_op_ball_fequal3:
   case nir_op_ball_iequal3:
   case nir_op_ball_fequal4:
   case nir_op_ball_iequal4: {
      unsigned num_components = nir_op_infos[instr->op].input_sizes[0];
      fs_reg temp = fs_reg(GRF, virtual_grf_alloc(num_components));
      emit_percomp(CMP(temp, op[0], op[1], BRW_CONDITIONAL_Z),
                   (1 << num_components) - 1);
      emit_reduction(BRW_OPCODE_AND, result, temp, num_components);
      break;
   }

   case nir_op_bany_fnequal2:
   case nir_op_bany_inequal2:
   case nir_op_bany_fnequal3:
   case nir_op_bany_inequal3:
   case nir_op_bany_fnequal4:
   case nir_op_bany_inequal4: {
      unsigned num_components = nir_op_infos[instr->op].input_sizes[0];
      fs_reg temp = fs_reg(GRF, virtual_grf_alloc(num_components));
      temp.type = BRW_REGISTER_TYPE_UD;
      emit_percomp(CMP(temp, op[0], op[1], BRW_CONDITIONAL_NZ),
                   (1 << num_components) - 1);
      emit_reduction(BRW_OPCODE_OR, result, temp, num_components);
      break;
   }

   case nir_op_inot:
      emit_percomp(NOT(result, op[0]), instr->dest.write_mask);
      break;
   case nir_op_ixor:
      emit_percomp(XOR(result, op[0], op[1]), instr->dest.write_mask);
      break;
   case nir_op_ior:
      emit_percomp(OR(result, op[0], op[1]), instr->dest.write_mask);
      break;
   case nir_op_iand:
      emit_percomp(AND(result, op[0], op[1]), instr->dest.write_mask);
      break;

   case nir_op_fdot2:
   case nir_op_fdot3:
   case nir_op_fdot4: {
      unsigned num_components = nir_op_infos[instr->op].input_sizes[0];
      fs_reg temp = fs_reg(GRF, virtual_grf_alloc(num_components));
      emit_percomp(MUL(temp, op[0], op[1]), (1 << num_components) - 1);
      emit_reduction(BRW_OPCODE_ADD, result, temp, num_components);
      if (instr->dest.saturate) {
         fs_inst *inst = emit(MOV(result, result));
         inst->saturate = true;
      }
      break;
   }

   case nir_op_bany2:
   case nir_op_bany3:
   case nir_op_bany4: {
      unsigned num_components = nir_op_infos[instr->op].input_sizes[0];
      emit_reduction(BRW_OPCODE_OR, result, op[0], num_components);
      break;
   }

   case nir_op_ball2:
   case nir_op_ball3:
   case nir_op_ball4: {
      unsigned num_components = nir_op_infos[instr->op].input_sizes[0];
      emit_reduction(BRW_OPCODE_AND, result, op[0], num_components);
      break;
   }

   case nir_op_fnoise1_1:
   case nir_op_fnoise1_2:
   case nir_op_fnoise1_3:
   case nir_op_fnoise1_4:
   case nir_op_fnoise2_1:
   case nir_op_fnoise2_2:
   case nir_op_fnoise2_3:
   case nir_op_fnoise2_4:
   case nir_op_fnoise3_1:
   case nir_op_fnoise3_2:
   case nir_op_fnoise3_3:
   case nir_op_fnoise3_4:
   case nir_op_fnoise4_1:
   case nir_op_fnoise4_2:
   case nir_op_fnoise4_3:
   case nir_op_fnoise4_4:
      unreachable("not reached: should be handled by lower_noise");

   case nir_op_vec2:
   case nir_op_vec3:
   case nir_op_vec4:
      unreachable("not reached: should be handled by lower_quadop_vector");

   case nir_op_ldexp:
      unreachable("not reached: should be handled by ldexp_to_arith()");

   case nir_op_fsqrt:
      emit_math_percomp(SHADER_OPCODE_SQRT, result, op[0],
                        instr->dest.write_mask, instr->dest.saturate);
      break;

   case nir_op_frsq:
      emit_math_percomp(SHADER_OPCODE_RSQ, result, op[0],
                        instr->dest.write_mask, instr->dest.saturate);
      break;

   case nir_op_b2i:
      emit_percomp(AND(result, op[0], fs_reg(1)), instr->dest.write_mask);
      break;
   case nir_op_b2f: {
      emit_percomp(AND(retype(result, BRW_REGISTER_TYPE_UD), op[0],
                       fs_reg(0x3f800000u)),
                   instr->dest.write_mask);
      break;
   }

   case nir_op_f2b:
      emit_percomp(CMP(result, op[0], fs_reg(0.0f), BRW_CONDITIONAL_NZ),
                   instr->dest.write_mask);
      break;
   case nir_op_i2b:
      emit_percomp(CMP(result, op[0], fs_reg(0), BRW_CONDITIONAL_NZ),
                   instr->dest.write_mask);
      break;

   case nir_op_ftrunc: {
      fs_inst *inst = RNDZ(result, op[0]);
      inst->saturate = instr->dest.saturate;
      emit_percomp(inst, instr->dest.write_mask);
      break;
   }
   case nir_op_fceil: {
      op[0].negate = !op[0].negate;
      fs_reg temp = fs_reg(this, glsl_type::vec4_type);
      emit_percomp(RNDD(temp, op[0]), instr->dest.write_mask);
      temp.negate = true;
      fs_inst *inst = MOV(result, temp);
      inst->saturate = instr->dest.saturate;
      emit_percomp(inst, instr->dest.write_mask);
      break;
   }
   case nir_op_ffloor: {
      fs_inst *inst = RNDD(result, op[0]);
      inst->saturate = instr->dest.saturate;
      emit_percomp(inst, instr->dest.write_mask);
      break;
   }
   case nir_op_ffract: {
      fs_inst *inst = FRC(result, op[0]);
      inst->saturate = instr->dest.saturate;
      emit_percomp(inst, instr->dest.write_mask);
      break;
   }
   case nir_op_fround_even: {
      fs_inst *inst = RNDE(result, op[0]);
      inst->saturate = instr->dest.saturate;
      emit_percomp(inst, instr->dest.write_mask);
      break;
   }

   case nir_op_fmin:
   case nir_op_imin:
   case nir_op_umin:
      if (brw->gen >= 6) {
         emit_percomp(BRW_OPCODE_SEL, result, op[0], op[1],
                      instr->dest.write_mask, instr->dest.saturate,
                      BRW_PREDICATE_NONE, BRW_CONDITIONAL_L);
      } else {
         emit_percomp(CMP(reg_null_d, op[0], op[1], BRW_CONDITIONAL_L),
                      instr->dest.write_mask);

         emit_percomp(BRW_OPCODE_SEL, result, op[0], op[1],
                      instr->dest.write_mask, instr->dest.saturate,
                      BRW_PREDICATE_NORMAL);
      }
      break;

   case nir_op_fmax:
   case nir_op_imax:
   case nir_op_umax:
      if (brw->gen >= 6) {
         emit_percomp(BRW_OPCODE_SEL, result, op[0], op[1],
                      instr->dest.write_mask, instr->dest.saturate,
                      BRW_PREDICATE_NONE, BRW_CONDITIONAL_GE);
      } else {
         emit_percomp(CMP(reg_null_d, op[0], op[1], BRW_CONDITIONAL_GE),
                      instr->dest.write_mask);

         emit_percomp(BRW_OPCODE_SEL, result, op[0], op[1],
                      instr->dest.write_mask, instr->dest.saturate,
                      BRW_PREDICATE_NORMAL);
      }
      break;

   case nir_op_pack_snorm_2x16:
   case nir_op_pack_snorm_4x8:
   case nir_op_pack_unorm_2x16:
   case nir_op_pack_unorm_4x8:
   case nir_op_unpack_snorm_2x16:
   case nir_op_unpack_snorm_4x8:
   case nir_op_unpack_unorm_2x16:
   case nir_op_unpack_unorm_4x8:
   case nir_op_unpack_half_2x16:
   case nir_op_pack_half_2x16:
      unreachable("not reached: should be handled by lower_packing_builtins");

   case nir_op_unpack_half_2x16_split_x:
      emit_percomp(FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X, result, op[0],
                   instr->dest.write_mask, instr->dest.saturate);
      break;
   case nir_op_unpack_half_2x16_split_y:
      emit_percomp(FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y, result, op[0],
                   instr->dest.write_mask, instr->dest.saturate);
      break;

   case nir_op_fpow:
      emit_percomp(SHADER_OPCODE_POW, result, op[0], op[1],
                   instr->dest.write_mask, instr->dest.saturate);
      break;

   case nir_op_bitfield_reverse:
      emit_percomp(BFREV(result, op[0]), instr->dest.write_mask);
      break;

   case nir_op_bit_count:
      emit_percomp(CBIT(result, op[0]), instr->dest.write_mask);
      break;

   case nir_op_find_msb: {
      fs_reg temp = fs_reg(this, glsl_type::uvec4_type);
      emit_percomp(FBH(temp, op[0]), instr->dest.write_mask);

      /* FBH counts from the MSB side, while GLSL's findMSB() wants the count
       * from the LSB side. If FBH didn't return an error (0xFFFFFFFF), then
       * subtract the result from 31 to convert the MSB count into an LSB count.
       */

      emit_percomp(CMP(reg_null_d, temp, fs_reg(~0), BRW_CONDITIONAL_NZ),
                   instr->dest.write_mask);
      temp.negate = true;
      fs_inst *inst = ADD(result, temp, fs_reg(31));
      inst->predicate = BRW_PREDICATE_NORMAL;
      emit_percomp(inst, instr->dest.write_mask);
      break;
   }

   case nir_op_find_lsb:
      emit_percomp(FBL(result, op[0]), instr->dest.write_mask);
      break;

   case nir_op_ubitfield_extract:
   case nir_op_ibitfield_extract:
      emit_percomp(BFE(result, op[2], op[1], op[0]), instr->dest.write_mask);
      break;
   case nir_op_bfm:
      emit_percomp(BFI1(result, op[0], op[1]), instr->dest.write_mask);
      break;
   case nir_op_bfi:
      emit_percomp(BFI2(result, op[0], op[1], op[2]), instr->dest.write_mask);
      break;

   case nir_op_bitfield_insert:
      unreachable("not reached: should be handled by "
                  "lower_instructions::bitfield_insert_to_bfm_bfi");

   case nir_op_ishl:
      emit_percomp(SHL(result, op[0], op[1]), instr->dest.write_mask);
      break;
   case nir_op_ishr:
      emit_percomp(ASR(result, op[0], op[1]), instr->dest.write_mask);
      break;
   case nir_op_ushr:
      emit_percomp(SHR(result, op[0], op[1]), instr->dest.write_mask);
      break;

   case nir_op_pack_half_2x16_split:
      emit_percomp(FS_OPCODE_PACK_HALF_2x16_SPLIT, result, op[0], op[1],
                   instr->dest.write_mask);
      break;

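   /* MAD computes src0 + src1 * src2, and LRP blends src2 toward src1 by
    * src0, so NIR's (a, b, c) operands arrive reversed as op[2], op[1],
    * op[0].
    */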
   case nir_op_ffma:
      emit_percomp(MAD(result, op[2], op[1], op[0]), instr->dest.write_mask);
      break;

   case nir_op_flrp:
      /* TODO emulate for gen < 6 */
      emit_percomp(LRP(result, op[2], op[1], op[0]), instr->dest.write_mask);
      break;

   case nir_op_bcsel:
      emit(CMP(reg_null_d, op[0], fs_reg(0), BRW_CONDITIONAL_NZ));
      emit_percomp(BRW_OPCODE_SEL, result, op[1], op[2],
                   instr->dest.write_mask, false, BRW_PREDICATE_NORMAL);
      break;

   default:
      unreachable("unhandled instruction");
   }

   /* emit a predicated move if there was predication */
   if (instr->has_predicate) {
      fs_inst *inst = emit(MOV(reg_null_d,
                               retype(get_nir_src(instr->predicate),
                                      BRW_REGISTER_TYPE_UD)));
      inst->conditional_mod = BRW_CONDITIONAL_NZ;
      inst = MOV(dest, result);
      inst->predicate = BRW_PREDICATE_NORMAL;
      emit_percomp(inst, instr->dest.write_mask);
   }
}

fs_reg
fs_visitor::get_nir_src(nir_src src)
{
   struct hash_entry *entry =
      _mesa_hash_table_search(this->nir_reg_ht, src.reg.reg);
   fs_reg reg = *((fs_reg *) entry->data);
   /* to avoid floating-point denorm flushing problems, set the type by
    * default to D - instructions that need floating point semantics will set
    * this to F if they need to
    */
   reg.type = BRW_REGISTER_TYPE_D;
   reg.reg_offset = src.reg.base_offset;
   if (src.reg.indirect) {
      reg.reladdr = new(mem_ctx) fs_reg();
      *reg.reladdr = retype(get_nir_src(*src.reg.indirect),
                            BRW_REGISTER_TYPE_D);
   }

   return reg;
}

fs_reg
fs_visitor::get_nir_alu_src(nir_alu_instr *instr, unsigned src)
{
   fs_reg reg = get_nir_src(instr->src[src].src);

   reg.abs = instr->src[src].abs;
   reg.negate = instr->src[src].negate;

   bool needs_swizzle = false;
   unsigned num_components = 0;
   for (unsigned i = 0; i < 4; i++) {
      if (!nir_alu_instr_channel_used(instr, src, i))
         continue;

      if (instr->src[src].swizzle[i] != i)
         needs_swizzle = true;

      num_components = i + 1;
   }

   if (needs_swizzle) {
      /* resolve the swizzle through MOV's */
      fs_reg new_reg = fs_reg(GRF, virtual_grf_alloc(num_components));

      for (unsigned i = 0; i < 4; i++) {
         if (!nir_alu_instr_channel_used(instr, src, i))
            continue;

         fs_reg dest = new_reg;
         dest.type = reg.type;
         dest.reg_offset = i;

         fs_reg src0 = reg;
         src0.reg_offset += instr->src[src].swizzle[i];

         emit(MOV(dest, src0));
      }

      return new_reg;
   }

   return reg;
}

fs_reg
fs_visitor::get_nir_dest(nir_dest dest)
{
   struct hash_entry *entry =
      _mesa_hash_table_search(this->nir_reg_ht, dest.reg.reg);
   fs_reg reg = *((fs_reg *) entry->data);
   reg.reg_offset = dest.reg.base_offset;
   if (dest.reg.indirect) {
      reg.reladdr = new(mem_ctx) fs_reg();
      *reg.reladdr = retype(get_nir_src(*dest.reg.indirect),
                            BRW_REGISTER_TYPE_D);
   }

   return reg;
}

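/* The emit_percomp() helpers emit one scalar instruction per channel enabled
 * in a NIR write mask: NIR values are still vectors here while this backend
 * is scalar, so each enabled component gets its own copy of the instruction
 * with the destination and any GRF sources bumped by the component index.
 */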
void
fs_visitor::emit_percomp(fs_inst *inst, unsigned wr_mask)
{
   for (unsigned i = 0; i < 4; i++) {
      if (!((wr_mask >> i) & 1))
         continue;

      fs_inst *new_inst = new(mem_ctx) fs_inst(*inst);
      new_inst->dst.reg_offset += i;
      for (unsigned j = 0; j < new_inst->sources; j++)
         if (inst->src[j].file == GRF)
            new_inst->src[j].reg_offset += i;

      emit(new_inst);
   }
}

void
fs_visitor::emit_percomp(enum opcode op, fs_reg dest, fs_reg src0,
                         unsigned wr_mask, bool saturate,
                         enum brw_predicate predicate,
                         enum brw_conditional_mod mod)
{
   for (unsigned i = 0; i < 4; i++) {
      if (!((wr_mask >> i) & 1))
         continue;

      fs_inst *new_inst = new(mem_ctx) fs_inst(op, dest, src0);
      new_inst->dst.reg_offset += i;
      for (unsigned j = 0; j < new_inst->sources; j++)
         if (new_inst->src[j].file == GRF)
            new_inst->src[j].reg_offset += i;

      new_inst->predicate = predicate;
      new_inst->conditional_mod = mod;
      new_inst->saturate = saturate;
      emit(new_inst);
   }
}

void
fs_visitor::emit_percomp(enum opcode op, fs_reg dest, fs_reg src0, fs_reg src1,
                         unsigned wr_mask, bool saturate,
                         enum brw_predicate predicate,
                         enum brw_conditional_mod mod)
{
   for (unsigned i = 0; i < 4; i++) {
      if (!((wr_mask >> i) & 1))
         continue;

      fs_inst *new_inst = new(mem_ctx) fs_inst(op, dest, src0, src1);
      new_inst->dst.reg_offset += i;
      for (unsigned j = 0; j < new_inst->sources; j++)
         if (new_inst->src[j].file == GRF)
            new_inst->src[j].reg_offset += i;

      new_inst->predicate = predicate;
      new_inst->conditional_mod = mod;
      new_inst->saturate = saturate;
      emit(new_inst);
   }
}

void
fs_visitor::emit_math_percomp(enum opcode op, fs_reg dest, fs_reg src0,
                              unsigned wr_mask, bool saturate)
{
   for (unsigned i = 0; i < 4; i++) {
      if (!((wr_mask >> i) & 1))
         continue;

      fs_reg new_dest = dest;
      new_dest.reg_offset += i;
      fs_reg new_src0 = src0;
      if (src0.file == GRF)
         new_src0.reg_offset += i;

      fs_inst *new_inst = emit_math(op, new_dest, new_src0);
      new_inst->saturate = saturate;
   }
}

void
fs_visitor::emit_math_percomp(enum opcode op, fs_reg dest, fs_reg src0,
                              fs_reg src1, unsigned wr_mask,
                              bool saturate)
{
   for (unsigned i = 0; i < 4; i++) {
      if (!((wr_mask >> i) & 1))
         continue;

      fs_reg new_dest = dest;
      new_dest.reg_offset += i;
      fs_reg new_src0 = src0;
      if (src0.file == GRF)
         new_src0.reg_offset += i;
      fs_reg new_src1 = src1;
      if (src1.file == GRF)
         new_src1.reg_offset += i;

      fs_inst *new_inst = emit_math(op, new_dest, new_src0, new_src1);
      new_inst->saturate = saturate;
   }
}

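/* Combine num_components consecutive channels of src into a single value in
 * dest using a binary tree of the given op: (s0 op s1), then op s2 for three
 * components, or op (s2 op s3) for four.
 */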
void
fs_visitor::emit_reduction(enum opcode op, fs_reg dest, fs_reg src,
                           unsigned num_components)
{
   fs_reg src0 = src;
   fs_reg src1 = src;
   src1.reg_offset++;

   if (num_components == 2) {
      emit(op, dest, src0, src1);
      return;
   }

   fs_reg temp1 = fs_reg(GRF, virtual_grf_alloc(1));
   temp1.type = src.type;
   emit(op, temp1, src0, src1);

   fs_reg src2 = src;
   src2.reg_offset += 2;

   if (num_components == 3) {
      emit(op, dest, temp1, src2);
      return;
   }

   assert(num_components == 4);

   fs_reg src3 = src;
   src3.reg_offset += 3;
   fs_reg temp2 = fs_reg(GRF, virtual_grf_alloc(1));
   temp2.type = src.type;

   emit(op, temp2, src2, src3);
   emit(op, dest, temp1, temp2);
}

void
fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
{
   fs_reg dest;
   if (nir_intrinsic_infos[instr->intrinsic].has_dest)
      dest = get_nir_dest(instr->dest);

   if (instr->has_predicate) {
      fs_inst *inst = emit(MOV(reg_null_d,
                               retype(get_nir_src(instr->predicate),
                                      BRW_REGISTER_TYPE_UD)));
      inst->conditional_mod = BRW_CONDITIONAL_NZ;
   }

   switch (instr->intrinsic) {
   case nir_intrinsic_discard: {
      /* We track our discarded pixels in f0.1. By predicating on it, we can
       * update just the flag bits that aren't yet discarded. By emitting a
       * CMP of g0 != g0, all our currently executing channels will get turned
       * off.
       */
      fs_reg some_reg = fs_reg(retype(brw_vec8_grf(0, 0),
                                      BRW_REGISTER_TYPE_UW));
      fs_inst *cmp = emit(CMP(reg_null_f, some_reg, some_reg,
                              BRW_CONDITIONAL_NZ));
      cmp->predicate = BRW_PREDICATE_NORMAL;
      cmp->flag_subreg = 1;

      if (brw->gen >= 6) {
         /* For performance, after a discard, jump to the end of the shader.
          * Only jump if all relevant channels have been discarded.
          */
         fs_inst *discard_jump = emit(FS_OPCODE_DISCARD_JUMP);
         discard_jump->flag_subreg = 1;

         discard_jump->predicate = (dispatch_width == 8)
                                   ? BRW_PREDICATE_ALIGN1_ANY8H
                                   : BRW_PREDICATE_ALIGN1_ANY16H;
         discard_jump->predicate_inverse = true;
      }

      break;
   }

   case nir_intrinsic_atomic_counter_inc:
   case nir_intrinsic_atomic_counter_dec:
   case nir_intrinsic_atomic_counter_read:
      assert(!"TODO");

   case nir_intrinsic_load_front_face:
      assert(!"TODO");

   case nir_intrinsic_load_sample_mask_in: {
      assert(brw->gen >= 7);
      fs_reg reg = fs_reg(retype(brw_vec8_grf(payload.sample_mask_in_reg, 0),
                                 BRW_REGISTER_TYPE_D));
      dest.type = reg.type;
      fs_inst *inst = MOV(dest, reg);
      if (instr->has_predicate)
         inst->predicate = BRW_PREDICATE_NORMAL;
      emit(inst);
      break;
   }

   case nir_intrinsic_load_sample_pos:
   case nir_intrinsic_load_sample_id:
      assert(!"TODO");

   case nir_intrinsic_load_uniform_vec1:
   case nir_intrinsic_load_uniform_vec2:
   case nir_intrinsic_load_uniform_vec3:
   case nir_intrinsic_load_uniform_vec4: {
      unsigned index = 0;
      for (int i = 0; i < instr->const_index[1]; i++) {
         for (unsigned j = 0;
              j < nir_intrinsic_infos[instr->intrinsic].dest_components; j++) {
            fs_reg src = nir_uniforms;
            src.reg_offset = instr->const_index[0] + index;
            src.type = dest.type;
            index++;

            fs_inst *inst = MOV(dest, src);
            if (instr->has_predicate)
               inst->predicate = BRW_PREDICATE_NORMAL;
            emit(inst);
            dest.reg_offset++;
         }
      }
      break;
   }

   case nir_intrinsic_load_uniform_vec1_indirect:
   case nir_intrinsic_load_uniform_vec2_indirect:
   case nir_intrinsic_load_uniform_vec3_indirect:
   case nir_intrinsic_load_uniform_vec4_indirect: {
      unsigned index = 0;
      for (int i = 0; i < instr->const_index[1]; i++) {
         for (unsigned j = 0;
              j < nir_intrinsic_infos[instr->intrinsic].dest_components; j++) {
            fs_reg src = nir_uniforms;
            src.reg_offset = instr->const_index[0] + index;
            src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[0]));
            src.reladdr->type = BRW_REGISTER_TYPE_D;
            src.type = dest.type;
            index++;

            fs_inst *inst = MOV(dest, src);
            if (instr->has_predicate)
               inst->predicate = BRW_PREDICATE_NORMAL;
            emit(inst);
            dest.reg_offset++;
         }
      }
      break;
   }

   case nir_intrinsic_load_ubo_vec1:
   case nir_intrinsic_load_ubo_vec2:
   case nir_intrinsic_load_ubo_vec3:
   case nir_intrinsic_load_ubo_vec4: {
      fs_reg surf_index = fs_reg(prog_data->binding_table.ubo_start +
                                 (unsigned) instr->const_index[0]);
      fs_reg packed_consts = fs_reg(this, glsl_type::float_type);
      packed_consts.type = dest.type;

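      /* The pull constant load fetches an aligned 16-byte block, so mask off
       * the low offset bits here and use set_smear() below to pick the right
       * dword out of the block for each component.
       */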
      fs_reg const_offset_reg = fs_reg((unsigned) instr->const_index[1] & ~15);
      emit(new(mem_ctx) fs_inst(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
                                packed_consts, surf_index, const_offset_reg));

      for (unsigned i = 0;
           i < nir_intrinsic_infos[instr->intrinsic].dest_components; i++) {
         packed_consts.set_smear(instr->const_index[1] % 16 / 4 + i);

         /* The std140 packing rules don't allow vectors to cross 16-byte
          * boundaries, and a reg is 32 bytes.
          */
         assert(packed_consts.subreg_offset < 32);

         fs_inst *inst = MOV(dest, packed_consts);
         if (instr->has_predicate)
            inst->predicate = BRW_PREDICATE_NORMAL;
         emit(inst);

         dest.reg_offset++;
      }
      break;
   }

   case nir_intrinsic_load_ubo_vec1_indirect:
   case nir_intrinsic_load_ubo_vec2_indirect:
   case nir_intrinsic_load_ubo_vec3_indirect:
   case nir_intrinsic_load_ubo_vec4_indirect: {
      fs_reg surf_index = fs_reg(prog_data->binding_table.ubo_start +
                                 instr->const_index[0]);
      /* Turn the byte offset into a dword offset. */
      unsigned base_offset = instr->const_index[1] / 4;
      fs_reg offset = fs_reg(this, glsl_type::int_type);
      emit(SHR(offset, retype(get_nir_src(instr->src[0]), BRW_REGISTER_TYPE_D),
               fs_reg(2)));

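      /* VARYING_PULL_CONSTANT_LOAD expands to a short list of instructions;
       * only the last one produces the result, so that's the one to
       * predicate.
       */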
      for (unsigned i = 0;
           i < nir_intrinsic_infos[instr->intrinsic].dest_components; i++) {
         exec_list list = VARYING_PULL_CONSTANT_LOAD(dest, surf_index,
                                                     offset, base_offset + i);
         fs_inst *last_inst = (fs_inst *) list.get_tail();
         if (instr->has_predicate)
            last_inst->predicate = BRW_PREDICATE_NORMAL;
         emit(list);

         dest.reg_offset++;
      }
      break;
   }

   case nir_intrinsic_load_input_vec1:
   case nir_intrinsic_load_input_vec2:
   case nir_intrinsic_load_input_vec3:
   case nir_intrinsic_load_input_vec4: {
      unsigned index = 0;
      for (int i = 0; i < instr->const_index[1]; i++) {
         for (unsigned j = 0;
              j < nir_intrinsic_infos[instr->intrinsic].dest_components; j++) {
            fs_reg src = nir_inputs;
            src.reg_offset = instr->const_index[0] + index;
            src.type = dest.type;
            index++;

            fs_inst *inst = MOV(dest, src);
            if (instr->has_predicate)
               inst->predicate = BRW_PREDICATE_NORMAL;
            emit(inst);
            dest.reg_offset++;
         }
      }
      break;
   }

   case nir_intrinsic_load_input_vec1_indirect:
   case nir_intrinsic_load_input_vec2_indirect:
   case nir_intrinsic_load_input_vec3_indirect:
   case nir_intrinsic_load_input_vec4_indirect: {
      unsigned index = 0;
      for (int i = 0; i < instr->const_index[1]; i++) {
         for (unsigned j = 0;
              j < nir_intrinsic_infos[instr->intrinsic].dest_components; j++) {
            fs_reg src = nir_inputs;
            src.reg_offset = instr->const_index[0] + index;
            src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[0]));
            src.reladdr->type = BRW_REGISTER_TYPE_D;
            src.type = dest.type;
            index++;

            fs_inst *inst = MOV(dest, src);
            if (instr->has_predicate)
               inst->predicate = BRW_PREDICATE_NORMAL;
            emit(inst);
            dest.reg_offset++;
         }
      }
      break;
   }

   case nir_intrinsic_store_output_vec1:
   case nir_intrinsic_store_output_vec2:
   case nir_intrinsic_store_output_vec3:
   case nir_intrinsic_store_output_vec4: {
      fs_reg src = get_nir_src(instr->src[0]);
      unsigned index = 0;
      for (int i = 0; i < instr->const_index[1]; i++) {
         for (unsigned j = 0;
              j < nir_intrinsic_infos[instr->intrinsic].src_components[0]; j++) {
            fs_reg new_dest = nir_outputs;
            new_dest.reg_offset = instr->const_index[0] + index;
            new_dest.type = src.type;
            index++;
            fs_inst *inst = MOV(new_dest, src);
            if (instr->has_predicate)
               inst->predicate = BRW_PREDICATE_NORMAL;
            emit(inst);
            src.reg_offset++;
         }
      }
      break;
   }

   case nir_intrinsic_store_output_vec1_indirect:
   case nir_intrinsic_store_output_vec2_indirect:
   case nir_intrinsic_store_output_vec3_indirect:
   case nir_intrinsic_store_output_vec4_indirect: {
      fs_reg src = get_nir_src(instr->src[0]);
      fs_reg indirect = get_nir_src(instr->src[1]);
      unsigned index = 0;
      for (int i = 0; i < instr->const_index[1]; i++) {
         for (unsigned j = 0;
              j < nir_intrinsic_infos[instr->intrinsic].src_components[0]; j++) {
            fs_reg new_dest = nir_outputs;
            new_dest.reg_offset = instr->const_index[0] + index;
            new_dest.reladdr = new(mem_ctx) fs_reg(indirect);
            new_dest.type = src.type;
            index++;
            fs_inst *inst = MOV(new_dest, src);
            if (instr->has_predicate)
               inst->predicate = BRW_PREDICATE_NORMAL;
            emit(inst);
            src.reg_offset++;
         }
      }
      break;
   }

   default:
      unreachable("unknown intrinsic");
   }
}

void
fs_visitor::nir_emit_texture(nir_tex_instr *instr)
{
   brw_wm_prog_key *key = (brw_wm_prog_key*) this->key;
   unsigned sampler = instr->sampler_index;

   /* FINISHME: We're failing to recompile our programs when the sampler is
    * updated. This only matters for the texture rectangle scale parameters
    * (pre-gen6, or gen6+ with GL_CLAMP).
    */
   int texunit = prog->SamplerUnits[sampler];

   int gather_component = instr->component;

   bool is_rect = instr->sampler_dim == GLSL_SAMPLER_DIM_RECT;

   bool is_cube_array = instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE &&
                        instr->is_array;

   int lod_components = 0, offset_components = 0;

   fs_reg coordinate, shadow_comparitor, lod, lod2, sample_index, mcs, offset;

   for (unsigned i = 0; i < instr->num_srcs; i++) {
      fs_reg src = get_nir_src(instr->src[i]);
      switch (instr->src_type[i]) {
      case nir_tex_src_bias:
         lod = retype(src, BRW_REGISTER_TYPE_F);
         break;
      case nir_tex_src_comparitor:
         shadow_comparitor = retype(src, BRW_REGISTER_TYPE_F);
         break;
      case nir_tex_src_coord:
         switch (instr->op) {
         case nir_texop_txf:
         case nir_texop_txf_ms:
            coordinate = retype(src, BRW_REGISTER_TYPE_D);
            break;
         default:
            coordinate = retype(src, BRW_REGISTER_TYPE_F);
            break;
         }
         break;
      case nir_tex_src_ddx:
         lod = retype(src, BRW_REGISTER_TYPE_F);
         lod_components = nir_tex_instr_src_size(instr, i);
         break;
      case nir_tex_src_ddy:
         lod2 = retype(src, BRW_REGISTER_TYPE_F);
         break;
      case nir_tex_src_lod:
         switch (instr->op) {
         case nir_texop_txs:
            lod = retype(src, BRW_REGISTER_TYPE_UD);
            break;
         case nir_texop_txf:
            lod = retype(src, BRW_REGISTER_TYPE_D);
            break;
         default:
            lod = retype(src, BRW_REGISTER_TYPE_F);
            break;
         }
         break;
      case nir_tex_src_ms_index:
         sample_index = retype(src, BRW_REGISTER_TYPE_UD);
         break;
      case nir_tex_src_offset:
         offset = retype(src, BRW_REGISTER_TYPE_D);
         if (instr->is_array)
            offset_components = instr->coord_components - 1;
         else
            offset_components = instr->coord_components;
         break;
      case nir_tex_src_projector:
         unreachable("should be lowered");
      case nir_tex_src_sampler_index:
         unreachable("not yet supported");
      default:
         unreachable("unknown texture source");
      }
   }

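   /* For multisample fetches on Gen7+, first load the MCS (multisample
    * control surface) data when the surface uses a compressed multisample
    * layout; otherwise pass zero.
    */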
   if (instr->op == nir_texop_txf_ms) {
      if (brw->gen >= 7 && key->tex.compressed_multisample_layout_mask & (1<<sampler))
         mcs = emit_mcs_fetch(coordinate, instr->coord_components, fs_reg(sampler));
      else
         mcs = fs_reg(0u);
   }

   for (unsigned i = 0; i < 3; i++) {
      if (instr->const_offset[i] != 0) {
         assert(offset_components == 0);
         offset = fs_reg(brw_texture_offset(ctx, instr->const_offset, 3));
         break;
      }
   }

   enum glsl_base_type dest_base_type;
   switch (instr->dest_type) {
   case nir_type_float:
      dest_base_type = GLSL_TYPE_FLOAT;
      break;
   case nir_type_int:
      dest_base_type = GLSL_TYPE_INT;
      break;
   case nir_type_unsigned:
      dest_base_type = GLSL_TYPE_UINT;
      break;
   default:
      unreachable("bad type");
   }

   const glsl_type *dest_type =
      glsl_type::get_instance(dest_base_type, nir_tex_instr_dest_size(instr),
                              1);

   ir_texture_opcode op;
   switch (instr->op) {
   case nir_texop_lod: op = ir_lod; break;
   case nir_texop_query_levels: op = ir_query_levels; break;
   case nir_texop_tex: op = ir_tex; break;
   case nir_texop_tg4: op = ir_tg4; break;
   case nir_texop_txb: op = ir_txb; break;
   case nir_texop_txd: op = ir_txd; break;
   case nir_texop_txf: op = ir_txf; break;
   case nir_texop_txf_ms: op = ir_txf_ms; break;
   case nir_texop_txl: op = ir_txl; break;
   case nir_texop_txs: op = ir_txs; break;
   default:
      unreachable("unknown texture opcode");
   }

   emit_texture(op, dest_type, coordinate, instr->coord_components,
                shadow_comparitor, lod, lod2, lod_components, sample_index,
                offset, offset_components, mcs, gather_component,
                is_cube_array, is_rect, sampler, fs_reg(sampler), texunit);

   fs_reg dest = get_nir_dest(instr->dest);
   dest.type = this->result.type;
   unsigned num_components = nir_tex_instr_dest_size(instr);
   emit_percomp(MOV(dest, this->result), (1 << num_components) - 1);
}

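/* Materialize a load_const by MOVing the raw bit patterns; the destination
 * is typed UD so float and integer constants copy through unchanged.
 */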
void
fs_visitor::nir_emit_load_const(nir_load_const_instr *instr)
{
   fs_reg dest = get_nir_dest(instr->dest);
   dest.type = BRW_REGISTER_TYPE_UD;
   if (instr->array_elems == 0) {
      for (unsigned i = 0; i < instr->num_components; i++) {
         emit(MOV(dest, fs_reg(instr->value.u[i])));
         dest.reg_offset++;
      }
   } else {
      for (unsigned i = 0; i < instr->array_elems; i++) {
         for (unsigned j = 0; j < instr->num_components; j++) {
            emit(MOV(dest, fs_reg(instr->array[i].u[j])));
            dest.reg_offset++;
         }
      }
   }
}

void
fs_visitor::nir_emit_jump(nir_jump_instr *instr)
{
   switch (instr->type) {
   case nir_jump_break:
      emit(BRW_OPCODE_BREAK);
      break;
   case nir_jump_continue:
      emit(BRW_OPCODE_CONTINUE);
      break;
   case nir_jump_return:
   default:
      unreachable("unknown jump");
   }
}