i965/backend_shader: Add a field to store the NIR shader
mesa.git: src/mesa/drivers/dri/i965/brw_fs_nir.cpp
1 /*
2 * Copyright © 2010 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "glsl/ir.h"
25 #include "glsl/ir_optimization.h"
26 #include "glsl/nir/glsl_to_nir.h"
27 #include "main/shaderimage.h"
28 #include "program/prog_to_nir.h"
29 #include "brw_fs.h"
30 #include "brw_fs_surface_builder.h"
31 #include "brw_nir.h"
33
34 using namespace brw;
35 using namespace brw::surface_access;
36
37 void
38 fs_visitor::emit_nir_code()
39 {
40 /* emit the arrays used for inputs and outputs - load/store intrinsics will
41 * be converted to reads/writes of these arrays
42 */
43 nir_setup_inputs();
44 nir_setup_outputs();
45 nir_setup_uniforms();
46 nir_emit_system_values();
47
48 /* get the main function and emit it */
49 nir_foreach_overload(nir, overload) {
50 assert(strcmp(overload->function->name, "main") == 0);
51 assert(overload->impl);
52 nir_emit_impl(overload->impl);
53 }
54 }
55
56 void
57 fs_visitor::nir_setup_inputs()
58 {
59 nir_inputs = bld.vgrf(BRW_REGISTER_TYPE_F, nir->num_inputs);
60
61 foreach_list_typed(nir_variable, var, node, &nir->inputs) {
62 enum brw_reg_type type = brw_type_for_base_type(var->type);
63 fs_reg input = offset(nir_inputs, bld, var->data.driver_location);
64
65 fs_reg reg;
66 switch (stage) {
67 case MESA_SHADER_VERTEX: {
68 /* Our ATTR file is indexed by VERT_ATTRIB_*, which is the value
69 * stored in nir_variable::location.
70 *
71 * However, NIR's load_input intrinsics use a different index - an
72 * offset into a single contiguous array containing all inputs.
73 * This index corresponds to the nir_variable::driver_location field.
74 *
75 * So, we need to copy from fs_reg(ATTR, var->location) to
76 * offset(nir_inputs, var->data.driver_location).
77 */
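/* Illustrative example (hypothetical input variable): for a single mat2
 * input, cols == 2, elts == 2 and components == 4, so the loop below
 * copies ATTR offset 4*j + k into nir_inputs offset 2*j + k; e.g. column
 * 1, row 0 (j == 1, k == 0) is read from ATTR offset 4 and written to
 * input offset 2.
 */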
78 const glsl_type *const t = var->type->without_array();
79 const unsigned components = t->components();
80 const unsigned cols = t->matrix_columns;
81 const unsigned elts = t->vector_elements;
82 unsigned array_length = var->type->is_array() ? var->type->length : 1;
83 for (unsigned i = 0; i < array_length; i++) {
84 for (unsigned j = 0; j < cols; j++) {
85 for (unsigned k = 0; k < elts; k++) {
86 bld.MOV(offset(retype(input, type), bld,
87 components * i + elts * j + k),
88 offset(fs_reg(ATTR, var->data.location + i, type),
89 bld, 4 * j + k));
90 }
91 }
92 }
93 break;
94 }
95 case MESA_SHADER_GEOMETRY:
96 case MESA_SHADER_COMPUTE:
97 case MESA_SHADER_TESS_CTRL:
98 case MESA_SHADER_TESS_EVAL:
99 unreachable("fs_visitor not used for these stages yet.");
100 break;
101 case MESA_SHADER_FRAGMENT:
102 if (var->data.location == VARYING_SLOT_POS) {
103 reg = *emit_fragcoord_interpolation(var->data.pixel_center_integer,
104 var->data.origin_upper_left);
105 emit_percomp(bld, fs_inst(BRW_OPCODE_MOV, bld.dispatch_width(),
106 input, reg), 0xF);
107 } else {
108 emit_general_interpolation(input, var->name, var->type,
109 (glsl_interp_qualifier) var->data.interpolation,
110 var->data.location, var->data.centroid,
111 var->data.sample);
112 }
113 break;
114 }
115 }
116 }
117
118 void
119 fs_visitor::nir_setup_outputs()
120 {
121 brw_wm_prog_key *key = (brw_wm_prog_key*) this->key;
122
123 nir_outputs = bld.vgrf(BRW_REGISTER_TYPE_F, nir->num_outputs);
124
125 foreach_list_typed(nir_variable, var, node, &nir->outputs) {
126 fs_reg reg = offset(nir_outputs, bld, var->data.driver_location);
127
128 int vector_elements =
129 var->type->is_array() ? var->type->fields.array->vector_elements
130 : var->type->vector_elements;
131
132 switch (stage) {
133 case MESA_SHADER_VERTEX:
134 for (unsigned int i = 0; i < ALIGN(type_size_scalar(var->type), 4) / 4; i++) {
135 int output = var->data.location + i;
136 this->outputs[output] = offset(reg, bld, 4 * i);
137 this->output_components[output] = vector_elements;
138 }
139 break;
140 case MESA_SHADER_FRAGMENT:
141 if (var->data.index > 0) {
142 assert(var->data.location == FRAG_RESULT_DATA0);
143 assert(var->data.index == 1);
144 this->dual_src_output = reg;
145 this->do_dual_src = true;
146 } else if (var->data.location == FRAG_RESULT_COLOR) {
147 /* Writing gl_FragColor outputs to all color regions. */
148 for (unsigned int i = 0; i < MAX2(key->nr_color_regions, 1); i++) {
149 this->outputs[i] = reg;
150 this->output_components[i] = 4;
151 }
152 } else if (var->data.location == FRAG_RESULT_DEPTH) {
153 this->frag_depth = reg;
154 } else if (var->data.location == FRAG_RESULT_SAMPLE_MASK) {
155 this->sample_mask = reg;
156 } else {
157 /* gl_FragData or a user-defined FS output */
158 assert(var->data.location >= FRAG_RESULT_DATA0 &&
159 var->data.location < FRAG_RESULT_DATA0+BRW_MAX_DRAW_BUFFERS);
160
161 /* General color output. */
162 for (unsigned int i = 0; i < MAX2(1, var->type->length); i++) {
163 int output = var->data.location - FRAG_RESULT_DATA0 + i;
164 this->outputs[output] = offset(reg, bld, vector_elements * i);
165 this->output_components[output] = vector_elements;
166 }
167 }
168 break;
169 default:
170 unreachable("unhandled shader stage");
171 }
172 }
173 }
174
175 void
176 fs_visitor::nir_setup_uniforms()
177 {
178 if (dispatch_width != 8)
179 return;
180
181 uniforms = nir->num_uniforms;
182
183 foreach_list_typed(nir_variable, var, node, &nir->uniforms) {
184 /* UBOs and atomics don't take up space in the uniform file */
185 if (var->interface_type != NULL || var->type->contains_atomic())
186 continue;
187
188 if (type_size_scalar(var->type) > 0)
189 param_size[var->data.driver_location] = type_size_scalar(var->type);
190 }
191 }
192
193 static bool
194 emit_system_values_block(nir_block *block, void *void_visitor)
195 {
196 fs_visitor *v = (fs_visitor *)void_visitor;
197 fs_reg *reg;
198
199 nir_foreach_instr(block, instr) {
200 if (instr->type != nir_instr_type_intrinsic)
201 continue;
202
203 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
204 switch (intrin->intrinsic) {
205 case nir_intrinsic_load_vertex_id:
206 unreachable("should be lowered by lower_vertex_id().");
207
208 case nir_intrinsic_load_vertex_id_zero_base:
209 assert(v->stage == MESA_SHADER_VERTEX);
210 reg = &v->nir_system_values[SYSTEM_VALUE_VERTEX_ID_ZERO_BASE];
211 if (reg->file == BAD_FILE)
212 *reg = *v->emit_vs_system_value(SYSTEM_VALUE_VERTEX_ID_ZERO_BASE);
213 break;
214
215 case nir_intrinsic_load_base_vertex:
216 assert(v->stage == MESA_SHADER_VERTEX);
217 reg = &v->nir_system_values[SYSTEM_VALUE_BASE_VERTEX];
218 if (reg->file == BAD_FILE)
219 *reg = *v->emit_vs_system_value(SYSTEM_VALUE_BASE_VERTEX);
220 break;
221
222 case nir_intrinsic_load_instance_id:
223 assert(v->stage == MESA_SHADER_VERTEX);
224 reg = &v->nir_system_values[SYSTEM_VALUE_INSTANCE_ID];
225 if (reg->file == BAD_FILE)
226 *reg = *v->emit_vs_system_value(SYSTEM_VALUE_INSTANCE_ID);
227 break;
228
229 case nir_intrinsic_load_sample_pos:
230 assert(v->stage == MESA_SHADER_FRAGMENT);
231 reg = &v->nir_system_values[SYSTEM_VALUE_SAMPLE_POS];
232 if (reg->file == BAD_FILE)
233 *reg = *v->emit_samplepos_setup();
234 break;
235
236 case nir_intrinsic_load_sample_id:
237 assert(v->stage == MESA_SHADER_FRAGMENT);
238 reg = &v->nir_system_values[SYSTEM_VALUE_SAMPLE_ID];
239 if (reg->file == BAD_FILE)
240 *reg = *v->emit_sampleid_setup();
241 break;
242
243 case nir_intrinsic_load_sample_mask_in:
244 assert(v->stage == MESA_SHADER_FRAGMENT);
245 assert(v->devinfo->gen >= 7);
246 reg = &v->nir_system_values[SYSTEM_VALUE_SAMPLE_MASK_IN];
247 if (reg->file == BAD_FILE)
248 *reg = fs_reg(retype(brw_vec8_grf(v->payload.sample_mask_in_reg, 0),
249 BRW_REGISTER_TYPE_D));
250 break;
251
252 case nir_intrinsic_load_local_invocation_id:
253 assert(v->stage == MESA_SHADER_COMPUTE);
254 reg = &v->nir_system_values[SYSTEM_VALUE_LOCAL_INVOCATION_ID];
255 if (reg->file == BAD_FILE)
256 *reg = *v->emit_cs_local_invocation_id_setup();
257 break;
258
259 case nir_intrinsic_load_work_group_id:
260 assert(v->stage == MESA_SHADER_COMPUTE);
261 reg = &v->nir_system_values[SYSTEM_VALUE_WORK_GROUP_ID];
262 if (reg->file == BAD_FILE)
263 *reg = *v->emit_cs_work_group_id_setup();
264 break;
265
266 default:
267 break;
268 }
269 }
270
271 return true;
272 }
273
274 void
275 fs_visitor::nir_emit_system_values()
276 {
277 nir_system_values = ralloc_array(mem_ctx, fs_reg, SYSTEM_VALUE_MAX);
278 nir_foreach_overload(nir, overload) {
279 assert(strcmp(overload->function->name, "main") == 0);
280 assert(overload->impl);
281 nir_foreach_block(overload->impl, emit_system_values_block, this);
282 }
283 }
284
285 void
286 fs_visitor::nir_emit_impl(nir_function_impl *impl)
287 {
288 nir_locals = reralloc(mem_ctx, nir_locals, fs_reg, impl->reg_alloc);
289 foreach_list_typed(nir_register, reg, node, &impl->registers) {
290 unsigned array_elems =
291 reg->num_array_elems == 0 ? 1 : reg->num_array_elems;
292 unsigned size = array_elems * reg->num_components;
293 nir_locals[reg->index] = bld.vgrf(BRW_REGISTER_TYPE_F, size);
294 }
295
296 nir_ssa_values = reralloc(mem_ctx, nir_ssa_values, fs_reg,
297 impl->ssa_alloc);
298
299 nir_emit_cf_list(&impl->body);
300 }
301
302 void
303 fs_visitor::nir_emit_cf_list(exec_list *list)
304 {
305 exec_list_validate(list);
306 foreach_list_typed(nir_cf_node, node, node, list) {
307 switch (node->type) {
308 case nir_cf_node_if:
309 nir_emit_if(nir_cf_node_as_if(node));
310 break;
311
312 case nir_cf_node_loop:
313 nir_emit_loop(nir_cf_node_as_loop(node));
314 break;
315
316 case nir_cf_node_block:
317 nir_emit_block(nir_cf_node_as_block(node));
318 break;
319
320 default:
321 unreachable("Invalid CFG node block");
322 }
323 }
324 }
325
326 void
327 fs_visitor::nir_emit_if(nir_if *if_stmt)
328 {
329 /* first, put the condition into f0 */
330 fs_inst *inst = bld.MOV(bld.null_reg_d(),
331 retype(get_nir_src(if_stmt->condition),
332 BRW_REGISTER_TYPE_D));
333 inst->conditional_mod = BRW_CONDITIONAL_NZ;
334
335 bld.IF(BRW_PREDICATE_NORMAL);
336
337 nir_emit_cf_list(&if_stmt->then_list);
338
339 /* note: if the else is empty, dead CF elimination will remove it */
340 bld.emit(BRW_OPCODE_ELSE);
341
342 nir_emit_cf_list(&if_stmt->else_list);
343
344 bld.emit(BRW_OPCODE_ENDIF);
345 }
346
347 void
348 fs_visitor::nir_emit_loop(nir_loop *loop)
349 {
350 bld.emit(BRW_OPCODE_DO);
351
352 nir_emit_cf_list(&loop->body);
353
354 bld.emit(BRW_OPCODE_WHILE);
355 }
356
357 void
358 fs_visitor::nir_emit_block(nir_block *block)
359 {
360 nir_foreach_instr(block, instr) {
361 nir_emit_instr(instr);
362 }
363 }
364
365 void
366 fs_visitor::nir_emit_instr(nir_instr *instr)
367 {
368 const fs_builder abld = bld.annotate(NULL, instr);
369
370 switch (instr->type) {
371 case nir_instr_type_alu:
372 nir_emit_alu(abld, nir_instr_as_alu(instr));
373 break;
374
375 case nir_instr_type_intrinsic:
376 nir_emit_intrinsic(abld, nir_instr_as_intrinsic(instr));
377 break;
378
379 case nir_instr_type_tex:
380 nir_emit_texture(abld, nir_instr_as_tex(instr));
381 break;
382
383 case nir_instr_type_load_const:
384 nir_emit_load_const(abld, nir_instr_as_load_const(instr));
385 break;
386
387 case nir_instr_type_ssa_undef:
388 nir_emit_undef(abld, nir_instr_as_ssa_undef(instr));
389 break;
390
391 case nir_instr_type_jump:
392 nir_emit_jump(abld, nir_instr_as_jump(instr));
393 break;
394
395 default:
396 unreachable("unknown instruction type");
397 }
398 }
399
400 bool
401 fs_visitor::optimize_frontfacing_ternary(nir_alu_instr *instr,
402 const fs_reg &result)
403 {
404 if (!instr->src[0].src.is_ssa ||
405 instr->src[0].src.ssa->parent_instr->type != nir_instr_type_intrinsic)
406 return false;
407
408 nir_intrinsic_instr *src0 =
409 nir_instr_as_intrinsic(instr->src[0].src.ssa->parent_instr);
410
411 if (src0->intrinsic != nir_intrinsic_load_front_face)
412 return false;
413
414 nir_const_value *value1 = nir_src_as_const_value(instr->src[1].src);
415 if (!value1 || fabsf(value1->f[0]) != 1.0f)
416 return false;
417
418 nir_const_value *value2 = nir_src_as_const_value(instr->src[2].src);
419 if (!value2 || fabsf(value2->f[0]) != 1.0f)
420 return false;
421
422 fs_reg tmp = vgrf(glsl_type::int_type);
423
424 if (devinfo->gen >= 6) {
425 /* Bit 15 of g0.0 is 0 if the polygon is front facing. */
426 fs_reg g0 = fs_reg(retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_W));
427
428 /* For (gl_FrontFacing ? 1.0 : -1.0), emit:
429 *
430 * or(8) tmp.1<2>W g0.0<0,1,0>W 0x00003f80W
431 * and(8) dst<1>D tmp<8,8,1>D 0xbf800000D
432 *
433 * and negate g0.0<0,1,0>W for (gl_FrontFacing ? -1.0 : 1.0).
434 *
435 * This negation looks like it's safe in practice, because bits 0:4 will
436 * surely be TRIANGLES
437 */
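/* Worked example (illustrative bit values): for a front-facing polygon,
 * bit 15 of g0.0 is 0, so the OR leaves 0x3f80 in the high word of each
 * dword of tmp; the AND with 0xbf800000 then produces 0x3f800000 (1.0f).
 * For a back-facing polygon the high word becomes 0xbf80 and the result
 * is 0xbf800000 (-1.0f).
 */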
438
439 if (value1->f[0] == -1.0f) {
440 g0.negate = true;
441 }
442
443 tmp.type = BRW_REGISTER_TYPE_W;
444 tmp.subreg_offset = 2;
445 tmp.stride = 2;
446
447 fs_inst *or_inst = bld.OR(tmp, g0, fs_reg(0x3f80));
448 or_inst->src[1].type = BRW_REGISTER_TYPE_UW;
449
450 tmp.type = BRW_REGISTER_TYPE_D;
451 tmp.subreg_offset = 0;
452 tmp.stride = 1;
453 } else {
454 /* Bit 31 of g1.6 is 0 if the polygon is front facing. */
455 fs_reg g1_6 = fs_reg(retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_D));
456
457 /* For (gl_FrontFacing ? 1.0 : -1.0), emit:
458 *
459 * or(8) tmp<1>D g1.6<0,1,0>D 0x3f800000D
460 * and(8) dst<1>D tmp<8,8,1>D 0xbf800000D
461 *
462 * and negate g1.6<0,1,0>D for (gl_FrontFacing ? -1.0 : 1.0).
463 *
464 * This negation looks like it's safe in practice, because bits 0:4 will
465 * surely be TRIANGLES
466 */
467
468 if (value1->f[0] == -1.0f) {
469 g1_6.negate = true;
470 }
471
472 bld.OR(tmp, g1_6, fs_reg(0x3f800000));
473 }
474 bld.AND(retype(result, BRW_REGISTER_TYPE_D), tmp, fs_reg(0xbf800000));
475
476 return true;
477 }
478
479 void
480 fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
481 {
482 struct brw_wm_prog_key *fs_key = (struct brw_wm_prog_key *) this->key;
483 fs_inst *inst;
484
485 fs_reg result = get_nir_dest(instr->dest.dest);
486 result.type = brw_type_for_nir_type(nir_op_infos[instr->op].output_type);
487
488 fs_reg op[4];
489 for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
490 op[i] = get_nir_src(instr->src[i].src);
491 op[i].type = brw_type_for_nir_type(nir_op_infos[instr->op].input_types[i]);
492 op[i].abs = instr->src[i].abs;
493 op[i].negate = instr->src[i].negate;
494 }
495
496 /* We get a bunch of MOVs out of the from_ssa pass and they may still
497 * be vectorized. We'll handle them as a special case. We'll also
498 * handle vecN here because it's basically the same thing.
499 */
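/* e.g. for a hypothetical "vec4(a.y, b.x, c.w, d.z)", destination
 * component i is taken from op[i]'s single swizzled component
 * (src[i].swizzle[0]), while for imov/fmov every component comes from
 * op[0], selected by src[0].swizzle[i].
 */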
500 switch (instr->op) {
501 case nir_op_imov:
502 case nir_op_fmov:
503 case nir_op_vec2:
504 case nir_op_vec3:
505 case nir_op_vec4: {
506 fs_reg temp = result;
507 bool need_extra_copy = false;
508 for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
509 if (!instr->src[i].src.is_ssa &&
510 instr->dest.dest.reg.reg == instr->src[i].src.reg.reg) {
511 need_extra_copy = true;
512 temp = bld.vgrf(result.type, 4);
513 break;
514 }
515 }
516
517 for (unsigned i = 0; i < 4; i++) {
518 if (!(instr->dest.write_mask & (1 << i)))
519 continue;
520
521 if (instr->op == nir_op_imov || instr->op == nir_op_fmov) {
522 inst = bld.MOV(offset(temp, bld, i),
523 offset(op[0], bld, instr->src[0].swizzle[i]));
524 } else {
525 inst = bld.MOV(offset(temp, bld, i),
526 offset(op[i], bld, instr->src[i].swizzle[0]));
527 }
528 inst->saturate = instr->dest.saturate;
529 }
530
531 /* In this case the source and destination registers were the same,
532 * so we need to insert an extra set of moves in order to deal with
533 * any swizzling.
534 */
535 if (need_extra_copy) {
536 for (unsigned i = 0; i < 4; i++) {
537 if (!(instr->dest.write_mask & (1 << i)))
538 continue;
539
540 bld.MOV(offset(result, bld, i), offset(temp, bld, i));
541 }
542 }
543 return;
544 }
545 default:
546 break;
547 }
548
549 /* At this point, we have dealt with any instruction that operates on
550 * more than a single channel. Therefore, we can just adjust the source
551 * and destination registers for that channel and emit the instruction.
552 */
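/* e.g. a scalarized fadd with write_mask 0x4: channel == ffs(0x4) - 1
 * == 2, so the destination is offset to component 2 and each source is
 * offset by its swizzle[2] entry before the instruction is emitted.
 */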
553 unsigned channel = 0;
554 if (nir_op_infos[instr->op].output_size == 0) {
555 /* Since NIR is doing the scalarizing for us, we should only ever see
556 * vectorized operations with a single channel.
557 */
558 assert(_mesa_bitcount(instr->dest.write_mask) == 1);
559 channel = ffs(instr->dest.write_mask) - 1;
560
561 result = offset(result, bld, channel);
562 }
563
564 for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
565 assert(nir_op_infos[instr->op].input_sizes[i] < 2);
566 op[i] = offset(op[i], bld, instr->src[i].swizzle[channel]);
567 }
568
569 switch (instr->op) {
570 case nir_op_i2f:
571 case nir_op_u2f:
572 inst = bld.MOV(result, op[0]);
573 inst->saturate = instr->dest.saturate;
574 break;
575
576 case nir_op_f2i:
577 case nir_op_f2u:
578 bld.MOV(result, op[0]);
579 break;
580
581 case nir_op_fsign: {
582 /* AND(val, 0x80000000) gives the sign bit.
583 *
584 * Predicated OR ORs 1.0 (0x3f800000) with the sign bit if val is not
585 * zero.
586 */
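/* Worked example (illustrative input): for val == -2.5f (0xc0200000)
 * the CMP sets the flag, the AND leaves 0x80000000, and the predicated
 * OR produces 0xbf800000 == -1.0f; for val == 0.0f the OR is skipped
 * and the result stays 0x00000000 == 0.0f.
 */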
587 bld.CMP(bld.null_reg_f(), op[0], fs_reg(0.0f), BRW_CONDITIONAL_NZ);
588
589 fs_reg result_int = retype(result, BRW_REGISTER_TYPE_UD);
590 op[0].type = BRW_REGISTER_TYPE_UD;
591 result.type = BRW_REGISTER_TYPE_UD;
592 bld.AND(result_int, op[0], fs_reg(0x80000000u));
593
594 inst = bld.OR(result_int, result_int, fs_reg(0x3f800000u));
595 inst->predicate = BRW_PREDICATE_NORMAL;
596 if (instr->dest.saturate) {
597 inst = bld.MOV(result, result);
598 inst->saturate = true;
599 }
600 break;
601 }
602
603 case nir_op_isign:
604 /* ASR(val, 31) -> negative val generates 0xffffffff (signed -1).
605 * -> non-negative val generates 0x00000000.
606 * Predicated OR sets 1 if val is positive.
607 */
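/* Worked example (illustrative inputs): val == 7: ASR gives 0, the CMP
 * (7 > 0) passes and the predicated OR yields 1. val == -5: ASR gives
 * 0xffffffff (-1) and the OR is skipped. val == 0: both stay 0.
 */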
608 bld.CMP(bld.null_reg_d(), op[0], fs_reg(0), BRW_CONDITIONAL_G);
609 bld.ASR(result, op[0], fs_reg(31));
610 inst = bld.OR(result, result, fs_reg(1));
611 inst->predicate = BRW_PREDICATE_NORMAL;
612 break;
613
614 case nir_op_frcp:
615 inst = bld.emit(SHADER_OPCODE_RCP, result, op[0]);
616 inst->saturate = instr->dest.saturate;
617 break;
618
619 case nir_op_fexp2:
620 inst = bld.emit(SHADER_OPCODE_EXP2, result, op[0]);
621 inst->saturate = instr->dest.saturate;
622 break;
623
624 case nir_op_flog2:
625 inst = bld.emit(SHADER_OPCODE_LOG2, result, op[0]);
626 inst->saturate = instr->dest.saturate;
627 break;
628
629 case nir_op_fsin:
630 inst = bld.emit(SHADER_OPCODE_SIN, result, op[0]);
631 inst->saturate = instr->dest.saturate;
632 break;
633
634 case nir_op_fcos:
635 inst = bld.emit(SHADER_OPCODE_COS, result, op[0]);
636 inst->saturate = instr->dest.saturate;
637 break;
638
639 case nir_op_fddx:
640 if (fs_key->high_quality_derivatives) {
641 inst = bld.emit(FS_OPCODE_DDX_FINE, result, op[0]);
642 } else {
643 inst = bld.emit(FS_OPCODE_DDX_COARSE, result, op[0]);
644 }
645 inst->saturate = instr->dest.saturate;
646 break;
647 case nir_op_fddx_fine:
648 inst = bld.emit(FS_OPCODE_DDX_FINE, result, op[0]);
649 inst->saturate = instr->dest.saturate;
650 break;
651 case nir_op_fddx_coarse:
652 inst = bld.emit(FS_OPCODE_DDX_COARSE, result, op[0]);
653 inst->saturate = instr->dest.saturate;
654 break;
655 case nir_op_fddy:
656 if (fs_key->high_quality_derivatives) {
657 inst = bld.emit(FS_OPCODE_DDY_FINE, result, op[0],
658 fs_reg(fs_key->render_to_fbo));
659 } else {
660 inst = bld.emit(FS_OPCODE_DDY_COARSE, result, op[0],
661 fs_reg(fs_key->render_to_fbo));
662 }
663 inst->saturate = instr->dest.saturate;
664 break;
665 case nir_op_fddy_fine:
666 inst = bld.emit(FS_OPCODE_DDY_FINE, result, op[0],
667 fs_reg(fs_key->render_to_fbo));
668 inst->saturate = instr->dest.saturate;
669 break;
670 case nir_op_fddy_coarse:
671 inst = bld.emit(FS_OPCODE_DDY_COARSE, result, op[0],
672 fs_reg(fs_key->render_to_fbo));
673 inst->saturate = instr->dest.saturate;
674 break;
675
676 case nir_op_fadd:
677 case nir_op_iadd:
678 inst = bld.ADD(result, op[0], op[1]);
679 inst->saturate = instr->dest.saturate;
680 break;
681
682 case nir_op_fmul:
683 inst = bld.MUL(result, op[0], op[1]);
684 inst->saturate = instr->dest.saturate;
685 break;
686
687 case nir_op_imul:
688 bld.MUL(result, op[0], op[1]);
689 break;
690
691 case nir_op_imul_high:
692 case nir_op_umul_high:
693 bld.emit(SHADER_OPCODE_MULH, result, op[0], op[1]);
694 break;
695
696 case nir_op_idiv:
697 case nir_op_udiv:
698 bld.emit(SHADER_OPCODE_INT_QUOTIENT, result, op[0], op[1]);
699 break;
700
701 case nir_op_uadd_carry:
702 unreachable("Should have been lowered by carry_to_arith().");
703
704 case nir_op_usub_borrow:
705 unreachable("Should have been lowered by borrow_to_arith().");
706
707 case nir_op_umod:
708 bld.emit(SHADER_OPCODE_INT_REMAINDER, result, op[0], op[1]);
709 break;
710
711 case nir_op_flt:
712 case nir_op_ilt:
713 case nir_op_ult:
714 bld.CMP(result, op[0], op[1], BRW_CONDITIONAL_L);
715 break;
716
717 case nir_op_fge:
718 case nir_op_ige:
719 case nir_op_uge:
720 bld.CMP(result, op[0], op[1], BRW_CONDITIONAL_GE);
721 break;
722
723 case nir_op_feq:
724 case nir_op_ieq:
725 bld.CMP(result, op[0], op[1], BRW_CONDITIONAL_Z);
726 break;
727
728 case nir_op_fne:
729 case nir_op_ine:
730 bld.CMP(result, op[0], op[1], BRW_CONDITIONAL_NZ);
731 break;
732
733 case nir_op_inot:
734 if (devinfo->gen >= 8) {
735 op[0] = resolve_source_modifiers(op[0]);
736 }
737 bld.NOT(result, op[0]);
738 break;
739 case nir_op_ixor:
740 if (devinfo->gen >= 8) {
741 op[0] = resolve_source_modifiers(op[0]);
742 op[1] = resolve_source_modifiers(op[1]);
743 }
744 bld.XOR(result, op[0], op[1]);
745 break;
746 case nir_op_ior:
747 if (devinfo->gen >= 8) {
748 op[0] = resolve_source_modifiers(op[0]);
749 op[1] = resolve_source_modifiers(op[1]);
750 }
751 bld.OR(result, op[0], op[1]);
752 break;
753 case nir_op_iand:
754 if (devinfo->gen >= 8) {
755 op[0] = resolve_source_modifiers(op[0]);
756 op[1] = resolve_source_modifiers(op[1]);
757 }
758 bld.AND(result, op[0], op[1]);
759 break;
760
761 case nir_op_fdot2:
762 case nir_op_fdot3:
763 case nir_op_fdot4:
764 case nir_op_bany2:
765 case nir_op_bany3:
766 case nir_op_bany4:
767 case nir_op_ball2:
768 case nir_op_ball3:
769 case nir_op_ball4:
770 case nir_op_ball_fequal2:
771 case nir_op_ball_iequal2:
772 case nir_op_ball_fequal3:
773 case nir_op_ball_iequal3:
774 case nir_op_ball_fequal4:
775 case nir_op_ball_iequal4:
776 case nir_op_bany_fnequal2:
777 case nir_op_bany_inequal2:
778 case nir_op_bany_fnequal3:
779 case nir_op_bany_inequal3:
780 case nir_op_bany_fnequal4:
781 case nir_op_bany_inequal4:
782 unreachable("Lowered by nir_lower_alu_reductions");
783
784 case nir_op_fnoise1_1:
785 case nir_op_fnoise1_2:
786 case nir_op_fnoise1_3:
787 case nir_op_fnoise1_4:
788 case nir_op_fnoise2_1:
789 case nir_op_fnoise2_2:
790 case nir_op_fnoise2_3:
791 case nir_op_fnoise2_4:
792 case nir_op_fnoise3_1:
793 case nir_op_fnoise3_2:
794 case nir_op_fnoise3_3:
795 case nir_op_fnoise3_4:
796 case nir_op_fnoise4_1:
797 case nir_op_fnoise4_2:
798 case nir_op_fnoise4_3:
799 case nir_op_fnoise4_4:
800 unreachable("not reached: should be handled by lower_noise");
801
802 case nir_op_ldexp:
803 unreachable("not reached: should be handled by ldexp_to_arith()");
804
805 case nir_op_fsqrt:
806 inst = bld.emit(SHADER_OPCODE_SQRT, result, op[0]);
807 inst->saturate = instr->dest.saturate;
808 break;
809
810 case nir_op_frsq:
811 inst = bld.emit(SHADER_OPCODE_RSQ, result, op[0]);
812 inst->saturate = instr->dest.saturate;
813 break;
814
815 case nir_op_b2i:
816 case nir_op_b2f:
817 bld.MOV(result, negate(op[0]));
818 break;
819
820 case nir_op_f2b:
821 bld.CMP(result, op[0], fs_reg(0.0f), BRW_CONDITIONAL_NZ);
822 break;
823 case nir_op_i2b:
824 bld.CMP(result, op[0], fs_reg(0), BRW_CONDITIONAL_NZ);
825 break;
826
827 case nir_op_ftrunc:
828 inst = bld.RNDZ(result, op[0]);
829 inst->saturate = instr->dest.saturate;
830 break;
831
832 case nir_op_fceil: {
833 op[0].negate = !op[0].negate;
834 fs_reg temp = vgrf(glsl_type::float_type);
835 bld.RNDD(temp, op[0]);
836 temp.negate = true;
837 inst = bld.MOV(result, temp);
838 inst->saturate = instr->dest.saturate;
839 break;
840 }
841 case nir_op_ffloor:
842 inst = bld.RNDD(result, op[0]);
843 inst->saturate = instr->dest.saturate;
844 break;
845 case nir_op_ffract:
846 inst = bld.FRC(result, op[0]);
847 inst->saturate = instr->dest.saturate;
848 break;
849 case nir_op_fround_even:
850 inst = bld.RNDE(result, op[0]);
851 inst->saturate = instr->dest.saturate;
852 break;
853
854 case nir_op_fmin:
855 case nir_op_imin:
856 case nir_op_umin:
857 if (devinfo->gen >= 6) {
858 inst = bld.emit(BRW_OPCODE_SEL, result, op[0], op[1]);
859 inst->conditional_mod = BRW_CONDITIONAL_L;
860 } else {
861 bld.CMP(bld.null_reg_d(), op[0], op[1], BRW_CONDITIONAL_L);
862 inst = bld.SEL(result, op[0], op[1]);
863 inst->predicate = BRW_PREDICATE_NORMAL;
864 }
865 inst->saturate = instr->dest.saturate;
866 break;
867
868 case nir_op_fmax:
869 case nir_op_imax:
870 case nir_op_umax:
871 if (devinfo->gen >= 6) {
872 inst = bld.emit(BRW_OPCODE_SEL, result, op[0], op[1]);
873 inst->conditional_mod = BRW_CONDITIONAL_GE;
874 } else {
875 bld.CMP(bld.null_reg_d(), op[0], op[1], BRW_CONDITIONAL_GE);
876 inst = bld.SEL(result, op[0], op[1]);
877 inst->predicate = BRW_PREDICATE_NORMAL;
878 }
879 inst->saturate = instr->dest.saturate;
880 break;
881
882 case nir_op_pack_snorm_2x16:
883 case nir_op_pack_snorm_4x8:
884 case nir_op_pack_unorm_2x16:
885 case nir_op_pack_unorm_4x8:
886 case nir_op_unpack_snorm_2x16:
887 case nir_op_unpack_snorm_4x8:
888 case nir_op_unpack_unorm_2x16:
889 case nir_op_unpack_unorm_4x8:
890 case nir_op_unpack_half_2x16:
891 case nir_op_pack_half_2x16:
892 unreachable("not reached: should be handled by lower_packing_builtins");
893
894 case nir_op_unpack_half_2x16_split_x:
895 inst = bld.emit(FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X, result, op[0]);
896 inst->saturate = instr->dest.saturate;
897 break;
898 case nir_op_unpack_half_2x16_split_y:
899 inst = bld.emit(FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y, result, op[0]);
900 inst->saturate = instr->dest.saturate;
901 break;
902
903 case nir_op_fpow:
904 inst = bld.emit(SHADER_OPCODE_POW, result, op[0], op[1]);
905 inst->saturate = instr->dest.saturate;
906 break;
907
908 case nir_op_bitfield_reverse:
909 bld.BFREV(result, op[0]);
910 break;
911
912 case nir_op_bit_count:
913 bld.CBIT(result, op[0]);
914 break;
915
916 case nir_op_ufind_msb:
917 case nir_op_ifind_msb: {
918 bld.FBH(retype(result, BRW_REGISTER_TYPE_UD), op[0]);
919
920 /* FBH counts from the MSB side, while GLSL's findMSB() wants the count
921 * from the LSB side. If FBH didn't return an error (0xFFFFFFFF), then
922 * subtract the result from 31 to convert the MSB count into an LSB count.
923 */
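/* e.g. for the illustrative input 0x00000010 (only bit 4 set), FBH
 * returns 27; the predicated ADD below computes 31 - 27 == 4, which is
 * what findMSB() should return. An all-zero input fails the CMP and
 * keeps the 0xffffffff (-1) error value.
 */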
924
925 bld.CMP(bld.null_reg_d(), result, fs_reg(-1), BRW_CONDITIONAL_NZ);
926 fs_reg neg_result(result);
927 neg_result.negate = true;
928 inst = bld.ADD(result, neg_result, fs_reg(31));
929 inst->predicate = BRW_PREDICATE_NORMAL;
930 break;
931 }
932
933 case nir_op_find_lsb:
934 bld.FBL(result, op[0]);
935 break;
936
937 case nir_op_ubitfield_extract:
938 case nir_op_ibitfield_extract:
939 bld.BFE(result, op[2], op[1], op[0]);
940 break;
941 case nir_op_bfm:
942 bld.BFI1(result, op[0], op[1]);
943 break;
944 case nir_op_bfi:
945 bld.BFI2(result, op[0], op[1], op[2]);
946 break;
947
948 case nir_op_bitfield_insert:
949 unreachable("not reached: should be handled by "
950 "lower_instructions::bitfield_insert_to_bfm_bfi");
951
952 case nir_op_ishl:
953 bld.SHL(result, op[0], op[1]);
954 break;
955 case nir_op_ishr:
956 bld.ASR(result, op[0], op[1]);
957 break;
958 case nir_op_ushr:
959 bld.SHR(result, op[0], op[1]);
960 break;
961
962 case nir_op_pack_half_2x16_split:
963 bld.emit(FS_OPCODE_PACK_HALF_2x16_SPLIT, result, op[0], op[1]);
964 break;
965
966 case nir_op_ffma:
967 inst = bld.MAD(result, op[2], op[1], op[0]);
968 inst->saturate = instr->dest.saturate;
969 break;
970
971 case nir_op_flrp:
972 inst = bld.LRP(result, op[0], op[1], op[2]);
973 inst->saturate = instr->dest.saturate;
974 break;
975
976 case nir_op_bcsel:
977 if (optimize_frontfacing_ternary(instr, result))
978 return;
979
980 bld.CMP(bld.null_reg_d(), op[0], fs_reg(0), BRW_CONDITIONAL_NZ);
981 inst = bld.SEL(result, op[1], op[2]);
982 inst->predicate = BRW_PREDICATE_NORMAL;
983 break;
984
985 default:
986 unreachable("unhandled instruction");
987 }
988
989 /* If we need to do a boolean resolve, replace the result with -(x & 1)
990 * to sign extend the low bit to 0/~0
991 */
992 if (devinfo->gen <= 5 &&
993 (instr->instr.pass_flags & BRW_NIR_BOOLEAN_MASK) == BRW_NIR_BOOLEAN_NEEDS_RESOLVE) {
994 fs_reg masked = vgrf(glsl_type::int_type);
995 bld.AND(masked, result, fs_reg(1));
996 masked.negate = true;
997 bld.MOV(retype(result, BRW_REGISTER_TYPE_D), masked);
998 }
999 }
1000
1001 void
1002 fs_visitor::nir_emit_load_const(const fs_builder &bld,
1003 nir_load_const_instr *instr)
1004 {
1005 fs_reg reg = bld.vgrf(BRW_REGISTER_TYPE_D, instr->def.num_components);
1006
1007 for (unsigned i = 0; i < instr->def.num_components; i++)
1008 bld.MOV(offset(reg, bld, i), fs_reg(instr->value.i[i]));
1009
1010 nir_ssa_values[instr->def.index] = reg;
1011 }
1012
1013 void
1014 fs_visitor::nir_emit_undef(const fs_builder &bld, nir_ssa_undef_instr *instr)
1015 {
1016 nir_ssa_values[instr->def.index] = bld.vgrf(BRW_REGISTER_TYPE_D,
1017 instr->def.num_components);
1018 }
1019
1020 static fs_reg
1021 fs_reg_for_nir_reg(fs_visitor *v, nir_register *nir_reg,
1022 unsigned base_offset, nir_src *indirect)
1023 {
1024 fs_reg reg;
1025
1026 assert(!nir_reg->is_global);
1027
1028 reg = v->nir_locals[nir_reg->index];
1029
1030 reg = offset(reg, v->bld, base_offset * nir_reg->num_components);
1031 if (indirect) {
1032 int multiplier = nir_reg->num_components * (v->dispatch_width / 8);
1033
1034 reg.reladdr = new(v->mem_ctx) fs_reg(v->vgrf(glsl_type::int_type));
1035 v->bld.MUL(*reg.reladdr, v->get_nir_src(*indirect),
1036 fs_reg(multiplier));
1037 }
1038
1039 return reg;
1040 }
1041
1042 fs_reg
1043 fs_visitor::get_nir_src(nir_src src)
1044 {
1045 fs_reg reg;
1046 if (src.is_ssa) {
1047 reg = nir_ssa_values[src.ssa->index];
1048 } else {
1049 reg = fs_reg_for_nir_reg(this, src.reg.reg, src.reg.base_offset,
1050 src.reg.indirect);
1051 }
1052
1053 /* to avoid floating-point denorm flushing problems, set the type by
1054 * default to D - instructions that need floating point semantics will set
1055 * this to F if they need to
1056 */
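/* e.g. an SSA value holding the integer 1 (bits 0x00000001) would be a
 * denormal if copied with a float-typed MOV and could be flushed to
 * zero; integer-typed MOVs copy the bits verbatim.
 */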
1057 return retype(reg, BRW_REGISTER_TYPE_D);
1058 }
1059
1060 fs_reg
1061 fs_visitor::get_nir_dest(nir_dest dest)
1062 {
1063 if (dest.is_ssa) {
1064 nir_ssa_values[dest.ssa.index] = bld.vgrf(BRW_REGISTER_TYPE_F,
1065 dest.ssa.num_components);
1066 return nir_ssa_values[dest.ssa.index];
1067 }
1068
1069 return fs_reg_for_nir_reg(this, dest.reg.reg, dest.reg.base_offset,
1070 dest.reg.indirect);
1071 }
1072
1073 fs_reg
1074 fs_visitor::get_nir_image_deref(const nir_deref_var *deref)
1075 {
1076 fs_reg image(UNIFORM, deref->var->data.driver_location,
1077 BRW_REGISTER_TYPE_UD);
1078
1079 if (deref->deref.child) {
1080 const nir_deref_array *deref_array =
1081 nir_deref_as_array(deref->deref.child);
1082 assert(deref->deref.child->deref_type == nir_deref_type_array &&
1083 deref_array->deref.child == NULL);
1084 const unsigned size = glsl_get_length(deref->var->type);
1085 const unsigned base = MIN2(deref_array->base_offset, size - 1);
1086
1087 image = offset(image, bld, base * BRW_IMAGE_PARAM_SIZE);
1088
1089 if (deref_array->deref_array_type == nir_deref_array_type_indirect) {
1090 fs_reg *tmp = new(mem_ctx) fs_reg(vgrf(glsl_type::int_type));
1091
1092 if (devinfo->gen == 7 && !devinfo->is_haswell) {
1093 /* IVB hangs when trying to access an invalid surface index with
1094 * the dataport. According to the spec "if the index used to
1095 * select an individual element is negative or greater than or
1096 * equal to the size of the array, the results of the operation
1097 * are undefined but may not lead to termination" -- which is one
1098 * of the possible outcomes of the hang. Clamp the index to
1099 * prevent access outside of the array bounds.
1100 */
1101 bld.emit_minmax(*tmp, retype(get_nir_src(deref_array->indirect),
1102 BRW_REGISTER_TYPE_UD),
1103 fs_reg(size - base - 1), BRW_CONDITIONAL_L);
1104 } else {
1105 bld.MOV(*tmp, get_nir_src(deref_array->indirect));
1106 }
1107
1108 bld.MUL(*tmp, *tmp, fs_reg(BRW_IMAGE_PARAM_SIZE));
1109 image.reladdr = tmp;
1110 }
1111 }
1112
1113 return image;
1114 }
1115
1116 void
1117 fs_visitor::emit_percomp(const fs_builder &bld, const fs_inst &inst,
1118 unsigned wr_mask)
1119 {
1120 for (unsigned i = 0; i < 4; i++) {
1121 if (!((wr_mask >> i) & 1))
1122 continue;
1123
1124 fs_inst *new_inst = new(mem_ctx) fs_inst(inst);
1125 new_inst->dst = offset(new_inst->dst, bld, i);
1126 for (unsigned j = 0; j < new_inst->sources; j++)
1127 if (new_inst->src[j].file == GRF)
1128 new_inst->src[j] = offset(new_inst->src[j], bld, i);
1129
1130 bld.emit(new_inst);
1131 }
1132 }
1133
1134 /**
1135 * Get the matching channel register datatype for an image intrinsic of the
1136 * specified GLSL image type.
1137 */
1138 static brw_reg_type
1139 get_image_base_type(const glsl_type *type)
1140 {
1141 switch ((glsl_base_type)type->sampler_type) {
1142 case GLSL_TYPE_UINT:
1143 return BRW_REGISTER_TYPE_UD;
1144 case GLSL_TYPE_INT:
1145 return BRW_REGISTER_TYPE_D;
1146 case GLSL_TYPE_FLOAT:
1147 return BRW_REGISTER_TYPE_F;
1148 default:
1149 unreachable("Not reached.");
1150 }
1151 }
1152
1153 /**
1154 * Get the appropriate atomic op for an image atomic intrinsic.
1155 */
1156 static unsigned
1157 get_image_atomic_op(nir_intrinsic_op op, const glsl_type *type)
1158 {
1159 switch (op) {
1160 case nir_intrinsic_image_atomic_add:
1161 return BRW_AOP_ADD;
1162 case nir_intrinsic_image_atomic_min:
1163 return (get_image_base_type(type) == BRW_REGISTER_TYPE_D ?
1164 BRW_AOP_IMIN : BRW_AOP_UMIN);
1165 case nir_intrinsic_image_atomic_max:
1166 return (get_image_base_type(type) == BRW_REGISTER_TYPE_D ?
1167 BRW_AOP_IMAX : BRW_AOP_UMAX);
1168 case nir_intrinsic_image_atomic_and:
1169 return BRW_AOP_AND;
1170 case nir_intrinsic_image_atomic_or:
1171 return BRW_AOP_OR;
1172 case nir_intrinsic_image_atomic_xor:
1173 return BRW_AOP_XOR;
1174 case nir_intrinsic_image_atomic_exchange:
1175 return BRW_AOP_MOV;
1176 case nir_intrinsic_image_atomic_comp_swap:
1177 return BRW_AOP_CMPWR;
1178 default:
1179 unreachable("Not reachable.");
1180 }
1181 }
1182
1183 void
1184 fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr)
1185 {
1186 fs_reg dest;
1187 if (nir_intrinsic_infos[instr->intrinsic].has_dest)
1188 dest = get_nir_dest(instr->dest);
1189
1190 bool has_indirect = false;
1191
1192 switch (instr->intrinsic) {
1193 case nir_intrinsic_discard:
1194 case nir_intrinsic_discard_if: {
1195 /* We track our discarded pixels in f0.1. By predicating on it, we can
1196 * update just the flag bits that aren't yet discarded. If there's no
1197 * condition, we emit a CMP of g0 != g0, so all currently executing
1198 * channels will get turned off.
1199 */
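/* For illustration: in the unconditional case, g0 always equals itself,
 * so the NZ comparison writes 0 for every channel it runs on; since the
 * CMP is predicated on f0.1, only the still-live channels (bit == 1)
 * are updated and previously discarded channels keep their cleared bit.
 */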
1200 fs_inst *cmp;
1201 if (instr->intrinsic == nir_intrinsic_discard_if) {
1202 cmp = bld.CMP(bld.null_reg_f(), get_nir_src(instr->src[0]),
1203 fs_reg(0), BRW_CONDITIONAL_Z);
1204 } else {
1205 fs_reg some_reg = fs_reg(retype(brw_vec8_grf(0, 0),
1206 BRW_REGISTER_TYPE_UW));
1207 cmp = bld.CMP(bld.null_reg_f(), some_reg, some_reg, BRW_CONDITIONAL_NZ);
1208 }
1209 cmp->predicate = BRW_PREDICATE_NORMAL;
1210 cmp->flag_subreg = 1;
1211
1212 if (devinfo->gen >= 6) {
1213 emit_discard_jump();
1214 }
1215 break;
1216 }
1217
1218 case nir_intrinsic_atomic_counter_inc:
1219 case nir_intrinsic_atomic_counter_dec:
1220 case nir_intrinsic_atomic_counter_read: {
1221 using namespace surface_access;
1222
1223 /* Get the arguments of the atomic intrinsic. */
1224 const fs_reg offset = get_nir_src(instr->src[0]);
1225 const unsigned surface = (stage_prog_data->binding_table.abo_start +
1226 instr->const_index[0]);
1227 fs_reg tmp;
1228
1229 /* Emit a surface read or atomic op. */
1230 switch (instr->intrinsic) {
1231 case nir_intrinsic_atomic_counter_read:
1232 tmp = emit_untyped_read(bld, fs_reg(surface), offset, 1, 1);
1233 break;
1234
1235 case nir_intrinsic_atomic_counter_inc:
1236 tmp = emit_untyped_atomic(bld, fs_reg(surface), offset, fs_reg(),
1237 fs_reg(), 1, 1, BRW_AOP_INC);
1238 break;
1239
1240 case nir_intrinsic_atomic_counter_dec:
1241 tmp = emit_untyped_atomic(bld, fs_reg(surface), offset, fs_reg(),
1242 fs_reg(), 1, 1, BRW_AOP_PREDEC);
1243 break;
1244
1245 default:
1246 unreachable("Unreachable");
1247 }
1248
1249 /* Assign the result. */
1250 bld.MOV(retype(dest, BRW_REGISTER_TYPE_UD), tmp);
1251
1252 /* Mark the surface as used. */
1253 brw_mark_surface_used(stage_prog_data, surface);
1254 break;
1255 }
1256
1257 case nir_intrinsic_image_load:
1258 case nir_intrinsic_image_store:
1259 case nir_intrinsic_image_atomic_add:
1260 case nir_intrinsic_image_atomic_min:
1261 case nir_intrinsic_image_atomic_max:
1262 case nir_intrinsic_image_atomic_and:
1263 case nir_intrinsic_image_atomic_or:
1264 case nir_intrinsic_image_atomic_xor:
1265 case nir_intrinsic_image_atomic_exchange:
1266 case nir_intrinsic_image_atomic_comp_swap: {
1267 using namespace image_access;
1268
1269 /* Get the referenced image variable and type. */
1270 const nir_variable *var = instr->variables[0]->var;
1271 const glsl_type *type = var->type->without_array();
1272 const brw_reg_type base_type = get_image_base_type(type);
1273
1274 /* Get some metadata from the image intrinsic. */
1275 const nir_intrinsic_info *info = &nir_intrinsic_infos[instr->intrinsic];
1276 const unsigned arr_dims = type->sampler_array ? 1 : 0;
1277 const unsigned surf_dims = type->coordinate_components() - arr_dims;
1278 const mesa_format format =
1279 (var->data.image.write_only ? MESA_FORMAT_NONE :
1280 _mesa_get_shader_image_format(var->data.image.format));
1281
1282 /* Get the arguments of the image intrinsic. */
1283 const fs_reg image = get_nir_image_deref(instr->variables[0]);
1284 const fs_reg addr = retype(get_nir_src(instr->src[0]),
1285 BRW_REGISTER_TYPE_UD);
1286 const fs_reg src0 = (info->num_srcs >= 3 ?
1287 retype(get_nir_src(instr->src[2]), base_type) :
1288 fs_reg());
1289 const fs_reg src1 = (info->num_srcs >= 4 ?
1290 retype(get_nir_src(instr->src[3]), base_type) :
1291 fs_reg());
1292 fs_reg tmp;
1293
1294 /* Emit an image load, store or atomic op. */
1295 if (instr->intrinsic == nir_intrinsic_image_load)
1296 tmp = emit_image_load(bld, image, addr, surf_dims, arr_dims, format);
1297
1298 else if (instr->intrinsic == nir_intrinsic_image_store)
1299 emit_image_store(bld, image, addr, src0, surf_dims, arr_dims, format);
1300
1301 else
1302 tmp = emit_image_atomic(bld, image, addr, src0, src1,
1303 surf_dims, arr_dims, info->dest_components,
1304 get_image_atomic_op(instr->intrinsic, type));
1305
1306 /* Assign the result. */
1307 for (unsigned c = 0; c < info->dest_components; ++c)
1308 bld.MOV(offset(retype(dest, base_type), bld, c),
1309 offset(tmp, bld, c));
1310 break;
1311 }
1312
1313 case nir_intrinsic_memory_barrier: {
1314 const fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_UD, 16 / dispatch_width);
1315 bld.emit(SHADER_OPCODE_MEMORY_FENCE, tmp)
1316 ->regs_written = 2;
1317 break;
1318 }
1319
1320 case nir_intrinsic_image_size: {
1321 /* Get the referenced image variable and type. */
1322 const nir_variable *var = instr->variables[0]->var;
1323 const glsl_type *type = var->type->without_array();
1324
1325 /* Get the size of the image. */
1326 const fs_reg image = get_nir_image_deref(instr->variables[0]);
1327 const fs_reg size = offset(image, bld, BRW_IMAGE_PARAM_SIZE_OFFSET);
1328
1329 /* For 1DArray image types, the array index is stored in the Z component.
1330 * Fix this by swizzling the Z component to the Y component.
1331 */
1332 const bool is_1d_array_image =
1333 type->sampler_dimensionality == GLSL_SAMPLER_DIM_1D &&
1334 type->sampler_array;
1335
1336 /* For CubeArray images, we should count the number of cubes instead
1337 * of the number of faces. Fix it by dividing the (Z component) by 6.
1338 */
1339 const bool is_cube_array_image =
1340 type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE &&
1341 type->sampler_array;
1342
1343 /* Copy all the components. */
1344 const nir_intrinsic_info *info = &nir_intrinsic_infos[instr->intrinsic];
1345 for (unsigned c = 0; c < info->dest_components; ++c) {
1346 if ((int)c >= type->coordinate_components()) {
1347 bld.MOV(offset(retype(dest, BRW_REGISTER_TYPE_D), bld, c),
1348 fs_reg(1));
1349 } else if (c == 1 && is_1d_array_image) {
1350 bld.MOV(offset(retype(dest, BRW_REGISTER_TYPE_D), bld, c),
1351 offset(size, bld, 2));
1352 } else if (c == 2 && is_cube_array_image) {
1353 bld.emit(SHADER_OPCODE_INT_QUOTIENT,
1354 offset(retype(dest, BRW_REGISTER_TYPE_D), bld, c),
1355 offset(size, bld, c), fs_reg(6));
1356 } else {
1357 bld.MOV(offset(retype(dest, BRW_REGISTER_TYPE_D), bld, c),
1358 offset(size, bld, c));
1359 }
1360 }
1361
1362 break;
1363 }
1364
1365 case nir_intrinsic_image_samples:
1366 /* The driver does not support multi-sampled images. */
1367 bld.MOV(retype(dest, BRW_REGISTER_TYPE_D), fs_reg(1));
1368 break;
1369
1370 case nir_intrinsic_load_front_face:
1371 bld.MOV(retype(dest, BRW_REGISTER_TYPE_D),
1372 *emit_frontfacing_interpolation());
1373 break;
1374
1375 case nir_intrinsic_load_vertex_id:
1376 unreachable("should be lowered by lower_vertex_id()");
1377
1378 case nir_intrinsic_load_vertex_id_zero_base:
1379 case nir_intrinsic_load_base_vertex:
1380 case nir_intrinsic_load_instance_id:
1381 case nir_intrinsic_load_sample_mask_in:
1382 case nir_intrinsic_load_sample_id: {
1383 gl_system_value sv = nir_system_value_from_intrinsic(instr->intrinsic);
1384 fs_reg val = nir_system_values[sv];
1385 assert(val.file != BAD_FILE);
1386 dest.type = val.type;
1387 bld.MOV(dest, val);
1388 break;
1389 }
1390
1391 case nir_intrinsic_load_sample_pos: {
1392 fs_reg sample_pos = nir_system_values[SYSTEM_VALUE_SAMPLE_POS];
1393 assert(sample_pos.file != BAD_FILE);
1394 dest.type = sample_pos.type;
1395 bld.MOV(dest, sample_pos);
1396 bld.MOV(offset(dest, bld, 1), offset(sample_pos, bld, 1));
1397 break;
1398 }
1399
1400 case nir_intrinsic_load_uniform_indirect:
1401 has_indirect = true;
1402 /* fallthrough */
1403 case nir_intrinsic_load_uniform: {
1404 fs_reg uniform_reg(UNIFORM, instr->const_index[0]);
1405 uniform_reg.reg_offset = instr->const_index[1];
1406
1407 for (unsigned j = 0; j < instr->num_components; j++) {
1408 fs_reg src = offset(retype(uniform_reg, dest.type), bld, j);
1409 if (has_indirect)
1410 src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[0]));
1411
1412 bld.MOV(dest, src);
1413 dest = offset(dest, bld, 1);
1414 }
1415 break;
1416 }
1417
1418 case nir_intrinsic_load_ubo_indirect:
1419 has_indirect = true;
1420 /* fallthrough */
1421 case nir_intrinsic_load_ubo: {
1422 nir_const_value *const_index = nir_src_as_const_value(instr->src[0]);
1423 fs_reg surf_index;
1424
1425 if (const_index) {
1426 surf_index = fs_reg(stage_prog_data->binding_table.ubo_start +
1427 const_index->u[0]);
1428 } else {
1429 /* The block index is not a constant. Evaluate the index expression
1430 * per-channel and add the base UBO index; we have to select a value
1431 * from any live channel.
1432 */
1433 surf_index = vgrf(glsl_type::uint_type);
1434 bld.ADD(surf_index, get_nir_src(instr->src[0]),
1435 fs_reg(stage_prog_data->binding_table.ubo_start));
1436 surf_index = bld.emit_uniformize(surf_index);
1437
1438 /* Assume this may touch any UBO. It would be nice to provide
1439 * a tighter bound, but the array information is already lowered away.
1440 */
1441 brw_mark_surface_used(prog_data,
1442 stage_prog_data->binding_table.ubo_start +
1443 shader_prog->NumBufferInterfaceBlocks - 1);
1444 }
1445
1446 if (has_indirect) {
1447 /* Turn the byte offset into a dword offset. */
1448 fs_reg base_offset = vgrf(glsl_type::int_type);
1449 bld.SHR(base_offset, retype(get_nir_src(instr->src[1]),
1450 BRW_REGISTER_TYPE_D),
1451 fs_reg(2));
1452
1453 unsigned vec4_offset = instr->const_index[0] / 4;
1454 for (int i = 0; i < instr->num_components; i++)
1455 VARYING_PULL_CONSTANT_LOAD(bld, offset(dest, bld, i), surf_index,
1456 base_offset, vec4_offset + i);
1457 } else {
1458 fs_reg packed_consts = vgrf(glsl_type::float_type);
1459 packed_consts.type = dest.type;
1460
1461 fs_reg const_offset_reg((unsigned) instr->const_index[0] & ~15);
1462 bld.emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, packed_consts,
1463 surf_index, const_offset_reg);
1464
1465 for (unsigned i = 0; i < instr->num_components; i++) {
1466 packed_consts.set_smear(instr->const_index[0] % 16 / 4 + i);
1467
1468 /* The std140 packing rules don't allow vectors to cross 16-byte
1469 * boundaries, and a reg is 32 bytes.
1470 */
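/* e.g. with the illustrative values const_index[0] == 20 and i == 1,
 * set_smear(20 % 16 / 4 + 1) == set_smear(2) selects the third dword of
 * the pulled vec4; the resulting subreg_offset of 8 bytes is well below
 * the 32-byte register size checked below.
 */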
1471 assert(packed_consts.subreg_offset < 32);
1472
1473 bld.MOV(dest, packed_consts);
1474 dest = offset(dest, bld, 1);
1475 }
1476 }
1477 break;
1478 }
1479
1480 case nir_intrinsic_load_ssbo_indirect:
1481 has_indirect = true;
1482 /* fallthrough */
1483 case nir_intrinsic_load_ssbo: {
1484 assert(devinfo->gen >= 7);
1485
1486 nir_const_value *const_uniform_block =
1487 nir_src_as_const_value(instr->src[0]);
1488
1489 fs_reg surf_index;
1490 if (const_uniform_block) {
1491 unsigned index = stage_prog_data->binding_table.ubo_start +
1492 const_uniform_block->u[0];
1493 surf_index = fs_reg(index);
1494 brw_mark_surface_used(prog_data, index);
1495 } else {
1496 surf_index = vgrf(glsl_type::uint_type);
1497 bld.ADD(surf_index, get_nir_src(instr->src[0]),
1498 fs_reg(stage_prog_data->binding_table.ubo_start));
1499 surf_index = bld.emit_uniformize(surf_index);
1500
1501 /* Assume this may touch any UBO. It would be nice to provide
1502 * a tighter bound, but the array information is already lowered away.
1503 */
1504 brw_mark_surface_used(prog_data,
1505 stage_prog_data->binding_table.ubo_start +
1506 shader_prog->NumBufferInterfaceBlocks - 1);
1507 }
1508
1509 /* Get the offset to read from */
1510 fs_reg offset_reg = vgrf(glsl_type::uint_type);
1511 unsigned const_offset_bytes = 0;
1512 if (has_indirect) {
1513 bld.MOV(offset_reg, get_nir_src(instr->src[1]));
1514 } else {
1515 const_offset_bytes = instr->const_index[0];
1516 bld.MOV(offset_reg, fs_reg(const_offset_bytes));
1517 }
1518
1519 /* Read the vector */
1520 for (int i = 0; i < instr->num_components; i++) {
1521 fs_reg read_result = emit_untyped_read(bld, surf_index, offset_reg,
1522 1 /* dims */, 1 /* size */,
1523 BRW_PREDICATE_NONE);
1524 read_result.type = dest.type;
1525 bld.MOV(dest, read_result);
1526 dest = offset(dest, bld, 1);
1527
1528 /* Vector components are stored contiguous in memory */
1529 if (i < instr->num_components) {
1530 if (!has_indirect) {
1531 const_offset_bytes += 4;
1532 bld.MOV(offset_reg, fs_reg(const_offset_bytes));
1533 } else {
1534 bld.ADD(offset_reg, offset_reg, brw_imm_ud(4));
1535 }
1536 }
1537 }
1538
1539 break;
1540 }
1541
1542 case nir_intrinsic_load_input_indirect:
1543 has_indirect = true;
1544 /* fallthrough */
1545 case nir_intrinsic_load_input: {
1546 unsigned index = 0;
1547 for (unsigned j = 0; j < instr->num_components; j++) {
1548 fs_reg src = offset(retype(nir_inputs, dest.type), bld,
1549 instr->const_index[0] + index);
1550 if (has_indirect)
1551 src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[0]));
1552 index++;
1553
1554 bld.MOV(dest, src);
1555 dest = offset(dest, bld, 1);
1556 }
1557 break;
1558 }
1559
1560 /* Handle ARB_gpu_shader5 interpolation intrinsics
1561 *
1562 * It's worth a quick word of explanation as to why we handle the full
1563 * variable-based interpolation intrinsic rather than a lowered version
1564 * like we do for other inputs. We have to do that because the way
1565 * we set up inputs doesn't allow us to use the already setup inputs for
1566 * interpolation. At the beginning of the shader, we go through all of
1567 * the input variables and do the initial interpolation and put it in
1568 * the nir_inputs array based on its location as determined in
1569 * nir_lower_io. If the input isn't used, dead code cleans up and
1570 * everything works fine. However, when we get to the ARB_gpu_shader5
1571 * interpolation intrinsics, we need to reinterpolate the input
1572 * differently. If we used an intrinsic that just had an index it would
1573 * only give us the offset into the nir_inputs array. However, this is
1574 * useless because that value is post-interpolation and we need
1575 * pre-interpolation. In order to get the actual location of the bits
1576 * we get from the vertex fetching hardware, we need the variable.
1577 */
1578 case nir_intrinsic_interp_var_at_centroid:
1579 case nir_intrinsic_interp_var_at_sample:
1580 case nir_intrinsic_interp_var_at_offset: {
1581 assert(stage == MESA_SHADER_FRAGMENT);
1582
1583 ((struct brw_wm_prog_data *) prog_data)->pulls_bary = true;
1584
1585 fs_reg dst_xy = bld.vgrf(BRW_REGISTER_TYPE_F, 2);
1586
1587 /* For most messages, we need one reg of ignored data; the hardware
1588 * requires mlen==1 even when there is no payload. In the per-slot
1589 * offset case, we'll replace this with the proper source data.
1590 */
1591 fs_reg src = vgrf(glsl_type::float_type);
1592 int mlen = 1; /* one reg unless overridden */
1593 fs_inst *inst;
1594
1595 switch (instr->intrinsic) {
1596 case nir_intrinsic_interp_var_at_centroid:
1597 inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_CENTROID,
1598 dst_xy, src, fs_reg(0u));
1599 break;
1600
1601 case nir_intrinsic_interp_var_at_sample: {
1602 /* XXX: We should probably handle non-constant sample id's */
1603 nir_const_value *const_sample = nir_src_as_const_value(instr->src[0]);
1604 assert(const_sample);
1605 unsigned msg_data = const_sample ? const_sample->i[0] << 4 : 0;
1606 inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_SAMPLE, dst_xy, src,
1607 fs_reg(msg_data));
1608 break;
1609 }
1610
1611 case nir_intrinsic_interp_var_at_offset: {
1612 nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]);
1613
1614 if (const_offset) {
1615 unsigned off_x = MIN2((int)(const_offset->f[0] * 16), 7) & 0xf;
1616 unsigned off_y = MIN2((int)(const_offset->f[1] * 16), 7) & 0xf;
1617
1618 inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET, dst_xy, src,
1619 fs_reg(off_x | (off_y << 4)));
1620 } else {
1621 src = vgrf(glsl_type::ivec2_type);
1622 fs_reg offset_src = retype(get_nir_src(instr->src[0]),
1623 BRW_REGISTER_TYPE_F);
1624 for (int i = 0; i < 2; i++) {
1625 fs_reg temp = vgrf(glsl_type::float_type);
1626 bld.MUL(temp, offset(offset_src, bld, i), fs_reg(16.0f));
1627 fs_reg itemp = vgrf(glsl_type::int_type);
1628 bld.MOV(itemp, temp); /* float to int */
1629
1630 /* Clamp the upper end of the range to +7/16.
1631 * ARB_gpu_shader5 requires that we support a maximum offset
1632 * of +0.5, which isn't representable in a S0.4 value -- if
1633 * we didn't clamp it, we'd end up with -8/16, which is the
1634 * opposite of what the shader author wanted.
1635 *
1636 * This is legal due to ARB_gpu_shader5's quantization
1637 * rules:
1638 *
1639 * "Not all values of <offset> may be supported; x and y
1640 * offsets may be rounded to fixed-point values with the
1641 * number of fraction bits given by the
1642 * implementation-dependent constant
1643 * FRAGMENT_INTERPOLATION_OFFSET_BITS"
1644 */
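/* e.g. an offset of 0.5 scales to 8, which does not fit in S0.4 (max
 * +7/16); the SEL below clamps it to 7, i.e. +7/16 = 0.4375, instead of
 * letting it wrap around to -8/16.
 */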
1645 set_condmod(BRW_CONDITIONAL_L,
1646 bld.SEL(offset(src, bld, i), itemp, fs_reg(7)));
1647 }
1648
1649 mlen = 2 * dispatch_width / 8;
1650 inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET, dst_xy, src,
1651 fs_reg(0u));
1652 }
1653 break;
1654 }
1655
1656 default:
1657 unreachable("Invalid intrinsic");
1658 }
1659
1660 inst->mlen = mlen;
1661 /* 2 floats per slot returned */
1662 inst->regs_written = 2 * dispatch_width / 8;
1663 inst->pi_noperspective = instr->variables[0]->var->data.interpolation ==
1664 INTERP_QUALIFIER_NOPERSPECTIVE;
1665
1666 for (unsigned j = 0; j < instr->num_components; j++) {
1667 fs_reg src = interp_reg(instr->variables[0]->var->data.location, j);
1668 src.type = dest.type;
1669
1670 bld.emit(FS_OPCODE_LINTERP, dest, dst_xy, src);
1671 dest = offset(dest, bld, 1);
1672 }
1673 break;
1674 }
1675
1676 case nir_intrinsic_store_ssbo_indirect:
1677 has_indirect = true;
1678 /* fallthrough */
1679 case nir_intrinsic_store_ssbo: {
1680 assert(devinfo->gen >= 7);
1681
1682 /* Block index */
1683 fs_reg surf_index;
1684 nir_const_value *const_uniform_block =
1685 nir_src_as_const_value(instr->src[1]);
1686 if (const_uniform_block) {
1687 unsigned index = stage_prog_data->binding_table.ubo_start +
1688 const_uniform_block->u[0];
1689 surf_index = fs_reg(index);
1690 brw_mark_surface_used(prog_data, index);
1691 } else {
1692 surf_index = vgrf(glsl_type::uint_type);
1693 bld.ADD(surf_index, get_nir_src(instr->src[1]),
1694 fs_reg(stage_prog_data->binding_table.ubo_start));
1695 surf_index = bld.emit_uniformize(surf_index);
1696
1697 brw_mark_surface_used(prog_data,
1698 stage_prog_data->binding_table.ubo_start +
1699 shader_prog->NumBufferInterfaceBlocks - 1);
1700 }
1701
1702 /* Offset */
1703 fs_reg offset_reg = vgrf(glsl_type::uint_type);
1704 unsigned const_offset_bytes = 0;
1705 if (has_indirect) {
1706 bld.MOV(offset_reg, get_nir_src(instr->src[2]));
1707 } else {
1708 const_offset_bytes = instr->const_index[0];
1709 bld.MOV(offset_reg, fs_reg(const_offset_bytes));
1710 }
1711
1712 /* Value */
1713 fs_reg val_reg = get_nir_src(instr->src[0]);
1714
1715 /* Writemask */
1716 unsigned writemask = instr->const_index[1];
1717
1718 /* Write each component present in the writemask */
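/* e.g. for an illustrative vec4 store with writemask 0b1101: component
 * 0 is written at the base offset, component 1 is skipped, so when
 * component 2 is reached skipped_channels == 2 and the offset register
 * is advanced by 8 bytes before that write; component 3 then advances
 * it by another 4 bytes.
 */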
1719 unsigned skipped_channels = 0;
1720 for (int i = 0; i < instr->num_components; i++) {
1721 int component_mask = 1 << i;
1722 if (writemask & component_mask) {
1723 if (skipped_channels) {
1724 if (!has_indirect) {
1725 const_offset_bytes += 4 * skipped_channels;
1726 bld.MOV(offset_reg, fs_reg(const_offset_bytes));
1727 } else {
1728 bld.ADD(offset_reg, offset_reg,
1729 brw_imm_ud(4 * skipped_channels));
1730 }
1731 skipped_channels = 0;
1732 }
1733
1734 emit_untyped_write(bld, surf_index, offset_reg,
1735 offset(val_reg, bld, i),
1736 1 /* dims */, 1 /* size */,
1737 BRW_PREDICATE_NONE);
1738 }
1739
1740 skipped_channels++;
1741 }
1742 break;
1743 }
1744
1745 case nir_intrinsic_store_output_indirect:
1746 has_indirect = true;
1747 /* fallthrough */
1748 case nir_intrinsic_store_output: {
1749 fs_reg src = get_nir_src(instr->src[0]);
1750 unsigned index = 0;
1751 for (unsigned j = 0; j < instr->num_components; j++) {
1752 fs_reg new_dest = offset(retype(nir_outputs, src.type), bld,
1753 instr->const_index[0] + index);
1754 if (has_indirect)
1755 src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[1]));
1756 index++;
1757 bld.MOV(new_dest, src);
1758 src = offset(src, bld, 1);
1759 }
1760 break;
1761 }
1762
1763 case nir_intrinsic_barrier:
1764 emit_barrier();
1765 if (stage == MESA_SHADER_COMPUTE)
1766 ((struct brw_cs_prog_data *) prog_data)->uses_barrier = true;
1767 break;
1768
1769 case nir_intrinsic_load_local_invocation_id:
1770 case nir_intrinsic_load_work_group_id: {
1771 gl_system_value sv = nir_system_value_from_intrinsic(instr->intrinsic);
1772 fs_reg val = nir_system_values[sv];
1773 assert(val.file != BAD_FILE);
1774 dest.type = val.type;
1775 for (unsigned i = 0; i < 3; i++)
1776 bld.MOV(offset(dest, bld, i), offset(val, bld, i));
1777 break;
1778 }
1779
1780 case nir_intrinsic_ssbo_atomic_add:
1781 nir_emit_ssbo_atomic(bld, BRW_AOP_ADD, instr);
1782 break;
1783 case nir_intrinsic_ssbo_atomic_min:
1784 if (dest.type == BRW_REGISTER_TYPE_D)
1785 nir_emit_ssbo_atomic(bld, BRW_AOP_IMIN, instr);
1786 else
1787 nir_emit_ssbo_atomic(bld, BRW_AOP_UMIN, instr);
1788 break;
1789 case nir_intrinsic_ssbo_atomic_max:
1790 if (dest.type == BRW_REGISTER_TYPE_D)
1791 nir_emit_ssbo_atomic(bld, BRW_AOP_IMAX, instr);
1792 else
1793 nir_emit_ssbo_atomic(bld, BRW_AOP_UMAX, instr);
1794 break;
1795 case nir_intrinsic_ssbo_atomic_and:
1796 nir_emit_ssbo_atomic(bld, BRW_AOP_AND, instr);
1797 break;
1798 case nir_intrinsic_ssbo_atomic_or:
1799 nir_emit_ssbo_atomic(bld, BRW_AOP_OR, instr);
1800 break;
1801 case nir_intrinsic_ssbo_atomic_xor:
1802 nir_emit_ssbo_atomic(bld, BRW_AOP_XOR, instr);
1803 break;
1804 case nir_intrinsic_ssbo_atomic_exchange:
1805 nir_emit_ssbo_atomic(bld, BRW_AOP_MOV, instr);
1806 break;
1807 case nir_intrinsic_ssbo_atomic_comp_swap:
1808 nir_emit_ssbo_atomic(bld, BRW_AOP_CMPWR, instr);
1809 break;
1810
1811 case nir_intrinsic_get_buffer_size: {
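      /* The buffer size is fetched with an FS_OPCODE_GET_BUFFER_SIZE message
       * against the SSBO's binding table entry, using a single-register
       * payload holding LOD = 0.
       */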
1812 nir_const_value *const_uniform_block = nir_src_as_const_value(instr->src[0]);
1813 unsigned ubo_index = const_uniform_block ? const_uniform_block->u[0] : 0;
1814 int reg_width = dispatch_width / 8;
1815
1816 assert(shader->base.UniformBlocks[ubo_index].IsShaderStorage);
1817
1818 /* Set LOD = 0 */
1819 fs_reg source = fs_reg(0);
1820
1821 int mlen = 1 * reg_width;
1822 fs_reg src_payload = fs_reg(GRF, alloc.allocate(mlen),
1823 BRW_REGISTER_TYPE_UD);
1824 bld.LOAD_PAYLOAD(src_payload, &source, 1, 0);
1825
1826 fs_reg surf_index = fs_reg(prog_data->binding_table.ubo_start + ubo_index);
1827 fs_inst *inst = bld.emit(FS_OPCODE_GET_BUFFER_SIZE, dest,
1828 src_payload, surf_index);
1829       inst->header_size = 0;
1830       inst->mlen = mlen;
1832 break;
1833 }
1834
1835 case nir_intrinsic_load_num_work_groups: {
1836 assert(devinfo->gen >= 7);
1837 assert(stage == MESA_SHADER_COMPUTE);
1838
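      /* gl_NumWorkGroups is provided through a buffer surface at
       * work_groups_start in the binding table; each GLuint component is
       * pulled out with an untyped read at byte offsets 0, 4 and 8.
       */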
1839 struct brw_cs_prog_data *cs_prog_data =
1840 (struct brw_cs_prog_data *) prog_data;
1841 const unsigned surface =
1842 cs_prog_data->binding_table.work_groups_start;
1843
1844 cs_prog_data->uses_num_work_groups = true;
1845
1846 fs_reg surf_index = fs_reg(surface);
1847 brw_mark_surface_used(prog_data, surface);
1848
1849 /* Read the 3 GLuint components of gl_NumWorkGroups */
1850 for (unsigned i = 0; i < 3; i++) {
1851 fs_reg read_result =
1852 emit_untyped_read(bld, surf_index,
1853 fs_reg(i << 2),
1854 1 /* dims */, 1 /* size */,
1855 BRW_PREDICATE_NONE);
1856 read_result.type = dest.type;
1857 bld.MOV(dest, read_result);
1858 dest = offset(dest, bld, 1);
1859 }
1860 break;
1861 }
1862
1863 default:
1864 unreachable("unknown intrinsic");
1865 }
1866 }
1867
1868 void
1869 fs_visitor::nir_emit_ssbo_atomic(const fs_builder &bld,
1870 int op, nir_intrinsic_instr *instr)
1871 {
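   /* Common helper for all SSBO atomic intrinsics: the sources are the
    * surface (block) index, the byte offset and the data operand(s);
    * compare-and-swap (BRW_AOP_CMPWR) takes a second data operand.  The
    * operation itself is emitted as an untyped atomic surface message.
    */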
1872 fs_reg dest;
1873 if (nir_intrinsic_infos[instr->intrinsic].has_dest)
1874 dest = get_nir_dest(instr->dest);
1875
1876 fs_reg surface;
1877 nir_const_value *const_surface = nir_src_as_const_value(instr->src[0]);
1878 if (const_surface) {
1879 unsigned surf_index = stage_prog_data->binding_table.ubo_start +
1880 const_surface->u[0];
1881 surface = fs_reg(surf_index);
1882 brw_mark_surface_used(prog_data, surf_index);
1883 } else {
1884 surface = vgrf(glsl_type::uint_type);
1885 bld.ADD(surface, get_nir_src(instr->src[0]),
1886 fs_reg(stage_prog_data->binding_table.ubo_start));
1887
1888       /* Assume this may touch any UBO. This matches what we do for other
1889        * UBO/SSBO accesses with a non-constant surface index.
1890        */
1891 brw_mark_surface_used(prog_data,
1892 stage_prog_data->binding_table.ubo_start +
1893 shader_prog->NumBufferInterfaceBlocks - 1);
1894 }
1895
1896 fs_reg offset = get_nir_src(instr->src[1]);
1897 fs_reg data1 = get_nir_src(instr->src[2]);
1898 fs_reg data2;
1899 if (op == BRW_AOP_CMPWR)
1900 data2 = get_nir_src(instr->src[3]);
1901
1902    /* Emit the actual atomic operation */
1903
1904 fs_reg atomic_result =
1905 surface_access::emit_untyped_atomic(bld, surface, offset,
1906 data1, data2,
1907 1 /* dims */, 1 /* rsize */,
1908 op,
1909 BRW_PREDICATE_NONE);
1910 dest.type = atomic_result.type;
1911 bld.MOV(dest, atomic_result);
1912 }
1913
1914 void
1915 fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr)
1916 {
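   /* Gather the NIR texture sources into the fs_regs expected by
    * emit_texture(), translate the NIR texop into the corresponding
    * ir_texture_opcode, and finally copy the per-component result into the
    * NIR destination.
    */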
1917 unsigned sampler = instr->sampler_index;
1918 fs_reg sampler_reg(sampler);
1919
1920 /* FINISHME: We're failing to recompile our programs when the sampler is
1921 * updated. This only matters for the texture rectangle scale parameters
1922 * (pre-gen6, or gen6+ with GL_CLAMP).
1923 */
1924 int texunit = prog->SamplerUnits[sampler];
1925
1926 int gather_component = instr->component;
1927
1928 bool is_rect = instr->sampler_dim == GLSL_SAMPLER_DIM_RECT;
1929
1930 bool is_cube_array = instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE &&
1931 instr->is_array;
1932
1933 int lod_components = 0;
1934 int UNUSED offset_components = 0;
1935
1936 fs_reg coordinate, shadow_comparitor, lod, lod2, sample_index, mcs, tex_offset;
1937
1938 for (unsigned i = 0; i < instr->num_srcs; i++) {
1939 fs_reg src = get_nir_src(instr->src[i].src);
1940 switch (instr->src[i].src_type) {
1941 case nir_tex_src_bias:
1942 lod = retype(src, BRW_REGISTER_TYPE_F);
1943 break;
1944 case nir_tex_src_comparitor:
1945 shadow_comparitor = retype(src, BRW_REGISTER_TYPE_F);
1946 break;
1947 case nir_tex_src_coord:
1948 switch (instr->op) {
1949 case nir_texop_txf:
1950 case nir_texop_txf_ms:
1951 coordinate = retype(src, BRW_REGISTER_TYPE_D);
1952 break;
1953 default:
1954 coordinate = retype(src, BRW_REGISTER_TYPE_F);
1955 break;
1956 }
1957 break;
1958 case nir_tex_src_ddx:
1959 lod = retype(src, BRW_REGISTER_TYPE_F);
1960 lod_components = nir_tex_instr_src_size(instr, i);
1961 break;
1962 case nir_tex_src_ddy:
1963 lod2 = retype(src, BRW_REGISTER_TYPE_F);
1964 break;
1965 case nir_tex_src_lod:
1966 switch (instr->op) {
1967 case nir_texop_txs:
1968 lod = retype(src, BRW_REGISTER_TYPE_UD);
1969 break;
1970 case nir_texop_txf:
1971 lod = retype(src, BRW_REGISTER_TYPE_D);
1972 break;
1973 default:
1974 lod = retype(src, BRW_REGISTER_TYPE_F);
1975 break;
1976 }
1977 break;
1978 case nir_tex_src_ms_index:
1979 sample_index = retype(src, BRW_REGISTER_TYPE_UD);
1980 break;
1981 case nir_tex_src_offset:
1982 tex_offset = retype(src, BRW_REGISTER_TYPE_D);
1983 if (instr->is_array)
1984 offset_components = instr->coord_components - 1;
1985 else
1986 offset_components = instr->coord_components;
1987 break;
1988 case nir_tex_src_projector:
1989 unreachable("should be lowered");
1990
1991 case nir_tex_src_sampler_offset: {
1992 /* Figure out the highest possible sampler index and mark it as used */
1993 uint32_t max_used = sampler + instr->sampler_array_size - 1;
1994 if (instr->op == nir_texop_tg4 && devinfo->gen < 8) {
1995 max_used += stage_prog_data->binding_table.gather_texture_start;
1996 } else {
1997 max_used += stage_prog_data->binding_table.texture_start;
1998 }
1999 brw_mark_surface_used(prog_data, max_used);
2000
2001 /* Emit code to evaluate the actual indexing expression */
2002 sampler_reg = vgrf(glsl_type::uint_type);
2003 bld.ADD(sampler_reg, src, fs_reg(sampler));
2004 sampler_reg = bld.emit_uniformize(sampler_reg);
2005 break;
2006 }
2007
2008 default:
2009 unreachable("unknown texture source");
2010 }
2011 }
2012
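   /* For multisample texel fetches on Gen7+, fetch the MCS data first when
    * the surface uses the compressed multisample layout; otherwise an MCS
    * value of zero is passed along.
    */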
2013 if (instr->op == nir_texop_txf_ms) {
2014 if (devinfo->gen >= 7 &&
2015 key_tex->compressed_multisample_layout_mask & (1 << sampler)) {
2016 mcs = emit_mcs_fetch(coordinate, instr->coord_components, sampler_reg);
2017 } else {
2018 mcs = fs_reg(0u);
2019 }
2020 }
2021
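   /* A non-zero constant offset gets packed into a single immediate with
    * brw_texture_offset() and passed along as tex_offset.
    */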
2022 for (unsigned i = 0; i < 3; i++) {
2023 if (instr->const_offset[i] != 0) {
2024 assert(offset_components == 0);
2025 tex_offset = fs_reg(brw_texture_offset(instr->const_offset, 3));
2026 break;
2027 }
2028 }
2029
2030    enum glsl_base_type dest_base_type =
2031       brw_glsl_base_type_for_nir_type(instr->dest_type);
2032
2033 const glsl_type *dest_type =
2034 glsl_type::get_instance(dest_base_type, nir_tex_instr_dest_size(instr),
2035 1);
2036
2037 ir_texture_opcode op;
2038 switch (instr->op) {
2039 case nir_texop_lod: op = ir_lod; break;
2040 case nir_texop_query_levels: op = ir_query_levels; break;
2041 case nir_texop_tex: op = ir_tex; break;
2042 case nir_texop_tg4: op = ir_tg4; break;
2043 case nir_texop_txb: op = ir_txb; break;
2044 case nir_texop_txd: op = ir_txd; break;
2045 case nir_texop_txf: op = ir_txf; break;
2046 case nir_texop_txf_ms: op = ir_txf_ms; break;
2047 case nir_texop_txl: op = ir_txl; break;
2048 case nir_texop_txs: op = ir_txs; break;
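   /* textureSamples() does not go through emit_texture(); it is implemented
    * directly as a SAMPLEINFO message below.
    */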
2049 case nir_texop_texture_samples: {
2050 fs_reg dst = retype(get_nir_dest(instr->dest), BRW_REGISTER_TYPE_D);
2051 fs_inst *inst = bld.emit(SHADER_OPCODE_SAMPLEINFO, dst,
2052 bld.vgrf(BRW_REGISTER_TYPE_D, 1),
2053 sampler_reg);
2054 inst->mlen = 1;
2055 inst->header_size = 1;
2056 inst->base_mrf = -1;
2057 return;
2058 }
2059 default:
2060 unreachable("unknown texture opcode");
2061 }
2062
2063 emit_texture(op, dest_type, coordinate, instr->coord_components,
2064 shadow_comparitor, lod, lod2, lod_components, sample_index,
2065 tex_offset, mcs, gather_component,
2066 is_cube_array, is_rect, sampler, sampler_reg, texunit);
2067
2068 fs_reg dest = get_nir_dest(instr->dest);
2069 dest.type = this->result.type;
2070 unsigned num_components = nir_tex_instr_dest_size(instr);
2071 emit_percomp(bld, fs_inst(BRW_OPCODE_MOV, bld.dispatch_width(),
2072 dest, this->result),
2073 (1 << num_components) - 1);
2074 }
2075
2076 void
2077 fs_visitor::nir_emit_jump(const fs_builder &bld, nir_jump_instr *instr)
2078 {
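   /* Break and continue map directly to the corresponding BRW loop opcodes;
    * returns should have been lowered away before reaching this point.
    */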
2079 switch (instr->type) {
2080 case nir_jump_break:
2081 bld.emit(BRW_OPCODE_BREAK);
2082 break;
2083 case nir_jump_continue:
2084 bld.emit(BRW_OPCODE_CONTINUE);
2085 break;
2086 case nir_jump_return:
2087 default:
2088 unreachable("unknown jump");
2089 }
2090 }