i965/fs: Handle derivative quality decisions in the front-end.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_shader.cpp
/*
 * Copyright © 2010 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

extern "C" {
#include "main/macros.h"
#include "brw_context.h"
}
#include "brw_vs.h"
#include "brw_gs.h"
#include "brw_fs.h"
#include "brw_cfg.h"
#include "glsl/ir_optimization.h"
#include "glsl/glsl_parser_extras.h"
#include "main/shaderapi.h"

struct gl_shader *
brw_new_shader(struct gl_context *ctx, GLuint name, GLuint type)
{
   struct brw_shader *shader;

   shader = rzalloc(NULL, struct brw_shader);
   if (shader) {
      shader->base.Type = type;
      shader->base.Stage = _mesa_shader_enum_to_shader_stage(type);
      shader->base.Name = name;
      _mesa_init_shader(ctx, &shader->base);
   }

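   /* If the allocation failed, shader is NULL; base is the first member, so
    * this evaluates to NULL as well and the caller sees the out-of-memory
    * condition.
    */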
   return &shader->base;
}

/**
 * Performs a compile of the shader stages even when we don't know
 * what non-orthogonal state will be set, in the hope that it reflects
 * the eventual NOS used, and thus allows us to produce link failures.
 */
static bool
brw_shader_precompile(struct gl_context *ctx,
                      struct gl_shader_program *sh_prog)
{
   struct gl_shader *vs = sh_prog->_LinkedShaders[MESA_SHADER_VERTEX];
   struct gl_shader *gs = sh_prog->_LinkedShaders[MESA_SHADER_GEOMETRY];
   struct gl_shader *fs = sh_prog->_LinkedShaders[MESA_SHADER_FRAGMENT];

   if (fs && !brw_fs_precompile(ctx, sh_prog, fs->Program))
      return false;

   if (gs && !brw_gs_precompile(ctx, sh_prog, gs->Program))
      return false;

   if (vs && !brw_vs_precompile(ctx, sh_prog, vs->Program))
      return false;

   return true;
}

static void
brw_lower_packing_builtins(struct brw_context *brw,
                           gl_shader_stage shader_type,
                           exec_list *ir)
{
   int ops = LOWER_PACK_SNORM_2x16
           | LOWER_UNPACK_SNORM_2x16
           | LOWER_PACK_UNORM_2x16
           | LOWER_UNPACK_UNORM_2x16;

   if (shader_type == MESA_SHADER_FRAGMENT) {
      ops |= LOWER_UNPACK_UNORM_4x8
           | LOWER_UNPACK_SNORM_4x8
           | LOWER_PACK_UNORM_4x8
           | LOWER_PACK_SNORM_4x8;
   }

   if (brw->gen >= 7) {
      /* Gen7 introduced the f32to16 and f16to32 instructions, which can be
       * used to execute packHalf2x16 and unpackHalf2x16.  For AOS code, no
       * lowering is needed.  For SOA code, the Half2x16 ops must be
       * scalarized.
       */
      if (shader_type == MESA_SHADER_FRAGMENT) {
         ops |= LOWER_PACK_HALF_2x16_TO_SPLIT
              | LOWER_UNPACK_HALF_2x16_TO_SPLIT;
      }
   } else {
      ops |= LOWER_PACK_HALF_2x16
           | LOWER_UNPACK_HALF_2x16;
   }

   lower_packing_builtins(ir, ops);
}
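
/* A rough sketch of what the gen7+ split lowering does, shown at the GLSL
 * level (illustrative only):
 *
 *    vec2 v = unpackHalf2x16(u);
 *
 * becomes the pair of scalar split operations
 *
 *    v.x = unpack_half_2x16_split_x(u);   // maps to the f16to32 instruction
 *    v.y = unpack_half_2x16_split_y(u);
 *
 * On gen6 and earlier there is no f16to32, so lower_packing_builtins()
 * instead open-codes the conversion as integer arithmetic.
 */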

GLboolean
brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg)
{
   struct brw_context *brw = brw_context(ctx);
   unsigned int stage;

   for (stage = 0; stage < ARRAY_SIZE(shProg->_LinkedShaders); stage++) {
      const struct gl_shader_compiler_options *options =
         &ctx->Const.ShaderCompilerOptions[stage];
      struct brw_shader *shader =
         (struct brw_shader *)shProg->_LinkedShaders[stage];

      if (!shader)
         continue;

      struct gl_program *prog =
         ctx->Driver.NewProgram(ctx, _mesa_shader_stage_to_program(stage),
                                shader->base.Name);
      if (!prog)
         return false;
      prog->Parameters = _mesa_new_parameter_list();

      _mesa_copy_linked_program_data((gl_shader_stage) stage, shProg, prog);

      bool progress;

      /* lower_packing_builtins() inserts arithmetic instructions, so it
       * must precede lower_instructions().
       */
      brw_lower_packing_builtins(brw, (gl_shader_stage) stage, shader->base.ir);
      do_mat_op_to_vec(shader->base.ir);
      const int bitfield_insert = brw->gen >= 7
                                  ? BITFIELD_INSERT_TO_BFM_BFI
                                  : 0;
      lower_instructions(shader->base.ir,
                         MOD_TO_FRACT |
                         DIV_TO_MUL_RCP |
                         SUB_TO_ADD_NEG |
                         EXP_TO_EXP2 |
                         LOG_TO_LOG2 |
                         bitfield_insert |
                         LDEXP_TO_ARITH);
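
      /* As a rough illustration of the flags above: MOD_TO_FRACT rewrites
       * mod(x, y) as y * fract(x / y), and DIV_TO_MUL_RCP turns a / b into
       * a * rcp(b), matching the math operations the hardware actually
       * provides.
       */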

      /* Pre-gen6 HW can only nest if-statements 16 deep.  Beyond this,
       * if-statements need to be flattened.
       */
      if (brw->gen < 6)
         lower_if_to_cond_assign(shader->base.ir, 16);

      do_lower_texture_projection(shader->base.ir);
      brw_lower_texture_gradients(brw, shader->base.ir);
      do_vec_index_to_cond_assign(shader->base.ir);
      lower_vector_insert(shader->base.ir, true);
      brw_do_cubemap_normalize(shader->base.ir);
      lower_offset_arrays(shader->base.ir);
      brw_do_lower_unnormalized_offset(shader->base.ir);
      lower_noise(shader->base.ir);
      lower_quadop_vector(shader->base.ir, false);

      bool lowered_variable_indexing =
         lower_variable_index_to_cond_assign(shader->base.ir,
                                             options->EmitNoIndirectInput,
                                             options->EmitNoIndirectOutput,
                                             options->EmitNoIndirectTemp,
                                             options->EmitNoIndirectUniform);

      if (unlikely(brw->perf_debug && lowered_variable_indexing)) {
         perf_debug("Unsupported form of variable indexing in %s; falling "
                    "back to very inefficient code generation\n",
                    _mesa_shader_stage_to_string(shader->base.Stage));
      }

      lower_ubo_reference(&shader->base, shader->base.ir);

      do {
         progress = false;

         if (stage == MESA_SHADER_FRAGMENT) {
            brw_do_channel_expressions(shader->base.ir);
            brw_do_vector_splitting(shader->base.ir);
         }

         progress = do_lower_jumps(shader->base.ir, true, true,
                                   true, /* main return */
                                   false, /* continue */
                                   false /* loops */
                                   ) || progress;

         progress = do_common_optimization(shader->base.ir, true, true,
                                           options, ctx->Const.NativeIntegers)
            || progress;
      } while (progress);

      /* Make a pass over the IR to add state references for any built-in
       * uniforms that are used.  This has to be done now (during linking).
       * Code generation doesn't happen until the first time this shader is
       * used for rendering.  Waiting until then to generate the parameters is
       * too late.  At that point, the values for the built-in uniforms won't
       * get sent to the shader.
       */
      foreach_in_list(ir_instruction, node, shader->base.ir) {
         ir_variable *var = node->as_variable();

         if ((var == NULL) || (var->data.mode != ir_var_uniform)
             || (strncmp(var->name, "gl_", 3) != 0))
            continue;

         const ir_state_slot *const slots = var->get_state_slots();
         assert(slots != NULL);

         for (unsigned int i = 0; i < var->get_num_state_slots(); i++) {
            _mesa_add_state_reference(prog->Parameters,
                                      (gl_state_index *) slots[i].tokens);
         }
      }

      validate_ir_tree(shader->base.ir);

      do_set_program_inouts(shader->base.ir, prog, shader->base.Stage);

      prog->SamplersUsed = shader->base.active_samplers;
      _mesa_update_shader_textures_used(shProg, prog);

      _mesa_reference_program(ctx, &shader->base.Program, prog);

      brw_add_texrect_params(prog);

      _mesa_reference_program(ctx, &prog, NULL);

      if (ctx->_Shader->Flags & GLSL_DUMP) {
         fprintf(stderr, "\n");
         fprintf(stderr, "GLSL IR for linked %s program %d:\n",
                 _mesa_shader_stage_to_string(shader->base.Stage),
                 shProg->Name);
         _mesa_print_ir(stderr, shader->base.ir, NULL);
         fprintf(stderr, "\n");
      }
   }

   if ((ctx->_Shader->Flags & GLSL_DUMP) && shProg->Name != 0) {
      for (unsigned i = 0; i < shProg->NumShaders; i++) {
         const struct gl_shader *sh = shProg->Shaders[i];
         if (!sh)
            continue;

         fprintf(stderr, "GLSL %s shader %d source for linked program %d:\n",
                 _mesa_shader_stage_to_string(sh->Stage),
                 i, shProg->Name);
         fprintf(stderr, "%s", sh->Source);
         fprintf(stderr, "\n");
      }
   }

   if (brw->precompile && !brw_shader_precompile(ctx, shProg))
      return false;

   return true;
}

enum brw_reg_type
brw_type_for_base_type(const struct glsl_type *type)
{
   switch (type->base_type) {
   case GLSL_TYPE_FLOAT:
      return BRW_REGISTER_TYPE_F;
   case GLSL_TYPE_INT:
      return BRW_REGISTER_TYPE_D;
   case GLSL_TYPE_BOOL:
   case GLSL_TYPE_UINT:
      return BRW_REGISTER_TYPE_UD;
   case GLSL_TYPE_ARRAY:
      return brw_type_for_base_type(type->fields.array);
   case GLSL_TYPE_STRUCT:
   case GLSL_TYPE_SAMPLER:
   case GLSL_TYPE_ATOMIC_UINT:
      /* These should be overridden with the type of the member when
       * dereferenced into.  BRW_REGISTER_TYPE_UD seems like a likely
       * way to trip up if we don't.
       */
      return BRW_REGISTER_TYPE_UD;
   case GLSL_TYPE_IMAGE:
      return BRW_REGISTER_TYPE_UD;
   case GLSL_TYPE_VOID:
   case GLSL_TYPE_ERROR:
   case GLSL_TYPE_INTERFACE:
      unreachable("not reached");
   }

   return BRW_REGISTER_TYPE_F;
}

enum brw_conditional_mod
brw_conditional_for_comparison(unsigned int op)
{
   switch (op) {
   case ir_binop_less:
      return BRW_CONDITIONAL_L;
   case ir_binop_greater:
      return BRW_CONDITIONAL_G;
   case ir_binop_lequal:
      return BRW_CONDITIONAL_LE;
   case ir_binop_gequal:
      return BRW_CONDITIONAL_GE;
   case ir_binop_equal:
   case ir_binop_all_equal: /* same as equal for scalars */
      return BRW_CONDITIONAL_Z;
   case ir_binop_nequal:
   case ir_binop_any_nequal: /* same as nequal for scalars */
      return BRW_CONDITIONAL_NZ;
   default:
      unreachable("not reached: bad operation for comparison");
   }
}

uint32_t
brw_math_function(enum opcode op)
{
   switch (op) {
   case SHADER_OPCODE_RCP:
      return BRW_MATH_FUNCTION_INV;
   case SHADER_OPCODE_RSQ:
      return BRW_MATH_FUNCTION_RSQ;
   case SHADER_OPCODE_SQRT:
      return BRW_MATH_FUNCTION_SQRT;
   case SHADER_OPCODE_EXP2:
      return BRW_MATH_FUNCTION_EXP;
   case SHADER_OPCODE_LOG2:
      return BRW_MATH_FUNCTION_LOG;
   case SHADER_OPCODE_POW:
      return BRW_MATH_FUNCTION_POW;
   case SHADER_OPCODE_SIN:
      return BRW_MATH_FUNCTION_SIN;
   case SHADER_OPCODE_COS:
      return BRW_MATH_FUNCTION_COS;
   case SHADER_OPCODE_INT_QUOTIENT:
      return BRW_MATH_FUNCTION_INT_DIV_QUOTIENT;
   case SHADER_OPCODE_INT_REMAINDER:
      return BRW_MATH_FUNCTION_INT_DIV_REMAINDER;
   default:
      unreachable("not reached: unknown math function");
   }
}

uint32_t
brw_texture_offset(struct gl_context *ctx, int *offsets,
                   unsigned num_components)
{
   /* If the driver does not support GL_ARB_gpu_shader5, the offset
    * must be constant.
    */
   assert(offsets != NULL || ctx->Extensions.ARB_gpu_shader5);

   if (!offsets) return 0;  /* nonconstant offset; caller will handle it. */

   /* Combine all three offsets into a single unsigned dword:
    *
    *    bits 11:8 - U Offset (X component)
    *    bits  7:4 - V Offset (Y component)
    *    bits  3:0 - R Offset (Z component)
    */
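   /* For example (hypothetical values): offsets of (1, -2, 3) pack as
    *
    *    (1 << 8) | ((-2 & 0xf) << 4) | (3 & 0xf) == 0x1e3
    *
    * where the masking wraps the negative offset into a 4-bit
    * two's-complement field.
    */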
   unsigned offset_bits = 0;
   for (unsigned i = 0; i < num_components; i++) {
      const unsigned shift = 4 * (2 - i);
      offset_bits |= (offsets[i] << shift) & (0xF << shift);
   }
   return offset_bits;
}

const char *
brw_instruction_name(enum opcode op)
{
   char *fallback;

   if (op < ARRAY_SIZE(opcode_descs) && opcode_descs[op].name)
      return opcode_descs[op].name;

   switch (op) {
   case FS_OPCODE_FB_WRITE:
      return "fb_write";
   case FS_OPCODE_BLORP_FB_WRITE:
      return "blorp_fb_write";

   case SHADER_OPCODE_RCP:
      return "rcp";
   case SHADER_OPCODE_RSQ:
      return "rsq";
   case SHADER_OPCODE_SQRT:
      return "sqrt";
   case SHADER_OPCODE_EXP2:
      return "exp2";
   case SHADER_OPCODE_LOG2:
      return "log2";
   case SHADER_OPCODE_POW:
      return "pow";
   case SHADER_OPCODE_INT_QUOTIENT:
      return "int_quot";
   case SHADER_OPCODE_INT_REMAINDER:
      return "int_rem";
   case SHADER_OPCODE_SIN:
      return "sin";
   case SHADER_OPCODE_COS:
      return "cos";

   case SHADER_OPCODE_TEX:
      return "tex";
   case SHADER_OPCODE_TXD:
      return "txd";
   case SHADER_OPCODE_TXF:
      return "txf";
   case SHADER_OPCODE_TXL:
      return "txl";
   case SHADER_OPCODE_TXS:
      return "txs";
   case FS_OPCODE_TXB:
      return "txb";
   case SHADER_OPCODE_TXF_CMS:
      return "txf_cms";
   case SHADER_OPCODE_TXF_UMS:
      return "txf_ums";
   case SHADER_OPCODE_TXF_MCS:
      return "txf_mcs";
   case SHADER_OPCODE_TG4:
      return "tg4";
   case SHADER_OPCODE_TG4_OFFSET:
      return "tg4_offset";
   case SHADER_OPCODE_SHADER_TIME_ADD:
      return "shader_time_add";

   case SHADER_OPCODE_LOAD_PAYLOAD:
      return "load_payload";

   case SHADER_OPCODE_GEN4_SCRATCH_READ:
      return "gen4_scratch_read";
   case SHADER_OPCODE_GEN4_SCRATCH_WRITE:
      return "gen4_scratch_write";
   case SHADER_OPCODE_GEN7_SCRATCH_READ:
      return "gen7_scratch_read";

   case VEC4_OPCODE_PACK_BYTES:
      return "pack_bytes";

   case FS_OPCODE_DDX_COARSE:
      return "ddx_coarse";
   case FS_OPCODE_DDX_FINE:
      return "ddx_fine";
   case FS_OPCODE_DDY_COARSE:
      return "ddy_coarse";
   case FS_OPCODE_DDY_FINE:
      return "ddy_fine";

   case FS_OPCODE_PIXEL_X:
      return "pixel_x";
   case FS_OPCODE_PIXEL_Y:
      return "pixel_y";

   case FS_OPCODE_CINTERP:
      return "cinterp";
   case FS_OPCODE_LINTERP:
      return "linterp";

   case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
      return "uniform_pull_const";
   case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7:
      return "uniform_pull_const_gen7";
   case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD:
      return "varying_pull_const";
   case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7:
      return "varying_pull_const_gen7";

   case FS_OPCODE_MOV_DISPATCH_TO_FLAGS:
      return "mov_dispatch_to_flags";
   case FS_OPCODE_DISCARD_JUMP:
      return "discard_jump";

   case FS_OPCODE_SET_SIMD4X2_OFFSET:
      return "set_simd4x2_offset";

   case FS_OPCODE_PACK_HALF_2x16_SPLIT:
      return "pack_half_2x16_split";
   case FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X:
      return "unpack_half_2x16_split_x";
   case FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y:
      return "unpack_half_2x16_split_y";

   case FS_OPCODE_PLACEHOLDER_HALT:
      return "placeholder_halt";

   case VS_OPCODE_URB_WRITE:
      return "vs_urb_write";
   case VS_OPCODE_PULL_CONSTANT_LOAD:
      return "pull_constant_load";
   case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7:
      return "pull_constant_load_gen7";
   case VS_OPCODE_UNPACK_FLAGS_SIMD4X2:
      return "unpack_flags_simd4x2";

   case GS_OPCODE_URB_WRITE:
      return "gs_urb_write";
   case GS_OPCODE_URB_WRITE_ALLOCATE:
      return "gs_urb_write_allocate";
   case GS_OPCODE_THREAD_END:
      return "gs_thread_end";
   case GS_OPCODE_SET_WRITE_OFFSET:
      return "set_write_offset";
   case GS_OPCODE_SET_VERTEX_COUNT:
      return "set_vertex_count";
   case GS_OPCODE_SET_DWORD_2:
      return "set_dword_2";
   case GS_OPCODE_PREPARE_CHANNEL_MASKS:
      return "prepare_channel_masks";
   case GS_OPCODE_SET_CHANNEL_MASKS:
      return "set_channel_masks";
   case GS_OPCODE_GET_INSTANCE_ID:
      return "get_instance_id";
   case GS_OPCODE_FF_SYNC:
      return "ff_sync";
   case GS_OPCODE_SET_PRIMITIVE_ID:
      return "set_primitive_id";
   case GS_OPCODE_SVB_WRITE:
      return "gs_svb_write";
   case GS_OPCODE_SVB_SET_DST_INDEX:
      return "gs_svb_set_dst_index";
   case GS_OPCODE_FF_SYNC_SET_PRIMITIVES:
      return "gs_ff_sync_set_primitives";

   default:
      /* Yes, this leaks.  It's in debug code, it should never occur, and if
       * it does, you should just add the case to the list above.
       */
      asprintf(&fallback, "op%d", op);
      return fallback;
   }
}

backend_visitor::backend_visitor(struct brw_context *brw,
                                 struct gl_shader_program *shader_prog,
                                 struct gl_program *prog,
                                 struct brw_stage_prog_data *stage_prog_data,
                                 gl_shader_stage stage)
   : brw(brw),
     ctx(&brw->ctx),
     shader(shader_prog ?
        (struct brw_shader *)shader_prog->_LinkedShaders[stage] : NULL),
     shader_prog(shader_prog),
     prog(prog),
     stage_prog_data(stage_prog_data),
     cfg(NULL),
     stage(stage)
{
}

bool
backend_reg::is_zero() const
{
   if (file != IMM)
      return false;

   return fixed_hw_reg.dw1.d == 0;
}

bool
backend_reg::is_one() const
{
   if (file != IMM)
      return false;

   return type == BRW_REGISTER_TYPE_F
          ? fixed_hw_reg.dw1.f == 1.0
          : fixed_hw_reg.dw1.d == 1;
}

bool
backend_reg::is_null() const
{
   return file == HW_REG &&
          fixed_hw_reg.file == BRW_ARCHITECTURE_REGISTER_FILE &&
          fixed_hw_reg.nr == BRW_ARF_NULL;
}

bool
backend_reg::is_accumulator() const
{
   return file == HW_REG &&
          fixed_hw_reg.file == BRW_ARCHITECTURE_REGISTER_FILE &&
          fixed_hw_reg.nr == BRW_ARF_ACCUMULATOR;
}

bool
backend_instruction::is_tex() const
{
   return (opcode == SHADER_OPCODE_TEX ||
           opcode == FS_OPCODE_TXB ||
           opcode == SHADER_OPCODE_TXD ||
           opcode == SHADER_OPCODE_TXF ||
           opcode == SHADER_OPCODE_TXF_CMS ||
           opcode == SHADER_OPCODE_TXF_UMS ||
           opcode == SHADER_OPCODE_TXF_MCS ||
           opcode == SHADER_OPCODE_TXL ||
           opcode == SHADER_OPCODE_TXS ||
           opcode == SHADER_OPCODE_LOD ||
           opcode == SHADER_OPCODE_TG4 ||
           opcode == SHADER_OPCODE_TG4_OFFSET);
}

bool
backend_instruction::is_math() const
{
   return (opcode == SHADER_OPCODE_RCP ||
           opcode == SHADER_OPCODE_RSQ ||
           opcode == SHADER_OPCODE_SQRT ||
           opcode == SHADER_OPCODE_EXP2 ||
           opcode == SHADER_OPCODE_LOG2 ||
           opcode == SHADER_OPCODE_SIN ||
           opcode == SHADER_OPCODE_COS ||
           opcode == SHADER_OPCODE_INT_QUOTIENT ||
           opcode == SHADER_OPCODE_INT_REMAINDER ||
           opcode == SHADER_OPCODE_POW);
}

bool
backend_instruction::is_control_flow() const
{
   switch (opcode) {
   case BRW_OPCODE_DO:
   case BRW_OPCODE_WHILE:
   case BRW_OPCODE_IF:
   case BRW_OPCODE_ELSE:
   case BRW_OPCODE_ENDIF:
   case BRW_OPCODE_BREAK:
   case BRW_OPCODE_CONTINUE:
      return true;
   default:
      return false;
   }
}

bool
backend_instruction::can_do_source_mods() const
{
   switch (opcode) {
   case BRW_OPCODE_ADDC:
   case BRW_OPCODE_BFE:
   case BRW_OPCODE_BFI1:
   case BRW_OPCODE_BFI2:
   case BRW_OPCODE_BFREV:
   case BRW_OPCODE_CBIT:
   case BRW_OPCODE_FBH:
   case BRW_OPCODE_FBL:
   case BRW_OPCODE_SUBB:
      return false;
   default:
      return true;
   }
}

bool
backend_instruction::can_do_saturate() const
{
   switch (opcode) {
   case BRW_OPCODE_ADD:
   case BRW_OPCODE_ASR:
   case BRW_OPCODE_AVG:
   case BRW_OPCODE_DP2:
   case BRW_OPCODE_DP3:
   case BRW_OPCODE_DP4:
   case BRW_OPCODE_DPH:
   case BRW_OPCODE_F16TO32:
   case BRW_OPCODE_F32TO16:
   case BRW_OPCODE_LINE:
   case BRW_OPCODE_LRP:
   case BRW_OPCODE_MAC:
   case BRW_OPCODE_MACH:
   case BRW_OPCODE_MAD:
   case BRW_OPCODE_MATH:
   case BRW_OPCODE_MOV:
   case BRW_OPCODE_MUL:
   case BRW_OPCODE_PLN:
   case BRW_OPCODE_RNDD:
   case BRW_OPCODE_RNDE:
   case BRW_OPCODE_RNDU:
   case BRW_OPCODE_RNDZ:
   case BRW_OPCODE_SEL:
   case BRW_OPCODE_SHL:
   case BRW_OPCODE_SHR:
   case FS_OPCODE_LINTERP:
   case SHADER_OPCODE_COS:
   case SHADER_OPCODE_EXP2:
   case SHADER_OPCODE_LOG2:
   case SHADER_OPCODE_POW:
   case SHADER_OPCODE_RCP:
   case SHADER_OPCODE_RSQ:
   case SHADER_OPCODE_SIN:
   case SHADER_OPCODE_SQRT:
      return true;
   default:
      return false;
   }
}

bool
backend_instruction::reads_accumulator_implicitly() const
{
   switch (opcode) {
   case BRW_OPCODE_MAC:
   case BRW_OPCODE_MACH:
   case BRW_OPCODE_SADA2:
      return true;
   default:
      return false;
   }
}

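/* Note: on gen4-5 most instructions update the accumulator implicitly; the
 * accumulator write-control bit that makes such writes opt-in only appeared
 * on gen6, hence the opcode-range check below for older hardware.
 */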
bool
backend_instruction::writes_accumulator_implicitly(struct brw_context *brw) const
{
   return writes_accumulator ||
          (brw->gen < 6 &&
           ((opcode >= BRW_OPCODE_ADD && opcode < BRW_OPCODE_NOP) ||
            (opcode >= FS_OPCODE_DDX_COARSE && opcode <= FS_OPCODE_LINTERP &&
             opcode != FS_OPCODE_CINTERP)));
}

bool
backend_instruction::has_side_effects() const
{
   switch (opcode) {
   case SHADER_OPCODE_UNTYPED_ATOMIC:
   case FS_OPCODE_FB_WRITE:
      return true;
   default:
      return false;
   }
}

#ifndef NDEBUG
static bool
inst_is_in_block(const bblock_t *block, const backend_instruction *inst)
{
   bool found = false;
   foreach_inst_in_block (backend_instruction, i, block) {
      if (inst == i) {
         found = true;
      }
   }
   return found;
}
#endif

static void
adjust_later_block_ips(bblock_t *start_block, int ip_adjustment)
{
   for (bblock_t *block_iter = start_block->next();
        !block_iter->link.is_tail_sentinel();
        block_iter = block_iter->next()) {
      block_iter->start_ip += ip_adjustment;
      block_iter->end_ip += ip_adjustment;
   }
}
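
/* Illustrative sketch of the IP bookkeeping done here and in the insertion
 * and removal methods below: if a block spans instruction pointers [10, 20]
 * and one instruction is inserted into it, the block's end_ip becomes 21 and
 * every later block's [start_ip, end_ip] range shifts up by one, keeping the
 * global instruction numbering dense and ordered.
 */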

void
backend_instruction::insert_after(bblock_t *block, backend_instruction *inst)
{
   assert(inst_is_in_block(block, this) || !"Instruction not in block");

   block->end_ip++;

   adjust_later_block_ips(block, 1);

   exec_node::insert_after(inst);
}

void
backend_instruction::insert_before(bblock_t *block, backend_instruction *inst)
{
   assert(inst_is_in_block(block, this) || !"Instruction not in block");

   block->end_ip++;

   adjust_later_block_ips(block, 1);

   exec_node::insert_before(inst);
}

void
backend_instruction::insert_before(bblock_t *block, exec_list *list)
{
   assert(inst_is_in_block(block, this) || !"Instruction not in block");

   unsigned num_inst = list->length();

   block->end_ip += num_inst;

   adjust_later_block_ips(block, num_inst);

   exec_node::insert_before(list);
}

void
backend_instruction::remove(bblock_t *block)
{
   assert(inst_is_in_block(block, this) || !"Instruction not in block");

   adjust_later_block_ips(block, -1);

   if (block->start_ip == block->end_ip) {
      block->cfg->remove_block(block);
   } else {
      block->end_ip--;
   }

   exec_node::remove();
}

void
backend_visitor::dump_instructions()
{
   dump_instructions(NULL);
}

void
backend_visitor::dump_instructions(const char *name)
{
   FILE *file = stderr;
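   /* Only write to a file when not running as root; this avoids a privileged
    * process creating root-owned dump files.  Otherwise fall back to stderr.
    */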
   if (name && geteuid() != 0) {
      file = fopen(name, "w");
      if (!file)
         file = stderr;
   }

   int ip = 0;
   foreach_block_and_inst(block, backend_instruction, inst, cfg) {
      if (!name)
         fprintf(stderr, "%d: ", ip++);
      dump_instruction(inst, file);
   }

   if (file != stderr) {
      fclose(file);
   }
}

void
backend_visitor::calculate_cfg()
{
   if (this->cfg)
      return;
   cfg = new(mem_ctx) cfg_t(&this->instructions);
}

void
backend_visitor::invalidate_cfg()
{
   ralloc_free(this->cfg);
   this->cfg = NULL;
}

/**
 * Sets up the starting offsets for the groups of binding table entries
 * common to all pipeline stages.
 *
 * Unused groups are initialized to 0xd0d0d0d0 to make it obvious that they're
 * unused, but also to make sure that adding small offsets to them will
 * trigger some of our asserts that surface indices are < BRW_MAX_SURFACES.
 */
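/* For example (hypothetical numbers): a shader using 4 samplers and 2 UBOs,
 * with shader_time, gather, and atomics all unused, starting from
 * next_binding_table_offset == 0, would end up with
 *
 *    texture_start        = 0           (surfaces 0..3)
 *    ubo_start            = 4           (surfaces 4..5)
 *    shader_time_start    = 0xd0d0d0d0  (unused)
 *    gather_texture_start = 0xd0d0d0d0  (unused)
 *    abo_start            = 0xd0d0d0d0  (unused)
 *    pull_constants_start = 6
 */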
void
backend_visitor::assign_common_binding_table_offsets(uint32_t next_binding_table_offset)
{
   int num_textures = _mesa_fls(prog->SamplersUsed);

   stage_prog_data->binding_table.texture_start = next_binding_table_offset;
   next_binding_table_offset += num_textures;

   if (shader) {
      stage_prog_data->binding_table.ubo_start = next_binding_table_offset;
      next_binding_table_offset += shader->base.NumUniformBlocks;
   } else {
      stage_prog_data->binding_table.ubo_start = 0xd0d0d0d0;
   }

   if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
      stage_prog_data->binding_table.shader_time_start = next_binding_table_offset;
      next_binding_table_offset++;
   } else {
      stage_prog_data->binding_table.shader_time_start = 0xd0d0d0d0;
   }

   if (prog->UsesGather) {
      if (brw->gen >= 8) {
         stage_prog_data->binding_table.gather_texture_start =
            stage_prog_data->binding_table.texture_start;
      } else {
         stage_prog_data->binding_table.gather_texture_start = next_binding_table_offset;
         next_binding_table_offset += num_textures;
      }
   } else {
      stage_prog_data->binding_table.gather_texture_start = 0xd0d0d0d0;
   }

   if (shader_prog && shader_prog->NumAtomicBuffers) {
      stage_prog_data->binding_table.abo_start = next_binding_table_offset;
      next_binding_table_offset += shader_prog->NumAtomicBuffers;
   } else {
      stage_prog_data->binding_table.abo_start = 0xd0d0d0d0;
   }

   /* This may or may not be used depending on how the compile goes. */
   stage_prog_data->binding_table.pull_constants_start = next_binding_table_offset;
   next_binding_table_offset++;

   assert(next_binding_table_offset <= BRW_MAX_SURFACES);

   /* prog_data->base.binding_table.size will be set by brw_mark_surface_used. */
}