42d6236e6fdb00c9ad612c81e618f5efbaac12f0
[mesa.git] / src / mesa / drivers / dri / i965 / brw_shader.cpp
1 /*
2 * Copyright © 2010 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "main/macros.h"
25 #include "brw_context.h"
26 #include "brw_vs.h"
27 #include "brw_gs.h"
28 #include "brw_fs.h"
29 #include "brw_cfg.h"
30 #include "brw_nir.h"
31 #include "glsl/ir_optimization.h"
32 #include "glsl/glsl_parser_extras.h"
33 #include "main/shaderapi.h"
34
35 static void
36 shader_debug_log_mesa(void *data, const char *fmt, ...)
37 {
38 struct brw_context *brw = (struct brw_context *)data;
39 va_list args;
40
41 va_start(args, fmt);
42 GLuint msg_id = 0;
43 _mesa_gl_vdebug(&brw->ctx, &msg_id,
44 MESA_DEBUG_SOURCE_SHADER_COMPILER,
45 MESA_DEBUG_TYPE_OTHER,
46 MESA_DEBUG_SEVERITY_NOTIFICATION, fmt, args);
47 va_end(args);
48 }
49
/* Log a shader-compiler performance message.
 *
 * Printed to stderr when INTEL_DEBUG=perf is set, and also forwarded to
 * the GL debug-output mechanism when the context has performance
 * debugging enabled.  Each consumer gets its own traversal of the
 * variadic arguments: vfprintf() uses a va_copy, _mesa_gl_vdebug() the
 * original list, so neither sees a partially-consumed va_list.
 */
static void
shader_perf_log_mesa(void *data, const char *fmt, ...)
{
   struct brw_context *brw = (struct brw_context *)data;

   va_list args;
   va_start(args, fmt);

   if (unlikely(INTEL_DEBUG & DEBUG_PERF)) {
      va_list args_copy;
      va_copy(args_copy, args);
      vfprintf(stderr, fmt, args_copy);
      va_end(args_copy);
   }

   if (brw->perf_debug) {
      GLuint msg_id = 0;
      _mesa_gl_vdebug(&brw->ctx, &msg_id,
                      MESA_DEBUG_SOURCE_SHADER_COMPILER,
                      MESA_DEBUG_TYPE_PERFORMANCE,
                      MESA_DEBUG_SEVERITY_MEDIUM, fmt, args);
   }
   va_end(args);
}
74
75 struct brw_compiler *
76 brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo)
77 {
78 struct brw_compiler *compiler = rzalloc(mem_ctx, struct brw_compiler);
79
80 compiler->devinfo = devinfo;
81 compiler->shader_debug_log = shader_debug_log_mesa;
82 compiler->shader_perf_log = shader_perf_log_mesa;
83
84 brw_fs_alloc_reg_sets(compiler);
85 brw_vec4_alloc_reg_set(compiler);
86
87 return compiler;
88 }
89
90 struct gl_shader *
91 brw_new_shader(struct gl_context *ctx, GLuint name, GLuint type)
92 {
93 struct brw_shader *shader;
94
95 shader = rzalloc(NULL, struct brw_shader);
96 if (shader) {
97 shader->base.Type = type;
98 shader->base.Stage = _mesa_shader_enum_to_shader_stage(type);
99 shader->base.Name = name;
100 _mesa_init_shader(ctx, &shader->base);
101 }
102
103 return &shader->base;
104 }
105
106 /**
107 * Performs a compile of the shader stages even when we don't know
108 * what non-orthogonal state will be set, in the hope that it reflects
109 * the eventual NOS used, and thus allows us to produce link failures.
110 */
111 static bool
112 brw_shader_precompile(struct gl_context *ctx,
113 struct gl_shader_program *sh_prog)
114 {
115 struct gl_shader *vs = sh_prog->_LinkedShaders[MESA_SHADER_VERTEX];
116 struct gl_shader *gs = sh_prog->_LinkedShaders[MESA_SHADER_GEOMETRY];
117 struct gl_shader *fs = sh_prog->_LinkedShaders[MESA_SHADER_FRAGMENT];
118 struct gl_shader *cs = sh_prog->_LinkedShaders[MESA_SHADER_COMPUTE];
119
120 if (fs && !brw_fs_precompile(ctx, sh_prog, fs->Program))
121 return false;
122
123 if (gs && !brw_gs_precompile(ctx, sh_prog, gs->Program))
124 return false;
125
126 if (vs && !brw_vs_precompile(ctx, sh_prog, vs->Program))
127 return false;
128
129 if (cs && !brw_cs_precompile(ctx, sh_prog, cs->Program))
130 return false;
131
132 return true;
133 }
134
135 static inline bool
136 is_scalar_shader_stage(struct brw_context *brw, int stage)
137 {
138 switch (stage) {
139 case MESA_SHADER_FRAGMENT:
140 return true;
141 case MESA_SHADER_VERTEX:
142 return brw->scalar_vs;
143 default:
144 return false;
145 }
146 }
147
148 static void
149 brw_lower_packing_builtins(struct brw_context *brw,
150 gl_shader_stage shader_type,
151 exec_list *ir)
152 {
153 int ops = LOWER_PACK_SNORM_2x16
154 | LOWER_UNPACK_SNORM_2x16
155 | LOWER_PACK_UNORM_2x16
156 | LOWER_UNPACK_UNORM_2x16;
157
158 if (is_scalar_shader_stage(brw, shader_type)) {
159 ops |= LOWER_UNPACK_UNORM_4x8
160 | LOWER_UNPACK_SNORM_4x8
161 | LOWER_PACK_UNORM_4x8
162 | LOWER_PACK_SNORM_4x8;
163 }
164
165 if (brw->gen >= 7) {
166 /* Gen7 introduced the f32to16 and f16to32 instructions, which can be
167 * used to execute packHalf2x16 and unpackHalf2x16. For AOS code, no
168 * lowering is needed. For SOA code, the Half2x16 ops must be
169 * scalarized.
170 */
171 if (is_scalar_shader_stage(brw, shader_type)) {
172 ops |= LOWER_PACK_HALF_2x16_TO_SPLIT
173 | LOWER_UNPACK_HALF_2x16_TO_SPLIT;
174 }
175 } else {
176 ops |= LOWER_PACK_HALF_2x16
177 | LOWER_UNPACK_HALF_2x16;
178 }
179
180 lower_packing_builtins(ir, ops);
181 }
182
/**
 * Run the i965-specific GLSL IR lowering and optimization pipeline on a
 * linked shader.
 *
 * The shader's IR is adopted into a temporary ralloc context so that IR
 * discarded by optimization can be freed in one shot; surviving IR is
 * reparented back before the context is destroyed.  The order of the
 * passes below is significant.
 */
static void
process_glsl_ir(struct brw_context *brw,
                struct gl_shader_program *shader_prog,
                struct gl_shader *shader)
{
   struct gl_context *ctx = &brw->ctx;
   const struct gl_shader_compiler_options *options =
      &ctx->Const.ShaderCompilerOptions[shader->Stage];

   /* Temporary memory context for any new IR. */
   void *mem_ctx = ralloc_context(NULL);

   ralloc_adopt(mem_ctx, shader->ir);

   /* lower_packing_builtins() inserts arithmetic instructions, so it
    * must precede lower_instructions().
    */
   brw_lower_packing_builtins(brw, shader->Stage, shader->ir);
   do_mat_op_to_vec(shader->ir);
   /* Gen7+ has BFM/BFI, so bitfieldInsert() can be lowered to them. */
   const int bitfield_insert = brw->gen >= 7 ? BITFIELD_INSERT_TO_BFM_BFI : 0;
   lower_instructions(shader->ir,
                      MOD_TO_FLOOR |
                      DIV_TO_MUL_RCP |
                      SUB_TO_ADD_NEG |
                      EXP_TO_EXP2 |
                      LOG_TO_LOG2 |
                      bitfield_insert |
                      LDEXP_TO_ARITH);

   /* Pre-gen6 HW can only nest if-statements 16 deep.  Beyond this,
    * if-statements need to be flattened.
    */
   if (brw->gen < 6)
      lower_if_to_cond_assign(shader->ir, 16);

   do_lower_texture_projection(shader->ir);
   brw_lower_texture_gradients(brw, shader->ir);
   do_vec_index_to_cond_assign(shader->ir);
   lower_vector_insert(shader->ir, true);
   /* When NIR is used, cubemap normalization happens in the NIR path. */
   if (options->NirOptions == NULL)
      brw_do_cubemap_normalize(shader->ir);
   lower_offset_arrays(shader->ir);
   brw_do_lower_unnormalized_offset(shader->ir);
   lower_noise(shader->ir);
   lower_quadop_vector(shader->ir, false);

   bool lowered_variable_indexing =
      lower_variable_index_to_cond_assign(shader->ir,
                                          options->EmitNoIndirectInput,
                                          options->EmitNoIndirectOutput,
                                          options->EmitNoIndirectTemp,
                                          options->EmitNoIndirectUniform);

   if (unlikely(brw->perf_debug && lowered_variable_indexing)) {
      perf_debug("Unsupported form of variable indexing in FS; falling "
                 "back to very inefficient code generation\n");
   }

   lower_ubo_reference(shader, shader->ir);

   /* Iterate scalarization, jump lowering and the common optimizations to
    * a fixed point.
    */
   bool progress;
   do {
      progress = false;

      if (is_scalar_shader_stage(brw, shader->Stage)) {
         brw_do_channel_expressions(shader->ir);
         brw_do_vector_splitting(shader->ir);
      }

      progress = do_lower_jumps(shader->ir, true, true,
                                true, /* main return */
                                false, /* continue */
                                false /* loops */
                                ) || progress;

      progress = do_common_optimization(shader->ir, true, true,
                                        options, ctx->Const.NativeIntegers) || progress;
   } while (progress);

   if (options->NirOptions != NULL)
      lower_output_reads(shader->ir);

   validate_ir_tree(shader->ir);

   /* Now that we've finished altering the linked IR, reparent any live IR back
    * to the permanent memory context, and free the temporary one (discarding any
    * junk we optimized away).
    */
   reparent_ir(shader->ir, shader->ir);
   ralloc_free(mem_ctx);

   if (ctx->_Shader->Flags & GLSL_DUMP) {
      fprintf(stderr, "\n");
      fprintf(stderr, "GLSL IR for linked %s program %d:\n",
              _mesa_shader_stage_to_string(shader->Stage),
              shader_prog->Name);
      _mesa_print_ir(stderr, shader->ir, NULL);
      fprintf(stderr, "\n");
   }
}
283
/**
 * Driver LinkShader hook: create a gl_program for each linked stage, run
 * the backend IR lowering, collect built-in uniform state references, and
 * optionally translate to NIR and precompile.
 *
 * Returns false (link failure) on allocation failure or a failed
 * precompile.
 */
GLboolean
brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg)
{
   struct brw_context *brw = brw_context(ctx);
   unsigned int stage;

   for (stage = 0; stage < ARRAY_SIZE(shProg->_LinkedShaders); stage++) {
      struct gl_shader *shader = shProg->_LinkedShaders[stage];
      const struct gl_shader_compiler_options *options =
         &ctx->Const.ShaderCompilerOptions[stage];

      if (!shader)
         continue;

      struct gl_program *prog =
         ctx->Driver.NewProgram(ctx, _mesa_shader_stage_to_program(stage),
                                shader->Name);
      if (!prog)
         return false;
      prog->Parameters = _mesa_new_parameter_list();

      _mesa_copy_linked_program_data((gl_shader_stage) stage, shProg, prog);

      process_glsl_ir(brw, shProg, shader);

      /* Make a pass over the IR to add state references for any built-in
       * uniforms that are used.  This has to be done now (during linking).
       * Code generation doesn't happen until the first time this shader is
       * used for rendering.  Waiting until then to generate the parameters is
       * too late.  At that point, the values for the built-in uniforms won't
       * get sent to the shader.
       */
      foreach_in_list(ir_instruction, node, shader->ir) {
         ir_variable *var = node->as_variable();

         /* Only built-in ("gl_"-prefixed) uniforms carry state slots. */
         if ((var == NULL) || (var->data.mode != ir_var_uniform)
             || (strncmp(var->name, "gl_", 3) != 0))
            continue;

         const ir_state_slot *const slots = var->get_state_slots();
         assert(slots != NULL);

         for (unsigned int i = 0; i < var->get_num_state_slots(); i++) {
            _mesa_add_state_reference(prog->Parameters,
                                      (gl_state_index *) slots[i].tokens);
         }
      }

      do_set_program_inouts(shader->ir, prog, shader->Stage);

      prog->SamplersUsed = shader->active_samplers;
      prog->ShadowSamplers = shader->shadow_samplers;
      _mesa_update_shader_textures_used(shProg, prog);

      _mesa_reference_program(ctx, &shader->Program, prog);

      brw_add_texrect_params(prog);

      if (options->NirOptions)
         prog->nir = brw_create_nir(brw, shProg, prog, (gl_shader_stage) stage);

      /* Drop our local reference; shader->Program keeps the object alive. */
      _mesa_reference_program(ctx, &prog, NULL);
   }

   /* Dump the source of every attached shader when GLSL_DUMP is set
    * (skipping name 0, the dummy fixed-function program).
    */
   if ((ctx->_Shader->Flags & GLSL_DUMP) && shProg->Name != 0) {
      for (unsigned i = 0; i < shProg->NumShaders; i++) {
         const struct gl_shader *sh = shProg->Shaders[i];
         if (!sh)
            continue;

         fprintf(stderr, "GLSL %s shader %d source for linked program %d:\n",
                 _mesa_shader_stage_to_string(sh->Stage),
                 i, shProg->Name);
         fprintf(stderr, "%s", sh->Source);
         fprintf(stderr, "\n");
      }
   }

   if (brw->precompile && !brw_shader_precompile(ctx, shProg))
      return false;

   return true;
}
367
368
369 enum brw_reg_type
370 brw_type_for_base_type(const struct glsl_type *type)
371 {
372 switch (type->base_type) {
373 case GLSL_TYPE_FLOAT:
374 return BRW_REGISTER_TYPE_F;
375 case GLSL_TYPE_INT:
376 case GLSL_TYPE_BOOL:
377 return BRW_REGISTER_TYPE_D;
378 case GLSL_TYPE_UINT:
379 return BRW_REGISTER_TYPE_UD;
380 case GLSL_TYPE_ARRAY:
381 return brw_type_for_base_type(type->fields.array);
382 case GLSL_TYPE_STRUCT:
383 case GLSL_TYPE_SAMPLER:
384 case GLSL_TYPE_ATOMIC_UINT:
385 /* These should be overridden with the type of the member when
386 * dereferenced into. BRW_REGISTER_TYPE_UD seems like a likely
387 * way to trip up if we don't.
388 */
389 return BRW_REGISTER_TYPE_UD;
390 case GLSL_TYPE_IMAGE:
391 return BRW_REGISTER_TYPE_UD;
392 case GLSL_TYPE_VOID:
393 case GLSL_TYPE_ERROR:
394 case GLSL_TYPE_INTERFACE:
395 case GLSL_TYPE_DOUBLE:
396 unreachable("not reached");
397 }
398
399 return BRW_REGISTER_TYPE_F;
400 }
401
402 enum brw_conditional_mod
403 brw_conditional_for_comparison(unsigned int op)
404 {
405 switch (op) {
406 case ir_binop_less:
407 return BRW_CONDITIONAL_L;
408 case ir_binop_greater:
409 return BRW_CONDITIONAL_G;
410 case ir_binop_lequal:
411 return BRW_CONDITIONAL_LE;
412 case ir_binop_gequal:
413 return BRW_CONDITIONAL_GE;
414 case ir_binop_equal:
415 case ir_binop_all_equal: /* same as equal for scalars */
416 return BRW_CONDITIONAL_Z;
417 case ir_binop_nequal:
418 case ir_binop_any_nequal: /* same as nequal for scalars */
419 return BRW_CONDITIONAL_NZ;
420 default:
421 unreachable("not reached: bad operation for comparison");
422 }
423 }
424
425 uint32_t
426 brw_math_function(enum opcode op)
427 {
428 switch (op) {
429 case SHADER_OPCODE_RCP:
430 return BRW_MATH_FUNCTION_INV;
431 case SHADER_OPCODE_RSQ:
432 return BRW_MATH_FUNCTION_RSQ;
433 case SHADER_OPCODE_SQRT:
434 return BRW_MATH_FUNCTION_SQRT;
435 case SHADER_OPCODE_EXP2:
436 return BRW_MATH_FUNCTION_EXP;
437 case SHADER_OPCODE_LOG2:
438 return BRW_MATH_FUNCTION_LOG;
439 case SHADER_OPCODE_POW:
440 return BRW_MATH_FUNCTION_POW;
441 case SHADER_OPCODE_SIN:
442 return BRW_MATH_FUNCTION_SIN;
443 case SHADER_OPCODE_COS:
444 return BRW_MATH_FUNCTION_COS;
445 case SHADER_OPCODE_INT_QUOTIENT:
446 return BRW_MATH_FUNCTION_INT_DIV_QUOTIENT;
447 case SHADER_OPCODE_INT_REMAINDER:
448 return BRW_MATH_FUNCTION_INT_DIV_REMAINDER;
449 default:
450 unreachable("not reached: unknown math function");
451 }
452 }
453
/**
 * Pack constant texel offsets into the sampler message header format.
 *
 * Combine up to three offsets into a single unsigned dword:
 *
 *    bits 11:8 - U Offset (X component)
 *    bits  7:4 - V Offset (Y component)
 *    bits  3:0 - R Offset (Z component)
 *
 * Each component is a signed 4-bit field, so only its low nibble is kept.
 * Returns 0 for a NULL \p offsets (nonconstant offset; caller handles it).
 *
 * Previously this computed `offsets[i] << shift`, which is undefined
 * behavior in C/C++ when the offset is negative; mask to the nibble in
 * unsigned arithmetic before shifting instead (same resulting bits).
 */
uint32_t
brw_texture_offset(int *offsets, unsigned num_components)
{
   if (!offsets) return 0; /* nonconstant offset; caller will handle it. */

   assert(num_components <= 3);

   unsigned offset_bits = 0;
   for (unsigned i = 0; i < num_components; i++) {
      const unsigned shift = 4 * (2 - i);
      offset_bits |= ((unsigned)offsets[i] & 0xF) << shift;
   }
   return offset_bits;
}
472
/**
 * Return a human-readable mnemonic for \p op, used by the instruction
 * dumpers.
 *
 * Real hardware opcodes (MOV..NOP) take their name from the
 * opcode_descs[] table; virtual (IR-only) opcodes get hand-written names
 * below.  Hitting the unreachable() at the end means a new virtual
 * opcode was added without a name here.
 */
const char *
brw_instruction_name(enum opcode op)
{
   switch (op) {
   case BRW_OPCODE_MOV ... BRW_OPCODE_NOP:
      assert(opcode_descs[op].name);
      return opcode_descs[op].name;
   case FS_OPCODE_FB_WRITE:
      return "fb_write";
   case FS_OPCODE_BLORP_FB_WRITE:
      return "blorp_fb_write";
   case FS_OPCODE_REP_FB_WRITE:
      return "rep_fb_write";

   /* Math-unit opcodes. */
   case SHADER_OPCODE_RCP:
      return "rcp";
   case SHADER_OPCODE_RSQ:
      return "rsq";
   case SHADER_OPCODE_SQRT:
      return "sqrt";
   case SHADER_OPCODE_EXP2:
      return "exp2";
   case SHADER_OPCODE_LOG2:
      return "log2";
   case SHADER_OPCODE_POW:
      return "pow";
   case SHADER_OPCODE_INT_QUOTIENT:
      return "int_quot";
   case SHADER_OPCODE_INT_REMAINDER:
      return "int_rem";
   case SHADER_OPCODE_SIN:
      return "sin";
   case SHADER_OPCODE_COS:
      return "cos";

   /* Texture/sampler opcodes. */
   case SHADER_OPCODE_TEX:
      return "tex";
   case SHADER_OPCODE_TXD:
      return "txd";
   case SHADER_OPCODE_TXF:
      return "txf";
   case SHADER_OPCODE_TXL:
      return "txl";
   case SHADER_OPCODE_TXS:
      return "txs";
   case FS_OPCODE_TXB:
      return "txb";
   case SHADER_OPCODE_TXF_CMS:
      return "txf_cms";
   case SHADER_OPCODE_TXF_UMS:
      return "txf_ums";
   case SHADER_OPCODE_TXF_MCS:
      return "txf_mcs";
   case SHADER_OPCODE_LOD:
      return "lod";
   case SHADER_OPCODE_TG4:
      return "tg4";
   case SHADER_OPCODE_TG4_OFFSET:
      return "tg4_offset";
   case SHADER_OPCODE_SHADER_TIME_ADD:
      return "shader_time_add";

   /* Surface/memory opcodes. */
   case SHADER_OPCODE_UNTYPED_ATOMIC:
      return "untyped_atomic";
   case SHADER_OPCODE_UNTYPED_SURFACE_READ:
      return "untyped_surface_read";
   case SHADER_OPCODE_UNTYPED_SURFACE_WRITE:
      return "untyped_surface_write";
   case SHADER_OPCODE_TYPED_ATOMIC:
      return "typed_atomic";
   case SHADER_OPCODE_TYPED_SURFACE_READ:
      return "typed_surface_read";
   case SHADER_OPCODE_TYPED_SURFACE_WRITE:
      return "typed_surface_write";
   case SHADER_OPCODE_MEMORY_FENCE:
      return "memory_fence";

   case SHADER_OPCODE_LOAD_PAYLOAD:
      return "load_payload";

   case SHADER_OPCODE_GEN4_SCRATCH_READ:
      return "gen4_scratch_read";
   case SHADER_OPCODE_GEN4_SCRATCH_WRITE:
      return "gen4_scratch_write";
   case SHADER_OPCODE_GEN7_SCRATCH_READ:
      return "gen7_scratch_read";
   case SHADER_OPCODE_URB_WRITE_SIMD8:
      return "gen8_urb_write_simd8";

   case SHADER_OPCODE_FIND_LIVE_CHANNEL:
      return "find_live_channel";
   case SHADER_OPCODE_BROADCAST:
      return "broadcast";

   case VEC4_OPCODE_MOV_BYTES:
      return "mov_bytes";
   case VEC4_OPCODE_PACK_BYTES:
      return "pack_bytes";
   case VEC4_OPCODE_UNPACK_UNIFORM:
      return "unpack_uniform";

   /* Fragment-shader-only opcodes. */
   case FS_OPCODE_DDX_COARSE:
      return "ddx_coarse";
   case FS_OPCODE_DDX_FINE:
      return "ddx_fine";
   case FS_OPCODE_DDY_COARSE:
      return "ddy_coarse";
   case FS_OPCODE_DDY_FINE:
      return "ddy_fine";

   case FS_OPCODE_CINTERP:
      return "cinterp";
   case FS_OPCODE_LINTERP:
      return "linterp";

   case FS_OPCODE_PIXEL_X:
      return "pixel_x";
   case FS_OPCODE_PIXEL_Y:
      return "pixel_y";

   case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
      return "uniform_pull_const";
   case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7:
      return "uniform_pull_const_gen7";
   case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD:
      return "varying_pull_const";
   case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7:
      return "varying_pull_const_gen7";

   case FS_OPCODE_MOV_DISPATCH_TO_FLAGS:
      return "mov_dispatch_to_flags";
   case FS_OPCODE_DISCARD_JUMP:
      return "discard_jump";

   case FS_OPCODE_SET_OMASK:
      return "set_omask";
   case FS_OPCODE_SET_SAMPLE_ID:
      return "set_sample_id";
   case FS_OPCODE_SET_SIMD4X2_OFFSET:
      return "set_simd4x2_offset";

   case FS_OPCODE_PACK_HALF_2x16_SPLIT:
      return "pack_half_2x16_split";
   case FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X:
      return "unpack_half_2x16_split_x";
   case FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y:
      return "unpack_half_2x16_split_y";

   case FS_OPCODE_PLACEHOLDER_HALT:
      return "placeholder_halt";

   case FS_OPCODE_INTERPOLATE_AT_CENTROID:
      return "interp_centroid";
   case FS_OPCODE_INTERPOLATE_AT_SAMPLE:
      return "interp_sample";
   case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET:
      return "interp_shared_offset";
   case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET:
      return "interp_per_slot_offset";

   /* Vertex-shader-only opcodes. */
   case VS_OPCODE_URB_WRITE:
      return "vs_urb_write";
   case VS_OPCODE_PULL_CONSTANT_LOAD:
      return "pull_constant_load";
   case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7:
      return "pull_constant_load_gen7";

   case VS_OPCODE_SET_SIMD4X2_HEADER_GEN9:
      return "set_simd4x2_header_gen9";

   case VS_OPCODE_UNPACK_FLAGS_SIMD4X2:
      return "unpack_flags_simd4x2";

   /* Geometry-shader-only opcodes. */
   case GS_OPCODE_URB_WRITE:
      return "gs_urb_write";
   case GS_OPCODE_URB_WRITE_ALLOCATE:
      return "gs_urb_write_allocate";
   case GS_OPCODE_THREAD_END:
      return "gs_thread_end";
   case GS_OPCODE_SET_WRITE_OFFSET:
      return "set_write_offset";
   case GS_OPCODE_SET_VERTEX_COUNT:
      return "set_vertex_count";
   case GS_OPCODE_SET_DWORD_2:
      return "set_dword_2";
   case GS_OPCODE_PREPARE_CHANNEL_MASKS:
      return "prepare_channel_masks";
   case GS_OPCODE_SET_CHANNEL_MASKS:
      return "set_channel_masks";
   case GS_OPCODE_GET_INSTANCE_ID:
      return "get_instance_id";
   case GS_OPCODE_FF_SYNC:
      return "ff_sync";
   case GS_OPCODE_SET_PRIMITIVE_ID:
      return "set_primitive_id";
   case GS_OPCODE_SVB_WRITE:
      return "gs_svb_write";
   case GS_OPCODE_SVB_SET_DST_INDEX:
      return "gs_svb_set_dst_index";
   case GS_OPCODE_FF_SYNC_SET_PRIMITIVES:
      return "gs_ff_sync_set_primitives";
   case CS_OPCODE_CS_TERMINATE:
      return "cs_terminate";
   case SHADER_OPCODE_BARRIER:
      return "barrier";
   }

   unreachable("not reached");
}
682
/**
 * Apply saturation to the immediate in \p reg, interpreted as \p type.
 *
 * Returns true if the stored bits changed.  Full-width integer types
 * already cover their saturation range, so nothing is done for them.
 * The value is reinterpreted through a union so the comparison at the
 * end can be a single bit-pattern check regardless of type.
 */
bool
brw_saturate_immediate(enum brw_reg_type type, struct brw_reg *reg)
{
   union {
      unsigned ud;
      int d;
      float f;
   } imm = { reg->dw1.ud }, sat_imm = { 0 };

   switch (type) {
   case BRW_REGISTER_TYPE_UD:
   case BRW_REGISTER_TYPE_D:
   case BRW_REGISTER_TYPE_UQ:
   case BRW_REGISTER_TYPE_Q:
      /* Nothing to do. */
      return false;
   case BRW_REGISTER_TYPE_UW:
      sat_imm.ud = CLAMP(imm.ud, 0, USHRT_MAX);
      break;
   case BRW_REGISTER_TYPE_W:
      sat_imm.d = CLAMP(imm.d, SHRT_MIN, SHRT_MAX);
      break;
   case BRW_REGISTER_TYPE_F:
      /* Float saturation clamps to [0.0, 1.0]. */
      sat_imm.f = CLAMP(imm.f, 0.0f, 1.0f);
      break;
   case BRW_REGISTER_TYPE_UB:
   case BRW_REGISTER_TYPE_B:
      unreachable("no UB/B immediates");
   case BRW_REGISTER_TYPE_V:
   case BRW_REGISTER_TYPE_UV:
   case BRW_REGISTER_TYPE_VF:
      unreachable("unimplemented: saturate vector immediate");
   case BRW_REGISTER_TYPE_DF:
   case BRW_REGISTER_TYPE_HF:
      unreachable("unimplemented: saturate DF/HF immediate");
   }

   if (imm.ud != sat_imm.ud) {
      reg->dw1.ud = sat_imm.ud;
      return true;
   }
   return false;
}
726
/**
 * Negate the immediate in \p reg in place, interpreted as \p type.
 *
 * Returns true on success, false for unhandled types.  Note that the
 * assert(!"...")-guarded cases deliberately fall through to the final
 * `return false` in release (NDEBUG) builds.
 */
bool
brw_negate_immediate(enum brw_reg_type type, struct brw_reg *reg)
{
   switch (type) {
   case BRW_REGISTER_TYPE_D:
   case BRW_REGISTER_TYPE_UD:
      reg->dw1.d = -reg->dw1.d;
      return true;
   case BRW_REGISTER_TYPE_W:
   case BRW_REGISTER_TYPE_UW:
      /* Negate as a 16-bit value, stored sign-extended in the dword. */
      reg->dw1.d = -(int16_t)reg->dw1.ud;
      return true;
   case BRW_REGISTER_TYPE_F:
      reg->dw1.f = -reg->dw1.f;
      return true;
   case BRW_REGISTER_TYPE_VF:
      /* VF packs four 8-bit restricted floats; flip each sign bit. */
      reg->dw1.ud ^= 0x80808080;
      return true;
   case BRW_REGISTER_TYPE_UB:
   case BRW_REGISTER_TYPE_B:
      unreachable("no UB/B immediates");
   case BRW_REGISTER_TYPE_UV:
   case BRW_REGISTER_TYPE_V:
      assert(!"unimplemented: negate UV/V immediate");
   case BRW_REGISTER_TYPE_UQ:
   case BRW_REGISTER_TYPE_Q:
      assert(!"unimplemented: negate UQ/Q immediate");
   case BRW_REGISTER_TYPE_DF:
   case BRW_REGISTER_TYPE_HF:
      assert(!"unimplemented: negate DF/HF immediate");
   }

   return false;
}
761
/**
 * Take the absolute value of the immediate in \p reg in place,
 * interpreted as \p type.
 *
 * Returns true on success, false for unhandled types.  As in
 * brw_negate_immediate(), the assert(!"...")-guarded cases fall through
 * to the final `return false` in release builds.
 */
bool
brw_abs_immediate(enum brw_reg_type type, struct brw_reg *reg)
{
   switch (type) {
   case BRW_REGISTER_TYPE_D:
      reg->dw1.d = abs(reg->dw1.d);
      return true;
   case BRW_REGISTER_TYPE_W:
      /* Take |x| of the low 16 bits, stored sign-extended in the dword. */
      reg->dw1.d = abs((int16_t)reg->dw1.ud);
      return true;
   case BRW_REGISTER_TYPE_F:
      reg->dw1.f = fabsf(reg->dw1.f);
      return true;
   case BRW_REGISTER_TYPE_VF:
      /* VF packs four 8-bit restricted floats; clear each sign bit. */
      reg->dw1.ud &= ~0x80808080;
      return true;
   case BRW_REGISTER_TYPE_UB:
   case BRW_REGISTER_TYPE_B:
      unreachable("no UB/B immediates");
   case BRW_REGISTER_TYPE_UQ:
   case BRW_REGISTER_TYPE_UD:
   case BRW_REGISTER_TYPE_UW:
   case BRW_REGISTER_TYPE_UV:
      /* Presumably the absolute value modifier on an unsigned source is a
       * nop, but it would be nice to confirm.
       */
      assert(!"unimplemented: abs unsigned immediate");
   case BRW_REGISTER_TYPE_V:
      assert(!"unimplemented: abs V immediate");
   case BRW_REGISTER_TYPE_Q:
      assert(!"unimplemented: abs Q immediate");
   case BRW_REGISTER_TYPE_DF:
   case BRW_REGISTER_TYPE_HF:
      assert(!"unimplemented: abs DF/HF immediate");
   }

   return false;
}
800
/**
 * Construct the state shared by the scalar (FS) and vec4 backend
 * compiler visitors.
 *
 * \param shader_prog      GLSL shader program, or NULL for non-GLSL paths
 *                         (e.g. ARB programs); `shader` is then NULL too.
 * \param prog             The gl_program being compiled.
 * \param stage_prog_data  Per-stage compiled program data to fill in.
 * \param stage            Which shader stage this compile is for.
 */
backend_shader::backend_shader(struct brw_context *brw,
                               void *mem_ctx,
                               struct gl_shader_program *shader_prog,
                               struct gl_program *prog,
                               struct brw_stage_prog_data *stage_prog_data,
                               gl_shader_stage stage)
   : brw(brw),
     devinfo(brw->intelScreen->devinfo),
     ctx(&brw->ctx),
     shader(shader_prog ?
        (struct brw_shader *)shader_prog->_LinkedShaders[stage] : NULL),
     shader_prog(shader_prog),
     prog(prog),
     stage_prog_data(stage_prog_data),
     mem_ctx(mem_ctx),
     cfg(NULL),
     stage(stage)
{
   /* Cache per-stage debug settings and printable names. */
   debug_enabled = INTEL_DEBUG & intel_debug_flag_for_shader_stage(stage);
   stage_name = _mesa_shader_stage_to_string(stage);
   stage_abbrev = _mesa_shader_stage_to_abbrev(stage);
}
823
824 bool
825 backend_reg::is_zero() const
826 {
827 if (file != IMM)
828 return false;
829
830 return fixed_hw_reg.dw1.d == 0;
831 }
832
833 bool
834 backend_reg::is_one() const
835 {
836 if (file != IMM)
837 return false;
838
839 return type == BRW_REGISTER_TYPE_F
840 ? fixed_hw_reg.dw1.f == 1.0
841 : fixed_hw_reg.dw1.d == 1;
842 }
843
844 bool
845 backend_reg::is_negative_one() const
846 {
847 if (file != IMM)
848 return false;
849
850 switch (type) {
851 case BRW_REGISTER_TYPE_F:
852 return fixed_hw_reg.dw1.f == -1.0;
853 case BRW_REGISTER_TYPE_D:
854 return fixed_hw_reg.dw1.d == -1;
855 default:
856 return false;
857 }
858 }
859
860 bool
861 backend_reg::is_null() const
862 {
863 return file == HW_REG &&
864 fixed_hw_reg.file == BRW_ARCHITECTURE_REGISTER_FILE &&
865 fixed_hw_reg.nr == BRW_ARF_NULL;
866 }
867
868
869 bool
870 backend_reg::is_accumulator() const
871 {
872 return file == HW_REG &&
873 fixed_hw_reg.file == BRW_ARCHITECTURE_REGISTER_FILE &&
874 fixed_hw_reg.nr == BRW_ARF_ACCUMULATOR;
875 }
876
877 bool
878 backend_reg::in_range(const backend_reg &r, unsigned n) const
879 {
880 return (file == r.file &&
881 reg == r.reg &&
882 reg_offset >= r.reg_offset &&
883 reg_offset < r.reg_offset + n);
884 }
885
886 bool
887 backend_instruction::is_commutative() const
888 {
889 switch (opcode) {
890 case BRW_OPCODE_AND:
891 case BRW_OPCODE_OR:
892 case BRW_OPCODE_XOR:
893 case BRW_OPCODE_ADD:
894 case BRW_OPCODE_MUL:
895 return true;
896 case BRW_OPCODE_SEL:
897 /* MIN and MAX are commutative. */
898 if (conditional_mod == BRW_CONDITIONAL_GE ||
899 conditional_mod == BRW_CONDITIONAL_L) {
900 return true;
901 }
902 /* fallthrough */
903 default:
904 return false;
905 }
906 }
907
908 bool
909 backend_instruction::is_3src() const
910 {
911 return opcode < ARRAY_SIZE(opcode_descs) && opcode_descs[opcode].nsrc == 3;
912 }
913
914 bool
915 backend_instruction::is_tex() const
916 {
917 return (opcode == SHADER_OPCODE_TEX ||
918 opcode == FS_OPCODE_TXB ||
919 opcode == SHADER_OPCODE_TXD ||
920 opcode == SHADER_OPCODE_TXF ||
921 opcode == SHADER_OPCODE_TXF_CMS ||
922 opcode == SHADER_OPCODE_TXF_UMS ||
923 opcode == SHADER_OPCODE_TXF_MCS ||
924 opcode == SHADER_OPCODE_TXL ||
925 opcode == SHADER_OPCODE_TXS ||
926 opcode == SHADER_OPCODE_LOD ||
927 opcode == SHADER_OPCODE_TG4 ||
928 opcode == SHADER_OPCODE_TG4_OFFSET);
929 }
930
931 bool
932 backend_instruction::is_math() const
933 {
934 return (opcode == SHADER_OPCODE_RCP ||
935 opcode == SHADER_OPCODE_RSQ ||
936 opcode == SHADER_OPCODE_SQRT ||
937 opcode == SHADER_OPCODE_EXP2 ||
938 opcode == SHADER_OPCODE_LOG2 ||
939 opcode == SHADER_OPCODE_SIN ||
940 opcode == SHADER_OPCODE_COS ||
941 opcode == SHADER_OPCODE_INT_QUOTIENT ||
942 opcode == SHADER_OPCODE_INT_REMAINDER ||
943 opcode == SHADER_OPCODE_POW);
944 }
945
946 bool
947 backend_instruction::is_control_flow() const
948 {
949 switch (opcode) {
950 case BRW_OPCODE_DO:
951 case BRW_OPCODE_WHILE:
952 case BRW_OPCODE_IF:
953 case BRW_OPCODE_ELSE:
954 case BRW_OPCODE_ENDIF:
955 case BRW_OPCODE_BREAK:
956 case BRW_OPCODE_CONTINUE:
957 return true;
958 default:
959 return false;
960 }
961 }
962
963 bool
964 backend_instruction::can_do_source_mods() const
965 {
966 switch (opcode) {
967 case BRW_OPCODE_ADDC:
968 case BRW_OPCODE_BFE:
969 case BRW_OPCODE_BFI1:
970 case BRW_OPCODE_BFI2:
971 case BRW_OPCODE_BFREV:
972 case BRW_OPCODE_CBIT:
973 case BRW_OPCODE_FBH:
974 case BRW_OPCODE_FBL:
975 case BRW_OPCODE_SUBB:
976 return false;
977 default:
978 return true;
979 }
980 }
981
/* Whether this instruction accepts the saturate destination modifier. */
bool
backend_instruction::can_do_saturate() const
{
   switch (opcode) {
   case BRW_OPCODE_ADD:
   case BRW_OPCODE_ASR:
   case BRW_OPCODE_AVG:
   case BRW_OPCODE_DP2:
   case BRW_OPCODE_DP3:
   case BRW_OPCODE_DP4:
   case BRW_OPCODE_DPH:
   case BRW_OPCODE_F16TO32:
   case BRW_OPCODE_F32TO16:
   case BRW_OPCODE_LINE:
   case BRW_OPCODE_LRP:
   case BRW_OPCODE_MAC:
   case BRW_OPCODE_MAD:
   case BRW_OPCODE_MATH:
   case BRW_OPCODE_MOV:
   case BRW_OPCODE_MUL:
   case BRW_OPCODE_PLN:
   case BRW_OPCODE_RNDD:
   case BRW_OPCODE_RNDE:
   case BRW_OPCODE_RNDU:
   case BRW_OPCODE_RNDZ:
   case BRW_OPCODE_SEL:
   case BRW_OPCODE_SHL:
   case BRW_OPCODE_SHR:
   case FS_OPCODE_LINTERP:
   case SHADER_OPCODE_COS:
   case SHADER_OPCODE_EXP2:
   case SHADER_OPCODE_LOG2:
   case SHADER_OPCODE_POW:
   case SHADER_OPCODE_RCP:
   case SHADER_OPCODE_RSQ:
   case SHADER_OPCODE_SIN:
   case SHADER_OPCODE_SQRT:
      return true;
   default:
      return false;
   }
}
1024
/* Whether this instruction accepts a conditional modifier (writing the
 * flag register based on its result).
 */
bool
backend_instruction::can_do_cmod() const
{
   switch (opcode) {
   case BRW_OPCODE_ADD:
   case BRW_OPCODE_ADDC:
   case BRW_OPCODE_AND:
   case BRW_OPCODE_ASR:
   case BRW_OPCODE_AVG:
   case BRW_OPCODE_CMP:
   case BRW_OPCODE_CMPN:
   case BRW_OPCODE_DP2:
   case BRW_OPCODE_DP3:
   case BRW_OPCODE_DP4:
   case BRW_OPCODE_DPH:
   case BRW_OPCODE_F16TO32:
   case BRW_OPCODE_F32TO16:
   case BRW_OPCODE_FRC:
   case BRW_OPCODE_LINE:
   case BRW_OPCODE_LRP:
   case BRW_OPCODE_LZD:
   case BRW_OPCODE_MAC:
   case BRW_OPCODE_MACH:
   case BRW_OPCODE_MAD:
   case BRW_OPCODE_MOV:
   case BRW_OPCODE_MUL:
   case BRW_OPCODE_NOT:
   case BRW_OPCODE_OR:
   case BRW_OPCODE_PLN:
   case BRW_OPCODE_RNDD:
   case BRW_OPCODE_RNDE:
   case BRW_OPCODE_RNDU:
   case BRW_OPCODE_RNDZ:
   case BRW_OPCODE_SAD2:
   case BRW_OPCODE_SADA2:
   case BRW_OPCODE_SHL:
   case BRW_OPCODE_SHR:
   case BRW_OPCODE_SUBB:
   case BRW_OPCODE_XOR:
   case FS_OPCODE_CINTERP:
   case FS_OPCODE_LINTERP:
      return true;
   default:
      return false;
   }
}
1071
1072 bool
1073 backend_instruction::reads_accumulator_implicitly() const
1074 {
1075 switch (opcode) {
1076 case BRW_OPCODE_MAC:
1077 case BRW_OPCODE_MACH:
1078 case BRW_OPCODE_SADA2:
1079 return true;
1080 default:
1081 return false;
1082 }
1083 }
1084
/* Whether this instruction updates the accumulator as a side effect.
 *
 * Beyond the explicit writes_accumulator flag, pre-gen6 hardware
 * implicitly updates the accumulator for the arithmetic opcode range
 * [ADD, NOP) and for the FS derivative/interpolation opcodes in
 * [DDX_COARSE, LINTERP] except CINTERP.
 */
bool
backend_instruction::writes_accumulator_implicitly(const struct brw_device_info *devinfo) const
{
   return writes_accumulator ||
          (devinfo->gen < 6 &&
           ((opcode >= BRW_OPCODE_ADD && opcode < BRW_OPCODE_NOP) ||
            (opcode >= FS_OPCODE_DDX_COARSE && opcode <= FS_OPCODE_LINTERP &&
             opcode != FS_OPCODE_CINTERP)));
}
1094
1095 bool
1096 backend_instruction::has_side_effects() const
1097 {
1098 switch (opcode) {
1099 case SHADER_OPCODE_UNTYPED_ATOMIC:
1100 case SHADER_OPCODE_GEN4_SCRATCH_WRITE:
1101 case SHADER_OPCODE_UNTYPED_SURFACE_WRITE:
1102 case SHADER_OPCODE_TYPED_ATOMIC:
1103 case SHADER_OPCODE_TYPED_SURFACE_WRITE:
1104 case SHADER_OPCODE_MEMORY_FENCE:
1105 case SHADER_OPCODE_URB_WRITE_SIMD8:
1106 case FS_OPCODE_FB_WRITE:
1107 case SHADER_OPCODE_BARRIER:
1108 return true;
1109 default:
1110 return false;
1111 }
1112 }
1113
1114 #ifndef NDEBUG
1115 static bool
1116 inst_is_in_block(const bblock_t *block, const backend_instruction *inst)
1117 {
1118 bool found = false;
1119 foreach_inst_in_block (backend_instruction, i, block) {
1120 if (inst == i) {
1121 found = true;
1122 }
1123 }
1124 return found;
1125 }
1126 #endif
1127
1128 static void
1129 adjust_later_block_ips(bblock_t *start_block, int ip_adjustment)
1130 {
1131 for (bblock_t *block_iter = start_block->next();
1132 !block_iter->link.is_tail_sentinel();
1133 block_iter = block_iter->next()) {
1134 block_iter->start_ip += ip_adjustment;
1135 block_iter->end_ip += ip_adjustment;
1136 }
1137 }
1138
/* Insert \p inst immediately after this instruction, updating \p block's
 * IP range and those of all later blocks.
 */
void
backend_instruction::insert_after(bblock_t *block, backend_instruction *inst)
{
   /* Head sentinels aren't real instructions, so skip the membership check. */
   if (!this->is_head_sentinel())
      assert(inst_is_in_block(block, this) || !"Instruction not in block");

   block->end_ip++;

   adjust_later_block_ips(block, 1);

   exec_node::insert_after(inst);
}
1151
1152 void
1153 backend_instruction::insert_before(bblock_t *block, backend_instruction *inst)
1154 {
1155 if (!this->is_tail_sentinel())
1156 assert(inst_is_in_block(block, this) || !"Instruction not in block");
1157
1158 block->end_ip++;
1159
1160 adjust_later_block_ips(block, 1);
1161
1162 exec_node::insert_before(inst);
1163 }
1164
1165 void
1166 backend_instruction::insert_before(bblock_t *block, exec_list *list)
1167 {
1168 assert(inst_is_in_block(block, this) || !"Instruction not in block");
1169
1170 unsigned num_inst = list->length();
1171
1172 block->end_ip += num_inst;
1173
1174 adjust_later_block_ips(block, num_inst);
1175
1176 exec_node::insert_before(list);
1177 }
1178
void
backend_instruction::remove(bblock_t *block)
{
   assert(inst_is_in_block(block, this) || !"Instruction not in block");

   /* Shift every later block's IPs down by one for the removed
    * instruction.
    */
   adjust_later_block_ips(block, -1);

   /* If this was the block's only remaining instruction, drop the whole
    * block from the CFG; otherwise just shrink its end IP.
    * NOTE(review): presumably remove_block() invalidates \p block, which is
    * why end_ip is only touched on the other path -- confirm before
    * reordering.
    */
   if (block->start_ip == block->end_ip) {
      block->cfg->remove_block(block);
   } else {
      block->end_ip--;
   }

   exec_node::remove();
}
1194
/* Convenience overload: dump the instruction list to stderr. */
void
backend_shader::dump_instructions()
{
   dump_instructions(NULL);
}
1200
1201 void
1202 backend_shader::dump_instructions(const char *name)
1203 {
1204 FILE *file = stderr;
1205 if (name && geteuid() != 0) {
1206 file = fopen(name, "w");
1207 if (!file)
1208 file = stderr;
1209 }
1210
1211 if (cfg) {
1212 int ip = 0;
1213 foreach_block_and_inst(block, backend_instruction, inst, cfg) {
1214 fprintf(file, "%4d: ", ip++);
1215 dump_instruction(inst, file);
1216 }
1217 } else {
1218 int ip = 0;
1219 foreach_in_list(backend_instruction, inst, &instructions) {
1220 fprintf(file, "%4d: ", ip++);
1221 dump_instruction(inst, file);
1222 }
1223 }
1224
1225 if (file != stderr) {
1226 fclose(file);
1227 }
1228 }
1229
1230 void
1231 backend_shader::calculate_cfg()
1232 {
1233 if (this->cfg)
1234 return;
1235 cfg = new(mem_ctx) cfg_t(&this->instructions);
1236 }
1237
1238 void
1239 backend_shader::invalidate_cfg()
1240 {
1241 ralloc_free(this->cfg);
1242 this->cfg = NULL;
1243 }
1244
1245 /**
1246 * Sets up the starting offsets for the groups of binding table entries
 * common to all pipeline stages.
1248 *
1249 * Unused groups are initialized to 0xd0d0d0d0 to make it obvious that they're
1250 * unused but also make sure that addition of small offsets to them will
1251 * trigger some of our asserts that surface indices are < BRW_MAX_SURFACES.
1252 */
1253 void
1254 backend_shader::assign_common_binding_table_offsets(uint32_t next_binding_table_offset)
1255 {
1256 int num_textures = _mesa_fls(prog->SamplersUsed);
1257
1258 stage_prog_data->binding_table.texture_start = next_binding_table_offset;
1259 next_binding_table_offset += num_textures;
1260
1261 if (shader) {
1262 stage_prog_data->binding_table.ubo_start = next_binding_table_offset;
1263 next_binding_table_offset += shader->base.NumUniformBlocks;
1264 } else {
1265 stage_prog_data->binding_table.ubo_start = 0xd0d0d0d0;
1266 }
1267
1268 if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
1269 stage_prog_data->binding_table.shader_time_start = next_binding_table_offset;
1270 next_binding_table_offset++;
1271 } else {
1272 stage_prog_data->binding_table.shader_time_start = 0xd0d0d0d0;
1273 }
1274
1275 if (prog->UsesGather) {
1276 if (devinfo->gen >= 8) {
1277 stage_prog_data->binding_table.gather_texture_start =
1278 stage_prog_data->binding_table.texture_start;
1279 } else {
1280 stage_prog_data->binding_table.gather_texture_start = next_binding_table_offset;
1281 next_binding_table_offset += num_textures;
1282 }
1283 } else {
1284 stage_prog_data->binding_table.gather_texture_start = 0xd0d0d0d0;
1285 }
1286
1287 if (shader_prog && shader_prog->NumAtomicBuffers) {
1288 stage_prog_data->binding_table.abo_start = next_binding_table_offset;
1289 next_binding_table_offset += shader_prog->NumAtomicBuffers;
1290 } else {
1291 stage_prog_data->binding_table.abo_start = 0xd0d0d0d0;
1292 }
1293
1294 if (shader && shader->base.NumImages) {
1295 stage_prog_data->binding_table.image_start = next_binding_table_offset;
1296 next_binding_table_offset += shader->base.NumImages;
1297 } else {
1298 stage_prog_data->binding_table.image_start = 0xd0d0d0d0;
1299 }
1300
1301 /* This may or may not be used depending on how the compile goes. */
1302 stage_prog_data->binding_table.pull_constants_start = next_binding_table_offset;
1303 next_binding_table_offset++;
1304
1305 assert(next_binding_table_offset <= BRW_MAX_SURFACES);
1306
1307 /* prog_data->base.binding_table.size will be set by brw_mark_surface_used. */
1308 }