i965/cs: Add CS_OPCODE_CS_TERMINATE
[mesa.git] / src / mesa / drivers / dri / i965 / brw_shader.cpp
/*
 * Copyright © 2010 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "main/macros.h"
#include "brw_context.h"
#include "brw_vs.h"
#include "brw_gs.h"
#include "brw_fs.h"
#include "brw_cfg.h"
#include "brw_nir.h"
#include "glsl/ir_optimization.h"
#include "glsl/glsl_parser_extras.h"
#include "main/shaderapi.h"

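/**
 * Create the compiler for a device: allocate it out of mem_ctx and
 * pre-build the register sets used by the scalar (FS) and vec4 backends.
 */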
struct brw_compiler *
brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo)
{
   struct brw_compiler *compiler = rzalloc(mem_ctx, struct brw_compiler);

   compiler->devinfo = devinfo;

   brw_fs_alloc_reg_sets(compiler);
   brw_vec4_alloc_reg_set(compiler);

   return compiler;
}

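/**
 * i965's NewShader driver hook: allocate a brw_shader wrapping the core
 * gl_shader and initialize its type, stage and name.
 */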
struct gl_shader *
brw_new_shader(struct gl_context *ctx, GLuint name, GLuint type)
{
   struct brw_shader *shader;

   shader = rzalloc(NULL, struct brw_shader);
   if (shader) {
      shader->base.Type = type;
      shader->base.Stage = _mesa_shader_enum_to_shader_stage(type);
      shader->base.Name = name;
      _mesa_init_shader(ctx, &shader->base);
   }

   return &shader->base;
}

/**
 * Precompiles the shader stages at link time, even though the
 * non-orthogonal state (NOS) that will eventually be set is not yet
 * known, in the hope that the guess matches the NOS eventually used and
 * compile failures can thus be reported as link failures.
 */
static bool
brw_shader_precompile(struct gl_context *ctx,
                      struct gl_shader_program *sh_prog)
{
   struct gl_shader *vs = sh_prog->_LinkedShaders[MESA_SHADER_VERTEX];
   struct gl_shader *gs = sh_prog->_LinkedShaders[MESA_SHADER_GEOMETRY];
   struct gl_shader *fs = sh_prog->_LinkedShaders[MESA_SHADER_FRAGMENT];

   if (fs && !brw_fs_precompile(ctx, sh_prog, fs->Program))
      return false;

   if (gs && !brw_gs_precompile(ctx, sh_prog, gs->Program))
      return false;

   if (vs && !brw_vs_precompile(ctx, sh_prog, vs->Program))
      return false;

   return true;
}

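/**
 * Returns true if the stage is compiled by the scalar (FS-style) backend
 * rather than the vec4 backend: always for the fragment shader, and for
 * the vertex shader only when brw->scalar_vs is set.
 */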
static inline bool
is_scalar_shader_stage(struct brw_context *brw, int stage)
{
   switch (stage) {
   case MESA_SHADER_FRAGMENT:
      return true;
   case MESA_SHADER_VERTEX:
      return brw->scalar_vs;
   default:
      return false;
   }
}

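/**
 * Lower the GLSL pack/unpack builtins that the hardware cannot handle
 * natively.  The set of ops to lower depends on the hardware generation
 * and on whether the stage uses the scalar or the vec4 backend.
 */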
static void
brw_lower_packing_builtins(struct brw_context *brw,
                           gl_shader_stage shader_type,
                           exec_list *ir)
{
   int ops = LOWER_PACK_SNORM_2x16
           | LOWER_UNPACK_SNORM_2x16
           | LOWER_PACK_UNORM_2x16
           | LOWER_UNPACK_UNORM_2x16;

   if (is_scalar_shader_stage(brw, shader_type)) {
      ops |= LOWER_UNPACK_UNORM_4x8
           | LOWER_UNPACK_SNORM_4x8
           | LOWER_PACK_UNORM_4x8
           | LOWER_PACK_SNORM_4x8;
   }

   if (brw->gen >= 7) {
      /* Gen7 introduced the f32to16 and f16to32 instructions, which can be
       * used to execute packHalf2x16 and unpackHalf2x16.  For AOS code, no
       * lowering is needed.  For SOA code, the Half2x16 ops must be
       * scalarized.
       */
      if (is_scalar_shader_stage(brw, shader_type)) {
         ops |= LOWER_PACK_HALF_2x16_TO_SPLIT
              | LOWER_UNPACK_HALF_2x16_TO_SPLIT;
      }
   } else {
      ops |= LOWER_PACK_HALF_2x16
           | LOWER_UNPACK_HALF_2x16;
   }

   lower_packing_builtins(ir, ops);
}

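/**
 * Run the backend-specific lowering and optimization passes over a linked
 * shader's GLSL IR, validate the result, and dump it when GLSL_DUMP is set.
 */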
static void
process_glsl_ir(struct brw_context *brw,
                struct gl_shader_program *shader_prog,
                struct gl_shader *shader)
{
   struct gl_context *ctx = &brw->ctx;
   const struct gl_shader_compiler_options *options =
      &ctx->Const.ShaderCompilerOptions[shader->Stage];

   /* Temporary memory context for any new IR. */
   void *mem_ctx = ralloc_context(NULL);

   ralloc_adopt(mem_ctx, shader->ir);

   /* lower_packing_builtins() inserts arithmetic instructions, so it
    * must precede lower_instructions().
    */
   brw_lower_packing_builtins(brw, shader->Stage, shader->ir);
   do_mat_op_to_vec(shader->ir);
   const int bitfield_insert = brw->gen >= 7 ? BITFIELD_INSERT_TO_BFM_BFI : 0;
   lower_instructions(shader->ir,
                      MOD_TO_FLOOR |
                      DIV_TO_MUL_RCP |
                      SUB_TO_ADD_NEG |
                      EXP_TO_EXP2 |
                      LOG_TO_LOG2 |
                      bitfield_insert |
                      LDEXP_TO_ARITH);

   /* Pre-gen6 HW can only nest if-statements 16 deep.  Beyond this,
    * if-statements need to be flattened.
    */
   if (brw->gen < 6)
      lower_if_to_cond_assign(shader->ir, 16);

   do_lower_texture_projection(shader->ir);
   brw_lower_texture_gradients(brw, shader->ir);
   do_vec_index_to_cond_assign(shader->ir);
   lower_vector_insert(shader->ir, true);
   if (options->NirOptions == NULL)
      brw_do_cubemap_normalize(shader->ir);
   lower_offset_arrays(shader->ir);
   brw_do_lower_unnormalized_offset(shader->ir);
   lower_noise(shader->ir);
   lower_quadop_vector(shader->ir, false);

   bool lowered_variable_indexing =
      lower_variable_index_to_cond_assign(shader->ir,
                                          options->EmitNoIndirectInput,
                                          options->EmitNoIndirectOutput,
                                          options->EmitNoIndirectTemp,
                                          options->EmitNoIndirectUniform);
   if (unlikely(brw->perf_debug && lowered_variable_indexing)) {
      perf_debug("Unsupported form of variable indexing in %s; falling "
                 "back to very inefficient code generation\n",
                 _mesa_shader_stage_to_abbrev(shader->Stage));
   }

   lower_ubo_reference(shader, shader->ir);

   bool progress;
   do {
      progress = false;

      if (is_scalar_shader_stage(brw, shader->Stage)) {
         brw_do_channel_expressions(shader->ir);
         brw_do_vector_splitting(shader->ir);
      }

      progress = do_lower_jumps(shader->ir, true, true,
                                true, /* main return */
                                false, /* continue */
                                false /* loops */
                                ) || progress;

      progress = do_common_optimization(shader->ir, true, true,
                                        options, ctx->Const.NativeIntegers) || progress;
   } while (progress);

   if (options->NirOptions != NULL)
      lower_output_reads(shader->ir);

   validate_ir_tree(shader->ir);

   /* Now that we've finished altering the linked IR, reparent any live IR back
    * to the permanent memory context, and free the temporary one (discarding any
    * junk we optimized away).
    */
   reparent_ir(shader->ir, shader->ir);
   ralloc_free(mem_ctx);

   if (ctx->_Shader->Flags & GLSL_DUMP) {
      fprintf(stderr, "\n");
      fprintf(stderr, "GLSL IR for linked %s program %d:\n",
              _mesa_shader_stage_to_string(shader->Stage),
              shader_prog->Name);
      _mesa_print_ir(stderr, shader->ir, NULL);
      fprintf(stderr, "\n");
   }
}

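/**
 * The LinkShader driver hook: for each linked stage, create a gl_program,
 * lower and optimize its IR, record the state references, samplers and
 * inputs/outputs it uses, optionally translate it to NIR, and finally
 * precompile it if precompilation is enabled.
 */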
GLboolean
brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg)
{
   struct brw_context *brw = brw_context(ctx);
   unsigned int stage;

   for (stage = 0; stage < ARRAY_SIZE(shProg->_LinkedShaders); stage++) {
      struct gl_shader *shader = shProg->_LinkedShaders[stage];
      const struct gl_shader_compiler_options *options =
         &ctx->Const.ShaderCompilerOptions[stage];

      if (!shader)
         continue;

      struct gl_program *prog =
         ctx->Driver.NewProgram(ctx, _mesa_shader_stage_to_program(stage),
                                shader->Name);
      if (!prog)
         return false;
      prog->Parameters = _mesa_new_parameter_list();

      _mesa_copy_linked_program_data((gl_shader_stage) stage, shProg, prog);

      process_glsl_ir(brw, shProg, shader);

      /* Make a pass over the IR to add state references for any built-in
       * uniforms that are used.  This has to be done now (during linking).
       * Code generation doesn't happen until the first time this shader is
       * used for rendering.  Waiting until then to generate the parameters is
       * too late.  At that point, the values for the built-in uniforms won't
       * get sent to the shader.
       */
      foreach_in_list(ir_instruction, node, shader->ir) {
         ir_variable *var = node->as_variable();

         if ((var == NULL) || (var->data.mode != ir_var_uniform)
             || (strncmp(var->name, "gl_", 3) != 0))
            continue;

         const ir_state_slot *const slots = var->get_state_slots();
         assert(slots != NULL);

         for (unsigned int i = 0; i < var->get_num_state_slots(); i++) {
            _mesa_add_state_reference(prog->Parameters,
                                      (gl_state_index *) slots[i].tokens);
         }
      }

      do_set_program_inouts(shader->ir, prog, shader->Stage);

      prog->SamplersUsed = shader->active_samplers;
      prog->ShadowSamplers = shader->shadow_samplers;
      _mesa_update_shader_textures_used(shProg, prog);

      _mesa_reference_program(ctx, &shader->Program, prog);

      brw_add_texrect_params(prog);

      if (options->NirOptions)
         prog->nir = brw_create_nir(brw, shProg, prog, (gl_shader_stage) stage);

      _mesa_reference_program(ctx, &prog, NULL);
   }

   if ((ctx->_Shader->Flags & GLSL_DUMP) && shProg->Name != 0) {
      for (unsigned i = 0; i < shProg->NumShaders; i++) {
         const struct gl_shader *sh = shProg->Shaders[i];
         if (!sh)
            continue;

         fprintf(stderr, "GLSL %s shader %d source for linked program %d:\n",
                 _mesa_shader_stage_to_string(sh->Stage),
                 i, shProg->Name);
         fprintf(stderr, "%s", sh->Source);
         fprintf(stderr, "\n");
      }
   }

   if (brw->precompile && !brw_shader_precompile(ctx, shProg))
      return false;

   return true;
}


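/**
 * Returns the hardware register type to use for a given GLSL base type.
 */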
enum brw_reg_type
brw_type_for_base_type(const struct glsl_type *type)
{
   switch (type->base_type) {
   case GLSL_TYPE_FLOAT:
      return BRW_REGISTER_TYPE_F;
   case GLSL_TYPE_INT:
   case GLSL_TYPE_BOOL:
      return BRW_REGISTER_TYPE_D;
   case GLSL_TYPE_UINT:
      return BRW_REGISTER_TYPE_UD;
   case GLSL_TYPE_ARRAY:
      return brw_type_for_base_type(type->fields.array);
   case GLSL_TYPE_STRUCT:
   case GLSL_TYPE_SAMPLER:
   case GLSL_TYPE_ATOMIC_UINT:
      /* These should be overridden with the type of the member when
       * dereferenced into.  BRW_REGISTER_TYPE_UD seems like a likely
       * way to trip up if we don't.
       */
      return BRW_REGISTER_TYPE_UD;
   case GLSL_TYPE_IMAGE:
      return BRW_REGISTER_TYPE_UD;
   case GLSL_TYPE_VOID:
   case GLSL_TYPE_ERROR:
   case GLSL_TYPE_INTERFACE:
   case GLSL_TYPE_DOUBLE:
      unreachable("not reached");
   }

   return BRW_REGISTER_TYPE_F;
}

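/**
 * Returns the conditional modifier that implements the given GLSL IR
 * comparison operation.
 */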
enum brw_conditional_mod
brw_conditional_for_comparison(unsigned int op)
{
   switch (op) {
   case ir_binop_less:
      return BRW_CONDITIONAL_L;
   case ir_binop_greater:
      return BRW_CONDITIONAL_G;
   case ir_binop_lequal:
      return BRW_CONDITIONAL_LE;
   case ir_binop_gequal:
      return BRW_CONDITIONAL_GE;
   case ir_binop_equal:
   case ir_binop_all_equal: /* same as equal for scalars */
      return BRW_CONDITIONAL_Z;
   case ir_binop_nequal:
   case ir_binop_any_nequal: /* same as nequal for scalars */
      return BRW_CONDITIONAL_NZ;
   default:
      unreachable("not reached: bad operation for comparison");
   }
}

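/**
 * Returns the hardware math function encoding corresponding to one of the
 * SHADER_OPCODE_* math opcodes.
 */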
uint32_t
brw_math_function(enum opcode op)
{
   switch (op) {
   case SHADER_OPCODE_RCP:
      return BRW_MATH_FUNCTION_INV;
   case SHADER_OPCODE_RSQ:
      return BRW_MATH_FUNCTION_RSQ;
   case SHADER_OPCODE_SQRT:
      return BRW_MATH_FUNCTION_SQRT;
   case SHADER_OPCODE_EXP2:
      return BRW_MATH_FUNCTION_EXP;
   case SHADER_OPCODE_LOG2:
      return BRW_MATH_FUNCTION_LOG;
   case SHADER_OPCODE_POW:
      return BRW_MATH_FUNCTION_POW;
   case SHADER_OPCODE_SIN:
      return BRW_MATH_FUNCTION_SIN;
   case SHADER_OPCODE_COS:
      return BRW_MATH_FUNCTION_COS;
   case SHADER_OPCODE_INT_QUOTIENT:
      return BRW_MATH_FUNCTION_INT_DIV_QUOTIENT;
   case SHADER_OPCODE_INT_REMAINDER:
      return BRW_MATH_FUNCTION_INT_DIV_REMAINDER;
   default:
      unreachable("not reached: unknown math function");
   }
}

uint32_t
brw_texture_offset(int *offsets, unsigned num_components)
{
   if (!offsets) return 0;  /* nonconstant offset; caller will handle it. */

   /* Combine all three offsets into a single unsigned dword:
    *
    *    bits 11:8 - U Offset (X component)
    *    bits  7:4 - V Offset (Y component)
    *    bits  3:0 - R Offset (Z component)
    */
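   /* For example, constant offsets of (1, -2, 0) pack to
    * (0x1 << 8) | (0xe << 4) | 0x0 == 0x1e0, each component being
    * truncated to a signed 4-bit field.
    */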
   unsigned offset_bits = 0;
   for (unsigned i = 0; i < num_components; i++) {
      const unsigned shift = 4 * (2 - i);
      offset_bits |= (offsets[i] << shift) & (0xF << shift);
   }
   return offset_bits;
}

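/**
 * Returns a printable name for the given opcode, for use in instruction
 * dumps.  Hardware opcodes take their names from the opcode_descs table;
 * the virtual backend opcodes are named here.
 */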
const char *
brw_instruction_name(enum opcode op)
{
   switch (op) {
   case BRW_OPCODE_MOV ... BRW_OPCODE_NOP:
      assert(opcode_descs[op].name);
      return opcode_descs[op].name;
   case FS_OPCODE_FB_WRITE:
      return "fb_write";
   case FS_OPCODE_BLORP_FB_WRITE:
      return "blorp_fb_write";
   case FS_OPCODE_REP_FB_WRITE:
      return "rep_fb_write";

   case SHADER_OPCODE_RCP:
      return "rcp";
   case SHADER_OPCODE_RSQ:
      return "rsq";
   case SHADER_OPCODE_SQRT:
      return "sqrt";
   case SHADER_OPCODE_EXP2:
      return "exp2";
   case SHADER_OPCODE_LOG2:
      return "log2";
   case SHADER_OPCODE_POW:
      return "pow";
   case SHADER_OPCODE_INT_QUOTIENT:
      return "int_quot";
   case SHADER_OPCODE_INT_REMAINDER:
      return "int_rem";
   case SHADER_OPCODE_SIN:
      return "sin";
   case SHADER_OPCODE_COS:
      return "cos";

   case SHADER_OPCODE_TEX:
      return "tex";
   case SHADER_OPCODE_TXD:
      return "txd";
   case SHADER_OPCODE_TXF:
      return "txf";
   case SHADER_OPCODE_TXL:
      return "txl";
   case SHADER_OPCODE_TXS:
      return "txs";
   case FS_OPCODE_TXB:
      return "txb";
   case SHADER_OPCODE_TXF_CMS:
      return "txf_cms";
   case SHADER_OPCODE_TXF_UMS:
      return "txf_ums";
   case SHADER_OPCODE_TXF_MCS:
      return "txf_mcs";
   case SHADER_OPCODE_LOD:
      return "lod";
   case SHADER_OPCODE_TG4:
      return "tg4";
   case SHADER_OPCODE_TG4_OFFSET:
      return "tg4_offset";
   case SHADER_OPCODE_SHADER_TIME_ADD:
      return "shader_time_add";

   case SHADER_OPCODE_UNTYPED_ATOMIC:
      return "untyped_atomic";
   case SHADER_OPCODE_UNTYPED_SURFACE_READ:
      return "untyped_surface_read";

   case SHADER_OPCODE_LOAD_PAYLOAD:
      return "load_payload";

   case SHADER_OPCODE_GEN4_SCRATCH_READ:
      return "gen4_scratch_read";
   case SHADER_OPCODE_GEN4_SCRATCH_WRITE:
      return "gen4_scratch_write";
   case SHADER_OPCODE_GEN7_SCRATCH_READ:
      return "gen7_scratch_read";
   case SHADER_OPCODE_URB_WRITE_SIMD8:
      return "gen8_urb_write_simd8";

   case VEC4_OPCODE_MOV_BYTES:
      return "mov_bytes";
   case VEC4_OPCODE_PACK_BYTES:
      return "pack_bytes";
   case VEC4_OPCODE_UNPACK_UNIFORM:
      return "unpack_uniform";

   case FS_OPCODE_DDX_COARSE:
      return "ddx_coarse";
   case FS_OPCODE_DDX_FINE:
      return "ddx_fine";
   case FS_OPCODE_DDY_COARSE:
      return "ddy_coarse";
   case FS_OPCODE_DDY_FINE:
      return "ddy_fine";

   case FS_OPCODE_CINTERP:
      return "cinterp";
   case FS_OPCODE_LINTERP:
      return "linterp";

   case FS_OPCODE_PIXEL_X:
      return "pixel_x";
   case FS_OPCODE_PIXEL_Y:
      return "pixel_y";

   case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
      return "uniform_pull_const";
   case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7:
      return "uniform_pull_const_gen7";
   case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD:
      return "varying_pull_const";
   case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7:
      return "varying_pull_const_gen7";

   case FS_OPCODE_MOV_DISPATCH_TO_FLAGS:
      return "mov_dispatch_to_flags";
   case FS_OPCODE_DISCARD_JUMP:
      return "discard_jump";

   case FS_OPCODE_SET_OMASK:
      return "set_omask";
   case FS_OPCODE_SET_SAMPLE_ID:
      return "set_sample_id";
   case FS_OPCODE_SET_SIMD4X2_OFFSET:
      return "set_simd4x2_offset";

   case FS_OPCODE_PACK_HALF_2x16_SPLIT:
      return "pack_half_2x16_split";
   case FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X:
      return "unpack_half_2x16_split_x";
   case FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y:
      return "unpack_half_2x16_split_y";

   case FS_OPCODE_PLACEHOLDER_HALT:
      return "placeholder_halt";

   case FS_OPCODE_INTERPOLATE_AT_CENTROID:
      return "interp_centroid";
   case FS_OPCODE_INTERPOLATE_AT_SAMPLE:
      return "interp_sample";
   case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET:
      return "interp_shared_offset";
   case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET:
      return "interp_per_slot_offset";

   case VS_OPCODE_URB_WRITE:
      return "vs_urb_write";
   case VS_OPCODE_PULL_CONSTANT_LOAD:
      return "pull_constant_load";
   case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7:
      return "pull_constant_load_gen7";

   case VS_OPCODE_SET_SIMD4X2_HEADER_GEN9:
      return "set_simd4x2_header_gen9";

   case VS_OPCODE_UNPACK_FLAGS_SIMD4X2:
      return "unpack_flags_simd4x2";

   case GS_OPCODE_URB_WRITE:
      return "gs_urb_write";
   case GS_OPCODE_URB_WRITE_ALLOCATE:
      return "gs_urb_write_allocate";
   case GS_OPCODE_THREAD_END:
      return "gs_thread_end";
   case GS_OPCODE_SET_WRITE_OFFSET:
      return "set_write_offset";
   case GS_OPCODE_SET_VERTEX_COUNT:
      return "set_vertex_count";
   case GS_OPCODE_SET_DWORD_2:
      return "set_dword_2";
   case GS_OPCODE_PREPARE_CHANNEL_MASKS:
      return "prepare_channel_masks";
   case GS_OPCODE_SET_CHANNEL_MASKS:
      return "set_channel_masks";
   case GS_OPCODE_GET_INSTANCE_ID:
      return "get_instance_id";
   case GS_OPCODE_FF_SYNC:
      return "ff_sync";
   case GS_OPCODE_SET_PRIMITIVE_ID:
      return "set_primitive_id";
   case GS_OPCODE_SVB_WRITE:
      return "gs_svb_write";
   case GS_OPCODE_SVB_SET_DST_INDEX:
      return "gs_svb_set_dst_index";
   case GS_OPCODE_FF_SYNC_SET_PRIMITIVES:
      return "gs_ff_sync_set_primitives";
   case CS_OPCODE_CS_TERMINATE:
      return "cs_terminate";
   }

   unreachable("not reached");
}

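/**
 * Apply the saturation modifier to an immediate: clamp a float to
 * [0.0, 1.0] and a word immediate to the range of its type.  Returns
 * true if the register was modified.
 */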
bool
brw_saturate_immediate(enum brw_reg_type type, struct brw_reg *reg)
{
   union {
      unsigned ud;
      int d;
      float f;
   } imm = { reg->dw1.ud }, sat_imm = { 0 };

   switch (type) {
   case BRW_REGISTER_TYPE_UD:
   case BRW_REGISTER_TYPE_D:
   case BRW_REGISTER_TYPE_UQ:
   case BRW_REGISTER_TYPE_Q:
      /* Nothing to do. */
      return false;
   case BRW_REGISTER_TYPE_UW:
      sat_imm.ud = CLAMP(imm.ud, 0, USHRT_MAX);
      break;
   case BRW_REGISTER_TYPE_W:
      sat_imm.d = CLAMP(imm.d, SHRT_MIN, SHRT_MAX);
      break;
   case BRW_REGISTER_TYPE_F:
      sat_imm.f = CLAMP(imm.f, 0.0f, 1.0f);
      break;
   case BRW_REGISTER_TYPE_UB:
   case BRW_REGISTER_TYPE_B:
      unreachable("no UB/B immediates");
   case BRW_REGISTER_TYPE_V:
   case BRW_REGISTER_TYPE_UV:
   case BRW_REGISTER_TYPE_VF:
      unreachable("unimplemented: saturate vector immediate");
   case BRW_REGISTER_TYPE_DF:
   case BRW_REGISTER_TYPE_HF:
      unreachable("unimplemented: saturate DF/HF immediate");
   }

   if (imm.ud != sat_imm.ud) {
      reg->dw1.ud = sat_imm.ud;
      return true;
   }
   return false;
}

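/**
 * Fold a negate source modifier into an immediate value.  For VF this
 * flips the sign bit of each of the four packed 8-bit floats (the
 * 0x80808080 mask).  Returns true if the negation could be applied.
 */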
bool
brw_negate_immediate(enum brw_reg_type type, struct brw_reg *reg)
{
   switch (type) {
   case BRW_REGISTER_TYPE_D:
   case BRW_REGISTER_TYPE_UD:
      reg->dw1.d = -reg->dw1.d;
      return true;
   case BRW_REGISTER_TYPE_W:
   case BRW_REGISTER_TYPE_UW:
      reg->dw1.d = -(int16_t)reg->dw1.ud;
      return true;
   case BRW_REGISTER_TYPE_F:
      reg->dw1.f = -reg->dw1.f;
      return true;
   case BRW_REGISTER_TYPE_VF:
      reg->dw1.ud ^= 0x80808080;
      return true;
   case BRW_REGISTER_TYPE_UB:
   case BRW_REGISTER_TYPE_B:
      unreachable("no UB/B immediates");
   case BRW_REGISTER_TYPE_UV:
   case BRW_REGISTER_TYPE_V:
      assert(!"unimplemented: negate UV/V immediate");
   case BRW_REGISTER_TYPE_UQ:
   case BRW_REGISTER_TYPE_Q:
      assert(!"unimplemented: negate UQ/Q immediate");
   case BRW_REGISTER_TYPE_DF:
   case BRW_REGISTER_TYPE_HF:
      assert(!"unimplemented: negate DF/HF immediate");
   }

   return false;
}

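/**
 * Fold an absolute-value source modifier into an immediate value.  For VF
 * this clears the sign bit of each of the four packed 8-bit floats.
 * Returns true if the modifier could be applied.
 */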
bool
brw_abs_immediate(enum brw_reg_type type, struct brw_reg *reg)
{
   switch (type) {
   case BRW_REGISTER_TYPE_D:
      reg->dw1.d = abs(reg->dw1.d);
      return true;
   case BRW_REGISTER_TYPE_W:
      reg->dw1.d = abs((int16_t)reg->dw1.ud);
      return true;
   case BRW_REGISTER_TYPE_F:
      reg->dw1.f = fabsf(reg->dw1.f);
      return true;
   case BRW_REGISTER_TYPE_VF:
      reg->dw1.ud &= ~0x80808080;
      return true;
   case BRW_REGISTER_TYPE_UB:
   case BRW_REGISTER_TYPE_B:
      unreachable("no UB/B immediates");
   case BRW_REGISTER_TYPE_UQ:
   case BRW_REGISTER_TYPE_UD:
   case BRW_REGISTER_TYPE_UW:
   case BRW_REGISTER_TYPE_UV:
      /* Presumably the absolute value modifier on an unsigned source is a
       * nop, but it would be nice to confirm.
       */
      assert(!"unimplemented: abs unsigned immediate");
   case BRW_REGISTER_TYPE_V:
      assert(!"unimplemented: abs V immediate");
   case BRW_REGISTER_TYPE_Q:
      assert(!"unimplemented: abs Q immediate");
   case BRW_REGISTER_TYPE_DF:
   case BRW_REGISTER_TYPE_HF:
      assert(!"unimplemented: abs DF/HF immediate");
   }

   return false;
}

backend_visitor::backend_visitor(struct brw_context *brw,
                                 struct gl_shader_program *shader_prog,
                                 struct gl_program *prog,
                                 struct brw_stage_prog_data *stage_prog_data,
                                 gl_shader_stage stage)
   : brw(brw),
     devinfo(brw->intelScreen->devinfo),
     ctx(&brw->ctx),
     shader(shader_prog ?
        (struct brw_shader *)shader_prog->_LinkedShaders[stage] : NULL),
     shader_prog(shader_prog),
     prog(prog),
     stage_prog_data(stage_prog_data),
     cfg(NULL),
     stage(stage)
{
   debug_enabled = INTEL_DEBUG & intel_debug_flag_for_shader_stage(stage);
   stage_name = _mesa_shader_stage_to_string(stage);
   stage_abbrev = _mesa_shader_stage_to_abbrev(stage);
}

bool
backend_reg::is_zero() const
{
   if (file != IMM)
      return false;

   return fixed_hw_reg.dw1.d == 0;
}

bool
backend_reg::is_one() const
{
   if (file != IMM)
      return false;

   return type == BRW_REGISTER_TYPE_F
          ? fixed_hw_reg.dw1.f == 1.0
          : fixed_hw_reg.dw1.d == 1;
}

bool
backend_reg::is_negative_one() const
{
   if (file != IMM)
      return false;

   switch (type) {
   case BRW_REGISTER_TYPE_F:
      return fixed_hw_reg.dw1.f == -1.0;
   case BRW_REGISTER_TYPE_D:
      return fixed_hw_reg.dw1.d == -1;
   default:
      return false;
   }
}

bool
backend_reg::is_null() const
{
   return file == HW_REG &&
          fixed_hw_reg.file == BRW_ARCHITECTURE_REGISTER_FILE &&
          fixed_hw_reg.nr == BRW_ARF_NULL;
}


bool
backend_reg::is_accumulator() const
{
   return file == HW_REG &&
          fixed_hw_reg.file == BRW_ARCHITECTURE_REGISTER_FILE &&
          fixed_hw_reg.nr == BRW_ARF_ACCUMULATOR;
}

bool
backend_reg::in_range(const backend_reg &r, unsigned n) const
{
   return (file == r.file &&
           reg == r.reg &&
           reg_offset >= r.reg_offset &&
           reg_offset < r.reg_offset + n);
}

bool
backend_instruction::is_commutative() const
{
   switch (opcode) {
   case BRW_OPCODE_AND:
   case BRW_OPCODE_OR:
   case BRW_OPCODE_XOR:
   case BRW_OPCODE_ADD:
   case BRW_OPCODE_MUL:
      return true;
   case BRW_OPCODE_SEL:
      /* MIN and MAX are commutative. */
      if (conditional_mod == BRW_CONDITIONAL_GE ||
          conditional_mod == BRW_CONDITIONAL_L) {
         return true;
      }
      /* fallthrough */
   default:
      return false;
   }
}

bool
backend_instruction::is_3src() const
{
   return opcode < ARRAY_SIZE(opcode_descs) && opcode_descs[opcode].nsrc == 3;
}

bool
backend_instruction::is_tex() const
{
   return (opcode == SHADER_OPCODE_TEX ||
           opcode == FS_OPCODE_TXB ||
           opcode == SHADER_OPCODE_TXD ||
           opcode == SHADER_OPCODE_TXF ||
           opcode == SHADER_OPCODE_TXF_CMS ||
           opcode == SHADER_OPCODE_TXF_UMS ||
           opcode == SHADER_OPCODE_TXF_MCS ||
           opcode == SHADER_OPCODE_TXL ||
           opcode == SHADER_OPCODE_TXS ||
           opcode == SHADER_OPCODE_LOD ||
           opcode == SHADER_OPCODE_TG4 ||
           opcode == SHADER_OPCODE_TG4_OFFSET);
}

bool
backend_instruction::is_math() const
{
   return (opcode == SHADER_OPCODE_RCP ||
           opcode == SHADER_OPCODE_RSQ ||
           opcode == SHADER_OPCODE_SQRT ||
           opcode == SHADER_OPCODE_EXP2 ||
           opcode == SHADER_OPCODE_LOG2 ||
           opcode == SHADER_OPCODE_SIN ||
           opcode == SHADER_OPCODE_COS ||
           opcode == SHADER_OPCODE_INT_QUOTIENT ||
           opcode == SHADER_OPCODE_INT_REMAINDER ||
           opcode == SHADER_OPCODE_POW);
}

bool
backend_instruction::is_control_flow() const
{
   switch (opcode) {
   case BRW_OPCODE_DO:
   case BRW_OPCODE_WHILE:
   case BRW_OPCODE_IF:
   case BRW_OPCODE_ELSE:
   case BRW_OPCODE_ENDIF:
   case BRW_OPCODE_BREAK:
   case BRW_OPCODE_CONTINUE:
      return true;
   default:
      return false;
   }
}

bool
backend_instruction::can_do_source_mods() const
{
   switch (opcode) {
   case BRW_OPCODE_ADDC:
   case BRW_OPCODE_BFE:
   case BRW_OPCODE_BFI1:
   case BRW_OPCODE_BFI2:
   case BRW_OPCODE_BFREV:
   case BRW_OPCODE_CBIT:
   case BRW_OPCODE_FBH:
   case BRW_OPCODE_FBL:
   case BRW_OPCODE_SUBB:
      return false;
   default:
      return true;
   }
}

bool
backend_instruction::can_do_saturate() const
{
   switch (opcode) {
   case BRW_OPCODE_ADD:
   case BRW_OPCODE_ASR:
   case BRW_OPCODE_AVG:
   case BRW_OPCODE_DP2:
   case BRW_OPCODE_DP3:
   case BRW_OPCODE_DP4:
   case BRW_OPCODE_DPH:
   case BRW_OPCODE_F16TO32:
   case BRW_OPCODE_F32TO16:
   case BRW_OPCODE_LINE:
   case BRW_OPCODE_LRP:
   case BRW_OPCODE_MAC:
   case BRW_OPCODE_MACH:
   case BRW_OPCODE_MAD:
   case BRW_OPCODE_MATH:
   case BRW_OPCODE_MOV:
   case BRW_OPCODE_MUL:
   case BRW_OPCODE_PLN:
   case BRW_OPCODE_RNDD:
   case BRW_OPCODE_RNDE:
   case BRW_OPCODE_RNDU:
   case BRW_OPCODE_RNDZ:
   case BRW_OPCODE_SEL:
   case BRW_OPCODE_SHL:
   case BRW_OPCODE_SHR:
   case FS_OPCODE_LINTERP:
   case SHADER_OPCODE_COS:
   case SHADER_OPCODE_EXP2:
   case SHADER_OPCODE_LOG2:
   case SHADER_OPCODE_POW:
   case SHADER_OPCODE_RCP:
   case SHADER_OPCODE_RSQ:
   case SHADER_OPCODE_SIN:
   case SHADER_OPCODE_SQRT:
      return true;
   default:
      return false;
   }
}

bool
backend_instruction::can_do_cmod() const
{
   switch (opcode) {
   case BRW_OPCODE_ADD:
   case BRW_OPCODE_ADDC:
   case BRW_OPCODE_AND:
   case BRW_OPCODE_ASR:
   case BRW_OPCODE_AVG:
   case BRW_OPCODE_CMP:
   case BRW_OPCODE_CMPN:
   case BRW_OPCODE_DP2:
   case BRW_OPCODE_DP3:
   case BRW_OPCODE_DP4:
   case BRW_OPCODE_DPH:
   case BRW_OPCODE_F16TO32:
   case BRW_OPCODE_F32TO16:
   case BRW_OPCODE_FRC:
   case BRW_OPCODE_LINE:
   case BRW_OPCODE_LRP:
   case BRW_OPCODE_LZD:
   case BRW_OPCODE_MAC:
   case BRW_OPCODE_MACH:
   case BRW_OPCODE_MAD:
   case BRW_OPCODE_MOV:
   case BRW_OPCODE_MUL:
   case BRW_OPCODE_NOT:
   case BRW_OPCODE_OR:
   case BRW_OPCODE_PLN:
   case BRW_OPCODE_RNDD:
   case BRW_OPCODE_RNDE:
   case BRW_OPCODE_RNDU:
   case BRW_OPCODE_RNDZ:
   case BRW_OPCODE_SAD2:
   case BRW_OPCODE_SADA2:
   case BRW_OPCODE_SHL:
   case BRW_OPCODE_SHR:
   case BRW_OPCODE_SUBB:
   case BRW_OPCODE_XOR:
   case FS_OPCODE_CINTERP:
   case FS_OPCODE_LINTERP:
      return true;
   default:
      return false;
   }
}

bool
backend_instruction::reads_accumulator_implicitly() const
{
   switch (opcode) {
   case BRW_OPCODE_MAC:
   case BRW_OPCODE_MACH:
   case BRW_OPCODE_SADA2:
      return true;
   default:
      return false;
   }
}

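/* On Gen4-5 most instructions update the accumulator implicitly, so the
 * opcode ranges below (and the derivative/interpolation virtual opcodes
 * that are emitted as such instructions) are treated as accumulator
 * writers on those platforms.
 */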
bool
backend_instruction::writes_accumulator_implicitly(const struct brw_device_info *devinfo) const
{
   return writes_accumulator ||
          (devinfo->gen < 6 &&
           ((opcode >= BRW_OPCODE_ADD && opcode < BRW_OPCODE_NOP) ||
            (opcode >= FS_OPCODE_DDX_COARSE && opcode <= FS_OPCODE_LINTERP &&
             opcode != FS_OPCODE_CINTERP)));
}

bool
backend_instruction::has_side_effects() const
{
   switch (opcode) {
   case SHADER_OPCODE_UNTYPED_ATOMIC:
   case SHADER_OPCODE_GEN4_SCRATCH_WRITE:
   case SHADER_OPCODE_URB_WRITE_SIMD8:
   case FS_OPCODE_FB_WRITE:
      return true;
   default:
      return false;
   }
}

#ifndef NDEBUG
static bool
inst_is_in_block(const bblock_t *block, const backend_instruction *inst)
{
   bool found = false;
   foreach_inst_in_block (backend_instruction, i, block) {
      if (inst == i) {
         found = true;
      }
   }
   return found;
}
#endif

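/**
 * After inserting or removing instructions in a basic block, shift the
 * start/end IPs of every later block in the CFG by the given adjustment.
 */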
static void
adjust_later_block_ips(bblock_t *start_block, int ip_adjustment)
{
   for (bblock_t *block_iter = start_block->next();
        !block_iter->link.is_tail_sentinel();
        block_iter = block_iter->next()) {
      block_iter->start_ip += ip_adjustment;
      block_iter->end_ip += ip_adjustment;
   }
}

void
backend_instruction::insert_after(bblock_t *block, backend_instruction *inst)
{
   if (!this->is_head_sentinel())
      assert(inst_is_in_block(block, this) || !"Instruction not in block");

   block->end_ip++;

   adjust_later_block_ips(block, 1);

   exec_node::insert_after(inst);
}

void
backend_instruction::insert_before(bblock_t *block, backend_instruction *inst)
{
   if (!this->is_tail_sentinel())
      assert(inst_is_in_block(block, this) || !"Instruction not in block");

   block->end_ip++;

   adjust_later_block_ips(block, 1);

   exec_node::insert_before(inst);
}

void
backend_instruction::insert_before(bblock_t *block, exec_list *list)
{
   assert(inst_is_in_block(block, this) || !"Instruction not in block");

   unsigned num_inst = list->length();

   block->end_ip += num_inst;

   adjust_later_block_ips(block, num_inst);

   exec_node::insert_before(list);
}

void
backend_instruction::remove(bblock_t *block)
{
   assert(inst_is_in_block(block, this) || !"Instruction not in block");

   adjust_later_block_ips(block, -1);

   if (block->start_ip == block->end_ip) {
      block->cfg->remove_block(block);
   } else {
      block->end_ip--;
   }

   exec_node::remove();
}

void
backend_visitor::dump_instructions()
{
   dump_instructions(NULL);
}

void
backend_visitor::dump_instructions(const char *name)
{
   FILE *file = stderr;
   if (name && geteuid() != 0) {
      file = fopen(name, "w");
      if (!file)
         file = stderr;
   }

   if (cfg) {
      int ip = 0;
      foreach_block_and_inst(block, backend_instruction, inst, cfg) {
         fprintf(file, "%4d: ", ip++);
         dump_instruction(inst, file);
      }
   } else {
      int ip = 0;
      foreach_in_list(backend_instruction, inst, &instructions) {
         fprintf(file, "%4d: ", ip++);
         dump_instruction(inst, file);
      }
   }

   if (file != stderr) {
      fclose(file);
   }
}

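/**
 * Lazily build the control flow graph for the current instruction list;
 * a previously built CFG is reused until invalidate_cfg() frees it.
 */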
void
backend_visitor::calculate_cfg()
{
   if (this->cfg)
      return;
   cfg = new(mem_ctx) cfg_t(&this->instructions);
}

void
backend_visitor::invalidate_cfg()
{
   ralloc_free(this->cfg);
   this->cfg = NULL;
}

/**
 * Sets up the starting offsets for the groups of binding table entries
 * common to all pipeline stages.
 *
 * Unused groups are initialized to 0xd0d0d0d0 to make it obvious that
 * they're unused, while also ensuring that adding small offsets to them
 * will trigger the asserts that surface indices are < BRW_MAX_SURFACES.
 */
void
backend_visitor::assign_common_binding_table_offsets(uint32_t next_binding_table_offset)
{
   int num_textures = _mesa_fls(prog->SamplersUsed);

   stage_prog_data->binding_table.texture_start = next_binding_table_offset;
   next_binding_table_offset += num_textures;

   if (shader) {
      stage_prog_data->binding_table.ubo_start = next_binding_table_offset;
      next_binding_table_offset += shader->base.NumUniformBlocks;
   } else {
      stage_prog_data->binding_table.ubo_start = 0xd0d0d0d0;
   }

   if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
      stage_prog_data->binding_table.shader_time_start = next_binding_table_offset;
      next_binding_table_offset++;
   } else {
      stage_prog_data->binding_table.shader_time_start = 0xd0d0d0d0;
   }

   if (prog->UsesGather) {
      if (devinfo->gen >= 8) {
         stage_prog_data->binding_table.gather_texture_start =
            stage_prog_data->binding_table.texture_start;
      } else {
         stage_prog_data->binding_table.gather_texture_start = next_binding_table_offset;
         next_binding_table_offset += num_textures;
      }
   } else {
      stage_prog_data->binding_table.gather_texture_start = 0xd0d0d0d0;
   }

   if (shader_prog && shader_prog->NumAtomicBuffers) {
      stage_prog_data->binding_table.abo_start = next_binding_table_offset;
      next_binding_table_offset += shader_prog->NumAtomicBuffers;
   } else {
      stage_prog_data->binding_table.abo_start = 0xd0d0d0d0;
   }

   if (shader && shader->base.NumImages) {
      stage_prog_data->binding_table.image_start = next_binding_table_offset;
      next_binding_table_offset += shader->base.NumImages;
   } else {
      stage_prog_data->binding_table.image_start = 0xd0d0d0d0;
   }

   /* This may or may not be used depending on how the compile goes. */
   stage_prog_data->binding_table.pull_constants_start = next_binding_table_offset;
   next_binding_table_offset++;

   assert(next_binding_table_offset <= BRW_MAX_SURFACES);

   /* prog_data->base.binding_table.size will be set by brw_mark_surface_used. */
}