i965/fs: Add a local common subexpression elimination pass.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_fs.h
1 /*
2 * Copyright © 2010 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * Authors:
24 * Eric Anholt <eric@anholt.net>
25 *
26 */
27
28 #pragma once
29
30 #include "brw_shader.h"
31
32 extern "C" {
33
34 #include <sys/types.h>
35
36 #include "main/macros.h"
37 #include "main/shaderobj.h"
38 #include "main/uniforms.h"
39 #include "program/prog_parameter.h"
40 #include "program/prog_print.h"
41 #include "program/prog_optimize.h"
42 #include "program/register_allocate.h"
43 #include "program/sampler.h"
44 #include "program/hash_table.h"
45 #include "brw_context.h"
46 #include "brw_eu.h"
47 #include "brw_wm.h"
48 }
49 #include "glsl/glsl_types.h"
50 #include "glsl/ir.h"
51
52 class fs_bblock;
53
/**
 * Identifies which kind of storage an fs_reg refers to.
 *
 * BAD_FILE is deliberately the zero value: fs_reg::init() zero-fills the
 * whole object, so a freshly initialised register is BAD_FILE until a
 * constructor assigns something else.
 */
enum register_file {
   BAD_FILE     = 0,
   ARF          = 1,
   GRF          = 2,
   MRF          = 3,
   IMM          = 4,
   FIXED_HW_REG = 5, /* a struct brw_reg */
   UNIFORM      = 6, /* prog_data->params[reg] */
};
63
64 class fs_reg {
65 public:
66 /* Callers of this ralloc-based new need not call delete. It's
67 * easier to just ralloc_free 'ctx' (or any of its ancestors). */
68 static void* operator new(size_t size, void *ctx)
69 {
70 void *node;
71
72 node = ralloc_size(ctx, size);
73 assert(node != NULL);
74
75 return node;
76 }
77
78 void init()
79 {
80 memset(this, 0, sizeof(*this));
81 this->smear = -1;
82 }
83
84 /** Generic unset register constructor. */
85 fs_reg()
86 {
87 init();
88 this->file = BAD_FILE;
89 }
90
91 /** Immediate value constructor. */
92 fs_reg(float f)
93 {
94 init();
95 this->file = IMM;
96 this->type = BRW_REGISTER_TYPE_F;
97 this->imm.f = f;
98 }
99
100 /** Immediate value constructor. */
101 fs_reg(int32_t i)
102 {
103 init();
104 this->file = IMM;
105 this->type = BRW_REGISTER_TYPE_D;
106 this->imm.i = i;
107 }
108
109 /** Immediate value constructor. */
110 fs_reg(uint32_t u)
111 {
112 init();
113 this->file = IMM;
114 this->type = BRW_REGISTER_TYPE_UD;
115 this->imm.u = u;
116 }
117
118 /** Fixed brw_reg Immediate value constructor. */
119 fs_reg(struct brw_reg fixed_hw_reg)
120 {
121 init();
122 this->file = FIXED_HW_REG;
123 this->fixed_hw_reg = fixed_hw_reg;
124 this->type = fixed_hw_reg.type;
125 }
126
127 fs_reg(enum register_file file, int reg);
128 fs_reg(enum register_file file, int reg, uint32_t type);
129 fs_reg(class fs_visitor *v, const struct glsl_type *type);
130
131 bool equals(const fs_reg &r) const
132 {
133 return (file == r.file &&
134 reg == r.reg &&
135 reg_offset == r.reg_offset &&
136 type == r.type &&
137 negate == r.negate &&
138 abs == r.abs &&
139 memcmp(&fixed_hw_reg, &r.fixed_hw_reg,
140 sizeof(fixed_hw_reg)) == 0 &&
141 smear == r.smear &&
142 imm.u == r.imm.u);
143 }
144
145 /** Register file: ARF, GRF, MRF, IMM. */
146 enum register_file file;
147 /**
148 * Register number. For ARF/MRF, it's the hardware register. For
149 * GRF, it's a virtual register number until register allocation
150 */
151 int reg;
152 /**
153 * For virtual registers, this is a hardware register offset from
154 * the start of the register block (for example, a constant index
155 * in an array access).
156 */
157 int reg_offset;
158 /** Register type. BRW_REGISTER_TYPE_* */
159 int type;
160 bool negate;
161 bool abs;
162 bool sechalf;
163 struct brw_reg fixed_hw_reg;
164 int smear; /* -1, or a channel of the reg to smear to all channels. */
165
166 /** Value for file == IMM */
167 union {
168 int32_t i;
169 uint32_t u;
170 float f;
171 } imm;
172 };
173
174 static const fs_reg reg_undef;
175 static const fs_reg reg_null_f(ARF, BRW_ARF_NULL, BRW_REGISTER_TYPE_F);
176 static const fs_reg reg_null_d(ARF, BRW_ARF_NULL, BRW_REGISTER_TYPE_D);
177
178 class ip_record : public exec_node {
179 public:
180 static void* operator new(size_t size, void *ctx)
181 {
182 void *node;
183
184 node = rzalloc_size(ctx, size);
185 assert(node != NULL);
186
187 return node;
188 }
189
190 ip_record(int ip)
191 {
192 this->ip = ip;
193 }
194
195 int ip;
196 };
197
198 class fs_inst : public exec_node {
199 public:
200 /* Callers of this ralloc-based new need not call delete. It's
201 * easier to just ralloc_free 'ctx' (or any of its ancestors). */
202 static void* operator new(size_t size, void *ctx)
203 {
204 void *node;
205
206 node = rzalloc_size(ctx, size);
207 assert(node != NULL);
208
209 return node;
210 }
211
212 void init()
213 {
214 memset(this, 0, sizeof(*this));
215 this->opcode = BRW_OPCODE_NOP;
216 this->conditional_mod = BRW_CONDITIONAL_NONE;
217
218 this->dst = reg_undef;
219 this->src[0] = reg_undef;
220 this->src[1] = reg_undef;
221 this->src[2] = reg_undef;
222 }
223
224 fs_inst()
225 {
226 init();
227 }
228
229 fs_inst(enum opcode opcode)
230 {
231 init();
232 this->opcode = opcode;
233 }
234
235 fs_inst(enum opcode opcode, fs_reg dst)
236 {
237 init();
238 this->opcode = opcode;
239 this->dst = dst;
240
241 if (dst.file == GRF)
242 assert(dst.reg_offset >= 0);
243 }
244
245 fs_inst(enum opcode opcode, fs_reg dst, fs_reg src0)
246 {
247 init();
248 this->opcode = opcode;
249 this->dst = dst;
250 this->src[0] = src0;
251
252 if (dst.file == GRF)
253 assert(dst.reg_offset >= 0);
254 if (src[0].file == GRF)
255 assert(src[0].reg_offset >= 0);
256 }
257
258 fs_inst(enum opcode opcode, fs_reg dst, fs_reg src0, fs_reg src1)
259 {
260 init();
261 this->opcode = opcode;
262 this->dst = dst;
263 this->src[0] = src0;
264 this->src[1] = src1;
265
266 if (dst.file == GRF)
267 assert(dst.reg_offset >= 0);
268 if (src[0].file == GRF)
269 assert(src[0].reg_offset >= 0);
270 if (src[1].file == GRF)
271 assert(src[1].reg_offset >= 0);
272 }
273
274 fs_inst(enum opcode opcode, fs_reg dst, fs_reg src0, fs_reg src1, fs_reg src2)
275 {
276 init();
277 this->opcode = opcode;
278 this->dst = dst;
279 this->src[0] = src0;
280 this->src[1] = src1;
281 this->src[2] = src2;
282
283 if (dst.file == GRF)
284 assert(dst.reg_offset >= 0);
285 if (src[0].file == GRF)
286 assert(src[0].reg_offset >= 0);
287 if (src[1].file == GRF)
288 assert(src[1].reg_offset >= 0);
289 if (src[2].file == GRF)
290 assert(src[2].reg_offset >= 0);
291 }
292
293 bool equals(fs_inst *inst)
294 {
295 return (opcode == inst->opcode &&
296 dst.equals(inst->dst) &&
297 src[0].equals(inst->src[0]) &&
298 src[1].equals(inst->src[1]) &&
299 src[2].equals(inst->src[2]) &&
300 saturate == inst->saturate &&
301 predicated == inst->predicated &&
302 conditional_mod == inst->conditional_mod &&
303 mlen == inst->mlen &&
304 base_mrf == inst->base_mrf &&
305 sampler == inst->sampler &&
306 target == inst->target &&
307 eot == inst->eot &&
308 header_present == inst->header_present &&
309 shadow_compare == inst->shadow_compare &&
310 offset == inst->offset);
311 }
312
313 int regs_written()
314 {
315 if (is_tex())
316 return 4;
317
318 /* The SINCOS and INT_DIV_QUOTIENT_AND_REMAINDER math functions return 2,
319 * but we don't currently use them...nor do we have an opcode for them.
320 */
321
322 return 1;
323 }
324
325 bool is_tex()
326 {
327 return (opcode == SHADER_OPCODE_TEX ||
328 opcode == FS_OPCODE_TXB ||
329 opcode == SHADER_OPCODE_TXD ||
330 opcode == SHADER_OPCODE_TXF ||
331 opcode == SHADER_OPCODE_TXL ||
332 opcode == SHADER_OPCODE_TXS);
333 }
334
335 bool is_math()
336 {
337 return (opcode == SHADER_OPCODE_RCP ||
338 opcode == SHADER_OPCODE_RSQ ||
339 opcode == SHADER_OPCODE_SQRT ||
340 opcode == SHADER_OPCODE_EXP2 ||
341 opcode == SHADER_OPCODE_LOG2 ||
342 opcode == SHADER_OPCODE_SIN ||
343 opcode == SHADER_OPCODE_COS ||
344 opcode == SHADER_OPCODE_INT_QUOTIENT ||
345 opcode == SHADER_OPCODE_INT_REMAINDER ||
346 opcode == SHADER_OPCODE_POW);
347 }
348
349 enum opcode opcode; /* BRW_OPCODE_* or FS_OPCODE_* */
350 fs_reg dst;
351 fs_reg src[3];
352 bool saturate;
353 bool predicated;
354 bool predicate_inverse;
355 int conditional_mod; /**< BRW_CONDITIONAL_* */
356
357 int mlen; /**< SEND message length */
358 int base_mrf; /**< First MRF in the SEND message, if mlen is nonzero. */
359 int sampler;
360 int target; /**< MRT target. */
361 bool eot;
362 bool header_present;
363 bool shadow_compare;
364 bool force_uncompressed;
365 bool force_sechalf;
366 uint32_t offset; /* spill/unspill offset */
367
368 /** @{
369 * Annotation for the generated IR. One of the two can be set.
370 */
371 ir_instruction *ir;
372 const char *annotation;
373 /** @} */
374 };
375
376 class fs_visitor : public ir_visitor
377 {
378 public:
379
380 fs_visitor(struct brw_wm_compile *c, struct gl_shader_program *prog,
381 struct brw_shader *shader)
382 {
383 this->c = c;
384 this->p = &c->func;
385 this->brw = p->brw;
386 this->fp = (struct gl_fragment_program *)
387 prog->_LinkedShaders[MESA_SHADER_FRAGMENT]->Program;
388 this->prog = prog;
389 this->intel = &brw->intel;
390 this->ctx = &intel->ctx;
391 this->mem_ctx = ralloc_context(NULL);
392 this->shader = shader;
393 this->failed = false;
394 this->variable_ht = hash_table_ctor(0,
395 hash_table_pointer_hash,
396 hash_table_pointer_compare);
397
398 /* There's a question that appears to be left open in the spec:
399 * How do implicit dst conversions interact with the CMP
400 * instruction or conditional mods? On gen6, the instruction:
401 *
402 * CMP null<d> src0<f> src1<f>
403 *
404 * will do src1 - src0 and compare that result as if it was an
405 * integer. On gen4, it will do src1 - src0 as float, convert
406 * the result to int, and compare as int. In between, it
407 * appears that it does src1 - src0 and does the compare in the
408 * execution type so dst type doesn't matter.
409 */
410 if (this->intel->gen > 4)
411 this->reg_null_cmp = reg_null_d;
412 else
413 this->reg_null_cmp = reg_null_f;
414
415 this->frag_depth = NULL;
416 memset(this->outputs, 0, sizeof(this->outputs));
417 this->first_non_payload_grf = 0;
418 this->max_grf = intel->gen >= 7 ? GEN7_MRF_HACK_START : BRW_MAX_GRF;
419
420 this->current_annotation = NULL;
421 this->base_ir = NULL;
422
423 this->virtual_grf_sizes = NULL;
424 this->virtual_grf_next = 0;
425 this->virtual_grf_array_size = 0;
426 this->virtual_grf_def = NULL;
427 this->virtual_grf_use = NULL;
428 this->live_intervals_valid = false;
429
430 this->kill_emitted = false;
431 this->force_uncompressed_stack = 0;
432 this->force_sechalf_stack = 0;
433 }
434
435 ~fs_visitor()
436 {
437 ralloc_free(this->mem_ctx);
438 hash_table_dtor(this->variable_ht);
439 }
440
441 fs_reg *variable_storage(ir_variable *var);
442 int virtual_grf_alloc(int size);
443 void import_uniforms(fs_visitor *v);
444
445 void visit(ir_variable *ir);
446 void visit(ir_assignment *ir);
447 void visit(ir_dereference_variable *ir);
448 void visit(ir_dereference_record *ir);
449 void visit(ir_dereference_array *ir);
450 void visit(ir_expression *ir);
451 void visit(ir_texture *ir);
452 void visit(ir_if *ir);
453 void visit(ir_constant *ir);
454 void visit(ir_swizzle *ir);
455 void visit(ir_return *ir);
456 void visit(ir_loop *ir);
457 void visit(ir_loop_jump *ir);
458 void visit(ir_discard *ir);
459 void visit(ir_call *ir);
460 void visit(ir_function *ir);
461 void visit(ir_function_signature *ir);
462
463 void swizzle_result(ir_texture *ir, fs_reg orig_val, int sampler);
464
465 fs_inst *emit(fs_inst inst);
466
467 fs_inst *emit(enum opcode opcode)
468 {
469 return emit(fs_inst(opcode));
470 }
471
472 fs_inst *emit(enum opcode opcode, fs_reg dst)
473 {
474 return emit(fs_inst(opcode, dst));
475 }
476
477 fs_inst *emit(enum opcode opcode, fs_reg dst, fs_reg src0)
478 {
479 return emit(fs_inst(opcode, dst, src0));
480 }
481
482 fs_inst *emit(enum opcode opcode, fs_reg dst, fs_reg src0, fs_reg src1)
483 {
484 return emit(fs_inst(opcode, dst, src0, src1));
485 }
486
487 fs_inst *emit(enum opcode opcode, fs_reg dst,
488 fs_reg src0, fs_reg src1, fs_reg src2)
489 {
490 return emit(fs_inst(opcode, dst, src0, src1, src2));
491 }
492
493 int type_size(const struct glsl_type *type);
494 fs_inst *get_instruction_generating_reg(fs_inst *start,
495 fs_inst *end,
496 fs_reg reg);
497
498 bool run();
499 void setup_paramvalues_refs();
500 void assign_curb_setup();
501 void calculate_urb_setup();
502 void assign_urb_setup();
503 bool assign_regs();
504 void assign_regs_trivial();
505 int choose_spill_reg(struct ra_graph *g);
506 void spill_reg(int spill_reg);
507 void split_virtual_grfs();
508 void setup_pull_constants();
509 void calculate_live_intervals();
510 bool propagate_constants();
511 bool opt_algebraic();
512 bool opt_cse();
513 bool opt_cse_local(fs_bblock *block, exec_list *aeb);
514 bool register_coalesce();
515 bool compute_to_mrf();
516 bool dead_code_eliminate();
517 bool remove_dead_constants();
518 bool remove_duplicate_mrf_writes();
519 bool virtual_grf_interferes(int a, int b);
520 void schedule_instructions();
521 void patch_discard_jumps_to_fb_writes();
522 void fail(const char *msg, ...);
523
524 void push_force_uncompressed();
525 void pop_force_uncompressed();
526 void push_force_sechalf();
527 void pop_force_sechalf();
528
529 void generate_code();
530 void generate_fb_write(fs_inst *inst);
531 void generate_pixel_xy(struct brw_reg dst, bool is_x);
532 void generate_linterp(fs_inst *inst, struct brw_reg dst,
533 struct brw_reg *src);
534 void generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src);
535 void generate_math1_gen7(fs_inst *inst,
536 struct brw_reg dst,
537 struct brw_reg src);
538 void generate_math2_gen7(fs_inst *inst,
539 struct brw_reg dst,
540 struct brw_reg src0,
541 struct brw_reg src1);
542 void generate_math1_gen6(fs_inst *inst,
543 struct brw_reg dst,
544 struct brw_reg src);
545 void generate_math2_gen6(fs_inst *inst,
546 struct brw_reg dst,
547 struct brw_reg src0,
548 struct brw_reg src1);
549 void generate_math_gen4(fs_inst *inst,
550 struct brw_reg dst,
551 struct brw_reg src);
552 void generate_discard(fs_inst *inst);
553 void generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src);
554 void generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src);
555 void generate_spill(fs_inst *inst, struct brw_reg src);
556 void generate_unspill(fs_inst *inst, struct brw_reg dst);
557 void generate_pull_constant_load(fs_inst *inst, struct brw_reg dst);
558
559 void emit_dummy_fs();
560 fs_reg *emit_fragcoord_interpolation(ir_variable *ir);
561 fs_reg *emit_frontfacing_interpolation(ir_variable *ir);
562 fs_reg *emit_general_interpolation(ir_variable *ir);
563 void emit_interpolation_setup_gen4();
564 void emit_interpolation_setup_gen6();
565 fs_inst *emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate,
566 int sampler);
567 fs_inst *emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate,
568 int sampler);
569 fs_inst *emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
570 int sampler);
571 fs_inst *emit_math(enum opcode op, fs_reg dst, fs_reg src0);
572 fs_inst *emit_math(enum opcode op, fs_reg dst, fs_reg src0, fs_reg src1);
573 bool try_emit_saturate(ir_expression *ir);
574 bool try_emit_mad(ir_expression *ir, int mul_arg);
575 void emit_bool_to_cond_code(ir_rvalue *condition);
576 void emit_if_gen6(ir_if *ir);
577 void emit_unspill(fs_inst *inst, fs_reg reg, uint32_t spill_offset);
578
579 void emit_color_write(int target, int index, int first_color_mrf);
580 void emit_fb_writes();
581 bool try_rewrite_rhs_to_dst(ir_assignment *ir,
582 fs_reg dst,
583 fs_reg src,
584 fs_inst *pre_rhs_inst,
585 fs_inst *last_rhs_inst);
586 void emit_assignment_writes(fs_reg &l, fs_reg &r,
587 const glsl_type *type, bool predicated);
588 void resolve_ud_negate(fs_reg *reg);
589 void resolve_bool_comparison(ir_rvalue *rvalue, fs_reg *reg);
590
591 struct brw_reg interp_reg(int location, int channel);
592 int setup_uniform_values(int loc, const glsl_type *type);
593 void setup_builtin_uniform_values(ir_variable *ir);
594 int implied_mrf_writes(fs_inst *inst);
595
596 struct brw_context *brw;
597 const struct gl_fragment_program *fp;
598 struct intel_context *intel;
599 struct gl_context *ctx;
600 struct brw_wm_compile *c;
601 struct brw_compile *p;
602 struct brw_shader *shader;
603 struct gl_shader_program *prog;
604 void *mem_ctx;
605 exec_list instructions;
606 exec_list discard_halt_patches;
607
608 /* Delayed setup of c->prog_data.params[] due to realloc of
609 * ParamValues[] during compile.
610 */
611 int param_index[MAX_UNIFORMS * 4];
612 int param_offset[MAX_UNIFORMS * 4];
613
614 int *virtual_grf_sizes;
615 int virtual_grf_next;
616 int virtual_grf_array_size;
617 int *virtual_grf_def;
618 int *virtual_grf_use;
619 bool live_intervals_valid;
620
621 /* This is the map from UNIFORM hw_reg + reg_offset as generated by
622 * the visitor to the packed uniform number after
623 * remove_dead_constants() that represents the actual uploaded
624 * uniform index.
625 */
626 int *params_remap;
627
628 struct hash_table *variable_ht;
629 ir_variable *frag_depth;
630 fs_reg outputs[BRW_MAX_DRAW_BUFFERS];
631 int first_non_payload_grf;
632 int max_grf;
633 int urb_setup[FRAG_ATTRIB_MAX];
634 bool kill_emitted;
635
636 /** @{ debug annotation info */
637 const char *current_annotation;
638 ir_instruction *base_ir;
639 /** @} */
640
641 bool failed;
642 char *fail_msg;
643
644 /* Result of last visit() method. */
645 fs_reg result;
646
647 fs_reg pixel_x;
648 fs_reg pixel_y;
649 fs_reg wpos_w;
650 fs_reg pixel_w;
651 fs_reg delta_x[BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT];
652 fs_reg delta_y[BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT];
653 fs_reg reg_null_cmp;
654
655 int grf_used;
656
657 int force_uncompressed_stack;
658 int force_sechalf_stack;
659
660 class fs_bblock *bblock;
661 };
662
663 bool brw_do_channel_expressions(struct exec_list *instructions);
664 bool brw_do_vector_splitting(struct exec_list *instructions);
665 bool brw_fs_precompile(struct gl_context *ctx, struct gl_shader_program *prog);