i965/vec4: adding vec4_cmod_propagation optimization
[mesa.git] / src / mesa / drivers / dri / i965 / brw_vec4.h
1 /*
2 * Copyright © 2011 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #ifndef BRW_VEC4_H
25 #define BRW_VEC4_H
26
27 #include <stdint.h>
28 #include "brw_shader.h"
29 #include "main/compiler.h"
30 #include "program/hash_table.h"
31 #include "brw_program.h"
32
33 #ifdef __cplusplus
34 #include "brw_ir_vec4.h"
35
36 extern "C" {
37 #endif
38
39 #include "brw_context.h"
40 #include "brw_eu.h"
41 #include "intel_asm_annotation.h"
42
43 #ifdef __cplusplus
44 }; /* extern "C" */
45 #endif
46
47 #include "glsl/ir.h"
48 #include "glsl/nir/nir.h"
49
50
51 #ifdef __cplusplus
52 extern "C" {
53 #endif
54
55 #ifdef __cplusplus
56 } /* extern "C" */
57
58 namespace brw {
59
60 class vec4_live_variables; /* Forward declaration: this header only refers to the type by pointer (vec4_visitor::live_intervals). */
61
62 /**
63 * The vertex shader front-end.
64 *
65 * Translates either GLSL IR or Mesa IR (for ARB_vertex_program and
66 * fixed-function) into VS IR.
 *
 * NOTE(review): the constructor below takes a nir_shader and the class
 * declares nir_emit_*() hooks, so the mention of GLSL IR / Mesa IR above
 * looks stale -- confirm and update.
 *
 * This is an abstract base class: make_reg_for_system_value(),
 * setup_payload(), emit_prolog(), emit_thread_end(),
 * emit_urb_write_header() and emit_urb_write_opcode() are pure virtual and
 * must be provided by a derived class.
67 */
68 class vec4_visitor : public backend_shader
69 {
70 public:
71 vec4_visitor(const struct brw_compiler *compiler,
72 void *log_data,
73 const struct brw_sampler_prog_key_data *key,
74 struct brw_vue_prog_data *prog_data,
75 const nir_shader *shader,
76 void *mem_ctx,
77 bool no_spills,
78 int shader_time_index);
79 virtual ~vec4_visitor();
80
/* Destination operands built from brw_null_reg().  dst_null_f() wraps the
 * register as returned; the _d / _ud variants first retype it to
 * BRW_REGISTER_TYPE_D / BRW_REGISTER_TYPE_UD.
 */
81 dst_reg dst_null_f()
82 {
83 return dst_reg(brw_null_reg());
84 }
85
86 dst_reg dst_null_d()
87 {
88 return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
89 }
90
91 dst_reg dst_null_ud()
92 {
93 return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
94 }
95
/* Both pointers are declared "* const": the pointer values cannot be
 * reseated after construction.  key_tex additionally points at const data.
 */
96 const struct brw_sampler_prog_key_data * const key_tex;
97 struct brw_vue_prog_data * const prog_data;
/* NOTE(review): presumably populated by fail() -- confirm in the implementation. */
98 char *fail_msg;
99 bool failed;
100
101 /**
102 * GLSL IR currently being processed, which is associated with our
103 * driver IR instructions for debugging purposes.
104 */
105 const void *base_ir;
106 const char *current_annotation;
107
108 int first_non_payload_grf;
109 unsigned int max_grf;
110 int *virtual_grf_start;
111 int *virtual_grf_end;
112 brw::vec4_live_variables *live_intervals;
113 dst_reg userplane[MAX_CLIP_PLANES];
114
115 bool need_all_constants_in_pull_buffer;
116
117 /* Regs for vertex results. Generated at ir_variable visiting time
118 * for the ir->location's used.
119 */
120 dst_reg output_reg[BRW_VARYING_SLOT_COUNT];
121 const char *output_reg_annotation[BRW_VARYING_SLOT_COUNT];
122 int *uniform_size;
123 int uniform_array_size; /**< Size of the uniform_size array */
124 int uniforms;
125
126 src_reg shader_start_time;
127
128 bool run();
/* Variadic.  NOTE(review): msg is presumably a printf-style format string -- confirm. */
129 void fail(const char *msg, ...);
130
131 int setup_uniforms(int payload_reg);
132
/* NOTE(review): the meaning of the bool results of the passes declared
 * below (success vs. "made progress") is defined in the implementation
 * files, not in this header -- confirm there before relying on it.
 */
133 bool reg_allocate_trivial();
134 bool reg_allocate();
135 void evaluate_spill_costs(float *spill_costs, bool *no_spill);
136 int choose_spill_reg(struct ra_graph *g);
137 void spill_reg(int spill_reg);
138 void move_grf_array_access_to_scratch();
139 void move_uniform_array_access_to_pull_constants();
140 void move_push_constants_to_pull_constants();
141 void split_uniform_registers();
142 void pack_uniform_registers();
143 void calculate_live_intervals();
144 void invalidate_live_intervals();
145 void split_virtual_grfs();
146 bool opt_vector_float();
147 bool opt_reduce_swizzle();
148 bool dead_code_eliminate();
149 int var_range_start(unsigned v, unsigned n) const;
150 int var_range_end(unsigned v, unsigned n) const;
151 bool virtual_grf_interferes(int a, int b);
152 bool opt_cmod_propagation();
153 bool opt_copy_propagation(bool do_constant_prop = true); /* do_constant_prop defaults to true */
154 bool opt_cse_local(bblock_t *block);
155 bool opt_cse();
156 bool opt_algebraic();
157 bool opt_register_coalesce();
158 bool eliminate_find_live_channel();
159 bool is_dep_ctrl_unsafe(const vec4_instruction *inst);
160 void opt_set_dependency_control();
161 void opt_schedule_instructions();
162
/* emit() overloads: the first takes an existing vec4_instruction pointer;
 * the others take an opcode plus an optional destination and up to three
 * sources.  All return a vec4_instruction pointer.
 * NOTE(review): presumably the instruction that was appended -- confirm.
 */
163 vec4_instruction *emit(vec4_instruction *inst);
164
165 vec4_instruction *emit(enum opcode opcode);
166 vec4_instruction *emit(enum opcode opcode, const dst_reg &dst);
167 vec4_instruction *emit(enum opcode opcode, const dst_reg &dst,
168 const src_reg &src0);
169 vec4_instruction *emit(enum opcode opcode, const dst_reg &dst,
170 const src_reg &src0, const src_reg &src1);
171 vec4_instruction *emit(enum opcode opcode, const dst_reg &dst,
172 const src_reg &src0, const src_reg &src1,
173 const src_reg &src2);
174
175 vec4_instruction *emit_before(bblock_t *block,
176 vec4_instruction *inst,
177 vec4_instruction *new_inst);
178
/* EMITn(op) declares a member function named `op` that takes a destination
 * and n source operands (all by const reference) and returns a
 * vec4_instruction pointer.  The macro bodies already end in ';', which is
 * why the uses below carry no trailing semicolon.  The macros are #undef'd
 * as soon as the list is complete.
 */
179 #define EMIT1(op) vec4_instruction *op(const dst_reg &, const src_reg &);
180 #define EMIT2(op) vec4_instruction *op(const dst_reg &, const src_reg &, const src_reg &);
181 #define EMIT3(op) vec4_instruction *op(const dst_reg &, const src_reg &, const src_reg &, const src_reg &);
182 EMIT1(MOV)
183 EMIT1(NOT)
184 EMIT1(RNDD)
185 EMIT1(RNDE)
186 EMIT1(RNDZ)
187 EMIT1(FRC)
188 EMIT1(F32TO16)
189 EMIT1(F16TO32)
190 EMIT2(ADD)
191 EMIT2(MUL)
192 EMIT2(MACH)
193 EMIT2(MAC)
194 EMIT2(AND)
195 EMIT2(OR)
196 EMIT2(XOR)
197 EMIT2(DP3)
198 EMIT2(DP4)
199 EMIT2(DPH)
200 EMIT2(SHL)
201 EMIT2(SHR)
202 EMIT2(ASR)
/* CMP and IF are declared by hand: they take a conditional-modifier or
 * predicate argument, which the EMITn signatures do not provide.
 */
203 vec4_instruction *CMP(dst_reg dst, src_reg src0, src_reg src1,
204 enum brw_conditional_mod condition);
205 vec4_instruction *IF(src_reg src0, src_reg src1,
206 enum brw_conditional_mod condition);
207 vec4_instruction *IF(enum brw_predicate predicate);
208 EMIT1(SCRATCH_READ)
209 EMIT2(SCRATCH_WRITE)
210 EMIT3(LRP)
211 EMIT1(BFREV)
212 EMIT3(BFE)
213 EMIT2(BFI1)
214 EMIT3(BFI2)
215 EMIT1(FBH)
216 EMIT1(FBL)
217 EMIT1(CBIT)
218 EMIT3(MAD)
219 EMIT2(ADDC)
220 EMIT2(SUBB)
221 #undef EMIT1
222 #undef EMIT2
223 #undef EMIT3
224
225 int implied_mrf_writes(vec4_instruction *inst);
226
227 vec4_instruction *emit_minmax(enum brw_conditional_mod conditionalmod, dst_reg dst,
228 src_reg src0, src_reg src1);
229
230 vec4_instruction *emit_lrp(const dst_reg &dst, const src_reg &x,
231 const src_reg &y, const src_reg &a);
232
233 /**
234 * Copy any live channel from \p src to the first channel of the
235 * result.
236 */
237 src_reg emit_uniformize(const src_reg &src);
238
239 src_reg fix_3src_operand(const src_reg &src);
240 src_reg resolve_source_modifiers(const src_reg &src);
241
/* src1 defaults to a default-constructed src_reg, so callers with a single
 * operand may omit it.
 */
242 vec4_instruction *emit_math(enum opcode opcode, const dst_reg &dst, const src_reg &src0,
243 const src_reg &src1 = src_reg());
244
245 src_reg fix_math_operand(const src_reg &src);
246
247 void emit_pack_half_2x16(dst_reg dst, src_reg src0);
248 void emit_unpack_half_2x16(dst_reg dst, src_reg src0);
249 void emit_unpack_unorm_4x8(const dst_reg &dst, src_reg src0);
250 void emit_unpack_snorm_4x8(const dst_reg &dst, src_reg src0);
251 void emit_pack_unorm_4x8(const dst_reg &dst, const src_reg &src0);
252 void emit_pack_snorm_4x8(const dst_reg &dst, const src_reg &src0);
253
254 void emit_texture(ir_texture_opcode op,
255 dst_reg dest,
256 const glsl_type *dest_type,
257 src_reg coordinate,
258 int coord_components,
259 src_reg shadow_comparitor,
260 src_reg lod, src_reg lod2,
261 src_reg sample_index,
262 uint32_t constant_offset,
263 src_reg offset_value,
264 src_reg mcs,
265 bool is_cube_array,
266 uint32_t sampler, src_reg sampler_reg);
267
268 uint32_t gather_channel(unsigned gather_component, uint32_t sampler);
269 src_reg emit_mcs_fetch(const glsl_type *coordinate_type, src_reg coordinate,
270 src_reg sampler);
271 void emit_gen6_gather_wa(uint8_t wa, dst_reg dst);
272 void swizzle_result(ir_texture_opcode op, dst_reg dest,
273 src_reg orig_val, uint32_t sampler,
274 const glsl_type *dest_type);
275
276 void emit_ndc_computation();
277 void emit_psiz_and_flags(dst_reg reg);
278 vec4_instruction *emit_generic_urb_slot(dst_reg reg, int varying);
279 virtual void emit_urb_slot(dst_reg reg, int varying);
280
281 void emit_shader_time_begin();
282 void emit_shader_time_end();
283 void emit_shader_time_write(int shader_time_subindex, src_reg value);
284
285 void emit_untyped_atomic(unsigned atomic_op, unsigned surf_index,
286 dst_reg dst, src_reg offset, src_reg src0,
287 src_reg src1);
288
289 void emit_untyped_surface_read(unsigned surf_index, dst_reg dst,
290 src_reg offset);
291
292 src_reg get_scratch_offset(bblock_t *block, vec4_instruction *inst,
293 src_reg *reladdr, int reg_offset);
294 src_reg get_pull_constant_offset(bblock_t *block, vec4_instruction *inst,
295 src_reg *reladdr, int reg_offset);
296 void emit_scratch_read(bblock_t *block, vec4_instruction *inst,
297 dst_reg dst,
298 src_reg orig_src,
299 int base_offset);
300 void emit_scratch_write(bblock_t *block, vec4_instruction *inst,
301 int base_offset);
302 void emit_pull_constant_load(bblock_t *block, vec4_instruction *inst,
303 dst_reg dst,
304 src_reg orig_src,
305 int base_offset);
306 void emit_pull_constant_load_reg(dst_reg dst,
307 src_reg surf_index,
308 src_reg offset,
309 bblock_t *before_block,
310 vec4_instruction *before_inst);
311 src_reg emit_resolve_reladdr(int scratch_loc[], bblock_t *block,
312 vec4_instruction *inst, src_reg src);
313
314 void resolve_ud_negate(src_reg *reg);
315
316 src_reg get_timestamp();
317
318 void dump_instruction(backend_instruction *inst);
319 void dump_instruction(backend_instruction *inst, FILE *file);
320
321 bool is_high_sampler(src_reg sampler);
322
/* NIR-facing entry points.  Every member in this group is virtual, so a
 * derived class may override any of them.
 */
323 virtual void emit_nir_code();
324 virtual void nir_setup_inputs();
325 virtual void nir_setup_uniforms();
326 virtual void nir_setup_system_value_intrinsic(nir_intrinsic_instr *instr);
327 virtual void nir_setup_system_values();
328 virtual void nir_emit_impl(nir_function_impl *impl);
329 virtual void nir_emit_cf_list(exec_list *list);
330 virtual void nir_emit_if(nir_if *if_stmt);
331 virtual void nir_emit_loop(nir_loop *loop);
332 virtual void nir_emit_block(nir_block *block);
333 virtual void nir_emit_instr(nir_instr *instr);
334 virtual void nir_emit_load_const(nir_load_const_instr *instr);
335 virtual void nir_emit_intrinsic(nir_intrinsic_instr *instr);
336 virtual void nir_emit_alu(nir_alu_instr *instr);
337 virtual void nir_emit_jump(nir_jump_instr *instr);
338 virtual void nir_emit_texture(nir_tex_instr *instr);
339 virtual void nir_emit_undef(nir_ssa_undef_instr *instr);
340 virtual void nir_emit_ssbo_atomic(int op, nir_intrinsic_instr *instr);
341
/* get_nir_dest()/get_nir_src() come in three flavours each: with an explicit
 * brw_reg_type, with a nir_alu_type, and with no type argument.
 * num_components defaults to 4 in every get_nir_src() overload.
 */
342 dst_reg get_nir_dest(nir_dest dest, enum brw_reg_type type);
343 dst_reg get_nir_dest(nir_dest dest, nir_alu_type type);
344 dst_reg get_nir_dest(nir_dest dest);
345 src_reg get_nir_src(nir_src src, enum brw_reg_type type,
346 unsigned num_components = 4);
347 src_reg get_nir_src(nir_src src, nir_alu_type type,
348 unsigned num_components = 4);
349 src_reg get_nir_src(nir_src src,
350 unsigned num_components = 4);
351
/* Pure virtual: must be implemented by a derived class. */
352 virtual dst_reg *make_reg_for_system_value(int location,
353 const glsl_type *type) = 0;
354
/* NOTE(review): raw pointers; presumably arrays filled in by the nir_setup_*
 * / nir_emit_* code.  Ownership and element counts are not visible in this
 * header -- confirm in the implementation.
 */
355 dst_reg *nir_locals;
356 dst_reg *nir_ssa_values;
357 src_reg *nir_inputs;
358 dst_reg *nir_system_values;
359
360 protected:
361 void emit_vertex();
362 void lower_attributes_to_hw_regs(const int *attribute_map,
363 bool interleaved);
364 void setup_payload_interference(struct ra_graph *g, int first_payload_node,
365 int reg_node_count);
/* The next five members are pure virtual and must be supplied by a derived
 * class.  gs_emit_vertex() and gs_end_primitive() after them are virtual but
 * not pure.
 */
366 virtual void setup_payload() = 0;
367 virtual void emit_prolog() = 0;
368 virtual void emit_thread_end() = 0;
369 virtual void emit_urb_write_header(int mrf) = 0;
370 virtual vec4_instruction *emit_urb_write_opcode(bool complete) = 0;
371 virtual void gs_emit_vertex(int stream_id);
372 virtual void gs_end_primitive();
373
374 private:
375 /**
376 * If true, then register allocation should fail instead of spilling.
377 */
378 const bool no_spills;
379
380 int shader_time_index;
381
382 unsigned last_scratch; /**< measured in 32-byte (register size) units */
383 };
384
385
386 /**
387 * The vertex shader code generator.
388 *
389 * Translates VS IR to actual i965 assembly code.
 *
 * The class declares no virtual members (the destructor is non-virtual).
 * Apart from the constructor and destructor, generate_assembly() is the
 * only public member; everything else is a private helper or private
 * state.
390 */
391 class vec4_generator
392 {
393 public:
394 vec4_generator(const struct brw_compiler *compiler, void *log_data,
395 struct brw_vue_prog_data *prog_data,
396 void *mem_ctx,
397 bool debug_flag,
398 const char *stage_name,
399 const char *stage_abbrev);
400 ~vec4_generator();
401
/* NOTE(review): asm_size is a non-const pointer, presumably an
 * out-parameter receiving the size of the returned program; units and
 * ownership of the returned buffer are not visible here -- confirm in the
 * implementation.
 */
402 const unsigned *generate_assembly(const cfg_t *cfg, unsigned *asm_size,
403 const nir_shader *nir);
404
405 private:
406 void generate_code(const cfg_t *cfg, const nir_shader *nir);
407
/* NOTE(review): the generate_*() helpers below presumably each emit native
 * code for one IR opcode on behalf of generate_code() -- confirm in the
 * implementation.
 */
408 void generate_math1_gen4(vec4_instruction *inst,
409 struct brw_reg dst,
410 struct brw_reg src);
411 void generate_math2_gen4(vec4_instruction *inst,
412 struct brw_reg dst,
413 struct brw_reg src0,
414 struct brw_reg src1);
415 void generate_math_gen6(vec4_instruction *inst,
416 struct brw_reg dst,
417 struct brw_reg src0,
418 struct brw_reg src1);
419
420 void generate_tex(vec4_instruction *inst,
421 struct brw_reg dst,
422 struct brw_reg src,
423 struct brw_reg sampler_index);
424
425 void generate_vs_urb_write(vec4_instruction *inst);
426 void generate_gs_urb_write(vec4_instruction *inst);
427 void generate_gs_urb_write_allocate(vec4_instruction *inst);
428 void generate_gs_thread_end(vec4_instruction *inst);
429 void generate_gs_set_write_offset(struct brw_reg dst,
430 struct brw_reg src0,
431 struct brw_reg src1);
432 void generate_gs_set_vertex_count(struct brw_reg dst,
433 struct brw_reg src);
434 void generate_gs_svb_write(vec4_instruction *inst,
435 struct brw_reg dst,
436 struct brw_reg src0,
437 struct brw_reg src1);
438 void generate_gs_svb_set_destination_index(vec4_instruction *inst,
439 struct brw_reg dst,
440 struct brw_reg src);
441 void generate_gs_set_dword_2(struct brw_reg dst, struct brw_reg src);
442 void generate_gs_prepare_channel_masks(struct brw_reg dst);
443 void generate_gs_set_channel_masks(struct brw_reg dst, struct brw_reg src);
444 void generate_gs_get_instance_id(struct brw_reg dst);
445 void generate_gs_ff_sync_set_primitives(struct brw_reg dst,
446 struct brw_reg src0,
447 struct brw_reg src1,
448 struct brw_reg src2);
449 void generate_gs_ff_sync(vec4_instruction *inst,
450 struct brw_reg dst,
451 struct brw_reg src0,
452 struct brw_reg src1);
453 void generate_gs_set_primitive_id(struct brw_reg dst);
454 void generate_oword_dual_block_offsets(struct brw_reg m1,
455 struct brw_reg index);
456 void generate_scratch_write(vec4_instruction *inst,
457 struct brw_reg dst,
458 struct brw_reg src,
459 struct brw_reg index);
460 void generate_scratch_read(vec4_instruction *inst,
461 struct brw_reg dst,
462 struct brw_reg index);
463 void generate_pull_constant_load(vec4_instruction *inst,
464 struct brw_reg dst,
465 struct brw_reg index,
466 struct brw_reg offset);
467 void generate_pull_constant_load_gen7(vec4_instruction *inst,
468 struct brw_reg dst,
469 struct brw_reg surf_index,
470 struct brw_reg offset);
471 void generate_set_simd4x2_header_gen9(vec4_instruction *inst,
472 struct brw_reg dst);
473
474 void generate_get_buffer_size(vec4_instruction *inst,
475 struct brw_reg dst,
476 struct brw_reg src,
477 struct brw_reg index);
478
479 void generate_unpack_flags(struct brw_reg dst);
480
481 const struct brw_compiler *compiler;
482 void *log_data; /* Passed to compiler->*_log functions */
483
/* NOTE(review): not a constructor parameter; presumably derived from
 * `compiler` during construction -- confirm.
 */
484 const struct brw_device_info *devinfo;
485
486 struct brw_codegen *p;
487
488 struct brw_vue_prog_data *prog_data;
489
490 void *mem_ctx;
491 const char *stage_name;
492 const char *stage_abbrev;
493 const bool debug_flag; /* const member: can only be set in the constructor's initializer list */
494 };
495
496 } /* namespace brw */
497 #endif /* __cplusplus */
498
499 #endif /* BRW_VEC4_H */