i965: Pull calls to get_shader_time_index out of the visitor
[mesa.git] / src / mesa / drivers / dri / i965 / brw_vec4.h
1 /*
2 * Copyright © 2011 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #ifndef BRW_VEC4_H
25 #define BRW_VEC4_H
26
27 #include <stdint.h>
28 #include "brw_shader.h"
29 #include "main/compiler.h"
30 #include "program/hash_table.h"
31 #include "brw_program.h"
32
33 #ifdef __cplusplus
34 #include "brw_ir_vec4.h"
35
36 extern "C" {
37 #endif
38
39 #include "brw_context.h"
40 #include "brw_eu.h"
41 #include "intel_asm_annotation.h"
42
43 #ifdef __cplusplus
44 }; /* extern "C" */
45 #endif
46
47 #include "glsl/ir.h"
48
49
/* Compile state handed to the vec4 front-end: vec4_visitor's constructor takes
 * a pointer to this struct as its `c` argument. It currently carries a single
 * scratch-space counter.
 * NOTE(review): "last" presumably means the running total of scratch space
 * handed out so far -- confirm against the users of this field.
 */
50 struct brw_vec4_compile {
51 GLuint last_scratch; /**< measured in 32-byte (register size) units */
52 };
53
54 #ifdef __cplusplus
55 extern "C" {
56 #endif
57
/* Declared between the surrounding extern "C" guards, so the symbol has C
 * linkage when this header is included from C++.
 * NOTE(review): judging by the name, this presumably fills *key with default
 * values suitable for precompiling prog; the definition is not part of this
 * header -- confirm there before relying on it.
 */
58 void
59 brw_vue_setup_prog_key_for_precompile(struct gl_context *ctx,
60 struct brw_vue_prog_key *key,
61 GLuint id, struct gl_program *prog);
62
63 #ifdef __cplusplus
64 } /* extern "C" */
65
66 namespace brw {
67
68 class vec4_live_variables;
69
70 /**
71 * The vertex shader front-end.
72 *
73 * Translates either GLSL IR or Mesa IR (for ARB_vertex_program and
74 * fixed-function) into VS IR.
75 */
76 class vec4_visitor : public backend_shader, public ir_visitor
77 {
78 public:
/* The c, key and prog_data parameters share their names with the const
 * pointer members declared further down; no_spills and shader_time_index
 * share theirs with the private members at the end of the class.
 * NOTE(review): the out-of-line constructor presumably stores each argument
 * in the member of the same name -- confirm against the definition.
 */
79 vec4_visitor(struct brw_context *brw,
80 struct brw_vec4_compile *c,
81 struct gl_program *prog,
82 const struct brw_vue_prog_key *key,
83 struct brw_vue_prog_data *prog_data,
84 struct gl_shader_program *shader_prog,
85 gl_shader_stage stage,
86 void *mem_ctx,
87 bool no_spills,
88 int shader_time_index);
89 ~vec4_visitor();
90
/* Null-register destinations. Each helper wraps brw_null_reg() in a dst_reg:
 * dst_null_f() uses the register exactly as brw_null_reg() returns it, while
 * dst_null_d() and dst_null_ud() first retype it to BRW_REGISTER_TYPE_D and
 * BRW_REGISTER_TYPE_UD respectively.
 */
91 dst_reg dst_null_f()
92 {
93 return dst_reg(brw_null_reg());
94 }
95
96 dst_reg dst_null_d()
97 {
98 return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
99 }
100
101 dst_reg dst_null_ud()
102 {
103 return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
104 }
105
/* All three are const pointers, so they must be bound in the constructor's
 * initializer list and cannot be reseated afterwards; key additionally points
 * at const data.
 */
106 struct brw_vec4_compile * const c;
107 const struct brw_vue_prog_key * const key;
108 struct brw_vue_prog_data * const prog_data;
109 unsigned int sanity_param_count;
110
/* NOTE(review): presumably written by fail() declared below -- confirm. */
111 char *fail_msg;
112 bool failed;
113
114 /**
115 * GLSL IR currently being processed, which is associated with our
116 * driver IR instructions for debugging purposes.
117 */
118 const void *base_ir;
119 const char *current_annotation;
120
121 int first_non_payload_grf;
122 unsigned int max_grf;
123 int *virtual_grf_start;
124 int *virtual_grf_end;
125 brw::vec4_live_variables *live_intervals;
/* One destination register per clip plane slot (MAX_CLIP_PLANES entries). */
126 dst_reg userplane[MAX_CLIP_PLANES];
127
128 dst_reg *variable_storage(ir_variable *var);
129
130 void reladdr_to_temp(ir_instruction *ir, src_reg *reg, int *num_reladdr);
131
132 bool need_all_constants_in_pull_buffer;
133
134 /**
135 * \name Visit methods
136 *
137 * As typical for the visitor pattern, there must be one \c visit method for
138 * each concrete subclass of \c ir_instruction. Virtual base classes within
139 * the hierarchy should not have \c visit methods.
140 */
141 /*@{*/
142 virtual void visit(ir_variable *);
143 virtual void visit(ir_loop *);
144 virtual void visit(ir_loop_jump *);
145 virtual void visit(ir_function_signature *);
146 virtual void visit(ir_function *);
147 virtual void visit(ir_expression *);
148 virtual void visit(ir_swizzle *);
149 virtual void visit(ir_dereference_variable *);
150 virtual void visit(ir_dereference_array *);
151 virtual void visit(ir_dereference_record *);
152 virtual void visit(ir_assignment *);
153 virtual void visit(ir_constant *);
154 virtual void visit(ir_call *);
155 virtual void visit(ir_return *);
156 virtual void visit(ir_discard *);
157 virtual void visit(ir_texture *);
158 virtual void visit(ir_if *);
159 virtual void visit(ir_emit_vertex *);
160 virtual void visit(ir_end_primitive *);
161 virtual void visit(ir_barrier *);
162 /*@}*/
163
/* NOTE(review): presumably where the visit() methods leave the value of the
 * rvalue they just processed (they all return void) -- confirm.
 */
164 src_reg result;
165
166 /* Regs for vertex results. Generated at ir_variable visiting time
167 * for the ir->location's used.
168 */
169 dst_reg output_reg[BRW_VARYING_SLOT_COUNT];
170 const char *output_reg_annotation[BRW_VARYING_SLOT_COUNT];
171 int *uniform_size;
172 int *uniform_vector_size;
173 int uniform_array_size; /**< Size of uniform_[vector_]size arrays */
174 int uniforms;
175
/* NOTE(review): presumably produced by emit_shader_time_begin() and consumed
 * by emit_shader_time_end(), both declared below -- confirm.
 */
176 src_reg shader_start_time;
177
178 struct hash_table *variable_ht;
179
/* fail() takes a printf-style format string followed by variadic arguments.
 * NOTE(review): run() presumably reports overall success through its bool
 * return value -- confirm against the definition.
 */
180 bool run(void);
181 void fail(const char *msg, ...);
182
/* Uniform setup, register allocation, spilling and optimization passes.
 * NOTE(review): the bool-returning opt_* passes presumably report whether
 * they changed the program; nothing in this header establishes that.
 */
183 void setup_uniform_clipplane_values();
184 void setup_uniform_values(ir_variable *ir);
185 void setup_builtin_uniform_values(ir_variable *ir);
186 int setup_uniforms(int payload_reg);
187 bool reg_allocate_trivial();
188 bool reg_allocate();
189 void evaluate_spill_costs(float *spill_costs, bool *no_spill);
190 int choose_spill_reg(struct ra_graph *g);
191 void spill_reg(int spill_reg);
192 void move_grf_array_access_to_scratch();
193 void move_uniform_array_access_to_pull_constants();
194 void move_push_constants_to_pull_constants();
195 void split_uniform_registers();
196 void pack_uniform_registers();
197 void calculate_live_intervals();
198 void invalidate_live_intervals();
199 void split_virtual_grfs();
200 bool opt_vector_float();
201 bool opt_reduce_swizzle();
202 bool dead_code_eliminate();
203 int var_range_start(unsigned v, unsigned n) const;
204 int var_range_end(unsigned v, unsigned n) const;
205 bool virtual_grf_interferes(int a, int b);
/* do_constant_prop defaults to true, so callers may omit the argument. */
206 bool opt_copy_propagation(bool do_constant_prop = true);
207 bool opt_cse_local(bblock_t *block);
208 bool opt_cse();
209 bool opt_algebraic();
210 bool opt_register_coalesce();
211 bool eliminate_find_live_channel();
212 bool is_dep_ctrl_unsafe(const vec4_instruction *inst);
213 void opt_set_dependency_control();
214 void opt_schedule_instructions();
215
/* emit() overload set: the first form takes an already-built instruction;
 * the others take an opcode plus, optionally, a destination and up to three
 * source registers. Every form returns a vec4_instruction pointer.
 */
216 vec4_instruction *emit(vec4_instruction *inst);
217
218 vec4_instruction *emit(enum opcode opcode);
219 vec4_instruction *emit(enum opcode opcode, const dst_reg &dst);
220 vec4_instruction *emit(enum opcode opcode, const dst_reg &dst,
221 const src_reg &src0);
222 vec4_instruction *emit(enum opcode opcode, const dst_reg &dst,
223 const src_reg &src0, const src_reg &src1);
224 vec4_instruction *emit(enum opcode opcode, const dst_reg &dst,
225 const src_reg &src0, const src_reg &src1,
226 const src_reg &src2);
227
228 vec4_instruction *emit_before(bblock_t *block,
229 vec4_instruction *inst,
230 vec4_instruction *new_inst);
231
/* EMITn(op) expands to the declaration of a member function named op that
 * returns a vec4_instruction pointer and takes one destination followed by n
 * source registers. CMP and IF are declared by hand in the middle of the
 * list because their parameters (a conditional modifier or a predicate) do
 * not fit those shapes. The three macros are removed with #undef right after
 * the list so they do not leak into files that include this header.
 */
232 #define EMIT1(op) vec4_instruction *op(const dst_reg &, const src_reg &);
233 #define EMIT2(op) vec4_instruction *op(const dst_reg &, const src_reg &, const src_reg &);
234 #define EMIT3(op) vec4_instruction *op(const dst_reg &, const src_reg &, const src_reg &, const src_reg &);
235 EMIT1(MOV)
236 EMIT1(NOT)
237 EMIT1(RNDD)
238 EMIT1(RNDE)
239 EMIT1(RNDZ)
240 EMIT1(FRC)
241 EMIT1(F32TO16)
242 EMIT1(F16TO32)
243 EMIT2(ADD)
244 EMIT2(MUL)
245 EMIT2(MACH)
246 EMIT2(MAC)
247 EMIT2(AND)
248 EMIT2(OR)
249 EMIT2(XOR)
250 EMIT2(DP3)
251 EMIT2(DP4)
252 EMIT2(DPH)
253 EMIT2(SHL)
254 EMIT2(SHR)
255 EMIT2(ASR)
256 vec4_instruction *CMP(dst_reg dst, src_reg src0, src_reg src1,
257 enum brw_conditional_mod condition);
258 vec4_instruction *IF(src_reg src0, src_reg src1,
259 enum brw_conditional_mod condition);
260 vec4_instruction *IF(enum brw_predicate predicate);
261 EMIT1(SCRATCH_READ)
262 EMIT2(SCRATCH_WRITE)
263 EMIT3(LRP)
264 EMIT1(BFREV)
265 EMIT3(BFE)
266 EMIT2(BFI1)
267 EMIT3(BFI2)
268 EMIT1(FBH)
269 EMIT1(FBL)
270 EMIT1(CBIT)
271 EMIT3(MAD)
272 EMIT2(ADDC)
273 EMIT2(SUBB)
274 #undef EMIT1
275 #undef EMIT2
276 #undef EMIT3
277
278 int implied_mrf_writes(vec4_instruction *inst);
279
280 bool try_rewrite_rhs_to_dst(ir_assignment *ir,
281 dst_reg dst,
282 src_reg src,
283 vec4_instruction *pre_rhs_inst,
284 vec4_instruction *last_rhs_inst);
285
286 /** Walks an exec_list of ir_instruction and sends it through this visitor. */
287 void visit_instructions(const exec_list *list);
288
289 void emit_vp_sop(enum brw_conditional_mod condmod, dst_reg dst,
290 src_reg src0, src_reg src1, src_reg one);
291
292 void emit_bool_to_cond_code(ir_rvalue *ir, enum brw_predicate *predicate);
293 void emit_if_gen6(ir_if *ir);
294
295 void emit_minmax(enum brw_conditional_mod conditionalmod, dst_reg dst,
296 src_reg src0, src_reg src1);
297
298 void emit_lrp(const dst_reg &dst,
299 const src_reg &x, const src_reg &y, const src_reg &a);
300
301 /** Copy any live channel from \p src to the first channel of \p dst. */
302 void emit_uniformize(const dst_reg &dst, const src_reg &src);
303
304 void emit_block_move(dst_reg *dst, src_reg *src,
305 const struct glsl_type *type, brw_predicate predicate);
306
307 void emit_constant_values(dst_reg *dst, ir_constant *value);
308
309 /**
310 * Emit the correct dot-product instruction for the type of arguments
311 */
312 void emit_dp(dst_reg dst, src_reg src0, src_reg src1, unsigned elements);
313
/* Two emit_scalar() overloads: one-source and two-source forms. */
314 void emit_scalar(ir_instruction *ir, enum prog_opcode op,
315 dst_reg dst, src_reg src0);
316
317 void emit_scalar(ir_instruction *ir, enum prog_opcode op,
318 dst_reg dst, src_reg src0, src_reg src1);
319
320 src_reg fix_3src_operand(src_reg src);
321
/* src1 defaults to a default-constructed src_reg, so math operations with a
 * single operand can be emitted without passing a second source.
 */
322 void emit_math(enum opcode opcode, const dst_reg &dst, const src_reg &src0,
323 const src_reg &src1 = src_reg());
324 src_reg fix_math_operand(src_reg src);
325
326 void emit_pack_half_2x16(dst_reg dst, src_reg src0);
327 void emit_unpack_half_2x16(dst_reg dst, src_reg src0);
328 void emit_unpack_unorm_4x8(const dst_reg &dst, src_reg src0);
329 void emit_unpack_snorm_4x8(const dst_reg &dst, src_reg src0);
330 void emit_pack_unorm_4x8(const dst_reg &dst, const src_reg &src0);
331 void emit_pack_snorm_4x8(const dst_reg &dst, const src_reg &src0);
332
333 uint32_t gather_channel(ir_texture *ir, uint32_t sampler);
334 src_reg emit_mcs_fetch(ir_texture *ir, src_reg coordinate, src_reg sampler);
335 void emit_gen6_gather_wa(uint8_t wa, dst_reg dst);
336 void swizzle_result(ir_texture *ir, src_reg orig_val, uint32_t sampler);
337
338 void emit_ndc_computation();
339 void emit_psiz_and_flags(dst_reg reg);
340 void emit_clip_distances(dst_reg reg, int offset);
341 vec4_instruction *emit_generic_urb_slot(dst_reg reg, int varying);
342 void emit_urb_slot(dst_reg reg, int varying);
343
344 void emit_shader_time_begin();
345 void emit_shader_time_end();
346 void emit_shader_time_write(int shader_time_subindex, src_reg value);
347
348 void emit_untyped_atomic(unsigned atomic_op, unsigned surf_index,
349 dst_reg dst, src_reg offset, src_reg src0,
350 src_reg src1);
351
352 void emit_untyped_surface_read(unsigned surf_index, dst_reg dst,
353 src_reg offset);
354
/* Scratch and pull-constant helpers. Apart from emit_pull_constant_load_reg()
 * (which names the pair before_block / before_inst), each one receives a
 * (block, inst) pair.
 * NOTE(review): the pair presumably identifies the instruction that the new
 * code is placed relative to -- confirm against the definitions.
 */
355 src_reg get_scratch_offset(bblock_t *block, vec4_instruction *inst,
356 src_reg *reladdr, int reg_offset);
357 src_reg get_pull_constant_offset(bblock_t *block, vec4_instruction *inst,
358 src_reg *reladdr, int reg_offset);
359 void emit_scratch_read(bblock_t *block, vec4_instruction *inst,
360 dst_reg dst,
361 src_reg orig_src,
362 int base_offset);
363 void emit_scratch_write(bblock_t *block, vec4_instruction *inst,
364 int base_offset);
365 void emit_pull_constant_load(bblock_t *block, vec4_instruction *inst,
366 dst_reg dst,
367 src_reg orig_src,
368 int base_offset);
369 void emit_pull_constant_load_reg(dst_reg dst,
370 src_reg surf_index,
371 src_reg offset,
372 bblock_t *before_block,
373 vec4_instruction *before_inst);
374 src_reg emit_resolve_reladdr(int scratch_loc[], bblock_t *block,
375 vec4_instruction *inst, src_reg src);
376
377 bool try_emit_mad(ir_expression *ir);
378 bool try_emit_b2f_of_compare(ir_expression *ir);
379 void resolve_ud_negate(src_reg *reg);
380 void resolve_bool_comparison(ir_rvalue *rvalue, src_reg *reg);
381
382 src_reg get_timestamp();
383
384 bool process_move_condition(ir_rvalue *ir);
385
/* Two overloads: with and without an explicit FILE to write to. */
386 void dump_instruction(backend_instruction *inst);
387 void dump_instruction(backend_instruction *inst, FILE *file);
388
389 void visit_atomic_counter_intrinsic(ir_call *ir);
390
391 protected:
392 void emit_vertex();
393 void lower_attributes_to_hw_regs(const int *attribute_map,
394 bool interleaved);
395 void setup_payload_interference(struct ra_graph *g, int first_payload_node,
396 int reg_node_count);
/* Subclass hooks. The members marked "= 0" are pure virtual, which makes
 * vec4_visitor abstract: a concrete subclass has to implement
 * make_reg_for_system_value, setup_payload, emit_prolog, emit_program_code,
 * emit_thread_end, emit_urb_write_header and emit_urb_write_opcode.
 * assign_binding_table_offsets and compute_array_stride are virtual but not
 * pure, so overriding them is optional.
 */
397 virtual dst_reg *make_reg_for_system_value(ir_variable *ir) = 0;
398 virtual void assign_binding_table_offsets();
399 virtual void setup_payload() = 0;
400 virtual void emit_prolog() = 0;
401 virtual void emit_program_code() = 0;
402 virtual void emit_thread_end() = 0;
403 virtual void emit_urb_write_header(int mrf) = 0;
404 virtual vec4_instruction *emit_urb_write_opcode(bool complete) = 0;
405 virtual int compute_array_stride(ir_dereference_array *ir);
406
407 private:
408 /**
409 * If true, then register allocation should fail instead of spilling.
410 */
411 const bool no_spills;
412
/* Shares its name with the last constructor parameter.
 * NOTE(review): presumably the index used by the emit_shader_time_*()
 * helpers when recording timings -- confirm against the definitions.
 */
413 int shader_time_index;
414 };
415
416
417 /**
418 * The vertex shader code generator.
419 *
420 * Translates VS IR to actual i965 assembly code.
421 */
422 class vec4_generator
423 {
424 public:
/* Most constructor parameters share a name with a private member declared at
 * the end of the class (compiler, log_data, shader_prog, prog, prog_data,
 * mem_ctx, debug_flag, stage_name, stage_abbrev).
 * NOTE(review): the out-of-line constructor presumably stores each argument
 * in the member of the same name -- confirm against the definition.
 */
425 vec4_generator(const struct brw_compiler *compiler, void *log_data,
426 struct gl_shader_program *shader_prog,
427 struct gl_program *prog,
428 struct brw_vue_prog_data *prog_data,
429 void *mem_ctx,
430 bool debug_flag,
431 const char *stage_name,
432 const char *stage_abbrev);
433 ~vec4_generator();
434
/* The only public operation besides construction and destruction.
 * NOTE(review): presumably returns the generated program and stores its size
 * through asm_size; the unit of that size is not visible here -- confirm.
 */
435 const unsigned *generate_assembly(const cfg_t *cfg, unsigned *asm_size);
436
437 private:
438 void generate_code(const cfg_t *cfg);
439
/* Private generate_*() helpers. They work on hardware-level struct brw_reg
 * operands, and most also receive the vec4_instruction being translated.
 * NOTE(review): presumably dispatched from generate_code() -- confirm.
 */
440 void generate_math1_gen4(vec4_instruction *inst,
441 struct brw_reg dst,
442 struct brw_reg src);
443 void generate_math2_gen4(vec4_instruction *inst,
444 struct brw_reg dst,
445 struct brw_reg src0,
446 struct brw_reg src1);
447 void generate_math_gen6(vec4_instruction *inst,
448 struct brw_reg dst,
449 struct brw_reg src0,
450 struct brw_reg src1);
451
452 void generate_tex(vec4_instruction *inst,
453 struct brw_reg dst,
454 struct brw_reg src,
455 struct brw_reg sampler_index);
456
457 void generate_vs_urb_write(vec4_instruction *inst);
458 void generate_gs_urb_write(vec4_instruction *inst);
459 void generate_gs_urb_write_allocate(vec4_instruction *inst);
460 void generate_gs_thread_end(vec4_instruction *inst);
461 void generate_gs_set_write_offset(struct brw_reg dst,
462 struct brw_reg src0,
463 struct brw_reg src1);
464 void generate_gs_set_vertex_count(struct brw_reg dst,
465 struct brw_reg src);
466 void generate_gs_svb_write(vec4_instruction *inst,
467 struct brw_reg dst,
468 struct brw_reg src0,
469 struct brw_reg src1);
470 void generate_gs_svb_set_destination_index(vec4_instruction *inst,
471 struct brw_reg dst,
472 struct brw_reg src);
473 void generate_gs_set_dword_2(struct brw_reg dst, struct brw_reg src);
474 void generate_gs_prepare_channel_masks(struct brw_reg dst);
475 void generate_gs_set_channel_masks(struct brw_reg dst, struct brw_reg src);
476 void generate_gs_get_instance_id(struct brw_reg dst);
477 void generate_gs_ff_sync_set_primitives(struct brw_reg dst,
478 struct brw_reg src0,
479 struct brw_reg src1,
480 struct brw_reg src2);
481 void generate_gs_ff_sync(vec4_instruction *inst,
482 struct brw_reg dst,
483 struct brw_reg src0,
484 struct brw_reg src1);
485 void generate_gs_set_primitive_id(struct brw_reg dst);
486 void generate_oword_dual_block_offsets(struct brw_reg m1,
487 struct brw_reg index);
488 void generate_scratch_write(vec4_instruction *inst,
489 struct brw_reg dst,
490 struct brw_reg src,
491 struct brw_reg index);
492 void generate_scratch_read(vec4_instruction *inst,
493 struct brw_reg dst,
494 struct brw_reg index);
495 void generate_pull_constant_load(vec4_instruction *inst,
496 struct brw_reg dst,
497 struct brw_reg index,
498 struct brw_reg offset);
499 void generate_pull_constant_load_gen7(vec4_instruction *inst,
500 struct brw_reg dst,
501 struct brw_reg surf_index,
502 struct brw_reg offset);
503 void generate_set_simd4x2_header_gen9(vec4_instruction *inst,
504 struct brw_reg dst);
505 void generate_unpack_flags(struct brw_reg dst);
506
507 const struct brw_compiler *compiler;
508 void *log_data; /* Passed to compiler->*_log functions */
509
510 const struct brw_device_info *devinfo;
511
/* NOTE(review): presumably the code-generation state that the generate_*()
 * helpers emit into -- confirm against the definitions.
 */
512 struct brw_codegen *p;
513
514 struct gl_shader_program *shader_prog;
/* Held as pointer-to-const even though the constructor parameter of the same
 * name is a non-const pointer.
 */
515 const struct gl_program *prog;
516
517 struct brw_vue_prog_data *prog_data;
518
519 void *mem_ctx;
520 const char *stage_name;
521 const char *stage_abbrev;
/* const member: it can only be set in the constructor's initializer list. */
522 const bool debug_flag;
523 };
524
525 } /* namespace brw */
526 #endif /* __cplusplus */
527
528 #endif /* BRW_VEC4_H */