i965/vec4: Move c->last_scratch into vec4_visitor.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_vec4.h
1 /*
2 * Copyright © 2011 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #ifndef BRW_VEC4_H
25 #define BRW_VEC4_H
26
27 #include <stdint.h>
28 #include "brw_shader.h"
29 #include "main/compiler.h"
30 #include "program/hash_table.h"
31 #include "brw_program.h"
32
33 #ifdef __cplusplus
34 #include "brw_ir_vec4.h"
35
36 extern "C" {
37 #endif
38
39 #include "brw_context.h"
40 #include "brw_eu.h"
41 #include "intel_asm_annotation.h"
42
43 #ifdef __cplusplus
44 }; /* extern "C" */
45 #endif
46
47 #include "glsl/ir.h"
48
49
50 #ifdef __cplusplus
51 extern "C" {
52 #endif
53
/*
 * Declared inside the surrounding extern "C" region, so C++ translation
 * units see this function with C linkage.
 *
 * NOTE(review): `key` is a non-const pointer, so presumably this fills in
 * *key for a precompile of `prog` -- confirm against the definition.
 */
54 void
55 brw_vue_setup_prog_key_for_precompile(struct gl_context *ctx,
56 struct brw_vue_prog_key *key,
57 GLuint id, struct gl_program *prog);
58
59 #ifdef __cplusplus
60 } /* extern "C" */
61
62 namespace brw {
63
64 class vec4_live_variables;
65
66 /**
67 * The vertex shader front-end.
68 *
69 * Translates either GLSL IR or Mesa IR (for ARB_vertex_program and
70 * fixed-function) into VS IR.
 *
 * The class is abstract: its protected section declares pure-virtual hooks
 * (setup_payload(), emit_prolog(), emit_program_code(), emit_thread_end(),
 * emit_urb_write_header(), emit_urb_write_opcode() and
 * make_reg_for_system_value()) that a concrete subclass has to provide.
71 */
72 class vec4_visitor : public backend_shader, public ir_visitor
73 {
74 public:
75 vec4_visitor(const struct brw_compiler *compiler,
76 void *log_data,
77 struct gl_program *prog,
78 const struct brw_vue_prog_key *key,
79 struct brw_vue_prog_data *prog_data,
80 struct gl_shader_program *shader_prog,
81 gl_shader_stage stage,
82 void *mem_ctx,
83 bool no_spills,
84 int shader_time_index);
85 ~vec4_visitor();
86
/*
 * Convenience destinations built from brw_null_reg(): dst_null_f() wraps
 * the register as returned, dst_null_d() retypes it to
 * BRW_REGISTER_TYPE_D and dst_null_ud() to BRW_REGISTER_TYPE_UD.
 */
87 dst_reg dst_null_f()
88 {
89 return dst_reg(brw_null_reg());
90 }
91
92 dst_reg dst_null_d()
93 {
94 return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
95 }
96
97 dst_reg dst_null_ud()
98 {
99 return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
100 }
101
/*
 * Both members are const pointers, so they cannot be reseated after
 * construction; the program key is additionally read-only through `key`.
 */
102 const struct brw_vue_prog_key * const key;
103 struct brw_vue_prog_data * const prog_data;
104 unsigned int sanity_param_count;
105
/* NOTE(review): presumably the failure state set by fail() -- confirm. */
106 char *fail_msg;
107 bool failed;
108
109 /**
110 * GLSL IR currently being processed, which is associated with our
111 * driver IR instructions for debugging purposes.
112 */
113 const void *base_ir;
114 const char *current_annotation;
115
116 int first_non_payload_grf;
117 unsigned int max_grf;
118 int *virtual_grf_start;
119 int *virtual_grf_end;
120 brw::vec4_live_variables *live_intervals;
121 dst_reg userplane[MAX_CLIP_PLANES];
122
123 dst_reg *variable_storage(ir_variable *var);
124
125 void reladdr_to_temp(ir_instruction *ir, src_reg *reg, int *num_reladdr);
126
127 bool need_all_constants_in_pull_buffer;
128
129 /**
130 * \name Visit methods
131 *
132 * As typical for the visitor pattern, there must be one \c visit method for
133 * each concrete subclass of \c ir_instruction. Virtual base classes within
134 * the hierarchy should not have \c visit methods.
135 */
136 /*@{*/
137 virtual void visit(ir_variable *);
138 virtual void visit(ir_loop *);
139 virtual void visit(ir_loop_jump *);
140 virtual void visit(ir_function_signature *);
141 virtual void visit(ir_function *);
142 virtual void visit(ir_expression *);
143 virtual void visit(ir_swizzle *);
144 virtual void visit(ir_dereference_variable *);
145 virtual void visit(ir_dereference_array *);
146 virtual void visit(ir_dereference_record *);
147 virtual void visit(ir_assignment *);
148 virtual void visit(ir_constant *);
149 virtual void visit(ir_call *);
150 virtual void visit(ir_return *);
151 virtual void visit(ir_discard *);
152 virtual void visit(ir_texture *);
153 virtual void visit(ir_if *);
154 virtual void visit(ir_emit_vertex *);
155 virtual void visit(ir_end_primitive *);
156 virtual void visit(ir_barrier *);
157 /*@}*/
158
159 src_reg result;
160
161 /* Regs for vertex results. Generated at ir_variable visiting time
162 * for the ir->location's used.
163 */
164 dst_reg output_reg[BRW_VARYING_SLOT_COUNT];
165 const char *output_reg_annotation[BRW_VARYING_SLOT_COUNT];
166 int *uniform_size;
167 int *uniform_vector_size;
168 int uniform_array_size; /**< Size of the uniform_size and uniform_vector_size arrays */
169 int uniforms;
170
171 src_reg shader_start_time;
172
173 struct hash_table *variable_ht;
174
175 bool run(gl_clip_plane *clip_planes);
176 void fail(const char *msg, ...);
177
178 void setup_uniform_clipplane_values(gl_clip_plane *clip_planes);
179 void setup_uniform_values(ir_variable *ir);
180 void setup_builtin_uniform_values(ir_variable *ir);
181 int setup_uniforms(int payload_reg);
182 bool reg_allocate_trivial();
183 bool reg_allocate();
184 void evaluate_spill_costs(float *spill_costs, bool *no_spill);
185 int choose_spill_reg(struct ra_graph *g);
186 void spill_reg(int spill_reg);
187 void move_grf_array_access_to_scratch();
188 void move_uniform_array_access_to_pull_constants();
189 void move_push_constants_to_pull_constants();
190 void split_uniform_registers();
191 void pack_uniform_registers();
192 void calculate_live_intervals();
193 void invalidate_live_intervals();
194 void split_virtual_grfs();
/*
 * NOTE(review): the bool-returning opt_* passes and dead_code_eliminate()
 * presumably report whether they changed the program -- TODO confirm.
 */
195 bool opt_vector_float();
196 bool opt_reduce_swizzle();
197 bool dead_code_eliminate();
198 int var_range_start(unsigned v, unsigned n) const;
199 int var_range_end(unsigned v, unsigned n) const;
200 bool virtual_grf_interferes(int a, int b);
201 bool opt_copy_propagation(bool do_constant_prop = true);
202 bool opt_cse_local(bblock_t *block);
203 bool opt_cse();
204 bool opt_algebraic();
205 bool opt_register_coalesce();
206 bool eliminate_find_live_channel();
207 bool is_dep_ctrl_unsafe(const vec4_instruction *inst);
208 void opt_set_dependency_control();
209 void opt_schedule_instructions();
210
/*
 * emit() overloads: the first takes an already-built instruction, the
 * others take an opcode plus an optional destination and up to three
 * sources. All of them return a vec4_instruction pointer.
 */
211 vec4_instruction *emit(vec4_instruction *inst);
212
213 vec4_instruction *emit(enum opcode opcode);
214 vec4_instruction *emit(enum opcode opcode, const dst_reg &dst);
215 vec4_instruction *emit(enum opcode opcode, const dst_reg &dst,
216 const src_reg &src0);
217 vec4_instruction *emit(enum opcode opcode, const dst_reg &dst,
218 const src_reg &src0, const src_reg &src1);
219 vec4_instruction *emit(enum opcode opcode, const dst_reg &dst,
220 const src_reg &src0, const src_reg &src1,
221 const src_reg &src2);
222
/* NOTE(review): presumably inserts new_inst ahead of inst in block -- confirm. */
223 vec4_instruction *emit_before(bblock_t *block,
224 vec4_instruction *inst,
225 vec4_instruction *new_inst);
226
/*
 * EMITn(op) declares a member function named `op` taking one destination
 * and n sources and returning a vec4_instruction pointer. The trailing
 * semicolon is part of each macro, so the uses below carry none. The
 * macros are #undef'd right after the list.
 */
227 #define EMIT1(op) vec4_instruction *op(const dst_reg &, const src_reg &);
228 #define EMIT2(op) vec4_instruction *op(const dst_reg &, const src_reg &, const src_reg &);
229 #define EMIT3(op) vec4_instruction *op(const dst_reg &, const src_reg &, const src_reg &, const src_reg &);
230 EMIT1(MOV)
231 EMIT1(NOT)
232 EMIT1(RNDD)
233 EMIT1(RNDE)
234 EMIT1(RNDZ)
235 EMIT1(FRC)
236 EMIT1(F32TO16)
237 EMIT1(F16TO32)
238 EMIT2(ADD)
239 EMIT2(MUL)
240 EMIT2(MACH)
241 EMIT2(MAC)
242 EMIT2(AND)
243 EMIT2(OR)
244 EMIT2(XOR)
245 EMIT2(DP3)
246 EMIT2(DP4)
247 EMIT2(DPH)
248 EMIT2(SHL)
249 EMIT2(SHR)
250 EMIT2(ASR)
/* CMP and IF are declared by hand because their signatures differ from the EMITn shapes. */
251 vec4_instruction *CMP(dst_reg dst, src_reg src0, src_reg src1,
252 enum brw_conditional_mod condition);
253 vec4_instruction *IF(src_reg src0, src_reg src1,
254 enum brw_conditional_mod condition);
255 vec4_instruction *IF(enum brw_predicate predicate);
256 EMIT1(SCRATCH_READ)
257 EMIT2(SCRATCH_WRITE)
258 EMIT3(LRP)
259 EMIT1(BFREV)
260 EMIT3(BFE)
261 EMIT2(BFI1)
262 EMIT3(BFI2)
263 EMIT1(FBH)
264 EMIT1(FBL)
265 EMIT1(CBIT)
266 EMIT3(MAD)
267 EMIT2(ADDC)
268 EMIT2(SUBB)
269 #undef EMIT1
270 #undef EMIT2
271 #undef EMIT3
272
273 int implied_mrf_writes(vec4_instruction *inst);
274
275 bool try_rewrite_rhs_to_dst(ir_assignment *ir,
276 dst_reg dst,
277 src_reg src,
278 vec4_instruction *pre_rhs_inst,
279 vec4_instruction *last_rhs_inst);
280
281 /** Walks an exec_list of ir_instruction and sends it through this visitor. */
282 void visit_instructions(const exec_list *list);
283
284 void emit_vp_sop(enum brw_conditional_mod condmod, dst_reg dst,
285 src_reg src0, src_reg src1, src_reg one);
286
287 void emit_bool_to_cond_code(ir_rvalue *ir, enum brw_predicate *predicate);
288 void emit_if_gen6(ir_if *ir);
289
290 void emit_minmax(enum brw_conditional_mod conditionalmod, dst_reg dst,
291 src_reg src0, src_reg src1);
292
293 void emit_lrp(const dst_reg &dst,
294 const src_reg &x, const src_reg &y, const src_reg &a);
295
296 /** Copy any live channel from \p src to the first channel of \p dst. */
297 void emit_uniformize(const dst_reg &dst, const src_reg &src);
298
299 void emit_block_move(dst_reg *dst, src_reg *src,
300 const struct glsl_type *type, brw_predicate predicate);
301
302 void emit_constant_values(dst_reg *dst, ir_constant *value);
303
304 /**
305 * Emit the correct dot-product instruction for the type of arguments
306 */
307 void emit_dp(dst_reg dst, src_reg src0, src_reg src1, unsigned elements);
308
/* emit_scalar() is overloaded for one and for two source operands. */
309 void emit_scalar(ir_instruction *ir, enum prog_opcode op,
310 dst_reg dst, src_reg src0);
311
312 void emit_scalar(ir_instruction *ir, enum prog_opcode op,
313 dst_reg dst, src_reg src0, src_reg src1);
314
315 src_reg fix_3src_operand(src_reg src);
316
/* src1 defaults to a default-constructed src_reg, so it may be omitted by callers. */
317 void emit_math(enum opcode opcode, const dst_reg &dst, const src_reg &src0,
318 const src_reg &src1 = src_reg());
319 src_reg fix_math_operand(src_reg src);
320
321 void emit_pack_half_2x16(dst_reg dst, src_reg src0);
322 void emit_unpack_half_2x16(dst_reg dst, src_reg src0);
323 void emit_unpack_unorm_4x8(const dst_reg &dst, src_reg src0);
324 void emit_unpack_snorm_4x8(const dst_reg &dst, src_reg src0);
325 void emit_pack_unorm_4x8(const dst_reg &dst, const src_reg &src0);
326 void emit_pack_snorm_4x8(const dst_reg &dst, const src_reg &src0);
327
328 uint32_t gather_channel(ir_texture *ir, uint32_t sampler);
329 src_reg emit_mcs_fetch(ir_texture *ir, src_reg coordinate, src_reg sampler);
330 void emit_gen6_gather_wa(uint8_t wa, dst_reg dst);
331 void swizzle_result(ir_texture *ir, src_reg orig_val, uint32_t sampler);
332
333 void emit_ndc_computation();
334 void emit_psiz_and_flags(dst_reg reg);
335 void emit_clip_distances(dst_reg reg, int offset);
336 vec4_instruction *emit_generic_urb_slot(dst_reg reg, int varying);
337 void emit_urb_slot(dst_reg reg, int varying);
338
339 void emit_shader_time_begin();
340 void emit_shader_time_end();
341 void emit_shader_time_write(int shader_time_subindex, src_reg value);
342
343 void emit_untyped_atomic(unsigned atomic_op, unsigned surf_index,
344 dst_reg dst, src_reg offset, src_reg src0,
345 src_reg src1);
346
347 void emit_untyped_surface_read(unsigned surf_index, dst_reg dst,
348 src_reg offset);
349
350 src_reg get_scratch_offset(bblock_t *block, vec4_instruction *inst,
351 src_reg *reladdr, int reg_offset);
352 src_reg get_pull_constant_offset(bblock_t *block, vec4_instruction *inst,
353 src_reg *reladdr, int reg_offset);
354 void emit_scratch_read(bblock_t *block, vec4_instruction *inst,
355 dst_reg dst,
356 src_reg orig_src,
357 int base_offset);
358 void emit_scratch_write(bblock_t *block, vec4_instruction *inst,
359 int base_offset);
360 void emit_pull_constant_load(bblock_t *block, vec4_instruction *inst,
361 dst_reg dst,
362 src_reg orig_src,
363 int base_offset);
364 void emit_pull_constant_load_reg(dst_reg dst,
365 src_reg surf_index,
366 src_reg offset,
367 bblock_t *before_block,
368 vec4_instruction *before_inst);
369 src_reg emit_resolve_reladdr(int scratch_loc[], bblock_t *block,
370 vec4_instruction *inst, src_reg src);
371
372 bool try_emit_mad(ir_expression *ir);
373 bool try_emit_b2f_of_compare(ir_expression *ir);
374 void resolve_ud_negate(src_reg *reg);
375 void resolve_bool_comparison(ir_rvalue *rvalue, src_reg *reg);
376
377 src_reg get_timestamp();
378
379 bool process_move_condition(ir_rvalue *ir);
380
/* dump_instruction() is overloaded: with and without an explicit FILE to write to. */
381 void dump_instruction(backend_instruction *inst);
382 void dump_instruction(backend_instruction *inst, FILE *file);
383
384 void visit_atomic_counter_intrinsic(ir_call *ir);
385
386 protected:
387 void emit_vertex();
388 void lower_attributes_to_hw_regs(const int *attribute_map,
389 bool interleaved);
390 void setup_payload_interference(struct ra_graph *g, int first_payload_node,
391 int reg_node_count);
/*
 * Hooks for subclasses. Those declared "= 0" are pure virtual and must be
 * overridden; assign_binding_table_offsets() and compute_array_stride()
 * are virtual but not pure.
 */
392 virtual dst_reg *make_reg_for_system_value(ir_variable *ir) = 0;
393 virtual void assign_binding_table_offsets();
394 virtual void setup_payload() = 0;
395 virtual void emit_prolog() = 0;
396 virtual void emit_program_code() = 0;
397 virtual void emit_thread_end() = 0;
398 virtual void emit_urb_write_header(int mrf) = 0;
399 virtual vec4_instruction *emit_urb_write_opcode(bool complete) = 0;
400 virtual int compute_array_stride(ir_dereference_array *ir);
401
402 private:
403 /**
404 * If true, then register allocation should fail instead of spilling.
405 */
406 const bool no_spills;
407
408 int shader_time_index;
409
410 unsigned last_scratch; /**< measured in 32-byte (register size) units */
411 };
412
413
414 /**
415 * The vertex shader code generator.
416 *
417 * Translates VS IR to actual i965 assembly code.
 *
 * The only public operations are construction, destruction and
 * generate_assembly(); every generate_*() helper below is private.
418 */
419 class vec4_generator
420 {
421 public:
422 vec4_generator(const struct brw_compiler *compiler, void *log_data,
423 struct gl_shader_program *shader_prog,
424 struct gl_program *prog,
425 struct brw_vue_prog_data *prog_data,
426 void *mem_ctx,
427 bool debug_flag,
428 const char *stage_name,
429 const char *stage_abbrev);
430 ~vec4_generator();
431
/*
 * Public entry point: takes a control-flow graph and returns a pointer to
 * const unsigned data.
 * NOTE(review): asm_size is a non-const pointer, so presumably it is an
 * out-parameter receiving the size of the returned code -- confirm against
 * the definition.
 */
432 const unsigned *generate_assembly(const cfg_t *cfg, unsigned *asm_size);
433
434 private:
435 void generate_code(const cfg_t *cfg);
436
/*
 * NOTE(review): judging by their names, each generate_*() helper below
 * handles one opcode or opcode family -- confirm in the implementation.
 */
437 void generate_math1_gen4(vec4_instruction *inst,
438 struct brw_reg dst,
439 struct brw_reg src);
440 void generate_math2_gen4(vec4_instruction *inst,
441 struct brw_reg dst,
442 struct brw_reg src0,
443 struct brw_reg src1);
444 void generate_math_gen6(vec4_instruction *inst,
445 struct brw_reg dst,
446 struct brw_reg src0,
447 struct brw_reg src1);
448
449 void generate_tex(vec4_instruction *inst,
450 struct brw_reg dst,
451 struct brw_reg src,
452 struct brw_reg sampler_index);
453
454 void generate_vs_urb_write(vec4_instruction *inst);
455 void generate_gs_urb_write(vec4_instruction *inst);
456 void generate_gs_urb_write_allocate(vec4_instruction *inst);
457 void generate_gs_thread_end(vec4_instruction *inst);
458 void generate_gs_set_write_offset(struct brw_reg dst,
459 struct brw_reg src0,
460 struct brw_reg src1);
461 void generate_gs_set_vertex_count(struct brw_reg dst,
462 struct brw_reg src);
463 void generate_gs_svb_write(vec4_instruction *inst,
464 struct brw_reg dst,
465 struct brw_reg src0,
466 struct brw_reg src1);
467 void generate_gs_svb_set_destination_index(vec4_instruction *inst,
468 struct brw_reg dst,
469 struct brw_reg src);
470 void generate_gs_set_dword_2(struct brw_reg dst, struct brw_reg src);
471 void generate_gs_prepare_channel_masks(struct brw_reg dst);
472 void generate_gs_set_channel_masks(struct brw_reg dst, struct brw_reg src);
473 void generate_gs_get_instance_id(struct brw_reg dst);
474 void generate_gs_ff_sync_set_primitives(struct brw_reg dst,
475 struct brw_reg src0,
476 struct brw_reg src1,
477 struct brw_reg src2);
478 void generate_gs_ff_sync(vec4_instruction *inst,
479 struct brw_reg dst,
480 struct brw_reg src0,
481 struct brw_reg src1);
482 void generate_gs_set_primitive_id(struct brw_reg dst);
483 void generate_oword_dual_block_offsets(struct brw_reg m1,
484 struct brw_reg index);
485 void generate_scratch_write(vec4_instruction *inst,
486 struct brw_reg dst,
487 struct brw_reg src,
488 struct brw_reg index);
489 void generate_scratch_read(vec4_instruction *inst,
490 struct brw_reg dst,
491 struct brw_reg index);
492 void generate_pull_constant_load(vec4_instruction *inst,
493 struct brw_reg dst,
494 struct brw_reg index,
495 struct brw_reg offset);
496 void generate_pull_constant_load_gen7(vec4_instruction *inst,
497 struct brw_reg dst,
498 struct brw_reg surf_index,
499 struct brw_reg offset);
500 void generate_set_simd4x2_header_gen9(vec4_instruction *inst,
501 struct brw_reg dst);
502 void generate_unpack_flags(struct brw_reg dst);
503
504 const struct brw_compiler *compiler;
505 void *log_data; /* Passed to compiler->*_log functions */
506
507 const struct brw_device_info *devinfo;
508
509 struct brw_codegen *p;
510
511 struct gl_shader_program *shader_prog;
512 const struct gl_program *prog;
513
514 struct brw_vue_prog_data *prog_data;
515
516 void *mem_ctx;
517 const char *stage_name;
518 const char *stage_abbrev;
/* const member: its value is fixed at construction. */
519 const bool debug_flag;
520 };
521
522 } /* namespace brw */
523 #endif /* __cplusplus */
524
525 #endif /* BRW_VEC4_H */