i965: Define helper function to copy an arbitrary live component from some register.
[mesa.git] src/mesa/drivers/dri/i965/brw_vec4.h
/*
 * Copyright © 2011 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#ifndef BRW_VEC4_H
#define BRW_VEC4_H

#include <stdint.h>
#include "brw_shader.h"
#include "main/compiler.h"
#include "program/hash_table.h"
#include "brw_program.h"

#ifdef __cplusplus
#include "brw_ir_vec4.h"

extern "C" {
#endif

#include "brw_context.h"
#include "brw_eu.h"
#include "intel_asm_annotation.h"

#ifdef __cplusplus
}; /* extern "C" */
#endif

#include "glsl/ir.h"


struct brw_vec4_compile {
   GLuint last_scratch; /**< measured in 32-byte (register size) units */
};

#ifdef __cplusplus
extern "C" {
#endif

void
brw_vue_setup_prog_key_for_precompile(struct gl_context *ctx,
                                      struct brw_vue_prog_key *key,
                                      GLuint id, struct gl_program *prog);

#ifdef __cplusplus
} /* extern "C" */

namespace brw {

class vec4_live_variables;

/**
 * The vertex shader front-end.
 *
 * Translates either GLSL IR or Mesa IR (for ARB_vertex_program and
 * fixed-function) into VS IR.
 */
class vec4_visitor : public backend_visitor
{
public:
   vec4_visitor(struct brw_context *brw,
                struct brw_vec4_compile *c,
                struct gl_program *prog,
                const struct brw_vue_prog_key *key,
                struct brw_vue_prog_data *prog_data,
                struct gl_shader_program *shader_prog,
                gl_shader_stage stage,
                void *mem_ctx,
                bool no_spills,
                shader_time_shader_type st_base,
                shader_time_shader_type st_written,
                shader_time_shader_type st_reset);
   ~vec4_visitor();

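   /*
    * Convenience helpers returning the hardware null register as a
    * destination, retyped as float, signed or unsigned integer.  A null
    * destination is typically used when an instruction is emitted only for
    * its side effects (e.g. updating the condition flags) and the result
    * itself can be discarded.
    */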
   dst_reg dst_null_f()
   {
      return dst_reg(brw_null_reg());
   }

   dst_reg dst_null_d()
   {
      return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   }

   dst_reg dst_null_ud()
   {
      return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
   }

   struct brw_vec4_compile * const c;
   const struct brw_vue_prog_key * const key;
   struct brw_vue_prog_data * const prog_data;
   unsigned int sanity_param_count;

   char *fail_msg;
   bool failed;

   /**
    * GLSL IR currently being processed, which is associated with our
    * driver IR instructions for debugging purposes.
    */
   const void *base_ir;
   const char *current_annotation;

   int first_non_payload_grf;
   unsigned int max_grf;
   int *virtual_grf_start;
   int *virtual_grf_end;
   brw::vec4_live_variables *live_intervals;
   dst_reg userplane[MAX_CLIP_PLANES];

   dst_reg *variable_storage(ir_variable *var);

   void reladdr_to_temp(ir_instruction *ir, src_reg *reg, int *num_reladdr);

   bool need_all_constants_in_pull_buffer;

   /**
    * \name Visit methods
    *
    * As typical for the visitor pattern, there must be one \c visit method for
    * each concrete subclass of \c ir_instruction.  Virtual base classes within
    * the hierarchy should not have \c visit methods.
    */
   /*@{*/
   virtual void visit(ir_variable *);
   virtual void visit(ir_loop *);
   virtual void visit(ir_loop_jump *);
   virtual void visit(ir_function_signature *);
   virtual void visit(ir_function *);
   virtual void visit(ir_expression *);
   virtual void visit(ir_swizzle *);
   virtual void visit(ir_dereference_variable *);
   virtual void visit(ir_dereference_array *);
   virtual void visit(ir_dereference_record *);
   virtual void visit(ir_assignment *);
   virtual void visit(ir_constant *);
   virtual void visit(ir_call *);
   virtual void visit(ir_return *);
   virtual void visit(ir_discard *);
   virtual void visit(ir_texture *);
   virtual void visit(ir_if *);
   virtual void visit(ir_emit_vertex *);
   virtual void visit(ir_end_primitive *);
   /*@}*/

   src_reg result;

   /* Regs for vertex results.  Generated at ir_variable visiting time
    * for the ir->location values that are used.
    */
   dst_reg output_reg[BRW_VARYING_SLOT_COUNT];
   const char *output_reg_annotation[BRW_VARYING_SLOT_COUNT];
   int *uniform_size;
   int *uniform_vector_size;
   int uniform_array_size; /**< Size of the uniform_[vector_]size arrays */
   int uniforms;

   src_reg shader_start_time;

   struct hash_table *variable_ht;

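   /** Translate and optimize the program; returns false if fail() was called. */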
   bool run(void);
   void fail(const char *msg, ...);

   void setup_uniform_clipplane_values();
   void setup_uniform_values(ir_variable *ir);
   void setup_builtin_uniform_values(ir_variable *ir);
   int setup_uniforms(int payload_reg);
   bool reg_allocate_trivial();
   bool reg_allocate();
   void evaluate_spill_costs(float *spill_costs, bool *no_spill);
   int choose_spill_reg(struct ra_graph *g);
   void spill_reg(int spill_reg);
   void move_grf_array_access_to_scratch();
   void move_uniform_array_access_to_pull_constants();
   void move_push_constants_to_pull_constants();
   void split_uniform_registers();
   void pack_uniform_registers();
   void calculate_live_intervals();
   void invalidate_live_intervals();
   void split_virtual_grfs();
   bool opt_vector_float();
   bool opt_reduce_swizzle();
   bool dead_code_eliminate();
   int var_range_start(unsigned v, unsigned n) const;
   int var_range_end(unsigned v, unsigned n) const;
   bool virtual_grf_interferes(int a, int b);
   bool opt_copy_propagation(bool do_constant_prop = true);
   bool opt_cse_local(bblock_t *block);
   bool opt_cse();
   bool opt_algebraic();
   bool opt_register_coalesce();
   bool eliminate_find_live_channel();
   bool is_dep_ctrl_unsafe(const vec4_instruction *inst);
   void opt_set_dependency_control();
   void opt_schedule_instructions();

   vec4_instruction *emit(vec4_instruction *inst);

   vec4_instruction *emit(enum opcode opcode);
   vec4_instruction *emit(enum opcode opcode, const dst_reg &dst);
   vec4_instruction *emit(enum opcode opcode, const dst_reg &dst,
                          const src_reg &src0);
   vec4_instruction *emit(enum opcode opcode, const dst_reg &dst,
                          const src_reg &src0, const src_reg &src1);
   vec4_instruction *emit(enum opcode opcode, const dst_reg &dst,
                          const src_reg &src0, const src_reg &src1,
                          const src_reg &src2);

   vec4_instruction *emit_before(bblock_t *block,
                                 vec4_instruction *inst,
                                 vec4_instruction *new_inst);

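   /*
    * Convenience emitters: EMIT1(op), EMIT2(op) and EMIT3(op) declare a
    * method named after the opcode that takes a destination plus one, two
    * or three sources, e.g. EMIT2(ADD) declares
    * vec4_instruction *ADD(const dst_reg &, const src_reg &, const src_reg &).
    */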
#define EMIT1(op) vec4_instruction *op(const dst_reg &, const src_reg &);
#define EMIT2(op) vec4_instruction *op(const dst_reg &, const src_reg &, const src_reg &);
#define EMIT3(op) vec4_instruction *op(const dst_reg &, const src_reg &, const src_reg &, const src_reg &);
   EMIT1(MOV)
   EMIT1(NOT)
   EMIT1(RNDD)
   EMIT1(RNDE)
   EMIT1(RNDZ)
   EMIT1(FRC)
   EMIT1(F32TO16)
   EMIT1(F16TO32)
   EMIT2(ADD)
   EMIT2(MUL)
   EMIT2(MACH)
   EMIT2(MAC)
   EMIT2(AND)
   EMIT2(OR)
   EMIT2(XOR)
   EMIT2(DP3)
   EMIT2(DP4)
   EMIT2(DPH)
   EMIT2(SHL)
   EMIT2(SHR)
   EMIT2(ASR)
   vec4_instruction *CMP(dst_reg dst, src_reg src0, src_reg src1,
                         enum brw_conditional_mod condition);
   vec4_instruction *IF(src_reg src0, src_reg src1,
                        enum brw_conditional_mod condition);
   vec4_instruction *IF(enum brw_predicate predicate);
   EMIT1(SCRATCH_READ)
   EMIT2(SCRATCH_WRITE)
   EMIT3(LRP)
   EMIT1(BFREV)
   EMIT3(BFE)
   EMIT2(BFI1)
   EMIT3(BFI2)
   EMIT1(FBH)
   EMIT1(FBL)
   EMIT1(CBIT)
   EMIT3(MAD)
   EMIT2(ADDC)
   EMIT2(SUBB)
#undef EMIT1
#undef EMIT2
#undef EMIT3

   int implied_mrf_writes(vec4_instruction *inst);

   bool try_rewrite_rhs_to_dst(ir_assignment *ir,
                               dst_reg dst,
                               src_reg src,
                               vec4_instruction *pre_rhs_inst,
                               vec4_instruction *last_rhs_inst);

   /** Walks an exec_list of ir_instruction and sends it through this visitor. */
   void visit_instructions(const exec_list *list);

   void emit_vp_sop(enum brw_conditional_mod condmod, dst_reg dst,
                    src_reg src0, src_reg src1, src_reg one);

   void emit_bool_to_cond_code(ir_rvalue *ir, enum brw_predicate *predicate);
   void emit_if_gen6(ir_if *ir);

   void emit_minmax(enum brw_conditional_mod conditionalmod, dst_reg dst,
                    src_reg src0, src_reg src1);

   void emit_lrp(const dst_reg &dst,
                 const src_reg &x, const src_reg &y, const src_reg &a);

   /** Copy any live channel from \p src to the first channel of \p dst. */
   void emit_uniformize(const dst_reg &dst, const src_reg &src);
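   /* A typical use of emit_uniformize() (hypothetical sketch, not part of
    * this header) is turning a possibly non-uniform value into one that is
    * safe to use where a single value is required, e.g. a dynamically
    * indexed sampler or surface index:
    *
    *    src_reg index = ...;  // may differ per channel
    *    src_reg uniform_index = src_reg(this, glsl_type::uint_type);
    *    emit_uniformize(dst_reg(uniform_index), index);
    *    // The first channel of uniform_index now holds one live value.
    */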

   void emit_block_move(dst_reg *dst, src_reg *src,
                        const struct glsl_type *type, brw_predicate predicate);

   void emit_constant_values(dst_reg *dst, ir_constant *value);

   /**
    * Emit the correct dot-product instruction for the type of arguments
    */
   void emit_dp(dst_reg dst, src_reg src0, src_reg src1, unsigned elements);

   void emit_scalar(ir_instruction *ir, enum prog_opcode op,
                    dst_reg dst, src_reg src0);

   void emit_scalar(ir_instruction *ir, enum prog_opcode op,
                    dst_reg dst, src_reg src0, src_reg src1);

   src_reg fix_3src_operand(src_reg src);

   void emit_math(enum opcode opcode, const dst_reg &dst, const src_reg &src0,
                  const src_reg &src1 = src_reg());
   src_reg fix_math_operand(src_reg src);

   void emit_pack_half_2x16(dst_reg dst, src_reg src0);
   void emit_unpack_half_2x16(dst_reg dst, src_reg src0);
   void emit_unpack_unorm_4x8(const dst_reg &dst, src_reg src0);
   void emit_unpack_snorm_4x8(const dst_reg &dst, src_reg src0);
   void emit_pack_unorm_4x8(const dst_reg &dst, const src_reg &src0);
   void emit_pack_snorm_4x8(const dst_reg &dst, const src_reg &src0);

   uint32_t gather_channel(ir_texture *ir, uint32_t sampler);
   src_reg emit_mcs_fetch(ir_texture *ir, src_reg coordinate, src_reg sampler);
   void emit_gen6_gather_wa(uint8_t wa, dst_reg dst);
   void swizzle_result(ir_texture *ir, src_reg orig_val, uint32_t sampler);

   void emit_ndc_computation();
   void emit_psiz_and_flags(dst_reg reg);
   void emit_clip_distances(dst_reg reg, int offset);
   vec4_instruction *emit_generic_urb_slot(dst_reg reg, int varying);
   void emit_urb_slot(dst_reg reg, int varying);

   void emit_shader_time_begin();
   void emit_shader_time_end();
   void emit_shader_time_write(enum shader_time_shader_type type,
                               src_reg value);

   void emit_untyped_atomic(unsigned atomic_op, unsigned surf_index,
                            dst_reg dst, src_reg offset, src_reg src0,
                            src_reg src1);

   void emit_untyped_surface_read(unsigned surf_index, dst_reg dst,
                                  src_reg offset);

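   /*
    * Helpers for accessing virtual GRFs that have been moved to scratch
    * space and for uniforms that have been demoted to the pull constant
    * buffer.
    */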
   src_reg get_scratch_offset(bblock_t *block, vec4_instruction *inst,
                              src_reg *reladdr, int reg_offset);
   src_reg get_pull_constant_offset(bblock_t *block, vec4_instruction *inst,
                                    src_reg *reladdr, int reg_offset);
   void emit_scratch_read(bblock_t *block, vec4_instruction *inst,
                          dst_reg dst,
                          src_reg orig_src,
                          int base_offset);
   void emit_scratch_write(bblock_t *block, vec4_instruction *inst,
                           int base_offset);
   void emit_pull_constant_load(bblock_t *block, vec4_instruction *inst,
                                dst_reg dst,
                                src_reg orig_src,
                                int base_offset);
   void emit_pull_constant_load_reg(dst_reg dst,
                                    src_reg surf_index,
                                    src_reg offset,
                                    bblock_t *before_block,
                                    vec4_instruction *before_inst);
   src_reg emit_resolve_reladdr(int scratch_loc[], bblock_t *block,
                                vec4_instruction *inst, src_reg src);

   bool try_emit_mad(ir_expression *ir);
   bool try_emit_b2f_of_compare(ir_expression *ir);
   void resolve_ud_negate(src_reg *reg);
   void resolve_bool_comparison(ir_rvalue *rvalue, src_reg *reg);

   src_reg get_timestamp();

   bool process_move_condition(ir_rvalue *ir);

   void dump_instruction(backend_instruction *inst);
   void dump_instruction(backend_instruction *inst, FILE *file);

   void visit_atomic_counter_intrinsic(ir_call *ir);

protected:
   void emit_vertex();
   void lower_attributes_to_hw_regs(const int *attribute_map,
                                    bool interleaved);
   void setup_payload_interference(struct ra_graph *g, int first_payload_node,
                                   int reg_node_count);
   virtual dst_reg *make_reg_for_system_value(ir_variable *ir) = 0;
   virtual void assign_binding_table_offsets();
   virtual void setup_payload() = 0;
   virtual void emit_prolog() = 0;
   virtual void emit_program_code() = 0;
   virtual void emit_thread_end() = 0;
   virtual void emit_urb_write_header(int mrf) = 0;
   virtual vec4_instruction *emit_urb_write_opcode(bool complete) = 0;
   virtual int compute_array_stride(ir_dereference_array *ir);

private:
   /**
    * If true, then register allocation should fail instead of spilling.
    */
   const bool no_spills;

   const shader_time_shader_type st_base;
   const shader_time_shader_type st_written;
   const shader_time_shader_type st_reset;
};


/**
 * The vertex shader code generator.
 *
 * Translates VS IR to actual i965 assembly code.
 */
class vec4_generator
{
public:
   vec4_generator(struct brw_context *brw,
                  struct gl_shader_program *shader_prog,
                  struct gl_program *prog,
                  struct brw_vue_prog_data *prog_data,
                  void *mem_ctx,
                  bool debug_flag,
                  const char *stage_name,
                  const char *stage_abbrev);
   ~vec4_generator();

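   /**
    * Generate the final device binary for \p cfg, storing the size of the
    * returned buffer in \p asm_size.
    */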
   const unsigned *generate_assembly(const cfg_t *cfg, unsigned *asm_size);

private:
   void generate_code(const cfg_t *cfg);

   void generate_math1_gen4(vec4_instruction *inst,
                            struct brw_reg dst,
                            struct brw_reg src);
   void generate_math2_gen4(vec4_instruction *inst,
                            struct brw_reg dst,
                            struct brw_reg src0,
                            struct brw_reg src1);
   void generate_math_gen6(vec4_instruction *inst,
                           struct brw_reg dst,
                           struct brw_reg src0,
                           struct brw_reg src1);

   void generate_tex(vec4_instruction *inst,
                     struct brw_reg dst,
                     struct brw_reg src,
                     struct brw_reg sampler_index);

   void generate_vs_urb_write(vec4_instruction *inst);
   void generate_gs_urb_write(vec4_instruction *inst);
   void generate_gs_urb_write_allocate(vec4_instruction *inst);
   void generate_gs_thread_end(vec4_instruction *inst);
   void generate_gs_set_write_offset(struct brw_reg dst,
                                     struct brw_reg src0,
                                     struct brw_reg src1);
   void generate_gs_set_vertex_count(struct brw_reg dst,
                                     struct brw_reg src);
   void generate_gs_svb_write(vec4_instruction *inst,
                              struct brw_reg dst,
                              struct brw_reg src0,
                              struct brw_reg src1);
   void generate_gs_svb_set_destination_index(vec4_instruction *inst,
                                              struct brw_reg dst,
                                              struct brw_reg src);
   void generate_gs_set_dword_2(struct brw_reg dst, struct brw_reg src);
   void generate_gs_prepare_channel_masks(struct brw_reg dst);
   void generate_gs_set_channel_masks(struct brw_reg dst, struct brw_reg src);
   void generate_gs_get_instance_id(struct brw_reg dst);
   void generate_gs_ff_sync_set_primitives(struct brw_reg dst,
                                           struct brw_reg src0,
                                           struct brw_reg src1,
                                           struct brw_reg src2);
   void generate_gs_ff_sync(vec4_instruction *inst,
                            struct brw_reg dst,
                            struct brw_reg src0,
                            struct brw_reg src1);
   void generate_gs_set_primitive_id(struct brw_reg dst);
   void generate_oword_dual_block_offsets(struct brw_reg m1,
                                          struct brw_reg index);
   void generate_scratch_write(vec4_instruction *inst,
                               struct brw_reg dst,
                               struct brw_reg src,
                               struct brw_reg index);
   void generate_scratch_read(vec4_instruction *inst,
                              struct brw_reg dst,
                              struct brw_reg index);
   void generate_pull_constant_load(vec4_instruction *inst,
                                    struct brw_reg dst,
                                    struct brw_reg index,
                                    struct brw_reg offset);
   void generate_pull_constant_load_gen7(vec4_instruction *inst,
                                         struct brw_reg dst,
                                         struct brw_reg surf_index,
                                         struct brw_reg offset);
   void generate_set_simd4x2_header_gen9(vec4_instruction *inst,
                                         struct brw_reg dst);
   void generate_unpack_flags(struct brw_reg dst);

   struct brw_context *brw;
   const struct brw_device_info *devinfo;

   struct brw_codegen *p;

   struct gl_shader_program *shader_prog;
   const struct gl_program *prog;

   struct brw_vue_prog_data *prog_data;

   void *mem_ctx;
   const char *stage_name;
   const char *stage_abbrev;
   const bool debug_flag;
};

} /* namespace brw */
#endif /* __cplusplus */

#endif /* BRW_VEC4_H */