i965: Define the setup_vector_uniform_values() backend_visitor interface.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_vec4.h
1 /*
2 * Copyright © 2011 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #ifndef BRW_VEC4_H
25 #define BRW_VEC4_H
26
27 #include <stdint.h>
28 #include "brw_shader.h"
29 #include "main/compiler.h"
30 #include "program/hash_table.h"
31 #include "brw_program.h"
32
33 #ifdef __cplusplus
34 #include "brw_ir_vec4.h"
35
36 extern "C" {
37 #endif
38
39 #include "brw_context.h"
40 #include "brw_eu.h"
41 #include "intel_asm_annotation.h"
42
43 #ifdef __cplusplus
44 }; /* extern "C" */
45 #endif
46
47 #include "glsl/ir.h"
48
49
50 #ifdef __cplusplus
51 extern "C" {
52 #endif
53
/**
 * Prototype only; the definition is not part of this header.  It sits inside
 * the surrounding extern "C" guard, so under C++ it has C linkage.
 *
 * NOTE(review): judging by the name and the non-const \p key pointer, this
 * presumably fills \p key with values suitable for precompiling \p prog --
 * confirm against the implementation.  The meaning of \p id is not visible
 * from here.
 */
54 void
55 brw_vue_setup_prog_key_for_precompile(struct gl_context *ctx,
56 struct brw_vue_prog_key *key,
57 GLuint id, struct gl_program *prog);
58
59 #ifdef __cplusplus
60 } /* extern "C" */
61
62 namespace brw {
63
64 class vec4_live_variables;
65
66 /**
67 * The vertex shader front-end.
68 *
69 * Translates either GLSL IR or Mesa IR (for ARB_vertex_program and
70 * fixed-function) into VS IR.
 *
 * The class is abstract: the pure virtual hooks declared in the protected
 * section (make_reg_for_system_value(), setup_payload(), emit_prolog(),
 * emit_program_code(), emit_thread_end(), emit_urb_write_header() and
 * emit_urb_write_opcode()) have no implementation here and must be supplied
 * by a derived class.  Only declarations live in this header; all
 * non-inline member definitions are elsewhere.
71 */
72 class vec4_visitor : public backend_shader, public ir_visitor
73 {
74 public:
75 vec4_visitor(const struct brw_compiler *compiler,
76 void *log_data,
77 struct gl_program *prog,
78 const struct brw_vue_prog_key *key,
79 struct brw_vue_prog_data *prog_data,
80 struct gl_shader_program *shader_prog,
81 gl_shader_stage stage,
82 void *mem_ctx,
83 bool no_spills,
84 int shader_time_index);
85 ~vec4_visitor();
86
/** Destination wrapping brw_null_reg() as-is (no retype applied). */
87 dst_reg dst_null_f()
88 {
89 return dst_reg(brw_null_reg());
90 }
91
/** Destination wrapping brw_null_reg() retyped to BRW_REGISTER_TYPE_D. */
92 dst_reg dst_null_d()
93 {
94 return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
95 }
96
/** Destination wrapping brw_null_reg() retyped to BRW_REGISTER_TYPE_UD. */
97 dst_reg dst_null_ud()
98 {
99 return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
100 }
101
/* Both pointers below are themselves const, so they can only be bound in
 * the constructor and are never reseated afterwards.  The key is also
 * read-only through this pointer; prog_data is writable.
 */
102 const struct brw_vue_prog_key * const key;
103 struct brw_vue_prog_data * const prog_data;
104 unsigned int sanity_param_count;
105
/* NOTE(review): presumably written by fail() -- the definition is not in
 * this header; confirm there.
 */
106 char *fail_msg;
107 bool failed;
108
109 /**
110 * GLSL IR currently being processed, which is associated with our
111 * driver IR instructions for debugging purposes.
112 */
113 const void *base_ir;
114 const char *current_annotation;
115
116 int first_non_payload_grf;
117 unsigned int max_grf;
118 int *virtual_grf_start;
119 int *virtual_grf_end;
120 brw::vec4_live_variables *live_intervals;
121 dst_reg userplane[MAX_CLIP_PLANES]; /* one entry per possible clip plane */
122
123 dst_reg *variable_storage(ir_variable *var);
124
125 void reladdr_to_temp(ir_instruction *ir, src_reg *reg, int *num_reladdr);
126
127 bool need_all_constants_in_pull_buffer;
128
129 /**
130 * \name Visit methods
131 *
132 * As typical for the visitor pattern, there must be one \c visit method for
133 * each concrete subclass of \c ir_instruction. Virtual base classes within
134 * the hierarchy should not have \c visit methods.
135 */
136 /*@{*/
137 virtual void visit(ir_variable *);
138 virtual void visit(ir_loop *);
139 virtual void visit(ir_loop_jump *);
140 virtual void visit(ir_function_signature *);
141 virtual void visit(ir_function *);
142 virtual void visit(ir_expression *);
143 virtual void visit(ir_swizzle *);
144 virtual void visit(ir_dereference_variable *);
145 virtual void visit(ir_dereference_array *);
146 virtual void visit(ir_dereference_record *);
147 virtual void visit(ir_assignment *);
148 virtual void visit(ir_constant *);
149 virtual void visit(ir_call *);
150 virtual void visit(ir_return *);
151 virtual void visit(ir_discard *);
152 virtual void visit(ir_texture *);
153 virtual void visit(ir_if *);
154 virtual void visit(ir_emit_vertex *);
155 virtual void visit(ir_end_primitive *);
156 virtual void visit(ir_barrier *);
157 /*@}*/
158
159 src_reg result;
160
161 /* Regs for vertex results. Generated at ir_variable visiting time
162 * for each ir->location that is used. output_reg_annotation has the same
 * number of entries (BRW_VARYING_SLOT_COUNT) as output_reg.
163 */
164 dst_reg output_reg[BRW_VARYING_SLOT_COUNT];
165 const char *output_reg_annotation[BRW_VARYING_SLOT_COUNT];
166 int *uniform_size;
167 int *uniform_vector_size;
168 int uniform_array_size; /**< Size of uniform_[vector_]size arrays */
169 int uniforms;
170
171 src_reg shader_start_time;
172
173 struct hash_table *variable_ht;
174
175 bool run(gl_clip_plane *clip_planes);
/* Variadic; NOTE(review): presumably takes a printf-style format in msg --
 * confirm against the definition.
 */
176 void fail(const char *msg, ...);
177
178 void setup_uniform_clipplane_values(gl_clip_plane *clip_planes);
/* The only setup_*uniform* method declared virtual here.
 * NOTE(review): n is presumably the number of entries in values, and this
 * presumably overrides a hook declared in a base class -- neither is
 * visible from this header; confirm.
 */
179 virtual void setup_vector_uniform_values(const gl_constant_value *values,
180 unsigned n);
181 void setup_uniform_values(ir_variable *ir);
182 void setup_builtin_uniform_values(ir_variable *ir);
183 int setup_uniforms(int payload_reg);
184
185 bool reg_allocate_trivial();
186 bool reg_allocate();
187 void evaluate_spill_costs(float *spill_costs, bool *no_spill);
188 int choose_spill_reg(struct ra_graph *g);
189 void spill_reg(int spill_reg);
190 void move_grf_array_access_to_scratch();
191 void move_uniform_array_access_to_pull_constants();
192 void move_push_constants_to_pull_constants();
193 void split_uniform_registers();
194 void pack_uniform_registers();
195 void calculate_live_intervals();
196 void invalidate_live_intervals();
197 void split_virtual_grfs();
198 bool opt_vector_float();
199 bool opt_reduce_swizzle();
200 bool dead_code_eliminate();
201 int var_range_start(unsigned v, unsigned n) const;
202 int var_range_end(unsigned v, unsigned n) const;
203 bool virtual_grf_interferes(int a, int b);
204 bool opt_copy_propagation(bool do_constant_prop = true); /* flag defaults to true when omitted */
205 bool opt_cse_local(bblock_t *block);
206 bool opt_cse();
207 bool opt_algebraic();
208 bool opt_register_coalesce();
209 bool eliminate_find_live_channel();
210 bool is_dep_ctrl_unsafe(const vec4_instruction *inst);
211 void opt_set_dependency_control();
212 void opt_schedule_instructions();
213
/* emit() overloads: one taking an already-built instruction, the others
 * taking an opcode plus an optional destination and up to three sources.
 */
214 vec4_instruction *emit(vec4_instruction *inst);
215
216 vec4_instruction *emit(enum opcode opcode);
217 vec4_instruction *emit(enum opcode opcode, const dst_reg &dst);
218 vec4_instruction *emit(enum opcode opcode, const dst_reg &dst,
219 const src_reg &src0);
220 vec4_instruction *emit(enum opcode opcode, const dst_reg &dst,
221 const src_reg &src0, const src_reg &src1);
222 vec4_instruction *emit(enum opcode opcode, const dst_reg &dst,
223 const src_reg &src0, const src_reg &src1,
224 const src_reg &src2);
225
226 vec4_instruction *emit_before(bblock_t *block,
227 vec4_instruction *inst,
228 vec4_instruction *new_inst);
229
/* Declaration helpers: EMITn(op) declares a member function named op that
 * takes a destination and n sources (all by const reference) and returns a
 * vec4_instruction pointer.  The macro bodies already end in ';', so the
 * uses below carry none.  All three are #undef'd after the list.
 */
230 #define EMIT1(op) vec4_instruction *op(const dst_reg &, const src_reg &);
231 #define EMIT2(op) vec4_instruction *op(const dst_reg &, const src_reg &, const src_reg &);
232 #define EMIT3(op) vec4_instruction *op(const dst_reg &, const src_reg &, const src_reg &, const src_reg &);
233 EMIT1(MOV)
234 EMIT1(NOT)
235 EMIT1(RNDD)
236 EMIT1(RNDE)
237 EMIT1(RNDZ)
238 EMIT1(FRC)
239 EMIT1(F32TO16)
240 EMIT1(F16TO32)
241 EMIT2(ADD)
242 EMIT2(MUL)
243 EMIT2(MACH)
244 EMIT2(MAC)
245 EMIT2(AND)
246 EMIT2(OR)
247 EMIT2(XOR)
248 EMIT2(DP3)
249 EMIT2(DP4)
250 EMIT2(DPH)
251 EMIT2(SHL)
252 EMIT2(SHR)
253 EMIT2(ASR)
/* CMP and IF are declared by hand because their signatures do not fit the
 * EMITn shapes: they take a conditional mod or predicate, and pass
 * registers by value.
 */
254 vec4_instruction *CMP(dst_reg dst, src_reg src0, src_reg src1,
255 enum brw_conditional_mod condition);
256 vec4_instruction *IF(src_reg src0, src_reg src1,
257 enum brw_conditional_mod condition);
258 vec4_instruction *IF(enum brw_predicate predicate);
259 EMIT1(SCRATCH_READ)
260 EMIT2(SCRATCH_WRITE)
261 EMIT3(LRP)
262 EMIT1(BFREV)
263 EMIT3(BFE)
264 EMIT2(BFI1)
265 EMIT3(BFI2)
266 EMIT1(FBH)
267 EMIT1(FBL)
268 EMIT1(CBIT)
269 EMIT3(MAD)
270 EMIT2(ADDC)
271 EMIT2(SUBB)
272 #undef EMIT1
273 #undef EMIT2
274 #undef EMIT3
275
276 int implied_mrf_writes(vec4_instruction *inst);
277
278 bool try_rewrite_rhs_to_dst(ir_assignment *ir,
279 dst_reg dst,
280 src_reg src,
281 vec4_instruction *pre_rhs_inst,
282 vec4_instruction *last_rhs_inst);
283
284 /** Walks an exec_list of ir_instruction and sends it through this visitor. */
285 void visit_instructions(const exec_list *list);
286
287 void emit_vp_sop(enum brw_conditional_mod condmod, dst_reg dst,
288 src_reg src0, src_reg src1, src_reg one);
289
290 void emit_bool_to_cond_code(ir_rvalue *ir, enum brw_predicate *predicate);
291 void emit_if_gen6(ir_if *ir);
292
293 void emit_minmax(enum brw_conditional_mod conditionalmod, dst_reg dst,
294 src_reg src0, src_reg src1);
295
296 void emit_lrp(const dst_reg &dst,
297 const src_reg &x, const src_reg &y, const src_reg &a);
298
299 /**
300 * Copy any live channel from \p src to the first channel of the
301 * result.
302 */
303 src_reg emit_uniformize(const src_reg &src);
304
305 void emit_block_move(dst_reg *dst, src_reg *src,
306 const struct glsl_type *type, brw_predicate predicate);
307
308 void emit_constant_values(dst_reg *dst, ir_constant *value);
309
310 /**
311 * Emit the correct dot-product instruction for the type of arguments
312 */
313 void emit_dp(dst_reg dst, src_reg src0, src_reg src1, unsigned elements);
314
/* Two overloads: one-source and two-source forms. */
315 void emit_scalar(ir_instruction *ir, enum prog_opcode op,
316 dst_reg dst, src_reg src0);
317
318 void emit_scalar(ir_instruction *ir, enum prog_opcode op,
319 dst_reg dst, src_reg src0, src_reg src1);
320
321 src_reg fix_3src_operand(src_reg src);
322
/* src1 defaults to a default-constructed src_reg, so callers may omit it. */
323 void emit_math(enum opcode opcode, const dst_reg &dst, const src_reg &src0,
324 const src_reg &src1 = src_reg());
325 src_reg fix_math_operand(src_reg src);
326
327 void emit_pack_half_2x16(dst_reg dst, src_reg src0);
328 void emit_unpack_half_2x16(dst_reg dst, src_reg src0);
329 void emit_unpack_unorm_4x8(const dst_reg &dst, src_reg src0);
330 void emit_unpack_snorm_4x8(const dst_reg &dst, src_reg src0);
331 void emit_pack_unorm_4x8(const dst_reg &dst, const src_reg &src0);
332 void emit_pack_snorm_4x8(const dst_reg &dst, const src_reg &src0);
333
334 uint32_t gather_channel(ir_texture *ir, uint32_t sampler);
335 src_reg emit_mcs_fetch(ir_texture *ir, src_reg coordinate, src_reg sampler);
336 void emit_gen6_gather_wa(uint8_t wa, dst_reg dst);
337 void swizzle_result(ir_texture *ir, src_reg orig_val, uint32_t sampler);
338
339 void emit_ndc_computation();
340 void emit_psiz_and_flags(dst_reg reg);
341 void emit_clip_distances(dst_reg reg, int offset);
342 vec4_instruction *emit_generic_urb_slot(dst_reg reg, int varying);
343 void emit_urb_slot(dst_reg reg, int varying);
344
345 void emit_shader_time_begin();
346 void emit_shader_time_end();
347 void emit_shader_time_write(int shader_time_subindex, src_reg value);
348
349 void emit_untyped_atomic(unsigned atomic_op, unsigned surf_index,
350 dst_reg dst, src_reg offset, src_reg src0,
351 src_reg src1);
352
353 void emit_untyped_surface_read(unsigned surf_index, dst_reg dst,
354 src_reg offset);
355
356 src_reg get_scratch_offset(bblock_t *block, vec4_instruction *inst,
357 src_reg *reladdr, int reg_offset);
358 src_reg get_pull_constant_offset(bblock_t *block, vec4_instruction *inst,
359 src_reg *reladdr, int reg_offset);
360 void emit_scratch_read(bblock_t *block, vec4_instruction *inst,
361 dst_reg dst,
362 src_reg orig_src,
363 int base_offset);
364 void emit_scratch_write(bblock_t *block, vec4_instruction *inst,
365 int base_offset);
366 void emit_pull_constant_load(bblock_t *block, vec4_instruction *inst,
367 dst_reg dst,
368 src_reg orig_src,
369 int base_offset);
370 void emit_pull_constant_load_reg(dst_reg dst,
371 src_reg surf_index,
372 src_reg offset,
373 bblock_t *before_block,
374 vec4_instruction *before_inst);
375 src_reg emit_resolve_reladdr(int scratch_loc[], bblock_t *block,
376 vec4_instruction *inst, src_reg src);
377
378 bool try_emit_mad(ir_expression *ir);
379 bool try_emit_b2f_of_compare(ir_expression *ir);
380 void resolve_ud_negate(src_reg *reg);
381 void resolve_bool_comparison(ir_rvalue *rvalue, src_reg *reg);
382
383 src_reg get_timestamp();
384
385 bool process_move_condition(ir_rvalue *ir);
386
/* Two overloads; the second takes the FILE to write to. */
387 void dump_instruction(backend_instruction *inst);
388 void dump_instruction(backend_instruction *inst, FILE *file);
389
390 void visit_atomic_counter_intrinsic(ir_call *ir);
391
392 protected:
393 void emit_vertex();
394 void lower_attributes_to_hw_regs(const int *attribute_map,
395 bool interleaved);
396 void setup_payload_interference(struct ra_graph *g, int first_payload_node,
397 int reg_node_count);
/* Members marked "= 0" below are pure virtual and make this class abstract;
 * the virtual members without "= 0" (assign_binding_table_offsets(),
 * compute_array_stride()) are defined for this class and may be overridden.
 */
398 virtual dst_reg *make_reg_for_system_value(ir_variable *ir) = 0;
399 virtual void assign_binding_table_offsets();
400 virtual void setup_payload() = 0;
401 virtual void emit_prolog() = 0;
402 virtual void emit_program_code() = 0;
403 virtual void emit_thread_end() = 0;
404 virtual void emit_urb_write_header(int mrf) = 0;
405 virtual vec4_instruction *emit_urb_write_opcode(bool complete) = 0;
406 virtual int compute_array_stride(ir_dereference_array *ir);
407
408 private:
409 /**
410 * If true, then register allocation should fail instead of spilling.
411 */
412 const bool no_spills;
413
/* Same name as the last constructor parameter. */
414 int shader_time_index;
415
416 unsigned last_scratch; /**< measured in 32-byte (register size) units */
417 };
418
419
420 /**
421 * The vertex shader code generator.
422 *
423 * Translates VS IR to actual i965 assembly code.
 *
 * The only public members are the constructor, the destructor and
 * generate_assembly(); every generate_*() helper and all data members are
 * private.  Only declarations live in this header.
424 */
425 class vec4_generator
426 {
427 public:
428 vec4_generator(const struct brw_compiler *compiler, void *log_data,
429 struct gl_shader_program *shader_prog,
430 struct gl_program *prog,
431 struct brw_vue_prog_data *prog_data,
432 void *mem_ctx,
433 bool debug_flag,
434 const char *stage_name,
435 const char *stage_abbrev);
436 ~vec4_generator();
437
/* Public entry point.
 * NOTE(review): presumably returns the generated code and stores its size
 * through asm_size; the units of that size and the ownership of the
 * returned buffer are not visible here -- confirm against the definition.
 */
438 const unsigned *generate_assembly(const cfg_t *cfg, unsigned *asm_size);
439
440 private:
441 void generate_code(const cfg_t *cfg);
442
/* Math helpers: separate one- and two-source entry points carrying a gen4
 * suffix, and a single two-source entry point carrying a gen6 suffix.
 */
443 void generate_math1_gen4(vec4_instruction *inst,
444 struct brw_reg dst,
445 struct brw_reg src);
446 void generate_math2_gen4(vec4_instruction *inst,
447 struct brw_reg dst,
448 struct brw_reg src0,
449 struct brw_reg src1);
450 void generate_math_gen6(vec4_instruction *inst,
451 struct brw_reg dst,
452 struct brw_reg src0,
453 struct brw_reg src1);
454
455 void generate_tex(vec4_instruction *inst,
456 struct brw_reg dst,
457 struct brw_reg src,
458 struct brw_reg sampler_index);
459
/* URB-write helpers (vs_/gs_ prefixed) followed by the remaining gs_
 * prefixed helpers.
 */
460 void generate_vs_urb_write(vec4_instruction *inst);
461 void generate_gs_urb_write(vec4_instruction *inst);
462 void generate_gs_urb_write_allocate(vec4_instruction *inst);
463 void generate_gs_thread_end(vec4_instruction *inst);
464 void generate_gs_set_write_offset(struct brw_reg dst,
465 struct brw_reg src0,
466 struct brw_reg src1);
467 void generate_gs_set_vertex_count(struct brw_reg dst,
468 struct brw_reg src);
469 void generate_gs_svb_write(vec4_instruction *inst,
470 struct brw_reg dst,
471 struct brw_reg src0,
472 struct brw_reg src1);
473 void generate_gs_svb_set_destination_index(vec4_instruction *inst,
474 struct brw_reg dst,
475 struct brw_reg src);
476 void generate_gs_set_dword_2(struct brw_reg dst, struct brw_reg src);
477 void generate_gs_prepare_channel_masks(struct brw_reg dst);
478 void generate_gs_set_channel_masks(struct brw_reg dst, struct brw_reg src);
479 void generate_gs_get_instance_id(struct brw_reg dst);
480 void generate_gs_ff_sync_set_primitives(struct brw_reg dst,
481 struct brw_reg src0,
482 struct brw_reg src1,
483 struct brw_reg src2);
484 void generate_gs_ff_sync(vec4_instruction *inst,
485 struct brw_reg dst,
486 struct brw_reg src0,
487 struct brw_reg src1);
488 void generate_gs_set_primitive_id(struct brw_reg dst);
/* Scratch and pull-constant access helpers. */
489 void generate_oword_dual_block_offsets(struct brw_reg m1,
490 struct brw_reg index);
491 void generate_scratch_write(vec4_instruction *inst,
492 struct brw_reg dst,
493 struct brw_reg src,
494 struct brw_reg index);
495 void generate_scratch_read(vec4_instruction *inst,
496 struct brw_reg dst,
497 struct brw_reg index);
498 void generate_pull_constant_load(vec4_instruction *inst,
499 struct brw_reg dst,
500 struct brw_reg index,
501 struct brw_reg offset);
502 void generate_pull_constant_load_gen7(vec4_instruction *inst,
503 struct brw_reg dst,
504 struct brw_reg surf_index,
505 struct brw_reg offset);
506 void generate_set_simd4x2_header_gen9(vec4_instruction *inst,
507 struct brw_reg dst);
508 void generate_unpack_flags(struct brw_reg dst);
509
/* Most data members below share a name with a constructor parameter;
 * devinfo and p do not.
 * NOTE(review): how devinfo and p are obtained is not visible in this
 * header -- see the constructor definition.
 */
510 const struct brw_compiler *compiler;
511 void *log_data; /* Passed to compiler->*_log functions */
512
513 const struct brw_device_info *devinfo;
514
515 struct brw_codegen *p;
516
517 struct gl_shader_program *shader_prog;
518 const struct gl_program *prog; /* held as pointer-to-const, although the constructor takes a non-const pointer */
519
520 struct brw_vue_prog_data *prog_data;
521
522 void *mem_ctx;
523 const char *stage_name;
524 const char *stage_abbrev;
525 const bool debug_flag; /* const member: fixed at construction */
526 };
527
528 } /* namespace brw */
529 #endif /* __cplusplus */
530
531 #endif /* BRW_VEC4_H */