i965: Store a key_tex pointer in vec4_visitor.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_vec4.h
/*
 * Copyright © 2011 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#ifndef BRW_VEC4_H
#define BRW_VEC4_H

#include <stdint.h>
#include "brw_shader.h"
#include "main/compiler.h"
#include "program/hash_table.h"
#include "brw_program.h"

#ifdef __cplusplus
#include "brw_ir_vec4.h"

extern "C" {
#endif

#include "brw_context.h"
#include "brw_eu.h"
#include "intel_asm_annotation.h"

#ifdef __cplusplus
} /* extern "C" */
#endif

#include "glsl/ir.h"
#include "glsl/nir/nir.h"


#ifdef __cplusplus
extern "C" {
#endif

void
brw_vue_setup_prog_key_for_precompile(struct gl_context *ctx,
                                      struct brw_vue_prog_key *key,
                                      GLuint id, struct gl_program *prog);

#ifdef __cplusplus
} /* extern "C" */

namespace brw {

class vec4_live_variables;

/**
 * The vertex shader front-end.
 *
 * Translates either GLSL IR or Mesa IR (for ARB_vertex_program and
 * fixed-function) into VS IR.
 */
class vec4_visitor : public backend_shader, public ir_visitor
{
public:
   vec4_visitor(const struct brw_compiler *compiler,
                void *log_data,
                struct gl_program *prog,
                const struct brw_vue_prog_key *key,
                struct brw_vue_prog_data *prog_data,
                struct gl_shader_program *shader_prog,
                gl_shader_stage stage,
                void *mem_ctx,
                bool no_spills,
                int shader_time_index);
   ~vec4_visitor();

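   /* Null-register destinations in float, signed-int, and unsigned-int
    * types, for instructions executed only for a side effect such as
    * writing the flag register.
    */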
   dst_reg dst_null_f()
   {
      return dst_reg(brw_null_reg());
   }

   dst_reg dst_null_d()
   {
      return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   }

   dst_reg dst_null_ud()
   {
      return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
   }

   const struct brw_vue_prog_key * const key;
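   /* Texture/sampler portion of the program key; presumably points at
    * key->tex so texturing code can use it without needing the full VUE key.
    */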
   const struct brw_sampler_prog_key_data * const key_tex;
   struct brw_vue_prog_data * const prog_data;
   unsigned int sanity_param_count;

   char *fail_msg;
   bool failed;

   /**
    * GLSL IR currently being processed, which is associated with our
    * driver IR instructions for debugging purposes.
    */
   const void *base_ir;
   const char *current_annotation;

   int first_non_payload_grf;
   unsigned int max_grf;
   int *virtual_grf_start;
   int *virtual_grf_end;
   brw::vec4_live_variables *live_intervals;
   dst_reg userplane[MAX_CLIP_PLANES];

   dst_reg *variable_storage(ir_variable *var);

   void reladdr_to_temp(ir_instruction *ir, src_reg *reg, int *num_reladdr);

   bool need_all_constants_in_pull_buffer;

   /**
    * \name Visit methods
    *
    * As typical for the visitor pattern, there must be one \c visit method for
    * each concrete subclass of \c ir_instruction. Virtual base classes within
    * the hierarchy should not have \c visit methods.
    */
   /*@{*/
   virtual void visit(ir_variable *);
   virtual void visit(ir_loop *);
   virtual void visit(ir_loop_jump *);
   virtual void visit(ir_function_signature *);
   virtual void visit(ir_function *);
   virtual void visit(ir_expression *);
   virtual void visit(ir_swizzle *);
   virtual void visit(ir_dereference_variable *);
   virtual void visit(ir_dereference_array *);
   virtual void visit(ir_dereference_record *);
   virtual void visit(ir_assignment *);
   virtual void visit(ir_constant *);
   virtual void visit(ir_call *);
   virtual void visit(ir_return *);
   virtual void visit(ir_discard *);
   virtual void visit(ir_texture *);
   virtual void visit(ir_if *);
   virtual void visit(ir_emit_vertex *);
   virtual void visit(ir_end_primitive *);
   virtual void visit(ir_barrier *);
   /*@}*/

   src_reg result;

   /* Regs for vertex results. Generated at ir_variable visiting time
    * for the ir->location values that are used.
    */
   dst_reg output_reg[BRW_VARYING_SLOT_COUNT];
   const char *output_reg_annotation[BRW_VARYING_SLOT_COUNT];
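   /* Per-uniform size bookkeeping (presumably consumed by the uniform
    * packing and splitting passes below); uniform_array_size is the
    * allocated length of both arrays.
    */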
   int *uniform_size;
   int *uniform_vector_size;
   int uniform_array_size; /**< Size of uniform_[vector_]size arrays */
   int uniforms;

   src_reg shader_start_time;

   struct hash_table *variable_ht;

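   /* run() translates the shader into vec4 IR; on failure it returns false
    * after fail() has recorded an error message in fail_msg.
    */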
   bool run();
   void fail(const char *msg, ...);

   virtual void setup_vec4_uniform_value(unsigned param_offset,
                                         const gl_constant_value *values,
                                         unsigned n);
   void setup_uniform_values(ir_variable *ir);
   void setup_builtin_uniform_values(ir_variable *ir);
   int setup_uniforms(int payload_reg);

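   /* Register allocation, spilling, and the optimization passes that run
    * over the vec4 IR before code generation.
    */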
   bool reg_allocate_trivial();
   bool reg_allocate();
   void evaluate_spill_costs(float *spill_costs, bool *no_spill);
   int choose_spill_reg(struct ra_graph *g);
   void spill_reg(int spill_reg);
   void move_grf_array_access_to_scratch();
   void move_uniform_array_access_to_pull_constants();
   void move_push_constants_to_pull_constants();
   void split_uniform_registers();
   void pack_uniform_registers();
   void calculate_live_intervals();
   void invalidate_live_intervals();
   void split_virtual_grfs();
   bool opt_vector_float();
   bool opt_reduce_swizzle();
   bool dead_code_eliminate();
   int var_range_start(unsigned v, unsigned n) const;
   int var_range_end(unsigned v, unsigned n) const;
   bool virtual_grf_interferes(int a, int b);
   bool opt_copy_propagation(bool do_constant_prop = true);
   bool opt_cse_local(bblock_t *block);
   bool opt_cse();
   bool opt_algebraic();
   bool opt_register_coalesce();
   bool eliminate_find_live_channel();
   bool is_dep_ctrl_unsafe(const vec4_instruction *inst);
   void opt_set_dependency_control();
   void opt_schedule_instructions();

   vec4_instruction *emit(vec4_instruction *inst);

   vec4_instruction *emit(enum opcode opcode);
   vec4_instruction *emit(enum opcode opcode, const dst_reg &dst);
   vec4_instruction *emit(enum opcode opcode, const dst_reg &dst,
                          const src_reg &src0);
   vec4_instruction *emit(enum opcode opcode, const dst_reg &dst,
                          const src_reg &src0, const src_reg &src1);
   vec4_instruction *emit(enum opcode opcode, const dst_reg &dst,
                          const src_reg &src0, const src_reg &src1,
                          const src_reg &src2);

   vec4_instruction *emit_before(bblock_t *block,
                                 vec4_instruction *inst,
                                 vec4_instruction *new_inst);

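   /* Helpers that declare emit methods named after the hardware opcode,
    * taking one, two, or three source operands respectively.
    */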
#define EMIT1(op) vec4_instruction *op(const dst_reg &, const src_reg &);
#define EMIT2(op) vec4_instruction *op(const dst_reg &, const src_reg &, const src_reg &);
#define EMIT3(op) vec4_instruction *op(const dst_reg &, const src_reg &, const src_reg &, const src_reg &);
   EMIT1(MOV)
   EMIT1(NOT)
   EMIT1(RNDD)
   EMIT1(RNDE)
   EMIT1(RNDZ)
   EMIT1(FRC)
   EMIT1(F32TO16)
   EMIT1(F16TO32)
   EMIT2(ADD)
   EMIT2(MUL)
   EMIT2(MACH)
   EMIT2(MAC)
   EMIT2(AND)
   EMIT2(OR)
   EMIT2(XOR)
   EMIT2(DP3)
   EMIT2(DP4)
   EMIT2(DPH)
   EMIT2(SHL)
   EMIT2(SHR)
   EMIT2(ASR)
   vec4_instruction *CMP(dst_reg dst, src_reg src0, src_reg src1,
                         enum brw_conditional_mod condition);
   vec4_instruction *IF(src_reg src0, src_reg src1,
                        enum brw_conditional_mod condition);
   vec4_instruction *IF(enum brw_predicate predicate);
   EMIT1(SCRATCH_READ)
   EMIT2(SCRATCH_WRITE)
   EMIT3(LRP)
   EMIT1(BFREV)
   EMIT3(BFE)
   EMIT2(BFI1)
   EMIT3(BFI2)
   EMIT1(FBH)
   EMIT1(FBL)
   EMIT1(CBIT)
   EMIT3(MAD)
   EMIT2(ADDC)
   EMIT2(SUBB)
#undef EMIT1
#undef EMIT2
#undef EMIT3

   int implied_mrf_writes(vec4_instruction *inst);

   bool try_rewrite_rhs_to_dst(ir_assignment *ir,
                               dst_reg dst,
                               src_reg src,
                               vec4_instruction *pre_rhs_inst,
                               vec4_instruction *last_rhs_inst);

   /** Walks an exec_list of ir_instruction and sends it through this visitor. */
   void visit_instructions(const exec_list *list);

   void emit_vp_sop(enum brw_conditional_mod condmod, dst_reg dst,
                    src_reg src0, src_reg src1, src_reg one);

   void emit_bool_to_cond_code(ir_rvalue *ir, enum brw_predicate *predicate);
   void emit_if_gen6(ir_if *ir);

   vec4_instruction *emit_minmax(enum brw_conditional_mod conditionalmod, dst_reg dst,
                                 src_reg src0, src_reg src1);

   vec4_instruction *emit_lrp(const dst_reg &dst, const src_reg &x,
                              const src_reg &y, const src_reg &a);

   /**
    * Copy any live channel from \p src to the first channel of the
    * result.
    */
   src_reg emit_uniformize(const src_reg &src);

   void emit_block_move(dst_reg *dst, src_reg *src,
                        const struct glsl_type *type, brw_predicate predicate);

   void emit_constant_values(dst_reg *dst, ir_constant *value);

   /**
    * Emit the correct dot-product instruction for the type of arguments
    */
   void emit_dp(dst_reg dst, src_reg src0, src_reg src1, unsigned elements);

   void emit_scalar(ir_instruction *ir, enum prog_opcode op,
                    dst_reg dst, src_reg src0);

   void emit_scalar(ir_instruction *ir, enum prog_opcode op,
                    dst_reg dst, src_reg src0, src_reg src1);

   src_reg fix_3src_operand(const src_reg &src);
   src_reg resolve_source_modifiers(const src_reg &src);

   vec4_instruction *emit_math(enum opcode opcode, const dst_reg &dst, const src_reg &src0,
                               const src_reg &src1 = src_reg());

   src_reg fix_math_operand(const src_reg &src);

   void emit_pack_half_2x16(dst_reg dst, src_reg src0);
   void emit_unpack_half_2x16(dst_reg dst, src_reg src0);
   void emit_unpack_unorm_4x8(const dst_reg &dst, src_reg src0);
   void emit_unpack_snorm_4x8(const dst_reg &dst, src_reg src0);
   void emit_pack_unorm_4x8(const dst_reg &dst, const src_reg &src0);
   void emit_pack_snorm_4x8(const dst_reg &dst, const src_reg &src0);

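   /* Emit a texturing operation. The parameters largely mirror the fields
    * of ir_texture: coordinate, LOD or derivatives, shadow comparitor,
    * sample index and MCS value for multisample fetches, plus the sampler
    * index (an immediate and/or an indirect register).
    */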
   void emit_texture(ir_texture_opcode op,
                     dst_reg dest,
                     const glsl_type *dest_type,
                     src_reg coordinate,
                     int coord_components,
                     src_reg shadow_comparitor,
                     src_reg lod, src_reg lod2,
                     src_reg sample_index,
                     uint32_t constant_offset,
                     src_reg offset_value,
                     src_reg mcs,
                     bool is_cube_array,
                     uint32_t sampler, src_reg sampler_reg);

   uint32_t gather_channel(unsigned gather_component, uint32_t sampler);
   src_reg emit_mcs_fetch(const glsl_type *coordinate_type, src_reg coordinate,
                          src_reg sampler);
   void emit_gen6_gather_wa(uint8_t wa, dst_reg dst);
   void swizzle_result(ir_texture_opcode op, dst_reg dest,
                       src_reg orig_val, uint32_t sampler,
                       const glsl_type *dest_type);

   void emit_ndc_computation();
   void emit_psiz_and_flags(dst_reg reg);
   vec4_instruction *emit_generic_urb_slot(dst_reg reg, int varying);
   void emit_urb_slot(dst_reg reg, int varying);

   void emit_shader_time_begin();
   void emit_shader_time_end();
   void emit_shader_time_write(int shader_time_subindex, src_reg value);

   void emit_untyped_atomic(unsigned atomic_op, unsigned surf_index,
                            dst_reg dst, src_reg offset, src_reg src0,
                            src_reg src1);

   void emit_untyped_surface_read(unsigned surf_index, dst_reg dst,
                                  src_reg offset);

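   /* Scratch reads/writes spill registers and reladdr-indexed GRF arrays to
    * the scratch buffer; pull constant loads fetch uniforms that do not fit
    * in the push constant registers.
    */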
   src_reg get_scratch_offset(bblock_t *block, vec4_instruction *inst,
                              src_reg *reladdr, int reg_offset);
   src_reg get_pull_constant_offset(bblock_t *block, vec4_instruction *inst,
                                    src_reg *reladdr, int reg_offset);
   void emit_scratch_read(bblock_t *block, vec4_instruction *inst,
                          dst_reg dst,
                          src_reg orig_src,
                          int base_offset);
   void emit_scratch_write(bblock_t *block, vec4_instruction *inst,
                           int base_offset);
   void emit_pull_constant_load(bblock_t *block, vec4_instruction *inst,
                                dst_reg dst,
                                src_reg orig_src,
                                int base_offset);
   void emit_pull_constant_load_reg(dst_reg dst,
                                    src_reg surf_index,
                                    src_reg offset,
                                    bblock_t *before_block,
                                    vec4_instruction *before_inst);
   src_reg emit_resolve_reladdr(int scratch_loc[], bblock_t *block,
                                vec4_instruction *inst, src_reg src);

   bool try_emit_mad(ir_expression *ir);
   bool try_emit_b2f_of_compare(ir_expression *ir);
   void resolve_ud_negate(src_reg *reg);
   void resolve_bool_comparison(ir_rvalue *rvalue, src_reg *reg);

   src_reg get_timestamp();

   bool process_move_condition(ir_rvalue *ir);

   void dump_instruction(backend_instruction *inst);
   void dump_instruction(backend_instruction *inst, FILE *file);

   void visit_atomic_counter_intrinsic(ir_call *ir);

   bool is_high_sampler(src_reg sampler);

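   /* NIR translation: emit_nir_code() walks the NIR shader and emits vec4 IR
    * via the nir_setup_* and nir_emit_* methods below.
    */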
   virtual void emit_nir_code();
   virtual void nir_setup_inputs(nir_shader *shader);
   virtual void nir_setup_uniforms(nir_shader *shader);
   virtual void nir_setup_uniform(nir_variable *var);
   virtual void nir_setup_builtin_uniform(nir_variable *var);
   virtual void nir_setup_system_value_intrinsic(nir_intrinsic_instr *instr);
   virtual void nir_setup_system_values(nir_shader *shader);
   virtual void nir_emit_impl(nir_function_impl *impl);
   virtual void nir_emit_cf_list(exec_list *list);
   virtual void nir_emit_if(nir_if *if_stmt);
   virtual void nir_emit_loop(nir_loop *loop);
   virtual void nir_emit_block(nir_block *block);
   virtual void nir_emit_instr(nir_instr *instr);
   virtual void nir_emit_load_const(nir_load_const_instr *instr);
   virtual void nir_emit_intrinsic(nir_intrinsic_instr *instr);
   virtual void nir_emit_alu(nir_alu_instr *instr);
   virtual void nir_emit_jump(nir_jump_instr *instr);
   virtual void nir_emit_texture(nir_tex_instr *instr);

   dst_reg get_nir_dest(nir_dest dest, enum brw_reg_type type);
   dst_reg get_nir_dest(nir_dest dest, nir_alu_type type);
   dst_reg get_nir_dest(nir_dest dest);
   src_reg get_nir_src(nir_src src, enum brw_reg_type type,
                       unsigned num_components = 4);
   src_reg get_nir_src(nir_src src, nir_alu_type type,
                       unsigned num_components = 4);
   src_reg get_nir_src(nir_src src,
                       unsigned num_components = 4);

   virtual dst_reg *make_reg_for_system_value(int location,
                                              const glsl_type *type) = 0;

   dst_reg *nir_locals;
   dst_reg *nir_ssa_values;
   src_reg *nir_inputs;
   dst_reg *nir_system_values;

protected:
   void emit_vertex();
   void lower_attributes_to_hw_regs(const int *attribute_map,
                                    bool interleaved);
   void setup_payload_interference(struct ra_graph *g, int first_payload_node,
                                   int reg_node_count);
   virtual void assign_binding_table_offsets();
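   /* Stage-specific hooks implemented by the concrete subclasses (e.g.
    * vec4_vs_visitor and vec4_gs_visitor).
    */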
   virtual void setup_payload() = 0;
   virtual void emit_prolog() = 0;
   virtual void emit_program_code() = 0;
   virtual void emit_thread_end() = 0;
   virtual void emit_urb_write_header(int mrf) = 0;
   virtual vec4_instruction *emit_urb_write_opcode(bool complete) = 0;
   virtual int compute_array_stride(ir_dereference_array *ir);
   virtual void gs_emit_vertex(int stream_id);
   virtual void gs_end_primitive();

private:
   /**
    * If true, then register allocation should fail instead of spilling.
    */
   const bool no_spills;

   int shader_time_index;

   unsigned last_scratch; /**< measured in 32-byte (register size) units */
};


/**
 * The vertex shader code generator.
 *
 * Translates VS IR to actual i965 assembly code.
 */
class vec4_generator
{
public:
   vec4_generator(const struct brw_compiler *compiler, void *log_data,
                  struct gl_shader_program *shader_prog,
                  struct gl_program *prog,
                  struct brw_vue_prog_data *prog_data,
                  void *mem_ctx,
                  bool debug_flag,
                  const char *stage_name,
                  const char *stage_abbrev);
   ~vec4_generator();

   const unsigned *generate_assembly(const cfg_t *cfg, unsigned *asm_size);

private:
   void generate_code(const cfg_t *cfg);

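   /* Math is generated differently depending on hardware generation: Gen4/5
    * send messages to the shared math unit, while Gen6+ have a native math
    * instruction.
    */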
   void generate_math1_gen4(vec4_instruction *inst,
                            struct brw_reg dst,
                            struct brw_reg src);
   void generate_math2_gen4(vec4_instruction *inst,
                            struct brw_reg dst,
                            struct brw_reg src0,
                            struct brw_reg src1);
   void generate_math_gen6(vec4_instruction *inst,
                           struct brw_reg dst,
                           struct brw_reg src0,
                           struct brw_reg src1);

   void generate_tex(vec4_instruction *inst,
                     struct brw_reg dst,
                     struct brw_reg src,
                     struct brw_reg sampler_index);

   void generate_vs_urb_write(vec4_instruction *inst);
   void generate_gs_urb_write(vec4_instruction *inst);
   void generate_gs_urb_write_allocate(vec4_instruction *inst);
   void generate_gs_thread_end(vec4_instruction *inst);
   void generate_gs_set_write_offset(struct brw_reg dst,
                                     struct brw_reg src0,
                                     struct brw_reg src1);
   void generate_gs_set_vertex_count(struct brw_reg dst,
                                     struct brw_reg src);
   void generate_gs_svb_write(vec4_instruction *inst,
                              struct brw_reg dst,
                              struct brw_reg src0,
                              struct brw_reg src1);
   void generate_gs_svb_set_destination_index(vec4_instruction *inst,
                                              struct brw_reg dst,
                                              struct brw_reg src);
   void generate_gs_set_dword_2(struct brw_reg dst, struct brw_reg src);
   void generate_gs_prepare_channel_masks(struct brw_reg dst);
   void generate_gs_set_channel_masks(struct brw_reg dst, struct brw_reg src);
   void generate_gs_get_instance_id(struct brw_reg dst);
   void generate_gs_ff_sync_set_primitives(struct brw_reg dst,
                                           struct brw_reg src0,
                                           struct brw_reg src1,
                                           struct brw_reg src2);
   void generate_gs_ff_sync(vec4_instruction *inst,
                            struct brw_reg dst,
                            struct brw_reg src0,
                            struct brw_reg src1);
   void generate_gs_set_primitive_id(struct brw_reg dst);
   void generate_oword_dual_block_offsets(struct brw_reg m1,
                                          struct brw_reg index);
   void generate_scratch_write(vec4_instruction *inst,
                               struct brw_reg dst,
                               struct brw_reg src,
                               struct brw_reg index);
   void generate_scratch_read(vec4_instruction *inst,
                              struct brw_reg dst,
                              struct brw_reg index);
   void generate_pull_constant_load(vec4_instruction *inst,
                                    struct brw_reg dst,
                                    struct brw_reg index,
                                    struct brw_reg offset);
   void generate_pull_constant_load_gen7(vec4_instruction *inst,
                                         struct brw_reg dst,
                                         struct brw_reg surf_index,
                                         struct brw_reg offset);
   void generate_set_simd4x2_header_gen9(vec4_instruction *inst,
                                         struct brw_reg dst);
   void generate_unpack_flags(struct brw_reg dst);

   const struct brw_compiler *compiler;
   void *log_data; /* Passed to compiler->*_log functions */

   const struct brw_device_info *devinfo;

   struct brw_codegen *p;

   struct gl_shader_program *shader_prog;
   const struct gl_program *prog;

   struct brw_vue_prog_data *prog_data;

   void *mem_ctx;
   const char *stage_name;
   const char *stage_abbrev;
   const bool debug_flag;
};

} /* namespace brw */
#endif /* __cplusplus */

#endif /* BRW_VEC4_H */