/* i965/vec4: Implement VS_OPCODE_GET_BUFFER_SIZE
 * [mesa.git] / src / mesa / drivers / dri / i965 / brw_vec4.h
 */
1 /*
2 * Copyright © 2011 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #ifndef BRW_VEC4_H
25 #define BRW_VEC4_H
26
27 #include <stdint.h>
28 #include "brw_shader.h"
29 #include "main/compiler.h"
30 #include "program/hash_table.h"
31 #include "brw_program.h"
32
33 #ifdef __cplusplus
34 #include "brw_ir_vec4.h"
35
36 extern "C" {
37 #endif
38
39 #include "brw_context.h"
40 #include "brw_eu.h"
41 #include "intel_asm_annotation.h"
42
43 #ifdef __cplusplus
44 }; /* extern "C" */
45 #endif
46
47 #include "glsl/ir.h"
48 #include "glsl/nir/nir.h"
49
50
51 #ifdef __cplusplus
52 extern "C" {
53 #endif
54
55 #ifdef __cplusplus
56 } /* extern "C" */
57
58 namespace brw {
59
60 class vec4_live_variables;
61
62 /**
63 * The vertex shader front-end.
64 *
65 * Translates either GLSL IR or Mesa IR (for ARB_vertex_program and
66 * fixed-function) into VS IR.
67 */
class vec4_visitor : public backend_shader, public ir_visitor
{
public:
   vec4_visitor(const struct brw_compiler *compiler,
                void *log_data,
                struct gl_program *prog,
                const struct brw_sampler_prog_key_data *key,
                struct brw_vue_prog_data *prog_data,
                struct gl_shader_program *shader_prog,
                gl_shader_stage stage,
                void *mem_ctx,
                bool no_spills,
                int shader_time_index);
   ~vec4_visitor();

   /* Convenience constructors for the hardware null register, retyped
    * to float, signed dword, and unsigned dword respectively.  Used as
    * a destination when only an instruction's side effects (e.g. the
    * condition code) are wanted.
    */
   dst_reg dst_null_f()
   {
      return dst_reg(brw_null_reg());
   }

   dst_reg dst_null_d()
   {
      return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   }

   dst_reg dst_null_ud()
   {
      return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
   }

   /* Sampler-related portion of the program key, and the VUE program
    * data this compile fills in.  Both are owned by the caller.
    */
   const struct brw_sampler_prog_key_data * const key_tex;
   struct brw_vue_prog_data * const prog_data;
   unsigned int sanity_param_count;

   /* Failure state: fail() stores a message here and sets failed. */
   char *fail_msg;
   bool failed;

   /**
    * GLSL IR currently being processed, which is associated with our
    * driver IR instructions for debugging purposes.
    */
   const void *base_ir;
   const char *current_annotation;

   /* Register-allocation bookkeeping: first GRF not used by the thread
    * payload, total GRF budget, and per-virtual-GRF live ranges computed
    * by calculate_live_intervals().
    */
   int first_non_payload_grf;
   unsigned int max_grf;
   int *virtual_grf_start;
   int *virtual_grf_end;
   brw::vec4_live_variables *live_intervals;
   dst_reg userplane[MAX_CLIP_PLANES];

   /* Returns the storage previously created for \p var, or NULL. */
   dst_reg *variable_storage(ir_variable *var);

   void reladdr_to_temp(ir_instruction *ir, src_reg *reg, int *num_reladdr);

   bool need_all_constants_in_pull_buffer;

   /**
    * \name Visit methods
    *
    * As typical for the visitor pattern, there must be one \c visit method for
    * each concrete subclass of \c ir_instruction.  Virtual base classes within
    * the hierarchy should not have \c visit methods.
    */
   /*@{*/
   virtual void visit(ir_variable *);
   virtual void visit(ir_loop *);
   virtual void visit(ir_loop_jump *);
   virtual void visit(ir_function_signature *);
   virtual void visit(ir_function *);
   virtual void visit(ir_expression *);
   virtual void visit(ir_swizzle *);
   virtual void visit(ir_dereference_variable  *);
   virtual void visit(ir_dereference_array *);
   virtual void visit(ir_dereference_record *);
   virtual void visit(ir_assignment *);
   virtual void visit(ir_constant *);
   virtual void visit(ir_call *);
   virtual void visit(ir_return *);
   virtual void visit(ir_discard *);
   virtual void visit(ir_texture *);
   virtual void visit(ir_if *);
   virtual void visit(ir_emit_vertex *);
   virtual void visit(ir_end_primitive *);
   virtual void visit(ir_barrier *);
   /*@}*/

   /* Value produced by the most recently visited rvalue. */
   src_reg result;

   /* Regs for vertex results.  Generated at ir_variable visiting time
    * for the ir->location's used.
    */
   dst_reg output_reg[BRW_VARYING_SLOT_COUNT];
   const char *output_reg_annotation[BRW_VARYING_SLOT_COUNT];
   int *uniform_size;
   int *uniform_vector_size;
   int uniform_array_size; /*< Size of uniform_[vector_]size arrays */
   int uniforms;

   src_reg shader_start_time;

   /* Maps ir_variable -> storage; see variable_storage(). */
   struct hash_table *variable_ht;

   /* Top-level driver: translates the shader to VS IR and runs the
    * optimization/allocation passes.  Returns false on failure.
    */
   bool run();
   void fail(const char *msg, ...);

   /* Uniform setup. */
   virtual void setup_vec4_uniform_value(unsigned param_offset,
                                         const gl_constant_value *values,
                                         unsigned n);
   void setup_uniform_values(ir_variable *ir);
   void setup_builtin_uniform_values(ir_variable *ir);
   int setup_uniforms(int payload_reg);

   /* Register allocation, spilling, and uniform/scratch lowering. */
   bool reg_allocate_trivial();
   bool reg_allocate();
   void evaluate_spill_costs(float *spill_costs, bool *no_spill);
   int choose_spill_reg(struct ra_graph *g);
   void spill_reg(int spill_reg);
   void move_grf_array_access_to_scratch();
   void move_uniform_array_access_to_pull_constants();
   void move_push_constants_to_pull_constants();
   void split_uniform_registers();
   void pack_uniform_registers();
   void calculate_live_intervals();
   void invalidate_live_intervals();
   void split_virtual_grfs();

   /* Optimization passes; each returns true if it made progress. */
   bool opt_vector_float();
   bool opt_reduce_swizzle();
   bool dead_code_eliminate();
   int var_range_start(unsigned v, unsigned n) const;
   int var_range_end(unsigned v, unsigned n) const;
   bool virtual_grf_interferes(int a, int b);
   bool opt_copy_propagation(bool do_constant_prop = true);
   bool opt_cse_local(bblock_t *block);
   bool opt_cse();
   bool opt_algebraic();
   bool opt_register_coalesce();
   bool eliminate_find_live_channel();
   bool is_dep_ctrl_unsafe(const vec4_instruction *inst);
   void opt_set_dependency_control();
   void opt_schedule_instructions();

   /* Append an instruction to the IR stream (0-3 source overloads). */
   vec4_instruction *emit(vec4_instruction *inst);

   vec4_instruction *emit(enum opcode opcode);
   vec4_instruction *emit(enum opcode opcode, const dst_reg &dst);
   vec4_instruction *emit(enum opcode opcode, const dst_reg &dst,
                          const src_reg &src0);
   vec4_instruction *emit(enum opcode opcode, const dst_reg &dst,
                          const src_reg &src0, const src_reg &src1);
   vec4_instruction *emit(enum opcode opcode, const dst_reg &dst,
                          const src_reg &src0, const src_reg &src1,
                          const src_reg &src2);

   vec4_instruction *emit_before(bblock_t *block,
                                 vec4_instruction *inst,
                                 vec4_instruction *new_inst);

   /* Generate emit helpers named after each opcode, taking a dst and
    * 1, 2 or 3 sources respectively.
    */
#define EMIT1(op) vec4_instruction *op(const dst_reg &, const src_reg &);
#define EMIT2(op) vec4_instruction *op(const dst_reg &, const src_reg &, const src_reg &);
#define EMIT3(op) vec4_instruction *op(const dst_reg &, const src_reg &, const src_reg &, const src_reg &);
   EMIT1(MOV)
   EMIT1(NOT)
   EMIT1(RNDD)
   EMIT1(RNDE)
   EMIT1(RNDZ)
   EMIT1(FRC)
   EMIT1(F32TO16)
   EMIT1(F16TO32)
   EMIT2(ADD)
   EMIT2(MUL)
   EMIT2(MACH)
   EMIT2(MAC)
   EMIT2(AND)
   EMIT2(OR)
   EMIT2(XOR)
   EMIT2(DP3)
   EMIT2(DP4)
   EMIT2(DPH)
   EMIT2(SHL)
   EMIT2(SHR)
   EMIT2(ASR)
   vec4_instruction *CMP(dst_reg dst, src_reg src0, src_reg src1,
                         enum brw_conditional_mod condition);
   vec4_instruction *IF(src_reg src0, src_reg src1,
                        enum brw_conditional_mod condition);
   vec4_instruction *IF(enum brw_predicate predicate);
   EMIT1(SCRATCH_READ)
   EMIT2(SCRATCH_WRITE)
   EMIT3(LRP)
   EMIT1(BFREV)
   EMIT3(BFE)
   EMIT2(BFI1)
   EMIT3(BFI2)
   EMIT1(FBH)
   EMIT1(FBL)
   EMIT1(CBIT)
   EMIT3(MAD)
   EMIT2(ADDC)
   EMIT2(SUBB)
#undef EMIT1
#undef EMIT2
#undef EMIT3

   int implied_mrf_writes(vec4_instruction *inst);

   bool try_rewrite_rhs_to_dst(ir_assignment *ir,
                               dst_reg dst,
                               src_reg src,
                               vec4_instruction *pre_rhs_inst,
                               vec4_instruction *last_rhs_inst);

   /** Walks an exec_list of ir_instruction and sends it through this visitor. */
   void visit_instructions(const exec_list *list);

   void emit_vp_sop(enum brw_conditional_mod condmod, dst_reg dst,
                    src_reg src0, src_reg src1, src_reg one);

   void emit_bool_to_cond_code(ir_rvalue *ir, enum brw_predicate *predicate);
   void emit_if_gen6(ir_if *ir);

   vec4_instruction *emit_minmax(enum brw_conditional_mod conditionalmod, dst_reg dst,
                                 src_reg src0, src_reg src1);

   vec4_instruction *emit_lrp(const dst_reg &dst, const src_reg &x,
                              const src_reg &y, const src_reg &a);

   /**
    * Copy any live channel from \p src to the first channel of the
    * result.
    */
   src_reg emit_uniformize(const src_reg &src);

   void emit_block_move(dst_reg *dst, src_reg *src,
                        const struct glsl_type *type, brw_predicate predicate);

   void emit_constant_values(dst_reg *dst, ir_constant *value);

   /**
    * Emit the correct dot-product instruction for the type of arguments
    */
   void emit_dp(dst_reg dst, src_reg src0, src_reg src1, unsigned elements);

   void emit_scalar(ir_instruction *ir, enum prog_opcode op,
                    dst_reg dst, src_reg src0);

   void emit_scalar(ir_instruction *ir, enum prog_opcode op,
                    dst_reg dst, src_reg src0, src_reg src1);

   /* Legalize operands for 3-source and math instructions. */
   src_reg fix_3src_operand(const src_reg &src);
   src_reg resolve_source_modifiers(const src_reg &src);

   vec4_instruction *emit_math(enum opcode opcode, const dst_reg &dst, const src_reg &src0,
                               const src_reg &src1 = src_reg());

   src_reg fix_math_operand(const src_reg &src);

   /* GLSL pack/unpack built-in lowering. */
   void emit_pack_half_2x16(dst_reg dst, src_reg src0);
   void emit_unpack_half_2x16(dst_reg dst, src_reg src0);
   void emit_unpack_unorm_4x8(const dst_reg &dst, src_reg src0);
   void emit_unpack_snorm_4x8(const dst_reg &dst, src_reg src0);
   void emit_pack_unorm_4x8(const dst_reg &dst, const src_reg &src0);
   void emit_pack_snorm_4x8(const dst_reg &dst, const src_reg &src0);

   /* Texturing: shared lowering for both GLSL IR and NIR paths. */
   void emit_texture(ir_texture_opcode op,
                     dst_reg dest,
                     const glsl_type *dest_type,
                     src_reg coordinate,
                     int coord_components,
                     src_reg shadow_comparitor,
                     src_reg lod, src_reg lod2,
                     src_reg sample_index,
                     uint32_t constant_offset,
                     src_reg offset_value,
                     src_reg mcs,
                     bool is_cube_array,
                     uint32_t sampler, src_reg sampler_reg);

   uint32_t gather_channel(unsigned gather_component, uint32_t sampler);
   src_reg emit_mcs_fetch(const glsl_type *coordinate_type, src_reg coordinate,
                          src_reg sampler);
   void emit_gen6_gather_wa(uint8_t wa, dst_reg dst);
   void swizzle_result(ir_texture_opcode op, dst_reg dest,
                       src_reg orig_val, uint32_t sampler,
                       const glsl_type *dest_type);

   /* VUE output construction. */
   void emit_ndc_computation();
   void emit_psiz_and_flags(dst_reg reg);
   vec4_instruction *emit_generic_urb_slot(dst_reg reg, int varying);
   virtual void emit_urb_slot(dst_reg reg, int varying);

   /* INTEL_DEBUG shader-time instrumentation. */
   void emit_shader_time_begin();
   void emit_shader_time_end();
   void emit_shader_time_write(int shader_time_subindex, src_reg value);

   void emit_untyped_atomic(unsigned atomic_op, unsigned surf_index,
                            dst_reg dst, src_reg offset, src_reg src0,
                            src_reg src1);

   void emit_untyped_surface_read(unsigned surf_index, dst_reg dst,
                                  src_reg offset);

   /* Scratch (register spill space) and pull-constant access. */
   src_reg get_scratch_offset(bblock_t *block, vec4_instruction *inst,
                              src_reg *reladdr, int reg_offset);
   src_reg get_pull_constant_offset(bblock_t *block, vec4_instruction *inst,
                                    src_reg *reladdr, int reg_offset);
   void emit_scratch_read(bblock_t *block, vec4_instruction *inst,
                          dst_reg dst,
                          src_reg orig_src,
                          int base_offset);
   void emit_scratch_write(bblock_t *block, vec4_instruction *inst,
                           int base_offset);
   void emit_pull_constant_load(bblock_t *block, vec4_instruction *inst,
                                dst_reg dst,
                                src_reg orig_src,
                                int base_offset);
   void emit_pull_constant_load_reg(dst_reg dst,
                                    src_reg surf_index,
                                    src_reg offset,
                                    bblock_t *before_block,
                                    vec4_instruction *before_inst);
   src_reg emit_resolve_reladdr(int scratch_loc[], bblock_t *block,
                                vec4_instruction *inst, src_reg src);

   bool try_emit_mad(ir_expression *ir);
   bool try_emit_b2f_of_compare(ir_expression *ir);
   void resolve_ud_negate(src_reg *reg);
   void resolve_bool_comparison(ir_rvalue *rvalue, src_reg *reg);

   src_reg get_timestamp();

   bool process_move_condition(ir_rvalue *ir);

   void dump_instruction(backend_instruction *inst);
   void dump_instruction(backend_instruction *inst, FILE *file);

   void visit_atomic_counter_intrinsic(ir_call *ir);

   bool is_high_sampler(src_reg sampler);

   /* NIR front end: translates a nir_shader into vec4 IR.  Virtual so
    * stage-specific subclasses can override individual steps.
    */
   virtual void emit_nir_code();
   virtual void nir_setup_inputs(nir_shader *shader);
   virtual void nir_setup_uniforms(nir_shader *shader);
   virtual void nir_setup_uniform(nir_variable *var);
   virtual void nir_setup_builtin_uniform(nir_variable *var);
   virtual void nir_setup_system_value_intrinsic(nir_intrinsic_instr *instr);
   virtual void nir_setup_system_values(nir_shader *shader);
   virtual void nir_emit_impl(nir_function_impl *impl);
   virtual void nir_emit_cf_list(exec_list *list);
   virtual void nir_emit_if(nir_if *if_stmt);
   virtual void nir_emit_loop(nir_loop *loop);
   virtual void nir_emit_block(nir_block *block);
   virtual void nir_emit_instr(nir_instr *instr);
   virtual void nir_emit_load_const(nir_load_const_instr *instr);
   virtual void nir_emit_intrinsic(nir_intrinsic_instr *instr);
   virtual void nir_emit_alu(nir_alu_instr *instr);
   virtual void nir_emit_jump(nir_jump_instr *instr);
   virtual void nir_emit_texture(nir_tex_instr *instr);
   virtual void nir_emit_undef(nir_ssa_undef_instr *instr);

   dst_reg get_nir_dest(nir_dest dest, enum brw_reg_type type);
   dst_reg get_nir_dest(nir_dest dest, nir_alu_type type);
   dst_reg get_nir_dest(nir_dest dest);
   src_reg get_nir_src(nir_src src, enum brw_reg_type type,
                       unsigned num_components = 4);
   src_reg get_nir_src(nir_src src, nir_alu_type type,
                       unsigned num_components = 4);
   src_reg get_nir_src(nir_src src,
                       unsigned num_components = 4);

   virtual dst_reg *make_reg_for_system_value(int location,
                                              const glsl_type *type) = 0;

   /* Per-NIR-object register mappings, indexed by the NIR indices. */
   dst_reg *nir_locals;
   dst_reg *nir_ssa_values;
   src_reg *nir_inputs;
   dst_reg *nir_system_values;

protected:
   void emit_vertex();
   void lower_attributes_to_hw_regs(const int *attribute_map,
                                    bool interleaved);
   void setup_payload_interference(struct ra_graph *g, int first_payload_node,
                                   int reg_node_count);
   virtual void assign_binding_table_offsets();
   /* Stage-specific hooks implemented by the VS/GS subclasses. */
   virtual void setup_payload() = 0;
   virtual void emit_prolog() = 0;
   virtual void emit_program_code() = 0;
   virtual void emit_thread_end() = 0;
   virtual void emit_urb_write_header(int mrf) = 0;
   virtual vec4_instruction *emit_urb_write_opcode(bool complete) = 0;
   virtual int compute_array_stride(ir_dereference_array *ir);
   virtual void gs_emit_vertex(int stream_id);
   virtual void gs_end_primitive();

private:
   /**
    * If true, then register allocation should fail instead of spilling.
    */
   const bool no_spills;

   int shader_time_index;

   unsigned last_scratch; /**< measured in 32-byte (register size) units */
};
473
474
475 /**
476 * The vertex shader code generator.
477 *
478 * Translates VS IR to actual i965 assembly code.
479 */
class vec4_generator
{
public:
   vec4_generator(const struct brw_compiler *compiler, void *log_data,
                  struct gl_shader_program *shader_prog,
                  struct gl_program *prog,
                  struct brw_vue_prog_data *prog_data,
                  void *mem_ctx,
                  bool debug_flag,
                  const char *stage_name,
                  const char *stage_abbrev);
   ~vec4_generator();

   /* Emits native i965 code for the whole CFG; returns the assembly
    * buffer and stores its size in \p asm_size.
    */
   const unsigned *generate_assembly(const cfg_t *cfg, unsigned *asm_size);

private:
   void generate_code(const cfg_t *cfg);

   /* Math instruction variants for the different hardware generations. */
   void generate_math1_gen4(vec4_instruction *inst,
                            struct brw_reg dst,
                            struct brw_reg src);
   void generate_math2_gen4(vec4_instruction *inst,
                            struct brw_reg dst,
                            struct brw_reg src0,
                            struct brw_reg src1);
   void generate_math_gen6(vec4_instruction *inst,
                           struct brw_reg dst,
                           struct brw_reg src0,
                           struct brw_reg src1);

   void generate_tex(vec4_instruction *inst,
                     struct brw_reg dst,
                     struct brw_reg src,
                     struct brw_reg sampler_index);

   /* URB writes and geometry-shader-specific message setup. */
   void generate_vs_urb_write(vec4_instruction *inst);
   void generate_gs_urb_write(vec4_instruction *inst);
   void generate_gs_urb_write_allocate(vec4_instruction *inst);
   void generate_gs_thread_end(vec4_instruction *inst);
   void generate_gs_set_write_offset(struct brw_reg dst,
                                     struct brw_reg src0,
                                     struct brw_reg src1);
   void generate_gs_set_vertex_count(struct brw_reg dst,
                                     struct brw_reg src);
   void generate_gs_svb_write(vec4_instruction *inst,
                              struct brw_reg dst,
                              struct brw_reg src0,
                              struct brw_reg src1);
   void generate_gs_svb_set_destination_index(vec4_instruction *inst,
                                              struct brw_reg dst,
                                              struct brw_reg src);
   void generate_gs_set_dword_2(struct brw_reg dst, struct brw_reg src);
   void generate_gs_prepare_channel_masks(struct brw_reg dst);
   void generate_gs_set_channel_masks(struct brw_reg dst, struct brw_reg src);
   void generate_gs_get_instance_id(struct brw_reg dst);
   void generate_gs_ff_sync_set_primitives(struct brw_reg dst,
                                           struct brw_reg src0,
                                           struct brw_reg src1,
                                           struct brw_reg src2);
   void generate_gs_ff_sync(vec4_instruction *inst,
                            struct brw_reg dst,
                            struct brw_reg src0,
                            struct brw_reg src1);
   void generate_gs_set_primitive_id(struct brw_reg dst);

   /* Scratch-space and pull-constant message generation. */
   void generate_oword_dual_block_offsets(struct brw_reg m1,
                                          struct brw_reg index);
   void generate_scratch_write(vec4_instruction *inst,
                               struct brw_reg dst,
                               struct brw_reg src,
                               struct brw_reg index);
   void generate_scratch_read(vec4_instruction *inst,
                              struct brw_reg dst,
                              struct brw_reg index);
   void generate_pull_constant_load(vec4_instruction *inst,
                                    struct brw_reg dst,
                                    struct brw_reg index,
                                    struct brw_reg offset);
   void generate_pull_constant_load_gen7(vec4_instruction *inst,
                                         struct brw_reg dst,
                                         struct brw_reg surf_index,
                                         struct brw_reg offset);
   void generate_set_simd4x2_header_gen9(vec4_instruction *inst,
                                         struct brw_reg dst);

   /* VS_OPCODE_GET_BUFFER_SIZE: query a surface's size via the sampler
    * (resinfo); \p index selects the surface binding table entry.
    */
   void generate_get_buffer_size(vec4_instruction *inst,
                                 struct brw_reg dst,
                                 struct brw_reg src,
                                 struct brw_reg index);

   void generate_unpack_flags(struct brw_reg dst);

   const struct brw_compiler *compiler;
   void *log_data; /* Passed to compiler->*_log functions */

   const struct brw_device_info *devinfo;

   /* EU code emission state (brw_eu.h). */
   struct brw_codegen *p;

   struct gl_shader_program *shader_prog;
   const struct gl_program *prog;

   struct brw_vue_prog_data *prog_data;

   void *mem_ctx;
   const char *stage_name;
   const char *stage_abbrev;
   const bool debug_flag;
};
588
589 } /* namespace brw */
590 #endif /* __cplusplus */
591
592 #endif /* BRW_VEC4_H */