i965/nir/vec4: Add shader function implementation
[mesa.git] / src / mesa / drivers / dri / i965 / brw_vec4.h
1 /*
2 * Copyright © 2011 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #ifndef BRW_VEC4_H
25 #define BRW_VEC4_H
26
27 #include <stdint.h>
28 #include "brw_shader.h"
29 #include "main/compiler.h"
30 #include "program/hash_table.h"
31 #include "brw_program.h"
32
33 #ifdef __cplusplus
34 #include "brw_ir_vec4.h"
35
36 extern "C" {
37 #endif
38
39 #include "brw_context.h"
40 #include "brw_eu.h"
41 #include "intel_asm_annotation.h"
42
43 #ifdef __cplusplus
44 }; /* extern "C" */
45 #endif
46
47 #include "glsl/ir.h"
48 #include "glsl/nir/nir.h"
49
50
51 #ifdef __cplusplus
52 extern "C" {
53 #endif
54
/*
 * Prototype only; the definition lives outside this header.  It sits inside
 * an extern "C" block when the header is compiled as C++, so it has C linkage.
 *
 * NOTE(review): presumably fills in *key with the state used when
 * precompiling prog (id looks like the program id) -- confirm against the
 * definition.
 */
55 void
56 brw_vue_setup_prog_key_for_precompile(struct gl_context *ctx,
57 struct brw_vue_prog_key *key,
58 GLuint id, struct gl_program *prog);
59
60 #ifdef __cplusplus
61 } /* extern "C" */
62
63 namespace brw {
64
65 class vec4_live_variables;
66
67 /**
68 * The vertex shader front-end.
69 *
70 * Translates either GLSL IR or Mesa IR (for ARB_vertex_program and
71 * fixed-function) into VS IR.
 *
 * The class derives from both backend_shader and ir_visitor.  It is
 * abstract: make_reg_for_system_value(), setup_payload(), emit_prolog(),
 * emit_program_code(), emit_thread_end(), emit_urb_write_header() and
 * emit_urb_write_opcode() are pure virtual and must be provided by a
 * subclass.
72 */
73 class vec4_visitor : public backend_shader, public ir_visitor
74 {
75 public:
76 vec4_visitor(const struct brw_compiler *compiler,
77 void *log_data,
78 struct gl_program *prog,
79 const struct brw_vue_prog_key *key,
80 struct brw_vue_prog_data *prog_data,
81 struct gl_shader_program *shader_prog,
82 gl_shader_stage stage,
83 void *mem_ctx,
84 bool no_spills,
85 int shader_time_index);
86 ~vec4_visitor();
87
/*
 * Null-register destinations.  dst_null_f() wraps brw_null_reg() as-is;
 * dst_null_d() and dst_null_ud() first retype it to BRW_REGISTER_TYPE_D
 * and BRW_REGISTER_TYPE_UD respectively.
 */
88 dst_reg dst_null_f()
89 {
90 return dst_reg(brw_null_reg());
91 }
92
93 dst_reg dst_null_d()
94 {
95 return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
96 }
97
98 dst_reg dst_null_ud()
99 {
100 return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
101 }
102
/*
 * Both pointers below are themselves const (they cannot be reseated after
 * construction); key additionally points to const data.
 */
103 const struct brw_vue_prog_key * const key;
104 struct brw_vue_prog_data * const prog_data;
105 unsigned int sanity_param_count;
106
/* NOTE(review): presumably written by fail() -- confirm in the .cpp file. */
107 char *fail_msg;
108 bool failed;
109
110 /**
111 * GLSL IR currently being processed, which is associated with our
112 * driver IR instructions for debugging purposes.
113 */
114 const void *base_ir;
115 const char *current_annotation;
116
117 int first_non_payload_grf;
118 unsigned int max_grf;
119 int *virtual_grf_start;
120 int *virtual_grf_end;
121 brw::vec4_live_variables *live_intervals;
122 dst_reg userplane[MAX_CLIP_PLANES];
123
124 dst_reg *variable_storage(ir_variable *var);
125
126 void reladdr_to_temp(ir_instruction *ir, src_reg *reg, int *num_reladdr);
127
128 bool need_all_constants_in_pull_buffer;
129
130 /**
131 * \name Visit methods
132 *
133 * As typical for the visitor pattern, there must be one \c visit method for
134 * each concrete subclass of \c ir_instruction. Virtual base classes within
135 * the hierarchy should not have \c visit methods.
136 */
137 /*@{*/
138 virtual void visit(ir_variable *);
139 virtual void visit(ir_loop *);
140 virtual void visit(ir_loop_jump *);
141 virtual void visit(ir_function_signature *);
142 virtual void visit(ir_function *);
143 virtual void visit(ir_expression *);
144 virtual void visit(ir_swizzle *);
145 virtual void visit(ir_dereference_variable *);
146 virtual void visit(ir_dereference_array *);
147 virtual void visit(ir_dereference_record *);
148 virtual void visit(ir_assignment *);
149 virtual void visit(ir_constant *);
150 virtual void visit(ir_call *);
151 virtual void visit(ir_return *);
152 virtual void visit(ir_discard *);
153 virtual void visit(ir_texture *);
154 virtual void visit(ir_if *);
155 virtual void visit(ir_emit_vertex *);
156 virtual void visit(ir_end_primitive *);
157 virtual void visit(ir_barrier *);
158 /*@}*/
159
160 src_reg result;
161
162 /* Regs for vertex results. Generated at ir_variable visiting time
163 * for the ir->location's used.
164 */
165 dst_reg output_reg[BRW_VARYING_SLOT_COUNT];
166 const char *output_reg_annotation[BRW_VARYING_SLOT_COUNT];
167 int *uniform_size;
168 int *uniform_vector_size;
169 int uniform_array_size; /**< Size of uniform_[vector_]size arrays */
170 int uniforms;
171
172 src_reg shader_start_time;
173
174 struct hash_table *variable_ht;
175
176 bool run(gl_clip_plane *clip_planes);
177 void fail(const char *msg, ...);
178
179 void setup_uniform_clipplane_values(gl_clip_plane *clip_planes);
180 virtual void setup_vector_uniform_values(const gl_constant_value *values,
181 unsigned n);
182 void setup_uniform_values(ir_variable *ir);
183 void setup_builtin_uniform_values(ir_variable *ir);
184 int setup_uniforms(int payload_reg);
185
/* Register allocation, spilling and optimization passes. */
186 bool reg_allocate_trivial();
187 bool reg_allocate();
188 void evaluate_spill_costs(float *spill_costs, bool *no_spill);
189 int choose_spill_reg(struct ra_graph *g);
190 void spill_reg(int spill_reg);
191 void move_grf_array_access_to_scratch();
192 void move_uniform_array_access_to_pull_constants();
193 void move_push_constants_to_pull_constants();
194 void split_uniform_registers();
195 void pack_uniform_registers();
196 void calculate_live_intervals();
197 void invalidate_live_intervals();
198 void split_virtual_grfs();
199 bool opt_vector_float();
200 bool opt_reduce_swizzle();
201 bool dead_code_eliminate();
202 int var_range_start(unsigned v, unsigned n) const;
203 int var_range_end(unsigned v, unsigned n) const;
204 bool virtual_grf_interferes(int a, int b);
/* do_constant_prop may be omitted by callers; it then defaults to true. */
205 bool opt_copy_propagation(bool do_constant_prop = true);
206 bool opt_cse_local(bblock_t *block);
207 bool opt_cse();
208 bool opt_algebraic();
209 bool opt_register_coalesce();
210 bool eliminate_find_live_channel();
211 bool is_dep_ctrl_unsafe(const vec4_instruction *inst);
212 void opt_set_dependency_control();
213 void opt_schedule_instructions();
214
/*
 * emit() overloads: one taking an already-built instruction, the rest
 * taking an opcode plus an optional destination and up to three sources.
 */
215 vec4_instruction *emit(vec4_instruction *inst);
216
217 vec4_instruction *emit(enum opcode opcode);
218 vec4_instruction *emit(enum opcode opcode, const dst_reg &dst);
219 vec4_instruction *emit(enum opcode opcode, const dst_reg &dst,
220 const src_reg &src0);
221 vec4_instruction *emit(enum opcode opcode, const dst_reg &dst,
222 const src_reg &src0, const src_reg &src1);
223 vec4_instruction *emit(enum opcode opcode, const dst_reg &dst,
224 const src_reg &src0, const src_reg &src1,
225 const src_reg &src2);
226
227 vec4_instruction *emit_before(bblock_t *block,
228 vec4_instruction *inst,
229 vec4_instruction *new_inst);
230
/*
 * EMITn(op) declares a member function named `op` that takes one
 * destination register and n source registers and returns a
 * vec4_instruction pointer.  The helper macros are #undef'd as soon as
 * the list of declarations is complete.
 */
231 #define EMIT1(op) vec4_instruction *op(const dst_reg &, const src_reg &);
232 #define EMIT2(op) vec4_instruction *op(const dst_reg &, const src_reg &, const src_reg &);
233 #define EMIT3(op) vec4_instruction *op(const dst_reg &, const src_reg &, const src_reg &, const src_reg &);
234 EMIT1(MOV)
235 EMIT1(NOT)
236 EMIT1(RNDD)
237 EMIT1(RNDE)
238 EMIT1(RNDZ)
239 EMIT1(FRC)
240 EMIT1(F32TO16)
241 EMIT1(F16TO32)
242 EMIT2(ADD)
243 EMIT2(MUL)
244 EMIT2(MACH)
245 EMIT2(MAC)
246 EMIT2(AND)
247 EMIT2(OR)
248 EMIT2(XOR)
249 EMIT2(DP3)
250 EMIT2(DP4)
251 EMIT2(DPH)
252 EMIT2(SHL)
253 EMIT2(SHR)
254 EMIT2(ASR)
/* CMP and IF are declared by hand because they take extra, non-register
 * arguments (a conditional modifier or a predicate). */
255 vec4_instruction *CMP(dst_reg dst, src_reg src0, src_reg src1,
256 enum brw_conditional_mod condition);
257 vec4_instruction *IF(src_reg src0, src_reg src1,
258 enum brw_conditional_mod condition);
259 vec4_instruction *IF(enum brw_predicate predicate);
260 EMIT1(SCRATCH_READ)
261 EMIT2(SCRATCH_WRITE)
262 EMIT3(LRP)
263 EMIT1(BFREV)
264 EMIT3(BFE)
265 EMIT2(BFI1)
266 EMIT3(BFI2)
267 EMIT1(FBH)
268 EMIT1(FBL)
269 EMIT1(CBIT)
270 EMIT3(MAD)
271 EMIT2(ADDC)
272 EMIT2(SUBB)
273 #undef EMIT1
274 #undef EMIT2
275 #undef EMIT3
276
277 int implied_mrf_writes(vec4_instruction *inst);
278
279 bool try_rewrite_rhs_to_dst(ir_assignment *ir,
280 dst_reg dst,
281 src_reg src,
282 vec4_instruction *pre_rhs_inst,
283 vec4_instruction *last_rhs_inst);
284
285 /** Walks an exec_list of ir_instruction and sends it through this visitor. */
286 void visit_instructions(const exec_list *list);
287
288 void emit_vp_sop(enum brw_conditional_mod condmod, dst_reg dst,
289 src_reg src0, src_reg src1, src_reg one);
290
291 void emit_bool_to_cond_code(ir_rvalue *ir, enum brw_predicate *predicate);
292 void emit_if_gen6(ir_if *ir);
293
294 void emit_minmax(enum brw_conditional_mod conditionalmod, dst_reg dst,
295 src_reg src0, src_reg src1);
296
297 void emit_lrp(const dst_reg &dst,
298 const src_reg &x, const src_reg &y, const src_reg &a);
299
300 /**
301 * Copy any live channel from \p src to the first channel of the
302 * result.
303 */
304 src_reg emit_uniformize(const src_reg &src);
305
306 void emit_block_move(dst_reg *dst, src_reg *src,
307 const struct glsl_type *type, brw_predicate predicate);
308
309 void emit_constant_values(dst_reg *dst, ir_constant *value);
310
311 /**
312 * Emit the correct dot-product instruction for the type of arguments
313 */
314 void emit_dp(dst_reg dst, src_reg src0, src_reg src1, unsigned elements);
315
316 void emit_scalar(ir_instruction *ir, enum prog_opcode op,
317 dst_reg dst, src_reg src0);
318
319 void emit_scalar(ir_instruction *ir, enum prog_opcode op,
320 dst_reg dst, src_reg src0, src_reg src1);
321
322 src_reg fix_3src_operand(src_reg src);
323
/* src1 may be omitted; it then defaults to a default-constructed src_reg. */
324 void emit_math(enum opcode opcode, const dst_reg &dst, const src_reg &src0,
325 const src_reg &src1 = src_reg());
326 src_reg fix_math_operand(src_reg src);
327
328 void emit_pack_half_2x16(dst_reg dst, src_reg src0);
329 void emit_unpack_half_2x16(dst_reg dst, src_reg src0);
330 void emit_unpack_unorm_4x8(const dst_reg &dst, src_reg src0);
331 void emit_unpack_snorm_4x8(const dst_reg &dst, src_reg src0);
332 void emit_pack_unorm_4x8(const dst_reg &dst, const src_reg &src0);
333 void emit_pack_snorm_4x8(const dst_reg &dst, const src_reg &src0);
334
335 uint32_t gather_channel(ir_texture *ir, uint32_t sampler);
336 src_reg emit_mcs_fetch(ir_texture *ir, src_reg coordinate, src_reg sampler);
337 void emit_gen6_gather_wa(uint8_t wa, dst_reg dst);
338 void swizzle_result(ir_texture *ir, src_reg orig_val, uint32_t sampler);
339
340 void emit_ndc_computation();
341 void emit_psiz_and_flags(dst_reg reg);
342 void emit_clip_distances(dst_reg reg, int offset);
343 vec4_instruction *emit_generic_urb_slot(dst_reg reg, int varying);
344 void emit_urb_slot(dst_reg reg, int varying);
345
346 void emit_shader_time_begin();
347 void emit_shader_time_end();
348 void emit_shader_time_write(int shader_time_subindex, src_reg value);
349
350 void emit_untyped_atomic(unsigned atomic_op, unsigned surf_index,
351 dst_reg dst, src_reg offset, src_reg src0,
352 src_reg src1);
353
354 void emit_untyped_surface_read(unsigned surf_index, dst_reg dst,
355 src_reg offset);
356
357 src_reg get_scratch_offset(bblock_t *block, vec4_instruction *inst,
358 src_reg *reladdr, int reg_offset);
359 src_reg get_pull_constant_offset(bblock_t *block, vec4_instruction *inst,
360 src_reg *reladdr, int reg_offset);
361 void emit_scratch_read(bblock_t *block, vec4_instruction *inst,
362 dst_reg dst,
363 src_reg orig_src,
364 int base_offset);
365 void emit_scratch_write(bblock_t *block, vec4_instruction *inst,
366 int base_offset);
367 void emit_pull_constant_load(bblock_t *block, vec4_instruction *inst,
368 dst_reg dst,
369 src_reg orig_src,
370 int base_offset);
371 void emit_pull_constant_load_reg(dst_reg dst,
372 src_reg surf_index,
373 src_reg offset,
374 bblock_t *before_block,
375 vec4_instruction *before_inst);
376 src_reg emit_resolve_reladdr(int scratch_loc[], bblock_t *block,
377 vec4_instruction *inst, src_reg src);
378
379 bool try_emit_mad(ir_expression *ir);
380 bool try_emit_b2f_of_compare(ir_expression *ir);
381 void resolve_ud_negate(src_reg *reg);
382 void resolve_bool_comparison(ir_rvalue *rvalue, src_reg *reg);
383
384 src_reg get_timestamp();
385
386 bool process_move_condition(ir_rvalue *ir);
387
388 void dump_instruction(backend_instruction *inst);
389 void dump_instruction(backend_instruction *inst, FILE *file);
390
391 void visit_atomic_counter_intrinsic(ir_call *ir);
392
393 int type_size(const struct glsl_type *type);
394
/*
 * NIR translation entry points.  All of them are virtual, so a subclass
 * can override any individual step.
 * NOTE(review): presumably emit_nir_code() is the top-level driver that
 * calls the nir_setup_* and nir_emit_* methods -- confirm in the .cpp file.
 */
395 virtual void emit_nir_code();
396 virtual void nir_setup_inputs(nir_shader *shader);
397 virtual void nir_setup_uniforms(nir_shader *shader);
398 virtual void nir_setup_uniform(nir_variable *var);
399 virtual void nir_setup_builtin_uniform(nir_variable *var);
400 virtual void nir_setup_system_value_intrinsic(nir_intrinsic_instr *instr);
401 virtual void nir_setup_system_values(nir_shader *shader);
402 virtual void nir_emit_impl(nir_function_impl *impl);
403 virtual void nir_emit_cf_list(exec_list *list);
404 virtual void nir_emit_if(nir_if *if_stmt);
405 virtual void nir_emit_loop(nir_loop *loop);
406 virtual void nir_emit_block(nir_block *block);
407 virtual void nir_emit_instr(nir_instr *instr);
408 virtual void nir_emit_load_const(nir_load_const_instr *instr);
409 virtual void nir_emit_intrinsic(nir_intrinsic_instr *instr);
410 virtual void nir_emit_alu(nir_alu_instr *instr);
411 virtual void nir_emit_jump(nir_jump_instr *instr);
412 virtual void nir_emit_texture(nir_tex_instr *instr);
413
/* Pure virtual: every concrete subclass must implement this. */
414 virtual dst_reg *make_reg_for_system_value(int location,
415 const glsl_type *type) = 0;
416
/*
 * NOTE(review): presumably lookup tables filled in by the nir_setup_*
 * methods above; allocation and ownership are not visible from this
 * header -- TODO confirm.
 */
417 dst_reg *nir_locals;
418 src_reg *nir_inputs;
419 unsigned *nir_uniform_driver_location;
420 dst_reg *nir_system_values;
421
422 protected:
423 void emit_vertex();
424 void lower_attributes_to_hw_regs(const int *attribute_map,
425 bool interleaved);
426 void setup_payload_interference(struct ra_graph *g, int first_payload_node,
427 int reg_node_count);
428 virtual void assign_binding_table_offsets();
/* The hooks marked "= 0" below must be implemented by subclasses. */
429 virtual void setup_payload() = 0;
430 virtual void emit_prolog() = 0;
431 virtual void emit_program_code() = 0;
432 virtual void emit_thread_end() = 0;
433 virtual void emit_urb_write_header(int mrf) = 0;
434 virtual vec4_instruction *emit_urb_write_opcode(bool complete) = 0;
435 virtual int compute_array_stride(ir_dereference_array *ir);
436
437 private:
438 /**
439 * If true, then register allocation should fail instead of spilling.
440 */
441 const bool no_spills;
442
443 int shader_time_index;
444
445 unsigned last_scratch; /**< measured in 32-byte (register size) units */
446 };
447
448
449 /**
450 * The vertex shader code generator.
451 *
452 * Translates VS IR to actual i965 assembly code.
 *
 * Only the constructor, the destructor and generate_assembly() are public.
 * The generate_* helpers and all data members are private.
453 */
454 class vec4_generator
455 {
456 public:
457 vec4_generator(const struct brw_compiler *compiler, void *log_data,
458 struct gl_shader_program *shader_prog,
459 struct gl_program *prog,
460 struct brw_vue_prog_data *prog_data,
461 void *mem_ctx,
462 bool debug_flag,
463 const char *stage_name,
464 const char *stage_abbrev);
465 ~vec4_generator();
466
/*
 * Takes a CFG and an out-parameter and returns a pointer to const unsigned.
 * NOTE(review): presumably the return value is the generated program and
 * *asm_size receives its size -- confirm units and ownership in the .cpp.
 */
467 const unsigned *generate_assembly(const cfg_t *cfg, unsigned *asm_size);
468
469 private:
470 void generate_code(const cfg_t *cfg);
471
/* Math helpers; the suffix names the hardware generation they target. */
472 void generate_math1_gen4(vec4_instruction *inst,
473 struct brw_reg dst,
474 struct brw_reg src);
475 void generate_math2_gen4(vec4_instruction *inst,
476 struct brw_reg dst,
477 struct brw_reg src0,
478 struct brw_reg src1);
479 void generate_math_gen6(vec4_instruction *inst,
480 struct brw_reg dst,
481 struct brw_reg src0,
482 struct brw_reg src1);
483
484 void generate_tex(vec4_instruction *inst,
485 struct brw_reg dst,
486 struct brw_reg src,
487 struct brw_reg sampler_index);
488
/* URB write and geometry-shader (generate_gs_*) helpers. */
489 void generate_vs_urb_write(vec4_instruction *inst);
490 void generate_gs_urb_write(vec4_instruction *inst);
491 void generate_gs_urb_write_allocate(vec4_instruction *inst);
492 void generate_gs_thread_end(vec4_instruction *inst);
493 void generate_gs_set_write_offset(struct brw_reg dst,
494 struct brw_reg src0,
495 struct brw_reg src1);
496 void generate_gs_set_vertex_count(struct brw_reg dst,
497 struct brw_reg src);
498 void generate_gs_svb_write(vec4_instruction *inst,
499 struct brw_reg dst,
500 struct brw_reg src0,
501 struct brw_reg src1);
502 void generate_gs_svb_set_destination_index(vec4_instruction *inst,
503 struct brw_reg dst,
504 struct brw_reg src);
505 void generate_gs_set_dword_2(struct brw_reg dst, struct brw_reg src);
506 void generate_gs_prepare_channel_masks(struct brw_reg dst);
507 void generate_gs_set_channel_masks(struct brw_reg dst, struct brw_reg src);
508 void generate_gs_get_instance_id(struct brw_reg dst);
509 void generate_gs_ff_sync_set_primitives(struct brw_reg dst,
510 struct brw_reg src0,
511 struct brw_reg src1,
512 struct brw_reg src2);
513 void generate_gs_ff_sync(vec4_instruction *inst,
514 struct brw_reg dst,
515 struct brw_reg src0,
516 struct brw_reg src1);
517 void generate_gs_set_primitive_id(struct brw_reg dst);
/* Scratch-space and pull-constant access helpers. */
518 void generate_oword_dual_block_offsets(struct brw_reg m1,
519 struct brw_reg index);
520 void generate_scratch_write(vec4_instruction *inst,
521 struct brw_reg dst,
522 struct brw_reg src,
523 struct brw_reg index);
524 void generate_scratch_read(vec4_instruction *inst,
525 struct brw_reg dst,
526 struct brw_reg index);
527 void generate_pull_constant_load(vec4_instruction *inst,
528 struct brw_reg dst,
529 struct brw_reg index,
530 struct brw_reg offset);
531 void generate_pull_constant_load_gen7(vec4_instruction *inst,
532 struct brw_reg dst,
533 struct brw_reg surf_index,
534 struct brw_reg offset);
535 void generate_set_simd4x2_header_gen9(vec4_instruction *inst,
536 struct brw_reg dst);
537 void generate_unpack_flags(struct brw_reg dst);
538
/*
 * State.  NOTE(review): most of these mirror constructor parameters of the
 * same name and are presumably stored from them -- confirm in the .cpp.
 */
539 const struct brw_compiler *compiler;
540 void *log_data; /* Passed to compiler->*_log functions */
541
542 const struct brw_device_info *devinfo;
543
544 struct brw_codegen *p;
545
546 struct gl_shader_program *shader_prog;
547 const struct gl_program *prog;
548
549 struct brw_vue_prog_data *prog_data;
550
551 void *mem_ctx;
552 const char *stage_name;
553 const char *stage_abbrev;
554 const bool debug_flag;
555 };
556
557 } /* namespace brw */
558 #endif /* __cplusplus */
559
560 #endif /* BRW_VEC4_H */