i965/backend_shader: Add a field to store the NIR shader
[mesa.git] / src / mesa / drivers / dri / i965 / brw_vec4.h
1 /*
2 * Copyright © 2011 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #ifndef BRW_VEC4_H
25 #define BRW_VEC4_H
26
27 #include <stdint.h>
28 #include "brw_shader.h"
29 #include "main/compiler.h"
30 #include "program/hash_table.h"
31 #include "brw_program.h"
32
33 #ifdef __cplusplus
34 #include "brw_ir_vec4.h"
35
36 extern "C" {
37 #endif
38
39 #include "brw_context.h"
40 #include "brw_eu.h"
41 #include "intel_asm_annotation.h"
42
43 #ifdef __cplusplus
44 }; /* extern "C" */
45 #endif
46
47 #include "glsl/ir.h"
48 #include "glsl/nir/nir.h"
49
50
51 #ifdef __cplusplus
52 extern "C" {
53 #endif
54
55 #ifdef __cplusplus
56 } /* extern "C" */
57
58 namespace brw {
59
60 class vec4_live_variables;
61
62 /**
63 * The vertex shader front-end: a backend_shader that builds vec4_instruction IR.
64 *
65 * Described as translating either GLSL IR or Mesa IR (for ARB_vertex_program and
66 * fixed-function) into VS IR. NOTE(review): in this header the class declares NIR entry points (emit_nir_code(), nir_emit_*()) and no GLSL IR / Mesa IR visit methods, and it takes a gl_shader_stage and declares gs_emit_vertex()/gs_end_primitive() hooks, so the "vertex shader" and "GLSL IR or Mesa IR" wording looks stale -- confirm.
67 */
68 class vec4_visitor : public backend_shader
69 {
70 public: /* Constructor arguments no_spills and shader_time_index have same-named private members at the end of the class. */
71 vec4_visitor(const struct brw_compiler *compiler,
72 void *log_data,
73 struct gl_program *prog,
74 const struct brw_sampler_prog_key_data *key,
75 struct brw_vue_prog_data *prog_data,
76 struct gl_shader_program *shader_prog,
77 gl_shader_stage stage,
78 void *mem_ctx,
79 bool no_spills,
80 int shader_time_index);
81 ~vec4_visitor();
82 /* Null-register destinations: brw_null_reg() as-is (f), or retyped to BRW_REGISTER_TYPE_D (d) / BRW_REGISTER_TYPE_UD (ud). */
83 dst_reg dst_null_f()
84 {
85 return dst_reg(brw_null_reg());
86 }
87
88 dst_reg dst_null_d()
89 {
90 return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
91 }
92
93 dst_reg dst_null_ud()
94 {
95 return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
96 }
97 /* Both pointers below are themselves const ("* const"), so they can only be set when the object is constructed. */
98 const struct brw_sampler_prog_key_data * const key_tex;
99 struct brw_vue_prog_data * const prog_data;
100 unsigned int sanity_param_count;
101 /* Failure state: a message string and a flag. NOTE(review): presumably filled in by fail() -- confirm in the definition. */
102 char *fail_msg;
103 bool failed;
104
105 /**
106 * GLSL IR currently being processed, which is associated with our
107 * driver IR instructions for debugging purposes.
108 */
109 const void *base_ir;
110 const char *current_annotation;
111
112 int first_non_payload_grf;
113 unsigned int max_grf;
114 int *virtual_grf_start;
115 int *virtual_grf_end;
116 brw::vec4_live_variables *live_intervals;
117 dst_reg userplane[MAX_CLIP_PLANES]; /**< One destination register per clip-plane slot (MAX_CLIP_PLANES entries). */
118
119 bool need_all_constants_in_pull_buffer;
120
121 /* Regs for vertex results. Generated at ir_variable visiting time
122 * for the ir->location's used.
123 */
124 dst_reg output_reg[BRW_VARYING_SLOT_COUNT];
125 const char *output_reg_annotation[BRW_VARYING_SLOT_COUNT];
126 int *uniform_size;
127 int uniform_array_size; /**< Size of the uniform_size array */
128 int uniforms;
129
130 src_reg shader_start_time;
131 /* run() returns a bool status; fail() takes a message plus variadic arguments (presumably printf-style -- confirm). */
132 bool run();
133 void fail(const char *msg, ...);
134
135 int setup_uniforms(int payload_reg);
136 /* Register-allocation, spilling, uniform-handling and opt_* pass declarations; their behavior is defined out of line, not in this header. */
137 bool reg_allocate_trivial();
138 bool reg_allocate();
139 void evaluate_spill_costs(float *spill_costs, bool *no_spill);
140 int choose_spill_reg(struct ra_graph *g);
141 void spill_reg(int spill_reg);
142 void move_grf_array_access_to_scratch();
143 void move_uniform_array_access_to_pull_constants();
144 void move_push_constants_to_pull_constants();
145 void split_uniform_registers();
146 void pack_uniform_registers();
147 void calculate_live_intervals();
148 void invalidate_live_intervals();
149 void split_virtual_grfs();
150 bool opt_vector_float();
151 bool opt_reduce_swizzle();
152 bool dead_code_eliminate();
153 int var_range_start(unsigned v, unsigned n) const;
154 int var_range_end(unsigned v, unsigned n) const;
155 bool virtual_grf_interferes(int a, int b);
156 bool opt_copy_propagation(bool do_constant_prop = true);
157 bool opt_cse_local(bblock_t *block);
158 bool opt_cse();
159 bool opt_algebraic();
160 bool opt_register_coalesce();
161 bool eliminate_find_live_channel();
162 bool is_dep_ctrl_unsafe(const vec4_instruction *inst);
163 void opt_set_dependency_control();
164 void opt_schedule_instructions();
165 /* emit() overloads: take a prebuilt instruction, or an opcode plus an optional destination and up to three sources; each returns a vec4_instruction pointer. */
166 vec4_instruction *emit(vec4_instruction *inst);
167
168 vec4_instruction *emit(enum opcode opcode);
169 vec4_instruction *emit(enum opcode opcode, const dst_reg &dst);
170 vec4_instruction *emit(enum opcode opcode, const dst_reg &dst,
171 const src_reg &src0);
172 vec4_instruction *emit(enum opcode opcode, const dst_reg &dst,
173 const src_reg &src0, const src_reg &src1);
174 vec4_instruction *emit(enum opcode opcode, const dst_reg &dst,
175 const src_reg &src0, const src_reg &src1,
176 const src_reg &src2);
177 /* Takes an explicit block and reference instruction. NOTE(review): presumably inserts new_inst before inst in block -- confirm. */
178 vec4_instruction *emit_before(bblock_t *block,
179 vec4_instruction *inst,
180 vec4_instruction *new_inst);
181 /* EMIT1/EMIT2/EMIT3(op) each declare a member function named op that takes a dst_reg plus 1, 2 or 3 src_reg operands and returns vec4_instruction *; the macros are #undef'd after the list. */
182 #define EMIT1(op) vec4_instruction *op(const dst_reg &, const src_reg &);
183 #define EMIT2(op) vec4_instruction *op(const dst_reg &, const src_reg &, const src_reg &);
184 #define EMIT3(op) vec4_instruction *op(const dst_reg &, const src_reg &, const src_reg &, const src_reg &);
185 EMIT1(MOV)
186 EMIT1(NOT)
187 EMIT1(RNDD)
188 EMIT1(RNDE)
189 EMIT1(RNDZ)
190 EMIT1(FRC)
191 EMIT1(F32TO16)
192 EMIT1(F16TO32)
193 EMIT2(ADD)
194 EMIT2(MUL)
195 EMIT2(MACH)
196 EMIT2(MAC)
197 EMIT2(AND)
198 EMIT2(OR)
199 EMIT2(XOR)
200 EMIT2(DP3)
201 EMIT2(DP4)
202 EMIT2(DPH)
203 EMIT2(SHL)
204 EMIT2(SHR)
205 EMIT2(ASR)
206 vec4_instruction *CMP(dst_reg dst, src_reg src0, src_reg src1,
207 enum brw_conditional_mod condition);
208 vec4_instruction *IF(src_reg src0, src_reg src1,
209 enum brw_conditional_mod condition);
210 vec4_instruction *IF(enum brw_predicate predicate);
211 EMIT1(SCRATCH_READ)
212 EMIT2(SCRATCH_WRITE)
213 EMIT3(LRP)
214 EMIT1(BFREV)
215 EMIT3(BFE)
216 EMIT2(BFI1)
217 EMIT3(BFI2)
218 EMIT1(FBH)
219 EMIT1(FBL)
220 EMIT1(CBIT)
221 EMIT3(MAD)
222 EMIT2(ADDC)
223 EMIT2(SUBB)
224 #undef EMIT1
225 #undef EMIT2
226 #undef EMIT3
227
228 int implied_mrf_writes(vec4_instruction *inst);
229
230 void emit_vp_sop(enum brw_conditional_mod condmod, dst_reg dst,
231 src_reg src0, src_reg src1, src_reg one);
232
233 vec4_instruction *emit_minmax(enum brw_conditional_mod conditionalmod, dst_reg dst,
234 src_reg src0, src_reg src1);
235
236 vec4_instruction *emit_lrp(const dst_reg &dst, const src_reg &x,
237 const src_reg &y, const src_reg &a);
238
239 /**
240 * Copy any live channel from \p src to the first channel of the
241 * result.
242 */
243 src_reg emit_uniformize(const src_reg &src);
244
245 /**
246 * Emit the correct dot-product instruction for the type of arguments
247 */
248 void emit_dp(dst_reg dst, src_reg src0, src_reg src1, unsigned elements);
249
250 src_reg fix_3src_operand(const src_reg &src);
251 src_reg resolve_source_modifiers(const src_reg &src);
252 /* src1 defaults to a default-constructed src_reg, so single-source math calls may omit it. */
253 vec4_instruction *emit_math(enum opcode opcode, const dst_reg &dst, const src_reg &src0,
254 const src_reg &src1 = src_reg());
255
256 src_reg fix_math_operand(const src_reg &src);
257
258 void emit_pack_half_2x16(dst_reg dst, src_reg src0);
259 void emit_unpack_half_2x16(dst_reg dst, src_reg src0);
260 void emit_unpack_unorm_4x8(const dst_reg &dst, src_reg src0);
261 void emit_unpack_snorm_4x8(const dst_reg &dst, src_reg src0);
262 void emit_pack_unorm_4x8(const dst_reg &dst, const src_reg &src0);
263 void emit_pack_snorm_4x8(const dst_reg &dst, const src_reg &src0);
264 /* Texture emission; the sampler is passed both as an immediate index (sampler) and as a register (sampler_reg). */
265 void emit_texture(ir_texture_opcode op,
266 dst_reg dest,
267 const glsl_type *dest_type,
268 src_reg coordinate,
269 int coord_components,
270 src_reg shadow_comparitor,
271 src_reg lod, src_reg lod2,
272 src_reg sample_index,
273 uint32_t constant_offset,
274 src_reg offset_value,
275 src_reg mcs,
276 bool is_cube_array,
277 uint32_t sampler, src_reg sampler_reg);
278
279 uint32_t gather_channel(unsigned gather_component, uint32_t sampler);
280 src_reg emit_mcs_fetch(const glsl_type *coordinate_type, src_reg coordinate,
281 src_reg sampler);
282 void emit_gen6_gather_wa(uint8_t wa, dst_reg dst);
283 void swizzle_result(ir_texture_opcode op, dst_reg dest,
284 src_reg orig_val, uint32_t sampler,
285 const glsl_type *dest_type);
286 /* Of the four declarations below only emit_urb_slot() is virtual, i.e. overridable by subclasses. */
287 void emit_ndc_computation();
288 void emit_psiz_and_flags(dst_reg reg);
289 vec4_instruction *emit_generic_urb_slot(dst_reg reg, int varying);
290 virtual void emit_urb_slot(dst_reg reg, int varying);
291
292 void emit_shader_time_begin();
293 void emit_shader_time_end();
294 void emit_shader_time_write(int shader_time_subindex, src_reg value);
295
296 void emit_untyped_atomic(unsigned atomic_op, unsigned surf_index,
297 dst_reg dst, src_reg offset, src_reg src0,
298 src_reg src1);
299
300 void emit_untyped_surface_read(unsigned surf_index, dst_reg dst,
301 src_reg offset);
302 /* Scratch and pull-constant helpers; each takes the block and instruction it operates relative to. */
303 src_reg get_scratch_offset(bblock_t *block, vec4_instruction *inst,
304 src_reg *reladdr, int reg_offset);
305 src_reg get_pull_constant_offset(bblock_t *block, vec4_instruction *inst,
306 src_reg *reladdr, int reg_offset);
307 void emit_scratch_read(bblock_t *block, vec4_instruction *inst,
308 dst_reg dst,
309 src_reg orig_src,
310 int base_offset);
311 void emit_scratch_write(bblock_t *block, vec4_instruction *inst,
312 int base_offset);
313 void emit_pull_constant_load(bblock_t *block, vec4_instruction *inst,
314 dst_reg dst,
315 src_reg orig_src,
316 int base_offset);
317 void emit_pull_constant_load_reg(dst_reg dst,
318 src_reg surf_index,
319 src_reg offset,
320 bblock_t *before_block,
321 vec4_instruction *before_inst);
322 src_reg emit_resolve_reladdr(int scratch_loc[], bblock_t *block,
323 vec4_instruction *inst, src_reg src);
324
325 void resolve_ud_negate(src_reg *reg);
326
327 src_reg get_timestamp();
328 /* Two overloads: one takes only the instruction, the other also takes the FILE to write to. */
329 void dump_instruction(backend_instruction *inst);
330 void dump_instruction(backend_instruction *inst, FILE *file);
331
332 bool is_high_sampler(src_reg sampler);
333 /* NIR translation hooks; every one is virtual so a subclass can override the handling of a given NIR construct. */
334 virtual void emit_nir_code();
335 virtual void nir_setup_inputs();
336 virtual void nir_setup_uniforms();
337 virtual void nir_setup_system_value_intrinsic(nir_intrinsic_instr *instr);
338 virtual void nir_setup_system_values();
339 virtual void nir_emit_impl(nir_function_impl *impl);
340 virtual void nir_emit_cf_list(exec_list *list);
341 virtual void nir_emit_if(nir_if *if_stmt);
342 virtual void nir_emit_loop(nir_loop *loop);
343 virtual void nir_emit_block(nir_block *block);
344 virtual void nir_emit_instr(nir_instr *instr);
345 virtual void nir_emit_load_const(nir_load_const_instr *instr);
346 virtual void nir_emit_intrinsic(nir_intrinsic_instr *instr);
347 virtual void nir_emit_alu(nir_alu_instr *instr);
348 virtual void nir_emit_jump(nir_jump_instr *instr);
349 virtual void nir_emit_texture(nir_tex_instr *instr);
350 virtual void nir_emit_undef(nir_ssa_undef_instr *instr);
351 virtual void nir_emit_ssbo_atomic(int op, nir_intrinsic_instr *instr);
352 /* get_nir_dest()/get_nir_src() overloads: the type may be given as a brw_reg_type, a nir_alu_type, or omitted; sources default to 4 components. */
353 dst_reg get_nir_dest(nir_dest dest, enum brw_reg_type type);
354 dst_reg get_nir_dest(nir_dest dest, nir_alu_type type);
355 dst_reg get_nir_dest(nir_dest dest);
356 src_reg get_nir_src(nir_src src, enum brw_reg_type type,
357 unsigned num_components = 4);
358 src_reg get_nir_src(nir_src src, nir_alu_type type,
359 unsigned num_components = 4);
360 src_reg get_nir_src(nir_src src,
361 unsigned num_components = 4);
362 /* Pure virtual ("= 0"): every concrete subclass must implement it. */
363 virtual dst_reg *make_reg_for_system_value(int location,
364 const glsl_type *type) = 0;
365 /* NIR-related register pointers; how they are allocated and who owns them is not visible in this header. */
366 dst_reg *nir_locals;
367 dst_reg *nir_ssa_values;
368 src_reg *nir_inputs;
369 dst_reg *nir_system_values;
370
371 protected: /* Members marked "= 0" below are pure virtual and must be implemented by subclasses; the gs_* hooks are virtual but not pure. */
372 void emit_vertex();
373 void lower_attributes_to_hw_regs(const int *attribute_map,
374 bool interleaved);
375 void setup_payload_interference(struct ra_graph *g, int first_payload_node,
376 int reg_node_count);
377 virtual void setup_payload() = 0;
378 virtual void emit_prolog() = 0;
379 virtual void emit_thread_end() = 0;
380 virtual void emit_urb_write_header(int mrf) = 0;
381 virtual vec4_instruction *emit_urb_write_opcode(bool complete) = 0;
382 virtual void gs_emit_vertex(int stream_id);
383 virtual void gs_end_primitive();
384
385 private:
386 /**
387 * If true, then register allocation should fail instead of spilling.
388 */
389 const bool no_spills;
390
391 int shader_time_index;
392
393 unsigned last_scratch; /**< measured in 32-byte (register size) units */
394 };
395
396
397 /**
398 * The vertex shader code generator.
399 *
400 * Translates VS IR to actual i965 assembly code. The only public entry point besides the constructor/destructor is generate_assembly(); every generate_*() helper is private.
401 */
402 class vec4_generator
403 {
404 public:
405 vec4_generator(const struct brw_compiler *compiler, void *log_data,
406 struct gl_shader_program *shader_prog,
407 struct gl_program *prog,
408 struct brw_vue_prog_data *prog_data,
409 void *mem_ctx,
410 bool debug_flag,
411 const char *stage_name,
412 const char *stage_abbrev);
413 ~vec4_generator();
414 /* Takes a CFG and an out-parameter asm_size and returns a pointer to const unsigned words. NOTE(review): presumably the generated assembly and its size -- confirm in the definition. */
415 const unsigned *generate_assembly(const cfg_t *cfg, unsigned *asm_size);
416
417 private:
418 void generate_code(const cfg_t *cfg);
419 /* Math generators: separate gen4 one-source and two-source forms, and a gen6 form taking two sources. */
420 void generate_math1_gen4(vec4_instruction *inst,
421 struct brw_reg dst,
422 struct brw_reg src);
423 void generate_math2_gen4(vec4_instruction *inst,
424 struct brw_reg dst,
425 struct brw_reg src0,
426 struct brw_reg src1);
427 void generate_math_gen6(vec4_instruction *inst,
428 struct brw_reg dst,
429 struct brw_reg src0,
430 struct brw_reg src1);
431
432 void generate_tex(vec4_instruction *inst,
433 struct brw_reg dst,
434 struct brw_reg src,
435 struct brw_reg sampler_index);
436 /* Stage-specific generators: one generate_vs_* helper followed by the generate_gs_* family. */
437 void generate_vs_urb_write(vec4_instruction *inst);
438 void generate_gs_urb_write(vec4_instruction *inst);
439 void generate_gs_urb_write_allocate(vec4_instruction *inst);
440 void generate_gs_thread_end(vec4_instruction *inst);
441 void generate_gs_set_write_offset(struct brw_reg dst,
442 struct brw_reg src0,
443 struct brw_reg src1);
444 void generate_gs_set_vertex_count(struct brw_reg dst,
445 struct brw_reg src);
446 void generate_gs_svb_write(vec4_instruction *inst,
447 struct brw_reg dst,
448 struct brw_reg src0,
449 struct brw_reg src1);
450 void generate_gs_svb_set_destination_index(vec4_instruction *inst,
451 struct brw_reg dst,
452 struct brw_reg src);
453 void generate_gs_set_dword_2(struct brw_reg dst, struct brw_reg src);
454 void generate_gs_prepare_channel_masks(struct brw_reg dst);
455 void generate_gs_set_channel_masks(struct brw_reg dst, struct brw_reg src);
456 void generate_gs_get_instance_id(struct brw_reg dst);
457 void generate_gs_ff_sync_set_primitives(struct brw_reg dst,
458 struct brw_reg src0,
459 struct brw_reg src1,
460 struct brw_reg src2);
461 void generate_gs_ff_sync(vec4_instruction *inst,
462 struct brw_reg dst,
463 struct brw_reg src0,
464 struct brw_reg src1);
465 void generate_gs_set_primitive_id(struct brw_reg dst);
466 void generate_oword_dual_block_offsets(struct brw_reg m1,
467 struct brw_reg index);
468 void generate_scratch_write(vec4_instruction *inst,
469 struct brw_reg dst,
470 struct brw_reg src,
471 struct brw_reg index);
472 void generate_scratch_read(vec4_instruction *inst,
473 struct brw_reg dst,
474 struct brw_reg index);
475 void generate_pull_constant_load(vec4_instruction *inst,
476 struct brw_reg dst,
477 struct brw_reg index,
478 struct brw_reg offset);
479 void generate_pull_constant_load_gen7(vec4_instruction *inst,
480 struct brw_reg dst,
481 struct brw_reg surf_index,
482 struct brw_reg offset);
483 void generate_set_simd4x2_header_gen9(vec4_instruction *inst,
484 struct brw_reg dst);
485
486 void generate_get_buffer_size(vec4_instruction *inst,
487 struct brw_reg dst,
488 struct brw_reg src,
489 struct brw_reg index);
490
491 void generate_unpack_flags(struct brw_reg dst);
492 /* Data members. Most share a name with a constructor parameter; devinfo and p have no matching parameter, so they must be obtained some other way (not visible in this header). */
493 const struct brw_compiler *compiler;
494 void *log_data; /* Passed to compiler->*_log functions */
495
496 const struct brw_device_info *devinfo;
497
498 struct brw_codegen *p;
499
500 struct gl_shader_program *shader_prog;
501 const struct gl_program *prog; /* Held as pointer-to-const although the constructor parameter is a non-const pointer. */
502
503 struct brw_vue_prog_data *prog_data;
504 /* Members are initialized in declaration order: debug_flag is declared last here although the constructor lists it before the stage names. */
505 void *mem_ctx;
506 const char *stage_name;
507 const char *stage_abbrev;
508 const bool debug_flag;
509 };
510
511 } /* namespace brw */
512 #endif /* __cplusplus */
513
514 #endif /* BRW_VEC4_H */