d861b2e85dfb64600f1d527026f661d5d414ad5a
[mesa.git] / src / mesa / drivers / dri / i965 / brw_vec4.h
1 /*
2 * Copyright © 2011 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #ifndef BRW_VEC4_H
25 #define BRW_VEC4_H
26
27 #include <stdint.h>
28 #include "brw_shader.h"
29 #include "main/compiler.h"
30 #include "program/hash_table.h"
31 #include "brw_program.h"
32
33 #ifdef __cplusplus
34 #include "brw_ir_vec4.h"
35
36 extern "C" {
37 #endif
38
39 #include "brw_context.h"
40 #include "brw_eu.h"
41 #include "intel_asm_annotation.h"
42
43 #ifdef __cplusplus
44 }; /* extern "C" */
45 #endif
46
47 #include "glsl/ir.h"
48 #include "glsl/nir/nir.h"
49
50
51 #ifdef __cplusplus
52 extern "C" {
53 #endif
54
55 #ifdef __cplusplus
56 } /* extern "C" */
57
58 namespace brw {
59
60 class vec4_live_variables;
61
62 /**
63 * The vertex shader front-end.
64 *
65 * Translates a nir_shader into VS IR.  NOTE(review): GLSL IR and Mesa IR (for
66 * ARB_vertex_program and fixed-function) presumably arrive via NIR -- confirm.
67 */
68 class vec4_visitor : public backend_shader
69 {
70 public:
71 vec4_visitor(const struct brw_compiler *compiler,
72 void *log_data,
73 const struct brw_sampler_prog_key_data *key,
74 struct brw_vue_prog_data *prog_data,
75 const nir_shader *shader,
76 void *mem_ctx,
77 bool no_spills,
78 int shader_time_index);
79 virtual ~vec4_visitor(); /* virtual: this class is subclassed (see the "= 0" members below) */
80 /* Null-register destinations: brw_null_reg() as returned, or retyped to D / UD. */
81 dst_reg dst_null_f()
82 {
83 return dst_reg(brw_null_reg());
84 }
85
86 dst_reg dst_null_d()
87 {
88 return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
89 }
90
91 dst_reg dst_null_ud()
92 {
93 return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
94 }
95 /* key_tex and prog_data are const pointers: they cannot be reseated once initialized. */
96 const struct brw_sampler_prog_key_data * const key_tex;
97 struct brw_vue_prog_data * const prog_data;
98 char *fail_msg;
99 bool failed;
100
101 /**
102 * GLSL IR currently being processed, which is associated with our
103 * driver IR instructions for debugging purposes.
104 */
105 const void *base_ir;
106 const char *current_annotation;
107
108 int first_non_payload_grf;
109 unsigned int max_grf;
110 int *virtual_grf_start;
111 int *virtual_grf_end;
112 brw::vec4_live_variables *live_intervals;
113 dst_reg userplane[MAX_CLIP_PLANES];
114
115 bool need_all_constants_in_pull_buffer;
116
117 /* Regs for vertex results. Generated at ir_variable visiting time
118 * for the ir->location's used.
119 */
120 dst_reg output_reg[BRW_VARYING_SLOT_COUNT];
121 const char *output_reg_annotation[BRW_VARYING_SLOT_COUNT];
122 int *uniform_size;
123 int uniform_array_size; /**< Size of the uniform_size array */
124 int uniforms;
125
126 src_reg shader_start_time;
127 /* NOTE(review): fail(msg, ...) looks printf-style -- confirm against its definition. */
128 bool run();
129 void fail(const char *msg, ...);
130
131 int setup_uniforms(int payload_reg);
132 /* Register allocation, spilling, liveness and optimization pass declarations. */
133 bool reg_allocate_trivial();
134 bool reg_allocate();
135 void evaluate_spill_costs(float *spill_costs, bool *no_spill);
136 int choose_spill_reg(struct ra_graph *g);
137 void spill_reg(int spill_reg);
138 void move_grf_array_access_to_scratch();
139 void move_uniform_array_access_to_pull_constants();
140 void move_push_constants_to_pull_constants();
141 void split_uniform_registers();
142 void pack_uniform_registers();
143 void calculate_live_intervals();
144 void invalidate_live_intervals();
145 void split_virtual_grfs();
146 bool opt_vector_float();
147 bool opt_reduce_swizzle();
148 bool dead_code_eliminate();
149 int var_range_start(unsigned v, unsigned n) const;
150 int var_range_end(unsigned v, unsigned n) const;
151 bool virtual_grf_interferes(int a, int b);
152 bool opt_copy_propagation(bool do_constant_prop = true);
153 bool opt_cse_local(bblock_t *block);
154 bool opt_cse();
155 bool opt_algebraic();
156 bool opt_register_coalesce();
157 bool eliminate_find_live_channel();
158 bool is_dep_ctrl_unsafe(const vec4_instruction *inst);
159 void opt_set_dependency_control();
160 void opt_schedule_instructions();
161 /* emit() overloads: a prebuilt instruction, or an opcode with optional dst and up to 3 sources. */
162 vec4_instruction *emit(vec4_instruction *inst);
163
164 vec4_instruction *emit(enum opcode opcode);
165 vec4_instruction *emit(enum opcode opcode, const dst_reg &dst);
166 vec4_instruction *emit(enum opcode opcode, const dst_reg &dst,
167 const src_reg &src0);
168 vec4_instruction *emit(enum opcode opcode, const dst_reg &dst,
169 const src_reg &src0, const src_reg &src1);
170 vec4_instruction *emit(enum opcode opcode, const dst_reg &dst,
171 const src_reg &src0, const src_reg &src1,
172 const src_reg &src2);
173
174 vec4_instruction *emit_before(bblock_t *block,
175 vec4_instruction *inst,
176 vec4_instruction *new_inst);
177 /* EMITn(op) declares "vec4_instruction *op(dst, src...)" with n const-reference sources. */
178 #define EMIT1(op) vec4_instruction *op(const dst_reg &, const src_reg &);
179 #define EMIT2(op) vec4_instruction *op(const dst_reg &, const src_reg &, const src_reg &);
180 #define EMIT3(op) vec4_instruction *op(const dst_reg &, const src_reg &, const src_reg &, const src_reg &);
181 EMIT1(MOV)
182 EMIT1(NOT)
183 EMIT1(RNDD)
184 EMIT1(RNDE)
185 EMIT1(RNDZ)
186 EMIT1(FRC)
187 EMIT1(F32TO16)
188 EMIT1(F16TO32)
189 EMIT2(ADD)
190 EMIT2(MUL)
191 EMIT2(MACH)
192 EMIT2(MAC)
193 EMIT2(AND)
194 EMIT2(OR)
195 EMIT2(XOR)
196 EMIT2(DP3)
197 EMIT2(DP4)
198 EMIT2(DPH)
199 EMIT2(SHL)
200 EMIT2(SHR)
201 EMIT2(ASR)
202 vec4_instruction *CMP(dst_reg dst, src_reg src0, src_reg src1,
203 enum brw_conditional_mod condition);
204 vec4_instruction *IF(src_reg src0, src_reg src1,
205 enum brw_conditional_mod condition);
206 vec4_instruction *IF(enum brw_predicate predicate);
207 EMIT1(SCRATCH_READ)
208 EMIT2(SCRATCH_WRITE)
209 EMIT3(LRP)
210 EMIT1(BFREV)
211 EMIT3(BFE)
212 EMIT2(BFI1)
213 EMIT3(BFI2)
214 EMIT1(FBH)
215 EMIT1(FBL)
216 EMIT1(CBIT)
217 EMIT3(MAD)
218 EMIT2(ADDC)
219 EMIT2(SUBB)
220 #undef EMIT1
221 #undef EMIT2
222 #undef EMIT3
223 /* EMIT1/2/3 are #undef'd above, so they exist only for this declaration list. */
224 int implied_mrf_writes(vec4_instruction *inst);
225
226 vec4_instruction *emit_minmax(enum brw_conditional_mod conditionalmod, dst_reg dst,
227 src_reg src0, src_reg src1);
228
229 vec4_instruction *emit_lrp(const dst_reg &dst, const src_reg &x,
230 const src_reg &y, const src_reg &a);
231
232 /**
233 * Copy any live channel from \p src to the first channel of the
234 * result.
235 */
236 src_reg emit_uniformize(const src_reg &src);
237
238 src_reg fix_3src_operand(const src_reg &src);
239 src_reg resolve_source_modifiers(const src_reg &src);
240
241 vec4_instruction *emit_math(enum opcode opcode, const dst_reg &dst, const src_reg &src0,
242 const src_reg &src1 = src_reg());
243
244 src_reg fix_math_operand(const src_reg &src);
245 /* Pack/unpack helpers: note these mix by-value and const-reference register parameters. */
246 void emit_pack_half_2x16(dst_reg dst, src_reg src0);
247 void emit_unpack_half_2x16(dst_reg dst, src_reg src0);
248 void emit_unpack_unorm_4x8(const dst_reg &dst, src_reg src0);
249 void emit_unpack_snorm_4x8(const dst_reg &dst, src_reg src0);
250 void emit_pack_unorm_4x8(const dst_reg &dst, const src_reg &src0);
251 void emit_pack_snorm_4x8(const dst_reg &dst, const src_reg &src0);
252
253 void emit_texture(ir_texture_opcode op,
254 dst_reg dest,
255 const glsl_type *dest_type,
256 src_reg coordinate,
257 int coord_components,
258 src_reg shadow_comparitor,
259 src_reg lod, src_reg lod2,
260 src_reg sample_index,
261 uint32_t constant_offset,
262 src_reg offset_value,
263 src_reg mcs,
264 bool is_cube_array,
265 uint32_t sampler, src_reg sampler_reg);
266
267 uint32_t gather_channel(unsigned gather_component, uint32_t sampler);
268 src_reg emit_mcs_fetch(const glsl_type *coordinate_type, src_reg coordinate,
269 src_reg sampler);
270 void emit_gen6_gather_wa(uint8_t wa, dst_reg dst);
271 void swizzle_result(ir_texture_opcode op, dst_reg dest,
272 src_reg orig_val, uint32_t sampler,
273 const glsl_type *dest_type);
274
275 void emit_ndc_computation();
276 void emit_psiz_and_flags(dst_reg reg);
277 vec4_instruction *emit_generic_urb_slot(dst_reg reg, int varying);
278 virtual void emit_urb_slot(dst_reg reg, int varying);
279
280 void emit_shader_time_begin();
281 void emit_shader_time_end();
282 void emit_shader_time_write(int shader_time_subindex, src_reg value);
283
284 void emit_untyped_atomic(unsigned atomic_op, unsigned surf_index,
285 dst_reg dst, src_reg offset, src_reg src0,
286 src_reg src1);
287
288 void emit_untyped_surface_read(unsigned surf_index, dst_reg dst,
289 src_reg offset);
290
291 src_reg get_scratch_offset(bblock_t *block, vec4_instruction *inst,
292 src_reg *reladdr, int reg_offset);
293 src_reg get_pull_constant_offset(bblock_t *block, vec4_instruction *inst,
294 src_reg *reladdr, int reg_offset);
295 void emit_scratch_read(bblock_t *block, vec4_instruction *inst,
296 dst_reg dst,
297 src_reg orig_src,
298 int base_offset);
299 void emit_scratch_write(bblock_t *block, vec4_instruction *inst,
300 int base_offset);
301 void emit_pull_constant_load(bblock_t *block, vec4_instruction *inst,
302 dst_reg dst,
303 src_reg orig_src,
304 int base_offset);
305 void emit_pull_constant_load_reg(dst_reg dst,
306 src_reg surf_index,
307 src_reg offset,
308 bblock_t *before_block,
309 vec4_instruction *before_inst);
310 src_reg emit_resolve_reladdr(int scratch_loc[], bblock_t *block,
311 vec4_instruction *inst, src_reg src);
312
313 void resolve_ud_negate(src_reg *reg);
314
315 src_reg get_timestamp();
316
317 void dump_instruction(backend_instruction *inst);
318 void dump_instruction(backend_instruction *inst, FILE *file);
319
320 bool is_high_sampler(src_reg sampler);
321 /* NIR translation hooks; every one is virtual, so subclasses may override them. */
322 virtual void emit_nir_code();
323 virtual void nir_setup_inputs();
324 virtual void nir_setup_uniforms();
325 virtual void nir_setup_system_value_intrinsic(nir_intrinsic_instr *instr);
326 virtual void nir_setup_system_values();
327 virtual void nir_emit_impl(nir_function_impl *impl);
328 virtual void nir_emit_cf_list(exec_list *list);
329 virtual void nir_emit_if(nir_if *if_stmt);
330 virtual void nir_emit_loop(nir_loop *loop);
331 virtual void nir_emit_block(nir_block *block);
332 virtual void nir_emit_instr(nir_instr *instr);
333 virtual void nir_emit_load_const(nir_load_const_instr *instr);
334 virtual void nir_emit_intrinsic(nir_intrinsic_instr *instr);
335 virtual void nir_emit_alu(nir_alu_instr *instr);
336 virtual void nir_emit_jump(nir_jump_instr *instr);
337 virtual void nir_emit_texture(nir_tex_instr *instr);
338 virtual void nir_emit_undef(nir_ssa_undef_instr *instr);
339 virtual void nir_emit_ssbo_atomic(int op, nir_intrinsic_instr *instr);
340 /* get_nir_dest()/get_nir_src() overloads; num_components defaults to 4 for every get_nir_src(). */
341 dst_reg get_nir_dest(nir_dest dest, enum brw_reg_type type);
342 dst_reg get_nir_dest(nir_dest dest, nir_alu_type type);
343 dst_reg get_nir_dest(nir_dest dest);
344 src_reg get_nir_src(nir_src src, enum brw_reg_type type,
345 unsigned num_components = 4);
346 src_reg get_nir_src(nir_src src, nir_alu_type type,
347 unsigned num_components = 4);
348 src_reg get_nir_src(nir_src src,
349 unsigned num_components = 4);
350 /* Pure virtual: a subclass must provide this. */
351 virtual dst_reg *make_reg_for_system_value(int location,
352 const glsl_type *type) = 0;
353 /* NOTE(review): allocation and ownership of these arrays is not visible in this header. */
354 dst_reg *nir_locals;
355 dst_reg *nir_ssa_values;
356 src_reg *nir_inputs;
357 dst_reg *nir_system_values;
358
359 protected:
360 void emit_vertex();
361 void lower_attributes_to_hw_regs(const int *attribute_map,
362 bool interleaved);
363 void setup_payload_interference(struct ra_graph *g, int first_payload_node,
364 int reg_node_count);
365 virtual void setup_payload() = 0;
366 virtual void emit_prolog() = 0;
367 virtual void emit_thread_end() = 0;
368 virtual void emit_urb_write_header(int mrf) = 0;
369 virtual vec4_instruction *emit_urb_write_opcode(bool complete) = 0;
370 virtual void gs_emit_vertex(int stream_id);
371 virtual void gs_end_primitive();
372 /* The "= 0" members above (plus make_reg_for_system_value) make this class abstract. */
373 private:
374 /**
375 * If true, then register allocation should fail instead of spilling.
376 */
377 const bool no_spills;
378
379 int shader_time_index;
380
381 unsigned last_scratch; /**< measured in 32-byte (register size) units */
382 };
383
384
385 /**
386 * The vertex shader code generator (it also declares generate_gs_* helpers).
387 *
388 * Translates VS IR to actual i965 assembly code.
389 */
390 class vec4_generator
391 {
392 public:
393 vec4_generator(const struct brw_compiler *compiler, void *log_data,
394 struct brw_vue_prog_data *prog_data,
395 void *mem_ctx,
396 bool debug_flag,
397 const char *stage_name,
398 const char *stage_abbrev);
399 ~vec4_generator();
400 /* NOTE(review): asm_size is presumably an out-parameter for the size of the returned code; confirm. */
401 const unsigned *generate_assembly(const cfg_t *cfg, unsigned *asm_size,
402 const nir_shader *nir);
403
404 private:
405 void generate_code(const cfg_t *cfg, const nir_shader *nir);
406 /* Private generate_* helpers; each takes a vec4_instruction, brw_reg operands, or both. */
407 void generate_math1_gen4(vec4_instruction *inst,
408 struct brw_reg dst,
409 struct brw_reg src);
410 void generate_math2_gen4(vec4_instruction *inst,
411 struct brw_reg dst,
412 struct brw_reg src0,
413 struct brw_reg src1);
414 void generate_math_gen6(vec4_instruction *inst,
415 struct brw_reg dst,
416 struct brw_reg src0,
417 struct brw_reg src1);
418
419 void generate_tex(vec4_instruction *inst,
420 struct brw_reg dst,
421 struct brw_reg src,
422 struct brw_reg sampler_index);
423
424 void generate_vs_urb_write(vec4_instruction *inst);
425 void generate_gs_urb_write(vec4_instruction *inst);
426 void generate_gs_urb_write_allocate(vec4_instruction *inst);
427 void generate_gs_thread_end(vec4_instruction *inst);
428 void generate_gs_set_write_offset(struct brw_reg dst,
429 struct brw_reg src0,
430 struct brw_reg src1);
431 void generate_gs_set_vertex_count(struct brw_reg dst,
432 struct brw_reg src);
433 void generate_gs_svb_write(vec4_instruction *inst,
434 struct brw_reg dst,
435 struct brw_reg src0,
436 struct brw_reg src1);
437 void generate_gs_svb_set_destination_index(vec4_instruction *inst,
438 struct brw_reg dst,
439 struct brw_reg src);
440 void generate_gs_set_dword_2(struct brw_reg dst, struct brw_reg src);
441 void generate_gs_prepare_channel_masks(struct brw_reg dst);
442 void generate_gs_set_channel_masks(struct brw_reg dst, struct brw_reg src);
443 void generate_gs_get_instance_id(struct brw_reg dst);
444 void generate_gs_ff_sync_set_primitives(struct brw_reg dst,
445 struct brw_reg src0,
446 struct brw_reg src1,
447 struct brw_reg src2);
448 void generate_gs_ff_sync(vec4_instruction *inst,
449 struct brw_reg dst,
450 struct brw_reg src0,
451 struct brw_reg src1);
452 void generate_gs_set_primitive_id(struct brw_reg dst);
453 void generate_oword_dual_block_offsets(struct brw_reg m1,
454 struct brw_reg index);
455 void generate_scratch_write(vec4_instruction *inst,
456 struct brw_reg dst,
457 struct brw_reg src,
458 struct brw_reg index);
459 void generate_scratch_read(vec4_instruction *inst,
460 struct brw_reg dst,
461 struct brw_reg index);
462 void generate_pull_constant_load(vec4_instruction *inst,
463 struct brw_reg dst,
464 struct brw_reg index,
465 struct brw_reg offset);
466 void generate_pull_constant_load_gen7(vec4_instruction *inst,
467 struct brw_reg dst,
468 struct brw_reg surf_index,
469 struct brw_reg offset);
470 void generate_set_simd4x2_header_gen9(vec4_instruction *inst,
471 struct brw_reg dst);
472
473 void generate_get_buffer_size(vec4_instruction *inst,
474 struct brw_reg dst,
475 struct brw_reg src,
476 struct brw_reg index);
477
478 void generate_unpack_flags(struct brw_reg dst);
479 /* Private state; debug_flag is const, so it must be set in the constructor's initializer list. */
480 const struct brw_compiler *compiler;
481 void *log_data; /* Passed to compiler->*_log functions */
482
483 const struct brw_device_info *devinfo;
484
485 struct brw_codegen *p;
486
487 struct brw_vue_prog_data *prog_data;
488
489 void *mem_ctx;
490 const char *stage_name;
491 const char *stage_abbrev;
492 const bool debug_flag;
493 };
494
495 } /* namespace brw */
496 #endif /* __cplusplus */
497
498 #endif /* BRW_VEC4_H */