anv/gen7: Add stall and flushes before switching pipelines
[mesa.git] / src / mesa / drivers / dri / i965 / brw_vec4.h
1 /*
2 * Copyright © 2011 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #ifndef BRW_VEC4_H
25 #define BRW_VEC4_H
26
27 #include "brw_shader.h"
28 #include "brw_program.h"
29
30 #ifdef __cplusplus
31 #include "brw_ir_vec4.h"
32 #endif
33
34 #include "compiler/glsl/ir.h"
35 #include "compiler/nir/nir.h"
36
37
38 #ifdef __cplusplus
39 extern "C" {
40 #endif
41
/**
 * Prototype only; the definition is not part of this header.
 *
 * The declaration sits inside the extern "C" guard, so the function has C
 * linkage when this header is included from C++.
 *
 * NOTE(review): going by the names alone, the return value is presumably the
 * generated assembly and its size is presumably stored through
 * \p out_assembly_size; the unit of that size and the ownership of the
 * returned buffer (possibly tied to \p mem_ctx) are not visible here --
 * confirm against the definition.
 */
42 const unsigned *
43 brw_vec4_generate_assembly(const struct brw_compiler *compiler,
44 void *log_data,
45 void *mem_ctx,
46 const nir_shader *nir,
47 struct brw_vue_prog_data *prog_data,
48 const struct cfg_t *cfg,
49 unsigned *out_assembly_size);
50
51 #ifdef __cplusplus
52 } /* extern "C" */
53
54 namespace brw {
55
56 class vec4_live_variables;
57
58 /**
59 * The vertex shader front-end.
60 *
61 * Translates either GLSL IR or Mesa IR (for ARB_vertex_program and
62 * fixed-function) into VS IR.
 *
 * The class derives from backend_shader and is abstract: setup_payload(),
 * emit_prolog(), emit_thread_end(), emit_urb_write_header(),
 * emit_urb_write_opcode() and make_reg_for_system_value() are pure virtual
 * and have to be provided by a subclass.
 *
 * NOTE(review): the constructor takes a nir_shader and the only translation
 * hooks declared below are the nir_emit_ methods, so the "GLSL IR or Mesa IR"
 * wording above may be out of date -- confirm against the implementation.
63 */
64 class vec4_visitor : public backend_shader
65 {
66 public:
/* Constructor and virtual destructor are only declared here.
 * NOTE(review): key, prog_data, no_spills and shader_time_index presumably
 * initialize the members key_tex, prog_data, no_spills and
 * shader_time_index declared further down -- confirm in the definition.
 */
67 vec4_visitor(const struct brw_compiler *compiler,
68 void *log_data,
69 const struct brw_sampler_prog_key_data *key,
70 struct brw_vue_prog_data *prog_data,
71 const nir_shader *shader,
72 void *mem_ctx,
73 bool no_spills,
74 int shader_time_index);
75 virtual ~vec4_visitor();
76
/* Null destination register exactly as brw_null_reg() returns it (no
 * retype is applied here).
 */
77 dst_reg dst_null_f()
78 {
79 return dst_reg(brw_null_reg());
80 }
81
/* Null destination register retyped to BRW_REGISTER_TYPE_D. */
82 dst_reg dst_null_d()
83 {
84 return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
85 }
86
/* Null destination register retyped to BRW_REGISTER_TYPE_UD. */
87 dst_reg dst_null_ud()
88 {
89 return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
90 }
91
/* Both pointers are themselves const (T * const), so they cannot be
 * re-pointed after construction; key_tex additionally points to const data.
 */
92 const struct brw_sampler_prog_key_data * const key_tex;
93 struct brw_vue_prog_data * const prog_data;
/* NOTE(review): presumably written by fail() below -- confirm. */
94 char *fail_msg;
95 bool failed;
96
97 /**
98 * GLSL IR currently being processed, which is associated with our
99 * driver IR instructions for debugging purposes.
100 */
101 const void *base_ir;
102 const char *current_annotation;
103
104 int first_non_payload_grf;
105 unsigned int max_grf;
106 int *virtual_grf_start;
107 int *virtual_grf_end;
108 brw::vec4_live_variables *live_intervals;
109 dst_reg userplane[MAX_CLIP_PLANES];
110
111 bool need_all_constants_in_pull_buffer;
112
113 /* Regs for vertex results. Generated at ir_variable visiting time
114 * for the ir->location's used.
115 */
/* output_reg and output_reg_annotation are parallel arrays, both sized
 * BRW_VARYING_SLOT_COUNT.
 */
116 dst_reg output_reg[BRW_VARYING_SLOT_COUNT];
117 const char *output_reg_annotation[BRW_VARYING_SLOT_COUNT];
118 int uniforms;
119
120 src_reg shader_start_time;
121
/* fail() takes a printf-style variadic argument list after msg. */
122 bool run();
123 void fail(const char *msg, ...);
124
125 int setup_uniforms(int payload_reg);
126
/* Register allocation, lowering and optimization entry points. Only the
 * declarations are visible in this header.
 * NOTE(review): the bool results of the opt_ methods presumably report
 * whether the pass changed anything -- confirm in the definitions.
 * opt_copy_propagation() defaults do_constant_prop to true when called
 * without an argument.
 */
127 bool reg_allocate_trivial();
128 bool reg_allocate();
129 void evaluate_spill_costs(float *spill_costs, bool *no_spill);
130 int choose_spill_reg(struct ra_graph *g);
131 void spill_reg(int spill_reg);
132 void move_grf_array_access_to_scratch();
133 void move_uniform_array_access_to_pull_constants();
134 void move_push_constants_to_pull_constants();
135 void split_uniform_registers();
136 void pack_uniform_registers();
137 void calculate_live_intervals();
138 void invalidate_live_intervals();
139 void split_virtual_grfs();
140 bool opt_vector_float();
141 bool opt_reduce_swizzle();
142 bool dead_code_eliminate();
143 int var_range_start(unsigned v, unsigned n) const;
144 int var_range_end(unsigned v, unsigned n) const;
145 bool virtual_grf_interferes(int a, int b);
146 bool opt_cmod_propagation();
147 bool opt_copy_propagation(bool do_constant_prop = true);
148 bool opt_cse_local(bblock_t *block);
149 bool opt_cse();
150 bool opt_algebraic();
151 bool opt_register_coalesce();
152 bool eliminate_find_live_channel();
153 bool is_dep_ctrl_unsafe(const vec4_instruction *inst);
154 void opt_set_dependency_control();
155 void opt_schedule_instructions();
156 void convert_to_hw_regs();
157
/* emit() overloads: the first takes an already-constructed instruction,
 * the others take an opcode with an optional destination and up to three
 * sources. All of them return a vec4_instruction pointer.
 */
158 vec4_instruction *emit(vec4_instruction *inst);
159
160 vec4_instruction *emit(enum opcode opcode);
161 vec4_instruction *emit(enum opcode opcode, const dst_reg &dst);
162 vec4_instruction *emit(enum opcode opcode, const dst_reg &dst,
163 const src_reg &src0);
164 vec4_instruction *emit(enum opcode opcode, const dst_reg &dst,
165 const src_reg &src0, const src_reg &src1);
166 vec4_instruction *emit(enum opcode opcode, const dst_reg &dst,
167 const src_reg &src0, const src_reg &src1,
168 const src_reg &src2);
169
170 vec4_instruction *emit_before(bblock_t *block,
171 vec4_instruction *inst,
172 vec4_instruction *new_inst);
173
/* EMIT1/EMIT2/EMIT3(op) each declare a member function named op that takes
 * one destination register followed by one, two or three source registers
 * and returns a vec4_instruction pointer. The trailing semicolon is part of
 * the macro, so the uses below need none. The macros are #undef'd right
 * after the list so they do not leak out of this header.
 */
174 #define EMIT1(op) vec4_instruction *op(const dst_reg &, const src_reg &);
175 #define EMIT2(op) vec4_instruction *op(const dst_reg &, const src_reg &, const src_reg &);
176 #define EMIT3(op) vec4_instruction *op(const dst_reg &, const src_reg &, const src_reg &, const src_reg &);
177 EMIT1(MOV)
178 EMIT1(NOT)
179 EMIT1(RNDD)
180 EMIT1(RNDE)
181 EMIT1(RNDZ)
182 EMIT1(FRC)
183 EMIT1(F32TO16)
184 EMIT1(F16TO32)
185 EMIT2(ADD)
186 EMIT2(MUL)
187 EMIT2(MACH)
188 EMIT2(MAC)
189 EMIT2(AND)
190 EMIT2(OR)
191 EMIT2(XOR)
192 EMIT2(DP3)
193 EMIT2(DP4)
194 EMIT2(DPH)
195 EMIT2(SHL)
196 EMIT2(SHR)
197 EMIT2(ASR)
/* CMP and IF are declared by hand rather than through the EMIT macros
 * because their signatures differ (conditional mod / predicate arguments,
 * registers passed by value).
 */
198 vec4_instruction *CMP(dst_reg dst, src_reg src0, src_reg src1,
199 enum brw_conditional_mod condition);
200 vec4_instruction *IF(src_reg src0, src_reg src1,
201 enum brw_conditional_mod condition);
202 vec4_instruction *IF(enum brw_predicate predicate);
203 EMIT1(SCRATCH_READ)
204 EMIT2(SCRATCH_WRITE)
205 EMIT3(LRP)
206 EMIT1(BFREV)
207 EMIT3(BFE)
208 EMIT2(BFI1)
209 EMIT3(BFI2)
210 EMIT1(FBH)
211 EMIT1(FBL)
212 EMIT1(CBIT)
213 EMIT3(MAD)
214 EMIT2(ADDC)
215 EMIT2(SUBB)
216 #undef EMIT1
217 #undef EMIT2
218 #undef EMIT3
219
220 int implied_mrf_writes(vec4_instruction *inst);
221
222 vec4_instruction *emit_minmax(enum brw_conditional_mod conditionalmod, dst_reg dst,
223 src_reg src0, src_reg src1);
224
225 vec4_instruction *emit_lrp(const dst_reg &dst, const src_reg &x,
226 const src_reg &y, const src_reg &a);
227
228 /**
229 * Copy any live channel from \p src to the first channel of the
230 * result.
231 */
232 src_reg emit_uniformize(const src_reg &src);
233
234 src_reg fix_3src_operand(const src_reg &src);
235 src_reg resolve_source_modifiers(const src_reg &src);
236
/* src1 defaults to a default-constructed src_reg, so emit_math() can be
 * called with a single source operand.
 */
237 vec4_instruction *emit_math(enum opcode opcode, const dst_reg &dst, const src_reg &src0,
238 const src_reg &src1 = src_reg());
239
240 src_reg fix_math_operand(const src_reg &src);
241
242 void emit_pack_half_2x16(dst_reg dst, src_reg src0);
243 void emit_unpack_half_2x16(dst_reg dst, src_reg src0);
244 void emit_unpack_unorm_4x8(const dst_reg &dst, src_reg src0);
245 void emit_unpack_snorm_4x8(const dst_reg &dst, src_reg src0);
246 void emit_pack_unorm_4x8(const dst_reg &dst, const src_reg &src0);
247 void emit_pack_snorm_4x8(const dst_reg &dst, const src_reg &src0);
248
/* Surface and sampler are each passed twice: as a uint32_t and as a
 * src_reg. NOTE(review): presumably the register form is used when the
 * index is not a compile-time constant -- confirm in the definition.
 */
249 void emit_texture(ir_texture_opcode op,
250 dst_reg dest,
251 const glsl_type *dest_type,
252 src_reg coordinate,
253 int coord_components,
254 src_reg shadow_comparitor,
255 src_reg lod, src_reg lod2,
256 src_reg sample_index,
257 uint32_t constant_offset,
258 src_reg offset_value,
259 src_reg mcs,
260 bool is_cube_array,
261 uint32_t surface, src_reg surface_reg,
262 uint32_t sampler, src_reg sampler_reg);
263
264 src_reg emit_mcs_fetch(const glsl_type *coordinate_type, src_reg coordinate,
265 src_reg surface);
266 void emit_gen6_gather_wa(uint8_t wa, dst_reg dst);
267
/* emit_urb_slot() is virtual so a subclass can override it;
 * emit_generic_urb_slot() is not.
 */
268 void emit_ndc_computation();
269 void emit_psiz_and_flags(dst_reg reg);
270 vec4_instruction *emit_generic_urb_slot(dst_reg reg, int varying);
271 virtual void emit_urb_slot(dst_reg reg, int varying);
272
273 void emit_shader_time_begin();
274 void emit_shader_time_end();
275 void emit_shader_time_write(int shader_time_subindex, src_reg value);
276
277 void emit_untyped_atomic(unsigned atomic_op, unsigned surf_index,
278 dst_reg dst, src_reg offset, src_reg src0,
279 src_reg src1);
280
281 void emit_untyped_surface_read(unsigned surf_index, dst_reg dst,
282 src_reg offset);
283
/* Scratch and pull-constant helpers. The overloads taking a block and an
 * instruction receive the position they operate relative to;
 * NOTE(review): presumably new instructions are inserted before inst --
 * confirm in the definitions.
 */
284 src_reg get_scratch_offset(bblock_t *block, vec4_instruction *inst,
285 src_reg *reladdr, int reg_offset);
286 void emit_scratch_read(bblock_t *block, vec4_instruction *inst,
287 dst_reg dst,
288 src_reg orig_src,
289 int base_offset);
290 void emit_scratch_write(bblock_t *block, vec4_instruction *inst,
291 int base_offset);
292 void emit_pull_constant_load(bblock_t *block, vec4_instruction *inst,
293 dst_reg dst,
294 src_reg orig_src,
295 int base_offset,
296 src_reg indirect);
297 void emit_pull_constant_load_reg(dst_reg dst,
298 src_reg surf_index,
299 src_reg offset,
300 bblock_t *before_block,
301 vec4_instruction *before_inst);
302 src_reg emit_resolve_reladdr(int scratch_loc[], bblock_t *block,
303 vec4_instruction *inst, src_reg src);
304
305 void resolve_ud_negate(src_reg *reg);
306
307 bool lower_minmax();
308
309 src_reg get_timestamp();
310
/* Two overloads: one takes only the instruction, the other also takes the
 * FILE to write to.
 */
311 void dump_instruction(backend_instruction *inst);
312 void dump_instruction(backend_instruction *inst, FILE *file);
313
314 bool is_high_sampler(src_reg sampler);
315
316 bool optimize_predicate(nir_alu_instr *instr, enum brw_predicate *predicate);
317
/* NIR translation hooks. Every one of them is virtual, so a subclass can
 * override the handling of individual NIR constructs.
 */
318 virtual void emit_nir_code();
319 virtual void nir_setup_uniforms();
320 virtual void nir_setup_system_value_intrinsic(nir_intrinsic_instr *instr);
321 virtual void nir_setup_system_values();
322 virtual void nir_emit_impl(nir_function_impl *impl);
323 virtual void nir_emit_cf_list(exec_list *list);
324 virtual void nir_emit_if(nir_if *if_stmt);
325 virtual void nir_emit_loop(nir_loop *loop);
326 virtual void nir_emit_block(nir_block *block);
327 virtual void nir_emit_instr(nir_instr *instr);
328 virtual void nir_emit_load_const(nir_load_const_instr *instr);
329 virtual void nir_emit_intrinsic(nir_intrinsic_instr *instr);
330 virtual void nir_emit_alu(nir_alu_instr *instr);
331 virtual void nir_emit_jump(nir_jump_instr *instr);
332 virtual void nir_emit_texture(nir_tex_instr *instr);
333 virtual void nir_emit_undef(nir_ssa_undef_instr *instr);
334 virtual void nir_emit_ssbo_atomic(int op, nir_intrinsic_instr *instr);
335
/* Accessors for NIR destinations and sources. Each comes in three
 * overloads: with a brw_reg_type, with a nir_alu_type, and with no type.
 * The get_nir_src() overloads default num_components to 4.
 */
336 dst_reg get_nir_dest(nir_dest dest, enum brw_reg_type type);
337 dst_reg get_nir_dest(nir_dest dest, nir_alu_type type);
338 dst_reg get_nir_dest(nir_dest dest);
339 src_reg get_nir_src(nir_src src, enum brw_reg_type type,
340 unsigned num_components = 4);
341 src_reg get_nir_src(nir_src src, nir_alu_type type,
342 unsigned num_components = 4);
343 src_reg get_nir_src(nir_src src,
344 unsigned num_components = 4);
345 src_reg get_indirect_offset(nir_intrinsic_instr *instr);
346
/* Pure virtual: a subclass must implement it. */
347 virtual dst_reg *make_reg_for_system_value(int location,
348 const glsl_type *type) = 0;
349
/* Raw dst_reg pointers; where they are allocated and who owns them is not
 * visible in this header.
 */
350 dst_reg *nir_locals;
351 dst_reg *nir_ssa_values;
352 dst_reg *nir_system_values;
353
354 protected:
355 void emit_vertex();
356 void lower_attributes_to_hw_regs(const int *attribute_map,
357 bool interleaved);
358 void setup_payload_interference(struct ra_graph *g, int first_payload_node,
359 int reg_node_count);
/* The five hooks marked "= 0" are pure virtual and must be implemented by
 * a concrete subclass. gs_emit_vertex() and gs_end_primitive() are virtual
 * but not pure, so overriding them is optional.
 */
360 virtual void setup_payload() = 0;
361 virtual void emit_prolog() = 0;
362 virtual void emit_thread_end() = 0;
363 virtual void emit_urb_write_header(int mrf) = 0;
364 virtual vec4_instruction *emit_urb_write_opcode(bool complete) = 0;
365 virtual void gs_emit_vertex(int stream_id);
366 virtual void gs_end_primitive();
367
368 private:
369 /**
370 * If true, then register allocation should fail instead of spilling.
371 */
372 const bool no_spills;
373
374 int shader_time_index;
375
376 unsigned last_scratch; /**< measured in 32-byte (register size) units */
377 };
378
379 } /* namespace brw */
380 #endif /* __cplusplus */
381
382 #endif /* BRW_VEC4_H */