intel/compiler: Drop nir_lower_to_source_mods() and related handling.
[mesa.git] / src / intel / compiler / brw_vec4.h
1 /*
2 * Copyright © 2011 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #ifndef BRW_VEC4_H
25 #define BRW_VEC4_H
26
27 #include "brw_shader.h"
28
29 #ifdef __cplusplus
30 #include "brw_ir_vec4.h"
31 #include "brw_vec4_builder.h"
32 #include "brw_vec4_live_variables.h"
33 #endif
34
35 #include "compiler/glsl/ir.h"
36 #include "compiler/nir/nir.h"
37
38
39 #ifdef __cplusplus
40 extern "C" {
41 #endif
42
43 const unsigned *
44 brw_vec4_generate_assembly(const struct brw_compiler *compiler,
45 void *log_data,
46 void *mem_ctx,
47 const nir_shader *nir,
48 struct brw_vue_prog_data *prog_data,
49 const struct cfg_t *cfg,
50 struct brw_compile_stats *stats);
51
52 #ifdef __cplusplus
53 } /* extern "C" */
54
55 namespace brw {
56 /**
57 * The vertex shader front-end.
58 *
59 * Translates either GLSL IR or Mesa IR (for ARB_vertex_program and
60 * fixed-function) into VS IR.
61 */
62 class vec4_visitor : public backend_shader
63 {
64 public:
65 vec4_visitor(const struct brw_compiler *compiler,
66 void *log_data,
67 const struct brw_sampler_prog_key_data *key,
68 struct brw_vue_prog_data *prog_data,
69 const nir_shader *shader,
70 void *mem_ctx,
71 bool no_spills,
72 int shader_time_index);
73
74 dst_reg dst_null_f()
75 {
76 return dst_reg(brw_null_reg());
77 }
78
79 dst_reg dst_null_df()
80 {
81 return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_DF));
82 }
83
84 dst_reg dst_null_d()
85 {
86 return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
87 }
88
89 dst_reg dst_null_ud()
90 {
91 return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
92 }
93
94 const struct brw_sampler_prog_key_data * const key_tex;
95 struct brw_vue_prog_data * const prog_data;
96 char *fail_msg;
97 bool failed;
98
99 /**
100 * GLSL IR currently being processed, which is associated with our
101 * driver IR instructions for debugging purposes.
102 */
103 const void *base_ir;
104 const char *current_annotation;
105
106 int first_non_payload_grf;
107 unsigned int max_grf;
108 BRW_ANALYSIS(live_analysis, brw::vec4_live_variables,
109 backend_shader *) live_analysis;
110
111 bool need_all_constants_in_pull_buffer;
112
113 /* Regs for vertex results. Generated at ir_variable visiting time
114 * for the ir->location's used.
115 */
116 dst_reg output_reg[VARYING_SLOT_TESS_MAX][4];
117 unsigned output_num_components[VARYING_SLOT_TESS_MAX][4];
118 const char *output_reg_annotation[VARYING_SLOT_TESS_MAX];
119 int uniforms;
120
121 src_reg shader_start_time;
122
123 bool run();
124 void fail(const char *msg, ...);
125
126 int setup_uniforms(int payload_reg);
127
128 bool reg_allocate_trivial();
129 bool reg_allocate();
130 void evaluate_spill_costs(float *spill_costs, bool *no_spill);
131 int choose_spill_reg(struct ra_graph *g);
132 void spill_reg(unsigned spill_reg);
133 void move_grf_array_access_to_scratch();
134 void move_uniform_array_access_to_pull_constants();
135 void move_push_constants_to_pull_constants();
136 void split_uniform_registers();
137 void pack_uniform_registers();
138 virtual void invalidate_analysis(brw::analysis_dependency_class c);
139 void split_virtual_grfs();
140 bool opt_vector_float();
141 bool opt_reduce_swizzle();
142 bool dead_code_eliminate();
143 bool opt_cmod_propagation();
144 bool opt_copy_propagation(bool do_constant_prop = true);
145 bool opt_cse_local(bblock_t *block, const vec4_live_variables &live);
146 bool opt_cse();
147 bool opt_algebraic();
148 bool opt_register_coalesce();
149 bool eliminate_find_live_channel();
150 bool is_dep_ctrl_unsafe(const vec4_instruction *inst);
151 void opt_set_dependency_control();
152 void opt_schedule_instructions();
153 void convert_to_hw_regs();
154 void fixup_3src_null_dest();
155
156 bool is_supported_64bit_region(vec4_instruction *inst, unsigned arg);
157 bool lower_simd_width();
158 bool scalarize_df();
159 bool lower_64bit_mad_to_mul_add();
160 void apply_logical_swizzle(struct brw_reg *hw_reg,
161 vec4_instruction *inst, int arg);
162
163 vec4_instruction *emit(vec4_instruction *inst);
164
165 vec4_instruction *emit(enum opcode opcode);
166 vec4_instruction *emit(enum opcode opcode, const dst_reg &dst);
167 vec4_instruction *emit(enum opcode opcode, const dst_reg &dst,
168 const src_reg &src0);
169 vec4_instruction *emit(enum opcode opcode, const dst_reg &dst,
170 const src_reg &src0, const src_reg &src1);
171 vec4_instruction *emit(enum opcode opcode, const dst_reg &dst,
172 const src_reg &src0, const src_reg &src1,
173 const src_reg &src2);
174
175 vec4_instruction *emit_before(bblock_t *block,
176 vec4_instruction *inst,
177 vec4_instruction *new_inst);
178
179 #define EMIT1(op) vec4_instruction *op(const dst_reg &, const src_reg &);
180 #define EMIT2(op) vec4_instruction *op(const dst_reg &, const src_reg &, const src_reg &);
181 #define EMIT3(op) vec4_instruction *op(const dst_reg &, const src_reg &, const src_reg &, const src_reg &);
182 EMIT1(MOV)
183 EMIT1(NOT)
184 EMIT1(RNDD)
185 EMIT1(RNDE)
186 EMIT1(RNDZ)
187 EMIT1(FRC)
188 EMIT1(F32TO16)
189 EMIT1(F16TO32)
190 EMIT2(ADD)
191 EMIT2(MUL)
192 EMIT2(MACH)
193 EMIT2(MAC)
194 EMIT2(AND)
195 EMIT2(OR)
196 EMIT2(XOR)
197 EMIT2(DP3)
198 EMIT2(DP4)
199 EMIT2(DPH)
200 EMIT2(SHL)
201 EMIT2(SHR)
202 EMIT2(ASR)
203 vec4_instruction *CMP(dst_reg dst, src_reg src0, src_reg src1,
204 enum brw_conditional_mod condition);
205 vec4_instruction *IF(src_reg src0, src_reg src1,
206 enum brw_conditional_mod condition);
207 vec4_instruction *IF(enum brw_predicate predicate);
208 EMIT1(SCRATCH_READ)
209 EMIT2(SCRATCH_WRITE)
210 EMIT3(LRP)
211 EMIT1(BFREV)
212 EMIT3(BFE)
213 EMIT2(BFI1)
214 EMIT3(BFI2)
215 EMIT1(FBH)
216 EMIT1(FBL)
217 EMIT1(CBIT)
218 EMIT3(MAD)
219 EMIT2(ADDC)
220 EMIT2(SUBB)
221 EMIT1(DIM)
222
223 #undef EMIT1
224 #undef EMIT2
225 #undef EMIT3
226
227 vec4_instruction *emit_minmax(enum brw_conditional_mod conditionalmod, dst_reg dst,
228 src_reg src0, src_reg src1);
229
230 /**
231 * Copy any live channel from \p src to the first channel of the
232 * result.
233 */
234 src_reg emit_uniformize(const src_reg &src);
235
236 /** Fix all float operands of a 3-source instruction. */
237 void fix_float_operands(src_reg op[3], nir_alu_instr *instr);
238
239 src_reg fix_3src_operand(const src_reg &src);
240 src_reg resolve_source_modifiers(const src_reg &src);
241
242 vec4_instruction *emit_math(enum opcode opcode, const dst_reg &dst, const src_reg &src0,
243 const src_reg &src1 = src_reg());
244
245 src_reg fix_math_operand(const src_reg &src);
246
247 void emit_pack_half_2x16(dst_reg dst, src_reg src0);
248 void emit_unpack_half_2x16(dst_reg dst, src_reg src0);
249 void emit_unpack_unorm_4x8(const dst_reg &dst, src_reg src0);
250 void emit_unpack_snorm_4x8(const dst_reg &dst, src_reg src0);
251 void emit_pack_unorm_4x8(const dst_reg &dst, const src_reg &src0);
252 void emit_pack_snorm_4x8(const dst_reg &dst, const src_reg &src0);
253
254 void emit_texture(ir_texture_opcode op,
255 dst_reg dest,
256 const glsl_type *dest_type,
257 src_reg coordinate,
258 int coord_components,
259 src_reg shadow_comparator,
260 src_reg lod, src_reg lod2,
261 src_reg sample_index,
262 uint32_t constant_offset,
263 src_reg offset_value,
264 src_reg mcs,
265 uint32_t surface, src_reg surface_reg,
266 src_reg sampler_reg);
267
268 src_reg emit_mcs_fetch(const glsl_type *coordinate_type, src_reg coordinate,
269 src_reg surface);
270 void emit_gen6_gather_wa(uint8_t wa, dst_reg dst);
271
272 void emit_ndc_computation();
273 void emit_psiz_and_flags(dst_reg reg);
274 vec4_instruction *emit_generic_urb_slot(dst_reg reg, int varying, int comp);
275 virtual void emit_urb_slot(dst_reg reg, int varying);
276
277 void emit_shader_time_begin();
278 void emit_shader_time_end();
279 void emit_shader_time_write(int shader_time_subindex, src_reg value);
280
281 src_reg get_scratch_offset(bblock_t *block, vec4_instruction *inst,
282 src_reg *reladdr, int reg_offset);
283 void emit_scratch_read(bblock_t *block, vec4_instruction *inst,
284 dst_reg dst,
285 src_reg orig_src,
286 int base_offset);
287 void emit_scratch_write(bblock_t *block, vec4_instruction *inst,
288 int base_offset);
289 void emit_pull_constant_load(bblock_t *block, vec4_instruction *inst,
290 dst_reg dst,
291 src_reg orig_src,
292 int base_offset,
293 src_reg indirect);
294 void emit_pull_constant_load_reg(dst_reg dst,
295 src_reg surf_index,
296 src_reg offset,
297 bblock_t *before_block,
298 vec4_instruction *before_inst);
299 src_reg emit_resolve_reladdr(int scratch_loc[], bblock_t *block,
300 vec4_instruction *inst, src_reg src);
301
302 void resolve_ud_negate(src_reg *reg);
303
304 bool lower_minmax();
305
306 src_reg get_timestamp();
307
308 void dump_instruction(const backend_instruction *inst) const;
309 void dump_instruction(const backend_instruction *inst, FILE *file) const;
310
311 bool is_high_sampler(src_reg sampler);
312
313 bool optimize_predicate(nir_alu_instr *instr, enum brw_predicate *predicate);
314
315 void emit_conversion_from_double(dst_reg dst, src_reg src);
316 void emit_conversion_to_double(dst_reg dst, src_reg src);
317
318 vec4_instruction *shuffle_64bit_data(dst_reg dst, src_reg src,
319 bool for_write,
320 bblock_t *block = NULL,
321 vec4_instruction *ref = NULL);
322
323 virtual void emit_nir_code();
324 virtual void nir_setup_uniforms();
325 virtual void nir_emit_impl(nir_function_impl *impl);
326 virtual void nir_emit_cf_list(exec_list *list);
327 virtual void nir_emit_if(nir_if *if_stmt);
328 virtual void nir_emit_loop(nir_loop *loop);
329 virtual void nir_emit_block(nir_block *block);
330 virtual void nir_emit_instr(nir_instr *instr);
331 virtual void nir_emit_load_const(nir_load_const_instr *instr);
332 src_reg get_nir_ssbo_intrinsic_index(nir_intrinsic_instr *instr);
333 virtual void nir_emit_intrinsic(nir_intrinsic_instr *instr);
334 virtual void nir_emit_alu(nir_alu_instr *instr);
335 virtual void nir_emit_jump(nir_jump_instr *instr);
336 virtual void nir_emit_texture(nir_tex_instr *instr);
337 virtual void nir_emit_undef(nir_ssa_undef_instr *instr);
338 virtual void nir_emit_ssbo_atomic(int op, nir_intrinsic_instr *instr);
339
340 dst_reg get_nir_dest(const nir_dest &dest, enum brw_reg_type type);
341 dst_reg get_nir_dest(const nir_dest &dest, nir_alu_type type);
342 dst_reg get_nir_dest(const nir_dest &dest);
343 src_reg get_nir_src(const nir_src &src, enum brw_reg_type type,
344 unsigned num_components = 4);
345 src_reg get_nir_src(const nir_src &src, nir_alu_type type,
346 unsigned num_components = 4);
347 src_reg get_nir_src(const nir_src &src,
348 unsigned num_components = 4);
349 src_reg get_nir_src_imm(const nir_src &src);
350 src_reg get_indirect_offset(nir_intrinsic_instr *instr);
351
352 dst_reg *nir_locals;
353 dst_reg *nir_ssa_values;
354
355 protected:
356 void emit_vertex();
357 void setup_payload_interference(struct ra_graph *g, int first_payload_node,
358 int reg_node_count);
359 virtual void setup_payload() = 0;
360 virtual void emit_prolog() = 0;
361 virtual void emit_thread_end() = 0;
362 virtual void emit_urb_write_header(int mrf) = 0;
363 virtual vec4_instruction *emit_urb_write_opcode(bool complete) = 0;
364 virtual void gs_emit_vertex(int stream_id);
365 virtual void gs_end_primitive();
366
367 private:
368 /**
369 * If true, then register allocation should fail instead of spilling.
370 */
371 const bool no_spills;
372
373 int shader_time_index;
374
375 unsigned last_scratch; /**< measured in 32-byte (register size) units */
376 };
377
378 } /* namespace brw */
379 #endif /* __cplusplus */
380
381 #endif /* BRW_VEC4_H */