intel: Drop program size pointer from vec4/fs assembly getters.
[mesa.git] / src / intel / compiler / brw_vec4.h
1 /*
2 * Copyright © 2011 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #ifndef BRW_VEC4_H
25 #define BRW_VEC4_H
26
27 #include "brw_shader.h"
28
29 #ifdef __cplusplus
30 #include "brw_ir_vec4.h"
31 #include "brw_vec4_builder.h"
32 #endif
33
34 #include "compiler/glsl/ir.h"
35 #include "compiler/nir/nir.h"
36
37
38 #ifdef __cplusplus
39 extern "C" {
40 #endif
41
/**
 * Prototype of the vec4 assembly getter.
 *
 * The declaration sits inside the extern "C" guard of this header, so it has
 * C linkage when the header is included from C++.
 *
 * Takes the compiler, an opaque log_data pointer, an opaque mem_ctx pointer,
 * the NIR shader, the VUE program data and a CFG, and returns a pointer to
 * const unsigned.  The signature has no size out-parameter.
 *
 * NOTE(review): presumably the return value is the generated program and
 * mem_ctx is the allocation context that owns it -- confirm against the
 * definition, which is not part of this header.
 */
42 const unsigned *
43 brw_vec4_generate_assembly(const struct brw_compiler *compiler,
44 void *log_data,
45 void *mem_ctx,
46 const nir_shader *nir,
47 struct brw_vue_prog_data *prog_data,
48 const struct cfg_t *cfg);
49
50 #ifdef __cplusplus
51 } /* extern "C" */
52
53 namespace brw {
54
55 class vec4_live_variables;
56
57 /**
58 * The vertex shader front-end.
59 *
60 * Translates either GLSL IR or Mesa IR (for ARB_vertex_program and
61 * fixed-function) into VS IR.
62 */
/*
 * NOTE(review): the constructor below takes a nir_shader and the class
 * declares the emit_nir_code() / nir_emit_... family, so the reference to
 * GLSL IR and Mesa IR in the comment above looks stale -- confirm and update.
 *
 * This class is abstract: setup_payload(), emit_prolog(), emit_thread_end(),
 * emit_urb_write_header() and emit_urb_write_opcode() are declared pure
 * virtual in the protected section and must be provided by a derived class.
 */
63 class vec4_visitor : public backend_shader
64 {
65 public:
/*
 * Constructor.  The definition is not in this header.
 * NOTE(review): presumably key, prog_data, no_spills and shader_time_index
 * initialize the members key_tex, prog_data, no_spills and
 * shader_time_index declared further down -- confirm against the definition.
 */
66 vec4_visitor(const struct brw_compiler *compiler,
67 void *log_data,
68 const struct brw_sampler_prog_key_data *key,
69 struct brw_vue_prog_data *prog_data,
70 const nir_shader *shader,
71 void *mem_ctx,
72 bool no_spills,
73 int shader_time_index);
74
/*
 * Null-register destinations.  Each helper wraps brw_null_reg() in a
 * dst_reg; dst_null_f() applies no retype, while the _df, _d and _ud
 * variants retype the null register to BRW_REGISTER_TYPE_DF, _D and _UD
 * respectively.
 */
75 dst_reg dst_null_f()
76 {
77 return dst_reg(brw_null_reg());
78 }
79
80 dst_reg dst_null_df()
81 {
82 return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_DF));
83 }
84
85 dst_reg dst_null_d()
86 {
87 return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
88 }
89
90 dst_reg dst_null_ud()
91 {
92 return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
93 }
94
/*
 * key_tex and prog_data are const pointers: they cannot be reseated after
 * construction.  key_tex additionally points to const data.
 */
95 const struct brw_sampler_prog_key_data * const key_tex;
96 struct brw_vue_prog_data * const prog_data;
/* NOTE(review): presumably written by fail() -- confirm in the definition. */
97 char *fail_msg;
98 bool failed;
99
100 /**
101 * GLSL IR currently being processed, which is associated with our
102 * driver IR instructions for debugging purposes.
103 */
/*
 * NOTE(review): base_ir is an untyped const void pointer; the "GLSL IR"
 * wording above may predate the NIR-based interface -- confirm.
 */
104 const void *base_ir;
105 const char *current_annotation;
106
107 int first_non_payload_grf;
108 unsigned int max_grf;
109 int *virtual_grf_start;
110 int *virtual_grf_end;
111 brw::vec4_live_variables *live_intervals;
112 dst_reg userplane[MAX_CLIP_PLANES];
113
114 bool need_all_constants_in_pull_buffer;
115
116 /* Regs for vertex results. Generated at ir_variable visiting time
117 * for the ir->location's used.
118 */
/*
 * output_reg and output_num_components are two-dimensional,
 * [VARYING_SLOT_TESS_MAX][4]; output_reg_annotation has one entry per slot.
 * NOTE(review): the second index is presumably a per-slot component --
 * confirm.  The ir_variable wording above also looks stale.
 */
119 dst_reg output_reg[VARYING_SLOT_TESS_MAX][4];
120 unsigned output_num_components[VARYING_SLOT_TESS_MAX][4];
121 const char *output_reg_annotation[VARYING_SLOT_TESS_MAX];
122 int uniforms;
123
124 src_reg shader_start_time;
125
126 bool run();
/* Variadic: msg is followed by additional arguments. */
127 void fail(const char *msg, ...);
128
129 int setup_uniforms(int payload_reg);
130
/* Register allocation, spilling, lowering and optimization entry points. */
131 bool reg_allocate_trivial();
132 bool reg_allocate();
133 void evaluate_spill_costs(float *spill_costs, bool *no_spill);
134 int choose_spill_reg(struct ra_graph *g);
135 void spill_reg(int spill_reg);
136 void move_grf_array_access_to_scratch();
137 void move_uniform_array_access_to_pull_constants();
138 void move_push_constants_to_pull_constants();
139 void split_uniform_registers();
140 void pack_uniform_registers();
141 void calculate_live_intervals();
142 void invalidate_live_intervals();
143 void split_virtual_grfs();
144 bool opt_vector_float();
145 bool opt_reduce_swizzle();
146 bool dead_code_eliminate();
147 int var_range_start(unsigned v, unsigned n) const;
148 int var_range_end(unsigned v, unsigned n) const;
149 bool virtual_grf_interferes(int a, int b);
150 bool opt_cmod_propagation();
/* do_constant_prop defaults to true when the argument is omitted. */
151 bool opt_copy_propagation(bool do_constant_prop = true);
152 bool opt_cse_local(bblock_t *block);
153 bool opt_cse();
154 bool opt_algebraic();
155 bool opt_register_coalesce();
156 bool eliminate_find_live_channel();
157 bool is_dep_ctrl_unsafe(const vec4_instruction *inst);
158 void opt_set_dependency_control();
159 void opt_schedule_instructions();
160 void convert_to_hw_regs();
161
162 bool is_supported_64bit_region(vec4_instruction *inst, unsigned arg);
163 bool lower_simd_width();
164 bool scalarize_df();
165 bool lower_64bit_mad_to_mul_add();
166 void apply_logical_swizzle(struct brw_reg *hw_reg,
167 vec4_instruction *inst, int arg);
168
/*
 * emit() overloads: one takes an already-built instruction, the others take
 * an opcode plus an optional destination and up to three sources.
 */
169 vec4_instruction *emit(vec4_instruction *inst);
170
171 vec4_instruction *emit(enum opcode opcode);
172 vec4_instruction *emit(enum opcode opcode, const dst_reg &dst);
173 vec4_instruction *emit(enum opcode opcode, const dst_reg &dst,
174 const src_reg &src0);
175 vec4_instruction *emit(enum opcode opcode, const dst_reg &dst,
176 const src_reg &src0, const src_reg &src1);
177 vec4_instruction *emit(enum opcode opcode, const dst_reg &dst,
178 const src_reg &src0, const src_reg &src1,
179 const src_reg &src2);
180
181 vec4_instruction *emit_before(bblock_t *block,
182 vec4_instruction *inst,
183 vec4_instruction *new_inst);
184
/*
 * EMIT1, EMIT2 and EMIT3 each expand to the declaration of a member function
 * named after the opcode, taking a destination and one, two or three
 * sources and returning a vec4_instruction pointer.  Each macro supplies its
 * own trailing semicolon, which is why the uses below have none.  All three
 * macros are undefined again right after the list.
 */
185 #define EMIT1(op) vec4_instruction *op(const dst_reg &, const src_reg &);
186 #define EMIT2(op) vec4_instruction *op(const dst_reg &, const src_reg &, const src_reg &);
187 #define EMIT3(op) vec4_instruction *op(const dst_reg &, const src_reg &, const src_reg &, const src_reg &);
188 EMIT1(MOV)
189 EMIT1(NOT)
190 EMIT1(RNDD)
191 EMIT1(RNDE)
192 EMIT1(RNDZ)
193 EMIT1(FRC)
194 EMIT1(F32TO16)
195 EMIT1(F16TO32)
196 EMIT2(ADD)
197 EMIT2(MUL)
198 EMIT2(MACH)
199 EMIT2(MAC)
200 EMIT2(AND)
201 EMIT2(OR)
202 EMIT2(XOR)
203 EMIT2(DP3)
204 EMIT2(DP4)
205 EMIT2(DPH)
206 EMIT2(SHL)
207 EMIT2(SHR)
208 EMIT2(ASR)
/*
 * CMP and IF are declared by hand: they take a conditional mod or a
 * predicate, which does not fit the EMIT1/EMIT2/EMIT3 signatures.
 */
209 vec4_instruction *CMP(dst_reg dst, src_reg src0, src_reg src1,
210 enum brw_conditional_mod condition);
211 vec4_instruction *IF(src_reg src0, src_reg src1,
212 enum brw_conditional_mod condition);
213 vec4_instruction *IF(enum brw_predicate predicate);
214 EMIT1(SCRATCH_READ)
215 EMIT2(SCRATCH_WRITE)
216 EMIT3(LRP)
217 EMIT1(BFREV)
218 EMIT3(BFE)
219 EMIT2(BFI1)
220 EMIT3(BFI2)
221 EMIT1(FBH)
222 EMIT1(FBL)
223 EMIT1(CBIT)
224 EMIT3(MAD)
225 EMIT2(ADDC)
226 EMIT2(SUBB)
227 EMIT1(DIM)
228
229 #undef EMIT1
230 #undef EMIT2
231 #undef EMIT3
232
233 int implied_mrf_writes(vec4_instruction *inst);
234
235 vec4_instruction *emit_minmax(enum brw_conditional_mod conditionalmod, dst_reg dst,
236 src_reg src0, src_reg src1);
237
238 vec4_instruction *emit_lrp(const dst_reg &dst, const src_reg &x,
239 const src_reg &y, const src_reg &a);
240
241 /**
242 * Copy any live channel from \p src to the first channel of the
243 * result.
244 */
245 src_reg emit_uniformize(const src_reg &src);
246
247 src_reg fix_3src_operand(const src_reg &src);
248 src_reg resolve_source_modifiers(const src_reg &src);
249
/*
 * src1 defaults to a default-constructed src_reg, so callers with a single
 * source operand can omit it.
 */
250 vec4_instruction *emit_math(enum opcode opcode, const dst_reg &dst, const src_reg &src0,
251 const src_reg &src1 = src_reg());
252
253 src_reg fix_math_operand(const src_reg &src);
254
255 void emit_pack_half_2x16(dst_reg dst, src_reg src0);
256 void emit_unpack_half_2x16(dst_reg dst, src_reg src0);
257 void emit_unpack_unorm_4x8(const dst_reg &dst, src_reg src0);
258 void emit_unpack_snorm_4x8(const dst_reg &dst, src_reg src0);
259 void emit_pack_unorm_4x8(const dst_reg &dst, const src_reg &src0);
260 void emit_pack_snorm_4x8(const dst_reg &dst, const src_reg &src0);
261
262 void emit_texture(ir_texture_opcode op,
263 dst_reg dest,
264 const glsl_type *dest_type,
265 src_reg coordinate,
266 int coord_components,
267 src_reg shadow_comparator,
268 src_reg lod, src_reg lod2,
269 src_reg sample_index,
270 uint32_t constant_offset,
271 src_reg offset_value,
272 src_reg mcs,
273 uint32_t surface, src_reg surface_reg,
274 src_reg sampler_reg);
275
276 src_reg emit_mcs_fetch(const glsl_type *coordinate_type, src_reg coordinate,
277 src_reg surface);
278 void emit_gen6_gather_wa(uint8_t wa, dst_reg dst);
279
280 void emit_ndc_computation();
281 void emit_psiz_and_flags(dst_reg reg);
282 vec4_instruction *emit_generic_urb_slot(dst_reg reg, int varying, int comp);
/* Virtual, so a derived class may override how a URB slot is emitted. */
283 virtual void emit_urb_slot(dst_reg reg, int varying);
284
285 void emit_shader_time_begin();
286 void emit_shader_time_end();
287 void emit_shader_time_write(int shader_time_subindex, src_reg value);
288
289 src_reg get_scratch_offset(bblock_t *block, vec4_instruction *inst,
290 src_reg *reladdr, int reg_offset);
291 void emit_scratch_read(bblock_t *block, vec4_instruction *inst,
292 dst_reg dst,
293 src_reg orig_src,
294 int base_offset);
295 void emit_scratch_write(bblock_t *block, vec4_instruction *inst,
296 int base_offset);
297 void emit_pull_constant_load(bblock_t *block, vec4_instruction *inst,
298 dst_reg dst,
299 src_reg orig_src,
300 int base_offset,
301 src_reg indirect);
302 void emit_pull_constant_load_reg(dst_reg dst,
303 src_reg surf_index,
304 src_reg offset,
305 bblock_t *before_block,
306 vec4_instruction *before_inst);
307 src_reg emit_resolve_reladdr(int scratch_loc[], bblock_t *block,
308 vec4_instruction *inst, src_reg src);
309
310 void resolve_ud_negate(src_reg *reg);
311
312 bool lower_minmax();
313
314 src_reg get_timestamp();
315
/* Two overloads: the second takes the output FILE explicitly. */
316 void dump_instruction(backend_instruction *inst);
317 void dump_instruction(backend_instruction *inst, FILE *file);
318
319 bool is_high_sampler(src_reg sampler);
320
321 bool optimize_predicate(nir_alu_instr *instr, enum brw_predicate *predicate);
322
323 void emit_conversion_from_double(dst_reg dst, src_reg src, bool saturate);
324 void emit_conversion_to_double(dst_reg dst, src_reg src, bool saturate);
325
/* block and ref are optional and default to NULL. */
326 vec4_instruction *shuffle_64bit_data(dst_reg dst, src_reg src,
327 bool for_write,
328 bblock_t *block = NULL,
329 vec4_instruction *ref = NULL);
330
/*
 * NIR translation hooks.  All of them are virtual, so derived classes can
 * override the handling of individual NIR constructs.
 */
331 virtual void emit_nir_code();
332 virtual void nir_setup_uniforms();
333 virtual void nir_emit_impl(nir_function_impl *impl);
334 virtual void nir_emit_cf_list(exec_list *list);
335 virtual void nir_emit_if(nir_if *if_stmt);
336 virtual void nir_emit_loop(nir_loop *loop);
337 virtual void nir_emit_block(nir_block *block);
338 virtual void nir_emit_instr(nir_instr *instr);
339 virtual void nir_emit_load_const(nir_load_const_instr *instr);
340 virtual void nir_emit_intrinsic(nir_intrinsic_instr *instr);
341 virtual void nir_emit_alu(nir_alu_instr *instr);
342 virtual void nir_emit_jump(nir_jump_instr *instr);
343 virtual void nir_emit_texture(nir_tex_instr *instr);
344 virtual void nir_emit_undef(nir_ssa_undef_instr *instr);
345 virtual void nir_emit_ssbo_atomic(int op, nir_intrinsic_instr *instr);
346
/*
 * get_nir_dest() and get_nir_src() each come in three overloads: with an
 * explicit brw_reg_type, with a nir_alu_type, and with no type argument.
 * num_components defaults to 4 in every get_nir_src() overload.
 */
347 dst_reg get_nir_dest(const nir_dest &dest, enum brw_reg_type type);
348 dst_reg get_nir_dest(const nir_dest &dest, nir_alu_type type);
349 dst_reg get_nir_dest(const nir_dest &dest);
350 src_reg get_nir_src(const nir_src &src, enum brw_reg_type type,
351 unsigned num_components = 4);
352 src_reg get_nir_src(const nir_src &src, nir_alu_type type,
353 unsigned num_components = 4);
354 src_reg get_nir_src(const nir_src &src,
355 unsigned num_components = 4);
356 src_reg get_indirect_offset(nir_intrinsic_instr *instr);
357
358 dst_reg *nir_locals;
359 dst_reg *nir_ssa_values;
360
361 protected:
362 void emit_vertex();
363 void setup_payload_interference(struct ra_graph *g, int first_payload_node,
364 int reg_node_count);
/*
 * The five pure virtual hooks below have no default in this class and must
 * be implemented by every concrete derived class.  gs_emit_vertex() and
 * gs_end_primitive() are virtual but not pure.
 */
365 virtual void setup_payload() = 0;
366 virtual void emit_prolog() = 0;
367 virtual void emit_thread_end() = 0;
368 virtual void emit_urb_write_header(int mrf) = 0;
369 virtual vec4_instruction *emit_urb_write_opcode(bool complete) = 0;
370 virtual void gs_emit_vertex(int stream_id);
371 virtual void gs_end_primitive();
372
373 private:
374 /**
375 * If true, then register allocation should fail instead of spilling.
376 */
377 const bool no_spills;
378
379 int shader_time_index;
380
381 unsigned last_scratch; /**< measured in 32-byte (register size) units */
382 };
383
384 } /* namespace brw */
385 #endif /* __cplusplus */
386
387 #endif /* BRW_VEC4_H */