i965/vec4: Move vec4_generator class definition into the .cpp file.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_vec4.h
1 /*
2 * Copyright © 2011 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #ifndef BRW_VEC4_H
25 #define BRW_VEC4_H
26
27 #include <stdint.h>
28 #include "brw_shader.h"
29 #include "main/compiler.h"
30 #include "program/hash_table.h"
31 #include "brw_program.h"
32
33 #ifdef __cplusplus
34 #include "brw_ir_vec4.h"
35
36 extern "C" {
37 #endif
38
39 #include "brw_context.h"
40 #include "brw_eu.h"
41 #include "intel_asm_annotation.h"
42
43 #ifdef __cplusplus
44 }; /* extern "C" */
45 #endif
46
47 #include "glsl/ir.h"
48 #include "glsl/nir/nir.h"
49
50
51 #ifdef __cplusplus
52 extern "C" {
53 #endif
54
/**
 * Generate assembly for a vec4 program (per the function name; the
 * definition is not part of this header).
 *
 * The prototype sits inside the surrounding extern "C" block, so it has C
 * linkage when this header is compiled as C++.
 *
 * NOTE(review): presumably the returned pointer refers to the generated
 * assembly and its size is stored through \p out_assembly_size -- confirm
 * against the definition, including the unit of the size and who owns the
 * returned memory (\p mem_ctx looks like the allocation context).
 */
55 const unsigned *
56 brw_vec4_generate_assembly(const struct brw_compiler *compiler,
57 void *log_data,
58 void *mem_ctx,
59 const nir_shader *nir,
60 struct brw_vue_prog_data *prog_data,
61 const struct cfg_t *cfg,
62 unsigned *out_assembly_size);
63
64 #ifdef __cplusplus
65 } /* extern "C" */
66
67 namespace brw {
68
69 class vec4_live_variables;
70
71 /**
72 * The vertex shader front-end.
73 *
74 * Translates either GLSL IR or Mesa IR (for ARB_vertex_program and
75 * fixed-function) into VS IR.
*
* NOTE(review): the interface declared below is NIR-based (the constructor
* takes a nir_shader and translation goes through the nir_emit_*()
* methods), so the mention of GLSL IR / Mesa IR above looks stale --
* confirm and update.
*
* The class is abstract: make_reg_for_system_value(), setup_payload(),
* emit_prolog(), emit_thread_end(), emit_urb_write_header() and
* emit_urb_write_opcode() are pure virtual and must be supplied by a
* derived class.
76 */
77 class vec4_visitor : public backend_shader
78 {
79 public:
/* NOTE(review): `key` presumably initializes key_tex and `shader` is
 * presumably the NIR program to translate; the constructor body is not in
 * this header -- confirm there.
 */
80 vec4_visitor(const struct brw_compiler *compiler,
81 void *log_data,
82 const struct brw_sampler_prog_key_data *key,
83 struct brw_vue_prog_data *prog_data,
84 const nir_shader *shader,
85 void *mem_ctx,
86 bool no_spills,
87 int shader_time_index);
88 virtual ~vec4_visitor();
89
/* Null destination registers: brw_null_reg() wrapped in a dst_reg, either
 * with its type left as returned (dst_null_f), or retyped to
 * BRW_REGISTER_TYPE_D (dst_null_d) or BRW_REGISTER_TYPE_UD (dst_null_ud).
 */
90 dst_reg dst_null_f()
91 {
92 return dst_reg(brw_null_reg());
93 }
94
95 dst_reg dst_null_d()
96 {
97 return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
98 }
99
100 dst_reg dst_null_ud()
101 {
102 return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
103 }
104
/* Both pointers are themselves const-qualified, so they cannot be re-seated
 * after construction.
 */
105 const struct brw_sampler_prog_key_data * const key_tex;
106 struct brw_vue_prog_data * const prog_data;
/* NOTE(review): presumably updated by fail() -- confirm in the
 * implementation.
 */
107 char *fail_msg;
108 bool failed;
109
110 /**
111 * GLSL IR currently being processed, which is associated with our
112 * driver IR instructions for debugging purposes.
113 */
114 const void *base_ir;
115 const char *current_annotation;
116
117 int first_non_payload_grf;
118 unsigned int max_grf;
119 int *virtual_grf_start;
120 int *virtual_grf_end;
121 brw::vec4_live_variables *live_intervals;
122 dst_reg userplane[MAX_CLIP_PLANES];
123
124 bool need_all_constants_in_pull_buffer;
125
126 /* Regs for vertex results. Generated at ir_variable visiting time
127 * for the ir->location's used.
128 */
129 dst_reg output_reg[BRW_VARYING_SLOT_COUNT];
130 const char *output_reg_annotation[BRW_VARYING_SLOT_COUNT];
131 int *uniform_size;
132 int uniform_array_size; /**< Size of the uniform_size array */
133 int uniforms;
134
135 src_reg shader_start_time;
136
137 bool run();
/* Variadic.  NOTE(review): msg is presumably a printf-style format string --
 * confirm in the implementation.
 */
138 void fail(const char *msg, ...);
139
140 int setup_uniforms(int payload_reg);
141
/* Register allocation, spilling and IR transformation passes.
 * NOTE(review): the bool-returning passes presumably report whether they
 * changed the program (or, for reg_allocate*(), whether they succeeded) --
 * confirm in their definitions.
 */
142 bool reg_allocate_trivial();
143 bool reg_allocate();
144 void evaluate_spill_costs(float *spill_costs, bool *no_spill);
145 int choose_spill_reg(struct ra_graph *g);
146 void spill_reg(int spill_reg);
147 void move_grf_array_access_to_scratch();
148 void move_uniform_array_access_to_pull_constants();
149 void move_push_constants_to_pull_constants();
150 void split_uniform_registers();
151 void pack_uniform_registers();
152 void calculate_live_intervals();
153 void invalidate_live_intervals();
154 void split_virtual_grfs();
155 bool opt_vector_float();
156 bool opt_reduce_swizzle();
157 bool dead_code_eliminate();
158 int var_range_start(unsigned v, unsigned n) const;
159 int var_range_end(unsigned v, unsigned n) const;
160 bool virtual_grf_interferes(int a, int b);
161 bool opt_cmod_propagation();
162 bool opt_copy_propagation(bool do_constant_prop = true);
163 bool opt_cse_local(bblock_t *block);
164 bool opt_cse();
165 bool opt_algebraic();
166 bool opt_register_coalesce();
167 bool eliminate_find_live_channel();
168 bool is_dep_ctrl_unsafe(const vec4_instruction *inst);
169 void opt_set_dependency_control();
170 void opt_schedule_instructions();
171 void convert_to_hw_regs();
172
/* emit() overloads: the first takes an already-built instruction, the
 * others take an opcode with an optional destination and up to three
 * sources.  All return a vec4_instruction pointer.
 */
173 vec4_instruction *emit(vec4_instruction *inst);
174
175 vec4_instruction *emit(enum opcode opcode);
176 vec4_instruction *emit(enum opcode opcode, const dst_reg &dst);
177 vec4_instruction *emit(enum opcode opcode, const dst_reg &dst,
178 const src_reg &src0);
179 vec4_instruction *emit(enum opcode opcode, const dst_reg &dst,
180 const src_reg &src0, const src_reg &src1);
181 vec4_instruction *emit(enum opcode opcode, const dst_reg &dst,
182 const src_reg &src0, const src_reg &src1,
183 const src_reg &src2);
184
/* NOTE(review): presumably inserts new_inst ahead of inst within block --
 * confirm in the implementation.
 */
185 vec4_instruction *emit_before(bblock_t *block,
186 vec4_instruction *inst,
187 vec4_instruction *new_inst);
188
/* Helper macros that declare one member function per opcode name, taking a
 * destination plus one (EMIT1), two (EMIT2) or three (EMIT3) sources and
 * returning a vec4_instruction pointer.  Each expansion already ends in
 * ';', which is why the uses below carry none.  The macros are #undef'd
 * right after the list so they do not leak out of this header.
 */
189 #define EMIT1(op) vec4_instruction *op(const dst_reg &, const src_reg &);
190 #define EMIT2(op) vec4_instruction *op(const dst_reg &, const src_reg &, const src_reg &);
191 #define EMIT3(op) vec4_instruction *op(const dst_reg &, const src_reg &, const src_reg &, const src_reg &);
192 EMIT1(MOV)
193 EMIT1(NOT)
194 EMIT1(RNDD)
195 EMIT1(RNDE)
196 EMIT1(RNDZ)
197 EMIT1(FRC)
198 EMIT1(F32TO16)
199 EMIT1(F16TO32)
200 EMIT2(ADD)
201 EMIT2(MUL)
202 EMIT2(MACH)
203 EMIT2(MAC)
204 EMIT2(AND)
205 EMIT2(OR)
206 EMIT2(XOR)
207 EMIT2(DP3)
208 EMIT2(DP4)
209 EMIT2(DPH)
210 EMIT2(SHL)
211 EMIT2(SHR)
212 EMIT2(ASR)
/* CMP and IF are declared by hand because their signatures carry a
 * conditional modifier or predicate and so do not fit the EMITn shapes.
 */
213 vec4_instruction *CMP(dst_reg dst, src_reg src0, src_reg src1,
214 enum brw_conditional_mod condition);
215 vec4_instruction *IF(src_reg src0, src_reg src1,
216 enum brw_conditional_mod condition);
217 vec4_instruction *IF(enum brw_predicate predicate);
218 EMIT1(SCRATCH_READ)
219 EMIT2(SCRATCH_WRITE)
220 EMIT3(LRP)
221 EMIT1(BFREV)
222 EMIT3(BFE)
223 EMIT2(BFI1)
224 EMIT3(BFI2)
225 EMIT1(FBH)
226 EMIT1(FBL)
227 EMIT1(CBIT)
228 EMIT3(MAD)
229 EMIT2(ADDC)
230 EMIT2(SUBB)
231 #undef EMIT1
232 #undef EMIT2
233 #undef EMIT3
234
235 int implied_mrf_writes(vec4_instruction *inst);
236
237 vec4_instruction *emit_minmax(enum brw_conditional_mod conditionalmod, dst_reg dst,
238 src_reg src0, src_reg src1);
239
240 vec4_instruction *emit_lrp(const dst_reg &dst, const src_reg &x,
241 const src_reg &y, const src_reg &a);
242
243 /**
244 * Copy any live channel from \p src to the first channel of the
245 * result.
246 */
247 src_reg emit_uniformize(const src_reg &src);
248
249 src_reg fix_3src_operand(const src_reg &src);
250 src_reg resolve_source_modifiers(const src_reg &src);
251
/* src1 defaults to a default-constructed src_reg, so callers with a single
 * operand may omit it.
 */
252 vec4_instruction *emit_math(enum opcode opcode, const dst_reg &dst, const src_reg &src0,
253 const src_reg &src1 = src_reg());
254
255 src_reg fix_math_operand(const src_reg &src);
256
257 void emit_pack_half_2x16(dst_reg dst, src_reg src0);
258 void emit_unpack_half_2x16(dst_reg dst, src_reg src0);
259 void emit_unpack_unorm_4x8(const dst_reg &dst, src_reg src0);
260 void emit_unpack_snorm_4x8(const dst_reg &dst, src_reg src0);
261 void emit_pack_unorm_4x8(const dst_reg &dst, const src_reg &src0);
262 void emit_pack_snorm_4x8(const dst_reg &dst, const src_reg &src0);
263
264 void emit_texture(ir_texture_opcode op,
265 dst_reg dest,
266 const glsl_type *dest_type,
267 src_reg coordinate,
268 int coord_components,
269 src_reg shadow_comparitor,
270 src_reg lod, src_reg lod2,
271 src_reg sample_index,
272 uint32_t constant_offset,
273 src_reg offset_value,
274 src_reg mcs,
275 bool is_cube_array,
276 uint32_t sampler, src_reg sampler_reg);
277
278 uint32_t gather_channel(unsigned gather_component, uint32_t sampler);
279 src_reg emit_mcs_fetch(const glsl_type *coordinate_type, src_reg coordinate,
280 src_reg sampler);
281 void emit_gen6_gather_wa(uint8_t wa, dst_reg dst);
282 void swizzle_result(ir_texture_opcode op, dst_reg dest,
283 src_reg orig_val, uint32_t sampler,
284 const glsl_type *dest_type);
285
286 void emit_ndc_computation();
287 void emit_psiz_and_flags(dst_reg reg);
288 vec4_instruction *emit_generic_urb_slot(dst_reg reg, int varying);
/* Virtual (unlike emit_generic_urb_slot()), so a derived class can override
 * it.
 */
289 virtual void emit_urb_slot(dst_reg reg, int varying);
290
291 void emit_shader_time_begin();
292 void emit_shader_time_end();
293 void emit_shader_time_write(int shader_time_subindex, src_reg value);
294
295 void emit_untyped_atomic(unsigned atomic_op, unsigned surf_index,
296 dst_reg dst, src_reg offset, src_reg src0,
297 src_reg src1);
298
299 void emit_untyped_surface_read(unsigned surf_index, dst_reg dst,
300 src_reg offset);
301
302 src_reg get_scratch_offset(bblock_t *block, vec4_instruction *inst,
303 src_reg *reladdr, int reg_offset);
304 src_reg get_pull_constant_offset(bblock_t *block, vec4_instruction *inst,
305 src_reg *reladdr, int reg_offset);
306 void emit_scratch_read(bblock_t *block, vec4_instruction *inst,
307 dst_reg dst,
308 src_reg orig_src,
309 int base_offset);
310 void emit_scratch_write(bblock_t *block, vec4_instruction *inst,
311 int base_offset);
312 void emit_pull_constant_load(bblock_t *block, vec4_instruction *inst,
313 dst_reg dst,
314 src_reg orig_src,
315 int base_offset);
316 void emit_pull_constant_load_reg(dst_reg dst,
317 src_reg surf_index,
318 src_reg offset,
319 bblock_t *before_block,
320 vec4_instruction *before_inst);
321 src_reg emit_resolve_reladdr(int scratch_loc[], bblock_t *block,
322 vec4_instruction *inst, src_reg src);
323
324 void resolve_ud_negate(src_reg *reg);
325
326 src_reg get_timestamp();
327
328 void dump_instruction(backend_instruction *inst);
329 void dump_instruction(backend_instruction *inst, FILE *file);
330
331 bool is_high_sampler(src_reg sampler);
332
/* NIR translation methods.  All of them are virtual, so a derived class can
 * override individual steps.
 */
333 virtual void emit_nir_code();
334 virtual void nir_setup_inputs();
335 virtual void nir_setup_uniforms();
336 virtual void nir_setup_system_value_intrinsic(nir_intrinsic_instr *instr);
337 virtual void nir_setup_system_values();
338 virtual void nir_emit_impl(nir_function_impl *impl);
339 virtual void nir_emit_cf_list(exec_list *list);
340 virtual void nir_emit_if(nir_if *if_stmt);
341 virtual void nir_emit_loop(nir_loop *loop);
342 virtual void nir_emit_block(nir_block *block);
343 virtual void nir_emit_instr(nir_instr *instr);
344 virtual void nir_emit_load_const(nir_load_const_instr *instr);
345 virtual void nir_emit_intrinsic(nir_intrinsic_instr *instr);
346 virtual void nir_emit_alu(nir_alu_instr *instr);
347 virtual void nir_emit_jump(nir_jump_instr *instr);
348 virtual void nir_emit_texture(nir_tex_instr *instr);
349 virtual void nir_emit_undef(nir_ssa_undef_instr *instr);
350 virtual void nir_emit_ssbo_atomic(int op, nir_intrinsic_instr *instr);
351
/* Overloads that return a dst_reg / src_reg for a NIR destination / source,
 * optionally with an explicit hardware register type or NIR ALU type.
 * num_components defaults to 4 in every get_nir_src() overload.
 */
352 dst_reg get_nir_dest(nir_dest dest, enum brw_reg_type type);
353 dst_reg get_nir_dest(nir_dest dest, nir_alu_type type);
354 dst_reg get_nir_dest(nir_dest dest);
355 src_reg get_nir_src(nir_src src, enum brw_reg_type type,
356 unsigned num_components = 4);
357 src_reg get_nir_src(nir_src src, nir_alu_type type,
358 unsigned num_components = 4);
359 src_reg get_nir_src(nir_src src,
360 unsigned num_components = 4);
361
/* Pure virtual: must be implemented by a derived class. */
362 virtual dst_reg *make_reg_for_system_value(int location,
363 const glsl_type *type) = 0;
364
365 dst_reg *nir_locals;
366 dst_reg *nir_ssa_values;
367 src_reg *nir_inputs;
368 dst_reg *nir_system_values;
369
370 protected:
371 void emit_vertex();
372 void lower_attributes_to_hw_regs(const int *attribute_map,
373 bool interleaved);
374 void setup_payload_interference(struct ra_graph *g, int first_payload_node,
375 int reg_node_count);
/* The five hooks below are pure virtual and must be implemented by a
 * derived class; gs_emit_vertex() and gs_end_primitive() are virtual but
 * not pure, so overriding them is optional.
 */
376 virtual void setup_payload() = 0;
377 virtual void emit_prolog() = 0;
378 virtual void emit_thread_end() = 0;
379 virtual void emit_urb_write_header(int mrf) = 0;
380 virtual vec4_instruction *emit_urb_write_opcode(bool complete) = 0;
381 virtual void gs_emit_vertex(int stream_id);
382 virtual void gs_end_primitive();
383
384 private:
385 /**
386 * If true, then register allocation should fail instead of spilling.
387 */
388 const bool no_spills;
389
390 int shader_time_index;
391
392 unsigned last_scratch; /**< measured in 32-byte (register size) units */
393 };
394
395 } /* namespace brw */
396 #endif /* __cplusplus */
397
398 #endif /* BRW_VEC4_H */