i965: Push down inclusion of brw_program.h.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_vec4.h
1 /*
2 * Copyright © 2011 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #ifndef BRW_VEC4_H
25 #define BRW_VEC4_H
26
27 #include <stdint.h>
28 #include "brw_shader.h"
29 #include "main/compiler.h"
30 #include "program/hash_table.h"
31
32 #ifdef __cplusplus
33 #include "brw_ir_vec4.h"
34
35 extern "C" {
36 #endif
37
38 #include "brw_context.h"
39 #include "brw_eu.h"
40 #include "intel_asm_annotation.h"
41
42 #ifdef __cplusplus
43 }; /* extern "C" */
44 #endif
45
46 #include "glsl/ir.h"
47 #include "glsl/nir/nir.h"
48
49
50 #ifdef __cplusplus
51 extern "C" {
52 #endif
53
/**
 * Declaration of the vec4 assembly generation entry point.
 *
 * The prototype sits inside the surrounding extern "C" block, so it has C
 * linkage when this header is compiled as C++.
 *
 * NOTE(review): presumably returns the generated assembly and stores its
 * size through \p out_assembly_size -- confirm against the definition,
 * which is not part of this header.
 */
54 const unsigned *
55 brw_vec4_generate_assembly(const struct brw_compiler *compiler,
56 void *log_data,
57 void *mem_ctx,
58 const nir_shader *nir,
59 struct brw_vue_prog_data *prog_data,
60 const struct cfg_t *cfg,
61 unsigned *out_assembly_size);
62
63 #ifdef __cplusplus
64 } /* extern "C" */
65
66 namespace brw {
67
68 class vec4_live_variables;
69
70 /**
71 * The vertex shader front-end.
72 *
73 * Translates either GLSL IR or Mesa IR (for ARB_vertex_program and
74 * fixed-function) into VS IR.
 *
 * NOTE(review): the constructor below takes a nir_shader and the class
 * declares nir_emit_*() hooks, so the "GLSL IR or Mesa IR" wording above
 * looks out of date -- confirm and reword.
 *
 * The class is abstract: it declares pure virtual methods (for example
 * make_reg_for_system_value(), setup_payload() and emit_prolog()) that a
 * derived class has to provide.
75 */
76 class vec4_visitor : public backend_shader
77 {
78 public:
/* Constructor (declaration only).
 *
 * NOTE(review): key and prog_data presumably initialize the const members
 * key_tex and prog_data, and no_spills / shader_time_index the private
 * members of the same names -- confirm against the definition.
 */
79 vec4_visitor(const struct brw_compiler *compiler,
80 void *log_data,
81 const struct brw_sampler_prog_key_data *key,
82 struct brw_vue_prog_data *prog_data,
83 const nir_shader *shader,
84 void *mem_ctx,
85 bool no_spills,
86 int shader_time_index);
87 virtual ~vec4_visitor();
88
/* Null-register destinations.  dst_null_f() wraps brw_null_reg() as is
 * (presumably float-typed, going by the suffix -- confirm), while
 * dst_null_d() and dst_null_ud() retype it to BRW_REGISTER_TYPE_D and
 * BRW_REGISTER_TYPE_UD respectively.
 */
89 dst_reg dst_null_f()
90 {
91 return dst_reg(brw_null_reg());
92 }
93
94 dst_reg dst_null_d()
95 {
96 return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
97 }
98
99 dst_reg dst_null_ud()
100 {
101 return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
102 }
103
/* Both pointers are const: they cannot be re-seated after construction. */
104 const struct brw_sampler_prog_key_data * const key_tex;
105 struct brw_vue_prog_data * const prog_data;
/* NOTE(review): presumably written by fail() -- confirm. */
106 char *fail_msg;
107 bool failed;
108
109 /**
110 * GLSL IR currently being processed, which is associated with our
111 * driver IR instructions for debugging purposes.
112 */
113 const void *base_ir;
114 const char *current_annotation;
115
116 int first_non_payload_grf;
117 unsigned int max_grf;
118 int *virtual_grf_start;
119 int *virtual_grf_end;
120 brw::vec4_live_variables *live_intervals;
121 dst_reg userplane[MAX_CLIP_PLANES];
122
123 bool need_all_constants_in_pull_buffer;
124
125 /* Regs for vertex results. Generated at ir_variable visiting time
126 * for the ir->location's used.
127 */
128 dst_reg output_reg[BRW_VARYING_SLOT_COUNT];
129 const char *output_reg_annotation[BRW_VARYING_SLOT_COUNT];
130 int *uniform_size;
131 int uniform_array_size; /**< Size of the uniform_size array */
132 int uniforms;
133
134 src_reg shader_start_time;
135
/* Top-level entry point and failure reporting; fail() takes a
 * printf-style variadic argument list.
 */
136 bool run();
137 void fail(const char *msg, ...);
138
139 int setup_uniforms(int payload_reg);
140
/* Register allocation, spilling, liveness and optimization passes.
 * Only the declarations are visible here; the implementations live
 * outside this header.
 */
141 bool reg_allocate_trivial();
142 bool reg_allocate();
143 void evaluate_spill_costs(float *spill_costs, bool *no_spill);
144 int choose_spill_reg(struct ra_graph *g);
145 void spill_reg(int spill_reg);
146 void move_grf_array_access_to_scratch();
147 void move_uniform_array_access_to_pull_constants();
148 void move_push_constants_to_pull_constants();
149 void split_uniform_registers();
150 void pack_uniform_registers();
151 void calculate_live_intervals();
152 void invalidate_live_intervals();
153 void split_virtual_grfs();
154 bool opt_vector_float();
155 bool opt_reduce_swizzle();
156 bool dead_code_eliminate();
157 int var_range_start(unsigned v, unsigned n) const;
158 int var_range_end(unsigned v, unsigned n) const;
159 bool virtual_grf_interferes(int a, int b);
160 bool opt_cmod_propagation();
161 bool opt_copy_propagation(bool do_constant_prop = true);
162 bool opt_cse_local(bblock_t *block);
163 bool opt_cse();
164 bool opt_algebraic();
165 bool opt_register_coalesce();
166 bool eliminate_find_live_channel();
167 bool is_dep_ctrl_unsafe(const vec4_instruction *inst);
168 void opt_set_dependency_control();
169 void opt_schedule_instructions();
170 void convert_to_hw_regs();
171
/* emit() overloads: one takes an already constructed instruction, the
 * others take an opcode plus an optional destination and up to three
 * sources.
 */
172 vec4_instruction *emit(vec4_instruction *inst);
173
174 vec4_instruction *emit(enum opcode opcode);
175 vec4_instruction *emit(enum opcode opcode, const dst_reg &dst);
176 vec4_instruction *emit(enum opcode opcode, const dst_reg &dst,
177 const src_reg &src0);
178 vec4_instruction *emit(enum opcode opcode, const dst_reg &dst,
179 const src_reg &src0, const src_reg &src1);
180 vec4_instruction *emit(enum opcode opcode, const dst_reg &dst,
181 const src_reg &src0, const src_reg &src1,
182 const src_reg &src2);
183
184 vec4_instruction *emit_before(bblock_t *block,
185 vec4_instruction *inst,
186 vec4_instruction *new_inst);
187
/* EMIT1/EMIT2/EMIT3 declare a method named after the opcode that takes a
 * destination and one, two or three sources and returns a
 * vec4_instruction pointer.  The macro bodies already end in ';', which
 * is why the invocations below carry none.  All three macros are
 * #undef'd right after the list.
 */
188 #define EMIT1(op) vec4_instruction *op(const dst_reg &, const src_reg &);
189 #define EMIT2(op) vec4_instruction *op(const dst_reg &, const src_reg &, const src_reg &);
190 #define EMIT3(op) vec4_instruction *op(const dst_reg &, const src_reg &, const src_reg &, const src_reg &);
191 EMIT1(MOV)
192 EMIT1(NOT)
193 EMIT1(RNDD)
194 EMIT1(RNDE)
195 EMIT1(RNDZ)
196 EMIT1(FRC)
197 EMIT1(F32TO16)
198 EMIT1(F16TO32)
199 EMIT2(ADD)
200 EMIT2(MUL)
201 EMIT2(MACH)
202 EMIT2(MAC)
203 EMIT2(AND)
204 EMIT2(OR)
205 EMIT2(XOR)
206 EMIT2(DP3)
207 EMIT2(DP4)
208 EMIT2(DPH)
209 EMIT2(SHL)
210 EMIT2(SHR)
211 EMIT2(ASR)
/* CMP and IF are declared by hand because they take a conditional
 * modifier or a predicate in addition to (or instead of) plain operands.
 */
212 vec4_instruction *CMP(dst_reg dst, src_reg src0, src_reg src1,
213 enum brw_conditional_mod condition);
214 vec4_instruction *IF(src_reg src0, src_reg src1,
215 enum brw_conditional_mod condition);
216 vec4_instruction *IF(enum brw_predicate predicate);
217 EMIT1(SCRATCH_READ)
218 EMIT2(SCRATCH_WRITE)
219 EMIT3(LRP)
220 EMIT1(BFREV)
221 EMIT3(BFE)
222 EMIT2(BFI1)
223 EMIT3(BFI2)
224 EMIT1(FBH)
225 EMIT1(FBL)
226 EMIT1(CBIT)
227 EMIT3(MAD)
228 EMIT2(ADDC)
229 EMIT2(SUBB)
230 #undef EMIT1
231 #undef EMIT2
232 #undef EMIT3
233
234 int implied_mrf_writes(vec4_instruction *inst);
235
236 vec4_instruction *emit_minmax(enum brw_conditional_mod conditionalmod, dst_reg dst,
237 src_reg src0, src_reg src1);
238
239 vec4_instruction *emit_lrp(const dst_reg &dst, const src_reg &x,
240 const src_reg &y, const src_reg &a);
241
242 /**
243 * Copy any live channel from \p src to the first channel of the
244 * result.
245 */
246 src_reg emit_uniformize(const src_reg &src);
247
248 src_reg fix_3src_operand(const src_reg &src);
249 src_reg resolve_source_modifiers(const src_reg &src);
250
/* src1 defaults to a default-constructed src_reg, so callers may pass a
 * single source.
 */
251 vec4_instruction *emit_math(enum opcode opcode, const dst_reg &dst, const src_reg &src0,
252 const src_reg &src1 = src_reg());
253
254 src_reg fix_math_operand(const src_reg &src);
255
/* Pack/unpack helpers (declarations only). */
256 void emit_pack_half_2x16(dst_reg dst, src_reg src0);
257 void emit_unpack_half_2x16(dst_reg dst, src_reg src0);
258 void emit_unpack_unorm_4x8(const dst_reg &dst, src_reg src0);
259 void emit_unpack_snorm_4x8(const dst_reg &dst, src_reg src0);
260 void emit_pack_unorm_4x8(const dst_reg &dst, const src_reg &src0);
261 void emit_pack_snorm_4x8(const dst_reg &dst, const src_reg &src0);
262
/* Texturing helpers (declarations only). */
263 void emit_texture(ir_texture_opcode op,
264 dst_reg dest,
265 const glsl_type *dest_type,
266 src_reg coordinate,
267 int coord_components,
268 src_reg shadow_comparitor,
269 src_reg lod, src_reg lod2,
270 src_reg sample_index,
271 uint32_t constant_offset,
272 src_reg offset_value,
273 src_reg mcs,
274 bool is_cube_array,
275 uint32_t sampler, src_reg sampler_reg);
276
277 src_reg emit_mcs_fetch(const glsl_type *coordinate_type, src_reg coordinate,
278 src_reg sampler);
279 void emit_gen6_gather_wa(uint8_t wa, dst_reg dst);
280
281 void emit_ndc_computation();
282 void emit_psiz_and_flags(dst_reg reg);
283 vec4_instruction *emit_generic_urb_slot(dst_reg reg, int varying);
/* Virtual, so a derived class may override how a URB slot is emitted. */
284 virtual void emit_urb_slot(dst_reg reg, int varying);
285
286 void emit_shader_time_begin();
287 void emit_shader_time_end();
288 void emit_shader_time_write(int shader_time_subindex, src_reg value);
289
290 void emit_untyped_atomic(unsigned atomic_op, unsigned surf_index,
291 dst_reg dst, src_reg offset, src_reg src0,
292 src_reg src1);
293
294 void emit_untyped_surface_read(unsigned surf_index, dst_reg dst,
295 src_reg offset);
296
/* Scratch and pull-constant access helpers (declarations only). */
297 src_reg get_scratch_offset(bblock_t *block, vec4_instruction *inst,
298 src_reg *reladdr, int reg_offset);
299 src_reg get_pull_constant_offset(bblock_t *block, vec4_instruction *inst,
300 src_reg *reladdr, int reg_offset);
301 void emit_scratch_read(bblock_t *block, vec4_instruction *inst,
302 dst_reg dst,
303 src_reg orig_src,
304 int base_offset);
305 void emit_scratch_write(bblock_t *block, vec4_instruction *inst,
306 int base_offset);
307 void emit_pull_constant_load(bblock_t *block, vec4_instruction *inst,
308 dst_reg dst,
309 src_reg orig_src,
310 int base_offset);
311 void emit_pull_constant_load_reg(dst_reg dst,
312 src_reg surf_index,
313 src_reg offset,
314 bblock_t *before_block,
315 vec4_instruction *before_inst);
316 src_reg emit_resolve_reladdr(int scratch_loc[], bblock_t *block,
317 vec4_instruction *inst, src_reg src);
318
319 void resolve_ud_negate(src_reg *reg);
320
321 src_reg get_timestamp();
322
/* Two overloads; the second additionally takes the FILE to write to. */
323 void dump_instruction(backend_instruction *inst);
324 void dump_instruction(backend_instruction *inst, FILE *file);
325
326 bool is_high_sampler(src_reg sampler);
327
/* NIR translation hooks.  All of them are virtual, so a derived class
 * may override any individual step.
 */
328 virtual void emit_nir_code();
329 virtual void nir_setup_inputs();
330 virtual void nir_setup_uniforms();
331 virtual void nir_setup_system_value_intrinsic(nir_intrinsic_instr *instr);
332 virtual void nir_setup_system_values();
333 virtual void nir_emit_impl(nir_function_impl *impl);
334 virtual void nir_emit_cf_list(exec_list *list);
335 virtual void nir_emit_if(nir_if *if_stmt);
336 virtual void nir_emit_loop(nir_loop *loop);
337 virtual void nir_emit_block(nir_block *block);
338 virtual void nir_emit_instr(nir_instr *instr);
339 virtual void nir_emit_load_const(nir_load_const_instr *instr);
340 virtual void nir_emit_intrinsic(nir_intrinsic_instr *instr);
341 virtual void nir_emit_alu(nir_alu_instr *instr);
342 virtual void nir_emit_jump(nir_jump_instr *instr);
343 virtual void nir_emit_texture(nir_tex_instr *instr);
344 virtual void nir_emit_undef(nir_ssa_undef_instr *instr);
345 virtual void nir_emit_ssbo_atomic(int op, nir_intrinsic_instr *instr);
346
/* NIR dest/src accessors.  Overloads take a brw_reg_type, a nir_alu_type
 * or no type at all; num_components defaults to 4 for the source variants.
 */
347 dst_reg get_nir_dest(nir_dest dest, enum brw_reg_type type);
348 dst_reg get_nir_dest(nir_dest dest, nir_alu_type type);
349 dst_reg get_nir_dest(nir_dest dest);
350 src_reg get_nir_src(nir_src src, enum brw_reg_type type,
351 unsigned num_components = 4);
352 src_reg get_nir_src(nir_src src, nir_alu_type type,
353 unsigned num_components = 4);
354 src_reg get_nir_src(nir_src src,
355 unsigned num_components = 4);
356
/* Pure virtual: every concrete derived class must implement it. */
357 virtual dst_reg *make_reg_for_system_value(int location,
358 const glsl_type *type) = 0;
359
/* NOTE(review): presumably per-shader arrays indexed by NIR register,
 * SSA value, input and system-value index respectively -- confirm
 * against the code that allocates them.
 */
360 dst_reg *nir_locals;
361 dst_reg *nir_ssa_values;
362 src_reg *nir_inputs;
363 dst_reg *nir_system_values;
364
365 protected:
366 void emit_vertex();
367 void lower_attributes_to_hw_regs(const int *attribute_map,
368 bool interleaved);
369 void setup_payload_interference(struct ra_graph *g, int first_payload_node,
370 int reg_node_count);
/* Hooks for derived classes.  The five declared "= 0" are pure virtual
 * and must be implemented; gs_emit_vertex() and gs_end_primitive() are
 * plain virtuals that may be overridden.
 */
371 virtual void setup_payload() = 0;
372 virtual void emit_prolog() = 0;
373 virtual void emit_thread_end() = 0;
374 virtual void emit_urb_write_header(int mrf) = 0;
375 virtual vec4_instruction *emit_urb_write_opcode(bool complete) = 0;
376 virtual void gs_emit_vertex(int stream_id);
377 virtual void gs_end_primitive();
378
379 private:
380 /**
381 * If true, then register allocation should fail instead of spilling.
382 */
383 const bool no_spills;
384
385 int shader_time_index;
386
387 unsigned last_scratch; /**< measured in 32-byte (register size) units */
388 };
389
390 } /* namespace brw */
391 #endif /* __cplusplus */
392
393 #endif /* BRW_VEC4_H */