i965/shader: Get rid of the shader, prog, and shader_prog fields
[mesa.git] / src / mesa / drivers / dri / i965 / brw_vec4.h
1 /*
2 * Copyright © 2011 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #ifndef BRW_VEC4_H
25 #define BRW_VEC4_H
26
27 #include <stdint.h>
28 #include "brw_shader.h"
29 #include "main/compiler.h"
30 #include "program/hash_table.h"
31 #include "brw_program.h"
32
33 #ifdef __cplusplus
34 #include "brw_ir_vec4.h"
35
36 extern "C" {
37 #endif
38
39 #include "brw_context.h"
40 #include "brw_eu.h"
41 #include "intel_asm_annotation.h"
42
43 #ifdef __cplusplus
44 }; /* extern "C" */
45 #endif
46
47 #include "glsl/ir.h"
48 #include "glsl/nir/nir.h"
49
50
51 #ifdef __cplusplus
52 extern "C" {
53 #endif
54
55 #ifdef __cplusplus
56 } /* extern "C" */
57
58 namespace brw {
59
60 class vec4_live_variables; /* forward declaration; only used by pointer here (vec4_visitor::live_intervals) */
61
62 /**
63 * The vertex shader front-end.
64 *
65 * Declares the visitor that builds vec4 IR (vec4_instruction) for a shader.
66 * The constructor takes a nir_shader and the nir_emit_*() hooks take NIR nodes.
   *
   * NOTE(review): this comment previously said the input was GLSL IR or
   * Mesa IR (ARB_vertex_program / fixed-function).  The constructor visible
   * here only takes a nir_shader, so the wording was updated -- confirm
   * against the implementation.
67 */
68 class vec4_visitor : public backend_shader
69 {
70 public:
   /* Construction / destruction.  How each argument is stored is defined in
    * the implementation file, not in this header. */
71 vec4_visitor(const struct brw_compiler *compiler,
72 void *log_data,
73 const struct brw_sampler_prog_key_data *key,
74 struct brw_vue_prog_data *prog_data,
75 nir_shader *shader,
76 void *mem_ctx,
77 bool no_spills,
78 int shader_time_index);
79 ~vec4_visitor();
80
   /* Null-register destinations.  dst_null_f() wraps brw_null_reg() exactly
    * as returned; the _d / _ud variants first retype it to
    * BRW_REGISTER_TYPE_D / BRW_REGISTER_TYPE_UD. */
81 dst_reg dst_null_f()
82 {
83 return dst_reg(brw_null_reg());
84 }
85
86 dst_reg dst_null_d()
87 {
88 return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
89 }
90
91 dst_reg dst_null_ud()
92 {
93 return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
94 }
95
   /* Both pointers below are themselves const, so they cannot be reseated
    * after construction; key_tex additionally points to const data. */
96 const struct brw_sampler_prog_key_data * const key_tex;
97 struct brw_vue_prog_data * const prog_data;
   /* Failure state.  NOTE(review): presumably written by fail() below --
    * confirm in the implementation. */
98 char *fail_msg;
99 bool failed;
100
101 /**
102 * GLSL IR currently being processed, which is associated with our
103 * driver IR instructions for debugging purposes.
104 */
105 const void *base_ir;
106 const char *current_annotation;
107
108 int first_non_payload_grf;
109 unsigned int max_grf;
110 int *virtual_grf_start;
111 int *virtual_grf_end;
112 brw::vec4_live_variables *live_intervals;
113 dst_reg userplane[MAX_CLIP_PLANES];
114
115 bool need_all_constants_in_pull_buffer;
116
117 /* Regs for vertex results. Generated at ir_variable visiting time
118 * for the ir->location's used.
119 */
120 dst_reg output_reg[BRW_VARYING_SLOT_COUNT];
121 const char *output_reg_annotation[BRW_VARYING_SLOT_COUNT];
122 int *uniform_size;
123 int uniform_array_size; /**< Size of the uniform_size array */
124 int uniforms;
125
126 src_reg shader_start_time;
127
   /* Top-level entry point and failure reporting.  fail() takes a
    * printf-style variadic argument list after \p msg. */
128 bool run();
129 void fail(const char *msg, ...);
130
131 int setup_uniforms(int payload_reg);
132
   /* Register allocation, spilling, liveness and optimization entry points.
    * NOTE(review): the bool-returning passes presumably report whether they
    * changed anything (or, for reg_allocate*, whether they succeeded) --
    * confirm in the implementation. */
133 bool reg_allocate_trivial();
134 bool reg_allocate();
135 void evaluate_spill_costs(float *spill_costs, bool *no_spill);
136 int choose_spill_reg(struct ra_graph *g);
137 void spill_reg(int spill_reg);
138 void move_grf_array_access_to_scratch();
139 void move_uniform_array_access_to_pull_constants();
140 void move_push_constants_to_pull_constants();
141 void split_uniform_registers();
142 void pack_uniform_registers();
143 void calculate_live_intervals();
144 void invalidate_live_intervals();
145 void split_virtual_grfs();
146 bool opt_vector_float();
147 bool opt_reduce_swizzle();
148 bool dead_code_eliminate();
149 int var_range_start(unsigned v, unsigned n) const;
150 int var_range_end(unsigned v, unsigned n) const;
151 bool virtual_grf_interferes(int a, int b);
152 bool opt_copy_propagation(bool do_constant_prop = true);
153 bool opt_cse_local(bblock_t *block);
154 bool opt_cse();
155 bool opt_algebraic();
156 bool opt_register_coalesce();
157 bool eliminate_find_live_channel();
158 bool is_dep_ctrl_unsafe(const vec4_instruction *inst);
159 void opt_set_dependency_control();
160 void opt_schedule_instructions();
161
   /* Instruction emission.  The first overload takes an already-built
    * instruction; the others take an opcode plus an optional destination
    * and up to three sources.  All return a vec4_instruction pointer. */
162 vec4_instruction *emit(vec4_instruction *inst);
163
164 vec4_instruction *emit(enum opcode opcode);
165 vec4_instruction *emit(enum opcode opcode, const dst_reg &dst);
166 vec4_instruction *emit(enum opcode opcode, const dst_reg &dst,
167 const src_reg &src0);
168 vec4_instruction *emit(enum opcode opcode, const dst_reg &dst,
169 const src_reg &src0, const src_reg &src1);
170 vec4_instruction *emit(enum opcode opcode, const dst_reg &dst,
171 const src_reg &src0, const src_reg &src1,
172 const src_reg &src2);
173
174 vec4_instruction *emit_before(bblock_t *block,
175 vec4_instruction *inst,
176 vec4_instruction *new_inst);
177
   /* EMIT1/2/3(op) declare a member function named `op` that takes a
    * destination plus one, two or three sources and returns a
    * vec4_instruction pointer.  The macros are #undef'd right after the
    * list so they do not leak into files that include this header. */
178 #define EMIT1(op) vec4_instruction *op(const dst_reg &, const src_reg &);
179 #define EMIT2(op) vec4_instruction *op(const dst_reg &, const src_reg &, const src_reg &);
180 #define EMIT3(op) vec4_instruction *op(const dst_reg &, const src_reg &, const src_reg &, const src_reg &);
181 EMIT1(MOV)
182 EMIT1(NOT)
183 EMIT1(RNDD)
184 EMIT1(RNDE)
185 EMIT1(RNDZ)
186 EMIT1(FRC)
187 EMIT1(F32TO16)
188 EMIT1(F16TO32)
189 EMIT2(ADD)
190 EMIT2(MUL)
191 EMIT2(MACH)
192 EMIT2(MAC)
193 EMIT2(AND)
194 EMIT2(OR)
195 EMIT2(XOR)
196 EMIT2(DP3)
197 EMIT2(DP4)
198 EMIT2(DPH)
199 EMIT2(SHL)
200 EMIT2(SHR)
201 EMIT2(ASR)
   /* CMP and IF are declared by hand because their signatures (by-value
    * registers, conditional mod / predicate argument) do not fit EMITn. */
202 vec4_instruction *CMP(dst_reg dst, src_reg src0, src_reg src1,
203 enum brw_conditional_mod condition);
204 vec4_instruction *IF(src_reg src0, src_reg src1,
205 enum brw_conditional_mod condition);
206 vec4_instruction *IF(enum brw_predicate predicate);
207 EMIT1(SCRATCH_READ)
208 EMIT2(SCRATCH_WRITE)
209 EMIT3(LRP)
210 EMIT1(BFREV)
211 EMIT3(BFE)
212 EMIT2(BFI1)
213 EMIT3(BFI2)
214 EMIT1(FBH)
215 EMIT1(FBL)
216 EMIT1(CBIT)
217 EMIT3(MAD)
218 EMIT2(ADDC)
219 EMIT2(SUBB)
220 #undef EMIT1
221 #undef EMIT2
222 #undef EMIT3
223
224 int implied_mrf_writes(vec4_instruction *inst);
225
226 void emit_vp_sop(enum brw_conditional_mod condmod, dst_reg dst,
227 src_reg src0, src_reg src1, src_reg one);
228
229 vec4_instruction *emit_minmax(enum brw_conditional_mod conditionalmod, dst_reg dst,
230 src_reg src0, src_reg src1);
231
232 vec4_instruction *emit_lrp(const dst_reg &dst, const src_reg &x,
233 const src_reg &y, const src_reg &a);
234
235 /**
236 * Copy any live channel from \p src to the first channel of the
237 * result.
238 */
239 src_reg emit_uniformize(const src_reg &src);
240
241 /**
242 * Emit the correct dot-product instruction for the type of arguments
243 */
244 void emit_dp(dst_reg dst, src_reg src0, src_reg src1, unsigned elements);
245
246 src_reg fix_3src_operand(const src_reg &src);
247 src_reg resolve_source_modifiers(const src_reg &src);
248
   /* \p src1 defaults to a default-constructed src_reg, so single-source
    * math opcodes can omit it. */
249 vec4_instruction *emit_math(enum opcode opcode, const dst_reg &dst, const src_reg &src0,
250 const src_reg &src1 = src_reg());
251
252 src_reg fix_math_operand(const src_reg &src);
253
   /* Pack / unpack helpers for half-float 2x16 and (un)signed-normalized
    * 4x8 formats, as named. */
254 void emit_pack_half_2x16(dst_reg dst, src_reg src0);
255 void emit_unpack_half_2x16(dst_reg dst, src_reg src0);
256 void emit_unpack_unorm_4x8(const dst_reg &dst, src_reg src0);
257 void emit_unpack_snorm_4x8(const dst_reg &dst, src_reg src0);
258 void emit_pack_unorm_4x8(const dst_reg &dst, const src_reg &src0);
259 void emit_pack_snorm_4x8(const dst_reg &dst, const src_reg &src0);
260
   /* Texturing helpers. */
261 void emit_texture(ir_texture_opcode op,
262 dst_reg dest,
263 const glsl_type *dest_type,
264 src_reg coordinate,
265 int coord_components,
266 src_reg shadow_comparitor,
267 src_reg lod, src_reg lod2,
268 src_reg sample_index,
269 uint32_t constant_offset,
270 src_reg offset_value,
271 src_reg mcs,
272 bool is_cube_array,
273 uint32_t sampler, src_reg sampler_reg);
274
275 uint32_t gather_channel(unsigned gather_component, uint32_t sampler);
276 src_reg emit_mcs_fetch(const glsl_type *coordinate_type, src_reg coordinate,
277 src_reg sampler);
278 void emit_gen6_gather_wa(uint8_t wa, dst_reg dst);
279 void swizzle_result(ir_texture_opcode op, dst_reg dest,
280 src_reg orig_val, uint32_t sampler,
281 const glsl_type *dest_type);
282
   /* URB output helpers.  emit_urb_slot() is virtual so a derived class can
    * override how a varying slot is written. */
283 void emit_ndc_computation();
284 void emit_psiz_and_flags(dst_reg reg);
285 vec4_instruction *emit_generic_urb_slot(dst_reg reg, int varying);
286 virtual void emit_urb_slot(dst_reg reg, int varying);
287
288 void emit_shader_time_begin();
289 void emit_shader_time_end();
290 void emit_shader_time_write(int shader_time_subindex, src_reg value);
291
292 void emit_untyped_atomic(unsigned atomic_op, unsigned surf_index,
293 dst_reg dst, src_reg offset, src_reg src0,
294 src_reg src1);
295
296 void emit_untyped_surface_read(unsigned surf_index, dst_reg dst,
297 src_reg offset);
298
   /* Scratch-space and pull-constant access helpers.  Most take the basic
    * block and instruction they operate relative to. */
299 src_reg get_scratch_offset(bblock_t *block, vec4_instruction *inst,
300 src_reg *reladdr, int reg_offset);
301 src_reg get_pull_constant_offset(bblock_t *block, vec4_instruction *inst,
302 src_reg *reladdr, int reg_offset);
303 void emit_scratch_read(bblock_t *block, vec4_instruction *inst,
304 dst_reg dst,
305 src_reg orig_src,
306 int base_offset);
307 void emit_scratch_write(bblock_t *block, vec4_instruction *inst,
308 int base_offset);
309 void emit_pull_constant_load(bblock_t *block, vec4_instruction *inst,
310 dst_reg dst,
311 src_reg orig_src,
312 int base_offset);
313 void emit_pull_constant_load_reg(dst_reg dst,
314 src_reg surf_index,
315 src_reg offset,
316 bblock_t *before_block,
317 vec4_instruction *before_inst);
318 src_reg emit_resolve_reladdr(int scratch_loc[], bblock_t *block,
319 vec4_instruction *inst, src_reg src);
320
321 void resolve_ud_negate(src_reg *reg);
322
323 src_reg get_timestamp();
324
   /* Debug dump of one instruction; the second overload takes the FILE to
    * write to. */
325 void dump_instruction(backend_instruction *inst);
326 void dump_instruction(backend_instruction *inst, FILE *file);
327
328 bool is_high_sampler(src_reg sampler);
329
   /* NIR translation hooks.  All are virtual, so a derived class may
    * override any of them. */
330 virtual void emit_nir_code();
331 virtual void nir_setup_inputs();
332 virtual void nir_setup_uniforms();
333 virtual void nir_setup_system_value_intrinsic(nir_intrinsic_instr *instr);
334 virtual void nir_setup_system_values();
335 virtual void nir_emit_impl(nir_function_impl *impl);
336 virtual void nir_emit_cf_list(exec_list *list);
337 virtual void nir_emit_if(nir_if *if_stmt);
338 virtual void nir_emit_loop(nir_loop *loop);
339 virtual void nir_emit_block(nir_block *block);
340 virtual void nir_emit_instr(nir_instr *instr);
341 virtual void nir_emit_load_const(nir_load_const_instr *instr);
342 virtual void nir_emit_intrinsic(nir_intrinsic_instr *instr);
343 virtual void nir_emit_alu(nir_alu_instr *instr);
344 virtual void nir_emit_jump(nir_jump_instr *instr);
345 virtual void nir_emit_texture(nir_tex_instr *instr);
346 virtual void nir_emit_undef(nir_ssa_undef_instr *instr);
347 virtual void nir_emit_ssbo_atomic(int op, nir_intrinsic_instr *instr);
348
   /* Map NIR destinations / sources to vec4 registers, optionally with an
    * explicit register type or NIR ALU type.  \p num_components defaults
    * to 4 on every get_nir_src() overload. */
349 dst_reg get_nir_dest(nir_dest dest, enum brw_reg_type type);
350 dst_reg get_nir_dest(nir_dest dest, nir_alu_type type);
351 dst_reg get_nir_dest(nir_dest dest);
352 src_reg get_nir_src(nir_src src, enum brw_reg_type type,
353 unsigned num_components = 4);
354 src_reg get_nir_src(nir_src src, nir_alu_type type,
355 unsigned num_components = 4);
356 src_reg get_nir_src(nir_src src,
357 unsigned num_components = 4);
358
   /* Pure virtual: a derived class must supply it. */
359 virtual dst_reg *make_reg_for_system_value(int location,
360 const glsl_type *type) = 0;
361
   /* Register tables used by the NIR hooks above.
    * NOTE(review): sizes and ownership are not visible in this header. */
362 dst_reg *nir_locals;
363 dst_reg *nir_ssa_values;
364 src_reg *nir_inputs;
365 dst_reg *nir_system_values;
366
367 protected:
368 void emit_vertex();
369 void lower_attributes_to_hw_regs(const int *attribute_map,
370 bool interleaved);
371 void setup_payload_interference(struct ra_graph *g, int first_payload_node,
372 int reg_node_count);
   /* The five pure virtuals below must be provided by a derived class.
    * The two gs_* hooks are virtual but not pure, so an implementation
    * exists in this class. */
373 virtual void setup_payload() = 0;
374 virtual void emit_prolog() = 0;
375 virtual void emit_thread_end() = 0;
376 virtual void emit_urb_write_header(int mrf) = 0;
377 virtual vec4_instruction *emit_urb_write_opcode(bool complete) = 0;
378 virtual void gs_emit_vertex(int stream_id);
379 virtual void gs_end_primitive();
380
381 private:
382 /**
383 * If true, then register allocation should fail instead of spilling.
384 */
385 const bool no_spills;
386
387 int shader_time_index;
388
389 unsigned last_scratch; /**< measured in 32-byte (register size) units */
390 };
391
392
393 /**
394 * The vertex shader code generator.
395 *
396 * Translates VS IR to actual i965 assembly code.
    *
    * The only public entry point besides construction is
    * generate_assembly(), which takes the control-flow graph (cfg_t) to
    * translate.
397 */
398 class vec4_generator
399 {
400 public:
    /* Every constructor argument has a same-named private member below.
     * NOTE(review): presumably each is simply stored there -- confirm in
     * the implementation. */
401 vec4_generator(const struct brw_compiler *compiler, void *log_data,
402 struct gl_shader_program *shader_prog,
403 struct gl_program *prog,
404 struct brw_vue_prog_data *prog_data,
405 void *mem_ctx,
406 bool debug_flag,
407 const char *stage_name,
408 const char *stage_abbrev);
409 ~vec4_generator();
410
    /* Returns a pointer to const unsigned data and reports a size through
     * \p asm_size.  NOTE(review): the unit of *asm_size and the ownership
     * of the returned buffer are not visible in this header -- confirm. */
411 const unsigned *generate_assembly(const cfg_t *cfg, unsigned *asm_size);
412
413 private:
414 void generate_code(const cfg_t *cfg);
415
    /* Per-operation emitters.  Each takes the vec4_instruction and/or its
     * operands as struct brw_reg values. */
416 void generate_math1_gen4(vec4_instruction *inst,
417 struct brw_reg dst,
418 struct brw_reg src);
419 void generate_math2_gen4(vec4_instruction *inst,
420 struct brw_reg dst,
421 struct brw_reg src0,
422 struct brw_reg src1);
423 void generate_math_gen6(vec4_instruction *inst,
424 struct brw_reg dst,
425 struct brw_reg src0,
426 struct brw_reg src1);
427
428 void generate_tex(vec4_instruction *inst,
429 struct brw_reg dst,
430 struct brw_reg src,
431 struct brw_reg sampler_index);
432
433 void generate_vs_urb_write(vec4_instruction *inst);
434 void generate_gs_urb_write(vec4_instruction *inst);
435 void generate_gs_urb_write_allocate(vec4_instruction *inst);
436 void generate_gs_thread_end(vec4_instruction *inst);
437 void generate_gs_set_write_offset(struct brw_reg dst,
438 struct brw_reg src0,
439 struct brw_reg src1);
440 void generate_gs_set_vertex_count(struct brw_reg dst,
441 struct brw_reg src);
442 void generate_gs_svb_write(vec4_instruction *inst,
443 struct brw_reg dst,
444 struct brw_reg src0,
445 struct brw_reg src1);
446 void generate_gs_svb_set_destination_index(vec4_instruction *inst,
447 struct brw_reg dst,
448 struct brw_reg src);
449 void generate_gs_set_dword_2(struct brw_reg dst, struct brw_reg src);
450 void generate_gs_prepare_channel_masks(struct brw_reg dst);
451 void generate_gs_set_channel_masks(struct brw_reg dst, struct brw_reg src);
452 void generate_gs_get_instance_id(struct brw_reg dst);
453 void generate_gs_ff_sync_set_primitives(struct brw_reg dst,
454 struct brw_reg src0,
455 struct brw_reg src1,
456 struct brw_reg src2);
457 void generate_gs_ff_sync(vec4_instruction *inst,
458 struct brw_reg dst,
459 struct brw_reg src0,
460 struct brw_reg src1);
461 void generate_gs_set_primitive_id(struct brw_reg dst);
462 void generate_oword_dual_block_offsets(struct brw_reg m1,
463 struct brw_reg index);
464 void generate_scratch_write(vec4_instruction *inst,
465 struct brw_reg dst,
466 struct brw_reg src,
467 struct brw_reg index);
468 void generate_scratch_read(vec4_instruction *inst,
469 struct brw_reg dst,
470 struct brw_reg index);
471 void generate_pull_constant_load(vec4_instruction *inst,
472 struct brw_reg dst,
473 struct brw_reg index,
474 struct brw_reg offset);
475 void generate_pull_constant_load_gen7(vec4_instruction *inst,
476 struct brw_reg dst,
477 struct brw_reg surf_index,
478 struct brw_reg offset);
479 void generate_set_simd4x2_header_gen9(vec4_instruction *inst,
480 struct brw_reg dst);
481
482 void generate_get_buffer_size(vec4_instruction *inst,
483 struct brw_reg dst,
484 struct brw_reg src,
485 struct brw_reg index);
486
487 void generate_unpack_flags(struct brw_reg dst);
488
    /* State.  C++ initializes members in declaration order, so debug_flag
     * (declared last) is initialized last even though it comes before
     * stage_name in the constructor's parameter list. */
489 const struct brw_compiler *compiler;
490 void *log_data; /* Passed to compiler->*_log functions */
491
492 const struct brw_device_info *devinfo;
493
494 struct brw_codegen *p;
495
    /* Note: prog is stored as pointer-to-const although the constructor
     * accepts a non-const struct gl_program pointer. */
496 struct gl_shader_program *shader_prog;
497 const struct gl_program *prog;
498
499 struct brw_vue_prog_data *prog_data;
500
501 void *mem_ctx;
502 const char *stage_name;
503 const char *stage_abbrev;
504 const bool debug_flag;
505 };
506
507 } /* namespace brw */
508 #endif /* __cplusplus */
509
510 #endif /* BRW_VEC4_H */