i965/vec4: Delete the old vec4_vp code
[mesa.git] / src / mesa / drivers / dri / i965 / brw_vec4.h
1 /*
2 * Copyright © 2011 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #ifndef BRW_VEC4_H
25 #define BRW_VEC4_H
26
27 #include <stdint.h>
28 #include "brw_shader.h"
29 #include "main/compiler.h"
30 #include "program/hash_table.h"
31 #include "brw_program.h"
32
33 #ifdef __cplusplus
34 #include "brw_ir_vec4.h"
35
36 extern "C" {
37 #endif
38
39 #include "brw_context.h"
40 #include "brw_eu.h"
41 #include "intel_asm_annotation.h"
42
43 #ifdef __cplusplus
44 }; /* extern "C" */
45 #endif
46
47 #include "glsl/ir.h"
48 #include "glsl/nir/nir.h"
49
50
51 #ifdef __cplusplus
52 extern "C" {
53 #endif
54
55 #ifdef __cplusplus
56 } /* extern "C" */
57
58 namespace brw {
59
60 class vec4_live_variables;
61
62 /**
63 * The vertex shader front-end.
64 *
65 * Translates either GLSL IR or Mesa IR (for ARB_vertex_program and
66 * fixed-function) into VS IR.
67 */
68 class vec4_visitor : public backend_shader
69 {
70 public:
71 vec4_visitor(const struct brw_compiler *compiler,
72 void *log_data,
73 struct gl_program *prog,
74 const struct brw_sampler_prog_key_data *key,
75 struct brw_vue_prog_data *prog_data,
76 struct gl_shader_program *shader_prog,
77 gl_shader_stage stage,
78 void *mem_ctx,
79 bool no_spills,
80 int shader_time_index);
81 ~vec4_visitor();
82
83 dst_reg dst_null_f()
84 {
85 return dst_reg(brw_null_reg());
86 }
87
88 dst_reg dst_null_d()
89 {
90 return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
91 }
92
93 dst_reg dst_null_ud()
94 {
95 return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
96 }
97
98 const struct brw_sampler_prog_key_data * const key_tex;
99 struct brw_vue_prog_data * const prog_data;
100 unsigned int sanity_param_count;
101
102 char *fail_msg;
103 bool failed;
104
105 /**
106 * GLSL IR currently being processed, which is associated with our
107 * driver IR instructions for debugging purposes.
108 */
109 const void *base_ir;
110 const char *current_annotation;
111
112 int first_non_payload_grf;
113 unsigned int max_grf;
114 int *virtual_grf_start;
115 int *virtual_grf_end;
116 brw::vec4_live_variables *live_intervals;
117 dst_reg userplane[MAX_CLIP_PLANES];
118
119 bool need_all_constants_in_pull_buffer;
120
121 /* Regs for vertex results. Generated at ir_variable visiting time
122 * for the ir->location's used.
123 */
124 dst_reg output_reg[BRW_VARYING_SLOT_COUNT];
125 const char *output_reg_annotation[BRW_VARYING_SLOT_COUNT];
126 int *uniform_size;
127 int *uniform_vector_size;
128 int uniform_array_size; /*< Size of uniform_[vector_]size arrays */
129 int uniforms;
130
131 src_reg shader_start_time;
132
133 bool run();
134 void fail(const char *msg, ...);
135
136 virtual void setup_vec4_uniform_value(unsigned param_offset,
137 const gl_constant_value *values,
138 unsigned n);
139 int setup_uniforms(int payload_reg);
140
141 bool reg_allocate_trivial();
142 bool reg_allocate();
143 void evaluate_spill_costs(float *spill_costs, bool *no_spill);
144 int choose_spill_reg(struct ra_graph *g);
145 void spill_reg(int spill_reg);
146 void move_grf_array_access_to_scratch();
147 void move_uniform_array_access_to_pull_constants();
148 void move_push_constants_to_pull_constants();
149 void split_uniform_registers();
150 void pack_uniform_registers();
151 void calculate_live_intervals();
152 void invalidate_live_intervals();
153 void split_virtual_grfs();
154 bool opt_vector_float();
155 bool opt_reduce_swizzle();
156 bool dead_code_eliminate();
157 int var_range_start(unsigned v, unsigned n) const;
158 int var_range_end(unsigned v, unsigned n) const;
159 bool virtual_grf_interferes(int a, int b);
160 bool opt_copy_propagation(bool do_constant_prop = true);
161 bool opt_cse_local(bblock_t *block);
162 bool opt_cse();
163 bool opt_algebraic();
164 bool opt_register_coalesce();
165 bool eliminate_find_live_channel();
166 bool is_dep_ctrl_unsafe(const vec4_instruction *inst);
167 void opt_set_dependency_control();
168 void opt_schedule_instructions();
169
170 vec4_instruction *emit(vec4_instruction *inst);
171
172 vec4_instruction *emit(enum opcode opcode);
173 vec4_instruction *emit(enum opcode opcode, const dst_reg &dst);
174 vec4_instruction *emit(enum opcode opcode, const dst_reg &dst,
175 const src_reg &src0);
176 vec4_instruction *emit(enum opcode opcode, const dst_reg &dst,
177 const src_reg &src0, const src_reg &src1);
178 vec4_instruction *emit(enum opcode opcode, const dst_reg &dst,
179 const src_reg &src0, const src_reg &src1,
180 const src_reg &src2);
181
182 vec4_instruction *emit_before(bblock_t *block,
183 vec4_instruction *inst,
184 vec4_instruction *new_inst);
185
186 #define EMIT1(op) vec4_instruction *op(const dst_reg &, const src_reg &);
187 #define EMIT2(op) vec4_instruction *op(const dst_reg &, const src_reg &, const src_reg &);
188 #define EMIT3(op) vec4_instruction *op(const dst_reg &, const src_reg &, const src_reg &, const src_reg &);
189 EMIT1(MOV)
190 EMIT1(NOT)
191 EMIT1(RNDD)
192 EMIT1(RNDE)
193 EMIT1(RNDZ)
194 EMIT1(FRC)
195 EMIT1(F32TO16)
196 EMIT1(F16TO32)
197 EMIT2(ADD)
198 EMIT2(MUL)
199 EMIT2(MACH)
200 EMIT2(MAC)
201 EMIT2(AND)
202 EMIT2(OR)
203 EMIT2(XOR)
204 EMIT2(DP3)
205 EMIT2(DP4)
206 EMIT2(DPH)
207 EMIT2(SHL)
208 EMIT2(SHR)
209 EMIT2(ASR)
210 vec4_instruction *CMP(dst_reg dst, src_reg src0, src_reg src1,
211 enum brw_conditional_mod condition);
212 vec4_instruction *IF(src_reg src0, src_reg src1,
213 enum brw_conditional_mod condition);
214 vec4_instruction *IF(enum brw_predicate predicate);
215 EMIT1(SCRATCH_READ)
216 EMIT2(SCRATCH_WRITE)
217 EMIT3(LRP)
218 EMIT1(BFREV)
219 EMIT3(BFE)
220 EMIT2(BFI1)
221 EMIT3(BFI2)
222 EMIT1(FBH)
223 EMIT1(FBL)
224 EMIT1(CBIT)
225 EMIT3(MAD)
226 EMIT2(ADDC)
227 EMIT2(SUBB)
228 #undef EMIT1
229 #undef EMIT2
230 #undef EMIT3
231
232 int implied_mrf_writes(vec4_instruction *inst);
233
234 void emit_vp_sop(enum brw_conditional_mod condmod, dst_reg dst,
235 src_reg src0, src_reg src1, src_reg one);
236
237 vec4_instruction *emit_minmax(enum brw_conditional_mod conditionalmod, dst_reg dst,
238 src_reg src0, src_reg src1);
239
240 vec4_instruction *emit_lrp(const dst_reg &dst, const src_reg &x,
241 const src_reg &y, const src_reg &a);
242
243 /**
244 * Copy any live channel from \p src to the first channel of the
245 * result.
246 */
247 src_reg emit_uniformize(const src_reg &src);
248
249 /**
250 * Emit the correct dot-product instruction for the type of arguments
251 */
252 void emit_dp(dst_reg dst, src_reg src0, src_reg src1, unsigned elements);
253
254 src_reg fix_3src_operand(const src_reg &src);
255 src_reg resolve_source_modifiers(const src_reg &src);
256
257 vec4_instruction *emit_math(enum opcode opcode, const dst_reg &dst, const src_reg &src0,
258 const src_reg &src1 = src_reg());
259
260 src_reg fix_math_operand(const src_reg &src);
261
262 void emit_pack_half_2x16(dst_reg dst, src_reg src0);
263 void emit_unpack_half_2x16(dst_reg dst, src_reg src0);
264 void emit_unpack_unorm_4x8(const dst_reg &dst, src_reg src0);
265 void emit_unpack_snorm_4x8(const dst_reg &dst, src_reg src0);
266 void emit_pack_unorm_4x8(const dst_reg &dst, const src_reg &src0);
267 void emit_pack_snorm_4x8(const dst_reg &dst, const src_reg &src0);
268
269 void emit_texture(ir_texture_opcode op,
270 dst_reg dest,
271 const glsl_type *dest_type,
272 src_reg coordinate,
273 int coord_components,
274 src_reg shadow_comparitor,
275 src_reg lod, src_reg lod2,
276 src_reg sample_index,
277 uint32_t constant_offset,
278 src_reg offset_value,
279 src_reg mcs,
280 bool is_cube_array,
281 uint32_t sampler, src_reg sampler_reg);
282
283 uint32_t gather_channel(unsigned gather_component, uint32_t sampler);
284 src_reg emit_mcs_fetch(const glsl_type *coordinate_type, src_reg coordinate,
285 src_reg sampler);
286 void emit_gen6_gather_wa(uint8_t wa, dst_reg dst);
287 void swizzle_result(ir_texture_opcode op, dst_reg dest,
288 src_reg orig_val, uint32_t sampler,
289 const glsl_type *dest_type);
290
291 void emit_ndc_computation();
292 void emit_psiz_and_flags(dst_reg reg);
293 vec4_instruction *emit_generic_urb_slot(dst_reg reg, int varying);
294 virtual void emit_urb_slot(dst_reg reg, int varying);
295
296 void emit_shader_time_begin();
297 void emit_shader_time_end();
298 void emit_shader_time_write(int shader_time_subindex, src_reg value);
299
300 void emit_untyped_atomic(unsigned atomic_op, unsigned surf_index,
301 dst_reg dst, src_reg offset, src_reg src0,
302 src_reg src1);
303
304 void emit_untyped_surface_read(unsigned surf_index, dst_reg dst,
305 src_reg offset);
306
307 src_reg get_scratch_offset(bblock_t *block, vec4_instruction *inst,
308 src_reg *reladdr, int reg_offset);
309 src_reg get_pull_constant_offset(bblock_t *block, vec4_instruction *inst,
310 src_reg *reladdr, int reg_offset);
311 void emit_scratch_read(bblock_t *block, vec4_instruction *inst,
312 dst_reg dst,
313 src_reg orig_src,
314 int base_offset);
315 void emit_scratch_write(bblock_t *block, vec4_instruction *inst,
316 int base_offset);
317 void emit_pull_constant_load(bblock_t *block, vec4_instruction *inst,
318 dst_reg dst,
319 src_reg orig_src,
320 int base_offset);
321 void emit_pull_constant_load_reg(dst_reg dst,
322 src_reg surf_index,
323 src_reg offset,
324 bblock_t *before_block,
325 vec4_instruction *before_inst);
326 src_reg emit_resolve_reladdr(int scratch_loc[], bblock_t *block,
327 vec4_instruction *inst, src_reg src);
328
329 void resolve_ud_negate(src_reg *reg);
330
331 src_reg get_timestamp();
332
333 void dump_instruction(backend_instruction *inst);
334 void dump_instruction(backend_instruction *inst, FILE *file);
335
336 bool is_high_sampler(src_reg sampler);
337
338 virtual void emit_nir_code();
339 virtual void nir_setup_inputs(nir_shader *shader);
340 virtual void nir_setup_uniforms(nir_shader *shader);
341 virtual void nir_setup_uniform(nir_variable *var);
342 virtual void nir_setup_builtin_uniform(nir_variable *var);
343 virtual void nir_setup_system_value_intrinsic(nir_intrinsic_instr *instr);
344 virtual void nir_setup_system_values(nir_shader *shader);
345 virtual void nir_emit_impl(nir_function_impl *impl);
346 virtual void nir_emit_cf_list(exec_list *list);
347 virtual void nir_emit_if(nir_if *if_stmt);
348 virtual void nir_emit_loop(nir_loop *loop);
349 virtual void nir_emit_block(nir_block *block);
350 virtual void nir_emit_instr(nir_instr *instr);
351 virtual void nir_emit_load_const(nir_load_const_instr *instr);
352 virtual void nir_emit_intrinsic(nir_intrinsic_instr *instr);
353 virtual void nir_emit_alu(nir_alu_instr *instr);
354 virtual void nir_emit_jump(nir_jump_instr *instr);
355 virtual void nir_emit_texture(nir_tex_instr *instr);
356 virtual void nir_emit_undef(nir_ssa_undef_instr *instr);
357 virtual void nir_emit_ssbo_atomic(int op, nir_intrinsic_instr *instr);
358
359 dst_reg get_nir_dest(nir_dest dest, enum brw_reg_type type);
360 dst_reg get_nir_dest(nir_dest dest, nir_alu_type type);
361 dst_reg get_nir_dest(nir_dest dest);
362 src_reg get_nir_src(nir_src src, enum brw_reg_type type,
363 unsigned num_components = 4);
364 src_reg get_nir_src(nir_src src, nir_alu_type type,
365 unsigned num_components = 4);
366 src_reg get_nir_src(nir_src src,
367 unsigned num_components = 4);
368
369 virtual dst_reg *make_reg_for_system_value(int location,
370 const glsl_type *type) = 0;
371
372 dst_reg *nir_locals;
373 dst_reg *nir_ssa_values;
374 src_reg *nir_inputs;
375 dst_reg *nir_system_values;
376
377 protected:
378 void emit_vertex();
379 void lower_attributes_to_hw_regs(const int *attribute_map,
380 bool interleaved);
381 void setup_payload_interference(struct ra_graph *g, int first_payload_node,
382 int reg_node_count);
383 virtual void assign_binding_table_offsets();
384 virtual void setup_payload() = 0;
385 virtual void emit_prolog() = 0;
386 virtual void emit_thread_end() = 0;
387 virtual void emit_urb_write_header(int mrf) = 0;
388 virtual vec4_instruction *emit_urb_write_opcode(bool complete) = 0;
389 virtual void gs_emit_vertex(int stream_id);
390 virtual void gs_end_primitive();
391
392 private:
393 /**
394 * If true, then register allocation should fail instead of spilling.
395 */
396 const bool no_spills;
397
398 int shader_time_index;
399
400 unsigned last_scratch; /**< measured in 32-byte (register size) units */
401 };
402
403
404 /**
405 * The vertex shader code generator.
406 *
407 * Translates VS IR to actual i965 assembly code.
408 */
409 class vec4_generator
410 {
411 public:
412 vec4_generator(const struct brw_compiler *compiler, void *log_data,
413 struct gl_shader_program *shader_prog,
414 struct gl_program *prog,
415 struct brw_vue_prog_data *prog_data,
416 void *mem_ctx,
417 bool debug_flag,
418 const char *stage_name,
419 const char *stage_abbrev);
420 ~vec4_generator();
421
422 const unsigned *generate_assembly(const cfg_t *cfg, unsigned *asm_size);
423
424 private:
425 void generate_code(const cfg_t *cfg);
426
427 void generate_math1_gen4(vec4_instruction *inst,
428 struct brw_reg dst,
429 struct brw_reg src);
430 void generate_math2_gen4(vec4_instruction *inst,
431 struct brw_reg dst,
432 struct brw_reg src0,
433 struct brw_reg src1);
434 void generate_math_gen6(vec4_instruction *inst,
435 struct brw_reg dst,
436 struct brw_reg src0,
437 struct brw_reg src1);
438
439 void generate_tex(vec4_instruction *inst,
440 struct brw_reg dst,
441 struct brw_reg src,
442 struct brw_reg sampler_index);
443
444 void generate_vs_urb_write(vec4_instruction *inst);
445 void generate_gs_urb_write(vec4_instruction *inst);
446 void generate_gs_urb_write_allocate(vec4_instruction *inst);
447 void generate_gs_thread_end(vec4_instruction *inst);
448 void generate_gs_set_write_offset(struct brw_reg dst,
449 struct brw_reg src0,
450 struct brw_reg src1);
451 void generate_gs_set_vertex_count(struct brw_reg dst,
452 struct brw_reg src);
453 void generate_gs_svb_write(vec4_instruction *inst,
454 struct brw_reg dst,
455 struct brw_reg src0,
456 struct brw_reg src1);
457 void generate_gs_svb_set_destination_index(vec4_instruction *inst,
458 struct brw_reg dst,
459 struct brw_reg src);
460 void generate_gs_set_dword_2(struct brw_reg dst, struct brw_reg src);
461 void generate_gs_prepare_channel_masks(struct brw_reg dst);
462 void generate_gs_set_channel_masks(struct brw_reg dst, struct brw_reg src);
463 void generate_gs_get_instance_id(struct brw_reg dst);
464 void generate_gs_ff_sync_set_primitives(struct brw_reg dst,
465 struct brw_reg src0,
466 struct brw_reg src1,
467 struct brw_reg src2);
468 void generate_gs_ff_sync(vec4_instruction *inst,
469 struct brw_reg dst,
470 struct brw_reg src0,
471 struct brw_reg src1);
472 void generate_gs_set_primitive_id(struct brw_reg dst);
473 void generate_oword_dual_block_offsets(struct brw_reg m1,
474 struct brw_reg index);
475 void generate_scratch_write(vec4_instruction *inst,
476 struct brw_reg dst,
477 struct brw_reg src,
478 struct brw_reg index);
479 void generate_scratch_read(vec4_instruction *inst,
480 struct brw_reg dst,
481 struct brw_reg index);
482 void generate_pull_constant_load(vec4_instruction *inst,
483 struct brw_reg dst,
484 struct brw_reg index,
485 struct brw_reg offset);
486 void generate_pull_constant_load_gen7(vec4_instruction *inst,
487 struct brw_reg dst,
488 struct brw_reg surf_index,
489 struct brw_reg offset);
490 void generate_set_simd4x2_header_gen9(vec4_instruction *inst,
491 struct brw_reg dst);
492
493 void generate_get_buffer_size(vec4_instruction *inst,
494 struct brw_reg dst,
495 struct brw_reg src,
496 struct brw_reg index);
497
498 void generate_unpack_flags(struct brw_reg dst);
499
500 const struct brw_compiler *compiler;
501 void *log_data; /* Passed to compiler->*_log functions */
502
503 const struct brw_device_info *devinfo;
504
505 struct brw_codegen *p;
506
507 struct gl_shader_program *shader_prog;
508 const struct gl_program *prog;
509
510 struct brw_vue_prog_data *prog_data;
511
512 void *mem_ctx;
513 const char *stage_name;
514 const char *stage_abbrev;
515 const bool debug_flag;
516 };
517
518 } /* namespace brw */
519 #endif /* __cplusplus */
520
521 #endif /* BRW_VEC4_H */