glsl: add ir_emit_vertex and ir_end_primitive instruction types
[mesa.git] / src / mesa / drivers / dri / i965 / brw_vec4.h
1 /*
2 * Copyright © 2011 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #ifndef BRW_VEC4_H
25 #define BRW_VEC4_H
26
27 #include <stdint.h>
28 #include "brw_shader.h"
29 #include "main/compiler.h"
30 #include "program/hash_table.h"
31
32 extern "C" {
33 #include "brw_vs.h"
34 #include "brw_context.h"
35 #include "brw_eu.h"
36 };
37
38 #include "glsl/ir.h"
39
40 namespace brw {
41
/* Forward declaration: src_reg takes a dst_reg in one of its constructors
 * before dst_reg itself has been defined. */
class dst_reg;

/* NOTE(review): presumably maps a component count to a swizzle value; the
 * definition is not part of this header, so confirm there. */
unsigned swizzle_for_size(int size);
46
47 class reg
48 {
49 public:
50 /** Register file: ARF, GRF, MRF, IMM. */
51 enum register_file file;
52 /** virtual register number. 0 = fixed hw reg */
53 int reg;
54 /** Offset within the virtual register. */
55 int reg_offset;
56 /** Register type. BRW_REGISTER_TYPE_* */
57 int type;
58 struct brw_reg fixed_hw_reg;
59
60 /** Value for file == BRW_IMMMEDIATE_FILE */
61 union {
62 int32_t i;
63 uint32_t u;
64 float f;
65 } imm;
66 };
67
68 class src_reg : public reg
69 {
70 public:
71 /* Callers of this ralloc-based new need not call delete. It's
72 * easier to just ralloc_free 'ctx' (or any of its ancestors). */
73 static void* operator new(size_t size, void *ctx)
74 {
75 void *node;
76
77 node = ralloc_size(ctx, size);
78 assert(node != NULL);
79
80 return node;
81 }
82
83 void init();
84
85 src_reg(register_file file, int reg, const glsl_type *type);
86 src_reg();
87 src_reg(float f);
88 src_reg(uint32_t u);
89 src_reg(int32_t i);
90
91 bool equals(src_reg *r);
92 bool is_zero() const;
93 bool is_one() const;
94
95 src_reg(class vec4_visitor *v, const struct glsl_type *type);
96
97 explicit src_reg(dst_reg reg);
98
99 GLuint swizzle; /**< SWIZZLE_XYZW swizzles from Mesa. */
100 bool negate;
101 bool abs;
102
103 src_reg *reladdr;
104 };
105
106 class dst_reg : public reg
107 {
108 public:
109 /* Callers of this ralloc-based new need not call delete. It's
110 * easier to just ralloc_free 'ctx' (or any of its ancestors). */
111 static void* operator new(size_t size, void *ctx)
112 {
113 void *node;
114
115 node = ralloc_size(ctx, size);
116 assert(node != NULL);
117
118 return node;
119 }
120
121 void init();
122
123 dst_reg();
124 dst_reg(register_file file, int reg);
125 dst_reg(register_file file, int reg, const glsl_type *type, int writemask);
126 dst_reg(struct brw_reg reg);
127 dst_reg(class vec4_visitor *v, const struct glsl_type *type);
128
129 explicit dst_reg(src_reg reg);
130
131 int writemask; /**< Bitfield of WRITEMASK_[XYZW] */
132
133 src_reg *reladdr;
134 };
135
136 class vec4_instruction : public backend_instruction {
137 public:
138 /* Callers of this ralloc-based new need not call delete. It's
139 * easier to just ralloc_free 'ctx' (or any of its ancestors). */
140 static void* operator new(size_t size, void *ctx)
141 {
142 void *node;
143
144 node = rzalloc_size(ctx, size);
145 assert(node != NULL);
146
147 return node;
148 }
149
150 vec4_instruction(vec4_visitor *v, enum opcode opcode,
151 dst_reg dst = dst_reg(),
152 src_reg src0 = src_reg(),
153 src_reg src1 = src_reg(),
154 src_reg src2 = src_reg());
155
156 struct brw_reg get_dst(void);
157 struct brw_reg get_src(int i);
158
159 dst_reg dst;
160 src_reg src[3];
161
162 bool saturate;
163 bool force_writemask_all;
164 bool no_dd_clear, no_dd_check;
165
166 int conditional_mod; /**< BRW_CONDITIONAL_* */
167
168 int sampler;
169 uint32_t texture_offset; /**< Texture Offset bitfield */
170 int target; /**< MRT target. */
171 bool shadow_compare;
172
173 bool eot;
174 bool header_present;
175 int mlen; /**< SEND message length */
176 int base_mrf; /**< First MRF in the SEND message, if mlen is nonzero. */
177
178 uint32_t offset; /* spill/unspill offset */
179 /** @{
180 * Annotation for the generated IR. One of the two can be set.
181 */
182 const void *ir;
183 const char *annotation;
184
185 bool is_send_from_grf();
186 bool can_reswizzle_dst(int dst_writemask, int swizzle, int swizzle_mask);
187 void reswizzle_dst(int dst_writemask, int swizzle);
188 };
189
190 /**
191 * The vertex shader front-end.
192 *
193 * Translates either GLSL IR or Mesa IR (for ARB_vertex_program and
194 * fixed-function) into VS IR.
195 */
196 class vec4_visitor : public backend_visitor
197 {
198 public:
199 vec4_visitor(struct brw_context *brw,
200 struct brw_vec4_compile *c,
201 struct gl_program *prog,
202 const struct brw_vec4_prog_key *key,
203 struct brw_vec4_prog_data *prog_data,
204 struct gl_shader_program *shader_prog,
205 struct brw_shader *shader,
206 void *mem_ctx,
207 bool debug_flag);
208 ~vec4_visitor();
209
210 dst_reg dst_null_f()
211 {
212 return dst_reg(brw_null_reg());
213 }
214
215 dst_reg dst_null_d()
216 {
217 return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
218 }
219
220 struct gl_program *prog;
221 struct brw_vec4_compile *c;
222 const struct brw_vec4_prog_key *key;
223 struct brw_vec4_prog_data *prog_data;
224 unsigned int sanity_param_count;
225
226 char *fail_msg;
227 bool failed;
228
229 /**
230 * GLSL IR currently being processed, which is associated with our
231 * driver IR instructions for debugging purposes.
232 */
233 const void *base_ir;
234 const char *current_annotation;
235
236 int *virtual_grf_sizes;
237 int virtual_grf_count;
238 int virtual_grf_array_size;
239 int first_non_payload_grf;
240 unsigned int max_grf;
241 int *virtual_grf_start;
242 int *virtual_grf_end;
243 dst_reg userplane[MAX_CLIP_PLANES];
244
245 /**
246 * This is the size to be used for an array with an element per
247 * reg_offset
248 */
249 int virtual_grf_reg_count;
250 /** Per-virtual-grf indices into an array of size virtual_grf_reg_count */
251 int *virtual_grf_reg_map;
252
253 bool live_intervals_valid;
254
255 dst_reg *variable_storage(ir_variable *var);
256
257 void reladdr_to_temp(ir_instruction *ir, src_reg *reg, int *num_reladdr);
258
259 bool need_all_constants_in_pull_buffer;
260
261 /**
262 * \name Visit methods
263 *
264 * As typical for the visitor pattern, there must be one \c visit method for
265 * each concrete subclass of \c ir_instruction. Virtual base classes within
266 * the hierarchy should not have \c visit methods.
267 */
268 /*@{*/
269 virtual void visit(ir_variable *);
270 virtual void visit(ir_loop *);
271 virtual void visit(ir_loop_jump *);
272 virtual void visit(ir_function_signature *);
273 virtual void visit(ir_function *);
274 virtual void visit(ir_expression *);
275 virtual void visit(ir_swizzle *);
276 virtual void visit(ir_dereference_variable *);
277 virtual void visit(ir_dereference_array *);
278 virtual void visit(ir_dereference_record *);
279 virtual void visit(ir_assignment *);
280 virtual void visit(ir_constant *);
281 virtual void visit(ir_call *);
282 virtual void visit(ir_return *);
283 virtual void visit(ir_discard *);
284 virtual void visit(ir_texture *);
285 virtual void visit(ir_if *);
286 virtual void visit(ir_emit_vertex *);
287 virtual void visit(ir_end_primitive *);
288 /*@}*/
289
290 src_reg result;
291
292 /* Regs for vertex results. Generated at ir_variable visiting time
293 * for the ir->location's used.
294 */
295 dst_reg output_reg[BRW_VARYING_SLOT_COUNT];
296 const char *output_reg_annotation[BRW_VARYING_SLOT_COUNT];
297 int uniform_size[MAX_UNIFORMS];
298 int uniform_vector_size[MAX_UNIFORMS];
299 int uniforms;
300
301 src_reg shader_start_time;
302
303 struct hash_table *variable_ht;
304
305 bool run(void);
306 void fail(const char *msg, ...);
307
308 int virtual_grf_alloc(int size);
309 void setup_uniform_clipplane_values();
310 void setup_uniform_values(ir_variable *ir);
311 void setup_builtin_uniform_values(ir_variable *ir);
312 int setup_uniforms(int payload_reg);
313 void setup_payload();
314 bool reg_allocate_trivial();
315 bool reg_allocate();
316 void evaluate_spill_costs(float *spill_costs, bool *no_spill);
317 int choose_spill_reg(struct ra_graph *g);
318 void spill_reg(int spill_reg);
319 void move_grf_array_access_to_scratch();
320 void move_uniform_array_access_to_pull_constants();
321 void move_push_constants_to_pull_constants();
322 void split_uniform_registers();
323 void pack_uniform_registers();
324 void calculate_live_intervals();
325 void split_virtual_grfs();
326 bool dead_code_eliminate();
327 bool virtual_grf_interferes(int a, int b);
328 bool opt_copy_propagation();
329 bool opt_algebraic();
330 bool opt_register_coalesce();
331 void opt_set_dependency_control();
332 void opt_schedule_instructions();
333
334 bool can_do_source_mods(vec4_instruction *inst);
335
336 vec4_instruction *emit(vec4_instruction *inst);
337
338 vec4_instruction *emit(enum opcode opcode);
339
340 vec4_instruction *emit(enum opcode opcode, dst_reg dst, src_reg src0);
341
342 vec4_instruction *emit(enum opcode opcode, dst_reg dst,
343 src_reg src0, src_reg src1);
344
345 vec4_instruction *emit(enum opcode opcode, dst_reg dst,
346 src_reg src0, src_reg src1, src_reg src2);
347
348 vec4_instruction *emit_before(vec4_instruction *inst,
349 vec4_instruction *new_inst);
350
351 vec4_instruction *MOV(dst_reg dst, src_reg src0);
352 vec4_instruction *NOT(dst_reg dst, src_reg src0);
353 vec4_instruction *RNDD(dst_reg dst, src_reg src0);
354 vec4_instruction *RNDE(dst_reg dst, src_reg src0);
355 vec4_instruction *RNDZ(dst_reg dst, src_reg src0);
356 vec4_instruction *FRC(dst_reg dst, src_reg src0);
357 vec4_instruction *F32TO16(dst_reg dst, src_reg src0);
358 vec4_instruction *F16TO32(dst_reg dst, src_reg src0);
359 vec4_instruction *ADD(dst_reg dst, src_reg src0, src_reg src1);
360 vec4_instruction *MUL(dst_reg dst, src_reg src0, src_reg src1);
361 vec4_instruction *MACH(dst_reg dst, src_reg src0, src_reg src1);
362 vec4_instruction *MAC(dst_reg dst, src_reg src0, src_reg src1);
363 vec4_instruction *AND(dst_reg dst, src_reg src0, src_reg src1);
364 vec4_instruction *OR(dst_reg dst, src_reg src0, src_reg src1);
365 vec4_instruction *XOR(dst_reg dst, src_reg src0, src_reg src1);
366 vec4_instruction *DP3(dst_reg dst, src_reg src0, src_reg src1);
367 vec4_instruction *DP4(dst_reg dst, src_reg src0, src_reg src1);
368 vec4_instruction *DPH(dst_reg dst, src_reg src0, src_reg src1);
369 vec4_instruction *SHL(dst_reg dst, src_reg src0, src_reg src1);
370 vec4_instruction *SHR(dst_reg dst, src_reg src0, src_reg src1);
371 vec4_instruction *ASR(dst_reg dst, src_reg src0, src_reg src1);
372 vec4_instruction *CMP(dst_reg dst, src_reg src0, src_reg src1,
373 uint32_t condition);
374 vec4_instruction *IF(src_reg src0, src_reg src1, uint32_t condition);
375 vec4_instruction *IF(uint32_t predicate);
376 vec4_instruction *PULL_CONSTANT_LOAD(dst_reg dst, src_reg index);
377 vec4_instruction *SCRATCH_READ(dst_reg dst, src_reg index);
378 vec4_instruction *SCRATCH_WRITE(dst_reg dst, src_reg src, src_reg index);
379 vec4_instruction *LRP(dst_reg dst, src_reg a, src_reg y, src_reg x);
380 vec4_instruction *BFREV(dst_reg dst, src_reg value);
381 vec4_instruction *BFE(dst_reg dst, src_reg bits, src_reg offset, src_reg value);
382 vec4_instruction *BFI1(dst_reg dst, src_reg bits, src_reg offset);
383 vec4_instruction *BFI2(dst_reg dst, src_reg bfi1_dst, src_reg insert, src_reg base);
384 vec4_instruction *FBH(dst_reg dst, src_reg value);
385 vec4_instruction *FBL(dst_reg dst, src_reg value);
386 vec4_instruction *CBIT(dst_reg dst, src_reg value);
387
388 int implied_mrf_writes(vec4_instruction *inst);
389
390 bool try_rewrite_rhs_to_dst(ir_assignment *ir,
391 dst_reg dst,
392 src_reg src,
393 vec4_instruction *pre_rhs_inst,
394 vec4_instruction *last_rhs_inst);
395
396 bool try_copy_propagation(vec4_instruction *inst, int arg,
397 src_reg *values[4]);
398
399 /** Walks an exec_list of ir_instruction and sends it through this visitor. */
400 void visit_instructions(const exec_list *list);
401
402 void emit_vp_sop(uint32_t condmod, dst_reg dst,
403 src_reg src0, src_reg src1, src_reg one);
404
405 void emit_bool_to_cond_code(ir_rvalue *ir, uint32_t *predicate);
406 void emit_bool_comparison(unsigned int op, dst_reg dst, src_reg src0, src_reg src1);
407 void emit_if_gen6(ir_if *ir);
408
409 void emit_minmax(uint32_t condmod, dst_reg dst, src_reg src0, src_reg src1);
410
411 void emit_block_move(dst_reg *dst, src_reg *src,
412 const struct glsl_type *type, uint32_t predicate);
413
414 void emit_constant_values(dst_reg *dst, ir_constant *value);
415
416 /**
417 * Emit the correct dot-product instruction for the type of arguments
418 */
419 void emit_dp(dst_reg dst, src_reg src0, src_reg src1, unsigned elements);
420
421 void emit_scalar(ir_instruction *ir, enum prog_opcode op,
422 dst_reg dst, src_reg src0);
423
424 void emit_scalar(ir_instruction *ir, enum prog_opcode op,
425 dst_reg dst, src_reg src0, src_reg src1);
426
427 void emit_scs(ir_instruction *ir, enum prog_opcode op,
428 dst_reg dst, const src_reg &src);
429
430 src_reg fix_3src_operand(src_reg src);
431
432 void emit_math1_gen6(enum opcode opcode, dst_reg dst, src_reg src);
433 void emit_math1_gen4(enum opcode opcode, dst_reg dst, src_reg src);
434 void emit_math(enum opcode opcode, dst_reg dst, src_reg src);
435 void emit_math2_gen6(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1);
436 void emit_math2_gen4(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1);
437 void emit_math(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1);
438 src_reg fix_math_operand(src_reg src);
439
440 void emit_pack_half_2x16(dst_reg dst, src_reg src0);
441 void emit_unpack_half_2x16(dst_reg dst, src_reg src0);
442
443 void swizzle_result(ir_texture *ir, src_reg orig_val, int sampler);
444
445 void emit_ndc_computation();
446 void emit_psiz_and_flags(struct brw_reg reg);
447 void emit_clip_distances(struct brw_reg reg, int offset);
448 void emit_generic_urb_slot(dst_reg reg, int varying);
449 void emit_urb_slot(int mrf, int varying);
450
451 void emit_shader_time_begin();
452 void emit_shader_time_end();
453 void emit_shader_time_write(enum shader_time_shader_type type,
454 src_reg value);
455
456 src_reg get_scratch_offset(vec4_instruction *inst,
457 src_reg *reladdr, int reg_offset);
458 src_reg get_pull_constant_offset(vec4_instruction *inst,
459 src_reg *reladdr, int reg_offset);
460 void emit_scratch_read(vec4_instruction *inst,
461 dst_reg dst,
462 src_reg orig_src,
463 int base_offset);
464 void emit_scratch_write(vec4_instruction *inst,
465 int base_offset);
466 void emit_pull_constant_load(vec4_instruction *inst,
467 dst_reg dst,
468 src_reg orig_src,
469 int base_offset);
470
471 bool try_emit_sat(ir_expression *ir);
472 bool try_emit_mad(ir_expression *ir, int mul_arg);
473 void resolve_ud_negate(src_reg *reg);
474
475 src_reg get_timestamp();
476
477 bool process_move_condition(ir_rvalue *ir);
478
479 void dump_instruction(backend_instruction *inst);
480
481 protected:
482 void emit_vertex();
483 void lower_attributes_to_hw_regs(const int *attribute_map);
484 virtual dst_reg *make_reg_for_system_value(ir_variable *ir) = 0;
485 virtual int setup_attributes(int payload_reg) = 0;
486 virtual void emit_prolog() = 0;
487 virtual void emit_program_code() = 0;
488 virtual void emit_thread_end() = 0;
489 virtual void emit_urb_write_header(int mrf) = 0;
490 virtual vec4_instruction *emit_urb_write_opcode(bool complete) = 0;
491 virtual int compute_array_stride(ir_dereference_array *ir);
492
493 const bool debug_flag;
494 };
495
496 class vec4_vs_visitor : public vec4_visitor
497 {
498 public:
499 vec4_vs_visitor(struct brw_context *brw,
500 struct brw_vs_compile *vs_compile,
501 struct brw_vs_prog_data *vs_prog_data,
502 struct gl_shader_program *prog,
503 struct brw_shader *shader,
504 void *mem_ctx);
505
506 protected:
507 virtual dst_reg *make_reg_for_system_value(ir_variable *ir);
508 virtual int setup_attributes(int payload_reg);
509 virtual void emit_prolog();
510 virtual void emit_program_code();
511 virtual void emit_thread_end();
512 virtual void emit_urb_write_header(int mrf);
513 virtual vec4_instruction *emit_urb_write_opcode(bool complete);
514
515 private:
516 void setup_vp_regs();
517 dst_reg get_vp_dst_reg(const prog_dst_register &dst);
518 src_reg get_vp_src_reg(const prog_src_register &src);
519
520 struct brw_vs_compile * const vs_compile;
521 struct brw_vs_prog_data * const vs_prog_data;
522 src_reg *vp_temp_regs;
523 src_reg vp_addr_reg;
524 };
525
526 /**
527 * The vertex shader code generator.
528 *
529 * Translates VS IR to actual i965 assembly code.
530 */
531 class vec4_generator
532 {
533 public:
534 vec4_generator(struct brw_context *brw,
535 struct gl_shader_program *shader_prog,
536 struct gl_program *prog,
537 void *mem_ctx,
538 bool debug_flag);
539 ~vec4_generator();
540
541 const unsigned *generate_assembly(exec_list *insts, unsigned *asm_size);
542
543 private:
544 void generate_code(exec_list *instructions);
545 void generate_vec4_instruction(vec4_instruction *inst,
546 struct brw_reg dst,
547 struct brw_reg *src);
548
549 void generate_math1_gen4(vec4_instruction *inst,
550 struct brw_reg dst,
551 struct brw_reg src);
552 void generate_math1_gen6(vec4_instruction *inst,
553 struct brw_reg dst,
554 struct brw_reg src);
555 void generate_math2_gen4(vec4_instruction *inst,
556 struct brw_reg dst,
557 struct brw_reg src0,
558 struct brw_reg src1);
559 void generate_math2_gen6(vec4_instruction *inst,
560 struct brw_reg dst,
561 struct brw_reg src0,
562 struct brw_reg src1);
563 void generate_math2_gen7(vec4_instruction *inst,
564 struct brw_reg dst,
565 struct brw_reg src0,
566 struct brw_reg src1);
567
568 void generate_tex(vec4_instruction *inst,
569 struct brw_reg dst,
570 struct brw_reg src);
571
572 void generate_urb_write(vec4_instruction *inst);
573 void generate_oword_dual_block_offsets(struct brw_reg m1,
574 struct brw_reg index);
575 void generate_scratch_write(vec4_instruction *inst,
576 struct brw_reg dst,
577 struct brw_reg src,
578 struct brw_reg index);
579 void generate_scratch_read(vec4_instruction *inst,
580 struct brw_reg dst,
581 struct brw_reg index);
582 void generate_pull_constant_load(vec4_instruction *inst,
583 struct brw_reg dst,
584 struct brw_reg index,
585 struct brw_reg offset);
586 void generate_pull_constant_load_gen7(vec4_instruction *inst,
587 struct brw_reg dst,
588 struct brw_reg surf_index,
589 struct brw_reg offset);
590
591 struct brw_context *brw;
592 struct gl_context *ctx;
593
594 struct brw_compile *p;
595
596 struct gl_shader_program *shader_prog;
597 struct gl_shader *shader;
598 const struct gl_program *prog;
599
600 void *mem_ctx;
601 const bool debug_flag;
602 };
603
604 } /* namespace brw */
605
606 #endif /* BRW_VEC4_H */