i965/vec4: Only zero out unused message components when there are any.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_eu.h
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32
33 #ifndef BRW_EU_H
34 #define BRW_EU_H
35
36 #include <stdbool.h>
37 #include "brw_structs.h"
38 #include "brw_defines.h"
39 #include "brw_reg.h"
40 #include "program/prog_instruction.h"
41
42 #ifdef __cplusplus
43 extern "C" {
44 #endif
45
46 #define BRW_EU_MAX_INSN_STACK 5
47
48 struct brw_compile {
49 struct brw_instruction *store;
50 int store_size;
51 GLuint nr_insn;
52 unsigned int next_insn_offset;
53
54 void *mem_ctx;
55
56 /* Allow clients to push/pop instruction state:
57 */
58 struct brw_instruction stack[BRW_EU_MAX_INSN_STACK];
59 bool compressed_stack[BRW_EU_MAX_INSN_STACK];
60 struct brw_instruction *current;
61
62 GLuint flag_value;
63 bool single_program_flow;
64 bool compressed;
65 struct brw_context *brw;
66
67 /* Control flow stacks:
68 * - if_stack contains IF and ELSE instructions which must be patched
69 * (and popped) once the matching ENDIF instruction is encountered.
70 *
71 * Just store the instruction pointer(an index).
72 */
73 int *if_stack;
74 int if_stack_depth;
75 int if_stack_array_size;
76
77 /**
78 * loop_stack contains the instruction pointers of the starts of loops which
79 * must be patched (and popped) once the matching WHILE instruction is
80 * encountered.
81 */
82 int *loop_stack;
83 /**
84 * pre-gen6, the BREAK and CONT instructions had to tell how many IF/ENDIF
85 * blocks they were popping out of, to fix up the mask stack. This tracks
86 * the IF/ENDIF nesting in each current nested loop level.
87 */
88 int *if_depth_in_loop;
89 int loop_stack_depth;
90 int loop_stack_array_size;
91 };
92
93 static INLINE struct brw_instruction *current_insn( struct brw_compile *p)
94 {
95 return &p->store[p->nr_insn];
96 }
97
98 void brw_pop_insn_state( struct brw_compile *p );
99 void brw_push_insn_state( struct brw_compile *p );
100 void brw_set_mask_control( struct brw_compile *p, GLuint value );
101 void brw_set_saturate( struct brw_compile *p, bool enable );
102 void brw_set_access_mode( struct brw_compile *p, GLuint access_mode );
103 void brw_set_compression_control(struct brw_compile *p, enum brw_compression c);
104 void brw_set_predicate_control_flag_value( struct brw_compile *p, GLuint value );
105 void brw_set_predicate_control( struct brw_compile *p, GLuint pc );
106 void brw_set_predicate_inverse(struct brw_compile *p, bool predicate_inverse);
107 void brw_set_conditionalmod( struct brw_compile *p, GLuint conditional );
108 void brw_set_flag_reg(struct brw_compile *p, int reg, int subreg);
109 void brw_set_acc_write_control(struct brw_compile *p, GLuint value);
110
111 void brw_init_compile(struct brw_context *, struct brw_compile *p,
112 void *mem_ctx);
113 void brw_dump_compile(struct brw_compile *p, FILE *out, int start, int end);
114 const GLuint *brw_get_program( struct brw_compile *p, GLuint *sz );
115
116 struct brw_instruction *brw_next_insn(struct brw_compile *p, GLuint opcode);
117 void brw_set_dest(struct brw_compile *p, struct brw_instruction *insn,
118 struct brw_reg dest);
119 void brw_set_src0(struct brw_compile *p, struct brw_instruction *insn,
120 struct brw_reg reg);
121
122 void gen6_resolve_implied_move(struct brw_compile *p,
123 struct brw_reg *src,
124 GLuint msg_reg_nr);
125
/* Prototypes for the regular instruction helpers.
 *
 * Each macro below expands to the prototype of brw_<OP>() without a trailing
 * semicolon, so that every use reads like an ordinary declaration.  ALU1,
 * ALU2 and ALU3 take one, two and three source registers respectively and
 * return the instruction; ROUND takes one source and returns nothing.
 */
#define ALU1(OP)                                              \
   struct brw_instruction *brw_##OP(struct brw_compile *p,    \
                                    struct brw_reg dest,      \
                                    struct brw_reg src0)

#define ALU2(OP)                                              \
   struct brw_instruction *brw_##OP(struct brw_compile *p,    \
                                    struct brw_reg dest,      \
                                    struct brw_reg src0,      \
                                    struct brw_reg src1)

#define ALU3(OP)                                              \
   struct brw_instruction *brw_##OP(struct brw_compile *p,    \
                                    struct brw_reg dest,      \
                                    struct brw_reg src0,      \
                                    struct brw_reg src1,      \
                                    struct brw_reg src2)

#define ROUND(OP)                                             \
   void brw_##OP(struct brw_compile *p,                       \
                 struct brw_reg dest,                         \
                 struct brw_reg src0)

ALU1(MOV);
ALU2(SEL);
ALU1(NOT);
ALU2(AND);
ALU2(OR);
ALU2(XOR);
ALU2(SHR);
ALU2(SHL);
ALU2(ASR);
ALU1(F32TO16);
ALU1(F16TO32);
ALU2(JMPI);
ALU2(ADD);
ALU2(AVG);
ALU2(MUL);
ALU1(FRC);
ALU1(RNDD);
ALU2(MAC);
ALU2(MACH);
ALU1(LZD);
ALU2(DP4);
ALU2(DPH);
ALU2(DP3);
ALU2(DP2);
ALU2(LINE);
ALU2(PLN);
ALU3(MAD);
ALU3(LRP);
ALU1(BFREV);
ALU3(BFE);
ALU2(BFI1);
ALU3(BFI2);
ALU1(FBH);
ALU1(FBL);
ALU1(CBIT);

ROUND(RNDZ);
ROUND(RNDE);

/* The helper macros are only meant for the declarations above. */
#undef ALU1
#undef ALU2
#undef ALU3
#undef ROUND
192
193
194 /* Helpers for SEND instruction:
195 */
196 void brw_set_sampler_message(struct brw_compile *p,
197 struct brw_instruction *insn,
198 GLuint binding_table_index,
199 GLuint sampler,
200 GLuint msg_type,
201 GLuint response_length,
202 GLuint msg_length,
203 GLuint header_present,
204 GLuint simd_mode,
205 GLuint return_format);
206
207 void brw_set_dp_read_message(struct brw_compile *p,
208 struct brw_instruction *insn,
209 GLuint binding_table_index,
210 GLuint msg_control,
211 GLuint msg_type,
212 GLuint target_cache,
213 GLuint msg_length,
214 bool header_present,
215 GLuint response_length);
216
217 void brw_set_dp_write_message(struct brw_compile *p,
218 struct brw_instruction *insn,
219 GLuint binding_table_index,
220 GLuint msg_control,
221 GLuint msg_type,
222 GLuint msg_length,
223 bool header_present,
224 GLuint last_render_target,
225 GLuint response_length,
226 GLuint end_of_thread,
227 GLuint send_commit_msg);
228
/**
 * Flag bits taken by the flags parameter of brw_urb_WRITE().
 */
enum brw_urb_write_flags {
   BRW_URB_WRITE_NO_FLAGS = 0,

   /**
    * gen < 7: allocate a new URB entry and store its address in the
    * destination register.
    */
   BRW_URB_WRITE_ALLOCATE = 1 << 0,

   /**
    * gen < 7: deallocate the current URB entry.
    */
   BRW_URB_WRITE_UNUSED = 1 << 1,

   /**
    * Terminate the thread.
    */
   BRW_URB_WRITE_EOT = 1 << 2,

   /**
    * gen < 7: the given URB entry is complete and may be sent further down
    * the 3D pipeline.
    */
   BRW_URB_WRITE_COMPLETE = 1 << 3,

   /**
    * gen == 7: the message header carries an additional offset, which may
    * differ between the two vec4 slots.
    */
   BRW_URB_WRITE_PER_SLOT_OFFSET = 1 << 4,

   /**
    * Shorthand: end the thread and mark the given URB entry as complete in
    * one go.
    */
   BRW_URB_WRITE_EOT_COMPLETE =
      BRW_URB_WRITE_EOT | BRW_URB_WRITE_COMPLETE,

   /**
    * Shorthand: mark the given URB entry as complete and allocate a new one
    * at the same time.
    */
   BRW_URB_WRITE_ALLOCATE_COMPLETE =
      BRW_URB_WRITE_ALLOCATE | BRW_URB_WRITE_COMPLETE,
};
273
274 #ifdef __cplusplus
275 /**
276 * Allow brw_urb_write_flags enums to be ORed together.
277 */
278 inline brw_urb_write_flags
279 operator|(brw_urb_write_flags x, brw_urb_write_flags y)
280 {
281 return static_cast<brw_urb_write_flags>(static_cast<int>(x) |
282 static_cast<int>(y));
283 }
284 #endif
285
286 void brw_urb_WRITE(struct brw_compile *p,
287 struct brw_reg dest,
288 GLuint msg_reg_nr,
289 struct brw_reg src0,
290 enum brw_urb_write_flags flags,
291 GLuint msg_length,
292 GLuint response_length,
293 GLuint offset,
294 GLuint swizzle);
295
296 void brw_ff_sync(struct brw_compile *p,
297 struct brw_reg dest,
298 GLuint msg_reg_nr,
299 struct brw_reg src0,
300 bool allocate,
301 GLuint response_length,
302 bool eot);
303
304 void brw_svb_write(struct brw_compile *p,
305 struct brw_reg dest,
306 GLuint msg_reg_nr,
307 struct brw_reg src0,
308 GLuint binding_table_index,
309 bool send_commit_msg);
310
311 void brw_fb_WRITE(struct brw_compile *p,
312 int dispatch_width,
313 GLuint msg_reg_nr,
314 struct brw_reg src0,
315 GLuint msg_control,
316 GLuint binding_table_index,
317 GLuint msg_length,
318 GLuint response_length,
319 bool eot,
320 bool header_present);
321
322 void brw_SAMPLE(struct brw_compile *p,
323 struct brw_reg dest,
324 GLuint msg_reg_nr,
325 struct brw_reg src0,
326 GLuint binding_table_index,
327 GLuint sampler,
328 GLuint msg_type,
329 GLuint response_length,
330 GLuint msg_length,
331 GLuint header_present,
332 GLuint simd_mode,
333 GLuint return_format);
334
335 void brw_math( struct brw_compile *p,
336 struct brw_reg dest,
337 GLuint function,
338 GLuint msg_reg_nr,
339 struct brw_reg src,
340 GLuint data_type,
341 GLuint precision );
342
343 void brw_math2(struct brw_compile *p,
344 struct brw_reg dest,
345 GLuint function,
346 struct brw_reg src0,
347 struct brw_reg src1);
348
349 void brw_oword_block_read(struct brw_compile *p,
350 struct brw_reg dest,
351 struct brw_reg mrf,
352 uint32_t offset,
353 uint32_t bind_table_index);
354
355 void brw_oword_block_read_scratch(struct brw_compile *p,
356 struct brw_reg dest,
357 struct brw_reg mrf,
358 int num_regs,
359 GLuint offset);
360
361 void brw_oword_block_write_scratch(struct brw_compile *p,
362 struct brw_reg mrf,
363 int num_regs,
364 GLuint offset);
365
366 void brw_shader_time_add(struct brw_compile *p,
367 struct brw_reg payload,
368 uint32_t surf_index);
369
370 /* If/else/endif. Works by manipulating the execution flags on each
371 * channel.
372 */
373 struct brw_instruction *brw_IF(struct brw_compile *p,
374 GLuint execute_size);
375 struct brw_instruction *gen6_IF(struct brw_compile *p, uint32_t conditional,
376 struct brw_reg src0, struct brw_reg src1);
377
378 void brw_ELSE(struct brw_compile *p);
379 void brw_ENDIF(struct brw_compile *p);
380
381 /* DO/WHILE loops:
382 */
383 struct brw_instruction *brw_DO(struct brw_compile *p,
384 GLuint execute_size);
385
386 struct brw_instruction *brw_WHILE(struct brw_compile *p);
387
388 struct brw_instruction *brw_BREAK(struct brw_compile *p);
389 struct brw_instruction *brw_CONT(struct brw_compile *p);
390 struct brw_instruction *gen6_CONT(struct brw_compile *p);
391 struct brw_instruction *gen6_HALT(struct brw_compile *p);
392 /* Forward jumps:
393 */
394 void brw_land_fwd_jump(struct brw_compile *p, int jmp_insn_idx);
395
396
397
398 void brw_NOP(struct brw_compile *p);
399
400 void brw_WAIT(struct brw_compile *p);
401
402 /* Special case: there is never a destination, execution size will be
403 * taken from src0:
404 */
405 void brw_CMP(struct brw_compile *p,
406 struct brw_reg dest,
407 GLuint conditional,
408 struct brw_reg src0,
409 struct brw_reg src1);
410
411 /***********************************************************************
412 * brw_eu_util.c:
413 */
414
415 void brw_copy_indirect_to_indirect(struct brw_compile *p,
416 struct brw_indirect dst_ptr,
417 struct brw_indirect src_ptr,
418 GLuint count);
419
420 void brw_copy_from_indirect(struct brw_compile *p,
421 struct brw_reg dst,
422 struct brw_indirect ptr,
423 GLuint count);
424
425 void brw_copy4(struct brw_compile *p,
426 struct brw_reg dst,
427 struct brw_reg src,
428 GLuint count);
429
430 void brw_copy8(struct brw_compile *p,
431 struct brw_reg dst,
432 struct brw_reg src,
433 GLuint count);
434
435 void brw_math_invert( struct brw_compile *p,
436 struct brw_reg dst,
437 struct brw_reg src);
438
439 void brw_set_src1(struct brw_compile *p,
440 struct brw_instruction *insn,
441 struct brw_reg reg);
442
443 void brw_set_uip_jip(struct brw_compile *p);
444
445 uint32_t brw_swap_cmod(uint32_t cmod);
446
/* brw_eu_compact.c: conversion between struct brw_instruction and
 * struct brw_compact_instruction.
 */
void brw_init_compaction_tables(struct brw_context *brw);

void brw_compact_instructions(struct brw_compile *p);

void brw_uncompact_instruction(struct brw_context *brw,
                               struct brw_instruction *dst,
                               struct brw_compact_instruction *src);

bool brw_try_compact_instruction(struct brw_compile *p,
                                 struct brw_compact_instruction *dst,
                                 struct brw_instruction *src);

void brw_debug_compact_uncompact(struct brw_context *brw,
                                 struct brw_instruction *orig,
                                 struct brw_instruction *uncompacted);
460
461 #ifdef __cplusplus
462 }
463 #endif
464
465 #endif