i965/gen7: Implement code generation for untyped surface read instructions.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_eu.h
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32
33 #ifndef BRW_EU_H
34 #define BRW_EU_H
35
36 #include <stdbool.h>
37 #include "brw_structs.h"
38 #include "brw_defines.h"
39 #include "brw_reg.h"
40 #include "program/prog_instruction.h"
41
42 #ifdef __cplusplus
43 extern "C" {
44 #endif
45
46 #define BRW_EU_MAX_INSN_STACK 5
47
48 struct brw_compile {
49 struct brw_instruction *store;
50 int store_size;
51 GLuint nr_insn;
52 unsigned int next_insn_offset;
53
54 void *mem_ctx;
55
56 /* Allow clients to push/pop instruction state:
57 */
58 struct brw_instruction stack[BRW_EU_MAX_INSN_STACK];
59 bool compressed_stack[BRW_EU_MAX_INSN_STACK];
60 struct brw_instruction *current;
61
62 GLuint flag_value;
63 bool single_program_flow;
64 bool compressed;
65 struct brw_context *brw;
66
67 /* Control flow stacks:
68 * - if_stack contains IF and ELSE instructions which must be patched
69 * (and popped) once the matching ENDIF instruction is encountered.
70 *
71 * Just store the instruction pointer(an index).
72 */
73 int *if_stack;
74 int if_stack_depth;
75 int if_stack_array_size;
76
77 /**
78 * loop_stack contains the instruction pointers of the starts of loops which
79 * must be patched (and popped) once the matching WHILE instruction is
80 * encountered.
81 */
82 int *loop_stack;
83 /**
84 * pre-gen6, the BREAK and CONT instructions had to tell how many IF/ENDIF
85 * blocks they were popping out of, to fix up the mask stack. This tracks
86 * the IF/ENDIF nesting in each current nested loop level.
87 */
88 int *if_depth_in_loop;
89 int loop_stack_depth;
90 int loop_stack_array_size;
91 };
92
93 static INLINE struct brw_instruction *current_insn( struct brw_compile *p)
94 {
95 return &p->store[p->nr_insn];
96 }
97
98 void brw_pop_insn_state( struct brw_compile *p );
99 void brw_push_insn_state( struct brw_compile *p );
100 void brw_set_mask_control( struct brw_compile *p, GLuint value );
101 void brw_set_saturate( struct brw_compile *p, bool enable );
102 void brw_set_access_mode( struct brw_compile *p, GLuint access_mode );
103 void brw_set_compression_control(struct brw_compile *p, enum brw_compression c);
104 void brw_set_predicate_control_flag_value( struct brw_compile *p, GLuint value );
105 void brw_set_predicate_control( struct brw_compile *p, GLuint pc );
106 void brw_set_predicate_inverse(struct brw_compile *p, bool predicate_inverse);
107 void brw_set_conditionalmod( struct brw_compile *p, GLuint conditional );
108 void brw_set_flag_reg(struct brw_compile *p, int reg, int subreg);
109 void brw_set_acc_write_control(struct brw_compile *p, GLuint value);
110
111 void brw_init_compile(struct brw_context *, struct brw_compile *p,
112 void *mem_ctx);
113 void brw_dump_compile(struct brw_compile *p, FILE *out, int start, int end);
114 const GLuint *brw_get_program( struct brw_compile *p, GLuint *sz );
115
116 struct brw_instruction *brw_next_insn(struct brw_compile *p, GLuint opcode);
117 void brw_set_dest(struct brw_compile *p, struct brw_instruction *insn,
118 struct brw_reg dest);
119 void brw_set_src0(struct brw_compile *p, struct brw_instruction *insn,
120 struct brw_reg reg);
121
122 void gen6_resolve_implied_move(struct brw_compile *p,
123 struct brw_reg *src,
124 GLuint msg_reg_nr);
125
/* Helpers for regular instructions.
 *
 * Each macro below expands to the prototype of brw_<OP>() (including the
 * terminating semicolon):
 *   ALU1  - dest + one source,    returns struct brw_instruction *
 *   ALU2  - dest + two sources,   returns struct brw_instruction *
 *   ALU3  - dest + three sources, returns struct brw_instruction *
 *   ROUND - dest + one source,    returns void
 * The macros are #undef'd again once the declarations have been emitted, so
 * they do not leak to files including this header.
 */
#define ALU1(OP)                                                \
struct brw_instruction *brw_##OP(struct brw_compile *p,         \
                                 struct brw_reg dest,           \
                                 struct brw_reg src0);

#define ALU2(OP)                                                \
struct brw_instruction *brw_##OP(struct brw_compile *p,         \
                                 struct brw_reg dest,           \
                                 struct brw_reg src0,           \
                                 struct brw_reg src1);

#define ALU3(OP)                                                \
struct brw_instruction *brw_##OP(struct brw_compile *p,         \
                                 struct brw_reg dest,           \
                                 struct brw_reg src0,           \
                                 struct brw_reg src1,           \
                                 struct brw_reg src2);

#define ROUND(OP) \
void brw_##OP(struct brw_compile *p, struct brw_reg dest, struct brw_reg src0);

ALU1(MOV)
ALU2(SEL)
ALU1(NOT)
ALU2(AND)
ALU2(OR)
ALU2(XOR)
ALU2(SHR)
ALU2(SHL)
ALU2(ASR)
ALU1(F32TO16)
ALU1(F16TO32)
ALU2(JMPI)
ALU2(ADD)
ALU2(AVG)
ALU2(MUL)
ALU1(FRC)
ALU1(RNDD)
ALU2(MAC)
ALU2(MACH)
ALU1(LZD)
ALU2(DP4)
ALU2(DPH)
ALU2(DP3)
ALU2(DP2)
ALU2(LINE)
ALU2(PLN)
ALU3(MAD)
ALU3(LRP)
ALU1(BFREV)
ALU3(BFE)
ALU2(BFI1)
ALU3(BFI2)
ALU1(FBH)
ALU1(FBL)
ALU1(CBIT)
ALU2(ADDC)
ALU2(SUBB)

ROUND(RNDZ)
ROUND(RNDE)

#undef ALU1
#undef ALU2
#undef ALU3
#undef ROUND
194
195
196 /* Helpers for SEND instruction:
197 */
198 void brw_set_sampler_message(struct brw_compile *p,
199 struct brw_instruction *insn,
200 GLuint binding_table_index,
201 GLuint sampler,
202 GLuint msg_type,
203 GLuint response_length,
204 GLuint msg_length,
205 GLuint header_present,
206 GLuint simd_mode,
207 GLuint return_format);
208
209 void brw_set_dp_read_message(struct brw_compile *p,
210 struct brw_instruction *insn,
211 GLuint binding_table_index,
212 GLuint msg_control,
213 GLuint msg_type,
214 GLuint target_cache,
215 GLuint msg_length,
216 bool header_present,
217 GLuint response_length);
218
219 void brw_set_dp_write_message(struct brw_compile *p,
220 struct brw_instruction *insn,
221 GLuint binding_table_index,
222 GLuint msg_control,
223 GLuint msg_type,
224 GLuint msg_length,
225 bool header_present,
226 GLuint last_render_target,
227 GLuint response_length,
228 GLuint end_of_thread,
229 GLuint send_commit_msg);
230
/**
 * Bit flags accepted by brw_urb_WRITE().  Every basic flag occupies its own
 * bit, so flags can be combined with bitwise OR; the last two enumerators
 * are such pre-built combinations.
 */
enum brw_urb_write_flags {
   BRW_URB_WRITE_NO_FLAGS = 0,

   /**
    * Causes a new URB entry to be allocated, and its address stored in the
    * destination register (gen < 7).
    */
   BRW_URB_WRITE_ALLOCATE = 0x1,

   /**
    * Causes the current URB entry to be deallocated (gen < 7).
    */
   BRW_URB_WRITE_UNUSED = 0x2,

   /**
    * Causes the thread to terminate.
    */
   BRW_URB_WRITE_EOT = 0x4,

   /**
    * Indicates that the given URB entry is complete, and may be sent further
    * down the 3D pipeline (gen < 7).
    */
   BRW_URB_WRITE_COMPLETE = 0x8,

   /**
    * Indicates that an additional offset (which may be different for the two
    * vec4 slots) is stored in the message header (gen == 7).
    */
   BRW_URB_WRITE_PER_SLOT_OFFSET = 0x10,

   /**
    * Indicates that the channel masks in the URB_WRITE message header should
    * not be overridden to 0xff (gen == 7).
    */
   BRW_URB_WRITE_USE_CHANNEL_MASKS = 0x20,

   /**
    * Indicates that the data should be sent to the URB using the
    * URB_WRITE_OWORD message rather than URB_WRITE_HWORD (gen == 7).  This
    * causes offsets to be interpreted as multiples of an OWORD instead of an
    * HWORD, and only allows one OWORD to be written.
    */
   BRW_URB_WRITE_OWORD = 0x40,

   /**
    * Convenient combination of flags: end the thread while simultaneously
    * marking the given URB entry as complete.
    */
   BRW_URB_WRITE_EOT_COMPLETE = BRW_URB_WRITE_EOT | BRW_URB_WRITE_COMPLETE,

   /**
    * Convenient combination of flags: mark the given URB entry as complete
    * and simultaneously allocate a new one.
    */
   BRW_URB_WRITE_ALLOCATE_COMPLETE =
      BRW_URB_WRITE_ALLOCATE | BRW_URB_WRITE_COMPLETE,
};
289
290 #ifdef __cplusplus
291 /**
292 * Allow brw_urb_write_flags enums to be ORed together.
293 */
294 inline brw_urb_write_flags
295 operator|(brw_urb_write_flags x, brw_urb_write_flags y)
296 {
297 return static_cast<brw_urb_write_flags>(static_cast<int>(x) |
298 static_cast<int>(y));
299 }
300 #endif
301
302 void brw_urb_WRITE(struct brw_compile *p,
303 struct brw_reg dest,
304 GLuint msg_reg_nr,
305 struct brw_reg src0,
306 enum brw_urb_write_flags flags,
307 GLuint msg_length,
308 GLuint response_length,
309 GLuint offset,
310 GLuint swizzle);
311
312 void brw_ff_sync(struct brw_compile *p,
313 struct brw_reg dest,
314 GLuint msg_reg_nr,
315 struct brw_reg src0,
316 bool allocate,
317 GLuint response_length,
318 bool eot);
319
320 void brw_svb_write(struct brw_compile *p,
321 struct brw_reg dest,
322 GLuint msg_reg_nr,
323 struct brw_reg src0,
324 GLuint binding_table_index,
325 bool send_commit_msg);
326
327 void brw_fb_WRITE(struct brw_compile *p,
328 int dispatch_width,
329 GLuint msg_reg_nr,
330 struct brw_reg src0,
331 GLuint msg_control,
332 GLuint binding_table_index,
333 GLuint msg_length,
334 GLuint response_length,
335 bool eot,
336 bool header_present);
337
338 void brw_SAMPLE(struct brw_compile *p,
339 struct brw_reg dest,
340 GLuint msg_reg_nr,
341 struct brw_reg src0,
342 GLuint binding_table_index,
343 GLuint sampler,
344 GLuint msg_type,
345 GLuint response_length,
346 GLuint msg_length,
347 GLuint header_present,
348 GLuint simd_mode,
349 GLuint return_format);
350
351 void brw_math( struct brw_compile *p,
352 struct brw_reg dest,
353 GLuint function,
354 GLuint msg_reg_nr,
355 struct brw_reg src,
356 GLuint data_type,
357 GLuint precision );
358
359 void brw_math2(struct brw_compile *p,
360 struct brw_reg dest,
361 GLuint function,
362 struct brw_reg src0,
363 struct brw_reg src1);
364
365 void brw_oword_block_read(struct brw_compile *p,
366 struct brw_reg dest,
367 struct brw_reg mrf,
368 uint32_t offset,
369 uint32_t bind_table_index);
370
371 void brw_oword_block_read_scratch(struct brw_compile *p,
372 struct brw_reg dest,
373 struct brw_reg mrf,
374 int num_regs,
375 GLuint offset);
376
377 void brw_oword_block_write_scratch(struct brw_compile *p,
378 struct brw_reg mrf,
379 int num_regs,
380 GLuint offset);
381
382 void brw_shader_time_add(struct brw_compile *p,
383 struct brw_reg payload,
384 uint32_t surf_index);
385
386 /* If/else/endif. Works by manipulating the execution flags on each
387 * channel.
388 */
389 struct brw_instruction *brw_IF(struct brw_compile *p,
390 GLuint execute_size);
391 struct brw_instruction *gen6_IF(struct brw_compile *p, uint32_t conditional,
392 struct brw_reg src0, struct brw_reg src1);
393
394 void brw_ELSE(struct brw_compile *p);
395 void brw_ENDIF(struct brw_compile *p);
396
397 /* DO/WHILE loops:
398 */
399 struct brw_instruction *brw_DO(struct brw_compile *p,
400 GLuint execute_size);
401
402 struct brw_instruction *brw_WHILE(struct brw_compile *p);
403
404 struct brw_instruction *brw_BREAK(struct brw_compile *p);
405 struct brw_instruction *brw_CONT(struct brw_compile *p);
406 struct brw_instruction *gen6_CONT(struct brw_compile *p);
407 struct brw_instruction *gen6_HALT(struct brw_compile *p);
408 /* Forward jumps:
409 */
410 void brw_land_fwd_jump(struct brw_compile *p, int jmp_insn_idx);
411
412
413
414 void brw_NOP(struct brw_compile *p);
415
416 void brw_WAIT(struct brw_compile *p);
417
418 /* Special case: there is never a destination, execution size will be
419 * taken from src0:
420 */
421 void brw_CMP(struct brw_compile *p,
422 struct brw_reg dest,
423 GLuint conditional,
424 struct brw_reg src0,
425 struct brw_reg src1);
426
427 void
428 brw_untyped_atomic(struct brw_compile *p,
429 struct brw_reg dest,
430 struct brw_reg mrf,
431 GLuint atomic_op,
432 GLuint bind_table_index,
433 GLuint msg_length,
434 GLuint response_length);
435
436 void
437 brw_untyped_surface_read(struct brw_compile *p,
438 struct brw_reg dest,
439 struct brw_reg mrf,
440 GLuint bind_table_index,
441 GLuint msg_length,
442 GLuint response_length);
443
444 /***********************************************************************
445 * brw_eu_util.c:
446 */
447
448 void brw_copy_indirect_to_indirect(struct brw_compile *p,
449 struct brw_indirect dst_ptr,
450 struct brw_indirect src_ptr,
451 GLuint count);
452
453 void brw_copy_from_indirect(struct brw_compile *p,
454 struct brw_reg dst,
455 struct brw_indirect ptr,
456 GLuint count);
457
458 void brw_copy4(struct brw_compile *p,
459 struct brw_reg dst,
460 struct brw_reg src,
461 GLuint count);
462
463 void brw_copy8(struct brw_compile *p,
464 struct brw_reg dst,
465 struct brw_reg src,
466 GLuint count);
467
468 void brw_math_invert( struct brw_compile *p,
469 struct brw_reg dst,
470 struct brw_reg src);
471
472 void brw_set_src1(struct brw_compile *p,
473 struct brw_instruction *insn,
474 struct brw_reg reg);
475
476 void brw_set_uip_jip(struct brw_compile *p);
477
478 uint32_t brw_swap_cmod(uint32_t cmod);
479
/* brw_eu_compact.c
 *
 * Instruction compaction entry points.  Only prototypes are visible here.
 */
void brw_init_compaction_tables(struct brw_context *brw);
void brw_compact_instructions(struct brw_compile *p);

/* Expands the compact instruction `src` into the full-size `dst`. */
void brw_uncompact_instruction(struct brw_context *brw,
                               struct brw_instruction *dst,
                               struct brw_compact_instruction *src);

/* Opposite direction: full-size `src` to compact `dst`.  The bool result
 * presumably reports whether compaction succeeded -- TODO confirm.
 */
bool brw_try_compact_instruction(struct brw_compile *p,
                                 struct brw_compact_instruction *dst,
                                 struct brw_instruction *src);

/* Debug aid taking an original instruction and its uncompacted
 * counterpart.
 */
void brw_debug_compact_uncompact(struct brw_context *brw,
                                 struct brw_instruction *orig,
                                 struct brw_instruction *uncompacted);
493
494 #ifdef __cplusplus
495 }
496 #endif
497
498 #endif