r300g: Fix recursive Draw flush.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_eu_emit.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32
33 #include "brw_context.h"
34 #include "brw_defines.h"
35 #include "brw_eu.h"
36
37
38
39
40 /***********************************************************************
41 * Internal helper for constructing instructions
42 */
43
44 static void guess_execution_size( struct brw_instruction *insn,
45 struct brw_reg reg )
46 {
47 if (reg.width == BRW_WIDTH_8 &&
48 insn->header.compression_control == BRW_COMPRESSION_COMPRESSED)
49 insn->header.execution_size = BRW_EXECUTE_16;
50 else
51 insn->header.execution_size = reg.width; /* note - definitions are compatible */
52 }
53
54
55 static void brw_set_dest( struct brw_instruction *insn,
56 struct brw_reg dest )
57 {
58 if (dest.type != BRW_ARCHITECTURE_REGISTER_FILE)
59 assert(dest.nr < 128);
60
61 insn->bits1.da1.dest_reg_file = dest.file;
62 insn->bits1.da1.dest_reg_type = dest.type;
63 insn->bits1.da1.dest_address_mode = dest.address_mode;
64
65 if (dest.address_mode == BRW_ADDRESS_DIRECT) {
66 insn->bits1.da1.dest_reg_nr = dest.nr;
67
68 if (insn->header.access_mode == BRW_ALIGN_1) {
69 insn->bits1.da1.dest_subreg_nr = dest.subnr;
70 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
71 dest.hstride = BRW_HORIZONTAL_STRIDE_1;
72 insn->bits1.da1.dest_horiz_stride = dest.hstride;
73 }
74 else {
75 insn->bits1.da16.dest_subreg_nr = dest.subnr / 16;
76 insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask;
77 }
78 }
79 else {
80 insn->bits1.ia1.dest_subreg_nr = dest.subnr;
81
82 /* These are different sizes in align1 vs align16:
83 */
84 if (insn->header.access_mode == BRW_ALIGN_1) {
85 insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset;
86 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
87 dest.hstride = BRW_HORIZONTAL_STRIDE_1;
88 insn->bits1.ia1.dest_horiz_stride = dest.hstride;
89 }
90 else {
91 insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset;
92 }
93 }
94
95 /* NEW: Set the execution size based on dest.width and
96 * insn->compression_control:
97 */
98 guess_execution_size(insn, dest);
99 }
100
101 static void brw_set_src0( struct brw_instruction *insn,
102 struct brw_reg reg )
103 {
104 assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
105
106 if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE)
107 assert(reg.nr < 128);
108
109 insn->bits1.da1.src0_reg_file = reg.file;
110 insn->bits1.da1.src0_reg_type = reg.type;
111 insn->bits2.da1.src0_abs = reg.abs;
112 insn->bits2.da1.src0_negate = reg.negate;
113 insn->bits2.da1.src0_address_mode = reg.address_mode;
114
115 if (reg.file == BRW_IMMEDIATE_VALUE) {
116 insn->bits3.ud = reg.dw1.ud;
117
118 /* Required to set some fields in src1 as well:
119 */
120 insn->bits1.da1.src1_reg_file = 0; /* arf */
121 insn->bits1.da1.src1_reg_type = reg.type;
122 }
123 else
124 {
125 if (reg.address_mode == BRW_ADDRESS_DIRECT) {
126 if (insn->header.access_mode == BRW_ALIGN_1) {
127 insn->bits2.da1.src0_subreg_nr = reg.subnr;
128 insn->bits2.da1.src0_reg_nr = reg.nr;
129 }
130 else {
131 insn->bits2.da16.src0_subreg_nr = reg.subnr / 16;
132 insn->bits2.da16.src0_reg_nr = reg.nr;
133 }
134 }
135 else {
136 insn->bits2.ia1.src0_subreg_nr = reg.subnr;
137
138 if (insn->header.access_mode == BRW_ALIGN_1) {
139 insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset;
140 }
141 else {
142 insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset;
143 }
144 }
145
146 if (insn->header.access_mode == BRW_ALIGN_1) {
147 if (reg.width == BRW_WIDTH_1 &&
148 insn->header.execution_size == BRW_EXECUTE_1) {
149 insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
150 insn->bits2.da1.src0_width = BRW_WIDTH_1;
151 insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0;
152 }
153 else {
154 insn->bits2.da1.src0_horiz_stride = reg.hstride;
155 insn->bits2.da1.src0_width = reg.width;
156 insn->bits2.da1.src0_vert_stride = reg.vstride;
157 }
158 }
159 else {
160 insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
161 insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
162 insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
163 insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
164
165 /* This is an oddity of the fact we're using the same
166 * descriptions for registers in align_16 as align_1:
167 */
168 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
169 insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4;
170 else
171 insn->bits2.da16.src0_vert_stride = reg.vstride;
172 }
173 }
174 }
175
176
177 void brw_set_src1( struct brw_instruction *insn,
178 struct brw_reg reg )
179 {
180 assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
181
182 assert(reg.nr < 128);
183
184 insn->bits1.da1.src1_reg_file = reg.file;
185 insn->bits1.da1.src1_reg_type = reg.type;
186 insn->bits3.da1.src1_abs = reg.abs;
187 insn->bits3.da1.src1_negate = reg.negate;
188
189 /* Only src1 can be immediate in two-argument instructions.
190 */
191 assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE);
192
193 if (reg.file == BRW_IMMEDIATE_VALUE) {
194 insn->bits3.ud = reg.dw1.ud;
195 }
196 else {
197 /* This is a hardware restriction, which may or may not be lifted
198 * in the future:
199 */
200 assert (reg.address_mode == BRW_ADDRESS_DIRECT);
201 //assert (reg.file == BRW_GENERAL_REGISTER_FILE);
202
203 if (insn->header.access_mode == BRW_ALIGN_1) {
204 insn->bits3.da1.src1_subreg_nr = reg.subnr;
205 insn->bits3.da1.src1_reg_nr = reg.nr;
206 }
207 else {
208 insn->bits3.da16.src1_subreg_nr = reg.subnr / 16;
209 insn->bits3.da16.src1_reg_nr = reg.nr;
210 }
211
212 if (insn->header.access_mode == BRW_ALIGN_1) {
213 if (reg.width == BRW_WIDTH_1 &&
214 insn->header.execution_size == BRW_EXECUTE_1) {
215 insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
216 insn->bits3.da1.src1_width = BRW_WIDTH_1;
217 insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0;
218 }
219 else {
220 insn->bits3.da1.src1_horiz_stride = reg.hstride;
221 insn->bits3.da1.src1_width = reg.width;
222 insn->bits3.da1.src1_vert_stride = reg.vstride;
223 }
224 }
225 else {
226 insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
227 insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
228 insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
229 insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
230
231 /* This is an oddity of the fact we're using the same
232 * descriptions for registers in align_16 as align_1:
233 */
234 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
235 insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4;
236 else
237 insn->bits3.da16.src1_vert_stride = reg.vstride;
238 }
239 }
240 }
241
242
243
244 static void brw_set_math_message( struct brw_instruction *insn,
245 GLuint msg_length,
246 GLuint response_length,
247 GLuint function,
248 GLuint integer_type,
249 GLboolean low_precision,
250 GLboolean saturate,
251 GLuint dataType )
252 {
253 brw_set_src1(insn, brw_imm_d(0));
254
255 insn->bits3.math.function = function;
256 insn->bits3.math.int_type = integer_type;
257 insn->bits3.math.precision = low_precision;
258 insn->bits3.math.saturate = saturate;
259 insn->bits3.math.data_type = dataType;
260 insn->bits3.math.response_length = response_length;
261 insn->bits3.math.msg_length = msg_length;
262 insn->bits3.math.msg_target = BRW_MESSAGE_TARGET_MATH;
263 insn->bits3.math.end_of_thread = 0;
264 }
265
266 static void brw_set_urb_message( struct brw_instruction *insn,
267 GLboolean allocate,
268 GLboolean used,
269 GLuint msg_length,
270 GLuint response_length,
271 GLboolean end_of_thread,
272 GLboolean complete,
273 GLuint offset,
274 GLuint swizzle_control )
275 {
276 brw_set_src1(insn, brw_imm_d(0));
277
278 insn->bits3.urb.opcode = 0; /* ? */
279 insn->bits3.urb.offset = offset;
280 insn->bits3.urb.swizzle_control = swizzle_control;
281 insn->bits3.urb.allocate = allocate;
282 insn->bits3.urb.used = used; /* ? */
283 insn->bits3.urb.complete = complete;
284 insn->bits3.urb.response_length = response_length;
285 insn->bits3.urb.msg_length = msg_length;
286 insn->bits3.urb.msg_target = BRW_MESSAGE_TARGET_URB;
287 insn->bits3.urb.end_of_thread = end_of_thread;
288 }
289
290 static void brw_set_dp_write_message( struct brw_instruction *insn,
291 GLuint binding_table_index,
292 GLuint msg_control,
293 GLuint msg_type,
294 GLuint msg_length,
295 GLuint pixel_scoreboard_clear,
296 GLuint response_length,
297 GLuint end_of_thread )
298 {
299 brw_set_src1(insn, brw_imm_d(0));
300
301 insn->bits3.dp_write.binding_table_index = binding_table_index;
302 insn->bits3.dp_write.msg_control = msg_control;
303 insn->bits3.dp_write.pixel_scoreboard_clear = pixel_scoreboard_clear;
304 insn->bits3.dp_write.msg_type = msg_type;
305 insn->bits3.dp_write.send_commit_msg = 0;
306 insn->bits3.dp_write.response_length = response_length;
307 insn->bits3.dp_write.msg_length = msg_length;
308 insn->bits3.dp_write.msg_target = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
309 insn->bits3.urb.end_of_thread = end_of_thread;
310 }
311
312 static void brw_set_dp_read_message( struct brw_instruction *insn,
313 GLuint binding_table_index,
314 GLuint msg_control,
315 GLuint msg_type,
316 GLuint target_cache,
317 GLuint msg_length,
318 GLuint response_length,
319 GLuint end_of_thread )
320 {
321 brw_set_src1(insn, brw_imm_d(0));
322
323 insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/
324 insn->bits3.dp_read.msg_control = msg_control; /*8:11*/
325 insn->bits3.dp_read.msg_type = msg_type; /*12:13*/
326 insn->bits3.dp_read.target_cache = target_cache; /*14:15*/
327 insn->bits3.dp_read.response_length = response_length; /*16:19*/
328 insn->bits3.dp_read.msg_length = msg_length; /*20:23*/
329 insn->bits3.dp_read.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ; /*24:27*/
330 insn->bits3.dp_read.pad1 = 0; /*28:30*/
331 insn->bits3.dp_read.end_of_thread = end_of_thread; /*31*/
332 }
333
334 static void brw_set_sampler_message(struct brw_context *brw,
335 struct brw_instruction *insn,
336 GLuint binding_table_index,
337 GLuint sampler,
338 GLuint msg_type,
339 GLuint response_length,
340 GLuint msg_length,
341 GLboolean eot)
342 {
343 brw_set_src1(insn, brw_imm_d(0));
344
345 if (BRW_IS_G4X(brw)) {
346 insn->bits3.sampler_g4x.binding_table_index = binding_table_index;
347 insn->bits3.sampler_g4x.sampler = sampler;
348 insn->bits3.sampler_g4x.msg_type = msg_type;
349 insn->bits3.sampler_g4x.response_length = response_length;
350 insn->bits3.sampler_g4x.msg_length = msg_length;
351 insn->bits3.sampler_g4x.end_of_thread = eot;
352 insn->bits3.sampler_g4x.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
353 } else {
354 insn->bits3.sampler.binding_table_index = binding_table_index;
355 insn->bits3.sampler.sampler = sampler;
356 insn->bits3.sampler.msg_type = msg_type;
357 insn->bits3.sampler.return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32;
358 insn->bits3.sampler.response_length = response_length;
359 insn->bits3.sampler.msg_length = msg_length;
360 insn->bits3.sampler.end_of_thread = eot;
361 insn->bits3.sampler.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
362 }
363 }
364
365
366
367 static struct brw_instruction *next_insn( struct brw_compile *p,
368 GLuint opcode )
369 {
370 struct brw_instruction *insn;
371
372 assert(p->nr_insn + 1 < BRW_EU_MAX_INSN);
373
374 insn = &p->store[p->nr_insn++];
375 memcpy(insn, p->current, sizeof(*insn));
376
377 /* Reset this one-shot flag:
378 */
379
380 if (p->current->header.destreg__conditonalmod) {
381 p->current->header.destreg__conditonalmod = 0;
382 p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
383 }
384
385 insn->header.opcode = opcode;
386 return insn;
387 }
388
389
390 static struct brw_instruction *brw_alu1( struct brw_compile *p,
391 GLuint opcode,
392 struct brw_reg dest,
393 struct brw_reg src )
394 {
395 struct brw_instruction *insn = next_insn(p, opcode);
396 brw_set_dest(insn, dest);
397 brw_set_src0(insn, src);
398 return insn;
399 }
400
401 static struct brw_instruction *brw_alu2(struct brw_compile *p,
402 GLuint opcode,
403 struct brw_reg dest,
404 struct brw_reg src0,
405 struct brw_reg src1 )
406 {
407 struct brw_instruction *insn = next_insn(p, opcode);
408 brw_set_dest(insn, dest);
409 brw_set_src0(insn, src0);
410 brw_set_src1(insn, src1);
411 return insn;
412 }
413
414
415 /***********************************************************************
416 * Convenience routines.
417 */
418 #define ALU1(OP) \
419 struct brw_instruction *brw_##OP(struct brw_compile *p, \
420 struct brw_reg dest, \
421 struct brw_reg src0) \
422 { \
423 return brw_alu1(p, BRW_OPCODE_##OP, dest, src0); \
424 }
425
426 #define ALU2(OP) \
427 struct brw_instruction *brw_##OP(struct brw_compile *p, \
428 struct brw_reg dest, \
429 struct brw_reg src0, \
430 struct brw_reg src1) \
431 { \
432 return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1); \
433 }
434
435
436 ALU1(MOV)
437 ALU2(SEL)
438 ALU1(NOT)
439 ALU2(AND)
440 ALU2(OR)
441 ALU2(XOR)
442 ALU2(SHR)
443 ALU2(SHL)
444 ALU2(RSR)
445 ALU2(RSL)
446 ALU2(ASR)
447 ALU2(ADD)
448 ALU2(MUL)
449 ALU1(FRC)
450 ALU1(RNDD)
451 ALU1(RNDZ)
452 ALU2(MAC)
453 ALU2(MACH)
454 ALU1(LZD)
455 ALU2(DP4)
456 ALU2(DPH)
457 ALU2(DP3)
458 ALU2(DP2)
459 ALU2(LINE)
460
461
462
463
464 void brw_NOP(struct brw_compile *p)
465 {
466 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP);
467 brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
468 brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
469 brw_set_src1(insn, brw_imm_ud(0x0));
470 }
471
472
473
474
475
476 /***********************************************************************
477 * Comparisons, if/else/endif
478 */
479
480 struct brw_instruction *brw_JMPI(struct brw_compile *p,
481 struct brw_reg dest,
482 struct brw_reg src0,
483 struct brw_reg src1)
484 {
485 struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1);
486
487 p->current->header.predicate_control = BRW_PREDICATE_NONE;
488
489 return insn;
490 }
491
492 /* EU takes the value from the flag register and pushes it onto some
493 * sort of a stack (presumably merging with any flag value already on
494 * the stack). Within an if block, the flags at the top of the stack
495 * control execution on each channel of the unit, eg. on each of the
496 * 16 pixel values in our wm programs.
497 *
498 * When the matching 'else' instruction is reached (presumably by
499 * countdown of the instruction count patched in by our ELSE/ENDIF
500 * functions), the relevent flags are inverted.
501 *
502 * When the matching 'endif' instruction is reached, the flags are
503 * popped off. If the stack is now empty, normal execution resumes.
504 *
505 * No attempt is made to deal with stack overflow (14 elements?).
506 */
507 struct brw_instruction *brw_IF(struct brw_compile *p, GLuint execute_size)
508 {
509 struct brw_instruction *insn;
510
511 if (p->single_program_flow) {
512 assert(execute_size == BRW_EXECUTE_1);
513
514 insn = next_insn(p, BRW_OPCODE_ADD);
515 insn->header.predicate_inverse = 1;
516 } else {
517 insn = next_insn(p, BRW_OPCODE_IF);
518 }
519
520 /* Override the defaults for this instruction:
521 */
522 brw_set_dest(insn, brw_ip_reg());
523 brw_set_src0(insn, brw_ip_reg());
524 brw_set_src1(insn, brw_imm_d(0x0));
525
526 insn->header.execution_size = execute_size;
527 insn->header.compression_control = BRW_COMPRESSION_NONE;
528 insn->header.predicate_control = BRW_PREDICATE_NORMAL;
529 insn->header.mask_control = BRW_MASK_ENABLE;
530 if (!p->single_program_flow)
531 insn->header.thread_control = BRW_THREAD_SWITCH;
532
533 p->current->header.predicate_control = BRW_PREDICATE_NONE;
534
535 return insn;
536 }
537
538
539 struct brw_instruction *brw_ELSE(struct brw_compile *p,
540 struct brw_instruction *if_insn)
541 {
542 struct brw_instruction *insn;
543
544 if (p->single_program_flow) {
545 insn = next_insn(p, BRW_OPCODE_ADD);
546 } else {
547 insn = next_insn(p, BRW_OPCODE_ELSE);
548 }
549
550 brw_set_dest(insn, brw_ip_reg());
551 brw_set_src0(insn, brw_ip_reg());
552 brw_set_src1(insn, brw_imm_d(0x0));
553
554 insn->header.compression_control = BRW_COMPRESSION_NONE;
555 insn->header.execution_size = if_insn->header.execution_size;
556 insn->header.mask_control = BRW_MASK_ENABLE;
557 if (!p->single_program_flow)
558 insn->header.thread_control = BRW_THREAD_SWITCH;
559
560 /* Patch the if instruction to point at this instruction.
561 */
562 if (p->single_program_flow) {
563 assert(if_insn->header.opcode == BRW_OPCODE_ADD);
564
565 if_insn->bits3.ud = (insn - if_insn + 1) * 16;
566 } else {
567 assert(if_insn->header.opcode == BRW_OPCODE_IF);
568
569 if_insn->bits3.if_else.jump_count = insn - if_insn;
570 if_insn->bits3.if_else.pop_count = 1;
571 if_insn->bits3.if_else.pad0 = 0;
572 }
573
574 return insn;
575 }
576
577 void brw_ENDIF(struct brw_compile *p,
578 struct brw_instruction *patch_insn)
579 {
580 if (p->single_program_flow) {
581 /* In single program flow mode, there's no need to execute an ENDIF,
582 * since we don't need to do any stack operations, and if we're executing
583 * currently, we want to just continue executing.
584 */
585 struct brw_instruction *next = &p->store[p->nr_insn];
586
587 assert(patch_insn->header.opcode == BRW_OPCODE_ADD);
588
589 patch_insn->bits3.ud = (next - patch_insn) * 16;
590 } else {
591 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_ENDIF);
592
593 brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
594 brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
595 brw_set_src1(insn, brw_imm_d(0x0));
596
597 insn->header.compression_control = BRW_COMPRESSION_NONE;
598 insn->header.execution_size = patch_insn->header.execution_size;
599 insn->header.mask_control = BRW_MASK_ENABLE;
600 insn->header.thread_control = BRW_THREAD_SWITCH;
601
602 assert(patch_insn->bits3.if_else.jump_count == 0);
603
604 /* Patch the if or else instructions to point at this or the next
605 * instruction respectively.
606 */
607 if (patch_insn->header.opcode == BRW_OPCODE_IF) {
608 /* Automagically turn it into an IFF:
609 */
610 patch_insn->header.opcode = BRW_OPCODE_IFF;
611 patch_insn->bits3.if_else.jump_count = insn - patch_insn + 1;
612 patch_insn->bits3.if_else.pop_count = 0;
613 patch_insn->bits3.if_else.pad0 = 0;
614 } else if (patch_insn->header.opcode == BRW_OPCODE_ELSE) {
615 patch_insn->bits3.if_else.jump_count = insn - patch_insn + 1;
616 patch_insn->bits3.if_else.pop_count = 1;
617 patch_insn->bits3.if_else.pad0 = 0;
618 } else {
619 assert(0);
620 }
621
622 /* Also pop item off the stack in the endif instruction:
623 */
624 insn->bits3.if_else.jump_count = 0;
625 insn->bits3.if_else.pop_count = 1;
626 insn->bits3.if_else.pad0 = 0;
627 }
628 }
629
630 struct brw_instruction *brw_BREAK(struct brw_compile *p)
631 {
632 struct brw_instruction *insn;
633 insn = next_insn(p, BRW_OPCODE_BREAK);
634 brw_set_dest(insn, brw_ip_reg());
635 brw_set_src0(insn, brw_ip_reg());
636 brw_set_src1(insn, brw_imm_d(0x0));
637 insn->header.compression_control = BRW_COMPRESSION_NONE;
638 insn->header.execution_size = BRW_EXECUTE_8;
639 /* insn->header.mask_control = BRW_MASK_DISABLE; */
640 insn->bits3.if_else.pad0 = 0;
641 return insn;
642 }
643
644 struct brw_instruction *brw_CONT(struct brw_compile *p)
645 {
646 struct brw_instruction *insn;
647 insn = next_insn(p, BRW_OPCODE_CONTINUE);
648 brw_set_dest(insn, brw_ip_reg());
649 brw_set_src0(insn, brw_ip_reg());
650 brw_set_src1(insn, brw_imm_d(0x0));
651 insn->header.compression_control = BRW_COMPRESSION_NONE;
652 insn->header.execution_size = BRW_EXECUTE_8;
653 /* insn->header.mask_control = BRW_MASK_DISABLE; */
654 insn->bits3.if_else.pad0 = 0;
655 return insn;
656 }
657
658 /* DO/WHILE loop:
659 */
660 struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size)
661 {
662 if (p->single_program_flow) {
663 return &p->store[p->nr_insn];
664 } else {
665 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO);
666
667 /* Override the defaults for this instruction:
668 */
669 brw_set_dest(insn, brw_null_reg());
670 brw_set_src0(insn, brw_null_reg());
671 brw_set_src1(insn, brw_null_reg());
672
673 insn->header.compression_control = BRW_COMPRESSION_NONE;
674 insn->header.execution_size = execute_size;
675 insn->header.predicate_control = BRW_PREDICATE_NONE;
676 /* insn->header.mask_control = BRW_MASK_ENABLE; */
677 /* insn->header.mask_control = BRW_MASK_DISABLE; */
678
679 return insn;
680 }
681 }
682
683
684
685 struct brw_instruction *brw_WHILE(struct brw_compile *p,
686 struct brw_instruction *do_insn)
687 {
688 struct brw_instruction *insn;
689
690 if (p->single_program_flow)
691 insn = next_insn(p, BRW_OPCODE_ADD);
692 else
693 insn = next_insn(p, BRW_OPCODE_WHILE);
694
695 brw_set_dest(insn, brw_ip_reg());
696 brw_set_src0(insn, brw_ip_reg());
697 brw_set_src1(insn, brw_imm_d(0x0));
698
699 insn->header.compression_control = BRW_COMPRESSION_NONE;
700
701 if (p->single_program_flow) {
702 insn->header.execution_size = BRW_EXECUTE_1;
703
704 insn->bits3.d = (do_insn - insn) * 16;
705 } else {
706 insn->header.execution_size = do_insn->header.execution_size;
707
708 assert(do_insn->header.opcode == BRW_OPCODE_DO);
709 insn->bits3.if_else.jump_count = do_insn - insn + 1;
710 insn->bits3.if_else.pop_count = 0;
711 insn->bits3.if_else.pad0 = 0;
712 }
713
714 /* insn->header.mask_control = BRW_MASK_ENABLE; */
715
716 /* insn->header.mask_control = BRW_MASK_DISABLE; */
717 p->current->header.predicate_control = BRW_PREDICATE_NONE;
718 return insn;
719 }
720
721
722 /* FORWARD JUMPS:
723 */
724 void brw_land_fwd_jump(struct brw_compile *p,
725 struct brw_instruction *jmp_insn)
726 {
727 struct brw_instruction *landing = &p->store[p->nr_insn];
728
729 assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
730 assert(jmp_insn->bits1.da1.src1_reg_file = BRW_IMMEDIATE_VALUE);
731
732 jmp_insn->bits3.ud = (landing - jmp_insn) - 1;
733 }
734
735
736
737 /* To integrate with the above, it makes sense that the comparison
738 * instruction should populate the flag register. It might be simpler
739 * just to use the flag reg for most WM tasks?
740 */
741 void brw_CMP(struct brw_compile *p,
742 struct brw_reg dest,
743 GLuint conditional,
744 struct brw_reg src0,
745 struct brw_reg src1)
746 {
747 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP);
748
749 insn->header.destreg__conditonalmod = conditional;
750 brw_set_dest(insn, dest);
751 brw_set_src0(insn, src0);
752 brw_set_src1(insn, src1);
753
754 /* guess_execution_size(insn, src0); */
755
756
757 /* Make it so that future instructions will use the computed flag
758 * value until brw_set_predicate_control_flag_value() is called
759 * again.
760 */
761 if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
762 dest.nr == 0) {
763 p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
764 p->flag_value = 0xff;
765 }
766 }
767
768
769
770 /***********************************************************************
771 * Helpers for the various SEND message types:
772 */
773
774 /** Extended math function, float[8].
775 */
776 void brw_math( struct brw_compile *p,
777 struct brw_reg dest,
778 GLuint function,
779 GLuint saturate,
780 GLuint msg_reg_nr,
781 struct brw_reg src,
782 GLuint data_type,
783 GLuint precision )
784 {
785 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
786 GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
787 GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
788
789 /* Example code doesn't set predicate_control for send
790 * instructions.
791 */
792 insn->header.predicate_control = 0;
793 insn->header.destreg__conditonalmod = msg_reg_nr;
794
795 brw_set_dest(insn, dest);
796 brw_set_src0(insn, src);
797 brw_set_math_message(insn,
798 msg_length, response_length,
799 function,
800 BRW_MATH_INTEGER_UNSIGNED,
801 precision,
802 saturate,
803 data_type);
804 }
805
806 /**
807 * Extended math function, float[16].
808 * Use 2 send instructions.
809 */
810 void brw_math_16( struct brw_compile *p,
811 struct brw_reg dest,
812 GLuint function,
813 GLuint saturate,
814 GLuint msg_reg_nr,
815 struct brw_reg src,
816 GLuint precision )
817 {
818 struct brw_instruction *insn;
819 GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
820 GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
821
822 /* First instruction:
823 */
824 brw_push_insn_state(p);
825 brw_set_predicate_control_flag_value(p, 0xff);
826 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
827
828 insn = next_insn(p, BRW_OPCODE_SEND);
829 insn->header.destreg__conditonalmod = msg_reg_nr;
830
831 brw_set_dest(insn, dest);
832 brw_set_src0(insn, src);
833 brw_set_math_message(insn,
834 msg_length, response_length,
835 function,
836 BRW_MATH_INTEGER_UNSIGNED,
837 precision,
838 saturate,
839 BRW_MATH_DATA_VECTOR);
840
841 /* Second instruction:
842 */
843 insn = next_insn(p, BRW_OPCODE_SEND);
844 insn->header.compression_control = BRW_COMPRESSION_2NDHALF;
845 insn->header.destreg__conditonalmod = msg_reg_nr+1;
846
847 brw_set_dest(insn, offset(dest,1));
848 brw_set_src0(insn, src);
849 brw_set_math_message(insn,
850 msg_length, response_length,
851 function,
852 BRW_MATH_INTEGER_UNSIGNED,
853 precision,
854 saturate,
855 BRW_MATH_DATA_VECTOR);
856
857 brw_pop_insn_state(p);
858 }
859
860
861 /**
862 * Write block of 16 dwords/floats to the data port Render Cache scratch buffer.
863 * Scratch offset should be a multiple of 64.
864 * Used for register spilling.
865 */
866 void brw_dp_WRITE_16( struct brw_compile *p,
867 struct brw_reg src,
868 GLuint scratch_offset )
869 {
870 GLuint msg_reg_nr = 1;
871 {
872 brw_push_insn_state(p);
873 brw_set_mask_control(p, BRW_MASK_DISABLE);
874 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
875
876 /* set message header global offset field (reg 0, element 2) */
877 brw_MOV(p,
878 retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D),
879 brw_imm_d(scratch_offset));
880
881 brw_pop_insn_state(p);
882 }
883
884 {
885 GLuint msg_length = 3;
886 struct brw_reg dest = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
887 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
888
889 insn->header.predicate_control = 0; /* XXX */
890 insn->header.compression_control = BRW_COMPRESSION_NONE;
891 insn->header.destreg__conditonalmod = msg_reg_nr;
892
893 brw_set_dest(insn, dest);
894 brw_set_src0(insn, src);
895
896 brw_set_dp_write_message(insn,
897 255, /* binding table index (255=stateless) */
898 BRW_DATAPORT_OWORD_BLOCK_4_OWORDS, /* msg_control */
899 BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE, /* msg_type */
900 msg_length,
901 0, /* pixel scoreboard */
902 0, /* response_length */
903 0); /* eot */
904 }
905 }
906
907
908 /**
909 * Read block of 16 dwords/floats from the data port Render Cache scratch buffer.
910 * Scratch offset should be a multiple of 64.
911 * Used for register spilling.
912 */
913 void brw_dp_READ_16( struct brw_compile *p,
914 struct brw_reg dest,
915 GLuint scratch_offset )
916 {
917 GLuint msg_reg_nr = 1;
918 {
919 brw_push_insn_state(p);
920 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
921 brw_set_mask_control(p, BRW_MASK_DISABLE);
922
923 /* set message header global offset field (reg 0, element 2) */
924 brw_MOV(p,
925 retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D),
926 brw_imm_d(scratch_offset));
927
928 brw_pop_insn_state(p);
929 }
930
931 {
932 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
933
934 insn->header.predicate_control = 0; /* XXX */
935 insn->header.compression_control = BRW_COMPRESSION_NONE;
936 insn->header.destreg__conditonalmod = msg_reg_nr;
937
938 brw_set_dest(insn, dest); /* UW? */
939 brw_set_src0(insn, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW));
940
941 brw_set_dp_read_message(insn,
942 255, /* binding table index (255=stateless) */
943 3, /* msg_control (3 means 4 Owords) */
944 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
945 1, /* target cache (render/scratch) */
946 1, /* msg_length */
947 2, /* response_length */
948 0); /* eot */
949 }
950 }
951
952
953 /**
954 * Read a float[4] vector from the data port Data Cache (const buffer).
955 * Location (in buffer) should be a multiple of 16.
956 * Used for fetching shader constants.
957 * If relAddr is true, we'll do an indirect fetch using the address register.
958 */
959 void brw_dp_READ_4( struct brw_compile *p,
960 struct brw_reg dest,
961 GLboolean relAddr,
962 GLuint location,
963 GLuint bind_table_index )
964 {
965 /* XXX: relAddr not implemented */
966 GLuint msg_reg_nr = 1;
967 {
968 struct brw_reg b;
969 brw_push_insn_state(p);
970 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
971 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
972 brw_set_mask_control(p, BRW_MASK_DISABLE);
973
974 /* Setup MRF[1] with location/offset into const buffer */
975 b = brw_message_reg(msg_reg_nr);
976 b = retype(b, BRW_REGISTER_TYPE_UD);
977 /* XXX I think we're setting all the dwords of MRF[1] to 'location'.
978 * when the docs say only dword[2] should be set. Hmmm. But it works.
979 */
980 brw_MOV(p, b, brw_imm_ud(location));
981 brw_pop_insn_state(p);
982 }
983
984 {
985 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
986
987 insn->header.predicate_control = BRW_PREDICATE_NONE;
988 insn->header.compression_control = BRW_COMPRESSION_NONE;
989 insn->header.destreg__conditonalmod = msg_reg_nr;
990 insn->header.mask_control = BRW_MASK_DISABLE;
991
992 /* cast dest to a uword[8] vector */
993 dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);
994
995 brw_set_dest(insn, dest);
996 brw_set_src0(insn, brw_null_reg());
997
998 brw_set_dp_read_message(insn,
999 bind_table_index,
1000 0, /* msg_control (0 means 1 Oword) */
1001 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
1002 0, /* source cache = data cache */
1003 1, /* msg_length */
1004 1, /* response_length (1 Oword) */
1005 0); /* eot */
1006 }
1007 }
1008
1009
1010 /**
1011 * Read float[4] constant(s) from VS constant buffer.
1012 * For relative addressing, two float[4] constants will be read into 'dest'.
1013 * Otherwise, one float[4] constant will be read into the lower half of 'dest'.
1014 */
1015 void brw_dp_READ_4_vs(struct brw_compile *p,
1016 struct brw_reg dest,
1017 GLuint oword,
1018 GLboolean relAddr,
1019 struct brw_reg addrReg,
1020 GLuint location,
1021 GLuint bind_table_index)
1022 {
1023 GLuint msg_reg_nr = 1;
1024
1025 assert(oword < 2);
1026 /*
1027 printf("vs const read msg, location %u, msg_reg_nr %d\n",
1028 location, msg_reg_nr);
1029 */
1030
1031 /* Setup MRF[1] with location/offset into const buffer */
1032 {
1033 struct brw_reg b;
1034
1035 brw_push_insn_state(p);
1036 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1037 brw_set_mask_control(p, BRW_MASK_DISABLE);
1038 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1039 /*brw_set_access_mode(p, BRW_ALIGN_16);*/
1040
1041 /* XXX I think we're setting all the dwords of MRF[1] to 'location'.
1042 * when the docs say only dword[2] should be set. Hmmm. But it works.
1043 */
1044 b = brw_message_reg(msg_reg_nr);
1045 b = retype(b, BRW_REGISTER_TYPE_UD);
1046 /*b = get_element_ud(b, 2);*/
1047 if (relAddr) {
1048 brw_ADD(p, b, addrReg, brw_imm_ud(location));
1049 }
1050 else {
1051 brw_MOV(p, b, brw_imm_ud(location));
1052 }
1053
1054 brw_pop_insn_state(p);
1055 }
1056
1057 {
1058 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1059
1060 insn->header.predicate_control = BRW_PREDICATE_NONE;
1061 insn->header.compression_control = BRW_COMPRESSION_NONE;
1062 insn->header.destreg__conditonalmod = msg_reg_nr;
1063 insn->header.mask_control = BRW_MASK_DISABLE;
1064 /*insn->header.access_mode = BRW_ALIGN_16;*/
1065
1066 brw_set_dest(insn, dest);
1067 brw_set_src0(insn, brw_null_reg());
1068
1069 brw_set_dp_read_message(insn,
1070 bind_table_index,
1071 oword, /* 0 = lower Oword, 1 = upper Oword */
1072 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
1073 0, /* source cache = data cache */
1074 1, /* msg_length */
1075 1, /* response_length (1 Oword) */
1076 0); /* eot */
1077 }
1078 }
1079
1080
1081
1082 void brw_fb_WRITE(struct brw_compile *p,
1083 struct brw_reg dest,
1084 GLuint msg_reg_nr,
1085 struct brw_reg src0,
1086 GLuint binding_table_index,
1087 GLuint msg_length,
1088 GLuint response_length,
1089 GLboolean eot)
1090 {
1091 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1092
1093 insn->header.predicate_control = 0; /* XXX */
1094 insn->header.compression_control = BRW_COMPRESSION_NONE;
1095 insn->header.destreg__conditonalmod = msg_reg_nr;
1096
1097 brw_set_dest(insn, dest);
1098 brw_set_src0(insn, src0);
1099 brw_set_dp_write_message(insn,
1100 binding_table_index,
1101 BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE, /* msg_control */
1102 BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE, /* msg_type */
1103 msg_length,
1104 1, /* pixel scoreboard */
1105 response_length,
1106 eot);
1107 }
1108
1109
1110 /**
1111 * Texture sample instruction.
1112 * Note: the msg_type plus msg_length values determine exactly what kind
1113 * of sampling operation is performed. See volume 4, page 161 of docs.
1114 */
1115 void brw_SAMPLE(struct brw_compile *p,
1116 struct brw_reg dest,
1117 GLuint msg_reg_nr,
1118 struct brw_reg src0,
1119 GLuint binding_table_index,
1120 GLuint sampler,
1121 GLuint writemask,
1122 GLuint msg_type,
1123 GLuint response_length,
1124 GLuint msg_length,
1125 GLboolean eot)
1126 {
1127 GLboolean need_stall = 0;
1128
1129 if (writemask == 0) {
1130 /*_mesa_printf("%s: zero writemask??\n", __FUNCTION__); */
1131 return;
1132 }
1133
1134 /* Hardware doesn't do destination dependency checking on send
1135 * instructions properly. Add a workaround which generates the
1136 * dependency by other means. In practice it seems like this bug
1137 * only crops up for texture samples, and only where registers are
1138 * written by the send and then written again later without being
1139 * read in between. Luckily for us, we already track that
1140 * information and use it to modify the writemask for the
1141 * instruction, so that is a guide for whether a workaround is
1142 * needed.
1143 */
1144 if (writemask != WRITEMASK_XYZW) {
1145 GLuint dst_offset = 0;
1146 GLuint i, newmask = 0, len = 0;
1147
1148 for (i = 0; i < 4; i++) {
1149 if (writemask & (1<<i))
1150 break;
1151 dst_offset += 2;
1152 }
1153 for (; i < 4; i++) {
1154 if (!(writemask & (1<<i)))
1155 break;
1156 newmask |= 1<<i;
1157 len++;
1158 }
1159
1160 if (newmask != writemask) {
1161 need_stall = 1;
1162 /* _mesa_printf("need stall %x %x\n", newmask , writemask); */
1163 }
1164 else {
1165 struct brw_reg m1 = brw_message_reg(msg_reg_nr);
1166
1167 newmask = ~newmask & WRITEMASK_XYZW;
1168
1169 brw_push_insn_state(p);
1170
1171 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1172 brw_set_mask_control(p, BRW_MASK_DISABLE);
1173
1174 brw_MOV(p, m1, brw_vec8_grf(0,0));
1175 brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12));
1176
1177 brw_pop_insn_state(p);
1178
1179 src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
1180 dest = offset(dest, dst_offset);
1181 response_length = len * 2;
1182 }
1183 }
1184
1185 {
1186 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1187
1188 insn->header.predicate_control = 0; /* XXX */
1189 insn->header.compression_control = BRW_COMPRESSION_NONE;
1190 insn->header.destreg__conditonalmod = msg_reg_nr;
1191
1192 brw_set_dest(insn, dest);
1193 brw_set_src0(insn, src0);
1194 brw_set_sampler_message(p->brw, insn,
1195 binding_table_index,
1196 sampler,
1197 msg_type,
1198 response_length,
1199 msg_length,
1200 eot);
1201 }
1202
1203 if (need_stall) {
1204 struct brw_reg reg = vec8(offset(dest, response_length-1));
1205
1206 /* mov (8) r9.0<1>:f r9.0<8;8,1>:f { Align1 }
1207 */
1208 brw_push_insn_state(p);
1209 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1210 brw_MOV(p, reg, reg);
1211 brw_pop_insn_state(p);
1212 }
1213
1214 }
1215
1216 /* All these variables are pretty confusing - we might be better off
1217 * using bitmasks and macros for this, in the old style. Or perhaps
1218 * just having the caller instantiate the fields in dword3 itself.
1219 */
1220 void brw_urb_WRITE(struct brw_compile *p,
1221 struct brw_reg dest,
1222 GLuint msg_reg_nr,
1223 struct brw_reg src0,
1224 GLboolean allocate,
1225 GLboolean used,
1226 GLuint msg_length,
1227 GLuint response_length,
1228 GLboolean eot,
1229 GLboolean writes_complete,
1230 GLuint offset,
1231 GLuint swizzle)
1232 {
1233 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1234
1235 assert(msg_length < BRW_MAX_MRF);
1236
1237 brw_set_dest(insn, dest);
1238 brw_set_src0(insn, src0);
1239 brw_set_src1(insn, brw_imm_d(0));
1240
1241 insn->header.destreg__conditonalmod = msg_reg_nr;
1242
1243 brw_set_urb_message(insn,
1244 allocate,
1245 used,
1246 msg_length,
1247 response_length,
1248 eot,
1249 writes_complete,
1250 offset,
1251 swizzle);
1252 }
1253