i965: Use Ivybridge's "Legacy Data Port" for reads/writes.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_eu_emit.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32
33 #include "brw_context.h"
34 #include "brw_defines.h"
35 #include "brw_eu.h"
36
37 #include "glsl/ralloc.h"
38
39 /***********************************************************************
40 * Internal helper for constructing instructions
41 */
42
43 static void guess_execution_size(struct brw_compile *p,
44 struct brw_instruction *insn,
45 struct brw_reg reg)
46 {
47 if (reg.width == BRW_WIDTH_8 && p->compressed)
48 insn->header.execution_size = BRW_EXECUTE_16;
49 else
50 insn->header.execution_size = reg.width; /* note - definitions are compatible */
51 }
52
53
54 /**
55 * Prior to Sandybridge, the SEND instruction accepted non-MRF source
56 * registers, implicitly moving the operand to a message register.
57 *
58 * On Sandybridge, this is no longer the case. This function performs the
59 * explicit move; it should be called before emitting a SEND instruction.
60 */
61 void
62 gen6_resolve_implied_move(struct brw_compile *p,
63 struct brw_reg *src,
64 GLuint msg_reg_nr)
65 {
66 struct intel_context *intel = &p->brw->intel;
67 if (intel->gen < 6)
68 return;
69
70 if (src->file != BRW_ARCHITECTURE_REGISTER_FILE || src->nr != BRW_ARF_NULL) {
71 brw_push_insn_state(p);
72 brw_set_mask_control(p, BRW_MASK_DISABLE);
73 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
74 brw_MOV(p, retype(brw_message_reg(msg_reg_nr), BRW_REGISTER_TYPE_UD),
75 retype(*src, BRW_REGISTER_TYPE_UD));
76 brw_pop_insn_state(p);
77 }
78 *src = brw_message_reg(msg_reg_nr);
79 }
80
81 static void
82 gen7_convert_mrf_to_grf(struct brw_compile *p, struct brw_reg *reg)
83 {
84 struct intel_context *intel = &p->brw->intel;
85 if (intel->gen == 7 && reg->file == BRW_MESSAGE_REGISTER_FILE) {
86 reg->file = BRW_GENERAL_REGISTER_FILE;
87 reg->nr += 111;
88 }
89 }
90
91
/**
 * Encode \p dest as the destination operand of \p insn.
 *
 * Handles both direct and register-indirect addressing, in both align1 and
 * align16 access modes, and finally sets the instruction's execution size
 * from the destination width (see guess_execution_size()).
 */
void
brw_set_dest(struct brw_compile *p, struct brw_instruction *insn,
	     struct brw_reg dest)
{
   /* GRF/MRF register numbers are limited to 0..127; ARF numbers encode
    * the register class in high bits, and MRFs may be remapped below.
    */
   if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE &&
       dest.file != BRW_MESSAGE_REGISTER_FILE)
      assert(dest.nr < 128);

   /* On Gen7 MRFs are actually the top GRFs; remap before encoding. */
   gen7_convert_mrf_to_grf(p, &dest);

   insn->bits1.da1.dest_reg_file = dest.file;
   insn->bits1.da1.dest_reg_type = dest.type;
   insn->bits1.da1.dest_address_mode = dest.address_mode;

   if (dest.address_mode == BRW_ADDRESS_DIRECT) {
      insn->bits1.da1.dest_reg_nr = dest.nr;

      if (insn->header.access_mode == BRW_ALIGN_1) {
	 insn->bits1.da1.dest_subreg_nr = dest.subnr;
	 /* A stride-0 destination is not encodable; promote to stride 1. */
	 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
	    dest.hstride = BRW_HORIZONTAL_STRIDE_1;
	 insn->bits1.da1.dest_horiz_stride = dest.hstride;
      }
      else {
	 /* Align16 subregister numbers count 16-byte units. */
	 insn->bits1.da16.dest_subreg_nr = dest.subnr / 16;
	 insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask;
	 /* even ignored in da16, still need to set as '01' */
	 insn->bits1.da16.dest_horiz_stride = 1;
      }
   }
   else {
      insn->bits1.ia1.dest_subreg_nr = dest.subnr;

      /* These are different sizes in align1 vs align16:
       */
      if (insn->header.access_mode == BRW_ALIGN_1) {
	 insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset;
	 /* A stride-0 destination is not encodable; promote to stride 1. */
	 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
	    dest.hstride = BRW_HORIZONTAL_STRIDE_1;
	 insn->bits1.ia1.dest_horiz_stride = dest.hstride;
      }
      else {
	 insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset;
	 /* even ignored in da16, still need to set as '01' */
	 insn->bits1.ia16.dest_horiz_stride = 1;
      }
   }

   /* NEW: Set the execution size based on dest.width and
    * insn->compression_control:
    */
   guess_execution_size(p, insn, dest);
}
145
146 extern int reg_type_size[];
147
148 static void
149 validate_reg(struct brw_instruction *insn, struct brw_reg reg)
150 {
151 int hstride_for_reg[] = {0, 1, 2, 4};
152 int vstride_for_reg[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256};
153 int width_for_reg[] = {1, 2, 4, 8, 16};
154 int execsize_for_reg[] = {1, 2, 4, 8, 16};
155 int width, hstride, vstride, execsize;
156
157 if (reg.file == BRW_IMMEDIATE_VALUE) {
158 /* 3.3.6: Region Parameters. Restriction: Immediate vectors
159 * mean the destination has to be 128-bit aligned and the
160 * destination horiz stride has to be a word.
161 */
162 if (reg.type == BRW_REGISTER_TYPE_V) {
163 assert(hstride_for_reg[insn->bits1.da1.dest_horiz_stride] *
164 reg_type_size[insn->bits1.da1.dest_reg_type] == 2);
165 }
166
167 return;
168 }
169
170 if (reg.file == BRW_ARCHITECTURE_REGISTER_FILE &&
171 reg.file == BRW_ARF_NULL)
172 return;
173
174 assert(reg.hstride >= 0 && reg.hstride < Elements(hstride_for_reg));
175 hstride = hstride_for_reg[reg.hstride];
176
177 if (reg.vstride == 0xf) {
178 vstride = -1;
179 } else {
180 assert(reg.vstride >= 0 && reg.vstride < Elements(vstride_for_reg));
181 vstride = vstride_for_reg[reg.vstride];
182 }
183
184 assert(reg.width >= 0 && reg.width < Elements(width_for_reg));
185 width = width_for_reg[reg.width];
186
187 assert(insn->header.execution_size >= 0 &&
188 insn->header.execution_size < Elements(execsize_for_reg));
189 execsize = execsize_for_reg[insn->header.execution_size];
190
191 /* Restrictions from 3.3.10: Register Region Restrictions. */
192 /* 3. */
193 assert(execsize >= width);
194
195 /* 4. */
196 if (execsize == width && hstride != 0) {
197 assert(vstride == -1 || vstride == width * hstride);
198 }
199
200 /* 5. */
201 if (execsize == width && hstride == 0) {
202 /* no restriction on vstride. */
203 }
204
205 /* 6. */
206 if (width == 1) {
207 assert(hstride == 0);
208 }
209
210 /* 7. */
211 if (execsize == 1 && width == 1) {
212 assert(hstride == 0);
213 assert(vstride == 0);
214 }
215
216 /* 8. */
217 if (vstride == 0 && hstride == 0) {
218 assert(width == 1);
219 }
220
221 /* 10. Check destination issues. */
222 }
223
224 void
225 brw_set_src0(struct brw_compile *p, struct brw_instruction *insn,
226 struct brw_reg reg)
227 {
228 if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE)
229 assert(reg.nr < 128);
230
231 gen7_convert_mrf_to_grf(p, &reg);
232
233 validate_reg(insn, reg);
234
235 insn->bits1.da1.src0_reg_file = reg.file;
236 insn->bits1.da1.src0_reg_type = reg.type;
237 insn->bits2.da1.src0_abs = reg.abs;
238 insn->bits2.da1.src0_negate = reg.negate;
239 insn->bits2.da1.src0_address_mode = reg.address_mode;
240
241 if (reg.file == BRW_IMMEDIATE_VALUE) {
242 insn->bits3.ud = reg.dw1.ud;
243
244 /* Required to set some fields in src1 as well:
245 */
246 insn->bits1.da1.src1_reg_file = 0; /* arf */
247 insn->bits1.da1.src1_reg_type = reg.type;
248 }
249 else
250 {
251 if (reg.address_mode == BRW_ADDRESS_DIRECT) {
252 if (insn->header.access_mode == BRW_ALIGN_1) {
253 insn->bits2.da1.src0_subreg_nr = reg.subnr;
254 insn->bits2.da1.src0_reg_nr = reg.nr;
255 }
256 else {
257 insn->bits2.da16.src0_subreg_nr = reg.subnr / 16;
258 insn->bits2.da16.src0_reg_nr = reg.nr;
259 }
260 }
261 else {
262 insn->bits2.ia1.src0_subreg_nr = reg.subnr;
263
264 if (insn->header.access_mode == BRW_ALIGN_1) {
265 insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset;
266 }
267 else {
268 insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset;
269 }
270 }
271
272 if (insn->header.access_mode == BRW_ALIGN_1) {
273 if (reg.width == BRW_WIDTH_1 &&
274 insn->header.execution_size == BRW_EXECUTE_1) {
275 insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
276 insn->bits2.da1.src0_width = BRW_WIDTH_1;
277 insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0;
278 }
279 else {
280 insn->bits2.da1.src0_horiz_stride = reg.hstride;
281 insn->bits2.da1.src0_width = reg.width;
282 insn->bits2.da1.src0_vert_stride = reg.vstride;
283 }
284 }
285 else {
286 insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
287 insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
288 insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
289 insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
290
291 /* This is an oddity of the fact we're using the same
292 * descriptions for registers in align_16 as align_1:
293 */
294 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
295 insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4;
296 else
297 insn->bits2.da16.src0_vert_stride = reg.vstride;
298 }
299 }
300 }
301
302
/**
 * Encode \p reg as the second source operand of \p insn.
 *
 * src1 may not be a message register, and only direct addressing is
 * supported for it (a hardware restriction, per the comment below).
 */
void brw_set_src1(struct brw_compile *p,
		  struct brw_instruction *insn,
		  struct brw_reg reg)
{
   assert(reg.file != BRW_MESSAGE_REGISTER_FILE);

   assert(reg.nr < 128);

   /* On Gen7 MRFs are actually the top GRFs; remap before encoding. */
   gen7_convert_mrf_to_grf(p, &reg);

   validate_reg(insn, reg);

   insn->bits1.da1.src1_reg_file = reg.file;
   insn->bits1.da1.src1_reg_type = reg.type;
   insn->bits3.da1.src1_abs = reg.abs;
   insn->bits3.da1.src1_negate = reg.negate;

   /* Only src1 can be immediate in two-argument instructions.
    */
   assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE);

   if (reg.file == BRW_IMMEDIATE_VALUE) {
      insn->bits3.ud = reg.dw1.ud;
   }
   else {
      /* This is a hardware restriction, which may or may not be lifted
       * in the future:
       */
      assert (reg.address_mode == BRW_ADDRESS_DIRECT);
      /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */

      if (insn->header.access_mode == BRW_ALIGN_1) {
	 insn->bits3.da1.src1_subreg_nr = reg.subnr;
	 insn->bits3.da1.src1_reg_nr = reg.nr;
      }
      else {
	 /* Align16 subregister numbers count 16-byte units. */
	 insn->bits3.da16.src1_subreg_nr = reg.subnr / 16;
	 insn->bits3.da16.src1_reg_nr = reg.nr;
      }

      if (insn->header.access_mode == BRW_ALIGN_1) {
	 /* A scalar operand of a scalar instruction collapses to the
	  * <0;1,0> region.
	  */
	 if (reg.width == BRW_WIDTH_1 &&
	     insn->header.execution_size == BRW_EXECUTE_1) {
	    insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
	    insn->bits3.da1.src1_width = BRW_WIDTH_1;
	    insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0;
	 }
	 else {
	    insn->bits3.da1.src1_horiz_stride = reg.hstride;
	    insn->bits3.da1.src1_width = reg.width;
	    insn->bits3.da1.src1_vert_stride = reg.vstride;
	 }
      }
      else {
	 insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
	 insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
	 insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
	 insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);

	 /* This is an oddity of the fact we're using the same
	  * descriptions for registers in align_16 as align_1:
	  */
	 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
	    insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4;
	 else
	    insn->bits3.da16.src1_vert_stride = reg.vstride;
      }
   }
}
372
373
374
/**
 * Fill in the message descriptor (src1/bits3) of \p insn for a SEND to the
 * extended math shared function.
 *
 * The message and response lengths are inferred from \p function: two-source
 * functions (POW, the INT_DIV variants) take a two-register payload, and
 * two-result functions (SINCOS, INT_DIV quotient+remainder) return two
 * registers.  Layout differs between Gen5 and the pre-Gen5 encoding.
 */
static void brw_set_math_message( struct brw_compile *p,
				  struct brw_instruction *insn,
				  GLuint function,
				  GLuint integer_type,
				  bool low_precision,
				  bool saturate,
				  GLuint dataType )
{
   struct brw_context *brw = p->brw;
   struct intel_context *intel = &brw->intel;
   unsigned msg_length;
   unsigned response_length;

   /* Infer message length from the function */
   switch (function) {
   case BRW_MATH_FUNCTION_POW:
   case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT:
   case BRW_MATH_FUNCTION_INT_DIV_REMAINDER:
   case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER:
      msg_length = 2;
      break;
   default:
      msg_length = 1;
      break;
   }

   /* Infer response length from the function */
   switch (function) {
   case BRW_MATH_FUNCTION_SINCOS:
   case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER:
      response_length = 2;
      break;
   default:
      response_length = 1;
      break;
   }

   /* Clear src1 (bits3) before writing descriptor fields into it. */
   brw_set_src1(p, insn, brw_imm_d(0));
   if (intel->gen == 5) {
      insn->bits3.math_gen5.function = function;
      insn->bits3.math_gen5.int_type = integer_type;
      insn->bits3.math_gen5.precision = low_precision;
      insn->bits3.math_gen5.saturate = saturate;
      insn->bits3.math_gen5.data_type = dataType;
      insn->bits3.math_gen5.snapshot = 0;
      insn->bits3.math_gen5.header_present = 0;
      insn->bits3.math_gen5.response_length = response_length;
      insn->bits3.math_gen5.msg_length = msg_length;
      insn->bits3.math_gen5.end_of_thread = 0;
      /* On Gen5 the shared-function ID lives in bits2. */
      insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_MATH;
      insn->bits2.send_gen5.end_of_thread = 0;
   } else {
      insn->bits3.math.function = function;
      insn->bits3.math.int_type = integer_type;
      insn->bits3.math.precision = low_precision;
      insn->bits3.math.saturate = saturate;
      insn->bits3.math.data_type = dataType;
      insn->bits3.math.response_length = response_length;
      insn->bits3.math.msg_length = msg_length;
      insn->bits3.math.msg_target = BRW_MESSAGE_TARGET_MATH;
      insn->bits3.math.end_of_thread = 0;
   }
}
438
439
/**
 * Fill in the message descriptor of \p insn for a URB FF_SYNC message
 * (Gen5+ URB opcode 1), used for fixed-function synchronization.
 *
 * Several urb_gen5 fields are explicitly zeroed because FF_SYNC ignores
 * them; \p response_length may be 1 or 0.
 */
static void brw_set_ff_sync_message(struct brw_compile *p,
				    struct brw_instruction *insn,
				    bool allocate,
				    GLuint response_length,
				    bool end_of_thread)
{
   struct brw_context *brw = p->brw;
   struct intel_context *intel = &brw->intel;
   /* Clear src1 (bits3) before writing descriptor fields into it. */
   brw_set_src1(p, insn, brw_imm_d(0));

   insn->bits3.urb_gen5.opcode = 1; /* FF_SYNC */
   insn->bits3.urb_gen5.offset = 0; /* Not used by FF_SYNC */
   insn->bits3.urb_gen5.swizzle_control = 0; /* Not used by FF_SYNC */
   insn->bits3.urb_gen5.allocate = allocate;
   insn->bits3.urb_gen5.used = 0; /* Not used by FF_SYNC */
   insn->bits3.urb_gen5.complete = 0; /* Not used by FF_SYNC */
   insn->bits3.urb_gen5.header_present = 1;
   insn->bits3.urb_gen5.response_length = response_length; /* may be 1 or 0 */
   insn->bits3.urb_gen5.msg_length = 1;
   insn->bits3.urb_gen5.end_of_thread = end_of_thread;
   if (intel->gen >= 6) {
      /* On Gen6+ the shared-function ID is carried in the condmod field. */
      insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_URB;
   } else {
      insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_URB;
      insn->bits2.send_gen5.end_of_thread = end_of_thread;
   }
}
467
/**
 * Fill in the message descriptor of \p insn for a URB write.
 *
 * Three encodings are handled: Gen7 (URB_WRITE_HWORD, SFID in the condmod
 * field), Gen5/Gen6 (urb_gen5 layout), and pre-Gen5 (urb layout with the
 * message target in bits3).
 */
static void brw_set_urb_message( struct brw_compile *p,
				 struct brw_instruction *insn,
				 bool allocate,
				 bool used,
				 GLuint msg_length,
				 GLuint response_length,
				 bool end_of_thread,
				 bool complete,
				 GLuint offset,
				 GLuint swizzle_control )
{
   struct brw_context *brw = p->brw;
   struct intel_context *intel = &brw->intel;
   /* Clear src1 (bits3) before writing descriptor fields into it. */
   brw_set_src1(p, insn, brw_imm_d(0));

   if (intel->gen == 7) {
      insn->bits3.urb_gen7.opcode = 0;	/* URB_WRITE_HWORD */
      insn->bits3.urb_gen7.offset = offset;
      /* Gen7 URB_WRITE_HWORD has no transpose mode. */
      assert(swizzle_control != BRW_URB_SWIZZLE_TRANSPOSE);
      insn->bits3.urb_gen7.swizzle_control = swizzle_control;
      /* per_slot_offset = 0 makes it ignore offsets in message header */
      insn->bits3.urb_gen7.per_slot_offset = 0;
      insn->bits3.urb_gen7.complete = complete;
      insn->bits3.urb_gen7.header_present = 1;
      insn->bits3.urb_gen7.response_length = response_length;
      insn->bits3.urb_gen7.msg_length = msg_length;
      insn->bits3.urb_gen7.end_of_thread = end_of_thread;
      insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_URB;
   } else if (intel->gen >= 5) {
      insn->bits3.urb_gen5.opcode = 0;	/* URB_WRITE */
      insn->bits3.urb_gen5.offset = offset;
      insn->bits3.urb_gen5.swizzle_control = swizzle_control;
      insn->bits3.urb_gen5.allocate = allocate;
      insn->bits3.urb_gen5.used = used;	/* ? */
      insn->bits3.urb_gen5.complete = complete;
      insn->bits3.urb_gen5.header_present = 1;
      insn->bits3.urb_gen5.response_length = response_length;
      insn->bits3.urb_gen5.msg_length = msg_length;
      insn->bits3.urb_gen5.end_of_thread = end_of_thread;
      if (intel->gen >= 6) {
	 /* For SNB, the SFID bits moved to the condmod bits, and
	  * EOT stayed in bits3 above.  Does the EOT bit setting
	  * below on Ironlake even do anything?
	  */
	 insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_URB;
      } else {
	 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_URB;
	 insn->bits2.send_gen5.end_of_thread = end_of_thread;
      }
   } else {
      insn->bits3.urb.opcode = 0;	/* ? */
      insn->bits3.urb.offset = offset;
      insn->bits3.urb.swizzle_control = swizzle_control;
      insn->bits3.urb.allocate = allocate;
      insn->bits3.urb.used = used;	/* ? */
      insn->bits3.urb.complete = complete;
      insn->bits3.urb.response_length = response_length;
      insn->bits3.urb.msg_length = msg_length;
      insn->bits3.urb.msg_target = BRW_MESSAGE_TARGET_URB;
      insn->bits3.urb.end_of_thread = end_of_thread;
   }
}
530
/**
 * Fill in the message descriptor of \p insn for a data port write.
 *
 * Per-generation SFID routing: Gen7 uses the data cache except for render
 * target writes, which go to the render cache; Gen6 always uses the render
 * cache; Gen5 and earlier use the dataport-write message target.
 */
void
brw_set_dp_write_message(struct brw_compile *p,
			 struct brw_instruction *insn,
			 GLuint binding_table_index,
			 GLuint msg_control,
			 GLuint msg_type,
			 GLuint msg_length,
			 bool header_present,
			 GLuint pixel_scoreboard_clear,
			 GLuint response_length,
			 GLuint end_of_thread,
			 GLuint send_commit_msg)
{
   struct brw_context *brw = p->brw;
   struct intel_context *intel = &brw->intel;
   /* Clear src1 (bits3) before writing descriptor fields into it. */
   brw_set_src1(p, insn, brw_imm_ud(0));

   if (intel->gen >= 7) {
      /* Use the Render Cache for RT writes; otherwise use the Data Cache */
      unsigned sfid = GEN7_MESSAGE_TARGET_DP_DATA_CACHE;
      if (msg_type == GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE)
	 sfid = GEN6_MESSAGE_TARGET_DP_RENDER_CACHE;

      insn->header.destreg__conditionalmod = sfid;

      insn->bits3.gen7_dp.binding_table_index = binding_table_index;
      insn->bits3.gen7_dp.msg_control = msg_control;
      insn->bits3.gen7_dp.pixel_scoreboard_clear = pixel_scoreboard_clear;
      insn->bits3.gen7_dp.msg_type = msg_type;
      insn->bits3.gen7_dp.header_present = header_present;
      insn->bits3.gen7_dp.response_length = response_length;
      insn->bits3.gen7_dp.msg_length = msg_length;
      insn->bits3.gen7_dp.end_of_thread = end_of_thread;
   } else if (intel->gen == 6) {
      insn->bits3.gen6_dp.binding_table_index = binding_table_index;
      insn->bits3.gen6_dp.msg_control = msg_control;
      insn->bits3.gen6_dp.pixel_scoreboard_clear = pixel_scoreboard_clear;
      insn->bits3.gen6_dp.msg_type = msg_type;
      insn->bits3.gen6_dp.send_commit_msg = send_commit_msg;
      insn->bits3.gen6_dp.header_present = header_present;
      insn->bits3.gen6_dp.response_length = response_length;
      insn->bits3.gen6_dp.msg_length = msg_length;
      insn->bits3.gen6_dp.end_of_thread = end_of_thread;

      /* We always use the render cache for write messages */
      insn->header.destreg__conditionalmod = GEN6_MESSAGE_TARGET_DP_RENDER_CACHE;
   } else if (intel->gen == 5) {
      insn->bits3.dp_write_gen5.binding_table_index = binding_table_index;
      insn->bits3.dp_write_gen5.msg_control = msg_control;
      insn->bits3.dp_write_gen5.pixel_scoreboard_clear = pixel_scoreboard_clear;
      insn->bits3.dp_write_gen5.msg_type = msg_type;
      insn->bits3.dp_write_gen5.send_commit_msg = send_commit_msg;
      insn->bits3.dp_write_gen5.header_present = header_present;
      insn->bits3.dp_write_gen5.response_length = response_length;
      insn->bits3.dp_write_gen5.msg_length = msg_length;
      insn->bits3.dp_write_gen5.end_of_thread = end_of_thread;
      insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
      insn->bits2.send_gen5.end_of_thread = end_of_thread;
   } else {
      insn->bits3.dp_write.binding_table_index = binding_table_index;
      insn->bits3.dp_write.msg_control = msg_control;
      insn->bits3.dp_write.pixel_scoreboard_clear = pixel_scoreboard_clear;
      insn->bits3.dp_write.msg_type = msg_type;
      insn->bits3.dp_write.send_commit_msg = send_commit_msg;
      insn->bits3.dp_write.response_length = response_length;
      insn->bits3.dp_write.msg_length = msg_length;
      insn->bits3.dp_write.msg_target = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
      insn->bits3.dp_write.end_of_thread = end_of_thread;
   }
}
601
/**
 * Fill in the message descriptor of \p insn for a data port read.
 *
 * Gen7 routes all reads through the data cache.  On Gen6 there is no
 * standalone data cache, so \p target_cache is mapped to an SFID: a
 * data-cache request is issued to the sampler cache, anything else to the
 * render cache (NOTE(review): presumably because gen6 constant/scratch
 * reads go through the sampler — confirm against the Sandybridge PRM).
 * Gen5 and earlier carry the target cache directly in the descriptor.
 */
void
brw_set_dp_read_message(struct brw_compile *p,
			struct brw_instruction *insn,
			GLuint binding_table_index,
			GLuint msg_control,
			GLuint msg_type,
			GLuint target_cache,
			GLuint msg_length,
			GLuint response_length)
{
   struct brw_context *brw = p->brw;
   struct intel_context *intel = &brw->intel;
   /* Clear src1 (bits3) before writing descriptor fields into it. */
   brw_set_src1(p, insn, brw_imm_d(0));

   if (intel->gen >= 7) {
      insn->bits3.gen7_dp.binding_table_index = binding_table_index;
      insn->bits3.gen7_dp.msg_control = msg_control;
      insn->bits3.gen7_dp.pixel_scoreboard_clear = 0;
      insn->bits3.gen7_dp.msg_type = msg_type;
      insn->bits3.gen7_dp.header_present = 1;
      insn->bits3.gen7_dp.response_length = response_length;
      insn->bits3.gen7_dp.msg_length = msg_length;
      insn->bits3.gen7_dp.end_of_thread = 0;
      insn->header.destreg__conditionalmod = GEN7_MESSAGE_TARGET_DP_DATA_CACHE;
   } else if (intel->gen == 6) {
      uint32_t target_function;

      if (target_cache == BRW_DATAPORT_READ_TARGET_DATA_CACHE)
	 target_function = GEN6_MESSAGE_TARGET_DP_SAMPLER_CACHE;
      else
	 target_function = GEN6_MESSAGE_TARGET_DP_RENDER_CACHE;

      insn->bits3.gen6_dp.binding_table_index = binding_table_index;
      insn->bits3.gen6_dp.msg_control = msg_control;
      insn->bits3.gen6_dp.pixel_scoreboard_clear = 0;
      insn->bits3.gen6_dp.msg_type = msg_type;
      insn->bits3.gen6_dp.send_commit_msg = 0;
      insn->bits3.gen6_dp.header_present = 1;
      insn->bits3.gen6_dp.response_length = response_length;
      insn->bits3.gen6_dp.msg_length = msg_length;
      insn->bits3.gen6_dp.end_of_thread = 0;
      insn->header.destreg__conditionalmod = target_function;
   } else if (intel->gen == 5) {
      insn->bits3.dp_read_gen5.binding_table_index = binding_table_index;
      insn->bits3.dp_read_gen5.msg_control = msg_control;
      insn->bits3.dp_read_gen5.msg_type = msg_type;
      insn->bits3.dp_read_gen5.target_cache = target_cache;
      insn->bits3.dp_read_gen5.header_present = 1;
      insn->bits3.dp_read_gen5.response_length = response_length;
      insn->bits3.dp_read_gen5.msg_length = msg_length;
      insn->bits3.dp_read_gen5.pad1 = 0;
      insn->bits3.dp_read_gen5.end_of_thread = 0;
      insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_READ;
      insn->bits2.send_gen5.end_of_thread = 0;
   } else if (intel->is_g4x) {
      insn->bits3.dp_read_g4x.binding_table_index = binding_table_index; /*0:7*/
      insn->bits3.dp_read_g4x.msg_control = msg_control;  /*8:10*/
      insn->bits3.dp_read_g4x.msg_type = msg_type;  /*11:13*/
      insn->bits3.dp_read_g4x.target_cache = target_cache;  /*14:15*/
      insn->bits3.dp_read_g4x.response_length = response_length;  /*16:19*/
      insn->bits3.dp_read_g4x.msg_length = msg_length;  /*20:23*/
      insn->bits3.dp_read_g4x.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ; /*24:27*/
      insn->bits3.dp_read_g4x.pad1 = 0;
      insn->bits3.dp_read_g4x.end_of_thread = 0;
   } else {
      insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/
      insn->bits3.dp_read.msg_control = msg_control;  /*8:11*/
      insn->bits3.dp_read.msg_type = msg_type;  /*12:13*/
      insn->bits3.dp_read.target_cache = target_cache;  /*14:15*/
      insn->bits3.dp_read.response_length = response_length;  /*16:19*/
      insn->bits3.dp_read.msg_length = msg_length;  /*20:23*/
      insn->bits3.dp_read.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ; /*24:27*/
      insn->bits3.dp_read.pad1 = 0;  /*28:30*/
      insn->bits3.dp_read.end_of_thread = 0;  /*31*/
   }
}
678
/**
 * Fill in the message descriptor of \p insn for a sampler message.
 *
 * Four descriptor layouts are handled: Gen7, Gen5/Gen6, G4x, and the
 * original 965.  \p eot must be false (asserted); pre-G4x messages always
 * request a FLOAT32 return format.
 */
static void brw_set_sampler_message(struct brw_compile *p,
                                    struct brw_instruction *insn,
                                    GLuint binding_table_index,
                                    GLuint sampler,
                                    GLuint msg_type,
                                    GLuint response_length,
                                    GLuint msg_length,
                                    bool eot,
                                    GLuint header_present,
                                    GLuint simd_mode)
{
   struct brw_context *brw = p->brw;
   struct intel_context *intel = &brw->intel;
   assert(eot == 0);
   /* Clear src1 (bits3) before writing descriptor fields into it. */
   brw_set_src1(p, insn, brw_imm_d(0));

   if (intel->gen >= 7) {
      insn->bits3.sampler_gen7.binding_table_index = binding_table_index;
      insn->bits3.sampler_gen7.sampler = sampler;
      insn->bits3.sampler_gen7.msg_type = msg_type;
      insn->bits3.sampler_gen7.simd_mode = simd_mode;
      insn->bits3.sampler_gen7.header_present = header_present;
      insn->bits3.sampler_gen7.response_length = response_length;
      insn->bits3.sampler_gen7.msg_length = msg_length;
      insn->bits3.sampler_gen7.end_of_thread = eot;
      insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_SAMPLER;
   } else if (intel->gen >= 5) {
      insn->bits3.sampler_gen5.binding_table_index = binding_table_index;
      insn->bits3.sampler_gen5.sampler = sampler;
      insn->bits3.sampler_gen5.msg_type = msg_type;
      insn->bits3.sampler_gen5.simd_mode = simd_mode;
      insn->bits3.sampler_gen5.header_present = header_present;
      insn->bits3.sampler_gen5.response_length = response_length;
      insn->bits3.sampler_gen5.msg_length = msg_length;
      insn->bits3.sampler_gen5.end_of_thread = eot;
      if (intel->gen >= 6)
	 /* On Gen6+ the shared-function ID lives in the condmod field. */
	 insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_SAMPLER;
      else {
	 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_SAMPLER;
	 insn->bits2.send_gen5.end_of_thread = eot;
      }
   } else if (intel->is_g4x) {
      insn->bits3.sampler_g4x.binding_table_index = binding_table_index;
      insn->bits3.sampler_g4x.sampler = sampler;
      insn->bits3.sampler_g4x.msg_type = msg_type;
      insn->bits3.sampler_g4x.response_length = response_length;
      insn->bits3.sampler_g4x.msg_length = msg_length;
      insn->bits3.sampler_g4x.end_of_thread = eot;
      insn->bits3.sampler_g4x.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
   } else {
      insn->bits3.sampler.binding_table_index = binding_table_index;
      insn->bits3.sampler.sampler = sampler;
      insn->bits3.sampler.msg_type = msg_type;
      insn->bits3.sampler.return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32;
      insn->bits3.sampler.response_length = response_length;
      insn->bits3.sampler.msg_length = msg_length;
      insn->bits3.sampler.end_of_thread = eot;
      insn->bits3.sampler.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
   }
}
739
740
#define next_insn brw_next_insn
/**
 * Append a new instruction to the program store and return a pointer to it.
 *
 * The instruction is initialized by copying p->current, so it inherits
 * whatever compile state (execution size, predication, condmod, ...) the
 * brw_set_* helpers have staged there.
 */
struct brw_instruction *
brw_next_insn(struct brw_compile *p, GLuint opcode)
{
   struct brw_instruction *insn;

   assert(p->nr_insn + 1 < BRW_EU_MAX_INSN);

   insn = &p->store[p->nr_insn++];
   /* Copy the staged state BEFORE clearing one-shot flags below. */
   memcpy(insn, p->current, sizeof(*insn));

   /* Reset this one-shot flag:
    */

   if (p->current->header.destreg__conditionalmod) {
      p->current->header.destreg__conditionalmod = 0;
      /* NOTE(review): a pending condmod also resets predication to NORMAL
       * for subsequent instructions — presumably so a following predicated
       * instruction uses the flag the condmod just wrote; confirm.
       */
      p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
   }

   insn->header.opcode = opcode;
   return insn;
}
763
764 static struct brw_instruction *brw_alu1( struct brw_compile *p,
765 GLuint opcode,
766 struct brw_reg dest,
767 struct brw_reg src )
768 {
769 struct brw_instruction *insn = next_insn(p, opcode);
770 brw_set_dest(p, insn, dest);
771 brw_set_src0(p, insn, src);
772 return insn;
773 }
774
775 static struct brw_instruction *brw_alu2(struct brw_compile *p,
776 GLuint opcode,
777 struct brw_reg dest,
778 struct brw_reg src0,
779 struct brw_reg src1 )
780 {
781 struct brw_instruction *insn = next_insn(p, opcode);
782 brw_set_dest(p, insn, dest);
783 brw_set_src0(p, insn, src0);
784 brw_set_src1(p, insn, src1);
785 return insn;
786 }
787
788
789 /***********************************************************************
790 * Convenience routines.
791 */
/* ALU1(OP)/ALU2(OP) expand to the public one- and two-source emitter
 * brw_OP(), forwarding to brw_alu1()/brw_alu2() with BRW_OPCODE_OP.
 */
#define ALU1(OP)					\
struct brw_instruction *brw_##OP(struct brw_compile *p,	\
	      struct brw_reg dest,			\
	      struct brw_reg src0)			\
{							\
   return brw_alu1(p, BRW_OPCODE_##OP, dest, src0);	\
}

#define ALU2(OP)					\
struct brw_instruction *brw_##OP(struct brw_compile *p,	\
	      struct brw_reg dest,			\
	      struct brw_reg src0,			\
	      struct brw_reg src1)			\
{							\
   return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1);	\
}

/* Rounding operations (other than RNDD) require two instructions - the first
 * stores a rounded value (possibly the wrong way) in the dest register, but
 * also sets a per-channel "increment bit" in the flag register.  A predicated
 * add of 1.0 fixes dest to contain the desired result.
 *
 * Sandybridge and later appear to round correctly without an ADD.
 */
#define ROUND(OP)						\
void brw_##OP(struct brw_compile *p,				\
	      struct brw_reg dest,				\
	      struct brw_reg src)				\
{								\
   struct brw_instruction *rnd, *add;				\
   rnd = next_insn(p, BRW_OPCODE_##OP);				\
   brw_set_dest(p, rnd, dest);					\
   brw_set_src0(p, rnd, src);					\
								\
   if (p->brw->intel.gen < 6) {					\
      /* turn on round-increments */				\
      rnd->header.destreg__conditionalmod = BRW_CONDITIONAL_R;	\
      add = brw_ADD(p, dest, dest, brw_imm_f(1.0f));		\
      add->header.predicate_control = BRW_PREDICATE_NORMAL;	\
   }								\
}

/* Instantiate the public emitters for the simple ALU opcodes. */
ALU1(MOV)
ALU2(SEL)
ALU1(NOT)
ALU2(AND)
ALU2(OR)
ALU2(XOR)
ALU2(SHR)
ALU2(SHL)
ALU2(RSR)
ALU2(RSL)
ALU2(ASR)
ALU1(FRC)
ALU1(RNDD)
ALU2(MAC)
ALU2(MACH)
ALU1(LZD)
ALU2(DP4)
ALU2(DPH)
ALU2(DP3)
ALU2(DP2)
ALU2(LINE)
ALU2(PLN)

/* Two-instruction rounding emitters (see ROUND above). */
ROUND(RNDZ)
ROUND(RNDE)
861
862
863 struct brw_instruction *brw_ADD(struct brw_compile *p,
864 struct brw_reg dest,
865 struct brw_reg src0,
866 struct brw_reg src1)
867 {
868 /* 6.2.2: add */
869 if (src0.type == BRW_REGISTER_TYPE_F ||
870 (src0.file == BRW_IMMEDIATE_VALUE &&
871 src0.type == BRW_REGISTER_TYPE_VF)) {
872 assert(src1.type != BRW_REGISTER_TYPE_UD);
873 assert(src1.type != BRW_REGISTER_TYPE_D);
874 }
875
876 if (src1.type == BRW_REGISTER_TYPE_F ||
877 (src1.file == BRW_IMMEDIATE_VALUE &&
878 src1.type == BRW_REGISTER_TYPE_VF)) {
879 assert(src0.type != BRW_REGISTER_TYPE_UD);
880 assert(src0.type != BRW_REGISTER_TYPE_D);
881 }
882
883 return brw_alu2(p, BRW_OPCODE_ADD, dest, src0, src1);
884 }
885
886 struct brw_instruction *brw_MUL(struct brw_compile *p,
887 struct brw_reg dest,
888 struct brw_reg src0,
889 struct brw_reg src1)
890 {
891 /* 6.32.38: mul */
892 if (src0.type == BRW_REGISTER_TYPE_D ||
893 src0.type == BRW_REGISTER_TYPE_UD ||
894 src1.type == BRW_REGISTER_TYPE_D ||
895 src1.type == BRW_REGISTER_TYPE_UD) {
896 assert(dest.type != BRW_REGISTER_TYPE_F);
897 }
898
899 if (src0.type == BRW_REGISTER_TYPE_F ||
900 (src0.file == BRW_IMMEDIATE_VALUE &&
901 src0.type == BRW_REGISTER_TYPE_VF)) {
902 assert(src1.type != BRW_REGISTER_TYPE_UD);
903 assert(src1.type != BRW_REGISTER_TYPE_D);
904 }
905
906 if (src1.type == BRW_REGISTER_TYPE_F ||
907 (src1.file == BRW_IMMEDIATE_VALUE &&
908 src1.type == BRW_REGISTER_TYPE_VF)) {
909 assert(src0.type != BRW_REGISTER_TYPE_UD);
910 assert(src0.type != BRW_REGISTER_TYPE_D);
911 }
912
913 assert(src0.file != BRW_ARCHITECTURE_REGISTER_FILE ||
914 src0.nr != BRW_ARF_ACCUMULATOR);
915 assert(src1.file != BRW_ARCHITECTURE_REGISTER_FILE ||
916 src1.nr != BRW_ARF_ACCUMULATOR);
917
918 return brw_alu2(p, BRW_OPCODE_MUL, dest, src0, src1);
919 }
920
921
922 void brw_NOP(struct brw_compile *p)
923 {
924 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP);
925 brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
926 brw_set_src0(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
927 brw_set_src1(p, insn, brw_imm_ud(0x0));
928 }
929
930
931
932
933
934 /***********************************************************************
935 * Comparisons, if/else/endif
936 */
937
938 struct brw_instruction *brw_JMPI(struct brw_compile *p,
939 struct brw_reg dest,
940 struct brw_reg src0,
941 struct brw_reg src1)
942 {
943 struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1);
944
945 insn->header.execution_size = 1;
946 insn->header.compression_control = BRW_COMPRESSION_NONE;
947 insn->header.mask_control = BRW_MASK_DISABLE;
948
949 p->current->header.predicate_control = BRW_PREDICATE_NONE;
950
951 return insn;
952 }
953
954 static void
955 push_if_stack(struct brw_compile *p, struct brw_instruction *inst)
956 {
957 p->if_stack[p->if_stack_depth] = inst;
958
959 p->if_stack_depth++;
960 if (p->if_stack_array_size <= p->if_stack_depth) {
961 p->if_stack_array_size *= 2;
962 p->if_stack = reralloc(p->mem_ctx, p->if_stack, struct brw_instruction *,
963 p->if_stack_array_size);
964 }
965 }
966
967 /* EU takes the value from the flag register and pushes it onto some
968 * sort of a stack (presumably merging with any flag value already on
969 * the stack). Within an if block, the flags at the top of the stack
970 * control execution on each channel of the unit, eg. on each of the
971 * 16 pixel values in our wm programs.
972 *
973 * When the matching 'else' instruction is reached (presumably by
974 * countdown of the instruction count patched in by our ELSE/ENDIF
 * functions), the relevant flags are inverted.
976 *
977 * When the matching 'endif' instruction is reached, the flags are
978 * popped off. If the stack is now empty, normal execution resumes.
979 */
/* Emit an IF instruction with placeholder jump fields and push it on
 * the if-stack; the actual jump targets are filled in later by
 * patch_IF_ELSE(), called from brw_ENDIF().
 */
struct brw_instruction *
brw_IF(struct brw_compile *p, GLuint execute_size)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_IF);

   /* Override the defaults for this instruction:
    */
   if (intel->gen < 6) {
      /* Pre-gen6: IP-relative form; the jump count is patched later. */
      brw_set_dest(p, insn, brw_ip_reg());
      brw_set_src0(p, insn, brw_ip_reg());
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else if (intel->gen == 6) {
      /* Gen6: the jump count lives in bits1 (zeroed here, patched later). */
      brw_set_dest(p, insn, brw_imm_w(0));
      insn->bits1.branch_gen6.jump_count = 0;
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   } else {
      /* Gen7+: branch targets are the JIP/UIP fields in bits3. */
      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_ud(0));
      insn->bits3.break_cont.jip = 0;
      insn->bits3.break_cont.uip = 0;
   }

   insn->header.execution_size = execute_size;
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.predicate_control = BRW_PREDICATE_NORMAL;
   insn->header.mask_control = BRW_MASK_ENABLE;
   if (!p->single_program_flow)
      insn->header.thread_control = BRW_THREAD_SWITCH;

   /* The predicate applies to the IF itself, not to following code. */
   p->current->header.predicate_control = BRW_PREDICATE_NONE;

   push_if_stack(p, insn);
   return insn;
}
1019
/* This function is only used for gen6-style IF instructions with an
 * embedded comparison (conditional modifier).  It is not used on gen7.
 *
 * The IF itself performs the comparison of src0 against src1 using
 * 'conditional'; the jump count is left zero here and patched later by
 * patch_IF_ELSE() via the if-stack.
 */
struct brw_instruction *
gen6_IF(struct brw_compile *p, uint32_t conditional,
	struct brw_reg src0, struct brw_reg src1)
{
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_IF);

   brw_set_dest(p, insn, brw_imm_w(0));
   if (p->compressed) {
      insn->header.execution_size = BRW_EXECUTE_16;
   } else {
      insn->header.execution_size = BRW_EXECUTE_8;
   }
   insn->bits1.branch_gen6.jump_count = 0;
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, src1);

   assert(insn->header.compression_control == BRW_COMPRESSION_NONE);
   assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
   /* The conditional modifier shares the destreg field in the header. */
   insn->header.destreg__conditionalmod = conditional;

   if (!p->single_program_flow)
      insn->header.thread_control = BRW_THREAD_SWITCH;

   push_if_stack(p, insn);
   return insn;
}
1051
/**
 * In single-program-flow (SPF) mode, convert IF and ELSE into ADDs.
 *
 * SPF IF instructions are emitted with execution size 1, so no mask
 * stack operations are needed; the blocks can be skipped with plain
 * predicated jumps implemented as ADDs to the instruction pointer.
 * Offsets are in bytes: one 128-bit native instruction is 16 bytes.
 */
static void
convert_IF_ELSE_to_ADD(struct brw_compile *p,
                       struct brw_instruction *if_inst,
                       struct brw_instruction *else_inst)
{
   /* The next instruction (where the ENDIF would be, if it existed) */
   struct brw_instruction *next_inst = &p->store[p->nr_insn];

   assert(p->single_program_flow);
   assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF);
   assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE);
   assert(if_inst->header.execution_size == BRW_EXECUTE_1);

   /* Convert IF to an ADD instruction that moves the instruction pointer
    * to the first instruction of the ELSE block.  If there is no ELSE
    * block, point to where ENDIF would be.  Reverse the predicate.
    *
    * There's no need to execute an ENDIF since we don't need to do any
    * stack operations, and if we're currently executing, we just want to
    * continue normally.
    */
   if_inst->header.opcode = BRW_OPCODE_ADD;
   if_inst->header.predicate_inverse = 1;

   if (else_inst != NULL) {
      /* Convert ELSE to an ADD instruction that points where the ENDIF
       * would be.
       */
      else_inst->header.opcode = BRW_OPCODE_ADD;

      /* Byte offsets: 16 bytes per instruction. */
      if_inst->bits3.ud = (else_inst - if_inst + 1) * 16;
      else_inst->bits3.ud = (next_inst - else_inst) * 16;
   } else {
      if_inst->bits3.ud = (next_inst - if_inst) * 16;
   }
}
1091
/**
 * Patch IF and ELSE instructions with appropriate jump targets.
 *
 * Called from brw_ENDIF() once the ENDIF's location is known.  The
 * jump encoding differs by generation:
 *  - pre-gen6: IP-relative jump_count (and mask-stack pop_count) in
 *    bits3.if_else, with IF rewritten to IFF when there is no ELSE;
 *  - gen6:     jump_count field in bits1.branch_gen6;
 *  - gen7+:    JIP/UIP fields in bits3.break_cont.
 */
static void
patch_IF_ELSE(struct brw_compile *p,
              struct brw_instruction *if_inst,
              struct brw_instruction *else_inst,
              struct brw_instruction *endif_inst)
{
   struct intel_context *intel = &p->brw->intel;

   assert(!p->single_program_flow);
   assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF);
   assert(endif_inst != NULL);
   assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE);

   unsigned br = 1;
   /* Jump count is for 64bit data chunk each, so one 128bit instruction
    * requires 2 chunks.
    */
   if (intel->gen >= 5)
      br = 2;

   assert(endif_inst->header.opcode == BRW_OPCODE_ENDIF);
   endif_inst->header.execution_size = if_inst->header.execution_size;

   if (else_inst == NULL) {
      /* Patch IF -> ENDIF */
      if (intel->gen < 6) {
         /* Turn it into an IFF, which means no mask stack operations for
          * all-false and jumping past the ENDIF.
          */
         if_inst->header.opcode = BRW_OPCODE_IFF;
         if_inst->bits3.if_else.jump_count = br * (endif_inst - if_inst + 1);
         if_inst->bits3.if_else.pop_count = 0;
         if_inst->bits3.if_else.pad0 = 0;
      } else if (intel->gen == 6) {
         /* As of gen6, there is no IFF and IF must point to the ENDIF. */
         if_inst->bits1.branch_gen6.jump_count = br * (endif_inst - if_inst);
      } else {
         if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst);
         if_inst->bits3.break_cont.jip = br * (endif_inst - if_inst);
      }
   } else {
      else_inst->header.execution_size = if_inst->header.execution_size;

      /* Patch IF -> ELSE */
      if (intel->gen < 6) {
         if_inst->bits3.if_else.jump_count = br * (else_inst - if_inst);
         if_inst->bits3.if_else.pop_count = 0;
         if_inst->bits3.if_else.pad0 = 0;
      } else if (intel->gen == 6) {
         if_inst->bits1.branch_gen6.jump_count = br * (else_inst - if_inst + 1);
      }

      /* Patch ELSE -> ENDIF */
      if (intel->gen < 6) {
         /* BRW_OPCODE_ELSE pre-gen6 should point just past the
          * matching ENDIF.
          */
         else_inst->bits3.if_else.jump_count = br*(endif_inst - else_inst + 1);
         else_inst->bits3.if_else.pop_count = 1;
         else_inst->bits3.if_else.pad0 = 0;
      } else if (intel->gen == 6) {
         /* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. */
         else_inst->bits1.branch_gen6.jump_count = br*(endif_inst - else_inst);
      } else {
         /* The IF instruction's JIP should point just past the ELSE */
         if_inst->bits3.break_cont.jip = br * (else_inst - if_inst + 1);
         /* The IF instruction's UIP and ELSE's JIP should point to ENDIF */
         if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst);
         else_inst->bits3.break_cont.jip = br * (endif_inst - else_inst);
      }
   }
}
1167
/* Emit an ELSE instruction with placeholder jump fields and push it on
 * the if-stack; the jump targets are filled in by patch_IF_ELSE() when
 * the matching brw_ENDIF() is emitted.
 */
void
brw_ELSE(struct brw_compile *p)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_ELSE);

   if (intel->gen < 6) {
      /* Pre-gen6: IP-relative form; jump count patched later. */
      brw_set_dest(p, insn, brw_ip_reg());
      brw_set_src0(p, insn, brw_ip_reg());
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else if (intel->gen == 6) {
      /* Gen6: jump count lives in bits1. */
      brw_set_dest(p, insn, brw_imm_w(0));
      insn->bits1.branch_gen6.jump_count = 0;
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   } else {
      /* Gen7+: JIP/UIP fields in bits3. */
      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_ud(0));
      insn->bits3.break_cont.jip = 0;
      insn->bits3.break_cont.uip = 0;
   }

   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.mask_control = BRW_MASK_ENABLE;
   if (!p->single_program_flow)
      insn->header.thread_control = BRW_THREAD_SWITCH;

   push_if_stack(p, insn);
}
1200
/* Close an IF/ELSE construct: pop the IF (and optional ELSE) off the
 * if-stack, then either rewrite them as ADD jumps (single-program-flow
 * mode, where no ENDIF is emitted) or emit the ENDIF and patch the
 * whole construct's jump targets via patch_IF_ELSE().
 */
void
brw_ENDIF(struct brw_compile *p)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;
   struct brw_instruction *else_inst = NULL;
   struct brw_instruction *if_inst = NULL;

   /* Pop the IF and (optional) ELSE instructions from the stack */
   p->if_stack_depth--;
   if (p->if_stack[p->if_stack_depth]->header.opcode == BRW_OPCODE_ELSE) {
      else_inst = p->if_stack[p->if_stack_depth];
      p->if_stack_depth--;
   }
   if_inst = p->if_stack[p->if_stack_depth];

   if (p->single_program_flow) {
      /* ENDIF is useless; don't bother emitting it. */
      convert_IF_ELSE_to_ADD(p, if_inst, else_inst);
      return;
   }

   insn = next_insn(p, BRW_OPCODE_ENDIF);

   if (intel->gen < 6) {
      brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
      brw_set_src0(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else if (intel->gen == 6) {
      brw_set_dest(p, insn, brw_imm_w(0));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   } else {
      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_ud(0));
   }

   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.mask_control = BRW_MASK_ENABLE;
   insn->header.thread_control = BRW_THREAD_SWITCH;

   /* Also pop item off the stack in the endif instruction: */
   if (intel->gen < 6) {
      insn->bits3.if_else.jump_count = 0;
      insn->bits3.if_else.pop_count = 1;
      insn->bits3.if_else.pad0 = 0;
   } else if (intel->gen == 6) {
      insn->bits1.branch_gen6.jump_count = 2;
   } else {
      insn->bits3.break_cont.jip = 2;
   }
   patch_IF_ELSE(p, if_inst, else_inst, insn);
}
1255
1256 struct brw_instruction *brw_BREAK(struct brw_compile *p, int pop_count)
1257 {
1258 struct intel_context *intel = &p->brw->intel;
1259 struct brw_instruction *insn;
1260
1261 insn = next_insn(p, BRW_OPCODE_BREAK);
1262 if (intel->gen >= 6) {
1263 brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1264 brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1265 brw_set_src1(p, insn, brw_imm_d(0x0));
1266 } else {
1267 brw_set_dest(p, insn, brw_ip_reg());
1268 brw_set_src0(p, insn, brw_ip_reg());
1269 brw_set_src1(p, insn, brw_imm_d(0x0));
1270 insn->bits3.if_else.pad0 = 0;
1271 insn->bits3.if_else.pop_count = pop_count;
1272 }
1273 insn->header.compression_control = BRW_COMPRESSION_NONE;
1274 insn->header.execution_size = BRW_EXECUTE_8;
1275
1276 return insn;
1277 }
1278
1279 struct brw_instruction *gen6_CONT(struct brw_compile *p,
1280 struct brw_instruction *do_insn)
1281 {
1282 struct brw_instruction *insn;
1283
1284 insn = next_insn(p, BRW_OPCODE_CONTINUE);
1285 brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1286 brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1287 brw_set_dest(p, insn, brw_ip_reg());
1288 brw_set_src0(p, insn, brw_ip_reg());
1289 brw_set_src1(p, insn, brw_imm_d(0x0));
1290
1291 insn->header.compression_control = BRW_COMPRESSION_NONE;
1292 insn->header.execution_size = BRW_EXECUTE_8;
1293 return insn;
1294 }
1295
1296 struct brw_instruction *brw_CONT(struct brw_compile *p, int pop_count)
1297 {
1298 struct brw_instruction *insn;
1299 insn = next_insn(p, BRW_OPCODE_CONTINUE);
1300 brw_set_dest(p, insn, brw_ip_reg());
1301 brw_set_src0(p, insn, brw_ip_reg());
1302 brw_set_src1(p, insn, brw_imm_d(0x0));
1303 insn->header.compression_control = BRW_COMPRESSION_NONE;
1304 insn->header.execution_size = BRW_EXECUTE_8;
1305 /* insn->header.mask_control = BRW_MASK_DISABLE; */
1306 insn->bits3.if_else.pad0 = 0;
1307 insn->bits3.if_else.pop_count = pop_count;
1308 return insn;
1309 }
1310
1311 /* DO/WHILE loop:
1312 *
1313 * The DO/WHILE is just an unterminated loop -- break or continue are
1314 * used for control within the loop. We have a few ways they can be
1315 * done.
1316 *
1317 * For uniform control flow, the WHILE is just a jump, so ADD ip, ip,
1318 * jip and no DO instruction.
1319 *
1320 * For non-uniform control flow pre-gen6, there's a DO instruction to
1321 * push the mask, and a WHILE to jump back, and BREAK to get out and
1322 * pop the mask.
1323 *
1324 * For gen6, there's no more mask stack, so no need for DO. WHILE
1325 * just points back to the first instruction of the loop.
1326 */
/* Open a DO/WHILE loop.  On gen6+ and in single-program-flow mode no
 * DO instruction exists, so just return the address of the next (first
 * loop body) instruction for brw_WHILE() to jump back to.
 */
struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size)
{
   struct intel_context *intel = &p->brw->intel;

   if (intel->gen >= 6 || p->single_program_flow) {
      return &p->store[p->nr_insn];
   } else {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO);

      /* Override the defaults for this instruction:
       */
      brw_set_dest(p, insn, brw_null_reg());
      brw_set_src0(p, insn, brw_null_reg());
      brw_set_src1(p, insn, brw_null_reg());

      insn->header.compression_control = BRW_COMPRESSION_NONE;
      insn->header.execution_size = execute_size;
      insn->header.predicate_control = BRW_PREDICATE_NONE;
      /* insn->header.mask_control = BRW_MASK_ENABLE; */
      /* insn->header.mask_control = BRW_MASK_DISABLE; */

      return insn;
   }
}
1351
1352
1353
/* Emit the WHILE that closes a DO/WHILE loop, pointing back at do_insn
 * (the value returned by brw_DO(): the DO instruction pre-gen6, or the
 * first loop-body instruction on gen6+ / in single-program-flow mode).
 */
struct brw_instruction *brw_WHILE(struct brw_compile *p,
                                  struct brw_instruction *do_insn)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;
   GLuint br = 1;

   /* Jump offsets are in 64-bit chunks from gen5 on: one 128-bit
    * instruction is 2 units.
    */
   if (intel->gen >= 5)
      br = 2;

   if (intel->gen >= 7) {
      insn = next_insn(p, BRW_OPCODE_WHILE);

      /* Gen7: the (negative) backward jump goes in the JIP field. */
      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_ud(0));
      insn->bits3.break_cont.jip = br * (do_insn - insn);

      insn->header.execution_size = BRW_EXECUTE_8;
   } else if (intel->gen == 6) {
      insn = next_insn(p, BRW_OPCODE_WHILE);

      /* Gen6: the backward jump goes in the bits1 jump_count field. */
      brw_set_dest(p, insn, brw_imm_w(0));
      insn->bits1.branch_gen6.jump_count = br * (do_insn - insn);
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));

      insn->header.execution_size = BRW_EXECUTE_8;
   } else {
      if (p->single_program_flow) {
         /* Uniform control flow: a plain backward jump, as an ADD to
          * the instruction pointer (byte offset, 16 per instruction).
          */
         insn = next_insn(p, BRW_OPCODE_ADD);

         brw_set_dest(p, insn, brw_ip_reg());
         brw_set_src0(p, insn, brw_ip_reg());
         brw_set_src1(p, insn, brw_imm_d((do_insn - insn) * 16));
         insn->header.execution_size = BRW_EXECUTE_1;
      } else {
         insn = next_insn(p, BRW_OPCODE_WHILE);

         assert(do_insn->header.opcode == BRW_OPCODE_DO);

         brw_set_dest(p, insn, brw_ip_reg());
         brw_set_src0(p, insn, brw_ip_reg());
         brw_set_src1(p, insn, brw_imm_d(0));

         insn->header.execution_size = do_insn->header.execution_size;
         insn->bits3.if_else.jump_count = br * (do_insn - insn + 1);
         insn->bits3.if_else.pop_count = 0;
         insn->bits3.if_else.pad0 = 0;
      }
   }
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   p->current->header.predicate_control = BRW_PREDICATE_NONE;

   return insn;
}
1410
1411
1412 /* FORWARD JUMPS:
1413 */
1414 void brw_land_fwd_jump(struct brw_compile *p,
1415 struct brw_instruction *jmp_insn)
1416 {
1417 struct intel_context *intel = &p->brw->intel;
1418 struct brw_instruction *landing = &p->store[p->nr_insn];
1419 GLuint jmpi = 1;
1420
1421 if (intel->gen >= 5)
1422 jmpi = 2;
1423
1424 assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
1425 assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE);
1426
1427 jmp_insn->bits3.ud = jmpi * ((landing - jmp_insn) - 1);
1428 }
1429
1430
1431
/* To integrate with the above, it makes sense that the comparison
 * instruction should populate the flag register.  It might be simpler
 * just to use the flag reg for most WM tasks?
 */
/* Emit a CMP with the given conditional modifier, comparing src0
 * against src1 and writing the per-channel result to 'dest'.
 */
void brw_CMP(struct brw_compile *p,
	     struct brw_reg dest,
	     GLuint conditional,
	     struct brw_reg src0,
	     struct brw_reg src1)
{
   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP);

   insn->header.destreg__conditionalmod = conditional;
   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, src1);

/*    guess_execution_size(insn, src0); */


   /* Make it so that future instructions will use the computed flag
    * value until brw_set_predicate_control_flag_value() is called
    * again.
    */
   if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
       dest.nr == 0) {
      p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
      p->flag_value = 0xff;
   }
}
1462
1463 /* Issue 'wait' instruction for n1, host could program MMIO
1464 to wake up thread. */
1465 void brw_WAIT (struct brw_compile *p)
1466 {
1467 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_WAIT);
1468 struct brw_reg src = brw_notification_1_reg();
1469
1470 brw_set_dest(p, insn, src);
1471 brw_set_src0(p, insn, src);
1472 brw_set_src1(p, insn, brw_null_reg());
1473 insn->header.execution_size = 0; /* must */
1474 insn->header.predicate_control = 0;
1475 insn->header.compression_control = 0;
1476 }
1477
1478
1479 /***********************************************************************
1480 * Helpers for the various SEND message types:
1481 */
1482
/** Extended math function, float[8].
 *
 * On gen6+ this emits a native MATH instruction; earlier generations
 * emit a SEND carrying a math message (see brw_set_math_message()),
 * with 'msg_reg_nr' naming the message register used.
 */
void brw_math( struct brw_compile *p,
	       struct brw_reg dest,
	       GLuint function,
	       GLuint saturate,
	       GLuint msg_reg_nr,
	       struct brw_reg src,
	       GLuint data_type,
	       GLuint precision )
{
   struct intel_context *intel = &p->brw->intel;

   if (intel->gen >= 6) {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);

      /* Gen6+ MATH only operates GRF-to-GRF with unit stride. */
      assert(dest.file == BRW_GENERAL_REGISTER_FILE);
      assert(src.file == BRW_GENERAL_REGISTER_FILE);

      assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
      assert(src.hstride == BRW_HORIZONTAL_STRIDE_1);

      /* Source modifiers are ignored for extended math instructions. */
      assert(!src.negate);
      assert(!src.abs);

      /* Integer-divide functions take integer sources; all the other
       * functions are float-only.
       */
      if (function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||
	  function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER ||
	  function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
	 assert(src.type != BRW_REGISTER_TYPE_F);
      } else {
	 assert(src.type == BRW_REGISTER_TYPE_F);
      }

      /* Math is the same ISA format as other opcodes, except that CondModifier
       * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
       */
      insn->header.destreg__conditionalmod = function;
      insn->header.saturate = saturate;

      brw_set_dest(p, insn, dest);
      brw_set_src0(p, insn, src);
      brw_set_src1(p, insn, brw_null_reg());
   } else {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);

      /* Example code doesn't set predicate_control for send
       * instructions.
       */
      insn->header.predicate_control = 0;
      insn->header.destreg__conditionalmod = msg_reg_nr;

      brw_set_dest(p, insn, dest);
      brw_set_src0(p, insn, src);
      brw_set_math_message(p,
			   insn,
			   function,
			   src.type == BRW_REGISTER_TYPE_D,
			   precision,
			   saturate,
			   data_type);
   }
}
1546
/** Extended math function, float[8].
 *
 * Two-source variant of brw_math() for the native gen6+ MATH
 * instruction (e.g. POW and the integer-divide functions); asserts
 * gen >= 6 since earlier generations have no two-source math encoding
 * here.
 */
void brw_math2(struct brw_compile *p,
	       struct brw_reg dest,
	       GLuint function,
	       struct brw_reg src0,
	       struct brw_reg src1)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);

   assert(intel->gen >= 6);
   (void) intel;


   /* MATH only operates GRF-to-GRF with unit stride. */
   assert(dest.file == BRW_GENERAL_REGISTER_FILE);
   assert(src0.file == BRW_GENERAL_REGISTER_FILE);
   assert(src1.file == BRW_GENERAL_REGISTER_FILE);

   assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
   assert(src0.hstride == BRW_HORIZONTAL_STRIDE_1);
   assert(src1.hstride == BRW_HORIZONTAL_STRIDE_1);

   /* Integer-divide functions take integer sources; all the other
    * functions are float-only.
    */
   if (function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||
       function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER ||
       function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
      assert(src0.type != BRW_REGISTER_TYPE_F);
      assert(src1.type != BRW_REGISTER_TYPE_F);
   } else {
      assert(src0.type == BRW_REGISTER_TYPE_F);
      assert(src1.type == BRW_REGISTER_TYPE_F);
   }

   /* Source modifiers are ignored for extended math instructions. */
   assert(!src0.negate);
   assert(!src0.abs);
   assert(!src1.negate);
   assert(!src1.abs);

   /* Math is the same ISA format as other opcodes, except that CondModifier
    * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
    */
   insn->header.destreg__conditionalmod = function;

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, src1);
}
1595
/**
 * Extended math function, float[16].
 * Use 2 send instructions.
 *
 * On gen6+ a single 16-wide MATH instruction suffices; pre-gen6, two
 * SIMD8 math SENDs are emitted, the second with 2NDHALF compression
 * covering the upper eight channels and the next message register.
 */
void brw_math_16( struct brw_compile *p,
		  struct brw_reg dest,
		  GLuint function,
		  GLuint saturate,
		  GLuint msg_reg_nr,
		  struct brw_reg src,
		  GLuint precision )
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;

   if (intel->gen >= 6) {
      insn = next_insn(p, BRW_OPCODE_MATH);

      /* Math is the same ISA format as other opcodes, except that CondModifier
       * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
       */
      insn->header.destreg__conditionalmod = function;
      insn->header.saturate = saturate;

      /* Source modifiers are ignored for extended math instructions. */
      assert(!src.negate);
      assert(!src.abs);

      brw_set_dest(p, insn, dest);
      brw_set_src0(p, insn, src);
      brw_set_src1(p, insn, brw_null_reg());
      return;
   }

   /* First instruction:
    */
   brw_push_insn_state(p);
   brw_set_predicate_control_flag_value(p, 0xff);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);

   insn = next_insn(p, BRW_OPCODE_SEND);
   insn->header.destreg__conditionalmod = msg_reg_nr;

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src);
   brw_set_math_message(p,
			insn,
			function,
			BRW_MATH_INTEGER_UNSIGNED,
			precision,
			saturate,
			BRW_MATH_DATA_VECTOR);

   /* Second instruction:
    */
   insn = next_insn(p, BRW_OPCODE_SEND);
   insn->header.compression_control = BRW_COMPRESSION_2NDHALF;
   insn->header.destreg__conditionalmod = msg_reg_nr+1;

   brw_set_dest(p, insn, offset(dest,1));
   brw_set_src0(p, insn, src);
   brw_set_math_message(p,
			insn,
			function,
			BRW_MATH_INTEGER_UNSIGNED,
			precision,
			saturate,
			BRW_MATH_DATA_VECTOR);

   brw_pop_insn_state(p);
}
1667
1668
/**
 * Write a block of OWORDs (half a GRF each) from the scratch buffer,
 * using a constant offset per channel.
 *
 * The offset must be aligned to oword size (16 bytes).  Used for
 * register spilling.
 *
 * 'mrf' is the message header register; the data to write follows in
 * the subsequent message register(s).  'num_regs' is 1 or 2 GRFs of
 * payload data.  On gen6+, 'offset' is converted from bytes to owords
 * before being placed in the header.
 */
void brw_oword_block_write_scratch(struct brw_compile *p,
				   struct brw_reg mrf,
				   int num_regs,
				   GLuint offset)
{
   struct intel_context *intel = &p->brw->intel;
   uint32_t msg_control, msg_type;
   int mlen;

   if (intel->gen >= 6)
      offset /= 16;

   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);

   /* Message length is one header register plus the payload. */
   if (num_regs == 1) {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
      mlen = 2;
   } else {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
      mlen = 3;
   }

   /* Set up the message header.  This is g0, with g0.2 filled with
    * the offset.  We don't want to leave our offset around in g0 or
    * it'll screw up texture samples, so set it up inside the message
    * reg.
    */
   {
      brw_push_insn_state(p);
      brw_set_mask_control(p, BRW_MASK_DISABLE);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);

      brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));

      /* set message header global offset field (reg 0, element 2) */
      brw_MOV(p,
	      retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
				  mrf.nr,
				  2), BRW_REGISTER_TYPE_UD),
	      brw_imm_ud(offset));

      brw_pop_insn_state(p);
   }

   {
      struct brw_reg dest;
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
      int send_commit_msg;
      struct brw_reg src_header = retype(brw_vec8_grf(0, 0),
					 BRW_REGISTER_TYPE_UW);

      if (insn->header.compression_control != BRW_COMPRESSION_NONE) {
	 insn->header.compression_control = BRW_COMPRESSION_NONE;
	 src_header = vec16(src_header);
      }
      assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
      insn->header.destreg__conditionalmod = mrf.nr;

      /* Until gen6, writes followed by reads from the same location
       * are not guaranteed to be ordered unless write_commit is set.
       * If set, then a no-op write is issued to the destination
       * register to set a dependency, and a read from the destination
       * can be used to ensure the ordering.
       *
       * For gen6, only writes between different threads need ordering
       * protection.  Our use of DP writes is all about register
       * spilling within a thread.
       */
      if (intel->gen >= 6) {
	 dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
	 send_commit_msg = 0;
      } else {
	 dest = src_header;
	 send_commit_msg = 1;
      }

      brw_set_dest(p, insn, dest);
      if (intel->gen >= 6) {
	 brw_set_src0(p, insn, mrf);
      } else {
	 brw_set_src0(p, insn, brw_null_reg());
      }

      if (intel->gen >= 6)
	 msg_type = GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
      else
	 msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;

      brw_set_dp_write_message(p,
			       insn,
			       255, /* binding table index (255=stateless) */
			       msg_control,
			       msg_type,
			       mlen,
			       true, /* header_present */
			       0, /* pixel scoreboard */
			       send_commit_msg, /* response_length */
			       0, /* eot */
			       send_commit_msg);
   }
}
1777
1778
/**
 * Read a block of owords (half a GRF each) from the scratch buffer
 * using a constant index per channel.
 *
 * Offset must be aligned to oword size (16 bytes).  Used for register
 * spilling.
 *
 * 'mrf' is the message header register; 'num_regs' (1 or 2) GRFs are
 * read back into 'dest'.  On gen6+, 'offset' is converted from bytes
 * to owords before being placed in the header.
 */
void
brw_oword_block_read_scratch(struct brw_compile *p,
			     struct brw_reg dest,
			     struct brw_reg mrf,
			     int num_regs,
			     GLuint offset)
{
   struct intel_context *intel = &p->brw->intel;
   uint32_t msg_control;
   int rlen;

   if (intel->gen >= 6)
      offset /= 16;

   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
   dest = retype(dest, BRW_REGISTER_TYPE_UW);

   if (num_regs == 1) {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
      rlen = 1;
   } else {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
      rlen = 2;
   }

   /* Build the message header: g0 copied into the message register,
    * with the scratch offset written into g0.2's slot.
    */
   {
      brw_push_insn_state(p);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      brw_set_mask_control(p, BRW_MASK_DISABLE);

      brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));

      /* set message header global offset field (reg 0, element 2) */
      brw_MOV(p,
	      retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
				  mrf.nr,
				  2), BRW_REGISTER_TYPE_UD),
	      brw_imm_ud(offset));

      brw_pop_insn_state(p);
   }

   {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);

      assert(insn->header.predicate_control == 0);
      insn->header.compression_control = BRW_COMPRESSION_NONE;
      insn->header.destreg__conditionalmod = mrf.nr;

      brw_set_dest(p, insn, dest);	/* UW? */
      if (intel->gen >= 6) {
	 brw_set_src0(p, insn, mrf);
      } else {
	 brw_set_src0(p, insn, brw_null_reg());
      }

      brw_set_dp_read_message(p,
			      insn,
			      255, /* binding table index (255=stateless) */
			      msg_control,
			      BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
			      BRW_DATAPORT_READ_TARGET_RENDER_CACHE,
			      1, /* msg_length */
			      rlen);
   }
}
1852
/**
 * Read a float[4] vector from the data port Data Cache (const buffer).
 * Location (in buffer) should be a multiple of 16.
 * Used for fetching shader constants.
 *
 * 'mrf' is the message header register; the single-register response
 * lands in 'dest'.  'bind_table_index' selects the constant buffer
 * surface.
 */
void brw_oword_block_read(struct brw_compile *p,
			  struct brw_reg dest,
			  struct brw_reg mrf,
			  uint32_t offset,
			  uint32_t bind_table_index)
{
   struct intel_context *intel = &p->brw->intel;

   /* On newer hardware, offset is in units of owords. */
   if (intel->gen >= 6)
      offset /= 16;

   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);

   brw_push_insn_state(p);
   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_set_mask_control(p, BRW_MASK_DISABLE);

   /* Message header: copy of g0 with the buffer offset in element 2. */
   brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));

   /* set message header global offset field (reg 0, element 2) */
   brw_MOV(p,
	   retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
			       mrf.nr,
			       2), BRW_REGISTER_TYPE_UD),
	   brw_imm_ud(offset));

   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
   insn->header.destreg__conditionalmod = mrf.nr;

   /* cast dest to a uword[8] vector */
   dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);

   brw_set_dest(p, insn, dest);
   if (intel->gen >= 6) {
      brw_set_src0(p, insn, mrf);
   } else {
      brw_set_src0(p, insn, brw_null_reg());
   }

   brw_set_dp_read_message(p,
			   insn,
			   bind_table_index,
			   BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW,
			   BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ,
			   BRW_DATAPORT_READ_TARGET_DATA_CACHE,
			   1, /* msg_length */
			   1); /* response_length (1 reg, 2 owords!) */

   brw_pop_insn_state(p);
}
1910
/**
 * Read a set of dwords from the data port Data Cache (const buffer).
 *
 * Location (in buffer) appears as UD offsets in the register after
 * the provided mrf header reg.
 *
 * Clobbers 'mrf': it is overwritten with a copy of g0 to serve as the
 * message header (the offsets register mrf+1 is set up by the caller).
 */
void brw_dword_scattered_read(struct brw_compile *p,
			      struct brw_reg dest,
			      struct brw_reg mrf,
			      uint32_t bind_table_index)
{
   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);

   /* Write the header unconditionally: no predication, no compression,
    * channel mask disabled.
    */
   brw_push_insn_state(p);
   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
   brw_pop_insn_state(p);

   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
   insn->header.destreg__conditionalmod = mrf.nr;

   /* cast dest to a uword[8] vector */
   dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, brw_null_reg());

   brw_set_dp_read_message(p,
			   insn,
			   bind_table_index,
			   BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS,
			   BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ,
			   BRW_DATAPORT_READ_TARGET_DATA_CACHE,
			   2, /* msg_length: header + offsets reg */
			   1); /* response_length */
}
1949
1950
1951
/**
 * Read a float[4] constant(s) from VS constant buffer.
 * For relative addressing, two float[4] constants will be read into 'dest'.
 * Otherwise, one float[4] constant will be read into the lower half of 'dest'.
 *
 * Clobbers MRF[1], which is used as the message header.
 */
void brw_dp_READ_4_vs(struct brw_compile *p,
		      struct brw_reg dest,
		      GLuint location,
		      GLuint bind_table_index)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;
   GLuint msg_reg_nr = 1;

   /* On gen6+, the header's offset field is in owords (16 bytes), not
    * bytes.
    */
   if (intel->gen >= 6)
      location /= 16;

   /* Setup MRF[1] with location/offset into const buffer */
   brw_push_insn_state(p);
   brw_set_access_mode(p, BRW_ALIGN_1);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
   /* Write the buffer offset into element 2 of the header register. */
   brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 2),
		     BRW_REGISTER_TYPE_UD),
	   brw_imm_ud(location));
   brw_pop_insn_state(p);

   insn = next_insn(p, BRW_OPCODE_SEND);

   insn->header.predicate_control = BRW_PREDICATE_NONE;
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.destreg__conditionalmod = msg_reg_nr;
   insn->header.mask_control = BRW_MASK_DISABLE;

   brw_set_dest(p, insn, dest);
   if (intel->gen >= 6) {
      brw_set_src0(p, insn, brw_message_reg(msg_reg_nr));
   } else {
      /* Pre-gen6: the payload comes implicitly from the MRF named above. */
      brw_set_src0(p, insn, brw_null_reg());
   }

   brw_set_dp_read_message(p,
			   insn,
			   bind_table_index,
			   0,
			   BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
			   BRW_DATAPORT_READ_TARGET_DATA_CACHE,
			   1, /* msg_length */
			   1); /* response_length (1 Oword) */
}
2003
/**
 * Read a float[4] constant per vertex from VS constant buffer, with
 * relative addressing.
 *
 * Clobbers MRF[1], which carries the per-vertex block offsets.
 */
void brw_dp_READ_4_vs_relative(struct brw_compile *p,
			       struct brw_reg dest,
			       struct brw_reg addr_reg,
			       GLuint offset,
			       GLuint bind_table_index)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_reg src = brw_vec8_grf(0, 0);
   int msg_type;

   /* Setup MRF[1] with offset into const buffer */
   brw_push_insn_state(p);
   brw_set_access_mode(p, BRW_ALIGN_1);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   brw_set_predicate_control(p, BRW_PREDICATE_NONE);

   /* M1.0 is block offset 0, M1.4 is block offset 1, all other
    * fields ignored.
    */
   brw_ADD(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_D),
	   addr_reg, brw_imm_d(offset));
   brw_pop_insn_state(p);

   /* On gen6+ this copies the g0 header into an MRF and redirects src
    * accordingly (no implied move on SEND there).
    */
   gen6_resolve_implied_move(p, &src, 0);
   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);

   insn->header.predicate_control = BRW_PREDICATE_NONE;
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.destreg__conditionalmod = 0;
   insn->header.mask_control = BRW_MASK_DISABLE;

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src);

   /* The OWORD dual block read message type encoding differs per gen. */
   if (intel->gen >= 6)
      msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
   else if (intel->gen == 5 || intel->is_g4x)
      msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
   else
      msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;

   brw_set_dp_read_message(p,
			   insn,
			   bind_table_index,
			   BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
			   msg_type,
			   BRW_DATAPORT_READ_TARGET_DATA_CACHE,
			   2, /* msg_length: header + offsets */
			   1); /* response_length */
}
2059
2060
2061
/**
 * Emit a render-target (framebuffer color) write message.
 *
 * 'dispatch_width' (8 or 16) selects the message control and the width
 * of the (null) destination.  'eot' marks the thread's final message.
 * On gen6+ the message is headerless and the color payload is taken
 * directly from MRF 'msg_reg_nr' (the incoming src0 is overridden).
 */
void brw_fb_WRITE(struct brw_compile *p,
		  int dispatch_width,
		  GLuint msg_reg_nr,
		  struct brw_reg src0,
		  GLuint binding_table_index,
		  GLuint msg_length,
		  GLuint response_length,
		  bool eot,
		  bool header_present)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;
   GLuint msg_control, msg_type;
   struct brw_reg dest;

   if (dispatch_width == 16)
      dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
   else
      dest = retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW);

   /* NOTE(review): gen6+ uses SENDC (conditional send) for writes to
    * binding table index 0 — presumably to order against dependent
    * threads; confirm against the PRM.
    */
   if (intel->gen >= 6 && binding_table_index == 0) {
      insn = next_insn(p, BRW_OPCODE_SENDC);
   } else {
      insn = next_insn(p, BRW_OPCODE_SEND);
   }
   /* The execution mask is ignored for render target writes. */
   insn->header.predicate_control = 0;
   insn->header.compression_control = BRW_COMPRESSION_NONE;

   if (intel->gen >= 6) {
      /* headerless version, just submit color payload */
      src0 = brw_message_reg(msg_reg_nr);

      msg_type = GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
   } else {
      insn->header.destreg__conditionalmod = msg_reg_nr;

      msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
   }

   if (dispatch_width == 16)
      msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE;
   else
      msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01;

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_dp_write_message(p,
			    insn,
			    binding_table_index,
			    msg_control,
			    msg_type,
			    msg_length,
			    header_present,
			    1, /* pixel scoreboard */
			    response_length,
			    eot,
			    0 /* send_commit_msg */);
}
2121
2122
2123 /**
2124 * Texture sample instruction.
2125 * Note: the msg_type plus msg_length values determine exactly what kind
2126 * of sampling operation is performed. See volume 4, page 161 of docs.
2127 */
2128 void brw_SAMPLE(struct brw_compile *p,
2129 struct brw_reg dest,
2130 GLuint msg_reg_nr,
2131 struct brw_reg src0,
2132 GLuint binding_table_index,
2133 GLuint sampler,
2134 GLuint writemask,
2135 GLuint msg_type,
2136 GLuint response_length,
2137 GLuint msg_length,
2138 bool eot,
2139 GLuint header_present,
2140 GLuint simd_mode)
2141 {
2142 struct intel_context *intel = &p->brw->intel;
2143 bool need_stall = 0;
2144
2145 if (writemask == 0) {
2146 /*printf("%s: zero writemask??\n", __FUNCTION__); */
2147 return;
2148 }
2149
2150 /* Hardware doesn't do destination dependency checking on send
2151 * instructions properly. Add a workaround which generates the
2152 * dependency by other means. In practice it seems like this bug
2153 * only crops up for texture samples, and only where registers are
2154 * written by the send and then written again later without being
2155 * read in between. Luckily for us, we already track that
2156 * information and use it to modify the writemask for the
2157 * instruction, so that is a guide for whether a workaround is
2158 * needed.
2159 */
2160 if (writemask != WRITEMASK_XYZW) {
2161 GLuint dst_offset = 0;
2162 GLuint i, newmask = 0, len = 0;
2163
2164 for (i = 0; i < 4; i++) {
2165 if (writemask & (1<<i))
2166 break;
2167 dst_offset += 2;
2168 }
2169 for (; i < 4; i++) {
2170 if (!(writemask & (1<<i)))
2171 break;
2172 newmask |= 1<<i;
2173 len++;
2174 }
2175
2176 if (newmask != writemask) {
2177 need_stall = 1;
2178 /* printf("need stall %x %x\n", newmask , writemask); */
2179 }
2180 else {
2181 bool dispatch_16 = false;
2182
2183 struct brw_reg m1 = brw_message_reg(msg_reg_nr);
2184
2185 guess_execution_size(p, p->current, dest);
2186 if (p->current->header.execution_size == BRW_EXECUTE_16)
2187 dispatch_16 = true;
2188
2189 newmask = ~newmask & WRITEMASK_XYZW;
2190
2191 brw_push_insn_state(p);
2192
2193 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
2194 brw_set_mask_control(p, BRW_MASK_DISABLE);
2195
2196 brw_MOV(p, retype(m1, BRW_REGISTER_TYPE_UD),
2197 retype(brw_vec8_grf(0,0), BRW_REGISTER_TYPE_UD));
2198 brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12));
2199
2200 brw_pop_insn_state(p);
2201
2202 src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
2203 dest = offset(dest, dst_offset);
2204
2205 /* For 16-wide dispatch, masked channels are skipped in the
2206 * response. For 8-wide, masked channels still take up slots,
2207 * and are just not written to.
2208 */
2209 if (dispatch_16)
2210 response_length = len * 2;
2211 }
2212 }
2213
2214 {
2215 struct brw_instruction *insn;
2216
2217 gen6_resolve_implied_move(p, &src0, msg_reg_nr);
2218
2219 insn = next_insn(p, BRW_OPCODE_SEND);
2220 insn->header.predicate_control = 0; /* XXX */
2221 insn->header.compression_control = BRW_COMPRESSION_NONE;
2222 if (intel->gen < 6)
2223 insn->header.destreg__conditionalmod = msg_reg_nr;
2224
2225 brw_set_dest(p, insn, dest);
2226 brw_set_src0(p, insn, src0);
2227 brw_set_sampler_message(p, insn,
2228 binding_table_index,
2229 sampler,
2230 msg_type,
2231 response_length,
2232 msg_length,
2233 eot,
2234 header_present,
2235 simd_mode);
2236 }
2237
2238 if (need_stall) {
2239 struct brw_reg reg = vec8(offset(dest, response_length-1));
2240
2241 /* mov (8) r9.0<1>:f r9.0<8;8,1>:f { Align1 }
2242 */
2243 brw_push_insn_state(p);
2244 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
2245 brw_MOV(p, retype(reg, BRW_REGISTER_TYPE_UD),
2246 retype(reg, BRW_REGISTER_TYPE_UD));
2247 brw_pop_insn_state(p);
2248 }
2249
2250 }
2251
/* All these variables are pretty confusing - we might be better off
 * using bitmasks and macros for this, in the old style.  Or perhaps
 * just having the caller instantiate the fields in dword3 itself.
 */
/**
 * Emit a URB write SEND.  The message payload starts at MRF
 * 'msg_reg_nr' (or is resolved from 'src0' on gen6+); the flag
 * parameters are packed into the descriptor by brw_set_urb_message().
 */
void brw_urb_WRITE(struct brw_compile *p,
		   struct brw_reg dest,
		   GLuint msg_reg_nr,
		   struct brw_reg src0,
		   bool allocate,
		   bool used,
		   GLuint msg_length,
		   GLuint response_length,
		   bool eot,
		   bool writes_complete,
		   GLuint offset,
		   GLuint swizzle)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;

   gen6_resolve_implied_move(p, &src0, msg_reg_nr);

   if (intel->gen == 7) {
      /* Enable Channel Masks in the URB_WRITE_HWORD message header */
      brw_push_insn_state(p);
      brw_set_access_mode(p, BRW_ALIGN_1);
      brw_OR(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 5),
		       BRW_REGISTER_TYPE_UD),
	     retype(brw_vec1_grf(0, 5), BRW_REGISTER_TYPE_UD),
	     brw_imm_ud(0xff00));
      brw_pop_insn_state(p);
   }

   insn = next_insn(p, BRW_OPCODE_SEND);

   assert(msg_length < BRW_MAX_MRF);

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, brw_imm_d(0));

   /* Pre-gen6: name the payload MRF in the instruction header. */
   if (intel->gen < 6)
      insn->header.destreg__conditionalmod = msg_reg_nr;

   brw_set_urb_message(p,
		       insn,
		       allocate,
		       used,
		       msg_length,
		       response_length,
		       eot,
		       writes_complete,
		       offset,
		       swizzle);
}
2307
2308 static int
2309 brw_find_next_block_end(struct brw_compile *p, int start)
2310 {
2311 int ip;
2312
2313 for (ip = start + 1; ip < p->nr_insn; ip++) {
2314 struct brw_instruction *insn = &p->store[ip];
2315
2316 switch (insn->header.opcode) {
2317 case BRW_OPCODE_ENDIF:
2318 case BRW_OPCODE_ELSE:
2319 case BRW_OPCODE_WHILE:
2320 return ip;
2321 }
2322 }
2323 assert(!"not reached");
2324 return start + 1;
2325 }
2326
/* There is no DO instruction on gen6, so to find the end of the loop
 * we have to see if the loop is jumping back before our start
 * instruction.
 *
 * Returns the index of the WHILE whose backward jump lands at or
 * before 'start'; asserts (and returns start + 1) if none is found.
 */
static int
brw_find_loop_end(struct brw_compile *p, int start)
{
   struct intel_context *intel = &p->brw->intel;
   int ip;
   int br = 2;   /* jump offsets count 2 units per instruction */

   for (ip = start + 1; ip < p->nr_insn; ip++) {
      struct brw_instruction *insn = &p->store[ip];

      if (insn->header.opcode == BRW_OPCODE_WHILE) {
	 /* Gen6 encodes the backward branch in jump_count; gen7 in JIP. */
	 int jip = intel->gen == 6 ? insn->bits1.branch_gen6.jump_count
				   : insn->bits3.break_cont.jip;
	 if (ip + jip / br <= start)
	    return ip;
      }
   }
   assert(!"not reached");
   return start + 1;
}
2351
2352 /* After program generation, go back and update the UIP and JIP of
2353 * BREAK and CONT instructions to their correct locations.
2354 */
2355 void
2356 brw_set_uip_jip(struct brw_compile *p)
2357 {
2358 struct intel_context *intel = &p->brw->intel;
2359 int ip;
2360 int br = 2;
2361
2362 if (intel->gen < 6)
2363 return;
2364
2365 for (ip = 0; ip < p->nr_insn; ip++) {
2366 struct brw_instruction *insn = &p->store[ip];
2367
2368 switch (insn->header.opcode) {
2369 case BRW_OPCODE_BREAK:
2370 insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip);
2371 /* Gen7 UIP points to WHILE; Gen6 points just after it */
2372 insn->bits3.break_cont.uip =
2373 br * (brw_find_loop_end(p, ip) - ip + (intel->gen == 6 ? 1 : 0));
2374 break;
2375 case BRW_OPCODE_CONTINUE:
2376 insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip);
2377 insn->bits3.break_cont.uip = br * (brw_find_loop_end(p, ip) - ip);
2378
2379 assert(insn->bits3.break_cont.uip != 0);
2380 assert(insn->bits3.break_cont.jip != 0);
2381 break;
2382 }
2383 }
2384 }
2385
/**
 * Emit an FF_SYNC message (see brw_set_ff_sync_message).  The header
 * payload starts at MRF 'msg_reg_nr' (resolved from 'src0' on gen6+);
 * the reply, if any, lands in 'dest'.
 */
void brw_ff_sync(struct brw_compile *p,
		 struct brw_reg dest,
		 GLuint msg_reg_nr,
		 struct brw_reg src0,
		 bool allocate,
		 GLuint response_length,
		 bool eot)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;

   gen6_resolve_implied_move(p, &src0, msg_reg_nr);

   insn = next_insn(p, BRW_OPCODE_SEND);
   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, brw_imm_d(0));

   /* Pre-gen6: name the payload MRF in the instruction header. */
   if (intel->gen < 6)
      insn->header.destreg__conditionalmod = msg_reg_nr;

   brw_set_ff_sync_message(p,
			   insn,
			   allocate,
			   response_length,
			   eot);
}