i965: For color clears, only disable writes to components that exist.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_eu_emit.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keithw@vmware.com>
30 */
31
32
33 #include "brw_context.h"
34 #include "brw_defines.h"
35 #include "brw_eu.h"
36
37 #include "glsl/ralloc.h"
38
39 /***********************************************************************
40 * Internal helper for constructing instructions
41 */
42
43 static void guess_execution_size(struct brw_compile *p,
44 struct brw_instruction *insn,
45 struct brw_reg reg)
46 {
47 if (reg.width == BRW_WIDTH_8 && p->compressed)
48 insn->header.execution_size = BRW_EXECUTE_16;
49 else
50 insn->header.execution_size = reg.width; /* note - definitions are compatible */
51 }
52
53
54 /**
55 * Prior to Sandybridge, the SEND instruction accepted non-MRF source
56 * registers, implicitly moving the operand to a message register.
57 *
58 * On Sandybridge, this is no longer the case. This function performs the
59 * explicit move; it should be called before emitting a SEND instruction.
60 */
61 void
62 gen6_resolve_implied_move(struct brw_compile *p,
63 struct brw_reg *src,
64 unsigned msg_reg_nr)
65 {
66 struct brw_context *brw = p->brw;
67 if (brw->gen < 6)
68 return;
69
70 if (src->file == BRW_MESSAGE_REGISTER_FILE)
71 return;
72
73 if (src->file != BRW_ARCHITECTURE_REGISTER_FILE || src->nr != BRW_ARF_NULL) {
74 brw_push_insn_state(p);
75 brw_set_mask_control(p, BRW_MASK_DISABLE);
76 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
77 brw_MOV(p, retype(brw_message_reg(msg_reg_nr), BRW_REGISTER_TYPE_UD),
78 retype(*src, BRW_REGISTER_TYPE_UD));
79 brw_pop_insn_state(p);
80 }
81 *src = brw_message_reg(msg_reg_nr);
82 }
83
84 static void
85 gen7_convert_mrf_to_grf(struct brw_compile *p, struct brw_reg *reg)
86 {
87 /* From the Ivybridge PRM, Volume 4 Part 3, page 218 ("send"):
88 * "The send with EOT should use register space R112-R127 for <src>. This is
89 * to enable loading of a new thread into the same slot while the message
90 * with EOT for current thread is pending dispatch."
91 *
92 * Since we're pretending to have 16 MRFs anyway, we may as well use the
93 * registers required for messages with EOT.
94 */
95 struct brw_context *brw = p->brw;
96 if (brw->gen == 7 && reg->file == BRW_MESSAGE_REGISTER_FILE) {
97 reg->file = BRW_GENERAL_REGISTER_FILE;
98 reg->nr += GEN7_MRF_HACK_START;
99 }
100 }
101
/**
 * Convert a brw_reg_type enumeration value into the hardware representation.
 *
 * The hardware encoding may depend on whether the value is an immediate.
 */
unsigned
brw_reg_type_to_hw_type(const struct brw_context *brw,
                        enum brw_reg_type type, unsigned file)
{
   if (file == BRW_IMMEDIATE_VALUE) {
      /* Table indexed by brw_reg_type; -1 marks types with no immediate
       * hardware encoding.
       */
      const static int imm_hw_types[] = {
         [BRW_REGISTER_TYPE_UD] = BRW_HW_REG_TYPE_UD,
         [BRW_REGISTER_TYPE_D]  = BRW_HW_REG_TYPE_D,
         [BRW_REGISTER_TYPE_UW] = BRW_HW_REG_TYPE_UW,
         [BRW_REGISTER_TYPE_W]  = BRW_HW_REG_TYPE_W,
         [BRW_REGISTER_TYPE_F]  = BRW_HW_REG_TYPE_F,
         [BRW_REGISTER_TYPE_UB] = -1,
         [BRW_REGISTER_TYPE_B]  = -1,
         [BRW_REGISTER_TYPE_UV] = BRW_HW_REG_IMM_TYPE_UV,
         [BRW_REGISTER_TYPE_VF] = BRW_HW_REG_IMM_TYPE_VF,
         [BRW_REGISTER_TYPE_V]  = BRW_HW_REG_IMM_TYPE_V,
         [BRW_REGISTER_TYPE_DF] = GEN8_HW_REG_IMM_TYPE_DF,
         [BRW_REGISTER_TYPE_HF] = GEN8_HW_REG_IMM_TYPE_HF,
         [BRW_REGISTER_TYPE_UQ] = GEN8_HW_REG_TYPE_UQ,
         [BRW_REGISTER_TYPE_Q]  = GEN8_HW_REG_TYPE_Q,
      };
      assert(type < ARRAY_SIZE(imm_hw_types));
      assert(imm_hw_types[type] != -1);
      /* DF and later immediate encodings only exist on Gen8+. */
      assert(brw->gen >= 8 || type < BRW_REGISTER_TYPE_DF);
      return imm_hw_types[type];
   } else {
      /* Non-immediate registers */
      const static int hw_types[] = {
         [BRW_REGISTER_TYPE_UD] = BRW_HW_REG_TYPE_UD,
         [BRW_REGISTER_TYPE_D]  = BRW_HW_REG_TYPE_D,
         [BRW_REGISTER_TYPE_UW] = BRW_HW_REG_TYPE_UW,
         [BRW_REGISTER_TYPE_W]  = BRW_HW_REG_TYPE_W,
         [BRW_REGISTER_TYPE_UB] = BRW_HW_REG_NON_IMM_TYPE_UB,
         [BRW_REGISTER_TYPE_B]  = BRW_HW_REG_NON_IMM_TYPE_B,
         [BRW_REGISTER_TYPE_F]  = BRW_HW_REG_TYPE_F,
         [BRW_REGISTER_TYPE_UV] = -1,
         [BRW_REGISTER_TYPE_VF] = -1,
         [BRW_REGISTER_TYPE_V]  = -1,
         [BRW_REGISTER_TYPE_DF] = GEN7_HW_REG_NON_IMM_TYPE_DF,
         [BRW_REGISTER_TYPE_HF] = GEN8_HW_REG_NON_IMM_TYPE_HF,
         [BRW_REGISTER_TYPE_UQ] = GEN8_HW_REG_TYPE_UQ,
         [BRW_REGISTER_TYPE_Q]  = GEN8_HW_REG_TYPE_Q,
      };
      assert(type < ARRAY_SIZE(hw_types));
      assert(hw_types[type] != -1);
      /* DF registers require Gen7+, HF registers require Gen8+. */
      assert(brw->gen >= 7 || type < BRW_REGISTER_TYPE_DF);
      assert(brw->gen >= 8 || type < BRW_REGISTER_TYPE_HF);
      return hw_types[type];
   }
}
157
/**
 * Encode the destination operand of an instruction: register file, type,
 * addressing mode, register/subregister numbers, and region fields.
 *
 * The bitfield layout written depends on the access mode (align1 vs align16)
 * and on direct vs register-indirect addressing.  Also sets the execution
 * size from the destination width as a final step.
 */
void
brw_set_dest(struct brw_compile *p, struct brw_instruction *insn,
	     struct brw_reg dest)
{
   if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE &&
       dest.file != BRW_MESSAGE_REGISTER_FILE)
      assert(dest.nr < 128);

   gen7_convert_mrf_to_grf(p, &dest);

   insn->bits1.da1.dest_reg_file = dest.file;
   insn->bits1.da1.dest_reg_type =
      brw_reg_type_to_hw_type(p->brw, dest.type, dest.file);
   insn->bits1.da1.dest_address_mode = dest.address_mode;

   if (dest.address_mode == BRW_ADDRESS_DIRECT) {
      insn->bits1.da1.dest_reg_nr = dest.nr;

      if (insn->header.access_mode == BRW_ALIGN_1) {
	 insn->bits1.da1.dest_subreg_nr = dest.subnr;
	 /* Stride 0 has no meaning for a destination; force stride 1. */
	 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
	    dest.hstride = BRW_HORIZONTAL_STRIDE_1;
	 insn->bits1.da1.dest_horiz_stride = dest.hstride;
      }
      else {
	 /* Align16: subregister is in units of 16 bytes, and the writemask
	  * selects channels.
	  */
	 insn->bits1.da16.dest_subreg_nr = dest.subnr / 16;
	 insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask;
	 if (dest.file == BRW_GENERAL_REGISTER_FILE ||
	     dest.file == BRW_MESSAGE_REGISTER_FILE) {
	    assert(dest.dw1.bits.writemask != 0);
	 }
	 /* From the Ivybridge PRM, Vol 4, Part 3, Section 5.2.4.1:
	  *    Although Dst.HorzStride is a don't care for Align16, HW needs
	  *    this to be programmed as "01".
	  */
	 insn->bits1.da16.dest_horiz_stride = 1;
      }
   }
   else {
      insn->bits1.ia1.dest_subreg_nr = dest.subnr;

      /* These are different sizes in align1 vs align16:
       */
      if (insn->header.access_mode == BRW_ALIGN_1) {
	 insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset;
	 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
	    dest.hstride = BRW_HORIZONTAL_STRIDE_1;
	 insn->bits1.ia1.dest_horiz_stride = dest.hstride;
      }
      else {
	 insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset;
	 /* even ignored in da16, still need to set as '01' */
	 insn->bits1.ia16.dest_horiz_stride = 1;
      }
   }

   /* NEW: Set the execution size based on dest.width and
    * insn->compression_control:
    */
   guess_execution_size(p, insn, dest);
}
219
220 extern int reg_type_size[];
221
222 static void
223 validate_reg(struct brw_instruction *insn, struct brw_reg reg)
224 {
225 int hstride_for_reg[] = {0, 1, 2, 4};
226 int vstride_for_reg[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256};
227 int width_for_reg[] = {1, 2, 4, 8, 16};
228 int execsize_for_reg[] = {1, 2, 4, 8, 16};
229 int width, hstride, vstride, execsize;
230
231 if (reg.file == BRW_IMMEDIATE_VALUE) {
232 /* 3.3.6: Region Parameters. Restriction: Immediate vectors
233 * mean the destination has to be 128-bit aligned and the
234 * destination horiz stride has to be a word.
235 */
236 if (reg.type == BRW_REGISTER_TYPE_V) {
237 assert(hstride_for_reg[insn->bits1.da1.dest_horiz_stride] *
238 reg_type_size[insn->bits1.da1.dest_reg_type] == 2);
239 }
240
241 return;
242 }
243
244 if (reg.file == BRW_ARCHITECTURE_REGISTER_FILE &&
245 reg.file == BRW_ARF_NULL)
246 return;
247
248 assert(reg.hstride >= 0 && reg.hstride < Elements(hstride_for_reg));
249 hstride = hstride_for_reg[reg.hstride];
250
251 if (reg.vstride == 0xf) {
252 vstride = -1;
253 } else {
254 assert(reg.vstride >= 0 && reg.vstride < Elements(vstride_for_reg));
255 vstride = vstride_for_reg[reg.vstride];
256 }
257
258 assert(reg.width >= 0 && reg.width < Elements(width_for_reg));
259 width = width_for_reg[reg.width];
260
261 assert(insn->header.execution_size >= 0 &&
262 insn->header.execution_size < Elements(execsize_for_reg));
263 execsize = execsize_for_reg[insn->header.execution_size];
264
265 /* Restrictions from 3.3.10: Register Region Restrictions. */
266 /* 3. */
267 assert(execsize >= width);
268
269 /* 4. */
270 if (execsize == width && hstride != 0) {
271 assert(vstride == -1 || vstride == width * hstride);
272 }
273
274 /* 5. */
275 if (execsize == width && hstride == 0) {
276 /* no restriction on vstride. */
277 }
278
279 /* 6. */
280 if (width == 1) {
281 assert(hstride == 0);
282 }
283
284 /* 7. */
285 if (execsize == 1 && width == 1) {
286 assert(hstride == 0);
287 assert(vstride == 0);
288 }
289
290 /* 8. */
291 if (vstride == 0 && hstride == 0) {
292 assert(width == 1);
293 }
294
295 /* 10. Check destination issues. */
296 }
297
298 void
299 brw_set_src0(struct brw_compile *p, struct brw_instruction *insn,
300 struct brw_reg reg)
301 {
302 struct brw_context *brw = p->brw;
303
304 if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE)
305 assert(reg.nr < 128);
306
307 gen7_convert_mrf_to_grf(p, &reg);
308
309 if (brw->gen >= 6 && (insn->header.opcode == BRW_OPCODE_SEND ||
310 insn->header.opcode == BRW_OPCODE_SENDC)) {
311 /* Any source modifiers or regions will be ignored, since this just
312 * identifies the MRF/GRF to start reading the message contents from.
313 * Check for some likely failures.
314 */
315 assert(!reg.negate);
316 assert(!reg.abs);
317 assert(reg.address_mode == BRW_ADDRESS_DIRECT);
318 }
319
320 validate_reg(insn, reg);
321
322 insn->bits1.da1.src0_reg_file = reg.file;
323 insn->bits1.da1.src0_reg_type =
324 brw_reg_type_to_hw_type(brw, reg.type, reg.file);
325 insn->bits2.da1.src0_abs = reg.abs;
326 insn->bits2.da1.src0_negate = reg.negate;
327 insn->bits2.da1.src0_address_mode = reg.address_mode;
328
329 if (reg.file == BRW_IMMEDIATE_VALUE) {
330 insn->bits3.ud = reg.dw1.ud;
331
332 /* Required to set some fields in src1 as well:
333 */
334 insn->bits1.da1.src1_reg_file = 0; /* arf */
335 insn->bits1.da1.src1_reg_type = insn->bits1.da1.src0_reg_type;
336 }
337 else
338 {
339 if (reg.address_mode == BRW_ADDRESS_DIRECT) {
340 if (insn->header.access_mode == BRW_ALIGN_1) {
341 insn->bits2.da1.src0_subreg_nr = reg.subnr;
342 insn->bits2.da1.src0_reg_nr = reg.nr;
343 }
344 else {
345 insn->bits2.da16.src0_subreg_nr = reg.subnr / 16;
346 insn->bits2.da16.src0_reg_nr = reg.nr;
347 }
348 }
349 else {
350 insn->bits2.ia1.src0_subreg_nr = reg.subnr;
351
352 if (insn->header.access_mode == BRW_ALIGN_1) {
353 insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset;
354 }
355 else {
356 insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset;
357 }
358 }
359
360 if (insn->header.access_mode == BRW_ALIGN_1) {
361 if (reg.width == BRW_WIDTH_1 &&
362 insn->header.execution_size == BRW_EXECUTE_1) {
363 insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
364 insn->bits2.da1.src0_width = BRW_WIDTH_1;
365 insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0;
366 }
367 else {
368 insn->bits2.da1.src0_horiz_stride = reg.hstride;
369 insn->bits2.da1.src0_width = reg.width;
370 insn->bits2.da1.src0_vert_stride = reg.vstride;
371 }
372 }
373 else {
374 insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
375 insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
376 insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
377 insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
378
379 /* This is an oddity of the fact we're using the same
380 * descriptions for registers in align_16 as align_1:
381 */
382 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
383 insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4;
384 else
385 insn->bits2.da16.src0_vert_stride = reg.vstride;
386 }
387 }
388 }
389
390
391 void brw_set_src1(struct brw_compile *p,
392 struct brw_instruction *insn,
393 struct brw_reg reg)
394 {
395 assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
396
397 if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE)
398 assert(reg.nr < 128);
399
400 gen7_convert_mrf_to_grf(p, &reg);
401
402 validate_reg(insn, reg);
403
404 insn->bits1.da1.src1_reg_file = reg.file;
405 insn->bits1.da1.src1_reg_type =
406 brw_reg_type_to_hw_type(p->brw, reg.type, reg.file);
407 insn->bits3.da1.src1_abs = reg.abs;
408 insn->bits3.da1.src1_negate = reg.negate;
409
410 /* Only src1 can be immediate in two-argument instructions.
411 */
412 assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE);
413
414 if (reg.file == BRW_IMMEDIATE_VALUE) {
415 insn->bits3.ud = reg.dw1.ud;
416 }
417 else {
418 /* This is a hardware restriction, which may or may not be lifted
419 * in the future:
420 */
421 assert (reg.address_mode == BRW_ADDRESS_DIRECT);
422 /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */
423
424 if (insn->header.access_mode == BRW_ALIGN_1) {
425 insn->bits3.da1.src1_subreg_nr = reg.subnr;
426 insn->bits3.da1.src1_reg_nr = reg.nr;
427 }
428 else {
429 insn->bits3.da16.src1_subreg_nr = reg.subnr / 16;
430 insn->bits3.da16.src1_reg_nr = reg.nr;
431 }
432
433 if (insn->header.access_mode == BRW_ALIGN_1) {
434 if (reg.width == BRW_WIDTH_1 &&
435 insn->header.execution_size == BRW_EXECUTE_1) {
436 insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
437 insn->bits3.da1.src1_width = BRW_WIDTH_1;
438 insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0;
439 }
440 else {
441 insn->bits3.da1.src1_horiz_stride = reg.hstride;
442 insn->bits3.da1.src1_width = reg.width;
443 insn->bits3.da1.src1_vert_stride = reg.vstride;
444 }
445 }
446 else {
447 insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
448 insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
449 insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
450 insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
451
452 /* This is an oddity of the fact we're using the same
453 * descriptions for registers in align_16 as align_1:
454 */
455 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
456 insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4;
457 else
458 insn->bits3.da16.src1_vert_stride = reg.vstride;
459 }
460 }
461 }
462
/**
 * Set the Message Descriptor and Extended Message Descriptor fields
 * for SEND messages.
 *
 * \note This zeroes out the Function Control bits, so it must be called
 *       \b before filling out any message-specific data.  Callers can
 *       choose not to fill in irrelevant bits; they will be zero.
 */
static void
brw_set_message_descriptor(struct brw_compile *p,
			   struct brw_instruction *inst,
			   enum brw_message_target sfid,
			   unsigned msg_length,
			   unsigned response_length,
			   bool header_present,
			   bool end_of_thread)
{
   struct brw_context *brw = p->brw;

   /* Clear src1 (where the descriptor lives) to zero out any stale
    * Function Control bits before setting the generic fields below.
    */
   brw_set_src1(p, inst, brw_imm_d(0));

   if (brw->gen >= 5) {
      inst->bits3.generic_gen5.header_present = header_present;
      inst->bits3.generic_gen5.response_length = response_length;
      inst->bits3.generic_gen5.msg_length = msg_length;
      inst->bits3.generic_gen5.end_of_thread = end_of_thread;

      if (brw->gen >= 6) {
	 /* On Gen6+ Message target/SFID goes in bits 27:24 of the header */
	 inst->header.destreg__conditionalmod = sfid;
      } else {
	 /* Set Extended Message Descriptor (ex_desc) */
	 inst->bits2.send_gen5.sfid = sfid;
	 inst->bits2.send_gen5.end_of_thread = end_of_thread;
      }
   } else {
      /* Pre-Gen5 layout: SFID and EOT live in the descriptor itself. */
      inst->bits3.generic.response_length = response_length;
      inst->bits3.generic.msg_length = msg_length;
      inst->bits3.generic.msg_target = sfid;
      inst->bits3.generic.end_of_thread = end_of_thread;
   }
}
505
506 static void brw_set_math_message( struct brw_compile *p,
507 struct brw_instruction *insn,
508 unsigned function,
509 unsigned integer_type,
510 bool low_precision,
511 unsigned dataType )
512 {
513 struct brw_context *brw = p->brw;
514 unsigned msg_length;
515 unsigned response_length;
516
517 /* Infer message length from the function */
518 switch (function) {
519 case BRW_MATH_FUNCTION_POW:
520 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT:
521 case BRW_MATH_FUNCTION_INT_DIV_REMAINDER:
522 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER:
523 msg_length = 2;
524 break;
525 default:
526 msg_length = 1;
527 break;
528 }
529
530 /* Infer response length from the function */
531 switch (function) {
532 case BRW_MATH_FUNCTION_SINCOS:
533 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER:
534 response_length = 2;
535 break;
536 default:
537 response_length = 1;
538 break;
539 }
540
541
542 brw_set_message_descriptor(p, insn, BRW_SFID_MATH,
543 msg_length, response_length, false, false);
544 if (brw->gen == 5) {
545 insn->bits3.math_gen5.function = function;
546 insn->bits3.math_gen5.int_type = integer_type;
547 insn->bits3.math_gen5.precision = low_precision;
548 insn->bits3.math_gen5.saturate = insn->header.saturate;
549 insn->bits3.math_gen5.data_type = dataType;
550 insn->bits3.math_gen5.snapshot = 0;
551 } else {
552 insn->bits3.math.function = function;
553 insn->bits3.math.int_type = integer_type;
554 insn->bits3.math.precision = low_precision;
555 insn->bits3.math.saturate = insn->header.saturate;
556 insn->bits3.math.data_type = dataType;
557 }
558 insn->header.saturate = 0;
559 }
560
561
/* Fill in the URB descriptor for an FF_SYNC message (Gen5 clipper/GS
 * handshake).  Message length is always 1 (just the header); most URB
 * descriptor fields are unused by FF_SYNC and written as zero.
 */
static void brw_set_ff_sync_message(struct brw_compile *p,
				    struct brw_instruction *insn,
				    bool allocate,
				    unsigned response_length,
				    bool end_of_thread)
{
   brw_set_message_descriptor(p, insn, BRW_SFID_URB,
			      1, response_length, true, end_of_thread);
   insn->bits3.urb_gen5.opcode = 1; /* FF_SYNC */
   insn->bits3.urb_gen5.offset = 0; /* Not used by FF_SYNC */
   insn->bits3.urb_gen5.swizzle_control = 0; /* Not used by FF_SYNC */
   insn->bits3.urb_gen5.allocate = allocate;
   insn->bits3.urb_gen5.used = 0; /* Not used by FF_SYNC */
   insn->bits3.urb_gen5.complete = 0; /* Not used by FF_SYNC */
}
577
/* Fill in the descriptor for a URB write message, using the gen-specific
 * bitfield layout.  \p flags selects EOT, allocate, per-slot offset,
 * complete, unused, and OWORD/HWORD variants where the generation
 * supports them.
 */
static void brw_set_urb_message( struct brw_compile *p,
				 struct brw_instruction *insn,
				 enum brw_urb_write_flags flags,
				 unsigned msg_length,
				 unsigned response_length,
				 unsigned offset,
				 unsigned swizzle_control )
{
   struct brw_context *brw = p->brw;

   brw_set_message_descriptor(p, insn, BRW_SFID_URB,
			      msg_length, response_length, true,
			      flags & BRW_URB_WRITE_EOT);
   if (brw->gen == 7) {
      if (flags & BRW_URB_WRITE_OWORD) {
	 assert(msg_length == 2); /* header + one OWORD of data */
	 insn->bits3.urb_gen7.opcode = BRW_URB_OPCODE_WRITE_OWORD;
      } else {
	 insn->bits3.urb_gen7.opcode = BRW_URB_OPCODE_WRITE_HWORD;
      }
      insn->bits3.urb_gen7.offset = offset;
      /* Gen7 URB writes don't support the transpose swizzle mode. */
      assert(swizzle_control != BRW_URB_SWIZZLE_TRANSPOSE);
      insn->bits3.urb_gen7.swizzle_control = swizzle_control;
      insn->bits3.urb_gen7.per_slot_offset =
	 flags & BRW_URB_WRITE_PER_SLOT_OFFSET ? 1 : 0;
      insn->bits3.urb_gen7.complete = flags & BRW_URB_WRITE_COMPLETE ? 1 : 0;
   } else if (brw->gen >= 5) {
      insn->bits3.urb_gen5.opcode = 0; /* URB_WRITE */
      insn->bits3.urb_gen5.offset = offset;
      insn->bits3.urb_gen5.swizzle_control = swizzle_control;
      insn->bits3.urb_gen5.allocate = flags & BRW_URB_WRITE_ALLOCATE ? 1 : 0;
      insn->bits3.urb_gen5.used = flags & BRW_URB_WRITE_UNUSED ? 0 : 1;
      insn->bits3.urb_gen5.complete = flags & BRW_URB_WRITE_COMPLETE ? 1 : 0;
   } else {
      insn->bits3.urb.opcode = 0; /* ? */
      insn->bits3.urb.offset = offset;
      insn->bits3.urb.swizzle_control = swizzle_control;
      insn->bits3.urb.allocate = flags & BRW_URB_WRITE_ALLOCATE ? 1 : 0;
      insn->bits3.urb.used = flags & BRW_URB_WRITE_UNUSED ? 0 : 1;
      insn->bits3.urb.complete = flags & BRW_URB_WRITE_COMPLETE ? 1 : 0;
   }
}
620
/**
 * Fill in the descriptor for a data-port write message, choosing the
 * shared function (SFID) and bitfield layout appropriate to the hardware
 * generation.
 */
void
brw_set_dp_write_message(struct brw_compile *p,
			 struct brw_instruction *insn,
			 unsigned binding_table_index,
			 unsigned msg_control,
			 unsigned msg_type,
			 unsigned msg_length,
			 bool header_present,
			 unsigned last_render_target,
			 unsigned response_length,
			 unsigned end_of_thread,
			 unsigned send_commit_msg)
{
   struct brw_context *brw = p->brw;
   unsigned sfid;

   if (brw->gen >= 7) {
      /* Use the Render Cache for RT writes; otherwise use the Data Cache */
      if (msg_type == GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE)
	 sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
      else
	 sfid = GEN7_SFID_DATAPORT_DATA_CACHE;
   } else if (brw->gen == 6) {
      /* Use the render cache for all write messages. */
      sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
   } else {
      sfid = BRW_SFID_DATAPORT_WRITE;
   }

   brw_set_message_descriptor(p, insn, sfid, msg_length, response_length,
			      header_present, end_of_thread);

   /* Per-generation descriptor bitfield layouts. */
   if (brw->gen >= 7) {
      insn->bits3.gen7_dp.binding_table_index = binding_table_index;
      insn->bits3.gen7_dp.msg_control = msg_control;
      insn->bits3.gen7_dp.last_render_target = last_render_target;
      insn->bits3.gen7_dp.msg_type = msg_type;
   } else if (brw->gen == 6) {
      insn->bits3.gen6_dp.binding_table_index = binding_table_index;
      insn->bits3.gen6_dp.msg_control = msg_control;
      insn->bits3.gen6_dp.last_render_target = last_render_target;
      insn->bits3.gen6_dp.msg_type = msg_type;
      insn->bits3.gen6_dp.send_commit_msg = send_commit_msg;
   } else if (brw->gen == 5) {
      insn->bits3.dp_write_gen5.binding_table_index = binding_table_index;
      insn->bits3.dp_write_gen5.msg_control = msg_control;
      insn->bits3.dp_write_gen5.last_render_target = last_render_target;
      insn->bits3.dp_write_gen5.msg_type = msg_type;
      insn->bits3.dp_write_gen5.send_commit_msg = send_commit_msg;
   } else {
      insn->bits3.dp_write.binding_table_index = binding_table_index;
      insn->bits3.dp_write.msg_control = msg_control;
      insn->bits3.dp_write.last_render_target = last_render_target;
      insn->bits3.dp_write.msg_type = msg_type;
      insn->bits3.dp_write.send_commit_msg = send_commit_msg;
   }
}
678
/**
 * Fill in the descriptor for a data-port read message, choosing the SFID
 * and bitfield layout appropriate to the hardware generation.  On Gen6 the
 * \p target_cache argument picks the render or sampler cache SFID; on
 * earlier parts it is encoded in the descriptor itself.
 */
void
brw_set_dp_read_message(struct brw_compile *p,
			struct brw_instruction *insn,
			unsigned binding_table_index,
			unsigned msg_control,
			unsigned msg_type,
			unsigned target_cache,
			unsigned msg_length,
			bool header_present,
			unsigned response_length)
{
   struct brw_context *brw = p->brw;
   unsigned sfid;

   if (brw->gen >= 7) {
      sfid = GEN7_SFID_DATAPORT_DATA_CACHE;
   } else if (brw->gen == 6) {
      if (target_cache == BRW_DATAPORT_READ_TARGET_RENDER_CACHE)
	 sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
      else
	 sfid = GEN6_SFID_DATAPORT_SAMPLER_CACHE;
   } else {
      sfid = BRW_SFID_DATAPORT_READ;
   }

   brw_set_message_descriptor(p, insn, sfid, msg_length, response_length,
			      header_present, false);

   /* Per-generation descriptor bitfield layouts. */
   if (brw->gen >= 7) {
      insn->bits3.gen7_dp.binding_table_index = binding_table_index;
      insn->bits3.gen7_dp.msg_control = msg_control;
      insn->bits3.gen7_dp.last_render_target = 0;
      insn->bits3.gen7_dp.msg_type = msg_type;
   } else if (brw->gen == 6) {
      insn->bits3.gen6_dp.binding_table_index = binding_table_index;
      insn->bits3.gen6_dp.msg_control = msg_control;
      insn->bits3.gen6_dp.last_render_target = 0;
      insn->bits3.gen6_dp.msg_type = msg_type;
      insn->bits3.gen6_dp.send_commit_msg = 0;
   } else if (brw->gen == 5) {
      insn->bits3.dp_read_gen5.binding_table_index = binding_table_index;
      insn->bits3.dp_read_gen5.msg_control = msg_control;
      insn->bits3.dp_read_gen5.msg_type = msg_type;
      insn->bits3.dp_read_gen5.target_cache = target_cache;
   } else if (brw->is_g4x) {
      insn->bits3.dp_read_g4x.binding_table_index = binding_table_index; /*0:7*/
      insn->bits3.dp_read_g4x.msg_control = msg_control;  /*8:10*/
      insn->bits3.dp_read_g4x.msg_type = msg_type;  /*11:13*/
      insn->bits3.dp_read_g4x.target_cache = target_cache;  /*14:15*/
   } else {
      insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/
      insn->bits3.dp_read.msg_control = msg_control;  /*8:11*/
      insn->bits3.dp_read.msg_type = msg_type;  /*12:13*/
      insn->bits3.dp_read.target_cache = target_cache;  /*14:15*/
   }
}
735
/**
 * Fill in the descriptor for a sampler (texture) message, using the
 * bitfield layout appropriate to the hardware generation.  \p return_format
 * is only encoded on the original 965 layout; \p simd_mode only on Gen5+.
 */
void
brw_set_sampler_message(struct brw_compile *p,
                        struct brw_instruction *insn,
                        unsigned binding_table_index,
                        unsigned sampler,
                        unsigned msg_type,
                        unsigned response_length,
                        unsigned msg_length,
                        unsigned header_present,
                        unsigned simd_mode,
                        unsigned return_format)
{
   struct brw_context *brw = p->brw;

   brw_set_message_descriptor(p, insn, BRW_SFID_SAMPLER, msg_length,
			      response_length, header_present, false);

   /* Per-generation descriptor bitfield layouts. */
   if (brw->gen >= 7) {
      insn->bits3.sampler_gen7.binding_table_index = binding_table_index;
      insn->bits3.sampler_gen7.sampler = sampler;
      insn->bits3.sampler_gen7.msg_type = msg_type;
      insn->bits3.sampler_gen7.simd_mode = simd_mode;
   } else if (brw->gen >= 5) {
      insn->bits3.sampler_gen5.binding_table_index = binding_table_index;
      insn->bits3.sampler_gen5.sampler = sampler;
      insn->bits3.sampler_gen5.msg_type = msg_type;
      insn->bits3.sampler_gen5.simd_mode = simd_mode;
   } else if (brw->is_g4x) {
      insn->bits3.sampler_g4x.binding_table_index = binding_table_index;
      insn->bits3.sampler_g4x.sampler = sampler;
      insn->bits3.sampler_g4x.msg_type = msg_type;
   } else {
      insn->bits3.sampler.binding_table_index = binding_table_index;
      insn->bits3.sampler.sampler = sampler;
      insn->bits3.sampler.msg_type = msg_type;
      insn->bits3.sampler.return_format = return_format;
   }
}
774
775
776 #define next_insn brw_next_insn
777 struct brw_instruction *
778 brw_next_insn(struct brw_compile *p, unsigned opcode)
779 {
780 struct brw_instruction *insn;
781
782 if (p->nr_insn + 1 > p->store_size) {
783 if (0) {
784 fprintf(stderr, "incresing the store size to %d\n",
785 p->store_size << 1);
786 }
787 p->store_size <<= 1;
788 p->store = reralloc(p->mem_ctx, p->store,
789 struct brw_instruction, p->store_size);
790 if (!p->store)
791 assert(!"realloc eu store memeory failed");
792 }
793
794 p->next_insn_offset += 16;
795 insn = &p->store[p->nr_insn++];
796 memcpy(insn, p->current, sizeof(*insn));
797
798 /* Reset this one-shot flag:
799 */
800
801 if (p->current->header.destreg__conditionalmod) {
802 p->current->header.destreg__conditionalmod = 0;
803 p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
804 }
805
806 insn->header.opcode = opcode;
807 return insn;
808 }
809
/* Emit a one-source ALU instruction: allocate the next instruction slot
 * and encode its destination and single source operand.
 */
static struct brw_instruction *brw_alu1( struct brw_compile *p,
					 unsigned opcode,
					 struct brw_reg dest,
					 struct brw_reg src )
{
   struct brw_instruction *insn = next_insn(p, opcode);
   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src);
   return insn;
}
820
/* Emit a two-source ALU instruction: allocate the next instruction slot
 * and encode its destination and both source operands.
 */
static struct brw_instruction *brw_alu2(struct brw_compile *p,
					unsigned opcode,
					struct brw_reg dest,
					struct brw_reg src0,
					struct brw_reg src1 )
{
   struct brw_instruction *insn = next_insn(p, opcode);
   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, src1);
   return insn;
}
833
834 static int
835 get_3src_subreg_nr(struct brw_reg reg)
836 {
837 if (reg.vstride == BRW_VERTICAL_STRIDE_0) {
838 assert(brw_is_single_value_swizzle(reg.dw1.bits.swizzle));
839 return reg.subnr / 4 + BRW_GET_SWZ(reg.dw1.bits.swizzle, 0);
840 } else {
841 return reg.subnr / 4;
842 }
843 }
844
/* Emit a three-source ALU instruction (MAD, LRP, BFE, BFI2, ...).
 *
 * 3-source instructions use a dedicated encoding: always align16, GRF (or
 * MRF destination) only, direct addressing only, and a restricted set of
 * types.  On Gen7+ the single src/dst type fields are derived from the
 * destination type.
 */
static struct brw_instruction *brw_alu3(struct brw_compile *p,
					unsigned opcode,
					struct brw_reg dest,
					struct brw_reg src0,
					struct brw_reg src1,
					struct brw_reg src2)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn = next_insn(p, opcode);

   gen7_convert_mrf_to_grf(p, &dest);

   assert(insn->header.access_mode == BRW_ALIGN_16);

   /* Destination: GRF or MRF, direct addressing, F/D/UD type only. */
   assert(dest.file == BRW_GENERAL_REGISTER_FILE ||
	  dest.file == BRW_MESSAGE_REGISTER_FILE);
   assert(dest.nr < 128);
   assert(dest.address_mode == BRW_ADDRESS_DIRECT);
   assert(dest.type == BRW_REGISTER_TYPE_F ||
          dest.type == BRW_REGISTER_TYPE_D ||
          dest.type == BRW_REGISTER_TYPE_UD);
   /* Single-bit register-file field: 1 selects MRF, 0 selects GRF. */
   insn->bits1.da3src.dest_reg_file = (dest.file == BRW_MESSAGE_REGISTER_FILE);
   insn->bits1.da3src.dest_reg_nr = dest.nr;
   insn->bits1.da3src.dest_subreg_nr = dest.subnr / 16;
   insn->bits1.da3src.dest_writemask = dest.dw1.bits.writemask;
   guess_execution_size(p, insn, dest);

   /* Sources must be direct GRF references; rep_ctrl marks a replicated
    * scalar source (vstride 0).
    */
   assert(src0.file == BRW_GENERAL_REGISTER_FILE);
   assert(src0.address_mode == BRW_ADDRESS_DIRECT);
   assert(src0.nr < 128);
   insn->bits2.da3src.src0_swizzle = src0.dw1.bits.swizzle;
   insn->bits2.da3src.src0_subreg_nr = get_3src_subreg_nr(src0);
   insn->bits2.da3src.src0_reg_nr = src0.nr;
   insn->bits1.da3src.src0_abs = src0.abs;
   insn->bits1.da3src.src0_negate = src0.negate;
   insn->bits2.da3src.src0_rep_ctrl = src0.vstride == BRW_VERTICAL_STRIDE_0;

   assert(src1.file == BRW_GENERAL_REGISTER_FILE);
   assert(src1.address_mode == BRW_ADDRESS_DIRECT);
   assert(src1.nr < 128);
   insn->bits2.da3src.src1_swizzle = src1.dw1.bits.swizzle;
   /* src1's subregister number straddles the bits2/bits3 dword boundary. */
   insn->bits2.da3src.src1_subreg_nr_low = get_3src_subreg_nr(src1) & 0x3;
   insn->bits3.da3src.src1_subreg_nr_high = get_3src_subreg_nr(src1) >> 2;
   insn->bits2.da3src.src1_rep_ctrl = src1.vstride == BRW_VERTICAL_STRIDE_0;
   insn->bits3.da3src.src1_reg_nr = src1.nr;
   insn->bits1.da3src.src1_abs = src1.abs;
   insn->bits1.da3src.src1_negate = src1.negate;

   assert(src2.file == BRW_GENERAL_REGISTER_FILE);
   assert(src2.address_mode == BRW_ADDRESS_DIRECT);
   assert(src2.nr < 128);
   insn->bits3.da3src.src2_swizzle = src2.dw1.bits.swizzle;
   insn->bits3.da3src.src2_subreg_nr = get_3src_subreg_nr(src2);
   insn->bits3.da3src.src2_rep_ctrl = src2.vstride == BRW_VERTICAL_STRIDE_0;
   insn->bits3.da3src.src2_reg_nr = src2.nr;
   insn->bits1.da3src.src2_abs = src2.abs;
   insn->bits1.da3src.src2_negate = src2.negate;

   if (brw->gen >= 7) {
      /* Set both the source and destination types based on dest.type,
       * ignoring the source register types.  The MAD and LRP emitters ensure
       * that all four types are float.  The BFE and BFI2 emitters, however,
       * may send us mixed D and UD types and want us to ignore that and use
       * the destination type.
       */
      switch (dest.type) {
      case BRW_REGISTER_TYPE_F:
         insn->bits1.da3src.src_type = BRW_3SRC_TYPE_F;
         insn->bits1.da3src.dst_type = BRW_3SRC_TYPE_F;
         break;
      case BRW_REGISTER_TYPE_D:
         insn->bits1.da3src.src_type = BRW_3SRC_TYPE_D;
         insn->bits1.da3src.dst_type = BRW_3SRC_TYPE_D;
         break;
      case BRW_REGISTER_TYPE_UD:
         insn->bits1.da3src.src_type = BRW_3SRC_TYPE_UD;
         insn->bits1.da3src.dst_type = BRW_3SRC_TYPE_UD;
         break;
      }
   }

   return insn;
}
928
929
930 /***********************************************************************
931 * Convenience routines.
932 */
/* Define brw_<OP>(): emit a one-source ALU instruction via brw_alu1(). */
#define ALU1(OP)                                              \
struct brw_instruction *brw_##OP(struct brw_compile *p,       \
              struct brw_reg dest,                            \
              struct brw_reg src0)                            \
{                                                             \
   return brw_alu1(p, BRW_OPCODE_##OP, dest, src0);           \
}
940
/* Define brw_<OP>(): emit a two-source ALU instruction via brw_alu2(). */
#define ALU2(OP)                                              \
struct brw_instruction *brw_##OP(struct brw_compile *p,       \
              struct brw_reg dest,                            \
              struct brw_reg src0,                            \
              struct brw_reg src1)                            \
{                                                             \
   return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1);     \
}
949
/* Define brw_<OP>(): emit a three-source ALU instruction via brw_alu3(). */
#define ALU3(OP)                                              \
struct brw_instruction *brw_##OP(struct brw_compile *p,       \
              struct brw_reg dest,                            \
              struct brw_reg src0,                            \
              struct brw_reg src1,                            \
              struct brw_reg src2)                            \
{                                                             \
   return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2); \
}
959
/* Like ALU3, but assert that all four operands are floats (used for
 * MAD/LRP, which only accept float operands in this encoding). */
#define ALU3F(OP)                                             \
struct brw_instruction *brw_##OP(struct brw_compile *p,       \
              struct brw_reg dest,                            \
              struct brw_reg src0,                            \
              struct brw_reg src1,                            \
              struct brw_reg src2)                            \
{                                                             \
   assert(dest.type == BRW_REGISTER_TYPE_F);                  \
   assert(src0.type == BRW_REGISTER_TYPE_F);                  \
   assert(src1.type == BRW_REGISTER_TYPE_F);                  \
   assert(src2.type == BRW_REGISTER_TYPE_F);                  \
   return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2); \
}
973
974 /* Rounding operations (other than RNDD) require two instructions - the first
975 * stores a rounded value (possibly the wrong way) in the dest register, but
976 * also sets a per-channel "increment bit" in the flag register. A predicated
977 * add of 1.0 fixes dest to contain the desired result.
978 *
979 * Sandybridge and later appear to round correctly without an ADD.
980 */
/* Define brw_<OP>() for RNDZ/RNDE (see the rounding comment above). */
#define ROUND(OP)                                             \
void brw_##OP(struct brw_compile *p,                          \
              struct brw_reg dest,                            \
              struct brw_reg src)                             \
{                                                             \
   struct brw_instruction *rnd, *add;                         \
   rnd = next_insn(p, BRW_OPCODE_##OP);                       \
   brw_set_dest(p, rnd, dest);                                \
   brw_set_src0(p, rnd, src);                                 \
                                                              \
   if (p->brw->gen < 6) {                                     \
      /* turn on round-increments */                          \
      rnd->header.destreg__conditionalmod = BRW_CONDITIONAL_R; \
      add = brw_ADD(p, dest, dest, brw_imm_f(1.0f));          \
      add->header.predicate_control = BRW_PREDICATE_NORMAL;   \
   }                                                          \
}
998
999
/* Instantiate the convenience emitters: each line expands to a
 * brw_<OP>() function wrapping brw_alu1/brw_alu2/brw_alu3 (or, for
 * ROUND, the conditional two-instruction pre-gen6 rounding sequence).
 */
ALU1(MOV)
ALU2(SEL)
ALU1(NOT)
ALU2(AND)
ALU2(OR)
ALU2(XOR)
ALU2(SHR)
ALU2(SHL)
ALU2(ASR)
ALU1(F32TO16)
ALU1(F16TO32)
ALU1(FRC)
ALU1(RNDD)
ALU2(MAC)
ALU2(MACH)
ALU1(LZD)
ALU2(DP4)
ALU2(DPH)
ALU2(DP3)
ALU2(DP2)
ALU2(LINE)
ALU2(PLN)
ALU3F(MAD)
ALU3F(LRP)
ALU1(BFREV)
ALU3(BFE)
ALU2(BFI1)
ALU3(BFI2)
ALU1(FBH)
ALU1(FBL)
ALU1(CBIT)
ALU2(ADDC)
ALU2(SUBB)

ROUND(RNDZ)
ROUND(RNDE)
1036
1037
1038 struct brw_instruction *brw_ADD(struct brw_compile *p,
1039 struct brw_reg dest,
1040 struct brw_reg src0,
1041 struct brw_reg src1)
1042 {
1043 /* 6.2.2: add */
1044 if (src0.type == BRW_REGISTER_TYPE_F ||
1045 (src0.file == BRW_IMMEDIATE_VALUE &&
1046 src0.type == BRW_REGISTER_TYPE_VF)) {
1047 assert(src1.type != BRW_REGISTER_TYPE_UD);
1048 assert(src1.type != BRW_REGISTER_TYPE_D);
1049 }
1050
1051 if (src1.type == BRW_REGISTER_TYPE_F ||
1052 (src1.file == BRW_IMMEDIATE_VALUE &&
1053 src1.type == BRW_REGISTER_TYPE_VF)) {
1054 assert(src0.type != BRW_REGISTER_TYPE_UD);
1055 assert(src0.type != BRW_REGISTER_TYPE_D);
1056 }
1057
1058 return brw_alu2(p, BRW_OPCODE_ADD, dest, src0, src1);
1059 }
1060
1061 struct brw_instruction *brw_AVG(struct brw_compile *p,
1062 struct brw_reg dest,
1063 struct brw_reg src0,
1064 struct brw_reg src1)
1065 {
1066 assert(dest.type == src0.type);
1067 assert(src0.type == src1.type);
1068 switch (src0.type) {
1069 case BRW_REGISTER_TYPE_B:
1070 case BRW_REGISTER_TYPE_UB:
1071 case BRW_REGISTER_TYPE_W:
1072 case BRW_REGISTER_TYPE_UW:
1073 case BRW_REGISTER_TYPE_D:
1074 case BRW_REGISTER_TYPE_UD:
1075 break;
1076 default:
1077 assert(!"Bad type for brw_AVG");
1078 }
1079
1080 return brw_alu2(p, BRW_OPCODE_AVG, dest, src0, src1);
1081 }
1082
1083 struct brw_instruction *brw_MUL(struct brw_compile *p,
1084 struct brw_reg dest,
1085 struct brw_reg src0,
1086 struct brw_reg src1)
1087 {
1088 /* 6.32.38: mul */
1089 if (src0.type == BRW_REGISTER_TYPE_D ||
1090 src0.type == BRW_REGISTER_TYPE_UD ||
1091 src1.type == BRW_REGISTER_TYPE_D ||
1092 src1.type == BRW_REGISTER_TYPE_UD) {
1093 assert(dest.type != BRW_REGISTER_TYPE_F);
1094 }
1095
1096 if (src0.type == BRW_REGISTER_TYPE_F ||
1097 (src0.file == BRW_IMMEDIATE_VALUE &&
1098 src0.type == BRW_REGISTER_TYPE_VF)) {
1099 assert(src1.type != BRW_REGISTER_TYPE_UD);
1100 assert(src1.type != BRW_REGISTER_TYPE_D);
1101 }
1102
1103 if (src1.type == BRW_REGISTER_TYPE_F ||
1104 (src1.file == BRW_IMMEDIATE_VALUE &&
1105 src1.type == BRW_REGISTER_TYPE_VF)) {
1106 assert(src0.type != BRW_REGISTER_TYPE_UD);
1107 assert(src0.type != BRW_REGISTER_TYPE_D);
1108 }
1109
1110 assert(src0.file != BRW_ARCHITECTURE_REGISTER_FILE ||
1111 src0.nr != BRW_ARF_ACCUMULATOR);
1112 assert(src1.file != BRW_ARCHITECTURE_REGISTER_FILE ||
1113 src1.nr != BRW_ARF_ACCUMULATOR);
1114
1115 return brw_alu2(p, BRW_OPCODE_MUL, dest, src0, src1);
1116 }
1117
1118
1119 void brw_NOP(struct brw_compile *p)
1120 {
1121 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP);
1122 brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
1123 brw_set_src0(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
1124 brw_set_src1(p, insn, brw_imm_ud(0x0));
1125 }
1126
1127
1128
1129
1130
1131 /***********************************************************************
1132 * Comparisons, if/else/endif
1133 */
1134
1135 struct brw_instruction *brw_JMPI(struct brw_compile *p,
1136 struct brw_reg dest,
1137 struct brw_reg src0,
1138 struct brw_reg src1)
1139 {
1140 struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1);
1141
1142 insn->header.execution_size = 1;
1143 insn->header.compression_control = BRW_COMPRESSION_NONE;
1144 insn->header.mask_control = BRW_MASK_DISABLE;
1145
1146 p->current->header.predicate_control = BRW_PREDICATE_NONE;
1147
1148 return insn;
1149 }
1150
1151 static void
1152 push_if_stack(struct brw_compile *p, struct brw_instruction *inst)
1153 {
1154 p->if_stack[p->if_stack_depth] = inst - p->store;
1155
1156 p->if_stack_depth++;
1157 if (p->if_stack_array_size <= p->if_stack_depth) {
1158 p->if_stack_array_size *= 2;
1159 p->if_stack = reralloc(p->mem_ctx, p->if_stack, int,
1160 p->if_stack_array_size);
1161 }
1162 }
1163
1164 static struct brw_instruction *
1165 pop_if_stack(struct brw_compile *p)
1166 {
1167 p->if_stack_depth--;
1168 return &p->store[p->if_stack[p->if_stack_depth]];
1169 }
1170
1171 static void
1172 push_loop_stack(struct brw_compile *p, struct brw_instruction *inst)
1173 {
1174 if (p->loop_stack_array_size < p->loop_stack_depth) {
1175 p->loop_stack_array_size *= 2;
1176 p->loop_stack = reralloc(p->mem_ctx, p->loop_stack, int,
1177 p->loop_stack_array_size);
1178 p->if_depth_in_loop = reralloc(p->mem_ctx, p->if_depth_in_loop, int,
1179 p->loop_stack_array_size);
1180 }
1181
1182 p->loop_stack[p->loop_stack_depth] = inst - p->store;
1183 p->loop_stack_depth++;
1184 p->if_depth_in_loop[p->loop_stack_depth] = 0;
1185 }
1186
1187 static struct brw_instruction *
1188 get_inner_do_insn(struct brw_compile *p)
1189 {
1190 return &p->store[p->loop_stack[p->loop_stack_depth - 1]];
1191 }
1192
1193 /* EU takes the value from the flag register and pushes it onto some
1194 * sort of a stack (presumably merging with any flag value already on
1195 * the stack). Within an if block, the flags at the top of the stack
1196 * control execution on each channel of the unit, eg. on each of the
1197 * 16 pixel values in our wm programs.
1198 *
1199 * When the matching 'else' instruction is reached (presumably by
1200 * countdown of the instruction count patched in by our ELSE/ENDIF
 * functions), the relevant flags are inverted.
1202 *
1203 * When the matching 'endif' instruction is reached, the flags are
1204 * popped off. If the stack is now empty, normal execution resumes.
1205 */
struct brw_instruction *
brw_IF(struct brw_compile *p, unsigned execute_size)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_IF);

   /* Override the defaults for this instruction.  The operand encoding
    * differs per generation:
    *  - gen4/5: IP-relative, jump count in src1 (patched later),
    *  - gen6:   jump count in the dest immediate (patched later),
    *  - gen7+:  JIP/UIP fields in bits3 (patched in patch_IF_ELSE()).
    */
   if (brw->gen < 6) {
      brw_set_dest(p, insn, brw_ip_reg());
      brw_set_src0(p, insn, brw_ip_reg());
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else if (brw->gen == 6) {
      brw_set_dest(p, insn, brw_imm_w(0));
      insn->bits1.branch_gen6.jump_count = 0;
      brw_set_src0(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
      brw_set_src1(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
   } else {
      brw_set_dest(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
      brw_set_src0(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
      brw_set_src1(p, insn, brw_imm_ud(0));
      insn->bits3.break_cont.jip = 0;   /* patched in patch_IF_ELSE() */
      insn->bits3.break_cont.uip = 0;   /* patched in patch_IF_ELSE() */
   }

   insn->header.execution_size = execute_size;
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.predicate_control = BRW_PREDICATE_NORMAL;
   insn->header.mask_control = BRW_MASK_ENABLE;
   /* Flow control needs a thread switch unless in single-program-flow. */
   if (!p->single_program_flow)
      insn->header.thread_control = BRW_THREAD_SWITCH;

   /* Don't let the IF's predication leak into following instructions. */
   p->current->header.predicate_control = BRW_PREDICATE_NONE;

   /* Remember this IF so brw_ELSE/brw_ENDIF can patch it later. */
   push_if_stack(p, insn);
   p->if_depth_in_loop[p->loop_stack_depth]++;
   return insn;
}
1246
1247 /* This function is only used for gen6-style IF instructions with an
1248 * embedded comparison (conditional modifier). It is not used on gen7.
1249 */
struct brw_instruction *
gen6_IF(struct brw_compile *p, uint32_t conditional,
        struct brw_reg src0, struct brw_reg src1)
{
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_IF);

   /* The gen6 IF carries its jump count in the dest immediate; it is
    * filled in later by patch_IF_ELSE(). */
   brw_set_dest(p, insn, brw_imm_w(0));
   if (p->compressed) {
      insn->header.execution_size = BRW_EXECUTE_16;
   } else {
      insn->header.execution_size = BRW_EXECUTE_8;
   }
   insn->bits1.branch_gen6.jump_count = 0;
   /* The operands being compared by the embedded conditional. */
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, src1);

   /* The current defaults must not have compression or predication set
    * before we install the conditional modifier. */
   assert(insn->header.compression_control == BRW_COMPRESSION_NONE);
   assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
   insn->header.destreg__conditionalmod = conditional;

   if (!p->single_program_flow)
      insn->header.thread_control = BRW_THREAD_SWITCH;

   push_if_stack(p, insn);
   return insn;
}
1278
1279 /**
1280 * In single-program-flow (SPF) mode, convert IF and ELSE into ADDs.
1281 */
static void
convert_IF_ELSE_to_ADD(struct brw_compile *p,
                       struct brw_instruction *if_inst,
                       struct brw_instruction *else_inst)
{
   /* The next instruction (where the ENDIF would be, if it existed) */
   struct brw_instruction *next_inst = &p->store[p->nr_insn];

   assert(p->single_program_flow);
   assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF);
   assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE);
   assert(if_inst->header.execution_size == BRW_EXECUTE_1);

   /* Convert IF to an ADD instruction that moves the instruction pointer
    * to the first instruction of the ELSE block. If there is no ELSE
    * block, point to where ENDIF would be. Reverse the predicate.
    *
    * There's no need to execute an ENDIF since we don't need to do any
    * stack operations, and if we're currently executing, we just want to
    * continue normally.
    */
   if_inst->header.opcode = BRW_OPCODE_ADD;
   if_inst->header.predicate_inverse = 1;

   if (else_inst != NULL) {
      /* Convert ELSE to an ADD instruction that points where the ENDIF
       * would be.
       */
      else_inst->header.opcode = BRW_OPCODE_ADD;

      /* Offsets are in bytes; each native instruction is 16 bytes. */
      if_inst->bits3.ud = (else_inst - if_inst + 1) * 16;
      else_inst->bits3.ud = (next_inst - else_inst) * 16;
   } else {
      if_inst->bits3.ud = (next_inst - if_inst) * 16;
   }
}
1318
1319 /**
1320 * Patch IF and ELSE instructions with appropriate jump targets.
1321 */
static void
patch_IF_ELSE(struct brw_compile *p,
              struct brw_instruction *if_inst,
              struct brw_instruction *else_inst,
              struct brw_instruction *endif_inst)
{
   struct brw_context *brw = p->brw;

   /* We shouldn't be patching IF and ELSE instructions in single program flow
    * mode when gen < 6, because in single program flow mode on those
    * platforms, we convert flow control instructions to conditional ADDs that
    * operate on IP (see brw_ENDIF).
    *
    * However, on Gen6, writing to IP doesn't work in single program flow mode
    * (see the SandyBridge PRM, Volume 4 part 2, p79: "When SPF is ON, IP may
    * not be updated by non-flow control instructions."). And on later
    * platforms, there is no significant benefit to converting control flow
    * instructions to conditional ADDs. So we do patch IF and ELSE
    * instructions in single program flow mode on those platforms.
    */
   if (brw->gen < 6)
      assert(!p->single_program_flow);

   assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF);
   assert(endif_inst != NULL);
   assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE);

   unsigned br = 1;
   /* Jump count is for 64bit data chunk each, so one 128bit instruction
    * requires 2 chunks.
    */
   if (brw->gen >= 5)
      br = 2;

   assert(endif_inst->header.opcode == BRW_OPCODE_ENDIF);
   endif_inst->header.execution_size = if_inst->header.execution_size;

   if (else_inst == NULL) {
      /* Patch IF -> ENDIF */
      if (brw->gen < 6) {
         /* Turn it into an IFF, which means no mask stack operations for
          * all-false and jumping past the ENDIF.
          */
         if_inst->header.opcode = BRW_OPCODE_IFF;
         if_inst->bits3.if_else.jump_count = br * (endif_inst - if_inst + 1);
         if_inst->bits3.if_else.pop_count = 0;
         if_inst->bits3.if_else.pad0 = 0;
      } else if (brw->gen == 6) {
         /* As of gen6, there is no IFF and IF must point to the ENDIF. */
         if_inst->bits1.branch_gen6.jump_count = br * (endif_inst - if_inst);
      } else {
         /* gen7+: both UIP and JIP point at the ENDIF. */
         if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst);
         if_inst->bits3.break_cont.jip = br * (endif_inst - if_inst);
      }
   } else {
      else_inst->header.execution_size = if_inst->header.execution_size;

      /* Patch IF -> ELSE */
      if (brw->gen < 6) {
         if_inst->bits3.if_else.jump_count = br * (else_inst - if_inst);
         if_inst->bits3.if_else.pop_count = 0;
         if_inst->bits3.if_else.pad0 = 0;
      } else if (brw->gen == 6) {
         if_inst->bits1.branch_gen6.jump_count = br * (else_inst - if_inst + 1);
      }

      /* Patch ELSE -> ENDIF */
      if (brw->gen < 6) {
         /* BRW_OPCODE_ELSE pre-gen6 should point just past the
          * matching ENDIF.
          */
         else_inst->bits3.if_else.jump_count = br*(endif_inst - else_inst + 1);
         else_inst->bits3.if_else.pop_count = 1;
         else_inst->bits3.if_else.pad0 = 0;
      } else if (brw->gen == 6) {
         /* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. */
         else_inst->bits1.branch_gen6.jump_count = br*(endif_inst - else_inst);
      } else {
         /* The IF instruction's JIP should point just past the ELSE */
         if_inst->bits3.break_cont.jip = br * (else_inst - if_inst + 1);
         /* The IF instruction's UIP and ELSE's JIP should point to ENDIF */
         if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst);
         else_inst->bits3.break_cont.jip = br * (endif_inst - else_inst);
      }
   }
}
1408
/* Emit an ELSE; its jump targets are unknown here and are patched later
 * by brw_ENDIF() (via patch_IF_ELSE()).  The instruction is pushed onto
 * the if-stack on top of the matching IF.
 */
void
brw_ELSE(struct brw_compile *p)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_ELSE);

   /* Per-generation operand encoding, mirroring brw_IF(). */
   if (brw->gen < 6) {
      brw_set_dest(p, insn, brw_ip_reg());
      brw_set_src0(p, insn, brw_ip_reg());
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else if (brw->gen == 6) {
      brw_set_dest(p, insn, brw_imm_w(0));
      insn->bits1.branch_gen6.jump_count = 0;
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   } else {
      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_ud(0));
      insn->bits3.break_cont.jip = 0;   /* patched in patch_IF_ELSE() */
      insn->bits3.break_cont.uip = 0;
   }

   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.mask_control = BRW_MASK_ENABLE;
   if (!p->single_program_flow)
      insn->header.thread_control = BRW_THREAD_SWITCH;

   push_if_stack(p, insn);
}
1441
/* Close the innermost IF[/ELSE] block: pop the IF (and optional ELSE)
 * from the if-stack, emit an ENDIF if needed, and patch all the jump
 * targets now that their distances are known.
 */
void
brw_ENDIF(struct brw_compile *p)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn = NULL;
   struct brw_instruction *else_inst = NULL;
   struct brw_instruction *if_inst = NULL;
   struct brw_instruction *tmp;
   bool emit_endif = true;

   /* In single program flow mode, we can express IF and ELSE instructions
    * equivalently as ADD instructions that operate on IP. On platforms prior
    * to Gen6, flow control instructions cause an implied thread switch, so
    * this is a significant savings.
    *
    * However, on Gen6, writing to IP doesn't work in single program flow mode
    * (see the SandyBridge PRM, Volume 4 part 2, p79: "When SPF is ON, IP may
    * not be updated by non-flow control instructions."). And on later
    * platforms, there is no significant benefit to converting control flow
    * instructions to conditional ADDs. So we only do this trick on Gen4 and
    * Gen5.
    */
   if (brw->gen < 6 && p->single_program_flow)
      emit_endif = false;

   /*
    * A single next_insn() may change the base address of instruction store
    * memory(p->store), so call it first before referencing the instruction
    * store pointer from an index
    */
   if (emit_endif)
      insn = next_insn(p, BRW_OPCODE_ENDIF);

   /* Pop the IF and (optional) ELSE instructions from the stack */
   p->if_depth_in_loop[p->loop_stack_depth]--;
   tmp = pop_if_stack(p);
   if (tmp->header.opcode == BRW_OPCODE_ELSE) {
      else_inst = tmp;
      tmp = pop_if_stack(p);
   }
   if_inst = tmp;

   if (!emit_endif) {
      /* ENDIF is useless; don't bother emitting it. */
      convert_IF_ELSE_to_ADD(p, if_inst, else_inst);
      return;
   }

   /* Per-generation operand encoding for the ENDIF itself. */
   if (brw->gen < 6) {
      brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
      brw_set_src0(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else if (brw->gen == 6) {
      brw_set_dest(p, insn, brw_imm_w(0));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   } else {
      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_ud(0));
   }

   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.mask_control = BRW_MASK_ENABLE;
   insn->header.thread_control = BRW_THREAD_SWITCH;

   /* Also pop item off the stack in the endif instruction: */
   if (brw->gen < 6) {
      insn->bits3.if_else.jump_count = 0;
      insn->bits3.if_else.pop_count = 1;
      insn->bits3.if_else.pad0 = 0;
   } else if (brw->gen == 6) {
      insn->bits1.branch_gen6.jump_count = 2;
   } else {
      insn->bits3.break_cont.jip = 2;
   }
   patch_IF_ELSE(p, if_inst, else_inst, insn);
}
1520
/* Emit a BREAK out of the innermost loop.  The jump target is filled in
 * later: pre-gen6 by brw_patch_break_cont(), gen6+ presumably by the
 * UIP/JIP patching pass (see the comment above brw_patch_break_cont()).
 */
struct brw_instruction *brw_BREAK(struct brw_compile *p)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_BREAK);
   if (brw->gen >= 6) {
      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else {
      brw_set_dest(p, insn, brw_ip_reg());
      brw_set_src0(p, insn, brw_ip_reg());
      brw_set_src1(p, insn, brw_imm_d(0x0));
      insn->bits3.if_else.pad0 = 0;
      /* Pop the mask-stack entries of the IFs opened inside this loop. */
      insn->bits3.if_else.pop_count = p->if_depth_in_loop[p->loop_stack_depth];
   }
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.execution_size = BRW_EXECUTE_8;

   return insn;
}
1543
1544 struct brw_instruction *gen6_CONT(struct brw_compile *p)
1545 {
1546 struct brw_instruction *insn;
1547
1548 insn = next_insn(p, BRW_OPCODE_CONTINUE);
1549 brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1550 brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1551 brw_set_dest(p, insn, brw_ip_reg());
1552 brw_set_src0(p, insn, brw_ip_reg());
1553 brw_set_src1(p, insn, brw_imm_d(0x0));
1554
1555 insn->header.compression_control = BRW_COMPRESSION_NONE;
1556 insn->header.execution_size = BRW_EXECUTE_8;
1557 return insn;
1558 }
1559
/* Emit a CONTINUE (pre-gen6 encoding: IP-relative with a pop count).
 * The jump count is left zero and patched by brw_patch_break_cont().
 */
struct brw_instruction *brw_CONT(struct brw_compile *p)
{
   struct brw_instruction *insn;
   insn = next_insn(p, BRW_OPCODE_CONTINUE);
   brw_set_dest(p, insn, brw_ip_reg());
   brw_set_src0(p, insn, brw_ip_reg());
   brw_set_src1(p, insn, brw_imm_d(0x0));
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.execution_size = BRW_EXECUTE_8;
   /* insn->header.mask_control = BRW_MASK_DISABLE; */
   insn->bits3.if_else.pad0 = 0;
   /* Pop the mask-stack entries of the IFs opened inside this loop. */
   insn->bits3.if_else.pop_count = p->if_depth_in_loop[p->loop_stack_depth];
   return insn;
}
1574
/* Emit a HALT (gen6+).  Its UIP/JIP are left zero in the src1 immediate
 * and updated later by the caller.
 */
struct brw_instruction *gen6_HALT(struct brw_compile *p)
{
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_HALT);
   brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   brw_set_src1(p, insn, brw_imm_d(0x0)); /* UIP and JIP, updated later. */

   if (p->compressed) {
      insn->header.execution_size = BRW_EXECUTE_16;
   } else {
      insn->header.compression_control = BRW_COMPRESSION_NONE;
      insn->header.execution_size = BRW_EXECUTE_8;
   }
   return insn;
}
1592
1593 /* DO/WHILE loop:
1594 *
1595 * The DO/WHILE is just an unterminated loop -- break or continue are
1596 * used for control within the loop. We have a few ways they can be
1597 * done.
1598 *
1599 * For uniform control flow, the WHILE is just a jump, so ADD ip, ip,
1600 * jip and no DO instruction.
1601 *
1602 * For non-uniform control flow pre-gen6, there's a DO instruction to
1603 * push the mask, and a WHILE to jump back, and BREAK to get out and
1604 * pop the mask.
1605 *
1606 * For gen6, there's no more mask stack, so no need for DO. WHILE
1607 * just points back to the first instruction of the loop.
1608 */
struct brw_instruction *brw_DO(struct brw_compile *p, unsigned execute_size)
{
   struct brw_context *brw = p->brw;

   /* Gen6+ and single-program-flow mode need no actual DO instruction
    * (see the comment block above); just record where the loop starts
    * so BREAK/CONT/WHILE can be patched relative to it.
    */
   if (brw->gen >= 6 || p->single_program_flow) {
      push_loop_stack(p, &p->store[p->nr_insn]);
      return &p->store[p->nr_insn];
   } else {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO);

      push_loop_stack(p, insn);

      /* Override the defaults for this instruction:
       */
      brw_set_dest(p, insn, brw_null_reg());
      brw_set_src0(p, insn, brw_null_reg());
      brw_set_src1(p, insn, brw_null_reg());

      insn->header.compression_control = BRW_COMPRESSION_NONE;
      insn->header.execution_size = execute_size;
      insn->header.predicate_control = BRW_PREDICATE_NONE;
      /* insn->header.mask_control = BRW_MASK_ENABLE; */
      /* insn->header.mask_control = BRW_MASK_DISABLE; */

      return insn;
   }
}
1636
1637 /**
1638 * For pre-gen6, we patch BREAK/CONT instructions to point at the WHILE
1639 * instruction here.
1640 *
1641 * For gen6+, see brw_set_uip_jip(), which doesn't care so much about the loop
1642 * nesting, since it can always just point to the end of the block/current loop.
1643 */
static void
brw_patch_break_cont(struct brw_compile *p, struct brw_instruction *while_inst)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *do_inst = get_inner_do_insn(p);
   struct brw_instruction *inst;
   /* Gen5 counts jumps in 64-bit chunks (two per 128-bit instruction);
    * gen4 counts whole instructions. */
   int br = (brw->gen == 5) ? 2 : 1;

   /* Walk backwards from the WHILE to the matching DO, patching any
    * not-yet-patched BREAK/CONTINUE in between. */
   for (inst = while_inst - 1; inst != do_inst; inst--) {
      /* If the jump count is != 0, that means that this instruction has already
       * been patched because it's part of a loop inside of the one we're
       * patching.
       */
      if (inst->header.opcode == BRW_OPCODE_BREAK &&
          inst->bits3.if_else.jump_count == 0) {
         /* BREAK jumps just past the WHILE. */
         inst->bits3.if_else.jump_count = br * ((while_inst - inst) + 1);
      } else if (inst->header.opcode == BRW_OPCODE_CONTINUE &&
                 inst->bits3.if_else.jump_count == 0) {
         /* CONTINUE jumps to the WHILE itself. */
         inst->bits3.if_else.jump_count = br * (while_inst - inst);
      }
   }
}
1666
/* Close the innermost DO/WHILE loop (see the comment block above brw_DO
 * for the per-generation loop strategies).  Returns the WHILE (or, in
 * pre-gen6 SPF mode, the backward-jumping ADD) instruction.
 */
struct brw_instruction *brw_WHILE(struct brw_compile *p)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn, *do_insn;
   /* gen5+ count jumps in 64-bit chunks: two per 128-bit instruction. */
   unsigned br = 1;

   if (brw->gen >= 5)
      br = 2;

   if (brw->gen >= 7) {
      insn = next_insn(p, BRW_OPCODE_WHILE);
      do_insn = get_inner_do_insn(p);

      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_ud(0));
      /* Backward jump to the top of the loop (negative distance). */
      insn->bits3.break_cont.jip = br * (do_insn - insn);

      insn->header.execution_size = BRW_EXECUTE_8;
   } else if (brw->gen == 6) {
      insn = next_insn(p, BRW_OPCODE_WHILE);
      do_insn = get_inner_do_insn(p);

      brw_set_dest(p, insn, brw_imm_w(0));
      insn->bits1.branch_gen6.jump_count = br * (do_insn - insn);
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));

      insn->header.execution_size = BRW_EXECUTE_8;
   } else {
      if (p->single_program_flow) {
         /* SPF mode: replace the WHILE with a plain IP-relative ADD
          * (byte offset: 16 bytes per instruction). */
         insn = next_insn(p, BRW_OPCODE_ADD);
         do_insn = get_inner_do_insn(p);

         brw_set_dest(p, insn, brw_ip_reg());
         brw_set_src0(p, insn, brw_ip_reg());
         brw_set_src1(p, insn, brw_imm_d((do_insn - insn) * 16));
         insn->header.execution_size = BRW_EXECUTE_1;
      } else {
         insn = next_insn(p, BRW_OPCODE_WHILE);
         do_insn = get_inner_do_insn(p);

         assert(do_insn->header.opcode == BRW_OPCODE_DO);

         brw_set_dest(p, insn, brw_ip_reg());
         brw_set_src0(p, insn, brw_ip_reg());
         brw_set_src1(p, insn, brw_imm_d(0));

         insn->header.execution_size = do_insn->header.execution_size;
         insn->bits3.if_else.jump_count = br * (do_insn - insn + 1);
         insn->bits3.if_else.pop_count = 0;
         insn->bits3.if_else.pad0 = 0;

         /* Fill in the jump targets of the loop's BREAK/CONT insns. */
         brw_patch_break_cont(p, insn);
      }
   }
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   /* Don't let predication leak into following instructions. */
   p->current->header.predicate_control = BRW_PREDICATE_NONE;

   p->loop_stack_depth--;

   return insn;
}
1730
1731
1732 /* FORWARD JUMPS:
1733 */
1734 void brw_land_fwd_jump(struct brw_compile *p, int jmp_insn_idx)
1735 {
1736 struct brw_context *brw = p->brw;
1737 struct brw_instruction *jmp_insn = &p->store[jmp_insn_idx];
1738 unsigned jmpi = 1;
1739
1740 if (brw->gen >= 5)
1741 jmpi = 2;
1742
1743 assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
1744 assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE);
1745
1746 jmp_insn->bits3.ud = jmpi * (p->nr_insn - jmp_insn_idx - 1);
1747 }
1748
1749
1750
1751 /* To integrate with the above, it makes sense that the comparison
1752 * instruction should populate the flag register. It might be simpler
1753 * just to use the flag reg for most WM tasks?
1754 */
void brw_CMP(struct brw_compile *p,
             struct brw_reg dest,
             unsigned conditional,
             struct brw_reg src0,
             struct brw_reg src1)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP);

   /* The comparison to perform (BRW_CONDITIONAL_*) lives in the
    * conditional-modifier field. */
   insn->header.destreg__conditionalmod = conditional;
   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, src1);

   /* guess_execution_size(insn, src0); */


   /* Make it so that future instructions will use the computed flag
    * value until brw_set_predicate_control_flag_value() is called
    * again.
    */
   if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
       dest.nr == 0) {
      p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
      p->flag_value = 0xff;
   }

   /* Item WaCMPInstNullDstForcesThreadSwitch in the Haswell Bspec workarounds
    * page says:
    *    "Any CMP instruction with a null destination must use a {switch}."
    *
    * It also applies to other Gen7 platforms (IVB, BYT) even though it isn't
    * mentioned on their work-arounds pages.
    */
   if (brw->gen == 7) {
      if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
          dest.nr == BRW_ARF_NULL) {
         insn->header.thread_control = BRW_THREAD_SWITCH;
      }
   }
}
1796
1797 /* Issue 'wait' instruction for n1, host could program MMIO
1798 to wake up thread. */
void brw_WAIT (struct brw_compile *p)
{
   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_WAIT);
   /* WAIT operates on notification register n1 (both dest and src0). */
   struct brw_reg src = brw_notification_1_reg();

   brw_set_dest(p, insn, src);
   brw_set_src0(p, insn, src);
   brw_set_src1(p, insn, brw_null_reg());
   insn->header.execution_size = 0; /* must */
   insn->header.predicate_control = 0;
   insn->header.compression_control = 0;
}
1811
1812
1813 /***********************************************************************
1814 * Helpers for the various SEND message types:
1815 */
1816
1817 /** Extended math function, float[8].
1818 */
void brw_math( struct brw_compile *p,
               struct brw_reg dest,
               unsigned function,
               unsigned msg_reg_nr,
               struct brw_reg src,
               unsigned data_type,
               unsigned precision )
{
   struct brw_context *brw = p->brw;

   /* Gen6+ has a native MATH opcode; older gens send a message to the
    * extended-math shared function instead (msg_reg_nr/data_type/
    * precision are only used on that path). */
   if (brw->gen >= 6) {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);

      assert(dest.file == BRW_GENERAL_REGISTER_FILE ||
             (brw->gen >= 7 && dest.file == BRW_MESSAGE_REGISTER_FILE));
      assert(src.file == BRW_GENERAL_REGISTER_FILE);

      assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
      if (brw->gen == 6)
         assert(src.hstride == BRW_HORIZONTAL_STRIDE_1);

      /* Source modifiers are ignored for extended math instructions on Gen6. */
      if (brw->gen == 6) {
         assert(!src.negate);
         assert(!src.abs);
      }

      /* Integer division takes integer sources; everything else floats. */
      if (function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||
          function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER ||
          function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
         assert(src.type != BRW_REGISTER_TYPE_F);
      } else {
         assert(src.type == BRW_REGISTER_TYPE_F);
      }

      /* Math is the same ISA format as other opcodes, except that CondModifier
       * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
       */
      insn->header.destreg__conditionalmod = function;

      brw_set_dest(p, insn, dest);
      brw_set_src0(p, insn, src);
      brw_set_src1(p, insn, brw_null_reg());
   } else {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);

      /* Example code doesn't set predicate_control for send
       * instructions.
       */
      insn->header.predicate_control = 0;
      insn->header.destreg__conditionalmod = msg_reg_nr;

      brw_set_dest(p, insn, dest);
      brw_set_src0(p, insn, src);
      brw_set_math_message(p,
                           insn,
                           function,
                           src.type == BRW_REGISTER_TYPE_D,
                           precision,
                           data_type);
   }
}
1881
1882 /** Extended math function, float[8].
1883 */
1884 void brw_math2(struct brw_compile *p,
1885 struct brw_reg dest,
1886 unsigned function,
1887 struct brw_reg src0,
1888 struct brw_reg src1)
1889 {
1890 struct brw_context *brw = p->brw;
1891 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);
1892
1893 assert(dest.file == BRW_GENERAL_REGISTER_FILE ||
1894 (brw->gen >= 7 && dest.file == BRW_MESSAGE_REGISTER_FILE));
1895 assert(src0.file == BRW_GENERAL_REGISTER_FILE);
1896 assert(src1.file == BRW_GENERAL_REGISTER_FILE);
1897
1898 assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
1899 if (brw->gen == 6) {
1900 assert(src0.hstride == BRW_HORIZONTAL_STRIDE_1);
1901 assert(src1.hstride == BRW_HORIZONTAL_STRIDE_1);
1902 }
1903
1904 if (function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||
1905 function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER ||
1906 function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
1907 assert(src0.type != BRW_REGISTER_TYPE_F);
1908 assert(src1.type != BRW_REGISTER_TYPE_F);
1909 } else {
1910 assert(src0.type == BRW_REGISTER_TYPE_F);
1911 assert(src1.type == BRW_REGISTER_TYPE_F);
1912 }
1913
1914 /* Source modifiers are ignored for extended math instructions on Gen6. */
1915 if (brw->gen == 6) {
1916 assert(!src0.negate);
1917 assert(!src0.abs);
1918 assert(!src1.negate);
1919 assert(!src1.abs);
1920 }
1921
1922 /* Math is the same ISA format as other opcodes, except that CondModifier
1923 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1924 */
1925 insn->header.destreg__conditionalmod = function;
1926
1927 brw_set_dest(p, insn, dest);
1928 brw_set_src0(p, insn, src0);
1929 brw_set_src1(p, insn, src1);
1930 }
1931
1932
/**
 * Write a block of OWORDs (half a GRF each) from the scratch buffer,
 * using a constant offset per channel.
 *
 * The offset must be aligned to oword size (16 bytes).  Used for
 * register spilling.
 *
 * \param mrf      first message register; holds the header, data follows
 * \param num_regs number of GRFs of data to write (1 or 2)
 * \param offset   byte offset into the scratch buffer
 */
void brw_oword_block_write_scratch(struct brw_compile *p,
				   struct brw_reg mrf,
				   int num_regs,
				   unsigned offset)
{
   struct brw_context *brw = p->brw;
   uint32_t msg_control, msg_type;
   int mlen;

   /* Gen6+ takes the offset in owords rather than bytes. */
   if (brw->gen >= 6)
      offset /= 16;

   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);

   /* A GRF is two owords; message length is header plus data registers. */
   if (num_regs == 1) {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
      mlen = 2;
   } else {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
      mlen = 3;
   }

   /* Set up the message header.  This is g0, with g0.2 filled with
    * the offset.  We don't want to leave our offset around in g0 or
    * it'll screw up texture samples, so set it up inside the message
    * reg.
    */
   {
      brw_push_insn_state(p);
      brw_set_mask_control(p, BRW_MASK_DISABLE);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);

      brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));

      /* set message header global offset field (reg 0, element 2) */
      brw_MOV(p,
              retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
                                  mrf.nr,
                                  2), BRW_REGISTER_TYPE_UD),
              brw_imm_ud(offset));

      brw_pop_insn_state(p);
   }

   {
      struct brw_reg dest;
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
      int send_commit_msg;
      struct brw_reg src_header = retype(brw_vec8_grf(0, 0),
                                         BRW_REGISTER_TYPE_UW);

      /* SEND may not be compressed; widen the header source instead. */
      if (insn->header.compression_control != BRW_COMPRESSION_NONE) {
         insn->header.compression_control = BRW_COMPRESSION_NONE;
         src_header = vec16(src_header);
      }
      assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
      insn->header.destreg__conditionalmod = mrf.nr;

      /* Until gen6, writes followed by reads from the same location
       * are not guaranteed to be ordered unless write_commit is set.
       * If set, then a no-op write is issued to the destination
       * register to set a dependency, and a read from the destination
       * can be used to ensure the ordering.
       *
       * For gen6, only writes between different threads need ordering
       * protection.  Our use of DP writes is all about register
       * spilling within a thread.
       */
      if (brw->gen >= 6) {
         dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
         send_commit_msg = 0;
      } else {
         dest = src_header;
         send_commit_msg = 1;
      }

      brw_set_dest(p, insn, dest);
      /* Gen6+ sources the header payload from the MRF directly. */
      if (brw->gen >= 6) {
         brw_set_src0(p, insn, mrf);
      } else {
         brw_set_src0(p, insn, brw_null_reg());
      }

      if (brw->gen >= 6)
         msg_type = GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
      else
         msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;

      brw_set_dp_write_message(p,
                               insn,
                               255, /* binding table index (255=stateless) */
                               msg_control,
                               msg_type,
                               mlen,
                               true, /* header_present */
                               0, /* not a render target */
                               send_commit_msg, /* response_length */
                               0, /* eot */
                               send_commit_msg);
   }
}
2041
2042
/**
 * Read a block of owords (half a GRF each) from the scratch buffer
 * using a constant index per channel.
 *
 * Offset must be aligned to oword size (16 bytes).  Used for register
 * spilling.
 *
 * \param dest     destination registers for the read data
 * \param mrf      message register used to build the header
 * \param num_regs number of GRFs to read (1 or 2)
 * \param offset   byte offset into the scratch buffer
 */
void
brw_oword_block_read_scratch(struct brw_compile *p,
			     struct brw_reg dest,
			     struct brw_reg mrf,
			     int num_regs,
			     unsigned offset)
{
   struct brw_context *brw = p->brw;
   uint32_t msg_control;
   int rlen;

   /* Gen6+ takes the offset in owords rather than bytes. */
   if (brw->gen >= 6)
      offset /= 16;

   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
   dest = retype(dest, BRW_REGISTER_TYPE_UW);

   /* A GRF is two owords, so the response length equals num_regs. */
   if (num_regs == 1) {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
      rlen = 1;
   } else {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
      rlen = 2;
   }

   /* Build the message header inside the MRF: a copy of g0 with the
    * scratch offset written into element 2, so g0 itself is untouched.
    */
   {
      brw_push_insn_state(p);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      brw_set_mask_control(p, BRW_MASK_DISABLE);

      brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));

      /* set message header global offset field (reg 0, element 2) */
      brw_MOV(p,
              retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
                                  mrf.nr,
                                  2), BRW_REGISTER_TYPE_UD),
              brw_imm_ud(offset));

      brw_pop_insn_state(p);
   }

   {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);

      assert(insn->header.predicate_control == 0);
      insn->header.compression_control = BRW_COMPRESSION_NONE;
      insn->header.destreg__conditionalmod = mrf.nr;

      brw_set_dest(p, insn, dest);	/* UW? */
      /* Gen6+ sources the header payload from the MRF directly. */
      if (brw->gen >= 6) {
         brw_set_src0(p, insn, mrf);
      } else {
         brw_set_src0(p, insn, brw_null_reg());
      }

      brw_set_dp_read_message(p,
                              insn,
                              255, /* binding table index (255=stateless) */
                              msg_control,
                              BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
                              BRW_DATAPORT_READ_TARGET_RENDER_CACHE,
                              1, /* msg_length */
                              true, /* header_present */
                              rlen);
   }
}
2117
/**
 * Read \p num_regs consecutive GRFs (1, 2, or 4) from Gen7 scratch space
 * using the data cache scratch-block read message.
 *
 * The g0 header supplies the per-thread scratch base (g0.5); \p offset,
 * in bytes, selects the register-sized (HWORD) block to read.
 */
void
gen7_block_read_scratch(struct brw_compile *p,
                        struct brw_reg dest,
                        int num_regs,
                        unsigned offset)
{
   dest = retype(dest, BRW_REGISTER_TYPE_UW);

   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);

   assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
   insn->header.compression_control = BRW_COMPRESSION_NONE;

   brw_set_dest(p, insn, dest);

   /* The HW requires that the header is present; this is to get the g0.5
    * scratch offset.
    */
   bool header_present = true;
   brw_set_src0(p, insn, brw_vec8_grf(0, 0));

   brw_set_message_descriptor(p, insn,
                              GEN7_SFID_DATAPORT_DATA_CACHE,
                              1, /* mlen: just g0 */
                              num_regs,
                              header_present,
                              false);

   /* Mark the descriptor as a scratch read. */
   insn->bits3.ud |= GEN7_DATAPORT_SCRATCH_READ;

   /* Block size is encoded as num_regs - 1; only 1, 2, and 4 are legal. */
   assert(num_regs == 1 || num_regs == 2 || num_regs == 4);
   insn->bits3.ud |= (num_regs - 1) << GEN7_DATAPORT_SCRATCH_NUM_REGS_SHIFT;

   /* According to the docs, offset is "A 12-bit HWord offset into the memory
    * Immediate Memory buffer as specified by binding table 0xFF."  An HWORD
    * is 32 bytes, which happens to be the size of a register.
    */
   offset /= REG_SIZE;
   assert(offset < (1 << 12));
   insn->bits3.ud |= offset;
}
2159
/**
 * Read a float[4] vector from the data port Data Cache (const buffer).
 * Location (in buffer) should be a multiple of 16.
 * Used for fetching shader constants.
 *
 * \param mrf              message register used to build the header
 * \param offset           byte offset into the buffer (owords on Gen6+)
 * \param bind_table_index surface to read from
 */
void brw_oword_block_read(struct brw_compile *p,
			  struct brw_reg dest,
			  struct brw_reg mrf,
			  uint32_t offset,
			  uint32_t bind_table_index)
{
   struct brw_context *brw = p->brw;

   /* On newer hardware, offset is in units of owords. */
   if (brw->gen >= 6)
      offset /= 16;

   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);

   /* Emit the whole sequence unpredicated, uncompressed, and unmasked so
    * the header setup is independent of surrounding execution state.
    */
   brw_push_insn_state(p);
   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_set_mask_control(p, BRW_MASK_DISABLE);

   /* The header is a copy of g0 with the offset placed in element 2. */
   brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));

   /* set message header global offset field (reg 0, element 2) */
   brw_MOV(p,
           retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
                               mrf.nr,
                               2), BRW_REGISTER_TYPE_UD),
           brw_imm_ud(offset));

   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
   insn->header.destreg__conditionalmod = mrf.nr;

   /* cast dest to a uword[8] vector */
   dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);

   brw_set_dest(p, insn, dest);
   /* Gen6+ sources the header payload from the MRF directly. */
   if (brw->gen >= 6) {
      brw_set_src0(p, insn, mrf);
   } else {
      brw_set_src0(p, insn, brw_null_reg());
   }

   brw_set_dp_read_message(p,
                           insn,
                           bind_table_index,
                           BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW,
                           BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ,
                           BRW_DATAPORT_READ_TARGET_DATA_CACHE,
                           1, /* msg_length */
                           true, /* header_present */
                           1); /* response_length (1 reg, 2 owords!) */

   brw_pop_insn_state(p);
}
2218
2219
2220 void brw_fb_WRITE(struct brw_compile *p,
2221 int dispatch_width,
2222 unsigned msg_reg_nr,
2223 struct brw_reg src0,
2224 unsigned msg_control,
2225 unsigned binding_table_index,
2226 unsigned msg_length,
2227 unsigned response_length,
2228 bool eot,
2229 bool header_present)
2230 {
2231 struct brw_context *brw = p->brw;
2232 struct brw_instruction *insn;
2233 unsigned msg_type;
2234 struct brw_reg dest;
2235
2236 if (dispatch_width == 16)
2237 dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
2238 else
2239 dest = retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW);
2240
2241 if (brw->gen >= 6) {
2242 insn = next_insn(p, BRW_OPCODE_SENDC);
2243 } else {
2244 insn = next_insn(p, BRW_OPCODE_SEND);
2245 }
2246 insn->header.compression_control = BRW_COMPRESSION_NONE;
2247
2248 if (brw->gen >= 6) {
2249 /* headerless version, just submit color payload */
2250 src0 = brw_message_reg(msg_reg_nr);
2251
2252 msg_type = GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
2253 } else {
2254 insn->header.destreg__conditionalmod = msg_reg_nr;
2255
2256 msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
2257 }
2258
2259 brw_set_dest(p, insn, dest);
2260 brw_set_src0(p, insn, src0);
2261 brw_set_dp_write_message(p,
2262 insn,
2263 binding_table_index,
2264 msg_control,
2265 msg_type,
2266 msg_length,
2267 header_present,
2268 eot, /* last render target write */
2269 response_length,
2270 eot,
2271 0 /* send_commit_msg */);
2272 }
2273
2274
/**
 * Texture sample instruction.
 * Note: the msg_type plus msg_length values determine exactly what kind
 * of sampling operation is performed.  See volume 4, page 161 of docs.
 *
 * \param msg_reg_nr  first MRF of the payload, or -1 to skip the implied
 *                    move of src0 into the MRF
 */
void brw_SAMPLE(struct brw_compile *p,
		struct brw_reg dest,
		unsigned msg_reg_nr,
		struct brw_reg src0,
		unsigned binding_table_index,
		unsigned sampler,
		unsigned msg_type,
		unsigned response_length,
		unsigned msg_length,
		unsigned header_present,
		unsigned simd_mode,
		unsigned return_format)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn;

   /* msg_reg_nr is unsigned; -1 wraps, so this compares against ~0u. */
   if (msg_reg_nr != -1)
      gen6_resolve_implied_move(p, &src0, msg_reg_nr);

   insn = next_insn(p, BRW_OPCODE_SEND);
   insn->header.predicate_control = 0; /* XXX */

   /* From the 965 PRM (volume 4, part 1, section 14.2.41):
    *
    *    "Instruction compression is not allowed for this instruction (that
    *     is, send). The hardware behavior is undefined if this instruction is
    *     set as compressed. However, compress control can be set to "SecHalf"
    *     to affect the EMask generation."
    *
    * No similar wording is found in later PRMs, but there are examples
    * utilizing send with SecHalf.  More importantly, SIMD8 sampler messages
    * are allowed in SIMD16 mode and they could not work without SecHalf.  For
    * these reasons, we allow BRW_COMPRESSION_2NDHALF here.
    */
   if (insn->header.compression_control != BRW_COMPRESSION_2NDHALF)
      insn->header.compression_control = BRW_COMPRESSION_NONE;

   /* Pre-Gen6 the MRF number is carried in the instruction itself. */
   if (brw->gen < 6)
      insn->header.destreg__conditionalmod = msg_reg_nr;

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_sampler_message(p, insn,
                           binding_table_index,
                           sampler,
                           msg_type,
                           response_length,
                           msg_length,
                           header_present,
                           simd_mode,
                           return_format);
}
2332
2333 /* All these variables are pretty confusing - we might be better off
2334 * using bitmasks and macros for this, in the old style. Or perhaps
2335 * just having the caller instantiate the fields in dword3 itself.
2336 */
2337 void brw_urb_WRITE(struct brw_compile *p,
2338 struct brw_reg dest,
2339 unsigned msg_reg_nr,
2340 struct brw_reg src0,
2341 enum brw_urb_write_flags flags,
2342 unsigned msg_length,
2343 unsigned response_length,
2344 unsigned offset,
2345 unsigned swizzle)
2346 {
2347 struct brw_context *brw = p->brw;
2348 struct brw_instruction *insn;
2349
2350 gen6_resolve_implied_move(p, &src0, msg_reg_nr);
2351
2352 if (brw->gen == 7 && !(flags & BRW_URB_WRITE_USE_CHANNEL_MASKS)) {
2353 /* Enable Channel Masks in the URB_WRITE_HWORD message header */
2354 brw_push_insn_state(p);
2355 brw_set_access_mode(p, BRW_ALIGN_1);
2356 brw_set_mask_control(p, BRW_MASK_DISABLE);
2357 brw_OR(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 5),
2358 BRW_REGISTER_TYPE_UD),
2359 retype(brw_vec1_grf(0, 5), BRW_REGISTER_TYPE_UD),
2360 brw_imm_ud(0xff00));
2361 brw_pop_insn_state(p);
2362 }
2363
2364 insn = next_insn(p, BRW_OPCODE_SEND);
2365
2366 assert(msg_length < BRW_MAX_MRF);
2367
2368 brw_set_dest(p, insn, dest);
2369 brw_set_src0(p, insn, src0);
2370 brw_set_src1(p, insn, brw_imm_d(0));
2371
2372 if (brw->gen < 6)
2373 insn->header.destreg__conditionalmod = msg_reg_nr;
2374
2375 brw_set_urb_message(p,
2376 insn,
2377 flags,
2378 msg_length,
2379 response_length,
2380 offset,
2381 swizzle);
2382 }
2383
2384 static int
2385 next_ip(struct brw_compile *p, int ip)
2386 {
2387 struct brw_instruction *insn = (void *)p->store + ip;
2388
2389 if (insn->header.cmpt_control)
2390 return ip + 8;
2391 else
2392 return ip + 16;
2393 }
2394
2395 static int
2396 brw_find_next_block_end(struct brw_compile *p, int start)
2397 {
2398 int ip;
2399 void *store = p->store;
2400
2401 for (ip = next_ip(p, start); ip < p->next_insn_offset; ip = next_ip(p, ip)) {
2402 struct brw_instruction *insn = store + ip;
2403
2404 switch (insn->header.opcode) {
2405 case BRW_OPCODE_ENDIF:
2406 case BRW_OPCODE_ELSE:
2407 case BRW_OPCODE_WHILE:
2408 case BRW_OPCODE_HALT:
2409 return ip;
2410 }
2411 }
2412
2413 return 0;
2414 }
2415
2416 /* There is no DO instruction on gen6, so to find the end of the loop
2417 * we have to see if the loop is jumping back before our start
2418 * instruction.
2419 */
2420 static int
2421 brw_find_loop_end(struct brw_compile *p, int start)
2422 {
2423 struct brw_context *brw = p->brw;
2424 int ip;
2425 int scale = 8;
2426 void *store = p->store;
2427
2428 /* Always start after the instruction (such as a WHILE) we're trying to fix
2429 * up.
2430 */
2431 for (ip = next_ip(p, start); ip < p->next_insn_offset; ip = next_ip(p, ip)) {
2432 struct brw_instruction *insn = store + ip;
2433
2434 if (insn->header.opcode == BRW_OPCODE_WHILE) {
2435 int jip = brw->gen == 6 ? insn->bits1.branch_gen6.jump_count
2436 : insn->bits3.break_cont.jip;
2437 if (ip + jip * scale <= start)
2438 return ip;
2439 }
2440 }
2441 assert(!"not reached");
2442 return start;
2443 }
2444
2445 /* After program generation, go back and update the UIP and JIP of
2446 * BREAK, CONT, and HALT instructions to their correct locations.
2447 */
2448 void
2449 brw_set_uip_jip(struct brw_compile *p)
2450 {
2451 struct brw_context *brw = p->brw;
2452 int ip;
2453 int scale = 8;
2454 void *store = p->store;
2455
2456 if (brw->gen < 6)
2457 return;
2458
2459 for (ip = 0; ip < p->next_insn_offset; ip = next_ip(p, ip)) {
2460 struct brw_instruction *insn = store + ip;
2461
2462 if (insn->header.cmpt_control) {
2463 /* Fixups for compacted BREAK/CONTINUE not supported yet. */
2464 assert(insn->header.opcode != BRW_OPCODE_BREAK &&
2465 insn->header.opcode != BRW_OPCODE_CONTINUE &&
2466 insn->header.opcode != BRW_OPCODE_HALT);
2467 continue;
2468 }
2469
2470 int block_end_ip = brw_find_next_block_end(p, ip);
2471 switch (insn->header.opcode) {
2472 case BRW_OPCODE_BREAK:
2473 assert(block_end_ip != 0);
2474 insn->bits3.break_cont.jip = (block_end_ip - ip) / scale;
2475 /* Gen7 UIP points to WHILE; Gen6 points just after it */
2476 insn->bits3.break_cont.uip =
2477 (brw_find_loop_end(p, ip) - ip +
2478 (brw->gen == 6 ? 16 : 0)) / scale;
2479 break;
2480 case BRW_OPCODE_CONTINUE:
2481 assert(block_end_ip != 0);
2482 insn->bits3.break_cont.jip = (block_end_ip - ip) / scale;
2483 insn->bits3.break_cont.uip =
2484 (brw_find_loop_end(p, ip) - ip) / scale;
2485
2486 assert(insn->bits3.break_cont.uip != 0);
2487 assert(insn->bits3.break_cont.jip != 0);
2488 break;
2489
2490 case BRW_OPCODE_ENDIF:
2491 if (block_end_ip == 0)
2492 insn->bits3.break_cont.jip = 2;
2493 else
2494 insn->bits3.break_cont.jip = (block_end_ip - ip) / scale;
2495 break;
2496
2497 case BRW_OPCODE_HALT:
2498 /* From the Sandy Bridge PRM (volume 4, part 2, section 8.3.19):
2499 *
2500 * "In case of the halt instruction not inside any conditional
2501 * code block, the value of <JIP> and <UIP> should be the
2502 * same. In case of the halt instruction inside conditional code
2503 * block, the <UIP> should be the end of the program, and the
2504 * <JIP> should be end of the most inner conditional code block."
2505 *
2506 * The uip will have already been set by whoever set up the
2507 * instruction.
2508 */
2509 if (block_end_ip == 0) {
2510 insn->bits3.break_cont.jip = insn->bits3.break_cont.uip;
2511 } else {
2512 insn->bits3.break_cont.jip = (block_end_ip - ip) / scale;
2513 }
2514 assert(insn->bits3.break_cont.uip != 0);
2515 assert(insn->bits3.break_cont.jip != 0);
2516 break;
2517 }
2518 }
2519 }
2520
2521 void brw_ff_sync(struct brw_compile *p,
2522 struct brw_reg dest,
2523 unsigned msg_reg_nr,
2524 struct brw_reg src0,
2525 bool allocate,
2526 unsigned response_length,
2527 bool eot)
2528 {
2529 struct brw_context *brw = p->brw;
2530 struct brw_instruction *insn;
2531
2532 gen6_resolve_implied_move(p, &src0, msg_reg_nr);
2533
2534 insn = next_insn(p, BRW_OPCODE_SEND);
2535 brw_set_dest(p, insn, dest);
2536 brw_set_src0(p, insn, src0);
2537 brw_set_src1(p, insn, brw_imm_d(0));
2538
2539 if (brw->gen < 6)
2540 insn->header.destreg__conditionalmod = msg_reg_nr;
2541
2542 brw_set_ff_sync_message(p,
2543 insn,
2544 allocate,
2545 response_length,
2546 eot);
2547 }
2548
2549 /**
2550 * Emit the SEND instruction necessary to generate stream output data on Gen6
2551 * (for transform feedback).
2552 *
2553 * If send_commit_msg is true, this is the last piece of stream output data
2554 * from this thread, so send the data as a committed write. According to the
2555 * Sandy Bridge PRM (volume 2 part 1, section 4.5.1):
2556 *
2557 * "Prior to End of Thread with a URB_WRITE, the kernel must ensure all
2558 * writes are complete by sending the final write as a committed write."
2559 */
2560 void
2561 brw_svb_write(struct brw_compile *p,
2562 struct brw_reg dest,
2563 unsigned msg_reg_nr,
2564 struct brw_reg src0,
2565 unsigned binding_table_index,
2566 bool send_commit_msg)
2567 {
2568 struct brw_instruction *insn;
2569
2570 gen6_resolve_implied_move(p, &src0, msg_reg_nr);
2571
2572 insn = next_insn(p, BRW_OPCODE_SEND);
2573 brw_set_dest(p, insn, dest);
2574 brw_set_src0(p, insn, src0);
2575 brw_set_src1(p, insn, brw_imm_d(0));
2576 brw_set_dp_write_message(p, insn,
2577 binding_table_index,
2578 0, /* msg_control: ignored */
2579 GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE,
2580 1, /* msg_length */
2581 true, /* header_present */
2582 0, /* last_render_target: ignored */
2583 send_commit_msg, /* response_length */
2584 0, /* end_of_thread */
2585 send_commit_msg); /* send_commit_msg */
2586 }
2587
/**
 * Fill out the SEND descriptor for an untyped atomic operation.
 *
 * Haswell routes the message through data cache data port 1 and has a
 * separate SIMD4x2 message type for Align16 mode; Ivybridge uses the
 * legacy data cache SFID.
 */
static void
brw_set_dp_untyped_atomic_message(struct brw_compile *p,
                                  struct brw_instruction *insn,
                                  unsigned atomic_op,
                                  unsigned bind_table_index,
                                  unsigned msg_length,
                                  unsigned response_length,
                                  bool header_present)
{
   if (p->brw->is_haswell) {
      brw_set_message_descriptor(p, insn, HSW_SFID_DATAPORT_DATA_CACHE_1,
                                 msg_length, response_length,
                                 header_present, false);


      if (insn->header.access_mode == BRW_ALIGN_1) {
         /* Descriptor bit 12 selects SIMD8 execution; clear means SIMD16. */
         if (insn->header.execution_size != BRW_EXECUTE_16)
            insn->bits3.ud |= 1 << 12; /* SIMD8 mode */

         insn->bits3.gen7_dp.msg_type =
            HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP;
      } else {
         insn->bits3.gen7_dp.msg_type =
            HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP_SIMD4X2;
      }

   } else {
      brw_set_message_descriptor(p, insn, GEN7_SFID_DATAPORT_DATA_CACHE,
                                 msg_length, response_length,
                                 header_present, false);

      insn->bits3.gen7_dp.msg_type = GEN7_DATAPORT_DC_UNTYPED_ATOMIC_OP;

      /* Descriptor bit 12 selects SIMD8 execution; clear means SIMD16. */
      if (insn->header.execution_size != BRW_EXECUTE_16)
         insn->bits3.ud |= 1 << 12; /* SIMD8 mode */
   }

   /* Bit 13 requests return data; set whenever a response is expected. */
   if (response_length)
      insn->bits3.ud |= 1 << 13; /* Return data expected */

   insn->bits3.gen7_dp.binding_table_index = bind_table_index;
   /* The atomic operation selector starts at descriptor bit 8. */
   insn->bits3.ud |= atomic_op << 8;
}
2631
2632 void
2633 brw_untyped_atomic(struct brw_compile *p,
2634 struct brw_reg dest,
2635 struct brw_reg mrf,
2636 unsigned atomic_op,
2637 unsigned bind_table_index,
2638 unsigned msg_length,
2639 unsigned response_length) {
2640 struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_SEND);
2641
2642 brw_set_dest(p, insn, retype(dest, BRW_REGISTER_TYPE_UD));
2643 brw_set_src0(p, insn, retype(mrf, BRW_REGISTER_TYPE_UD));
2644 brw_set_src1(p, insn, brw_imm_d(0));
2645 brw_set_dp_untyped_atomic_message(
2646 p, insn, atomic_op, bind_table_index, msg_length, response_length,
2647 insn->header.access_mode == BRW_ALIGN_1);
2648 }
2649
/**
 * Fill out the SEND descriptor for an untyped surface read.
 *
 * The SIMD width is inferred from the instruction's execution size, and
 * the number of enabled 32-bit channels from the response length.
 */
static void
brw_set_dp_untyped_surface_read_message(struct brw_compile *p,
                                        struct brw_instruction *insn,
                                        unsigned bind_table_index,
                                        unsigned msg_length,
                                        unsigned response_length,
                                        bool header_present)
{
   /* Each returned channel occupies one register per 8 slots of dispatch. */
   const unsigned dispatch_width =
      (insn->header.execution_size == BRW_EXECUTE_16 ? 16 : 8);
   const unsigned num_channels = response_length / (dispatch_width / 8);

   /* Haswell moved this message to data cache data port 1. */
   if (p->brw->is_haswell) {
      brw_set_message_descriptor(p, insn, HSW_SFID_DATAPORT_DATA_CACHE_1,
                                 msg_length, response_length,
                                 header_present, false);

      insn->bits3.gen7_dp.msg_type = HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_READ;
   } else {
      brw_set_message_descriptor(p, insn, GEN7_SFID_DATAPORT_DATA_CACHE,
                                 msg_length, response_length,
                                 header_present, false);

      insn->bits3.gen7_dp.msg_type = GEN7_DATAPORT_DC_UNTYPED_SURFACE_READ;
   }

   /* Descriptor bits 13:12 encode the SIMD mode in Align1. */
   if (insn->header.access_mode == BRW_ALIGN_1) {
      if (dispatch_width == 16)
         insn->bits3.ud |= 1 << 12; /* SIMD16 mode */
      else
         insn->bits3.ud |= 2 << 12; /* SIMD8 mode */
   }

   insn->bits3.gen7_dp.binding_table_index = bind_table_index;

   /* Set mask of 32-bit channels to drop. */
   insn->bits3.ud |= (0xf & (0xf << num_channels)) << 8;
}
2688
2689 void
2690 brw_untyped_surface_read(struct brw_compile *p,
2691 struct brw_reg dest,
2692 struct brw_reg mrf,
2693 unsigned bind_table_index,
2694 unsigned msg_length,
2695 unsigned response_length)
2696 {
2697 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
2698
2699 brw_set_dest(p, insn, retype(dest, BRW_REGISTER_TYPE_UD));
2700 brw_set_src0(p, insn, retype(mrf, BRW_REGISTER_TYPE_UD));
2701 brw_set_dp_untyped_surface_read_message(
2702 p, insn, bind_table_index, msg_length, response_length,
2703 insn->header.access_mode == BRW_ALIGN_1);
2704 }
2705
/**
 * This instruction is generated as a single-channel align1 instruction by
 * both the VS and FS stages when using INTEL_DEBUG=shader_time.
 *
 * We can't use the typed atomic op in the FS because that has the execution
 * mask ANDed with the pixel mask, but we just want to write the one dword for
 * all the pixels.
 *
 * We don't use the SIMD4x2 atomic ops in the VS because want to just write
 * one u32.  So we use the same untyped atomic write message as the pixel
 * shader.
 *
 * The untyped atomic operation requires a BUFFER surface type with RAW
 * format, and is only accessible through the legacy DATA_CACHE dataport
 * messages.
 *
 * \param payload    register holding the (offset, delta) message payload
 * \param surf_index binding table index of the shader-time buffer
 */
void brw_shader_time_add(struct brw_compile *p,
			 struct brw_reg payload,
			 uint32_t surf_index)
{
   struct brw_context *brw = p->brw;
   assert(brw->gen >= 7);

   /* Emit the SEND in Align1, unmasked mode so the add happens exactly
    * once regardless of channel enables.
    */
   brw_push_insn_state(p);
   brw_set_access_mode(p, BRW_ALIGN_1);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   struct brw_instruction *send = brw_next_insn(p, BRW_OPCODE_SEND);
   brw_pop_insn_state(p);

   /* We use brw_vec1_reg and unmasked because we want to increment the given
    * offset only once.
    */
   brw_set_dest(p, send, brw_vec1_reg(BRW_ARCHITECTURE_REGISTER_FILE,
                                      BRW_ARF_NULL, 0));
   brw_set_src0(p, send, brw_vec1_reg(payload.file,
                                      payload.nr, 0));
   brw_set_dp_untyped_atomic_message(p, send, BRW_AOP_ADD, surf_index,
                                     2 /* message length */,
                                     0 /* response length */,
                                     false /* header present */);
}