1ebd7a91ba9291a0c61e9f42fefa991c59f293f2
[mesa.git] / src / mesa / drivers / dri / i965 / brw_eu_emit.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keithw@vmware.com>
30 */
31
32
33 #include "brw_context.h"
34 #include "brw_defines.h"
35 #include "brw_eu.h"
36
37 #include "glsl/ralloc.h"
38
39 /***********************************************************************
40 * Internal helper for constructing instructions
41 */
42
43 static void guess_execution_size(struct brw_compile *p,
44 struct brw_instruction *insn,
45 struct brw_reg reg)
46 {
47 if (reg.width == BRW_WIDTH_8 && p->compressed)
48 insn->header.execution_size = BRW_EXECUTE_16;
49 else
50 insn->header.execution_size = reg.width; /* note - definitions are compatible */
51 }
52
53
54 /**
55 * Prior to Sandybridge, the SEND instruction accepted non-MRF source
56 * registers, implicitly moving the operand to a message register.
57 *
58 * On Sandybridge, this is no longer the case. This function performs the
59 * explicit move; it should be called before emitting a SEND instruction.
60 */
/**
 * Prior to Sandybridge, the SEND instruction accepted non-MRF source
 * registers, implicitly moving the operand to a message register.
 *
 * On Sandybridge, this is no longer the case.  This function performs the
 * explicit move; it should be called before emitting a SEND instruction.
 */
void
gen6_resolve_implied_move(struct brw_compile *p,
			  struct brw_reg *src,
			  unsigned msg_reg_nr)
{
   struct brw_context *brw = p->brw;
   /* Pre-Gen6 hardware does the implied move itself; nothing to emit. */
   if (brw->gen < 6)
      return;

   /* The source is already a message register; no move needed. */
   if (src->file == BRW_MESSAGE_REGISTER_FILE)
      return;

   /* Emit an uncompressed, unmasked MOV into the message register, unless
    * the source is the null register (which carries no data to move).
    * The push/pop pair keeps the caller's instruction state intact.
    */
   if (src->file != BRW_ARCHITECTURE_REGISTER_FILE || src->nr != BRW_ARF_NULL) {
      brw_push_insn_state(p);
      brw_set_mask_control(p, BRW_MASK_DISABLE);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      brw_MOV(p, retype(brw_message_reg(msg_reg_nr), BRW_REGISTER_TYPE_UD),
	      retype(*src, BRW_REGISTER_TYPE_UD));
      brw_pop_insn_state(p);
   }
   /* Rewrite the caller's source to point at the message register. */
   *src = brw_message_reg(msg_reg_nr);
}
83
84 static void
85 gen7_convert_mrf_to_grf(struct brw_compile *p, struct brw_reg *reg)
86 {
87 /* From the Ivybridge PRM, Volume 4 Part 3, page 218 ("send"):
88 * "The send with EOT should use register space R112-R127 for <src>. This is
89 * to enable loading of a new thread into the same slot while the message
90 * with EOT for current thread is pending dispatch."
91 *
92 * Since we're pretending to have 16 MRFs anyway, we may as well use the
93 * registers required for messages with EOT.
94 */
95 struct brw_context *brw = p->brw;
96 if (brw->gen == 7 && reg->file == BRW_MESSAGE_REGISTER_FILE) {
97 reg->file = BRW_GENERAL_REGISTER_FILE;
98 reg->nr += GEN7_MRF_HACK_START;
99 }
100 }
101
102 /**
103 * Convert a brw_reg_type enumeration value into the hardware representation.
104 *
105 * The hardware encoding may depend on whether the value is an immediate.
106 */
/**
 * Convert a brw_reg_type enumeration value into the hardware representation.
 *
 * The hardware encoding may depend on whether the value is an immediate.
 *
 * Entries of -1 in the lookup tables mark types that have no encoding for
 * that operand class; the asserts below catch any attempt to use them.
 */
unsigned
brw_reg_type_to_hw_type(const struct brw_context *brw,
                        enum brw_reg_type type, unsigned file)
{
   if (file == BRW_IMMEDIATE_VALUE) {
      const static int imm_hw_types[] = {
         [BRW_REGISTER_TYPE_UD] = BRW_HW_REG_TYPE_UD,
         [BRW_REGISTER_TYPE_D]  = BRW_HW_REG_TYPE_D,
         [BRW_REGISTER_TYPE_UW] = BRW_HW_REG_TYPE_UW,
         [BRW_REGISTER_TYPE_W]  = BRW_HW_REG_TYPE_W,
         [BRW_REGISTER_TYPE_F]  = BRW_HW_REG_TYPE_F,
         [BRW_REGISTER_TYPE_UB] = -1,   /* no byte immediates */
         [BRW_REGISTER_TYPE_B]  = -1,   /* no byte immediates */
         [BRW_REGISTER_TYPE_UV] = BRW_HW_REG_IMM_TYPE_UV,
         [BRW_REGISTER_TYPE_VF] = BRW_HW_REG_IMM_TYPE_VF,
         [BRW_REGISTER_TYPE_V]  = BRW_HW_REG_IMM_TYPE_V,
         [BRW_REGISTER_TYPE_DF] = GEN8_HW_REG_IMM_TYPE_DF,
         [BRW_REGISTER_TYPE_HF] = GEN8_HW_REG_IMM_TYPE_HF,
         [BRW_REGISTER_TYPE_UQ] = GEN8_HW_REG_TYPE_UQ,
         [BRW_REGISTER_TYPE_Q]  = GEN8_HW_REG_TYPE_Q,
      };
      assert(type < ARRAY_SIZE(imm_hw_types));
      assert(imm_hw_types[type] != -1);
      /* DF/HF/Q/UQ immediate encodings only exist on Gen8+. */
      assert(brw->gen >= 8 || type < BRW_REGISTER_TYPE_DF);
      return imm_hw_types[type];
   } else {
      /* Non-immediate registers */
      const static int hw_types[] = {
         [BRW_REGISTER_TYPE_UD] = BRW_HW_REG_TYPE_UD,
         [BRW_REGISTER_TYPE_D]  = BRW_HW_REG_TYPE_D,
         [BRW_REGISTER_TYPE_UW] = BRW_HW_REG_TYPE_UW,
         [BRW_REGISTER_TYPE_W]  = BRW_HW_REG_TYPE_W,
         [BRW_REGISTER_TYPE_UB] = BRW_HW_REG_NON_IMM_TYPE_UB,
         [BRW_REGISTER_TYPE_B]  = BRW_HW_REG_NON_IMM_TYPE_B,
         [BRW_REGISTER_TYPE_F]  = BRW_HW_REG_TYPE_F,
         [BRW_REGISTER_TYPE_UV] = -1,   /* vector types are immediate-only */
         [BRW_REGISTER_TYPE_VF] = -1,   /* vector types are immediate-only */
         [BRW_REGISTER_TYPE_V]  = -1,   /* vector types are immediate-only */
         [BRW_REGISTER_TYPE_DF] = GEN7_HW_REG_NON_IMM_TYPE_DF,
         [BRW_REGISTER_TYPE_HF] = GEN8_HW_REG_NON_IMM_TYPE_HF,
         [BRW_REGISTER_TYPE_UQ] = GEN8_HW_REG_TYPE_UQ,
         [BRW_REGISTER_TYPE_Q]  = GEN8_HW_REG_TYPE_Q,
      };
      assert(type < ARRAY_SIZE(hw_types));
      assert(hw_types[type] != -1);
      /* DF registers need Gen7+; HF registers need Gen8+. */
      assert(brw->gen >= 7 || type < BRW_REGISTER_TYPE_DF);
      assert(brw->gen >= 8 || type < BRW_REGISTER_TYPE_HF);
      return hw_types[type];
   }
}
157
/**
 * Pack the destination register @dest into the destination fields of @insn.
 *
 * Handles all four encodings: direct/indirect addressing crossed with
 * align1/align16 access mode.  Also derives the instruction's execution
 * size from the destination width (see guess_execution_size).
 */
void
brw_set_dest(struct brw_compile *p, struct brw_instruction *insn,
	     struct brw_reg dest)
{
   /* GRF/MRF register numbers are 7 bits; ARF and MRF files encode the
    * register number differently, so only range-check the others.
    */
   if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE &&
       dest.file != BRW_MESSAGE_REGISTER_FILE)
      assert(dest.nr < 128);

   gen7_convert_mrf_to_grf(p, &dest);

   insn->bits1.da1.dest_reg_file = dest.file;
   insn->bits1.da1.dest_reg_type =
      brw_reg_type_to_hw_type(p->brw, dest.type, dest.file);
   insn->bits1.da1.dest_address_mode = dest.address_mode;

   if (dest.address_mode == BRW_ADDRESS_DIRECT) {
      insn->bits1.da1.dest_reg_nr = dest.nr;

      if (insn->header.access_mode == BRW_ALIGN_1) {
	 insn->bits1.da1.dest_subreg_nr = dest.subnr;
	 /* Horizontal stride 0 is not a valid destination stride; promote
	  * it to 1.
	  */
	 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
	    dest.hstride = BRW_HORIZONTAL_STRIDE_1;
	 insn->bits1.da1.dest_horiz_stride = dest.hstride;
      }
      else {
	 /* Align16 subregister numbers count 16-byte units. */
	 insn->bits1.da16.dest_subreg_nr = dest.subnr / 16;
	 insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask;
	 if (dest.file == BRW_GENERAL_REGISTER_FILE ||
	     dest.file == BRW_MESSAGE_REGISTER_FILE) {
	    assert(dest.dw1.bits.writemask != 0);
	 }
	 /* From the Ivybridge PRM, Vol 4, Part 3, Section 5.2.4.1:
	  *    Although Dst.HorzStride is a don't care for Align16, HW needs
	  *    this to be programmed as "01".
	  */
	 insn->bits1.da16.dest_horiz_stride = 1;
      }
   }
   else {
      insn->bits1.ia1.dest_subreg_nr = dest.subnr;

      /* These are different sizes in align1 vs align16:
       */
      if (insn->header.access_mode == BRW_ALIGN_1) {
	 insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset;
	 /* Same stride-0 promotion as the direct align1 case above. */
	 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
	    dest.hstride = BRW_HORIZONTAL_STRIDE_1;
	 insn->bits1.ia1.dest_horiz_stride = dest.hstride;
      }
      else {
	 insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset;
	 /* even ignored in da16, still need to set as '01' */
	 insn->bits1.ia16.dest_horiz_stride = 1;
      }
   }

   /* NEW: Set the execution size based on dest.width and
    * insn->compression_control:
    */
   guess_execution_size(p, insn, dest);
}
219
220 extern int reg_type_size[];
221
222 static void
223 validate_reg(struct brw_instruction *insn, struct brw_reg reg)
224 {
225 int hstride_for_reg[] = {0, 1, 2, 4};
226 int vstride_for_reg[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256};
227 int width_for_reg[] = {1, 2, 4, 8, 16};
228 int execsize_for_reg[] = {1, 2, 4, 8, 16};
229 int width, hstride, vstride, execsize;
230
231 if (reg.file == BRW_IMMEDIATE_VALUE) {
232 /* 3.3.6: Region Parameters. Restriction: Immediate vectors
233 * mean the destination has to be 128-bit aligned and the
234 * destination horiz stride has to be a word.
235 */
236 if (reg.type == BRW_REGISTER_TYPE_V) {
237 assert(hstride_for_reg[insn->bits1.da1.dest_horiz_stride] *
238 reg_type_size[insn->bits1.da1.dest_reg_type] == 2);
239 }
240
241 return;
242 }
243
244 if (reg.file == BRW_ARCHITECTURE_REGISTER_FILE &&
245 reg.file == BRW_ARF_NULL)
246 return;
247
248 assert(reg.hstride >= 0 && reg.hstride < Elements(hstride_for_reg));
249 hstride = hstride_for_reg[reg.hstride];
250
251 if (reg.vstride == 0xf) {
252 vstride = -1;
253 } else {
254 assert(reg.vstride >= 0 && reg.vstride < Elements(vstride_for_reg));
255 vstride = vstride_for_reg[reg.vstride];
256 }
257
258 assert(reg.width >= 0 && reg.width < Elements(width_for_reg));
259 width = width_for_reg[reg.width];
260
261 assert(insn->header.execution_size >= 0 &&
262 insn->header.execution_size < Elements(execsize_for_reg));
263 execsize = execsize_for_reg[insn->header.execution_size];
264
265 /* Restrictions from 3.3.10: Register Region Restrictions. */
266 /* 3. */
267 assert(execsize >= width);
268
269 /* 4. */
270 if (execsize == width && hstride != 0) {
271 assert(vstride == -1 || vstride == width * hstride);
272 }
273
274 /* 5. */
275 if (execsize == width && hstride == 0) {
276 /* no restriction on vstride. */
277 }
278
279 /* 6. */
280 if (width == 1) {
281 assert(hstride == 0);
282 }
283
284 /* 7. */
285 if (execsize == 1 && width == 1) {
286 assert(hstride == 0);
287 assert(vstride == 0);
288 }
289
290 /* 8. */
291 if (vstride == 0 && hstride == 0) {
292 assert(width == 1);
293 }
294
295 /* 10. Check destination issues. */
296 }
297
298 void
299 brw_set_src0(struct brw_compile *p, struct brw_instruction *insn,
300 struct brw_reg reg)
301 {
302 struct brw_context *brw = p->brw;
303
304 if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE)
305 assert(reg.nr < 128);
306
307 gen7_convert_mrf_to_grf(p, &reg);
308
309 if (brw->gen >= 6 && (insn->header.opcode == BRW_OPCODE_SEND ||
310 insn->header.opcode == BRW_OPCODE_SENDC)) {
311 /* Any source modifiers or regions will be ignored, since this just
312 * identifies the MRF/GRF to start reading the message contents from.
313 * Check for some likely failures.
314 */
315 assert(!reg.negate);
316 assert(!reg.abs);
317 assert(reg.address_mode == BRW_ADDRESS_DIRECT);
318 }
319
320 validate_reg(insn, reg);
321
322 insn->bits1.da1.src0_reg_file = reg.file;
323 insn->bits1.da1.src0_reg_type =
324 brw_reg_type_to_hw_type(brw, reg.type, reg.file);
325 insn->bits2.da1.src0_abs = reg.abs;
326 insn->bits2.da1.src0_negate = reg.negate;
327 insn->bits2.da1.src0_address_mode = reg.address_mode;
328
329 if (reg.file == BRW_IMMEDIATE_VALUE) {
330 insn->bits3.ud = reg.dw1.ud;
331
332 /* Required to set some fields in src1 as well:
333 */
334 insn->bits1.da1.src1_reg_file = 0; /* arf */
335 insn->bits1.da1.src1_reg_type = insn->bits1.da1.src0_reg_type;
336 }
337 else
338 {
339 if (reg.address_mode == BRW_ADDRESS_DIRECT) {
340 if (insn->header.access_mode == BRW_ALIGN_1) {
341 insn->bits2.da1.src0_subreg_nr = reg.subnr;
342 insn->bits2.da1.src0_reg_nr = reg.nr;
343 }
344 else {
345 insn->bits2.da16.src0_subreg_nr = reg.subnr / 16;
346 insn->bits2.da16.src0_reg_nr = reg.nr;
347 }
348 }
349 else {
350 insn->bits2.ia1.src0_subreg_nr = reg.subnr;
351
352 if (insn->header.access_mode == BRW_ALIGN_1) {
353 insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset;
354 }
355 else {
356 insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset;
357 }
358 }
359
360 if (insn->header.access_mode == BRW_ALIGN_1) {
361 if (reg.width == BRW_WIDTH_1 &&
362 insn->header.execution_size == BRW_EXECUTE_1) {
363 insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
364 insn->bits2.da1.src0_width = BRW_WIDTH_1;
365 insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0;
366 }
367 else {
368 insn->bits2.da1.src0_horiz_stride = reg.hstride;
369 insn->bits2.da1.src0_width = reg.width;
370 insn->bits2.da1.src0_vert_stride = reg.vstride;
371 }
372 }
373 else {
374 insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
375 insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
376 insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
377 insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
378
379 /* This is an oddity of the fact we're using the same
380 * descriptions for registers in align_16 as align_1:
381 */
382 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
383 insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4;
384 else
385 insn->bits2.da16.src0_vert_stride = reg.vstride;
386 }
387 }
388 }
389
390
391 void
392 brw_set_src1(struct brw_compile *p,
393 struct brw_instruction *insn,
394 struct brw_reg reg)
395 {
396 assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
397
398 if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE)
399 assert(reg.nr < 128);
400
401 gen7_convert_mrf_to_grf(p, &reg);
402
403 validate_reg(insn, reg);
404
405 insn->bits1.da1.src1_reg_file = reg.file;
406 insn->bits1.da1.src1_reg_type =
407 brw_reg_type_to_hw_type(p->brw, reg.type, reg.file);
408 insn->bits3.da1.src1_abs = reg.abs;
409 insn->bits3.da1.src1_negate = reg.negate;
410
411 /* Only src1 can be immediate in two-argument instructions.
412 */
413 assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE);
414
415 if (reg.file == BRW_IMMEDIATE_VALUE) {
416 insn->bits3.ud = reg.dw1.ud;
417 }
418 else {
419 /* This is a hardware restriction, which may or may not be lifted
420 * in the future:
421 */
422 assert (reg.address_mode == BRW_ADDRESS_DIRECT);
423 /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */
424
425 if (insn->header.access_mode == BRW_ALIGN_1) {
426 insn->bits3.da1.src1_subreg_nr = reg.subnr;
427 insn->bits3.da1.src1_reg_nr = reg.nr;
428 }
429 else {
430 insn->bits3.da16.src1_subreg_nr = reg.subnr / 16;
431 insn->bits3.da16.src1_reg_nr = reg.nr;
432 }
433
434 if (insn->header.access_mode == BRW_ALIGN_1) {
435 if (reg.width == BRW_WIDTH_1 &&
436 insn->header.execution_size == BRW_EXECUTE_1) {
437 insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
438 insn->bits3.da1.src1_width = BRW_WIDTH_1;
439 insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0;
440 }
441 else {
442 insn->bits3.da1.src1_horiz_stride = reg.hstride;
443 insn->bits3.da1.src1_width = reg.width;
444 insn->bits3.da1.src1_vert_stride = reg.vstride;
445 }
446 }
447 else {
448 insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
449 insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
450 insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
451 insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
452
453 /* This is an oddity of the fact we're using the same
454 * descriptions for registers in align_16 as align_1:
455 */
456 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
457 insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4;
458 else
459 insn->bits3.da16.src1_vert_stride = reg.vstride;
460 }
461 }
462 }
463
464 /**
465 * Set the Message Descriptor and Extended Message Descriptor fields
466 * for SEND messages.
467 *
468 * \note This zeroes out the Function Control bits, so it must be called
469 * \b before filling out any message-specific data. Callers can
470 * choose not to fill in irrelevant bits; they will be zero.
471 */
/**
 * Set the Message Descriptor and Extended Message Descriptor fields
 * for SEND messages.
 *
 * \note This zeroes out the Function Control bits, so it must be called
 *       \b before filling out any message-specific data.  Callers can
 *       choose not to fill in irrelevant bits; they will be zero.
 */
static void
brw_set_message_descriptor(struct brw_compile *p,
			   struct brw_instruction *inst,
			   enum brw_message_target sfid,
			   unsigned msg_length,
			   unsigned response_length,
			   bool header_present,
			   bool end_of_thread)
{
   struct brw_context *brw = p->brw;

   /* Clear bits3 (and set src1's file/type) by writing an immediate 0;
    * this is what zeroes the Function Control bits.
    */
   brw_set_src1(p, inst, brw_imm_d(0));

   if (brw->gen >= 5) {
      inst->bits3.generic_gen5.header_present = header_present;
      inst->bits3.generic_gen5.response_length = response_length;
      inst->bits3.generic_gen5.msg_length = msg_length;
      inst->bits3.generic_gen5.end_of_thread = end_of_thread;

      if (brw->gen >= 6) {
	 /* On Gen6+ Message target/SFID goes in bits 27:24 of the header */
	 inst->header.destreg__conditionalmod = sfid;
      } else {
	 /* Set Extended Message Descriptor (ex_desc) */
	 inst->bits2.send_gen5.sfid = sfid;
	 inst->bits2.send_gen5.end_of_thread = end_of_thread;
      }
   } else {
      /* Pre-Gen5: everything lives in the message descriptor itself. */
      inst->bits3.generic.response_length = response_length;
      inst->bits3.generic.msg_length = msg_length;
      inst->bits3.generic.msg_target = sfid;
      inst->bits3.generic.end_of_thread = end_of_thread;
   }
}
506
507 static void brw_set_math_message( struct brw_compile *p,
508 struct brw_instruction *insn,
509 unsigned function,
510 unsigned integer_type,
511 bool low_precision,
512 unsigned dataType )
513 {
514 struct brw_context *brw = p->brw;
515 unsigned msg_length;
516 unsigned response_length;
517
518 /* Infer message length from the function */
519 switch (function) {
520 case BRW_MATH_FUNCTION_POW:
521 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT:
522 case BRW_MATH_FUNCTION_INT_DIV_REMAINDER:
523 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER:
524 msg_length = 2;
525 break;
526 default:
527 msg_length = 1;
528 break;
529 }
530
531 /* Infer response length from the function */
532 switch (function) {
533 case BRW_MATH_FUNCTION_SINCOS:
534 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER:
535 response_length = 2;
536 break;
537 default:
538 response_length = 1;
539 break;
540 }
541
542
543 brw_set_message_descriptor(p, insn, BRW_SFID_MATH,
544 msg_length, response_length, false, false);
545 if (brw->gen == 5) {
546 insn->bits3.math_gen5.function = function;
547 insn->bits3.math_gen5.int_type = integer_type;
548 insn->bits3.math_gen5.precision = low_precision;
549 insn->bits3.math_gen5.saturate = insn->header.saturate;
550 insn->bits3.math_gen5.data_type = dataType;
551 insn->bits3.math_gen5.snapshot = 0;
552 } else {
553 insn->bits3.math.function = function;
554 insn->bits3.math.int_type = integer_type;
555 insn->bits3.math.precision = low_precision;
556 insn->bits3.math.saturate = insn->header.saturate;
557 insn->bits3.math.data_type = dataType;
558 }
559 insn->header.saturate = 0;
560 }
561
562
563 static void brw_set_ff_sync_message(struct brw_compile *p,
564 struct brw_instruction *insn,
565 bool allocate,
566 unsigned response_length,
567 bool end_of_thread)
568 {
569 brw_set_message_descriptor(p, insn, BRW_SFID_URB,
570 1, response_length, true, end_of_thread);
571 insn->bits3.urb_gen5.opcode = 1; /* FF_SYNC */
572 insn->bits3.urb_gen5.offset = 0; /* Not used by FF_SYNC */
573 insn->bits3.urb_gen5.swizzle_control = 0; /* Not used by FF_SYNC */
574 insn->bits3.urb_gen5.allocate = allocate;
575 insn->bits3.urb_gen5.used = 0; /* Not used by FF_SYNC */
576 insn->bits3.urb_gen5.complete = 0; /* Not used by FF_SYNC */
577 }
578
/* Fill in a URB write message descriptor for @insn.
 *
 * @flags selects EOT, allocation, per-slot offsets, completion and
 * OWORD/HWORD sizing; the exact descriptor layout differs per generation.
 */
static void brw_set_urb_message( struct brw_compile *p,
				 struct brw_instruction *insn,
				 enum brw_urb_write_flags flags,
				 unsigned msg_length,
				 unsigned response_length,
				 unsigned offset,
				 unsigned swizzle_control )
{
   struct brw_context *brw = p->brw;

   brw_set_message_descriptor(p, insn, BRW_SFID_URB,
			      msg_length, response_length, true,
			      flags & BRW_URB_WRITE_EOT);
   if (brw->gen == 7) {
      if (flags & BRW_URB_WRITE_OWORD) {
	 assert(msg_length == 2); /* header + one OWORD of data */
	 insn->bits3.urb_gen7.opcode = BRW_URB_OPCODE_WRITE_OWORD;
      } else {
	 insn->bits3.urb_gen7.opcode = BRW_URB_OPCODE_WRITE_HWORD;
      }
      insn->bits3.urb_gen7.offset = offset;
      /* Gen7 URB writes do not support transposed swizzling. */
      assert(swizzle_control != BRW_URB_SWIZZLE_TRANSPOSE);
      insn->bits3.urb_gen7.swizzle_control = swizzle_control;
      insn->bits3.urb_gen7.per_slot_offset =
	 flags & BRW_URB_WRITE_PER_SLOT_OFFSET ? 1 : 0;
      insn->bits3.urb_gen7.complete = flags & BRW_URB_WRITE_COMPLETE ? 1 : 0;
   } else if (brw->gen >= 5) {
      insn->bits3.urb_gen5.opcode = 0;	/* URB_WRITE */
      insn->bits3.urb_gen5.offset = offset;
      insn->bits3.urb_gen5.swizzle_control = swizzle_control;
      insn->bits3.urb_gen5.allocate = flags & BRW_URB_WRITE_ALLOCATE ? 1 : 0;
      insn->bits3.urb_gen5.used = flags & BRW_URB_WRITE_UNUSED ? 0 : 1;
      insn->bits3.urb_gen5.complete = flags & BRW_URB_WRITE_COMPLETE ? 1 : 0;
   } else {
      insn->bits3.urb.opcode = 0;	/* ? */
      insn->bits3.urb.offset = offset;
      insn->bits3.urb.swizzle_control = swizzle_control;
      insn->bits3.urb.allocate = flags & BRW_URB_WRITE_ALLOCATE ? 1 : 0;
      insn->bits3.urb.used = flags & BRW_URB_WRITE_UNUSED ? 0 : 1;
      insn->bits3.urb.complete = flags & BRW_URB_WRITE_COMPLETE ? 1 : 0;
   }
}
621
/* Fill in a data-port write message descriptor for @insn.
 *
 * Chooses the shared function (SFID) appropriate for the generation —
 * render cache for RT writes, data cache otherwise on Gen7+ — then packs
 * the per-generation descriptor layout.
 */
void
brw_set_dp_write_message(struct brw_compile *p,
			 struct brw_instruction *insn,
			 unsigned binding_table_index,
			 unsigned msg_control,
			 unsigned msg_type,
			 unsigned msg_length,
			 bool header_present,
			 unsigned last_render_target,
			 unsigned response_length,
			 unsigned end_of_thread,
			 unsigned send_commit_msg)
{
   struct brw_context *brw = p->brw;
   unsigned sfid;

   if (brw->gen >= 7) {
      /* Use the Render Cache for RT writes; otherwise use the Data Cache */
      if (msg_type == GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE)
	 sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
      else
	 sfid = GEN7_SFID_DATAPORT_DATA_CACHE;
   } else if (brw->gen == 6) {
      /* Use the render cache for all write messages. */
      sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
   } else {
      sfid = BRW_SFID_DATAPORT_WRITE;
   }

   brw_set_message_descriptor(p, insn, sfid, msg_length, response_length,
			      header_present, end_of_thread);

   /* Per-generation descriptor layouts: */
   if (brw->gen >= 7) {
      insn->bits3.gen7_dp.binding_table_index = binding_table_index;
      insn->bits3.gen7_dp.msg_control = msg_control;
      insn->bits3.gen7_dp.last_render_target = last_render_target;
      insn->bits3.gen7_dp.msg_type = msg_type;
   } else if (brw->gen == 6) {
      insn->bits3.gen6_dp.binding_table_index = binding_table_index;
      insn->bits3.gen6_dp.msg_control = msg_control;
      insn->bits3.gen6_dp.last_render_target = last_render_target;
      insn->bits3.gen6_dp.msg_type = msg_type;
      insn->bits3.gen6_dp.send_commit_msg = send_commit_msg;
   } else if (brw->gen == 5) {
      insn->bits3.dp_write_gen5.binding_table_index = binding_table_index;
      insn->bits3.dp_write_gen5.msg_control = msg_control;
      insn->bits3.dp_write_gen5.last_render_target = last_render_target;
      insn->bits3.dp_write_gen5.msg_type = msg_type;
      insn->bits3.dp_write_gen5.send_commit_msg = send_commit_msg;
   } else {
      insn->bits3.dp_write.binding_table_index = binding_table_index;
      insn->bits3.dp_write.msg_control = msg_control;
      insn->bits3.dp_write.last_render_target = last_render_target;
      insn->bits3.dp_write.msg_type = msg_type;
      insn->bits3.dp_write.send_commit_msg = send_commit_msg;
   }
}
679
/* Fill in a data-port read message descriptor for @insn.
 *
 * Selects the SFID per generation (@target_cache picks between render and
 * sampler cache on Gen6) and packs the per-generation descriptor layout.
 */
void
brw_set_dp_read_message(struct brw_compile *p,
			struct brw_instruction *insn,
			unsigned binding_table_index,
			unsigned msg_control,
			unsigned msg_type,
			unsigned target_cache,
			unsigned msg_length,
			bool header_present,
			unsigned response_length)
{
   struct brw_context *brw = p->brw;
   unsigned sfid;

   if (brw->gen >= 7) {
      sfid = GEN7_SFID_DATAPORT_DATA_CACHE;
   } else if (brw->gen == 6) {
      if (target_cache == BRW_DATAPORT_READ_TARGET_RENDER_CACHE)
	 sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
      else
	 sfid = GEN6_SFID_DATAPORT_SAMPLER_CACHE;
   } else {
      sfid = BRW_SFID_DATAPORT_READ;
   }

   /* Reads never terminate the thread, hence end_of_thread = false. */
   brw_set_message_descriptor(p, insn, sfid, msg_length, response_length,
			      header_present, false);

   /* Per-generation descriptor layouts: */
   if (brw->gen >= 7) {
      insn->bits3.gen7_dp.binding_table_index = binding_table_index;
      insn->bits3.gen7_dp.msg_control = msg_control;
      insn->bits3.gen7_dp.last_render_target = 0;
      insn->bits3.gen7_dp.msg_type = msg_type;
   } else if (brw->gen == 6) {
      insn->bits3.gen6_dp.binding_table_index = binding_table_index;
      insn->bits3.gen6_dp.msg_control = msg_control;
      insn->bits3.gen6_dp.last_render_target = 0;
      insn->bits3.gen6_dp.msg_type = msg_type;
      insn->bits3.gen6_dp.send_commit_msg = 0;
   } else if (brw->gen == 5) {
      insn->bits3.dp_read_gen5.binding_table_index = binding_table_index;
      insn->bits3.dp_read_gen5.msg_control = msg_control;
      insn->bits3.dp_read_gen5.msg_type = msg_type;
      insn->bits3.dp_read_gen5.target_cache = target_cache;
   } else if (brw->is_g4x) {
      insn->bits3.dp_read_g4x.binding_table_index = binding_table_index; /*0:7*/
      insn->bits3.dp_read_g4x.msg_control = msg_control;  /*8:10*/
      insn->bits3.dp_read_g4x.msg_type = msg_type;  /*11:13*/
      insn->bits3.dp_read_g4x.target_cache = target_cache;  /*14:15*/
   } else {
      insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/
      insn->bits3.dp_read.msg_control = msg_control;  /*8:11*/
      insn->bits3.dp_read.msg_type = msg_type;  /*12:13*/
      insn->bits3.dp_read.target_cache = target_cache;  /*14:15*/
   }
}
736
/* Fill in a sampler message descriptor for @insn.
 *
 * The descriptor layout differs per generation; @return_format only
 * exists in the original (pre-G4X) encoding, and @simd_mode only from
 * Gen5 onward.
 */
void
brw_set_sampler_message(struct brw_compile *p,
                        struct brw_instruction *insn,
                        unsigned binding_table_index,
                        unsigned sampler,
                        unsigned msg_type,
                        unsigned response_length,
                        unsigned msg_length,
                        unsigned header_present,
                        unsigned simd_mode,
                        unsigned return_format)
{
   struct brw_context *brw = p->brw;

   /* Sampler reads never terminate the thread (end_of_thread = false). */
   brw_set_message_descriptor(p, insn, BRW_SFID_SAMPLER, msg_length,
			      response_length, header_present, false);

   if (brw->gen >= 7) {
      insn->bits3.sampler_gen7.binding_table_index = binding_table_index;
      insn->bits3.sampler_gen7.sampler = sampler;
      insn->bits3.sampler_gen7.msg_type = msg_type;
      insn->bits3.sampler_gen7.simd_mode = simd_mode;
   } else if (brw->gen >= 5) {
      insn->bits3.sampler_gen5.binding_table_index = binding_table_index;
      insn->bits3.sampler_gen5.sampler = sampler;
      insn->bits3.sampler_gen5.msg_type = msg_type;
      insn->bits3.sampler_gen5.simd_mode = simd_mode;
   } else if (brw->is_g4x) {
      insn->bits3.sampler_g4x.binding_table_index = binding_table_index;
      insn->bits3.sampler_g4x.sampler = sampler;
      insn->bits3.sampler_g4x.msg_type = msg_type;
   } else {
      insn->bits3.sampler.binding_table_index = binding_table_index;
      insn->bits3.sampler.sampler = sampler;
      insn->bits3.sampler.msg_type = msg_type;
      insn->bits3.sampler.return_format = return_format;
   }
}
775
776
777 #define next_insn brw_next_insn
778 struct brw_instruction *
779 brw_next_insn(struct brw_compile *p, unsigned opcode)
780 {
781 struct brw_instruction *insn;
782
783 if (p->nr_insn + 1 > p->store_size) {
784 if (0) {
785 fprintf(stderr, "incresing the store size to %d\n",
786 p->store_size << 1);
787 }
788 p->store_size <<= 1;
789 p->store = reralloc(p->mem_ctx, p->store,
790 struct brw_instruction, p->store_size);
791 if (!p->store)
792 assert(!"realloc eu store memeory failed");
793 }
794
795 p->next_insn_offset += 16;
796 insn = &p->store[p->nr_insn++];
797 memcpy(insn, p->current, sizeof(*insn));
798
799 /* Reset this one-shot flag:
800 */
801
802 if (p->current->header.destreg__conditionalmod) {
803 p->current->header.destreg__conditionalmod = 0;
804 p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
805 }
806
807 insn->header.opcode = opcode;
808 return insn;
809 }
810
811 static struct brw_instruction *brw_alu1( struct brw_compile *p,
812 unsigned opcode,
813 struct brw_reg dest,
814 struct brw_reg src )
815 {
816 struct brw_instruction *insn = next_insn(p, opcode);
817 brw_set_dest(p, insn, dest);
818 brw_set_src0(p, insn, src);
819 return insn;
820 }
821
822 static struct brw_instruction *brw_alu2(struct brw_compile *p,
823 unsigned opcode,
824 struct brw_reg dest,
825 struct brw_reg src0,
826 struct brw_reg src1 )
827 {
828 struct brw_instruction *insn = next_insn(p, opcode);
829 brw_set_dest(p, insn, dest);
830 brw_set_src0(p, insn, src0);
831 brw_set_src1(p, insn, src1);
832 return insn;
833 }
834
835 static int
836 get_3src_subreg_nr(struct brw_reg reg)
837 {
838 if (reg.vstride == BRW_VERTICAL_STRIDE_0) {
839 assert(brw_is_single_value_swizzle(reg.dw1.bits.swizzle));
840 return reg.subnr / 4 + BRW_GET_SWZ(reg.dw1.bits.swizzle, 0);
841 } else {
842 return reg.subnr / 4;
843 }
844 }
845
/* Emit a three-source ALU instruction (MAD, LRP, BFE, BFI2, ...).
 *
 * 3-src instructions only exist in align16 mode, only read GRFs with
 * direct addressing, and use their own compact encoding (da3src) with
 * per-source replicate control instead of full regions.
 */
static struct brw_instruction *brw_alu3(struct brw_compile *p,
					unsigned opcode,
					struct brw_reg dest,
					struct brw_reg src0,
					struct brw_reg src1,
					struct brw_reg src2)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn = next_insn(p, opcode);

   gen7_convert_mrf_to_grf(p, &dest);

   assert(insn->header.access_mode == BRW_ALIGN_16);

   /* Destination: GRF or MRF only, direct addressing, F/D/UD type. */
   assert(dest.file == BRW_GENERAL_REGISTER_FILE ||
	  dest.file == BRW_MESSAGE_REGISTER_FILE);
   assert(dest.nr < 128);
   assert(dest.address_mode == BRW_ADDRESS_DIRECT);
   assert(dest.type == BRW_REGISTER_TYPE_F ||
	  dest.type == BRW_REGISTER_TYPE_D ||
	  dest.type == BRW_REGISTER_TYPE_UD);
   /* dest_reg_file is a single bit: 0 = GRF, 1 = MRF. */
   insn->bits1.da3src.dest_reg_file = (dest.file == BRW_MESSAGE_REGISTER_FILE);
   insn->bits1.da3src.dest_reg_nr = dest.nr;
   insn->bits1.da3src.dest_subreg_nr = dest.subnr / 16;
   insn->bits1.da3src.dest_writemask = dest.dw1.bits.writemask;
   guess_execution_size(p, insn, dest);

   /* Sources must be GRFs with direct addressing.  rep_ctrl replicates a
    * scalar (vstride 0) source across all channels.
    */
   assert(src0.file == BRW_GENERAL_REGISTER_FILE);
   assert(src0.address_mode == BRW_ADDRESS_DIRECT);
   assert(src0.nr < 128);
   insn->bits2.da3src.src0_swizzle = src0.dw1.bits.swizzle;
   insn->bits2.da3src.src0_subreg_nr = get_3src_subreg_nr(src0);
   insn->bits2.da3src.src0_reg_nr = src0.nr;
   insn->bits1.da3src.src0_abs = src0.abs;
   insn->bits1.da3src.src0_negate = src0.negate;
   insn->bits2.da3src.src0_rep_ctrl = src0.vstride == BRW_VERTICAL_STRIDE_0;

   assert(src1.file == BRW_GENERAL_REGISTER_FILE);
   assert(src1.address_mode == BRW_ADDRESS_DIRECT);
   assert(src1.nr < 128);
   /* src1's subregister number is split across two instruction words. */
   insn->bits2.da3src.src1_swizzle = src1.dw1.bits.swizzle;
   insn->bits2.da3src.src1_subreg_nr_low = get_3src_subreg_nr(src1) & 0x3;
   insn->bits3.da3src.src1_subreg_nr_high = get_3src_subreg_nr(src1) >> 2;
   insn->bits2.da3src.src1_rep_ctrl = src1.vstride == BRW_VERTICAL_STRIDE_0;
   insn->bits3.da3src.src1_reg_nr = src1.nr;
   insn->bits1.da3src.src1_abs = src1.abs;
   insn->bits1.da3src.src1_negate = src1.negate;

   assert(src2.file == BRW_GENERAL_REGISTER_FILE);
   assert(src2.address_mode == BRW_ADDRESS_DIRECT);
   assert(src2.nr < 128);
   insn->bits3.da3src.src2_swizzle = src2.dw1.bits.swizzle;
   insn->bits3.da3src.src2_subreg_nr = get_3src_subreg_nr(src2);
   insn->bits3.da3src.src2_rep_ctrl = src2.vstride == BRW_VERTICAL_STRIDE_0;
   insn->bits3.da3src.src2_reg_nr = src2.nr;
   insn->bits1.da3src.src2_abs = src2.abs;
   insn->bits1.da3src.src2_negate = src2.negate;

   if (brw->gen >= 7) {
      /* Set both the source and destination types based on dest.type,
       * ignoring the source register types.  The MAD and LRP emitters ensure
       * that all four types are float.  The BFE and BFI2 emitters, however,
       * may send us mixed D and UD types and want us to ignore that and use
       * the destination type.
       */
      switch (dest.type) {
      case BRW_REGISTER_TYPE_F:
         insn->bits1.da3src.src_type = BRW_3SRC_TYPE_F;
         insn->bits1.da3src.dst_type = BRW_3SRC_TYPE_F;
         break;
      case BRW_REGISTER_TYPE_D:
         insn->bits1.da3src.src_type = BRW_3SRC_TYPE_D;
         insn->bits1.da3src.dst_type = BRW_3SRC_TYPE_D;
         break;
      case BRW_REGISTER_TYPE_UD:
         insn->bits1.da3src.src_type = BRW_3SRC_TYPE_UD;
         insn->bits1.da3src.dst_type = BRW_3SRC_TYPE_UD;
         break;
      }
   }

   return insn;
}
929
930
931 /***********************************************************************
932 * Convenience routines.
933 */
/* Instantiate a one-source ALU wrapper: brw_<OP>() forwarding to brw_alu1(). */
#define ALU1(OP)					\
struct brw_instruction *brw_##OP(struct brw_compile *p,	\
	      struct brw_reg dest,			\
	      struct brw_reg src0)   			\
{							\
   return brw_alu1(p, BRW_OPCODE_##OP, dest, src0);    	\
}
941
/* Instantiate a two-source ALU wrapper: brw_<OP>() forwarding to brw_alu2(). */
#define ALU2(OP)					\
struct brw_instruction *brw_##OP(struct brw_compile *p,	\
	      struct brw_reg dest,			\
	      struct brw_reg src0,			\
	      struct brw_reg src1)   			\
{							\
   return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1);	\
}
950
/* Instantiate a three-source ALU wrapper: brw_<OP>() forwarding to brw_alu3(). */
#define ALU3(OP)					\
struct brw_instruction *brw_##OP(struct brw_compile *p,	\
	      struct brw_reg dest,			\
	      struct brw_reg src0,			\
	      struct brw_reg src1,			\
	      struct brw_reg src2)   			\
{							\
   return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2);	\
}
960
/* Like ALU3, but additionally asserts that all four operands are float
 * (used for MAD/LRP, whose 3-src encoding here is float-only).
 */
#define ALU3F(OP)					\
struct brw_instruction *brw_##OP(struct brw_compile *p,	\
	      struct brw_reg dest,			\
	      struct brw_reg src0,			\
	      struct brw_reg src1,			\
	      struct brw_reg src2)   			\
{							\
   assert(dest.type == BRW_REGISTER_TYPE_F);		\
   assert(src0.type == BRW_REGISTER_TYPE_F);		\
   assert(src1.type == BRW_REGISTER_TYPE_F);		\
   assert(src2.type == BRW_REGISTER_TYPE_F);		\
   return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2);	\
}
974
975 /* Rounding operations (other than RNDD) require two instructions - the first
976 * stores a rounded value (possibly the wrong way) in the dest register, but
977 * also sets a per-channel "increment bit" in the flag register. A predicated
978 * add of 1.0 fixes dest to contain the desired result.
979 *
980 * Sandybridge and later appear to round correctly without an ADD.
981 */
/* Instantiate brw_<OP>() for a rounding opcode; see the comment above for
 * why pre-gen6 needs the predicated ADD fix-up.
 */
#define ROUND(OP)							      \
void brw_##OP(struct brw_compile *p,					      \
	      struct brw_reg dest,					      \
	      struct brw_reg src)					      \
{									      \
   struct brw_instruction *rnd, *add;					      \
   rnd = next_insn(p, BRW_OPCODE_##OP);					      \
   brw_set_dest(p, rnd, dest);						      \
   brw_set_src0(p, rnd, src);						      \
									      \
   if (p->brw->gen < 6) {						      \
      /* turn on round-increments */					      \
      rnd->header.destreg__conditionalmod = BRW_CONDITIONAL_R;		      \
      add = brw_ADD(p, dest, dest, brw_imm_f(1.0f));			      \
      add->header.predicate_control = BRW_PREDICATE_NORMAL;		      \
   }									      \
}
999
1000
/* Instantiate the simple brw_<OP>() emitters from the macros above.  Opcodes
 * with extra validation (ADD, AVG, MUL) are written out by hand below.
 */
ALU1(MOV)
ALU2(SEL)
ALU1(NOT)
ALU2(AND)
ALU2(OR)
ALU2(XOR)
ALU2(SHR)
ALU2(SHL)
ALU2(ASR)
ALU1(F32TO16)
ALU1(F16TO32)
ALU1(FRC)
ALU1(RNDD)
ALU2(MAC)
ALU2(MACH)
ALU1(LZD)
ALU2(DP4)
ALU2(DPH)
ALU2(DP3)
ALU2(DP2)
ALU2(LINE)
ALU2(PLN)
ALU3F(MAD)
ALU3F(LRP)
ALU1(BFREV)
ALU3(BFE)
ALU2(BFI1)
ALU3(BFI2)
ALU1(FBH)
ALU1(FBL)
ALU1(CBIT)
ALU2(ADDC)
ALU2(SUBB)

ROUND(RNDZ)
ROUND(RNDE)
1037
1038
1039 struct brw_instruction *brw_ADD(struct brw_compile *p,
1040 struct brw_reg dest,
1041 struct brw_reg src0,
1042 struct brw_reg src1)
1043 {
1044 /* 6.2.2: add */
1045 if (src0.type == BRW_REGISTER_TYPE_F ||
1046 (src0.file == BRW_IMMEDIATE_VALUE &&
1047 src0.type == BRW_REGISTER_TYPE_VF)) {
1048 assert(src1.type != BRW_REGISTER_TYPE_UD);
1049 assert(src1.type != BRW_REGISTER_TYPE_D);
1050 }
1051
1052 if (src1.type == BRW_REGISTER_TYPE_F ||
1053 (src1.file == BRW_IMMEDIATE_VALUE &&
1054 src1.type == BRW_REGISTER_TYPE_VF)) {
1055 assert(src0.type != BRW_REGISTER_TYPE_UD);
1056 assert(src0.type != BRW_REGISTER_TYPE_D);
1057 }
1058
1059 return brw_alu2(p, BRW_OPCODE_ADD, dest, src0, src1);
1060 }
1061
1062 struct brw_instruction *brw_AVG(struct brw_compile *p,
1063 struct brw_reg dest,
1064 struct brw_reg src0,
1065 struct brw_reg src1)
1066 {
1067 assert(dest.type == src0.type);
1068 assert(src0.type == src1.type);
1069 switch (src0.type) {
1070 case BRW_REGISTER_TYPE_B:
1071 case BRW_REGISTER_TYPE_UB:
1072 case BRW_REGISTER_TYPE_W:
1073 case BRW_REGISTER_TYPE_UW:
1074 case BRW_REGISTER_TYPE_D:
1075 case BRW_REGISTER_TYPE_UD:
1076 break;
1077 default:
1078 assert(!"Bad type for brw_AVG");
1079 }
1080
1081 return brw_alu2(p, BRW_OPCODE_AVG, dest, src0, src1);
1082 }
1083
1084 struct brw_instruction *brw_MUL(struct brw_compile *p,
1085 struct brw_reg dest,
1086 struct brw_reg src0,
1087 struct brw_reg src1)
1088 {
1089 /* 6.32.38: mul */
1090 if (src0.type == BRW_REGISTER_TYPE_D ||
1091 src0.type == BRW_REGISTER_TYPE_UD ||
1092 src1.type == BRW_REGISTER_TYPE_D ||
1093 src1.type == BRW_REGISTER_TYPE_UD) {
1094 assert(dest.type != BRW_REGISTER_TYPE_F);
1095 }
1096
1097 if (src0.type == BRW_REGISTER_TYPE_F ||
1098 (src0.file == BRW_IMMEDIATE_VALUE &&
1099 src0.type == BRW_REGISTER_TYPE_VF)) {
1100 assert(src1.type != BRW_REGISTER_TYPE_UD);
1101 assert(src1.type != BRW_REGISTER_TYPE_D);
1102 }
1103
1104 if (src1.type == BRW_REGISTER_TYPE_F ||
1105 (src1.file == BRW_IMMEDIATE_VALUE &&
1106 src1.type == BRW_REGISTER_TYPE_VF)) {
1107 assert(src0.type != BRW_REGISTER_TYPE_UD);
1108 assert(src0.type != BRW_REGISTER_TYPE_D);
1109 }
1110
1111 assert(src0.file != BRW_ARCHITECTURE_REGISTER_FILE ||
1112 src0.nr != BRW_ARF_ACCUMULATOR);
1113 assert(src1.file != BRW_ARCHITECTURE_REGISTER_FILE ||
1114 src1.nr != BRW_ARF_ACCUMULATOR);
1115
1116 return brw_alu2(p, BRW_OPCODE_MUL, dest, src0, src1);
1117 }
1118
1119
1120 void brw_NOP(struct brw_compile *p)
1121 {
1122 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP);
1123 brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
1124 brw_set_src0(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
1125 brw_set_src1(p, insn, brw_imm_ud(0x0));
1126 }
1127
1128
1129
1130
1131
1132 /***********************************************************************
1133 * Comparisons, if/else/endif
1134 */
1135
/* Emit a JMPI (jump-immediate).  JMPI is a scalar (SIMD1) instruction; the
 * jump target for a forward jump is filled in later by brw_land_fwd_jump().
 */
struct brw_instruction *brw_JMPI(struct brw_compile *p,
                                 struct brw_reg dest,
                                 struct brw_reg src0,
                                 struct brw_reg src1)
{
   struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1);

   insn->header.execution_size = 1;
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.mask_control = BRW_MASK_DISABLE;

   /* Any predicate requested by the caller applies only to this jump;
    * reset the default for subsequent instructions.
    */
   p->current->header.predicate_control = BRW_PREDICATE_NONE;

   return insn;
}
1151
/* Record the position of an IF/ELSE instruction so brw_ENDIF() can patch it
 * later.  Store indices rather than pointers, because p->store may be
 * reallocated by next_insn().
 */
static void
push_if_stack(struct brw_compile *p, struct brw_instruction *inst)
{
   p->if_stack[p->if_stack_depth] = inst - p->store;

   p->if_stack_depth++;
   /* Grow after the write: the <= test keeps the array one slot ahead of
    * the next push.
    */
   if (p->if_stack_array_size <= p->if_stack_depth) {
      p->if_stack_array_size *= 2;
      p->if_stack = reralloc(p->mem_ctx, p->if_stack, int,
			     p->if_stack_array_size);
   }
}
1164
1165 static struct brw_instruction *
1166 pop_if_stack(struct brw_compile *p)
1167 {
1168 p->if_stack_depth--;
1169 return &p->store[p->if_stack[p->if_stack_depth]];
1170 }
1171
1172 static void
1173 push_loop_stack(struct brw_compile *p, struct brw_instruction *inst)
1174 {
1175 if (p->loop_stack_array_size < p->loop_stack_depth) {
1176 p->loop_stack_array_size *= 2;
1177 p->loop_stack = reralloc(p->mem_ctx, p->loop_stack, int,
1178 p->loop_stack_array_size);
1179 p->if_depth_in_loop = reralloc(p->mem_ctx, p->if_depth_in_loop, int,
1180 p->loop_stack_array_size);
1181 }
1182
1183 p->loop_stack[p->loop_stack_depth] = inst - p->store;
1184 p->loop_stack_depth++;
1185 p->if_depth_in_loop[p->loop_stack_depth] = 0;
1186 }
1187
1188 static struct brw_instruction *
1189 get_inner_do_insn(struct brw_compile *p)
1190 {
1191 return &p->store[p->loop_stack[p->loop_stack_depth - 1]];
1192 }
1193
1194 /* EU takes the value from the flag register and pushes it onto some
1195 * sort of a stack (presumably merging with any flag value already on
1196 * the stack). Within an if block, the flags at the top of the stack
1197 * control execution on each channel of the unit, eg. on each of the
1198 * 16 pixel values in our wm programs.
1199 *
1200 * When the matching 'else' instruction is reached (presumably by
1201 * countdown of the instruction count patched in by our ELSE/ENDIF
1202 * functions), the relevent flags are inverted.
1203 *
1204 * When the matching 'endif' instruction is reached, the flags are
1205 * popped off. If the stack is now empty, normal execution resumes.
1206 */
struct brw_instruction *
brw_IF(struct brw_compile *p, unsigned execute_size)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_IF);

   /* Override the defaults for this instruction:
    */
   if (brw->gen < 6) {
      /* Pre-gen6: IF operates on IP; jump count is patched into src1
       * later by patch_IF_ELSE().
       */
      brw_set_dest(p, insn, brw_ip_reg());
      brw_set_src0(p, insn, brw_ip_reg());
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else if (brw->gen == 6) {
      /* Gen6: jump count lives in the gen6 branch encoding; patched later. */
      brw_set_dest(p, insn, brw_imm_w(0));
      insn->bits1.branch_gen6.jump_count = 0;
      brw_set_src0(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
      brw_set_src1(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
   } else {
      /* Gen7+: JIP/UIP offsets; both patched later. */
      brw_set_dest(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
      brw_set_src0(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
      brw_set_src1(p, insn, brw_imm_ud(0));
      insn->bits3.break_cont.jip = 0;
      insn->bits3.break_cont.uip = 0;
   }

   insn->header.execution_size = execute_size;
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.predicate_control = BRW_PREDICATE_NORMAL;
   insn->header.mask_control = BRW_MASK_ENABLE;
   if (!p->single_program_flow)
      insn->header.thread_control = BRW_THREAD_SWITCH;

   /* The predicate applies to the IF itself, not to what follows. */
   p->current->header.predicate_control = BRW_PREDICATE_NONE;

   push_if_stack(p, insn);
   p->if_depth_in_loop[p->loop_stack_depth]++;
   return insn;
}
1247
1248 /* This function is only used for gen6-style IF instructions with an
1249 * embedded comparison (conditional modifier). It is not used on gen7.
1250 */
struct brw_instruction *
gen6_IF(struct brw_compile *p, uint32_t conditional,
	struct brw_reg src0, struct brw_reg src1)
{
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_IF);

   brw_set_dest(p, insn, brw_imm_w(0));
   if (p->compressed) {
      insn->header.execution_size = BRW_EXECUTE_16;
   } else {
      insn->header.execution_size = BRW_EXECUTE_8;
   }
   /* Jump count is filled in later by patch_IF_ELSE(). */
   insn->bits1.branch_gen6.jump_count = 0;
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, src1);

   assert(insn->header.compression_control == BRW_COMPRESSION_NONE);
   assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
   /* The embedded comparison replaces a separate CMP + predicated IF. */
   insn->header.destreg__conditionalmod = conditional;

   if (!p->single_program_flow)
      insn->header.thread_control = BRW_THREAD_SWITCH;

   push_if_stack(p, insn);
   return insn;
}
1279
1280 /**
1281 * In single-program-flow (SPF) mode, convert IF and ELSE into ADDs.
1282 */
static void
convert_IF_ELSE_to_ADD(struct brw_compile *p,
                       struct brw_instruction *if_inst,
                       struct brw_instruction *else_inst)
{
   /* The next instruction (where the ENDIF would be, if it existed) */
   struct brw_instruction *next_inst = &p->store[p->nr_insn];

   assert(p->single_program_flow);
   assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF);
   assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE);
   assert(if_inst->header.execution_size == BRW_EXECUTE_1);

   /* Convert IF to an ADD instruction that moves the instruction pointer
    * to the first instruction of the ELSE block. If there is no ELSE
    * block, point to where ENDIF would be. Reverse the predicate.
    *
    * There's no need to execute an ENDIF since we don't need to do any
    * stack operations, and if we're currently executing, we just want to
    * continue normally.
    */
   if_inst->header.opcode = BRW_OPCODE_ADD;
   if_inst->header.predicate_inverse = 1;

   if (else_inst != NULL) {
      /* Convert ELSE to an ADD instruction that points where the ENDIF
       * would be.
       */
      else_inst->header.opcode = BRW_OPCODE_ADD;

      /* The *16 scales instruction counts to bytes for IP arithmetic
       * (each native instruction is 128 bits = 16 bytes).
       */
      if_inst->bits3.ud = (else_inst - if_inst + 1) * 16;
      else_inst->bits3.ud = (next_inst - else_inst) * 16;
   } else {
      if_inst->bits3.ud = (next_inst - if_inst) * 16;
   }
}
1319
1320 /**
1321 * Patch IF and ELSE instructions with appropriate jump targets.
1322 */
static void
patch_IF_ELSE(struct brw_compile *p,
              struct brw_instruction *if_inst,
              struct brw_instruction *else_inst,
              struct brw_instruction *endif_inst)
{
   struct brw_context *brw = p->brw;

   /* We shouldn't be patching IF and ELSE instructions in single program flow
    * mode when gen < 6, because in single program flow mode on those
    * platforms, we convert flow control instructions to conditional ADDs that
    * operate on IP (see brw_ENDIF).
    *
    * However, on Gen6, writing to IP doesn't work in single program flow mode
    * (see the SandyBridge PRM, Volume 4 part 2, p79: "When SPF is ON, IP may
    * not be updated by non-flow control instructions."). And on later
    * platforms, there is no significant benefit to converting control flow
    * instructions to conditional ADDs. So we do patch IF and ELSE
    * instructions in single program flow mode on those platforms.
    */
   if (brw->gen < 6)
      assert(!p->single_program_flow);

   assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF);
   assert(endif_inst != NULL);
   assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE);

   unsigned br = 1;
   /* Jump count is for 64bit data chunk each, so one 128bit instruction
    * requires 2 chunks.
    */
   if (brw->gen >= 5)
      br = 2;

   assert(endif_inst->header.opcode == BRW_OPCODE_ENDIF);
   /* ENDIF (and ELSE below) must execute over the same channels as the IF. */
   endif_inst->header.execution_size = if_inst->header.execution_size;

   if (else_inst == NULL) {
      /* Patch IF -> ENDIF */
      if (brw->gen < 6) {
	 /* Turn it into an IFF, which means no mask stack operations for
	  * all-false and jumping past the ENDIF.
	  */
	 if_inst->header.opcode = BRW_OPCODE_IFF;
	 if_inst->bits3.if_else.jump_count = br * (endif_inst - if_inst + 1);
	 if_inst->bits3.if_else.pop_count = 0;
	 if_inst->bits3.if_else.pad0 = 0;
      } else if (brw->gen == 6) {
	 /* As of gen6, there is no IFF and IF must point to the ENDIF. */
	 if_inst->bits1.branch_gen6.jump_count = br * (endif_inst - if_inst);
      } else {
	 if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst);
	 if_inst->bits3.break_cont.jip = br * (endif_inst - if_inst);
      }
   } else {
      else_inst->header.execution_size = if_inst->header.execution_size;

      /* Patch IF -> ELSE */
      if (brw->gen < 6) {
	 if_inst->bits3.if_else.jump_count = br * (else_inst - if_inst);
	 if_inst->bits3.if_else.pop_count = 0;
	 if_inst->bits3.if_else.pad0 = 0;
      } else if (brw->gen == 6) {
	 if_inst->bits1.branch_gen6.jump_count = br * (else_inst - if_inst + 1);
      }

      /* Patch ELSE -> ENDIF */
      if (brw->gen < 6) {
	 /* BRW_OPCODE_ELSE pre-gen6 should point just past the
	  * matching ENDIF.
	  */
	 else_inst->bits3.if_else.jump_count = br*(endif_inst - else_inst + 1);
	 else_inst->bits3.if_else.pop_count = 1;
	 else_inst->bits3.if_else.pad0 = 0;
      } else if (brw->gen == 6) {
	 /* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. */
	 else_inst->bits1.branch_gen6.jump_count = br*(endif_inst - else_inst);
      } else {
	 /* The IF instruction's JIP should point just past the ELSE */
	 if_inst->bits3.break_cont.jip = br * (else_inst - if_inst + 1);
	 /* The IF instruction's UIP and ELSE's JIP should point to ENDIF */
	 if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst);
	 else_inst->bits3.break_cont.jip = br * (endif_inst - else_inst);
      }
   }
}
1409
/* Emit an ELSE.  The jump fields are zeroed here and patched by brw_ENDIF()
 * via patch_IF_ELSE(); the instruction is pushed onto the if-stack on top of
 * its matching IF.
 */
void
brw_ELSE(struct brw_compile *p)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_ELSE);

   if (brw->gen < 6) {
      /* Pre-gen6: ELSE operates on IP. */
      brw_set_dest(p, insn, brw_ip_reg());
      brw_set_src0(p, insn, brw_ip_reg());
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else if (brw->gen == 6) {
      brw_set_dest(p, insn, brw_imm_w(0));
      insn->bits1.branch_gen6.jump_count = 0;
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   } else {
      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_ud(0));
      insn->bits3.break_cont.jip = 0;
      insn->bits3.break_cont.uip = 0;
   }

   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.mask_control = BRW_MASK_ENABLE;
   if (!p->single_program_flow)
      insn->header.thread_control = BRW_THREAD_SWITCH;

   push_if_stack(p, insn);
}
1442
/* Close the innermost IF/ELSE block: pop the IF (and optional ELSE) off the
 * if-stack and either emit an ENDIF and patch the jump targets, or (SPF on
 * gen4/5) convert the IF/ELSE into IP-relative ADDs.
 */
void
brw_ENDIF(struct brw_compile *p)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn = NULL;
   struct brw_instruction *else_inst = NULL;
   struct brw_instruction *if_inst = NULL;
   struct brw_instruction *tmp;
   bool emit_endif = true;

   /* In single program flow mode, we can express IF and ELSE instructions
    * equivalently as ADD instructions that operate on IP. On platforms prior
    * to Gen6, flow control instructions cause an implied thread switch, so
    * this is a significant savings.
    *
    * However, on Gen6, writing to IP doesn't work in single program flow mode
    * (see the SandyBridge PRM, Volume 4 part 2, p79: "When SPF is ON, IP may
    * not be updated by non-flow control instructions."). And on later
    * platforms, there is no significant benefit to converting control flow
    * instructions to conditional ADDs. So we only do this trick on Gen4 and
    * Gen5.
    */
   if (brw->gen < 6 && p->single_program_flow)
      emit_endif = false;

   /*
    * A single next_insn() may change the base adress of instruction store
    * memory(p->store), so call it first before referencing the instruction
    * store pointer from an index
    */
   if (emit_endif)
      insn = next_insn(p, BRW_OPCODE_ENDIF);

   /* Pop the IF and (optional) ELSE instructions from the stack */
   p->if_depth_in_loop[p->loop_stack_depth]--;
   tmp = pop_if_stack(p);
   if (tmp->header.opcode == BRW_OPCODE_ELSE) {
      else_inst = tmp;
      tmp = pop_if_stack(p);
   }
   if_inst = tmp;

   if (!emit_endif) {
      /* ENDIF is useless; don't bother emitting it. */
      convert_IF_ELSE_to_ADD(p, if_inst, else_inst);
      return;
   }

   if (brw->gen < 6) {
      brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
      brw_set_src0(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else if (brw->gen == 6) {
      brw_set_dest(p, insn, brw_imm_w(0));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   } else {
      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_ud(0));
   }

   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.mask_control = BRW_MASK_ENABLE;
   insn->header.thread_control = BRW_THREAD_SWITCH;

   /* Also pop item off the stack in the endif instruction: */
   if (brw->gen < 6) {
      insn->bits3.if_else.jump_count = 0;
      insn->bits3.if_else.pop_count = 1;
      insn->bits3.if_else.pad0 = 0;
   } else if (brw->gen == 6) {
      insn->bits1.branch_gen6.jump_count = 2;
   } else {
      insn->bits3.break_cont.jip = 2;
   }
   patch_IF_ELSE(p, if_inst, else_inst, insn);
}
1521
/* Emit a BREAK.  Pre-gen6, the jump count is left zero and patched by
 * brw_patch_break_cont() when the enclosing WHILE is emitted; pop_count
 * unwinds the mask stack for any IFs open inside the loop.
 */
struct brw_instruction *brw_BREAK(struct brw_compile *p)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_BREAK);
   if (brw->gen >= 6) {
      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else {
      brw_set_dest(p, insn, brw_ip_reg());
      brw_set_src0(p, insn, brw_ip_reg());
      brw_set_src1(p, insn, brw_imm_d(0x0));
      insn->bits3.if_else.pad0 = 0;
      insn->bits3.if_else.pop_count = p->if_depth_in_loop[p->loop_stack_depth];
   }
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.execution_size = BRW_EXECUTE_8;

   return insn;
}
1544
1545 struct brw_instruction *gen6_CONT(struct brw_compile *p)
1546 {
1547 struct brw_instruction *insn;
1548
1549 insn = next_insn(p, BRW_OPCODE_CONTINUE);
1550 brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1551 brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1552 brw_set_dest(p, insn, brw_ip_reg());
1553 brw_set_src0(p, insn, brw_ip_reg());
1554 brw_set_src1(p, insn, brw_imm_d(0x0));
1555
1556 insn->header.compression_control = BRW_COMPRESSION_NONE;
1557 insn->header.execution_size = BRW_EXECUTE_8;
1558 return insn;
1559 }
1560
/* Emit a pre-gen6 CONTINUE.  The jump count is left zero and patched by
 * brw_patch_break_cont() when the enclosing WHILE is emitted; pop_count
 * unwinds the mask stack for any IFs open inside the loop.
 */
struct brw_instruction *brw_CONT(struct brw_compile *p)
{
   struct brw_instruction *insn;
   insn = next_insn(p, BRW_OPCODE_CONTINUE);
   brw_set_dest(p, insn, brw_ip_reg());
   brw_set_src0(p, insn, brw_ip_reg());
   brw_set_src1(p, insn, brw_imm_d(0x0));
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.execution_size = BRW_EXECUTE_8;
   /* insn->header.mask_control = BRW_MASK_DISABLE; */
   insn->bits3.if_else.pad0 = 0;
   insn->bits3.if_else.pop_count = p->if_depth_in_loop[p->loop_stack_depth];
   return insn;
}
1575
/* Emit a gen6+ HALT.  src1 carries the UIP/JIP jump offsets, which are
 * zeroed here and filled in later (see brw_set_uip_jip).
 */
struct brw_instruction *gen6_HALT(struct brw_compile *p)
{
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_HALT);
   brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   brw_set_src1(p, insn, brw_imm_d(0x0)); /* UIP and JIP, updated later. */

   if (p->compressed) {
      insn->header.execution_size = BRW_EXECUTE_16;
   } else {
      insn->header.compression_control = BRW_COMPRESSION_NONE;
      insn->header.execution_size = BRW_EXECUTE_8;
   }
   return insn;
}
1593
1594 /* DO/WHILE loop:
1595 *
1596 * The DO/WHILE is just an unterminated loop -- break or continue are
1597 * used for control within the loop. We have a few ways they can be
1598 * done.
1599 *
1600 * For uniform control flow, the WHILE is just a jump, so ADD ip, ip,
1601 * jip and no DO instruction.
1602 *
1603 * For non-uniform control flow pre-gen6, there's a DO instruction to
1604 * push the mask, and a WHILE to jump back, and BREAK to get out and
1605 * pop the mask.
1606 *
1607 * For gen6, there's no more mask stack, so no need for DO. WHILE
1608 * just points back to the first instruction of the loop.
1609 */
/* Open a loop: push the loop-start position onto the loop stack, and on
 * platforms that need it (pre-gen6, multi-flow) emit an actual DO
 * instruction.  On gen6+/SPF the "DO" is just the next instruction slot.
 */
struct brw_instruction *brw_DO(struct brw_compile *p, unsigned execute_size)
{
   struct brw_context *brw = p->brw;

   if (brw->gen >= 6 || p->single_program_flow) {
      /* No DO opcode needed; WHILE will jump back to this position. */
      push_loop_stack(p, &p->store[p->nr_insn]);
      return &p->store[p->nr_insn];
   } else {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO);

      push_loop_stack(p, insn);

      /* Override the defaults for this instruction:
       */
      brw_set_dest(p, insn, brw_null_reg());
      brw_set_src0(p, insn, brw_null_reg());
      brw_set_src1(p, insn, brw_null_reg());

      insn->header.compression_control = BRW_COMPRESSION_NONE;
      insn->header.execution_size = execute_size;
      insn->header.predicate_control = BRW_PREDICATE_NONE;
      /* insn->header.mask_control = BRW_MASK_ENABLE; */
      /* insn->header.mask_control = BRW_MASK_DISABLE; */

      return insn;
   }
}
1637
1638 /**
1639 * For pre-gen6, we patch BREAK/CONT instructions to point at the WHILE
1640 * instruction here.
1641 *
1642 * For gen6+, see brw_set_uip_jip(), which doesn't care so much about the loop
1643 * nesting, since it can always just point to the end of the block/current loop.
1644 */
static void
brw_patch_break_cont(struct brw_compile *p, struct brw_instruction *while_inst)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *do_inst = get_inner_do_insn(p);
   struct brw_instruction *inst;
   /* Gen5 counts jumps in 64-bit chunks (2 per instruction); gen4 in
    * whole instructions.
    */
   int br = (brw->gen == 5) ? 2 : 1;

   /* Walk backwards from the WHILE to the matching DO, patching any
    * still-unpatched BREAK/CONT in between.
    */
   for (inst = while_inst - 1; inst != do_inst; inst--) {
      /* If the jump count is != 0, that means that this instruction has already
       * been patched because it's part of a loop inside of the one we're
       * patching.
       */
      if (inst->header.opcode == BRW_OPCODE_BREAK &&
	  inst->bits3.if_else.jump_count == 0) {
	 /* BREAK jumps to just past the WHILE (out of the loop). */
	 inst->bits3.if_else.jump_count = br * ((while_inst - inst) + 1);
      } else if (inst->header.opcode == BRW_OPCODE_CONTINUE &&
		 inst->bits3.if_else.jump_count == 0) {
	 /* CONT jumps to the WHILE itself (next iteration test). */
	 inst->bits3.if_else.jump_count = br * (while_inst - inst);
      }
   }
}
1667
/* Close the innermost loop with a WHILE (or, in SPF mode pre-gen6, an
 * IP-relative ADD) that jumps back to the matching DO, then pop the loop
 * stack.  Pre-gen6, also patches the loop's BREAK/CONT instructions.
 */
struct brw_instruction *brw_WHILE(struct brw_compile *p)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn, *do_insn;
   unsigned br = 1;

   /* Gen5+ counts jumps in 64-bit chunks: 2 per 128-bit instruction. */
   if (brw->gen >= 5)
      br = 2;

   if (brw->gen >= 7) {
      insn = next_insn(p, BRW_OPCODE_WHILE);
      do_insn = get_inner_do_insn(p);

      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_ud(0));
      /* Backwards jump: JIP is negative (do_insn precedes insn). */
      insn->bits3.break_cont.jip = br * (do_insn - insn);

      insn->header.execution_size = BRW_EXECUTE_8;
   } else if (brw->gen == 6) {
      insn = next_insn(p, BRW_OPCODE_WHILE);
      do_insn = get_inner_do_insn(p);

      brw_set_dest(p, insn, brw_imm_w(0));
      insn->bits1.branch_gen6.jump_count = br * (do_insn - insn);
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));

      insn->header.execution_size = BRW_EXECUTE_8;
   } else {
      if (p->single_program_flow) {
	 /* SPF: a plain scalar ADD on IP jumps back to the loop head
	  * (16 bytes per instruction).
	  */
	 insn = next_insn(p, BRW_OPCODE_ADD);
	 do_insn = get_inner_do_insn(p);

	 brw_set_dest(p, insn, brw_ip_reg());
	 brw_set_src0(p, insn, brw_ip_reg());
	 brw_set_src1(p, insn, brw_imm_d((do_insn - insn) * 16));
	 insn->header.execution_size = BRW_EXECUTE_1;
      } else {
	 insn = next_insn(p, BRW_OPCODE_WHILE);
	 do_insn = get_inner_do_insn(p);

	 assert(do_insn->header.opcode == BRW_OPCODE_DO);

	 brw_set_dest(p, insn, brw_ip_reg());
	 brw_set_src0(p, insn, brw_ip_reg());
	 brw_set_src1(p, insn, brw_imm_d(0));

	 insn->header.execution_size = do_insn->header.execution_size;
	 insn->bits3.if_else.jump_count = br * (do_insn - insn + 1);
	 insn->bits3.if_else.pop_count = 0;
	 insn->bits3.if_else.pad0 = 0;

	 brw_patch_break_cont(p, insn);
      }
   }
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   p->current->header.predicate_control = BRW_PREDICATE_NONE;

   p->loop_stack_depth--;

   return insn;
}
1731
1732
1733 /* FORWARD JUMPS:
1734 */
1735 void brw_land_fwd_jump(struct brw_compile *p, int jmp_insn_idx)
1736 {
1737 struct brw_context *brw = p->brw;
1738 struct brw_instruction *jmp_insn = &p->store[jmp_insn_idx];
1739 unsigned jmpi = 1;
1740
1741 if (brw->gen >= 5)
1742 jmpi = 2;
1743
1744 assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
1745 assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE);
1746
1747 jmp_insn->bits3.ud = jmpi * (p->nr_insn - jmp_insn_idx - 1);
1748 }
1749
1750
1751
1752 /* To integrate with the above, it makes sense that the comparison
1753 * instruction should populate the flag register. It might be simpler
1754 * just to use the flag reg for most WM tasks?
1755 */
void brw_CMP(struct brw_compile *p,
	     struct brw_reg dest,
	     unsigned conditional,
	     struct brw_reg src0,
	     struct brw_reg src1)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP);

   insn->header.destreg__conditionalmod = conditional;
   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, src1);

/*    guess_execution_size(insn, src0); */


   /* Make it so that future instructions will use the computed flag
    * value until brw_set_predicate_control_flag_value() is called
    * again.
    */
   if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
       dest.nr == 0) {
      p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
      p->flag_value = 0xff;
   }

   /* Item WaCMPInstNullDstForcesThreadSwitch in the Haswell Bspec workarounds
    * page says:
    *    "Any CMP instruction with a null destination must use a {switch}."
    *
    * It also applies to other Gen7 platforms (IVB, BYT) even though it isn't
    * mentioned on their work-arounds pages.
    */
   if (brw->gen == 7) {
      if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
          dest.nr == BRW_ARF_NULL) {
         insn->header.thread_control = BRW_THREAD_SWITCH;
      }
   }
}
1797
1798 /* Issue 'wait' instruction for n1, host could program MMIO
1799 to wake up thread. */
1800 void brw_WAIT (struct brw_compile *p)
1801 {
1802 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_WAIT);
1803 struct brw_reg src = brw_notification_1_reg();
1804
1805 brw_set_dest(p, insn, src);
1806 brw_set_src0(p, insn, src);
1807 brw_set_src1(p, insn, brw_null_reg());
1808 insn->header.execution_size = 0; /* must */
1809 insn->header.predicate_control = 0;
1810 insn->header.compression_control = 0;
1811 }
1812
1813
1814 /***********************************************************************
1815 * Helpers for the various SEND message types:
1816 */
1817
1818 /** Extended math function, float[8].
1819 */
/* Emit an extended-math operation.  On gen6+ this is a native MATH
 * instruction; on earlier gens it is a SEND to the shared math unit
 * (msg_reg_nr, data_type and precision are only used in that path).
 */
void brw_math( struct brw_compile *p,
	       struct brw_reg dest,
	       unsigned function,
	       unsigned msg_reg_nr,
	       struct brw_reg src,
	       unsigned data_type,
	       unsigned precision )
{
   struct brw_context *brw = p->brw;

   if (brw->gen >= 6) {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);

      assert(dest.file == BRW_GENERAL_REGISTER_FILE ||
             (brw->gen >= 7 && dest.file == BRW_MESSAGE_REGISTER_FILE));
      assert(src.file == BRW_GENERAL_REGISTER_FILE);

      assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
      if (brw->gen == 6)
	 assert(src.hstride == BRW_HORIZONTAL_STRIDE_1);

      /* Source modifiers are ignored for extended math instructions on Gen6. */
      if (brw->gen == 6) {
	 assert(!src.negate);
	 assert(!src.abs);
      }

      /* Integer divide takes integer sources; everything else takes float. */
      if (function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||
	  function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER ||
	  function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
	 assert(src.type != BRW_REGISTER_TYPE_F);
      } else {
	 assert(src.type == BRW_REGISTER_TYPE_F);
      }

      /* Math is the same ISA format as other opcodes, except that CondModifier
       * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
       */
      insn->header.destreg__conditionalmod = function;

      brw_set_dest(p, insn, dest);
      brw_set_src0(p, insn, src);
      brw_set_src1(p, insn, brw_null_reg());
   } else {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);

      /* Example code doesn't set predicate_control for send
       * instructions.
       */
      insn->header.predicate_control = 0;
      insn->header.destreg__conditionalmod = msg_reg_nr;

      brw_set_dest(p, insn, dest);
      brw_set_src0(p, insn, src);
      brw_set_math_message(p,
			   insn,
			   function,
			   src.type == BRW_REGISTER_TYPE_D,
			   precision,
			   data_type);
   }
}
1882
1883 /** Extended math function, float[8].
1884 */
1885 void brw_math2(struct brw_compile *p,
1886 struct brw_reg dest,
1887 unsigned function,
1888 struct brw_reg src0,
1889 struct brw_reg src1)
1890 {
1891 struct brw_context *brw = p->brw;
1892 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);
1893
1894 assert(dest.file == BRW_GENERAL_REGISTER_FILE ||
1895 (brw->gen >= 7 && dest.file == BRW_MESSAGE_REGISTER_FILE));
1896 assert(src0.file == BRW_GENERAL_REGISTER_FILE);
1897 assert(src1.file == BRW_GENERAL_REGISTER_FILE);
1898
1899 assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
1900 if (brw->gen == 6) {
1901 assert(src0.hstride == BRW_HORIZONTAL_STRIDE_1);
1902 assert(src1.hstride == BRW_HORIZONTAL_STRIDE_1);
1903 }
1904
1905 if (function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||
1906 function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER ||
1907 function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
1908 assert(src0.type != BRW_REGISTER_TYPE_F);
1909 assert(src1.type != BRW_REGISTER_TYPE_F);
1910 } else {
1911 assert(src0.type == BRW_REGISTER_TYPE_F);
1912 assert(src1.type == BRW_REGISTER_TYPE_F);
1913 }
1914
1915 /* Source modifiers are ignored for extended math instructions on Gen6. */
1916 if (brw->gen == 6) {
1917 assert(!src0.negate);
1918 assert(!src0.abs);
1919 assert(!src1.negate);
1920 assert(!src1.abs);
1921 }
1922
1923 /* Math is the same ISA format as other opcodes, except that CondModifier
1924 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1925 */
1926 insn->header.destreg__conditionalmod = function;
1927
1928 brw_set_dest(p, insn, dest);
1929 brw_set_src0(p, insn, src0);
1930 brw_set_src1(p, insn, src1);
1931 }
1932
1933
1934 /**
1935 * Write a block of OWORDs (half a GRF each) from the scratch buffer,
1936 * using a constant offset per channel.
1937 *
1938 * The offset must be aligned to oword size (16 bytes). Used for
1939 * register spilling.
1940 */
1941 void brw_oword_block_write_scratch(struct brw_compile *p,
1942 struct brw_reg mrf,
1943 int num_regs,
1944 unsigned offset)
1945 {
1946 struct brw_context *brw = p->brw;
1947 uint32_t msg_control, msg_type;
1948 int mlen;
1949
1950 if (brw->gen >= 6)
1951 offset /= 16;
1952
1953 mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
1954
1955 if (num_regs == 1) {
1956 msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
1957 mlen = 2;
1958 } else {
1959 msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
1960 mlen = 3;
1961 }
1962
1963 /* Set up the message header. This is g0, with g0.2 filled with
1964 * the offset. We don't want to leave our offset around in g0 or
1965 * it'll screw up texture samples, so set it up inside the message
1966 * reg.
1967 */
1968 {
1969 brw_push_insn_state(p);
1970 brw_set_mask_control(p, BRW_MASK_DISABLE);
1971 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1972
1973 brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
1974
1975 /* set message header global offset field (reg 0, element 2) */
1976 brw_MOV(p,
1977 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
1978 mrf.nr,
1979 2), BRW_REGISTER_TYPE_UD),
1980 brw_imm_ud(offset));
1981
1982 brw_pop_insn_state(p);
1983 }
1984
1985 {
1986 struct brw_reg dest;
1987 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1988 int send_commit_msg;
1989 struct brw_reg src_header = retype(brw_vec8_grf(0, 0),
1990 BRW_REGISTER_TYPE_UW);
1991
1992 if (insn->header.compression_control != BRW_COMPRESSION_NONE) {
1993 insn->header.compression_control = BRW_COMPRESSION_NONE;
1994 src_header = vec16(src_header);
1995 }
1996 assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
1997 insn->header.destreg__conditionalmod = mrf.nr;
1998
1999 /* Until gen6, writes followed by reads from the same location
2000 * are not guaranteed to be ordered unless write_commit is set.
2001 * If set, then a no-op write is issued to the destination
2002 * register to set a dependency, and a read from the destination
2003 * can be used to ensure the ordering.
2004 *
2005 * For gen6, only writes between different threads need ordering
2006 * protection. Our use of DP writes is all about register
2007 * spilling within a thread.
2008 */
2009 if (brw->gen >= 6) {
2010 dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
2011 send_commit_msg = 0;
2012 } else {
2013 dest = src_header;
2014 send_commit_msg = 1;
2015 }
2016
2017 brw_set_dest(p, insn, dest);
2018 if (brw->gen >= 6) {
2019 brw_set_src0(p, insn, mrf);
2020 } else {
2021 brw_set_src0(p, insn, brw_null_reg());
2022 }
2023
2024 if (brw->gen >= 6)
2025 msg_type = GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
2026 else
2027 msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
2028
2029 brw_set_dp_write_message(p,
2030 insn,
2031 255, /* binding table index (255=stateless) */
2032 msg_control,
2033 msg_type,
2034 mlen,
2035 true, /* header_present */
2036 0, /* not a render target */
2037 send_commit_msg, /* response_length */
2038 0, /* eot */
2039 send_commit_msg);
2040 }
2041 }
2042
2043
2044 /**
2045 * Read a block of owords (half a GRF each) from the scratch buffer
2046 * using a constant index per channel.
2047 *
2048 * Offset must be aligned to oword size (16 bytes). Used for register
2049 * spilling.
2050 */
2051 void
2052 brw_oword_block_read_scratch(struct brw_compile *p,
2053 struct brw_reg dest,
2054 struct brw_reg mrf,
2055 int num_regs,
2056 unsigned offset)
2057 {
2058 struct brw_context *brw = p->brw;
2059 uint32_t msg_control;
2060 int rlen;
2061
2062 if (brw->gen >= 6)
2063 offset /= 16;
2064
2065 mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
2066 dest = retype(dest, BRW_REGISTER_TYPE_UW);
2067
2068 if (num_regs == 1) {
2069 msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
2070 rlen = 1;
2071 } else {
2072 msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
2073 rlen = 2;
2074 }
2075
2076 {
2077 brw_push_insn_state(p);
2078 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
2079 brw_set_mask_control(p, BRW_MASK_DISABLE);
2080
2081 brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
2082
2083 /* set message header global offset field (reg 0, element 2) */
2084 brw_MOV(p,
2085 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
2086 mrf.nr,
2087 2), BRW_REGISTER_TYPE_UD),
2088 brw_imm_ud(offset));
2089
2090 brw_pop_insn_state(p);
2091 }
2092
2093 {
2094 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
2095
2096 assert(insn->header.predicate_control == 0);
2097 insn->header.compression_control = BRW_COMPRESSION_NONE;
2098 insn->header.destreg__conditionalmod = mrf.nr;
2099
2100 brw_set_dest(p, insn, dest); /* UW? */
2101 if (brw->gen >= 6) {
2102 brw_set_src0(p, insn, mrf);
2103 } else {
2104 brw_set_src0(p, insn, brw_null_reg());
2105 }
2106
2107 brw_set_dp_read_message(p,
2108 insn,
2109 255, /* binding table index (255=stateless) */
2110 msg_control,
2111 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
2112 BRW_DATAPORT_READ_TARGET_RENDER_CACHE,
2113 1, /* msg_length */
2114 true, /* header_present */
2115 rlen);
2116 }
2117 }
2118
2119 void
2120 gen7_block_read_scratch(struct brw_compile *p,
2121 struct brw_reg dest,
2122 int num_regs,
2123 unsigned offset)
2124 {
2125 dest = retype(dest, BRW_REGISTER_TYPE_UW);
2126
2127 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
2128
2129 assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
2130 insn->header.compression_control = BRW_COMPRESSION_NONE;
2131
2132 brw_set_dest(p, insn, dest);
2133
2134 /* The HW requires that the header is present; this is to get the g0.5
2135 * scratch offset.
2136 */
2137 bool header_present = true;
2138 brw_set_src0(p, insn, brw_vec8_grf(0, 0));
2139
2140 brw_set_message_descriptor(p, insn,
2141 GEN7_SFID_DATAPORT_DATA_CACHE,
2142 1, /* mlen: just g0 */
2143 num_regs,
2144 header_present,
2145 false);
2146
2147 insn->bits3.ud |= GEN7_DATAPORT_SCRATCH_READ;
2148
2149 assert(num_regs == 1 || num_regs == 2 || num_regs == 4);
2150 insn->bits3.ud |= (num_regs - 1) << GEN7_DATAPORT_SCRATCH_NUM_REGS_SHIFT;
2151
2152 /* According to the docs, offset is "A 12-bit HWord offset into the memory
2153 * Immediate Memory buffer as specified by binding table 0xFF." An HWORD
2154 * is 32 bytes, which happens to be the size of a register.
2155 */
2156 offset /= REG_SIZE;
2157 assert(offset < (1 << 12));
2158 insn->bits3.ud |= offset;
2159 }
2160
2161 /**
2162 * Read a float[4] vector from the data port Data Cache (const buffer).
2163 * Location (in buffer) should be a multiple of 16.
2164 * Used for fetching shader constants.
2165 */
2166 void brw_oword_block_read(struct brw_compile *p,
2167 struct brw_reg dest,
2168 struct brw_reg mrf,
2169 uint32_t offset,
2170 uint32_t bind_table_index)
2171 {
2172 struct brw_context *brw = p->brw;
2173
2174 /* On newer hardware, offset is in units of owords. */
2175 if (brw->gen >= 6)
2176 offset /= 16;
2177
2178 mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
2179
2180 brw_push_insn_state(p);
2181 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
2182 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
2183 brw_set_mask_control(p, BRW_MASK_DISABLE);
2184
2185 brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
2186
2187 /* set message header global offset field (reg 0, element 2) */
2188 brw_MOV(p,
2189 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
2190 mrf.nr,
2191 2), BRW_REGISTER_TYPE_UD),
2192 brw_imm_ud(offset));
2193
2194 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
2195 insn->header.destreg__conditionalmod = mrf.nr;
2196
2197 /* cast dest to a uword[8] vector */
2198 dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);
2199
2200 brw_set_dest(p, insn, dest);
2201 if (brw->gen >= 6) {
2202 brw_set_src0(p, insn, mrf);
2203 } else {
2204 brw_set_src0(p, insn, brw_null_reg());
2205 }
2206
2207 brw_set_dp_read_message(p,
2208 insn,
2209 bind_table_index,
2210 BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW,
2211 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ,
2212 BRW_DATAPORT_READ_TARGET_DATA_CACHE,
2213 1, /* msg_length */
2214 true, /* header_present */
2215 1); /* response_length (1 reg, 2 owords!) */
2216
2217 brw_pop_insn_state(p);
2218 }
2219
2220
2221 void brw_fb_WRITE(struct brw_compile *p,
2222 int dispatch_width,
2223 unsigned msg_reg_nr,
2224 struct brw_reg src0,
2225 unsigned msg_control,
2226 unsigned binding_table_index,
2227 unsigned msg_length,
2228 unsigned response_length,
2229 bool eot,
2230 bool header_present)
2231 {
2232 struct brw_context *brw = p->brw;
2233 struct brw_instruction *insn;
2234 unsigned msg_type;
2235 struct brw_reg dest;
2236
2237 if (dispatch_width == 16)
2238 dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
2239 else
2240 dest = retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW);
2241
2242 if (brw->gen >= 6) {
2243 insn = next_insn(p, BRW_OPCODE_SENDC);
2244 } else {
2245 insn = next_insn(p, BRW_OPCODE_SEND);
2246 }
2247 insn->header.compression_control = BRW_COMPRESSION_NONE;
2248
2249 if (brw->gen >= 6) {
2250 /* headerless version, just submit color payload */
2251 src0 = brw_message_reg(msg_reg_nr);
2252
2253 msg_type = GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
2254 } else {
2255 insn->header.destreg__conditionalmod = msg_reg_nr;
2256
2257 msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
2258 }
2259
2260 brw_set_dest(p, insn, dest);
2261 brw_set_src0(p, insn, src0);
2262 brw_set_dp_write_message(p,
2263 insn,
2264 binding_table_index,
2265 msg_control,
2266 msg_type,
2267 msg_length,
2268 header_present,
2269 eot, /* last render target write */
2270 response_length,
2271 eot,
2272 0 /* send_commit_msg */);
2273 }
2274
2275
2276 /**
2277 * Texture sample instruction.
2278 * Note: the msg_type plus msg_length values determine exactly what kind
2279 * of sampling operation is performed. See volume 4, page 161 of docs.
2280 */
2281 void brw_SAMPLE(struct brw_compile *p,
2282 struct brw_reg dest,
2283 unsigned msg_reg_nr,
2284 struct brw_reg src0,
2285 unsigned binding_table_index,
2286 unsigned sampler,
2287 unsigned msg_type,
2288 unsigned response_length,
2289 unsigned msg_length,
2290 unsigned header_present,
2291 unsigned simd_mode,
2292 unsigned return_format)
2293 {
2294 struct brw_context *brw = p->brw;
2295 struct brw_instruction *insn;
2296
2297 if (msg_reg_nr != -1)
2298 gen6_resolve_implied_move(p, &src0, msg_reg_nr);
2299
2300 insn = next_insn(p, BRW_OPCODE_SEND);
2301 insn->header.predicate_control = 0; /* XXX */
2302
2303 /* From the 965 PRM (volume 4, part 1, section 14.2.41):
2304 *
2305 * "Instruction compression is not allowed for this instruction (that
2306 * is, send). The hardware behavior is undefined if this instruction is
2307 * set as compressed. However, compress control can be set to "SecHalf"
2308 * to affect the EMask generation."
2309 *
2310 * No similar wording is found in later PRMs, but there are examples
2311 * utilizing send with SecHalf. More importantly, SIMD8 sampler messages
2312 * are allowed in SIMD16 mode and they could not work without SecHalf. For
2313 * these reasons, we allow BRW_COMPRESSION_2NDHALF here.
2314 */
2315 if (insn->header.compression_control != BRW_COMPRESSION_2NDHALF)
2316 insn->header.compression_control = BRW_COMPRESSION_NONE;
2317
2318 if (brw->gen < 6)
2319 insn->header.destreg__conditionalmod = msg_reg_nr;
2320
2321 brw_set_dest(p, insn, dest);
2322 brw_set_src0(p, insn, src0);
2323 brw_set_sampler_message(p, insn,
2324 binding_table_index,
2325 sampler,
2326 msg_type,
2327 response_length,
2328 msg_length,
2329 header_present,
2330 simd_mode,
2331 return_format);
2332 }
2333
/* All these variables are pretty confusing - we might be better off
 * using bitmasks and macros for this, in the old style.  Or perhaps
 * just having the caller instantiate the fields in dword3 itself.
 *
 * Emits a URB write SEND.  \p flags is an enum brw_urb_write_flags
 * bitmask; \p offset and \p swizzle go into the message descriptor via
 * brw_set_urb_message().
 */
void brw_urb_WRITE(struct brw_compile *p,
		   struct brw_reg dest,
		   unsigned msg_reg_nr,
		   struct brw_reg src0,
		   enum brw_urb_write_flags flags,
		   unsigned msg_length,
		   unsigned response_length,
		   unsigned offset,
		   unsigned swizzle)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn;

   gen6_resolve_implied_move(p, &src0, msg_reg_nr);

   if (brw->gen == 7 && !(flags & BRW_URB_WRITE_USE_CHANNEL_MASKS)) {
      /* Enable Channel Masks in the URB_WRITE_HWORD message header:
       * OR 0xff00 into dword 5 of the header MRF (copied from g0.5).
       */
      brw_push_insn_state(p);
      brw_set_access_mode(p, BRW_ALIGN_1);
      brw_set_mask_control(p, BRW_MASK_DISABLE);
      brw_OR(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 5),
		       BRW_REGISTER_TYPE_UD),
	        retype(brw_vec1_grf(0, 5), BRW_REGISTER_TYPE_UD),
	        brw_imm_ud(0xff00));
      brw_pop_insn_state(p);
   }

   insn = next_insn(p, BRW_OPCODE_SEND);

   assert(msg_length < BRW_MAX_MRF);

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, brw_imm_d(0));

   /* Pre-Gen6 the message register number lives in the destreg field. */
   if (brw->gen < 6)
      insn->header.destreg__conditionalmod = msg_reg_nr;

   brw_set_urb_message(p,
		       insn,
		       flags,
		       msg_length,
		       response_length,
		       offset,
		       swizzle);
}
2384
2385 static int
2386 next_ip(struct brw_compile *p, int ip)
2387 {
2388 struct brw_instruction *insn = (void *)p->store + ip;
2389
2390 if (insn->header.cmpt_control)
2391 return ip + 8;
2392 else
2393 return ip + 16;
2394 }
2395
2396 static int
2397 brw_find_next_block_end(struct brw_compile *p, int start)
2398 {
2399 int ip;
2400 void *store = p->store;
2401
2402 for (ip = next_ip(p, start); ip < p->next_insn_offset; ip = next_ip(p, ip)) {
2403 struct brw_instruction *insn = store + ip;
2404
2405 switch (insn->header.opcode) {
2406 case BRW_OPCODE_ENDIF:
2407 case BRW_OPCODE_ELSE:
2408 case BRW_OPCODE_WHILE:
2409 case BRW_OPCODE_HALT:
2410 return ip;
2411 }
2412 }
2413
2414 return 0;
2415 }
2416
/* There is no DO instruction on gen6, so to find the end of the loop
 * we have to see if the loop is jumping back before our start
 * instruction.
 *
 * Returns the byte offset of the WHILE instruction closing the loop
 * that contains \p start.
 */
static int
brw_find_loop_end(struct brw_compile *p, int start)
{
   struct brw_context *brw = p->brw;
   int ip;
   int scale = 8;	/* jump targets are counted in 8-byte units */
   void *store = p->store;

   /* Always start after the instruction (such as a WHILE) we're trying to fix
    * up.
    */
   for (ip = next_ip(p, start); ip < p->next_insn_offset; ip = next_ip(p, ip)) {
      struct brw_instruction *insn = store + ip;

      if (insn->header.opcode == BRW_OPCODE_WHILE) {
	 /* Gen6 stores the backward jump in jump_count; Gen7 uses jip. */
	 int jip = brw->gen == 6 ? insn->bits1.branch_gen6.jump_count
				 : insn->bits3.break_cont.jip;
	 /* A backward jump landing at or before start means this WHILE
	  * closes the loop containing start.
	  */
	 if (ip + jip * scale <= start)
	    return ip;
      }
   }
   assert(!"not reached");
   return start;
}
2445
/* After program generation, go back and update the UIP and JIP of
 * BREAK, CONT, and HALT instructions to their correct locations.
 *
 * JIP points at the end of the enclosing control-flow block; UIP points
 * at the instruction ending the enclosing loop (or, for HALT, at the
 * target stored when the HALT was emitted).  Both are stored in units of
 * 8 bytes.
 */
void
brw_set_uip_jip(struct brw_compile *p)
{
   struct brw_context *brw = p->brw;
   int ip;
   int scale = 8;
   void *store = p->store;

   /* UIP/JIP fixups only apply to Gen6+ flow control. */
   if (brw->gen < 6)
      return;

   for (ip = 0; ip < p->next_insn_offset; ip = next_ip(p, ip)) {
      struct brw_instruction *insn = store + ip;

      if (insn->header.cmpt_control) {
	 /* Fixups for compacted BREAK/CONTINUE not supported yet. */
	 assert(insn->header.opcode != BRW_OPCODE_BREAK &&
		insn->header.opcode != BRW_OPCODE_CONTINUE &&
		insn->header.opcode != BRW_OPCODE_HALT);
	 continue;
      }

      int block_end_ip = brw_find_next_block_end(p, ip);
      switch (insn->header.opcode) {
      case BRW_OPCODE_BREAK:
	 assert(block_end_ip != 0);
	 insn->bits3.break_cont.jip = (block_end_ip - ip) / scale;
	 /* Gen7 UIP points to WHILE; Gen6 points just after it */
	 insn->bits3.break_cont.uip =
	    (brw_find_loop_end(p, ip) - ip +
	     (brw->gen == 6 ? 16 : 0)) / scale;
	 break;
      case BRW_OPCODE_CONTINUE:
	 assert(block_end_ip != 0);
	 insn->bits3.break_cont.jip = (block_end_ip - ip) / scale;
	 insn->bits3.break_cont.uip =
	    (brw_find_loop_end(p, ip) - ip) / scale;

	 assert(insn->bits3.break_cont.uip != 0);
	 assert(insn->bits3.break_cont.jip != 0);
	 break;

      case BRW_OPCODE_ENDIF:
	 /* With no block end in sight, point JIP at the next full
	  * instruction (2 * 8 bytes = 16 bytes).
	  */
	 if (block_end_ip == 0)
	    insn->bits3.break_cont.jip = 2;
	 else
	    insn->bits3.break_cont.jip = (block_end_ip - ip) / scale;
	 break;

      case BRW_OPCODE_HALT:
	 /* From the Sandy Bridge PRM (volume 4, part 2, section 8.3.19):
	  *
	  *    "In case of the halt instruction not inside any conditional
	  *     code block, the value of <JIP> and <UIP> should be the
	  *     same.  In case of the halt instruction inside conditional code
	  *     block, the <UIP> should be the end of the program, and the
	  *     <JIP> should be end of the most inner conditional code block."
	  *
	  * The uip will have already been set by whoever set up the
	  * instruction.
	  */
	 if (block_end_ip == 0) {
	    insn->bits3.break_cont.jip = insn->bits3.break_cont.uip;
	 } else {
	    insn->bits3.break_cont.jip = (block_end_ip - ip) / scale;
	 }
	 assert(insn->bits3.break_cont.uip != 0);
	 assert(insn->bits3.break_cont.jip != 0);
	 break;
      }
   }
}
2521
2522 void brw_ff_sync(struct brw_compile *p,
2523 struct brw_reg dest,
2524 unsigned msg_reg_nr,
2525 struct brw_reg src0,
2526 bool allocate,
2527 unsigned response_length,
2528 bool eot)
2529 {
2530 struct brw_context *brw = p->brw;
2531 struct brw_instruction *insn;
2532
2533 gen6_resolve_implied_move(p, &src0, msg_reg_nr);
2534
2535 insn = next_insn(p, BRW_OPCODE_SEND);
2536 brw_set_dest(p, insn, dest);
2537 brw_set_src0(p, insn, src0);
2538 brw_set_src1(p, insn, brw_imm_d(0));
2539
2540 if (brw->gen < 6)
2541 insn->header.destreg__conditionalmod = msg_reg_nr;
2542
2543 brw_set_ff_sync_message(p,
2544 insn,
2545 allocate,
2546 response_length,
2547 eot);
2548 }
2549
2550 /**
2551 * Emit the SEND instruction necessary to generate stream output data on Gen6
2552 * (for transform feedback).
2553 *
2554 * If send_commit_msg is true, this is the last piece of stream output data
2555 * from this thread, so send the data as a committed write. According to the
2556 * Sandy Bridge PRM (volume 2 part 1, section 4.5.1):
2557 *
2558 * "Prior to End of Thread with a URB_WRITE, the kernel must ensure all
2559 * writes are complete by sending the final write as a committed write."
2560 */
2561 void
2562 brw_svb_write(struct brw_compile *p,
2563 struct brw_reg dest,
2564 unsigned msg_reg_nr,
2565 struct brw_reg src0,
2566 unsigned binding_table_index,
2567 bool send_commit_msg)
2568 {
2569 struct brw_instruction *insn;
2570
2571 gen6_resolve_implied_move(p, &src0, msg_reg_nr);
2572
2573 insn = next_insn(p, BRW_OPCODE_SEND);
2574 brw_set_dest(p, insn, dest);
2575 brw_set_src0(p, insn, src0);
2576 brw_set_src1(p, insn, brw_imm_d(0));
2577 brw_set_dp_write_message(p, insn,
2578 binding_table_index,
2579 0, /* msg_control: ignored */
2580 GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE,
2581 1, /* msg_length */
2582 true, /* header_present */
2583 0, /* last_render_target: ignored */
2584 send_commit_msg, /* response_length */
2585 0, /* end_of_thread */
2586 send_commit_msg); /* send_commit_msg */
2587 }
2588
/**
 * Fill in the SEND message descriptor for an untyped atomic operation.
 *
 * On Haswell the message goes through data cache data port 1, with
 * separate Align1 (SIMD8/SIMD16) and Align16 (SIMD4x2) message types; on
 * other Gen7 hardware it uses the legacy data cache data port.
 *
 * \param atomic_op        BRW_AOP_* opcode, stored in descriptor bits 8..11.
 * \param response_length  nonzero when return data is expected.
 */
static void
brw_set_dp_untyped_atomic_message(struct brw_compile *p,
                                  struct brw_instruction *insn,
                                  unsigned atomic_op,
                                  unsigned bind_table_index,
                                  unsigned msg_length,
                                  unsigned response_length,
                                  bool header_present)
{
   if (p->brw->is_haswell) {
      brw_set_message_descriptor(p, insn, HSW_SFID_DATAPORT_DATA_CACHE_1,
                                 msg_length, response_length,
                                 header_present, false);


      if (insn->header.access_mode == BRW_ALIGN_1) {
         if (insn->header.execution_size != BRW_EXECUTE_16)
            insn->bits3.ud |= 1 << 12; /* SIMD8 mode */

         insn->bits3.gen7_dp.msg_type =
            HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP;
      } else {
         insn->bits3.gen7_dp.msg_type =
            HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP_SIMD4X2;
      }

   } else {
      brw_set_message_descriptor(p, insn, GEN7_SFID_DATAPORT_DATA_CACHE,
                                 msg_length, response_length,
                                 header_present, false);

      insn->bits3.gen7_dp.msg_type = GEN7_DATAPORT_DC_UNTYPED_ATOMIC_OP;

      if (insn->header.execution_size != BRW_EXECUTE_16)
         insn->bits3.ud |= 1 << 12; /* SIMD8 mode */
   }

   if (response_length)
      insn->bits3.ud |= 1 << 13; /* Return data expected */

   insn->bits3.gen7_dp.binding_table_index = bind_table_index;
   insn->bits3.ud |= atomic_op << 8; /* atomic opcode in bits 8..11 */
}
2632
2633 void
2634 brw_untyped_atomic(struct brw_compile *p,
2635 struct brw_reg dest,
2636 struct brw_reg mrf,
2637 unsigned atomic_op,
2638 unsigned bind_table_index,
2639 unsigned msg_length,
2640 unsigned response_length) {
2641 struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_SEND);
2642
2643 brw_set_dest(p, insn, retype(dest, BRW_REGISTER_TYPE_UD));
2644 brw_set_src0(p, insn, retype(mrf, BRW_REGISTER_TYPE_UD));
2645 brw_set_src1(p, insn, brw_imm_d(0));
2646 brw_set_dp_untyped_atomic_message(
2647 p, insn, atomic_op, bind_table_index, msg_length, response_length,
2648 insn->header.access_mode == BRW_ALIGN_1);
2649 }
2650
/**
 * Fill in the SEND message descriptor for an untyped surface read.
 *
 * The number of returned 32-bit channels is derived from
 * \p response_length and the instruction's execution size; the channel
 * mask in the descriptor is set to drop the channels beyond that count.
 */
static void
brw_set_dp_untyped_surface_read_message(struct brw_compile *p,
                                        struct brw_instruction *insn,
                                        unsigned bind_table_index,
                                        unsigned msg_length,
                                        unsigned response_length,
                                        bool header_present)
{
   const unsigned dispatch_width =
      (insn->header.execution_size == BRW_EXECUTE_16 ? 16 : 8);
   /* SIMD16 returns two registers per channel, SIMD8 one. */
   const unsigned num_channels = response_length / (dispatch_width / 8);

   if (p->brw->is_haswell) {
      brw_set_message_descriptor(p, insn, HSW_SFID_DATAPORT_DATA_CACHE_1,
                                 msg_length, response_length,
                                 header_present, false);

      insn->bits3.gen7_dp.msg_type = HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_READ;
   } else {
      brw_set_message_descriptor(p, insn, GEN7_SFID_DATAPORT_DATA_CACHE,
                                 msg_length, response_length,
                                 header_present, false);

      insn->bits3.gen7_dp.msg_type = GEN7_DATAPORT_DC_UNTYPED_SURFACE_READ;
   }

   if (insn->header.access_mode == BRW_ALIGN_1) {
      if (dispatch_width == 16)
         insn->bits3.ud |= 1 << 12; /* SIMD16 mode */
      else
         insn->bits3.ud |= 2 << 12; /* SIMD8 mode */
   }

   insn->bits3.gen7_dp.binding_table_index = bind_table_index;

   /* Set mask of 32-bit channels to drop. */
   insn->bits3.ud |= (0xf & (0xf << num_channels)) << 8;
}
2689
2690 void
2691 brw_untyped_surface_read(struct brw_compile *p,
2692 struct brw_reg dest,
2693 struct brw_reg mrf,
2694 unsigned bind_table_index,
2695 unsigned msg_length,
2696 unsigned response_length)
2697 {
2698 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
2699
2700 brw_set_dest(p, insn, retype(dest, BRW_REGISTER_TYPE_UD));
2701 brw_set_src0(p, insn, retype(mrf, BRW_REGISTER_TYPE_UD));
2702 brw_set_dp_untyped_surface_read_message(
2703 p, insn, bind_table_index, msg_length, response_length,
2704 insn->header.access_mode == BRW_ALIGN_1);
2705 }
2706
2707 /**
2708 * This instruction is generated as a single-channel align1 instruction by
2709 * both the VS and FS stages when using INTEL_DEBUG=shader_time.
2710 *
2711 * We can't use the typed atomic op in the FS because that has the execution
2712 * mask ANDed with the pixel mask, but we just want to write the one dword for
2713 * all the pixels.
2714 *
2715 * We don't use the SIMD4x2 atomic ops in the VS because want to just write
2716 * one u32. So we use the same untyped atomic write message as the pixel
2717 * shader.
2718 *
2719 * The untyped atomic operation requires a BUFFER surface type with RAW
2720 * format, and is only accessible through the legacy DATA_CACHE dataport
2721 * messages.
2722 */
2723 void brw_shader_time_add(struct brw_compile *p,
2724 struct brw_reg payload,
2725 uint32_t surf_index)
2726 {
2727 struct brw_context *brw = p->brw;
2728 assert(brw->gen >= 7);
2729
2730 brw_push_insn_state(p);
2731 brw_set_access_mode(p, BRW_ALIGN_1);
2732 brw_set_mask_control(p, BRW_MASK_DISABLE);
2733 struct brw_instruction *send = brw_next_insn(p, BRW_OPCODE_SEND);
2734 brw_pop_insn_state(p);
2735
2736 /* We use brw_vec1_reg and unmasked because we want to increment the given
2737 * offset only once.
2738 */
2739 brw_set_dest(p, send, brw_vec1_reg(BRW_ARCHITECTURE_REGISTER_FILE,
2740 BRW_ARF_NULL, 0));
2741 brw_set_src0(p, send, brw_vec1_reg(payload.file,
2742 payload.nr, 0));
2743 brw_set_dp_untyped_atomic_message(p, send, BRW_AOP_ADD, surf_index,
2744 2 /* message length */,
2745 0 /* response length */,
2746 false /* header present */);
2747 }