i965: Fix flat integral varyings.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_eu_emit.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32
33 #include "brw_context.h"
34 #include "brw_defines.h"
35 #include "brw_eu.h"
36
37 #include "glsl/ralloc.h"
38
39 /***********************************************************************
40 * Internal helper for constructing instructions
41 */
42
43 static void guess_execution_size(struct brw_compile *p,
44 struct brw_instruction *insn,
45 struct brw_reg reg)
46 {
47 if (reg.width == BRW_WIDTH_8 && p->compressed)
48 insn->header.execution_size = BRW_EXECUTE_16;
49 else
50 insn->header.execution_size = reg.width; /* note - definitions are compatible */
51 }
52
53
54 /**
55 * Prior to Sandybridge, the SEND instruction accepted non-MRF source
56 * registers, implicitly moving the operand to a message register.
57 *
58 * On Sandybridge, this is no longer the case. This function performs the
59 * explicit move; it should be called before emitting a SEND instruction.
60 */
void
gen6_resolve_implied_move(struct brw_compile *p,
			  struct brw_reg *src,
			  GLuint msg_reg_nr)
{
   struct intel_context *intel = &p->brw->intel;
   /* Pre-gen6 hardware performs the move implicitly; nothing to do. */
   if (intel->gen < 6)
      return;

   if (src->file != BRW_ARCHITECTURE_REGISTER_FILE || src->nr != BRW_ARF_NULL) {
      /* Copy the operand into the message register as raw dwords, with
       * masking and compression disabled so the payload is written
       * unconditionally.
       */
      brw_push_insn_state(p);
      brw_set_mask_control(p, BRW_MASK_DISABLE);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      brw_MOV(p, retype(brw_message_reg(msg_reg_nr), BRW_REGISTER_TYPE_UD),
	      retype(*src, BRW_REGISTER_TYPE_UD));
      brw_pop_insn_state(p);
   }
   /* Point the caller's source at the message register we (possibly) filled. */
   *src = brw_message_reg(msg_reg_nr);
}
80
/* Gen7 has no separate MRF file; message payloads live at the top of the
 * GRF file instead.  Rewrite an MRF reference into the corresponding GRF
 * (m0 maps to g111 here -- NOTE(review): confirm the base register against
 * the gen7 MRF layout used by the rest of the compiler).
 */
static void
gen7_convert_mrf_to_grf(struct brw_compile *p, struct brw_reg *reg)
{
   struct intel_context *intel = &p->brw->intel;
   if (intel->gen == 7 && reg->file == BRW_MESSAGE_REGISTER_FILE) {
      reg->file = BRW_GENERAL_REGISTER_FILE;
      reg->nr += 111;
   }
}
90
91
/**
 * Encode \p dest as the destination operand of \p insn, and derive the
 * instruction's execution size from the destination width (see
 * guess_execution_size at the end).
 */
void
brw_set_dest(struct brw_compile *p, struct brw_instruction *insn,
	     struct brw_reg dest)
{
   if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE &&
       dest.file != BRW_MESSAGE_REGISTER_FILE)
      assert(dest.nr < 128);

   gen7_convert_mrf_to_grf(p, &dest);

   insn->bits1.da1.dest_reg_file = dest.file;
   insn->bits1.da1.dest_reg_type = dest.type;
   insn->bits1.da1.dest_address_mode = dest.address_mode;

   if (dest.address_mode == BRW_ADDRESS_DIRECT) {
      insn->bits1.da1.dest_reg_nr = dest.nr;

      if (insn->header.access_mode == BRW_ALIGN_1) {
	 insn->bits1.da1.dest_subreg_nr = dest.subnr;
	 /* A zero horizontal stride is promoted to the unit stride for
	  * destinations.
	  */
	 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
	    dest.hstride = BRW_HORIZONTAL_STRIDE_1;
	 insn->bits1.da1.dest_horiz_stride = dest.hstride;
      }
      else {
	 /* Align16: subregister is expressed in 16-byte units and channel
	  * enables come from the writemask.
	  */
	 insn->bits1.da16.dest_subreg_nr = dest.subnr / 16;
	 insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask;
	 /* even ignored in da16, still need to set as '01' */
	 insn->bits1.da16.dest_horiz_stride = 1;
      }
   }
   else {
      /* Register-indirect destination. */
      insn->bits1.ia1.dest_subreg_nr = dest.subnr;

      /* These are different sizes in align1 vs align16:
       */
      if (insn->header.access_mode == BRW_ALIGN_1) {
	 insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset;
	 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
	    dest.hstride = BRW_HORIZONTAL_STRIDE_1;
	 insn->bits1.ia1.dest_horiz_stride = dest.hstride;
      }
      else {
	 insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset;
	 /* even ignored in da16, still need to set as '01' */
	 insn->bits1.ia16.dest_horiz_stride = 1;
      }
   }

   /* NEW: Set the execution size based on dest.width and
    * insn->compression_control:
    */
   guess_execution_size(p, insn, dest);
}
145
146 extern int reg_type_size[];
147
148 static void
149 validate_reg(struct brw_instruction *insn, struct brw_reg reg)
150 {
151 int hstride_for_reg[] = {0, 1, 2, 4};
152 int vstride_for_reg[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256};
153 int width_for_reg[] = {1, 2, 4, 8, 16};
154 int execsize_for_reg[] = {1, 2, 4, 8, 16};
155 int width, hstride, vstride, execsize;
156
157 if (reg.file == BRW_IMMEDIATE_VALUE) {
158 /* 3.3.6: Region Parameters. Restriction: Immediate vectors
159 * mean the destination has to be 128-bit aligned and the
160 * destination horiz stride has to be a word.
161 */
162 if (reg.type == BRW_REGISTER_TYPE_V) {
163 assert(hstride_for_reg[insn->bits1.da1.dest_horiz_stride] *
164 reg_type_size[insn->bits1.da1.dest_reg_type] == 2);
165 }
166
167 return;
168 }
169
170 if (reg.file == BRW_ARCHITECTURE_REGISTER_FILE &&
171 reg.file == BRW_ARF_NULL)
172 return;
173
174 assert(reg.hstride >= 0 && reg.hstride < Elements(hstride_for_reg));
175 hstride = hstride_for_reg[reg.hstride];
176
177 if (reg.vstride == 0xf) {
178 vstride = -1;
179 } else {
180 assert(reg.vstride >= 0 && reg.vstride < Elements(vstride_for_reg));
181 vstride = vstride_for_reg[reg.vstride];
182 }
183
184 assert(reg.width >= 0 && reg.width < Elements(width_for_reg));
185 width = width_for_reg[reg.width];
186
187 assert(insn->header.execution_size >= 0 &&
188 insn->header.execution_size < Elements(execsize_for_reg));
189 execsize = execsize_for_reg[insn->header.execution_size];
190
191 /* Restrictions from 3.3.10: Register Region Restrictions. */
192 /* 3. */
193 assert(execsize >= width);
194
195 /* 4. */
196 if (execsize == width && hstride != 0) {
197 assert(vstride == -1 || vstride == width * hstride);
198 }
199
200 /* 5. */
201 if (execsize == width && hstride == 0) {
202 /* no restriction on vstride. */
203 }
204
205 /* 6. */
206 if (width == 1) {
207 assert(hstride == 0);
208 }
209
210 /* 7. */
211 if (execsize == 1 && width == 1) {
212 assert(hstride == 0);
213 assert(vstride == 0);
214 }
215
216 /* 8. */
217 if (vstride == 0 && hstride == 0) {
218 assert(width == 1);
219 }
220
221 /* 10. Check destination issues. */
222 }
223
224 void
225 brw_set_src0(struct brw_compile *p, struct brw_instruction *insn,
226 struct brw_reg reg)
227 {
228 if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE)
229 assert(reg.nr < 128);
230
231 gen7_convert_mrf_to_grf(p, &reg);
232
233 validate_reg(insn, reg);
234
235 insn->bits1.da1.src0_reg_file = reg.file;
236 insn->bits1.da1.src0_reg_type = reg.type;
237 insn->bits2.da1.src0_abs = reg.abs;
238 insn->bits2.da1.src0_negate = reg.negate;
239 insn->bits2.da1.src0_address_mode = reg.address_mode;
240
241 if (reg.file == BRW_IMMEDIATE_VALUE) {
242 insn->bits3.ud = reg.dw1.ud;
243
244 /* Required to set some fields in src1 as well:
245 */
246 insn->bits1.da1.src1_reg_file = 0; /* arf */
247 insn->bits1.da1.src1_reg_type = reg.type;
248 }
249 else
250 {
251 if (reg.address_mode == BRW_ADDRESS_DIRECT) {
252 if (insn->header.access_mode == BRW_ALIGN_1) {
253 insn->bits2.da1.src0_subreg_nr = reg.subnr;
254 insn->bits2.da1.src0_reg_nr = reg.nr;
255 }
256 else {
257 insn->bits2.da16.src0_subreg_nr = reg.subnr / 16;
258 insn->bits2.da16.src0_reg_nr = reg.nr;
259 }
260 }
261 else {
262 insn->bits2.ia1.src0_subreg_nr = reg.subnr;
263
264 if (insn->header.access_mode == BRW_ALIGN_1) {
265 insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset;
266 }
267 else {
268 insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset;
269 }
270 }
271
272 if (insn->header.access_mode == BRW_ALIGN_1) {
273 if (reg.width == BRW_WIDTH_1 &&
274 insn->header.execution_size == BRW_EXECUTE_1) {
275 insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
276 insn->bits2.da1.src0_width = BRW_WIDTH_1;
277 insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0;
278 }
279 else {
280 insn->bits2.da1.src0_horiz_stride = reg.hstride;
281 insn->bits2.da1.src0_width = reg.width;
282 insn->bits2.da1.src0_vert_stride = reg.vstride;
283 }
284 }
285 else {
286 insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
287 insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
288 insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
289 insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
290
291 /* This is an oddity of the fact we're using the same
292 * descriptions for registers in align_16 as align_1:
293 */
294 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
295 insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4;
296 else
297 insn->bits2.da16.src0_vert_stride = reg.vstride;
298 }
299 }
300 }
301
302
/**
 * Encode \p reg as the second source operand of \p insn.
 *
 * src1 may be an immediate; MRFs and indirect addressing are not
 * permitted here.
 */
void brw_set_src1(struct brw_compile *p,
		  struct brw_instruction *insn,
		  struct brw_reg reg)
{
   assert(reg.file != BRW_MESSAGE_REGISTER_FILE);

   assert(reg.nr < 128);

   gen7_convert_mrf_to_grf(p, &reg);

   validate_reg(insn, reg);

   insn->bits1.da1.src1_reg_file = reg.file;
   insn->bits1.da1.src1_reg_type = reg.type;
   insn->bits3.da1.src1_abs = reg.abs;
   insn->bits3.da1.src1_negate = reg.negate;

   /* Only src1 can be immediate in two-argument instructions.
    */
   assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE);

   if (reg.file == BRW_IMMEDIATE_VALUE) {
      insn->bits3.ud = reg.dw1.ud;
   }
   else {
      /* This is a hardware restriction, which may or may not be lifted
       * in the future:
       */
      assert (reg.address_mode == BRW_ADDRESS_DIRECT);
      /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */

      if (insn->header.access_mode == BRW_ALIGN_1) {
	 insn->bits3.da1.src1_subreg_nr = reg.subnr;
	 insn->bits3.da1.src1_reg_nr = reg.nr;
      }
      else {
	 /* Align16: subregister number is in 16-byte units. */
	 insn->bits3.da16.src1_subreg_nr = reg.subnr / 16;
	 insn->bits3.da16.src1_reg_nr = reg.nr;
      }

      if (insn->header.access_mode == BRW_ALIGN_1) {
	 /* A scalar source (width 1 at exec size 1) always uses the
	  * canonical <0;1,0> region.
	  */
	 if (reg.width == BRW_WIDTH_1 &&
	     insn->header.execution_size == BRW_EXECUTE_1) {
	    insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
	    insn->bits3.da1.src1_width = BRW_WIDTH_1;
	    insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0;
	 }
	 else {
	    insn->bits3.da1.src1_horiz_stride = reg.hstride;
	    insn->bits3.da1.src1_width = reg.width;
	    insn->bits3.da1.src1_vert_stride = reg.vstride;
	 }
      }
      else {
	 insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
	 insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
	 insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
	 insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);

	 /* This is an oddity of the fact we're using the same
	  * descriptions for registers in align_16 as align_1:
	  */
	 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
	    insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4;
	 else
	    insn->bits3.da16.src1_vert_stride = reg.vstride;
      }
   }
}
372
373 /**
374 * Set the Message Descriptor and Extended Message Descriptor fields
375 * for SEND messages.
376 *
377 * \note This zeroes out the Function Control bits, so it must be called
378 * \b before filling out any message-specific data. Callers can
379 * choose not to fill in irrelevant bits; they will be zero.
380 */
static void
brw_set_message_descriptor(struct brw_compile *p,
			   struct brw_instruction *inst,
			   enum brw_message_target sfid,
			   unsigned msg_length,
			   unsigned response_length,
			   bool header_present,
			   bool end_of_thread)
{
   struct intel_context *intel = &p->brw->intel;

   /* Zero bits3 (the Function Control bits) before filling in the
    * descriptor fields -- see the comment above this function.
    */
   brw_set_src1(p, inst, brw_imm_d(0));

   if (intel->gen >= 5) {
      inst->bits3.generic_gen5.header_present = header_present;
      inst->bits3.generic_gen5.response_length = response_length;
      inst->bits3.generic_gen5.msg_length = msg_length;
      inst->bits3.generic_gen5.end_of_thread = end_of_thread;

      if (intel->gen >= 6) {
	 /* On Gen6+ Message target/SFID goes in bits 27:24 of the header */
	 inst->header.destreg__conditionalmod = sfid;
      } else {
	 /* Set Extended Message Descriptor (ex_desc) */
	 inst->bits2.send_gen5.sfid = sfid;
	 inst->bits2.send_gen5.end_of_thread = end_of_thread;
      }
   } else {
      /* Pre-gen5 layout: SFID lives in the message descriptor itself. */
      inst->bits3.generic.response_length = response_length;
      inst->bits3.generic.msg_length = msg_length;
      inst->bits3.generic.msg_target = sfid;
      inst->bits3.generic.end_of_thread = end_of_thread;
   }
}
415
/* Fill out the SEND descriptor for a message to the extended math unit.
 * Message and response lengths are inferred from the math function.
 */
static void brw_set_math_message( struct brw_compile *p,
				  struct brw_instruction *insn,
				  GLuint function,
				  GLuint integer_type,
				  bool low_precision,
				  bool saturate,
				  GLuint dataType )
{
   struct brw_context *brw = p->brw;
   struct intel_context *intel = &brw->intel;
   unsigned msg_length;
   unsigned response_length;

   /* Infer message length from the function */
   switch (function) {
   case BRW_MATH_FUNCTION_POW:
   case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT:
   case BRW_MATH_FUNCTION_INT_DIV_REMAINDER:
   case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER:
      /* Two-operand functions send both operands in the payload. */
      msg_length = 2;
      break;
   default:
      msg_length = 1;
      break;
   }

   /* Infer response length from the function */
   switch (function) {
   case BRW_MATH_FUNCTION_SINCOS:
   case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER:
      /* These functions return a pair of results. */
      response_length = 2;
      break;
   default:
      response_length = 1;
      break;
   }

   brw_set_message_descriptor(p, insn, BRW_SFID_MATH,
			      msg_length, response_length, false, false);
   if (intel->gen == 5) {
      insn->bits3.math_gen5.function = function;
      insn->bits3.math_gen5.int_type = integer_type;
      insn->bits3.math_gen5.precision = low_precision;
      insn->bits3.math_gen5.saturate = saturate;
      insn->bits3.math_gen5.data_type = dataType;
      insn->bits3.math_gen5.snapshot = 0;
   } else {
      insn->bits3.math.function = function;
      insn->bits3.math.int_type = integer_type;
      insn->bits3.math.precision = low_precision;
      insn->bits3.math.saturate = saturate;
      insn->bits3.math.data_type = dataType;
   }
}
470
471
/* Fill out the URB descriptor for an FF_SYNC message (gen5 URB layout).
 * Only the allocate bit is meaningful; the remaining fields are fixed.
 */
static void brw_set_ff_sync_message(struct brw_compile *p,
				    struct brw_instruction *insn,
				    bool allocate,
				    GLuint response_length,
				    bool end_of_thread)
{
   brw_set_message_descriptor(p, insn, BRW_SFID_URB,
			      1, response_length, true, end_of_thread);
   insn->bits3.urb_gen5.opcode = 1; /* FF_SYNC */
   insn->bits3.urb_gen5.offset = 0; /* Not used by FF_SYNC */
   insn->bits3.urb_gen5.swizzle_control = 0; /* Not used by FF_SYNC */
   insn->bits3.urb_gen5.allocate = allocate;
   insn->bits3.urb_gen5.used = 0; /* Not used by FF_SYNC */
   insn->bits3.urb_gen5.complete = 0; /* Not used by FF_SYNC */
}
487
/* Fill out the descriptor for a URB write message, using the layout
 * appropriate to the hardware generation.
 */
static void brw_set_urb_message( struct brw_compile *p,
				 struct brw_instruction *insn,
				 bool allocate,
				 bool used,
				 GLuint msg_length,
				 GLuint response_length,
				 bool end_of_thread,
				 bool complete,
				 GLuint offset,
				 GLuint swizzle_control )
{
   struct brw_context *brw = p->brw;
   struct intel_context *intel = &brw->intel;

   brw_set_message_descriptor(p, insn, BRW_SFID_URB,
			      msg_length, response_length, true, end_of_thread);
   if (intel->gen == 7) {
      insn->bits3.urb_gen7.opcode = 0;	/* URB_WRITE_HWORD */
      insn->bits3.urb_gen7.offset = offset;
      /* Gen7's URB write has no transpose mode. */
      assert(swizzle_control != BRW_URB_SWIZZLE_TRANSPOSE);
      insn->bits3.urb_gen7.swizzle_control = swizzle_control;
      /* per_slot_offset = 0 makes it ignore offsets in message header */
      insn->bits3.urb_gen7.per_slot_offset = 0;
      insn->bits3.urb_gen7.complete = complete;
   } else if (intel->gen >= 5) {
      insn->bits3.urb_gen5.opcode = 0;	/* URB_WRITE */
      insn->bits3.urb_gen5.offset = offset;
      insn->bits3.urb_gen5.swizzle_control = swizzle_control;
      insn->bits3.urb_gen5.allocate = allocate;
      insn->bits3.urb_gen5.used = used;	/* ? */
      insn->bits3.urb_gen5.complete = complete;
   } else {
      insn->bits3.urb.opcode = 0;	/* ? */
      insn->bits3.urb.offset = offset;
      insn->bits3.urb.swizzle_control = swizzle_control;
      insn->bits3.urb.allocate = allocate;
      insn->bits3.urb.used = used;	/* ? */
      insn->bits3.urb.complete = complete;
   }
}
528
/* Fill out the descriptor for a data port write message, selecting the
 * proper shared function (SFID) and descriptor layout per generation.
 */
void
brw_set_dp_write_message(struct brw_compile *p,
			 struct brw_instruction *insn,
			 GLuint binding_table_index,
			 GLuint msg_control,
			 GLuint msg_type,
			 GLuint msg_length,
			 bool header_present,
			 GLuint last_render_target,
			 GLuint response_length,
			 GLuint end_of_thread,
			 GLuint send_commit_msg)
{
   struct brw_context *brw = p->brw;
   struct intel_context *intel = &brw->intel;
   unsigned sfid;

   if (intel->gen >= 7) {
      /* Use the Render Cache for RT writes; otherwise use the Data Cache */
      if (msg_type == GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE)
	 sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
      else
	 sfid = GEN7_SFID_DATAPORT_DATA_CACHE;
   } else if (intel->gen == 6) {
      /* Use the render cache for all write messages. */
      sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
   } else {
      sfid = BRW_SFID_DATAPORT_WRITE;
   }

   brw_set_message_descriptor(p, insn, sfid, msg_length, response_length,
			      header_present, end_of_thread);

   /* Now fill in the generation-specific descriptor layout. */
   if (intel->gen >= 7) {
      insn->bits3.gen7_dp.binding_table_index = binding_table_index;
      insn->bits3.gen7_dp.msg_control = msg_control;
      insn->bits3.gen7_dp.last_render_target = last_render_target;
      insn->bits3.gen7_dp.msg_type = msg_type;
   } else if (intel->gen == 6) {
      insn->bits3.gen6_dp.binding_table_index = binding_table_index;
      insn->bits3.gen6_dp.msg_control = msg_control;
      insn->bits3.gen6_dp.last_render_target = last_render_target;
      insn->bits3.gen6_dp.msg_type = msg_type;
      insn->bits3.gen6_dp.send_commit_msg = send_commit_msg;
   } else if (intel->gen == 5) {
      insn->bits3.dp_write_gen5.binding_table_index = binding_table_index;
      insn->bits3.dp_write_gen5.msg_control = msg_control;
      insn->bits3.dp_write_gen5.last_render_target = last_render_target;
      insn->bits3.dp_write_gen5.msg_type = msg_type;
      insn->bits3.dp_write_gen5.send_commit_msg = send_commit_msg;
   } else {
      insn->bits3.dp_write.binding_table_index = binding_table_index;
      insn->bits3.dp_write.msg_control = msg_control;
      insn->bits3.dp_write.last_render_target = last_render_target;
      insn->bits3.dp_write.msg_type = msg_type;
      insn->bits3.dp_write.send_commit_msg = send_commit_msg;
   }
}
587
/* Fill out the descriptor for a data port read message, selecting the
 * proper shared function (SFID) and descriptor layout per generation.
 */
void
brw_set_dp_read_message(struct brw_compile *p,
			struct brw_instruction *insn,
			GLuint binding_table_index,
			GLuint msg_control,
			GLuint msg_type,
			GLuint target_cache,
			GLuint msg_length,
			GLuint response_length)
{
   struct brw_context *brw = p->brw;
   struct intel_context *intel = &brw->intel;
   unsigned sfid;

   if (intel->gen >= 7) {
      sfid = GEN7_SFID_DATAPORT_DATA_CACHE;
   } else if (intel->gen == 6) {
      /* On gen6 the SFID is chosen from the requested target cache. */
      if (target_cache == BRW_DATAPORT_READ_TARGET_RENDER_CACHE)
	 sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
      else
	 sfid = GEN6_SFID_DATAPORT_SAMPLER_CACHE;
   } else {
      sfid = BRW_SFID_DATAPORT_READ;
   }

   brw_set_message_descriptor(p, insn, sfid, msg_length, response_length,
			      true, false);

   /* Now fill in the generation-specific descriptor layout. */
   if (intel->gen >= 7) {
      insn->bits3.gen7_dp.binding_table_index = binding_table_index;
      insn->bits3.gen7_dp.msg_control = msg_control;
      insn->bits3.gen7_dp.last_render_target = 0;
      insn->bits3.gen7_dp.msg_type = msg_type;
   } else if (intel->gen == 6) {
      insn->bits3.gen6_dp.binding_table_index = binding_table_index;
      insn->bits3.gen6_dp.msg_control = msg_control;
      insn->bits3.gen6_dp.last_render_target = 0;
      insn->bits3.gen6_dp.msg_type = msg_type;
      insn->bits3.gen6_dp.send_commit_msg = 0;
   } else if (intel->gen == 5) {
      insn->bits3.dp_read_gen5.binding_table_index = binding_table_index;
      insn->bits3.dp_read_gen5.msg_control = msg_control;
      insn->bits3.dp_read_gen5.msg_type = msg_type;
      insn->bits3.dp_read_gen5.target_cache = target_cache;
   } else if (intel->is_g4x) {
      insn->bits3.dp_read_g4x.binding_table_index = binding_table_index; /*0:7*/
      insn->bits3.dp_read_g4x.msg_control = msg_control;  /*8:10*/
      insn->bits3.dp_read_g4x.msg_type = msg_type;  /*11:13*/
      insn->bits3.dp_read_g4x.target_cache = target_cache;  /*14:15*/
   } else {
      insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/
      insn->bits3.dp_read.msg_control = msg_control;  /*8:11*/
      insn->bits3.dp_read.msg_type = msg_type;  /*12:13*/
      insn->bits3.dp_read.target_cache = target_cache;  /*14:15*/
   }
}
644
/* Fill out the descriptor for a sampler message, using the layout
 * appropriate to the hardware generation.
 */
static void brw_set_sampler_message(struct brw_compile *p,
                                    struct brw_instruction *insn,
                                    GLuint binding_table_index,
                                    GLuint sampler,
                                    GLuint msg_type,
                                    GLuint response_length,
                                    GLuint msg_length,
                                    GLuint header_present,
                                    GLuint simd_mode)
{
   struct brw_context *brw = p->brw;
   struct intel_context *intel = &brw->intel;

   brw_set_message_descriptor(p, insn, BRW_SFID_SAMPLER, msg_length,
			      response_length, header_present, false);

   if (intel->gen >= 7) {
      insn->bits3.sampler_gen7.binding_table_index = binding_table_index;
      insn->bits3.sampler_gen7.sampler = sampler;
      insn->bits3.sampler_gen7.msg_type = msg_type;
      insn->bits3.sampler_gen7.simd_mode = simd_mode;
   } else if (intel->gen >= 5) {
      insn->bits3.sampler_gen5.binding_table_index = binding_table_index;
      insn->bits3.sampler_gen5.sampler = sampler;
      insn->bits3.sampler_gen5.msg_type = msg_type;
      insn->bits3.sampler_gen5.simd_mode = simd_mode;
   } else if (intel->is_g4x) {
      insn->bits3.sampler_g4x.binding_table_index = binding_table_index;
      insn->bits3.sampler_g4x.sampler = sampler;
      insn->bits3.sampler_g4x.msg_type = msg_type;
   } else {
      insn->bits3.sampler.binding_table_index = binding_table_index;
      insn->bits3.sampler.sampler = sampler;
      insn->bits3.sampler.msg_type = msg_type;
      /* Original (pre-g4x) descriptor carries an explicit return format. */
      insn->bits3.sampler.return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32;
   }
}
682
683
#define next_insn brw_next_insn
/**
 * Append a new instruction to the program, initialized from the current
 * default instruction state, and set its opcode.
 */
struct brw_instruction *
brw_next_insn(struct brw_compile *p, GLuint opcode)
{
   struct brw_instruction *insn;

   assert(p->nr_insn + 1 < BRW_EU_MAX_INSN);

   insn = &p->store[p->nr_insn++];
   memcpy(insn, p->current, sizeof(*insn));

   /* Reset this one-shot flag:
    */

   /* destreg__conditionalmod applies only to the instruction it was set
    * for; clear it (and restore normal predication) in the default state.
    */
   if (p->current->header.destreg__conditionalmod) {
      p->current->header.destreg__conditionalmod = 0;
      p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
   }

   insn->header.opcode = opcode;
   return insn;
}
706
707 static struct brw_instruction *brw_alu1( struct brw_compile *p,
708 GLuint opcode,
709 struct brw_reg dest,
710 struct brw_reg src )
711 {
712 struct brw_instruction *insn = next_insn(p, opcode);
713 brw_set_dest(p, insn, dest);
714 brw_set_src0(p, insn, src);
715 return insn;
716 }
717
718 static struct brw_instruction *brw_alu2(struct brw_compile *p,
719 GLuint opcode,
720 struct brw_reg dest,
721 struct brw_reg src0,
722 struct brw_reg src1 )
723 {
724 struct brw_instruction *insn = next_insn(p, opcode);
725 brw_set_dest(p, insn, dest);
726 brw_set_src0(p, insn, src0);
727 brw_set_src1(p, insn, src1);
728 return insn;
729 }
730
731
/***********************************************************************
 * Convenience routines.
 */
/* Define brw_<OP>(p, dest, src0): a one-source ALU emitter. */
#define ALU1(OP)					\
struct brw_instruction *brw_##OP(struct brw_compile *p,	\
	      struct brw_reg dest,			\
	      struct brw_reg src0)   			\
{							\
   return brw_alu1(p, BRW_OPCODE_##OP, dest, src0);    	\
}

/* Define brw_<OP>(p, dest, src0, src1): a two-source ALU emitter. */
#define ALU2(OP)					\
struct brw_instruction *brw_##OP(struct brw_compile *p,	\
	      struct brw_reg dest,			\
	      struct brw_reg src0,			\
	      struct brw_reg src1)   			\
{							\
   return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1);	\
}
751
/* Rounding operations (other than RNDD) require two instructions - the first
 * stores a rounded value (possibly the wrong way) in the dest register, but
 * also sets a per-channel "increment bit" in the flag register. A predicated
 * add of 1.0 fixes dest to contain the desired result.
 *
 * Sandybridge and later appear to round correctly without an ADD.
 */
#define ROUND(OP)							      \
void brw_##OP(struct brw_compile *p,					      \
	      struct brw_reg dest,					      \
	      struct brw_reg src)					      \
{									      \
   struct brw_instruction *rnd, *add;					      \
   rnd = next_insn(p, BRW_OPCODE_##OP);					      \
   brw_set_dest(p, rnd, dest);						      \
   brw_set_src0(p, rnd, src);						      \
									      \
   /* Pre-gen6: emit the predicated fixup ADD described above. */	      \
   if (p->brw->intel.gen < 6) {						      \
      /* turn on round-increments */					      \
      rnd->header.destreg__conditionalmod = BRW_CONDITIONAL_R;		      \
      add = brw_ADD(p, dest, dest, brw_imm_f(1.0f));			      \
      add->header.predicate_control = BRW_PREDICATE_NORMAL;		      \
   }									      \
}
776
777
/* Instantiate the public one- and two-source ALU emitters, plus the
 * two-instruction rounding operations.
 */
ALU1(MOV)
ALU2(SEL)
ALU1(NOT)
ALU2(AND)
ALU2(OR)
ALU2(XOR)
ALU2(SHR)
ALU2(SHL)
ALU2(RSR)
ALU2(RSL)
ALU2(ASR)
ALU1(FRC)
ALU1(RNDD)
ALU2(MAC)
ALU2(MACH)
ALU1(LZD)
ALU2(DP4)
ALU2(DPH)
ALU2(DP3)
ALU2(DP2)
ALU2(LINE)
ALU2(PLN)


ROUND(RNDZ)
ROUND(RNDE)
804
805
/* Emit an ADD, asserting the operand-type restrictions from the ISA
 * documentation (section 6.2.2: add): a float source may not be mixed
 * with a D/UD source.
 */
struct brw_instruction *brw_ADD(struct brw_compile *p,
				struct brw_reg dest,
				struct brw_reg src0,
				struct brw_reg src1)
{
   /* 6.2.2: add */
   if (src0.type == BRW_REGISTER_TYPE_F ||
       (src0.file == BRW_IMMEDIATE_VALUE &&
	src0.type == BRW_REGISTER_TYPE_VF)) {
      assert(src1.type != BRW_REGISTER_TYPE_UD);
      assert(src1.type != BRW_REGISTER_TYPE_D);
   }

   if (src1.type == BRW_REGISTER_TYPE_F ||
       (src1.file == BRW_IMMEDIATE_VALUE &&
	src1.type == BRW_REGISTER_TYPE_VF)) {
      assert(src0.type != BRW_REGISTER_TYPE_UD);
      assert(src0.type != BRW_REGISTER_TYPE_D);
   }

   return brw_alu2(p, BRW_OPCODE_ADD, dest, src0, src1);
}
828
/* Emit a MUL, asserting the operand-type restrictions from the ISA
 * documentation (section 6.32.38: mul): no float/integer mixing, no
 * float destination with integer sources, and no accumulator sources.
 */
struct brw_instruction *brw_MUL(struct brw_compile *p,
				struct brw_reg dest,
				struct brw_reg src0,
				struct brw_reg src1)
{
   /* 6.32.38: mul */
   if (src0.type == BRW_REGISTER_TYPE_D ||
       src0.type == BRW_REGISTER_TYPE_UD ||
       src1.type == BRW_REGISTER_TYPE_D ||
       src1.type == BRW_REGISTER_TYPE_UD) {
      assert(dest.type != BRW_REGISTER_TYPE_F);
   }

   if (src0.type == BRW_REGISTER_TYPE_F ||
       (src0.file == BRW_IMMEDIATE_VALUE &&
	src0.type == BRW_REGISTER_TYPE_VF)) {
      assert(src1.type != BRW_REGISTER_TYPE_UD);
      assert(src1.type != BRW_REGISTER_TYPE_D);
   }

   if (src1.type == BRW_REGISTER_TYPE_F ||
       (src1.file == BRW_IMMEDIATE_VALUE &&
	src1.type == BRW_REGISTER_TYPE_VF)) {
      assert(src0.type != BRW_REGISTER_TYPE_UD);
      assert(src0.type != BRW_REGISTER_TYPE_D);
   }

   assert(src0.file != BRW_ARCHITECTURE_REGISTER_FILE ||
	  src0.nr != BRW_ARF_ACCUMULATOR);
   assert(src1.file != BRW_ARCHITECTURE_REGISTER_FILE ||
	  src1.nr != BRW_ARF_ACCUMULATOR);

   return brw_alu2(p, BRW_OPCODE_MUL, dest, src0, src1);
}
863
864
865 void brw_NOP(struct brw_compile *p)
866 {
867 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP);
868 brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
869 brw_set_src0(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
870 brw_set_src1(p, insn, brw_imm_ud(0x0));
871 }
872
873
874
875
876
877 /***********************************************************************
878 * Comparisons, if/else/endif
879 */
880
/* Emit a JMPI (jump-indexed) instruction.  JMPI is always scalar,
 * uncompressed, and unmasked.
 */
struct brw_instruction *brw_JMPI(struct brw_compile *p,
                                 struct brw_reg dest,
                                 struct brw_reg src0,
                                 struct brw_reg src1)
{
   struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1);

   insn->header.execution_size = 1;
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.mask_control = BRW_MASK_DISABLE;

   /* Clear default-state predication; presumably it was meant to apply
    * only to this JMPI -- NOTE(review): confirm against callers.
    */
   p->current->header.predicate_control = BRW_PREDICATE_NONE;

   return insn;
}
896
897 static void
898 push_if_stack(struct brw_compile *p, struct brw_instruction *inst)
899 {
900 p->if_stack[p->if_stack_depth] = inst;
901
902 p->if_stack_depth++;
903 if (p->if_stack_array_size <= p->if_stack_depth) {
904 p->if_stack_array_size *= 2;
905 p->if_stack = reralloc(p->mem_ctx, p->if_stack, struct brw_instruction *,
906 p->if_stack_array_size);
907 }
908 }
909
910 /* EU takes the value from the flag register and pushes it onto some
911 * sort of a stack (presumably merging with any flag value already on
912 * the stack). Within an if block, the flags at the top of the stack
913 * control execution on each channel of the unit, eg. on each of the
914 * 16 pixel values in our wm programs.
915 *
916 * When the matching 'else' instruction is reached (presumably by
917 * countdown of the instruction count patched in by our ELSE/ENDIF
 * functions), the relevant flags are inverted.
919 *
920 * When the matching 'endif' instruction is reached, the flags are
921 * popped off. If the stack is now empty, normal execution resumes.
922 */
/**
 * Emit an IF instruction with the generation-appropriate encoding and
 * push it on the if-stack so the matching ELSE/ENDIF can patch it.
 */
struct brw_instruction *
brw_IF(struct brw_compile *p, GLuint execute_size)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_IF);

   /* Override the defaults for this instruction:
    */
   if (intel->gen < 6) {
      /* Pre-gen6: IP-relative form; jump target patched in later. */
      brw_set_dest(p, insn, brw_ip_reg());
      brw_set_src0(p, insn, brw_ip_reg());
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else if (intel->gen == 6) {
      /* Gen6: jump count lives in the destination's immediate field. */
      brw_set_dest(p, insn, brw_imm_w(0));
      insn->bits1.branch_gen6.jump_count = 0;
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   } else {
      /* Gen7: separate JIP/UIP offsets, patched at ELSE/ENDIF time. */
      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_ud(0));
      insn->bits3.break_cont.jip = 0;
      insn->bits3.break_cont.uip = 0;
   }

   insn->header.execution_size = execute_size;
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.predicate_control = BRW_PREDICATE_NORMAL;
   insn->header.mask_control = BRW_MASK_ENABLE;
   if (!p->single_program_flow)
      insn->header.thread_control = BRW_THREAD_SWITCH;

   /* Predication was consumed by this IF; clear the default state. */
   p->current->header.predicate_control = BRW_PREDICATE_NONE;

   push_if_stack(p, insn);
   return insn;
}
962
963 /* This function is only used for gen6-style IF instructions with an
964 * embedded comparison (conditional modifier). It is not used on gen7.
965 */
/* Emit a gen6 IF with an embedded comparison of src0 and src1 under
 * \p conditional; the jump count is patched later by ELSE/ENDIF.
 */
struct brw_instruction *
gen6_IF(struct brw_compile *p, uint32_t conditional,
	struct brw_reg src0, struct brw_reg src1)
{
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_IF);

   brw_set_dest(p, insn, brw_imm_w(0));
   /* Match the SIMD width of the shader being compiled. */
   if (p->compressed) {
      insn->header.execution_size = BRW_EXECUTE_16;
   } else {
      insn->header.execution_size = BRW_EXECUTE_8;
   }
   insn->bits1.branch_gen6.jump_count = 0;
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, src1);

   assert(insn->header.compression_control == BRW_COMPRESSION_NONE);
   assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
   /* The conditional modifier shares the destreg field on this opcode. */
   insn->header.destreg__conditionalmod = conditional;

   if (!p->single_program_flow)
      insn->header.thread_control = BRW_THREAD_SWITCH;

   push_if_stack(p, insn);
   return insn;
}
994
995 /**
996 * In single-program-flow (SPF) mode, convert IF and ELSE into ADDs.
997 */
998 static void
999 convert_IF_ELSE_to_ADD(struct brw_compile *p,
1000 struct brw_instruction *if_inst,
1001 struct brw_instruction *else_inst)
1002 {
1003 /* The next instruction (where the ENDIF would be, if it existed) */
1004 struct brw_instruction *next_inst = &p->store[p->nr_insn];
1005
1006 assert(p->single_program_flow);
1007 assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF);
1008 assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE);
1009 assert(if_inst->header.execution_size == BRW_EXECUTE_1);
1010
1011 /* Convert IF to an ADD instruction that moves the instruction pointer
1012 * to the first instruction of the ELSE block. If there is no ELSE
1013 * block, point to where ENDIF would be. Reverse the predicate.
1014 *
1015 * There's no need to execute an ENDIF since we don't need to do any
1016 * stack operations, and if we're currently executing, we just want to
1017 * continue normally.
1018 */
1019 if_inst->header.opcode = BRW_OPCODE_ADD;
1020 if_inst->header.predicate_inverse = 1;
1021
1022 if (else_inst != NULL) {
1023 /* Convert ELSE to an ADD instruction that points where the ENDIF
1024 * would be.
1025 */
1026 else_inst->header.opcode = BRW_OPCODE_ADD;
1027
1028 if_inst->bits3.ud = (else_inst - if_inst + 1) * 16;
1029 else_inst->bits3.ud = (next_inst - else_inst) * 16;
1030 } else {
1031 if_inst->bits3.ud = (next_inst - if_inst) * 16;
1032 }
1033 }
1034
1035 /**
1036 * Patch IF and ELSE instructions with appropriate jump targets.
1037 */
1038 static void
1039 patch_IF_ELSE(struct brw_compile *p,
1040 struct brw_instruction *if_inst,
1041 struct brw_instruction *else_inst,
1042 struct brw_instruction *endif_inst)
1043 {
1044 struct intel_context *intel = &p->brw->intel;
1045
1046 assert(!p->single_program_flow);
1047 assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF);
1048 assert(endif_inst != NULL);
1049 assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE);
1050
1051 unsigned br = 1;
1052 /* Jump count is for 64bit data chunk each, so one 128bit instruction
1053 * requires 2 chunks.
1054 */
1055 if (intel->gen >= 5)
1056 br = 2;
1057
1058 assert(endif_inst->header.opcode == BRW_OPCODE_ENDIF);
1059 endif_inst->header.execution_size = if_inst->header.execution_size;
1060
1061 if (else_inst == NULL) {
1062 /* Patch IF -> ENDIF */
1063 if (intel->gen < 6) {
1064 /* Turn it into an IFF, which means no mask stack operations for
1065 * all-false and jumping past the ENDIF.
1066 */
1067 if_inst->header.opcode = BRW_OPCODE_IFF;
1068 if_inst->bits3.if_else.jump_count = br * (endif_inst - if_inst + 1);
1069 if_inst->bits3.if_else.pop_count = 0;
1070 if_inst->bits3.if_else.pad0 = 0;
1071 } else if (intel->gen == 6) {
1072 /* As of gen6, there is no IFF and IF must point to the ENDIF. */
1073 if_inst->bits1.branch_gen6.jump_count = br * (endif_inst - if_inst);
1074 } else {
1075 if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst);
1076 if_inst->bits3.break_cont.jip = br * (endif_inst - if_inst);
1077 }
1078 } else {
1079 else_inst->header.execution_size = if_inst->header.execution_size;
1080
1081 /* Patch IF -> ELSE */
1082 if (intel->gen < 6) {
1083 if_inst->bits3.if_else.jump_count = br * (else_inst - if_inst);
1084 if_inst->bits3.if_else.pop_count = 0;
1085 if_inst->bits3.if_else.pad0 = 0;
1086 } else if (intel->gen == 6) {
1087 if_inst->bits1.branch_gen6.jump_count = br * (else_inst - if_inst + 1);
1088 }
1089
1090 /* Patch ELSE -> ENDIF */
1091 if (intel->gen < 6) {
1092 /* BRW_OPCODE_ELSE pre-gen6 should point just past the
1093 * matching ENDIF.
1094 */
1095 else_inst->bits3.if_else.jump_count = br*(endif_inst - else_inst + 1);
1096 else_inst->bits3.if_else.pop_count = 1;
1097 else_inst->bits3.if_else.pad0 = 0;
1098 } else if (intel->gen == 6) {
1099 /* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. */
1100 else_inst->bits1.branch_gen6.jump_count = br*(endif_inst - else_inst);
1101 } else {
1102 /* The IF instruction's JIP should point just past the ELSE */
1103 if_inst->bits3.break_cont.jip = br * (else_inst - if_inst + 1);
1104 /* The IF instruction's UIP and ELSE's JIP should point to ENDIF */
1105 if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst);
1106 else_inst->bits3.break_cont.jip = br * (endif_inst - else_inst);
1107 }
1108 }
1109 }
1110
/* Emit an ELSE for the innermost open IF.  Jump fields are zeroed here
 * and patched by patch_IF_ELSE() when the matching ENDIF is emitted.
 */
void
brw_ELSE(struct brw_compile *p)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_ELSE);

   if (intel->gen < 6) {
      /* Pre-gen6: jump via the instruction pointer. */
      brw_set_dest(p, insn, brw_ip_reg());
      brw_set_src0(p, insn, brw_ip_reg());
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else if (intel->gen == 6) {
      /* Gen6: jump distance in bits1. */
      brw_set_dest(p, insn, brw_imm_w(0));
      insn->bits1.branch_gen6.jump_count = 0;
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   } else {
      /* Gen7: JIP/UIP in bits3. */
      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_ud(0));
      insn->bits3.break_cont.jip = 0;
      insn->bits3.break_cont.uip = 0;
   }

   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.mask_control = BRW_MASK_ENABLE;
   if (!p->single_program_flow)
      insn->header.thread_control = BRW_THREAD_SWITCH;

   /* Push on top of the IF so brw_ENDIF() finds the ELSE first. */
   push_if_stack(p, insn);
}
1143
/* Close the innermost open IF (and optional ELSE): emit the ENDIF and
 * patch the pending jump fields.  In single-program-flow mode no ENDIF
 * is emitted; the IF/ELSE are rewritten as ADDs instead.
 */
void
brw_ENDIF(struct brw_compile *p)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;
   struct brw_instruction *else_inst = NULL;
   struct brw_instruction *if_inst = NULL;

   /* Pop the IF and (optional) ELSE instructions from the stack */
   p->if_stack_depth--;
   if (p->if_stack[p->if_stack_depth]->header.opcode == BRW_OPCODE_ELSE) {
      else_inst = p->if_stack[p->if_stack_depth];
      p->if_stack_depth--;
   }
   if_inst = p->if_stack[p->if_stack_depth];

   if (p->single_program_flow) {
      /* ENDIF is useless; don't bother emitting it. */
      convert_IF_ELSE_to_ADD(p, if_inst, else_inst);
      return;
   }

   insn = next_insn(p, BRW_OPCODE_ENDIF);

   if (intel->gen < 6) {
      brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
      brw_set_src0(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else if (intel->gen == 6) {
      brw_set_dest(p, insn, brw_imm_w(0));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   } else {
      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_ud(0));
   }

   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.mask_control = BRW_MASK_ENABLE;
   insn->header.thread_control = BRW_THREAD_SWITCH;

   /* Also pop item off the stack in the endif instruction: */
   if (intel->gen < 6) {
      insn->bits3.if_else.jump_count = 0;
      insn->bits3.if_else.pop_count = 1;
      insn->bits3.if_else.pad0 = 0;
   } else if (intel->gen == 6) {
      /* Gen6 ENDIF points to the next instruction (one 128-bit insn =
       * two 64-bit chunks).
       */
      insn->bits1.branch_gen6.jump_count = 2;
   } else {
      insn->bits3.break_cont.jip = 2;
   }
   /* Now that the ENDIF's location is known, fix up the IF/ELSE jumps. */
   patch_IF_ELSE(p, if_inst, else_inst, insn);
}
1198
1199 struct brw_instruction *brw_BREAK(struct brw_compile *p, int pop_count)
1200 {
1201 struct intel_context *intel = &p->brw->intel;
1202 struct brw_instruction *insn;
1203
1204 insn = next_insn(p, BRW_OPCODE_BREAK);
1205 if (intel->gen >= 6) {
1206 brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1207 brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1208 brw_set_src1(p, insn, brw_imm_d(0x0));
1209 } else {
1210 brw_set_dest(p, insn, brw_ip_reg());
1211 brw_set_src0(p, insn, brw_ip_reg());
1212 brw_set_src1(p, insn, brw_imm_d(0x0));
1213 insn->bits3.if_else.pad0 = 0;
1214 insn->bits3.if_else.pop_count = pop_count;
1215 }
1216 insn->header.compression_control = BRW_COMPRESSION_NONE;
1217 insn->header.execution_size = BRW_EXECUTE_8;
1218
1219 return insn;
1220 }
1221
1222 struct brw_instruction *gen6_CONT(struct brw_compile *p,
1223 struct brw_instruction *do_insn)
1224 {
1225 struct brw_instruction *insn;
1226
1227 insn = next_insn(p, BRW_OPCODE_CONTINUE);
1228 brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1229 brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1230 brw_set_dest(p, insn, brw_ip_reg());
1231 brw_set_src0(p, insn, brw_ip_reg());
1232 brw_set_src1(p, insn, brw_imm_d(0x0));
1233
1234 insn->header.compression_control = BRW_COMPRESSION_NONE;
1235 insn->header.execution_size = BRW_EXECUTE_8;
1236 return insn;
1237 }
1238
1239 struct brw_instruction *brw_CONT(struct brw_compile *p, int pop_count)
1240 {
1241 struct brw_instruction *insn;
1242 insn = next_insn(p, BRW_OPCODE_CONTINUE);
1243 brw_set_dest(p, insn, brw_ip_reg());
1244 brw_set_src0(p, insn, brw_ip_reg());
1245 brw_set_src1(p, insn, brw_imm_d(0x0));
1246 insn->header.compression_control = BRW_COMPRESSION_NONE;
1247 insn->header.execution_size = BRW_EXECUTE_8;
1248 /* insn->header.mask_control = BRW_MASK_DISABLE; */
1249 insn->bits3.if_else.pad0 = 0;
1250 insn->bits3.if_else.pop_count = pop_count;
1251 return insn;
1252 }
1253
1254 /* DO/WHILE loop:
1255 *
1256 * The DO/WHILE is just an unterminated loop -- break or continue are
1257 * used for control within the loop. We have a few ways they can be
1258 * done.
1259 *
1260 * For uniform control flow, the WHILE is just a jump, so ADD ip, ip,
1261 * jip and no DO instruction.
1262 *
1263 * For non-uniform control flow pre-gen6, there's a DO instruction to
1264 * push the mask, and a WHILE to jump back, and BREAK to get out and
1265 * pop the mask.
1266 *
1267 * For gen6, there's no more mask stack, so no need for DO. WHILE
1268 * just points back to the first instruction of the loop.
1269 */
1270 struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size)
1271 {
1272 struct intel_context *intel = &p->brw->intel;
1273
1274 if (intel->gen >= 6 || p->single_program_flow) {
1275 return &p->store[p->nr_insn];
1276 } else {
1277 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO);
1278
1279 /* Override the defaults for this instruction:
1280 */
1281 brw_set_dest(p, insn, brw_null_reg());
1282 brw_set_src0(p, insn, brw_null_reg());
1283 brw_set_src1(p, insn, brw_null_reg());
1284
1285 insn->header.compression_control = BRW_COMPRESSION_NONE;
1286 insn->header.execution_size = execute_size;
1287 insn->header.predicate_control = BRW_PREDICATE_NONE;
1288 /* insn->header.mask_control = BRW_MASK_ENABLE; */
1289 /* insn->header.mask_control = BRW_MASK_DISABLE; */
1290
1291 return insn;
1292 }
1293 }
1294
1295
1296
/* Emit the WHILE closing a DO/WHILE loop.  do_insn is the instruction
 * returned by brw_DO() (on gen6+/SPF it is the loop's first instruction
 * rather than an actual DO).
 */
struct brw_instruction *brw_WHILE(struct brw_compile *p,
                                  struct brw_instruction *do_insn)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;
   GLuint br = 1;

   /* Gen5+ counts jumps in 64-bit chunks: one 128-bit instruction = 2. */
   if (intel->gen >= 5)
      br = 2;

   if (intel->gen >= 7) {
      insn = next_insn(p, BRW_OPCODE_WHILE);

      /* Gen7: backward JIP to the top of the loop (negative offset). */
      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_ud(0));
      insn->bits3.break_cont.jip = br * (do_insn - insn);

      insn->header.execution_size = BRW_EXECUTE_8;
   } else if (intel->gen == 6) {
      insn = next_insn(p, BRW_OPCODE_WHILE);

      /* Gen6: backward jump distance encoded in bits1. */
      brw_set_dest(p, insn, brw_imm_w(0));
      insn->bits1.branch_gen6.jump_count = br * (do_insn - insn);
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));

      insn->header.execution_size = BRW_EXECUTE_8;
   } else {
      if (p->single_program_flow) {
	 /* Uniform control flow: the WHILE is just a scalar IP add
	  * (jump distances are in bytes, 16 per instruction).
	  */
	 insn = next_insn(p, BRW_OPCODE_ADD);

	 brw_set_dest(p, insn, brw_ip_reg());
	 brw_set_src0(p, insn, brw_ip_reg());
	 brw_set_src1(p, insn, brw_imm_d((do_insn - insn) * 16));
	 insn->header.execution_size = BRW_EXECUTE_1;
      } else {
	 insn = next_insn(p, BRW_OPCODE_WHILE);

	 assert(do_insn->header.opcode == BRW_OPCODE_DO);

	 brw_set_dest(p, insn, brw_ip_reg());
	 brw_set_src0(p, insn, brw_ip_reg());
	 brw_set_src1(p, insn, brw_imm_d(0));

	 /* Jump back to just past the DO, matching its execution width. */
	 insn->header.execution_size = do_insn->header.execution_size;
	 insn->bits3.if_else.jump_count = br * (do_insn - insn + 1);
	 insn->bits3.if_else.pop_count = 0;
	 insn->bits3.if_else.pad0 = 0;
      }
   }
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   /* Don't let the loop's predicate leak into following instructions. */
   p->current->header.predicate_control = BRW_PREDICATE_NONE;

   return insn;
}
1353
1354
1355 /* FORWARD JUMPS:
1356 */
1357 void brw_land_fwd_jump(struct brw_compile *p,
1358 struct brw_instruction *jmp_insn)
1359 {
1360 struct intel_context *intel = &p->brw->intel;
1361 struct brw_instruction *landing = &p->store[p->nr_insn];
1362 GLuint jmpi = 1;
1363
1364 if (intel->gen >= 5)
1365 jmpi = 2;
1366
1367 assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
1368 assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE);
1369
1370 jmp_insn->bits3.ud = jmpi * ((landing - jmp_insn) - 1);
1371 }
1372
1373
1374
1375 /* To integrate with the above, it makes sense that the comparison
1376 * instruction should populate the flag register. It might be simpler
1377 * just to use the flag reg for most WM tasks?
1378 */
1379 void brw_CMP(struct brw_compile *p,
1380 struct brw_reg dest,
1381 GLuint conditional,
1382 struct brw_reg src0,
1383 struct brw_reg src1)
1384 {
1385 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP);
1386
1387 insn->header.destreg__conditionalmod = conditional;
1388 brw_set_dest(p, insn, dest);
1389 brw_set_src0(p, insn, src0);
1390 brw_set_src1(p, insn, src1);
1391
1392 /* guess_execution_size(insn, src0); */
1393
1394
1395 /* Make it so that future instructions will use the computed flag
1396 * value until brw_set_predicate_control_flag_value() is called
1397 * again.
1398 */
1399 if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
1400 dest.nr == 0) {
1401 p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
1402 p->flag_value = 0xff;
1403 }
1404 }
1405
1406 /* Issue 'wait' instruction for n1, host could program MMIO
1407 to wake up thread. */
1408 void brw_WAIT (struct brw_compile *p)
1409 {
1410 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_WAIT);
1411 struct brw_reg src = brw_notification_1_reg();
1412
1413 brw_set_dest(p, insn, src);
1414 brw_set_src0(p, insn, src);
1415 brw_set_src1(p, insn, brw_null_reg());
1416 insn->header.execution_size = 0; /* must */
1417 insn->header.predicate_control = 0;
1418 insn->header.compression_control = 0;
1419 }
1420
1421
1422 /***********************************************************************
1423 * Helpers for the various SEND message types:
1424 */
1425
/** Extended math function, float[8].
 *
 * On gen6+ this emits a native MATH instruction; on earlier gens it is
 * a SEND to the shared math unit.  msg_reg_nr, data_type and precision
 * are only used on the pre-gen6 message path.
 */
void brw_math( struct brw_compile *p,
	       struct brw_reg dest,
	       GLuint function,
	       GLuint saturate,
	       GLuint msg_reg_nr,
	       struct brw_reg src,
	       GLuint data_type,
	       GLuint precision )
{
   struct intel_context *intel = &p->brw->intel;

   if (intel->gen >= 6) {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);

      /* The native math instruction only accepts packed GRF operands. */
      assert(dest.file == BRW_GENERAL_REGISTER_FILE);
      assert(src.file == BRW_GENERAL_REGISTER_FILE);

      assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
      assert(src.hstride == BRW_HORIZONTAL_STRIDE_1);

      /* Source modifiers are ignored for extended math instructions. */
      assert(!src.negate);
      assert(!src.abs);

      /* Integer-divide variants take integer sources; all other math
       * functions operate on floats.
       */
      if (function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||
	  function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER ||
	  function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
	 assert(src.type != BRW_REGISTER_TYPE_F);
      } else {
	 assert(src.type == BRW_REGISTER_TYPE_F);
      }

      /* Math is the same ISA format as other opcodes, except that CondModifier
       * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
       */
      insn->header.destreg__conditionalmod = function;
      insn->header.saturate = saturate;

      brw_set_dest(p, insn, dest);
      brw_set_src0(p, insn, src);
      brw_set_src1(p, insn, brw_null_reg());
   } else {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);

      /* Example code doesn't set predicate_control for send
       * instructions.
       */
      insn->header.predicate_control = 0;
      insn->header.destreg__conditionalmod = msg_reg_nr;

      brw_set_dest(p, insn, dest);
      brw_set_src0(p, insn, src);
      brw_set_math_message(p,
			   insn,
			   function,
			   src.type == BRW_REGISTER_TYPE_D,
			   precision,
			   saturate,
			   data_type);
   }
}
1489
/** Extended math function, float[8].
 *
 * Two-source variant (e.g. POW, INT DIV); gen6+ only, where math is a
 * native instruction rather than a message.
 */
void brw_math2(struct brw_compile *p,
	       struct brw_reg dest,
	       GLuint function,
	       struct brw_reg src0,
	       struct brw_reg src1)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);

   assert(intel->gen >= 6);
   (void) intel;


   /* The native math instruction only accepts packed GRF operands. */
   assert(dest.file == BRW_GENERAL_REGISTER_FILE);
   assert(src0.file == BRW_GENERAL_REGISTER_FILE);
   assert(src1.file == BRW_GENERAL_REGISTER_FILE);

   assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
   assert(src0.hstride == BRW_HORIZONTAL_STRIDE_1);
   assert(src1.hstride == BRW_HORIZONTAL_STRIDE_1);

   /* Integer-divide variants take integer sources; everything else is
    * float.
    */
   if (function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||
       function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER ||
       function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
      assert(src0.type != BRW_REGISTER_TYPE_F);
      assert(src1.type != BRW_REGISTER_TYPE_F);
   } else {
      assert(src0.type == BRW_REGISTER_TYPE_F);
      assert(src1.type == BRW_REGISTER_TYPE_F);
   }

   /* Source modifiers are ignored for extended math instructions. */
   assert(!src0.negate);
   assert(!src0.abs);
   assert(!src1.negate);
   assert(!src1.abs);

   /* Math is the same ISA format as other opcodes, except that CondModifier
    * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
    */
   insn->header.destreg__conditionalmod = function;

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, src1);
}
1538
1539 /**
1540 * Extended math function, float[16].
1541 * Use 2 send instructions.
1542 */
1543 void brw_math_16( struct brw_compile *p,
1544 struct brw_reg dest,
1545 GLuint function,
1546 GLuint saturate,
1547 GLuint msg_reg_nr,
1548 struct brw_reg src,
1549 GLuint precision )
1550 {
1551 struct intel_context *intel = &p->brw->intel;
1552 struct brw_instruction *insn;
1553
1554 if (intel->gen >= 6) {
1555 insn = next_insn(p, BRW_OPCODE_MATH);
1556
1557 /* Math is the same ISA format as other opcodes, except that CondModifier
1558 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1559 */
1560 insn->header.destreg__conditionalmod = function;
1561 insn->header.saturate = saturate;
1562
1563 /* Source modifiers are ignored for extended math instructions. */
1564 assert(!src.negate);
1565 assert(!src.abs);
1566
1567 brw_set_dest(p, insn, dest);
1568 brw_set_src0(p, insn, src);
1569 brw_set_src1(p, insn, brw_null_reg());
1570 return;
1571 }
1572
1573 /* First instruction:
1574 */
1575 brw_push_insn_state(p);
1576 brw_set_predicate_control_flag_value(p, 0xff);
1577 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1578
1579 insn = next_insn(p, BRW_OPCODE_SEND);
1580 insn->header.destreg__conditionalmod = msg_reg_nr;
1581
1582 brw_set_dest(p, insn, dest);
1583 brw_set_src0(p, insn, src);
1584 brw_set_math_message(p,
1585 insn,
1586 function,
1587 BRW_MATH_INTEGER_UNSIGNED,
1588 precision,
1589 saturate,
1590 BRW_MATH_DATA_VECTOR);
1591
1592 /* Second instruction:
1593 */
1594 insn = next_insn(p, BRW_OPCODE_SEND);
1595 insn->header.compression_control = BRW_COMPRESSION_2NDHALF;
1596 insn->header.destreg__conditionalmod = msg_reg_nr+1;
1597
1598 brw_set_dest(p, insn, offset(dest,1));
1599 brw_set_src0(p, insn, src);
1600 brw_set_math_message(p,
1601 insn,
1602 function,
1603 BRW_MATH_INTEGER_UNSIGNED,
1604 precision,
1605 saturate,
1606 BRW_MATH_DATA_VECTOR);
1607
1608 brw_pop_insn_state(p);
1609 }
1610
1611
1612 /**
1613 * Write a block of OWORDs (half a GRF each) from the scratch buffer,
1614 * using a constant offset per channel.
1615 *
1616 * The offset must be aligned to oword size (16 bytes). Used for
1617 * register spilling.
1618 */
1619 void brw_oword_block_write_scratch(struct brw_compile *p,
1620 struct brw_reg mrf,
1621 int num_regs,
1622 GLuint offset)
1623 {
1624 struct intel_context *intel = &p->brw->intel;
1625 uint32_t msg_control, msg_type;
1626 int mlen;
1627
1628 if (intel->gen >= 6)
1629 offset /= 16;
1630
1631 mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
1632
1633 if (num_regs == 1) {
1634 msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
1635 mlen = 2;
1636 } else {
1637 msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
1638 mlen = 3;
1639 }
1640
1641 /* Set up the message header. This is g0, with g0.2 filled with
1642 * the offset. We don't want to leave our offset around in g0 or
1643 * it'll screw up texture samples, so set it up inside the message
1644 * reg.
1645 */
1646 {
1647 brw_push_insn_state(p);
1648 brw_set_mask_control(p, BRW_MASK_DISABLE);
1649 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1650
1651 brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
1652
1653 /* set message header global offset field (reg 0, element 2) */
1654 brw_MOV(p,
1655 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
1656 mrf.nr,
1657 2), BRW_REGISTER_TYPE_UD),
1658 brw_imm_ud(offset));
1659
1660 brw_pop_insn_state(p);
1661 }
1662
1663 {
1664 struct brw_reg dest;
1665 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1666 int send_commit_msg;
1667 struct brw_reg src_header = retype(brw_vec8_grf(0, 0),
1668 BRW_REGISTER_TYPE_UW);
1669
1670 if (insn->header.compression_control != BRW_COMPRESSION_NONE) {
1671 insn->header.compression_control = BRW_COMPRESSION_NONE;
1672 src_header = vec16(src_header);
1673 }
1674 assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
1675 insn->header.destreg__conditionalmod = mrf.nr;
1676
1677 /* Until gen6, writes followed by reads from the same location
1678 * are not guaranteed to be ordered unless write_commit is set.
1679 * If set, then a no-op write is issued to the destination
1680 * register to set a dependency, and a read from the destination
1681 * can be used to ensure the ordering.
1682 *
1683 * For gen6, only writes between different threads need ordering
1684 * protection. Our use of DP writes is all about register
1685 * spilling within a thread.
1686 */
1687 if (intel->gen >= 6) {
1688 dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
1689 send_commit_msg = 0;
1690 } else {
1691 dest = src_header;
1692 send_commit_msg = 1;
1693 }
1694
1695 brw_set_dest(p, insn, dest);
1696 if (intel->gen >= 6) {
1697 brw_set_src0(p, insn, mrf);
1698 } else {
1699 brw_set_src0(p, insn, brw_null_reg());
1700 }
1701
1702 if (intel->gen >= 6)
1703 msg_type = GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
1704 else
1705 msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
1706
1707 brw_set_dp_write_message(p,
1708 insn,
1709 255, /* binding table index (255=stateless) */
1710 msg_control,
1711 msg_type,
1712 mlen,
1713 true, /* header_present */
1714 0, /* not a render target */
1715 send_commit_msg, /* response_length */
1716 0, /* eot */
1717 send_commit_msg);
1718 }
1719 }
1720
1721
1722 /**
1723 * Read a block of owords (half a GRF each) from the scratch buffer
1724 * using a constant index per channel.
1725 *
1726 * Offset must be aligned to oword size (16 bytes). Used for register
1727 * spilling.
1728 */
1729 void
1730 brw_oword_block_read_scratch(struct brw_compile *p,
1731 struct brw_reg dest,
1732 struct brw_reg mrf,
1733 int num_regs,
1734 GLuint offset)
1735 {
1736 struct intel_context *intel = &p->brw->intel;
1737 uint32_t msg_control;
1738 int rlen;
1739
1740 if (intel->gen >= 6)
1741 offset /= 16;
1742
1743 mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
1744 dest = retype(dest, BRW_REGISTER_TYPE_UW);
1745
1746 if (num_regs == 1) {
1747 msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
1748 rlen = 1;
1749 } else {
1750 msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
1751 rlen = 2;
1752 }
1753
1754 {
1755 brw_push_insn_state(p);
1756 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1757 brw_set_mask_control(p, BRW_MASK_DISABLE);
1758
1759 brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
1760
1761 /* set message header global offset field (reg 0, element 2) */
1762 brw_MOV(p,
1763 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
1764 mrf.nr,
1765 2), BRW_REGISTER_TYPE_UD),
1766 brw_imm_ud(offset));
1767
1768 brw_pop_insn_state(p);
1769 }
1770
1771 {
1772 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1773
1774 assert(insn->header.predicate_control == 0);
1775 insn->header.compression_control = BRW_COMPRESSION_NONE;
1776 insn->header.destreg__conditionalmod = mrf.nr;
1777
1778 brw_set_dest(p, insn, dest); /* UW? */
1779 if (intel->gen >= 6) {
1780 brw_set_src0(p, insn, mrf);
1781 } else {
1782 brw_set_src0(p, insn, brw_null_reg());
1783 }
1784
1785 brw_set_dp_read_message(p,
1786 insn,
1787 255, /* binding table index (255=stateless) */
1788 msg_control,
1789 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
1790 BRW_DATAPORT_READ_TARGET_RENDER_CACHE,
1791 1, /* msg_length */
1792 rlen);
1793 }
1794 }
1795
1796 /**
1797 * Read a float[4] vector from the data port Data Cache (const buffer).
1798 * Location (in buffer) should be a multiple of 16.
1799 * Used for fetching shader constants.
1800 */
1801 void brw_oword_block_read(struct brw_compile *p,
1802 struct brw_reg dest,
1803 struct brw_reg mrf,
1804 uint32_t offset,
1805 uint32_t bind_table_index)
1806 {
1807 struct intel_context *intel = &p->brw->intel;
1808
1809 /* On newer hardware, offset is in units of owords. */
1810 if (intel->gen >= 6)
1811 offset /= 16;
1812
1813 mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
1814
1815 brw_push_insn_state(p);
1816 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1817 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1818 brw_set_mask_control(p, BRW_MASK_DISABLE);
1819
1820 brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
1821
1822 /* set message header global offset field (reg 0, element 2) */
1823 brw_MOV(p,
1824 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
1825 mrf.nr,
1826 2), BRW_REGISTER_TYPE_UD),
1827 brw_imm_ud(offset));
1828
1829 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1830 insn->header.destreg__conditionalmod = mrf.nr;
1831
1832 /* cast dest to a uword[8] vector */
1833 dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);
1834
1835 brw_set_dest(p, insn, dest);
1836 if (intel->gen >= 6) {
1837 brw_set_src0(p, insn, mrf);
1838 } else {
1839 brw_set_src0(p, insn, brw_null_reg());
1840 }
1841
1842 brw_set_dp_read_message(p,
1843 insn,
1844 bind_table_index,
1845 BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW,
1846 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ,
1847 BRW_DATAPORT_READ_TARGET_DATA_CACHE,
1848 1, /* msg_length */
1849 1); /* response_length (1 reg, 2 owords!) */
1850
1851 brw_pop_insn_state(p);
1852 }
1853
1854 /**
1855 * Read a set of dwords from the data port Data Cache (const buffer).
1856 *
1857 * Location (in buffer) appears as UD offsets in the register after
1858 * the provided mrf header reg.
1859 */
1860 void brw_dword_scattered_read(struct brw_compile *p,
1861 struct brw_reg dest,
1862 struct brw_reg mrf,
1863 uint32_t bind_table_index)
1864 {
1865 mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
1866
1867 brw_push_insn_state(p);
1868 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1869 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1870 brw_set_mask_control(p, BRW_MASK_DISABLE);
1871 brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
1872 brw_pop_insn_state(p);
1873
1874 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1875 insn->header.destreg__conditionalmod = mrf.nr;
1876
1877 /* cast dest to a uword[8] vector */
1878 dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);
1879
1880 brw_set_dest(p, insn, dest);
1881 brw_set_src0(p, insn, brw_null_reg());
1882
1883 brw_set_dp_read_message(p,
1884 insn,
1885 bind_table_index,
1886 BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS,
1887 BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ,
1888 BRW_DATAPORT_READ_TARGET_DATA_CACHE,
1889 2, /* msg_length */
1890 1); /* response_length */
1891 }
1892
1893
1894
1895 /**
1896 * Read float[4] constant(s) from VS constant buffer.
1897 * For relative addressing, two float[4] constants will be read into 'dest'.
1898 * Otherwise, one float[4] constant will be read into the lower half of 'dest'.
1899 */
1900 void brw_dp_READ_4_vs(struct brw_compile *p,
1901 struct brw_reg dest,
1902 GLuint location,
1903 GLuint bind_table_index)
1904 {
1905 struct intel_context *intel = &p->brw->intel;
1906 struct brw_instruction *insn;
1907 GLuint msg_reg_nr = 1;
1908
1909 if (intel->gen >= 6)
1910 location /= 16;
1911
1912 /* Setup MRF[1] with location/offset into const buffer */
1913 brw_push_insn_state(p);
1914 brw_set_access_mode(p, BRW_ALIGN_1);
1915 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1916 brw_set_mask_control(p, BRW_MASK_DISABLE);
1917 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1918 brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 2),
1919 BRW_REGISTER_TYPE_UD),
1920 brw_imm_ud(location));
1921 brw_pop_insn_state(p);
1922
1923 insn = next_insn(p, BRW_OPCODE_SEND);
1924
1925 insn->header.predicate_control = BRW_PREDICATE_NONE;
1926 insn->header.compression_control = BRW_COMPRESSION_NONE;
1927 insn->header.destreg__conditionalmod = msg_reg_nr;
1928 insn->header.mask_control = BRW_MASK_DISABLE;
1929
1930 brw_set_dest(p, insn, dest);
1931 if (intel->gen >= 6) {
1932 brw_set_src0(p, insn, brw_message_reg(msg_reg_nr));
1933 } else {
1934 brw_set_src0(p, insn, brw_null_reg());
1935 }
1936
1937 brw_set_dp_read_message(p,
1938 insn,
1939 bind_table_index,
1940 0,
1941 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
1942 BRW_DATAPORT_READ_TARGET_DATA_CACHE,
1943 1, /* msg_length */
1944 1); /* response_length (1 Oword) */
1945 }
1946
1947 /**
1948 * Read a float[4] constant per vertex from VS constant buffer, with
1949 * relative addressing.
1950 */
1951 void brw_dp_READ_4_vs_relative(struct brw_compile *p,
1952 struct brw_reg dest,
1953 struct brw_reg addr_reg,
1954 GLuint offset,
1955 GLuint bind_table_index)
1956 {
1957 struct intel_context *intel = &p->brw->intel;
1958 struct brw_reg src = brw_vec8_grf(0, 0);
1959 int msg_type;
1960
1961 /* Setup MRF[1] with offset into const buffer */
1962 brw_push_insn_state(p);
1963 brw_set_access_mode(p, BRW_ALIGN_1);
1964 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1965 brw_set_mask_control(p, BRW_MASK_DISABLE);
1966 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1967
1968 /* M1.0 is block offset 0, M1.4 is block offset 1, all other
1969 * fields ignored.
1970 */
1971 brw_ADD(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_D),
1972 addr_reg, brw_imm_d(offset));
1973 brw_pop_insn_state(p);
1974
1975 gen6_resolve_implied_move(p, &src, 0);
1976 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1977
1978 insn->header.predicate_control = BRW_PREDICATE_NONE;
1979 insn->header.compression_control = BRW_COMPRESSION_NONE;
1980 insn->header.destreg__conditionalmod = 0;
1981 insn->header.mask_control = BRW_MASK_DISABLE;
1982
1983 brw_set_dest(p, insn, dest);
1984 brw_set_src0(p, insn, src);
1985
1986 if (intel->gen >= 6)
1987 msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
1988 else if (intel->gen == 5 || intel->is_g4x)
1989 msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
1990 else
1991 msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
1992
1993 brw_set_dp_read_message(p,
1994 insn,
1995 bind_table_index,
1996 BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
1997 msg_type,
1998 BRW_DATAPORT_READ_TARGET_DATA_CACHE,
1999 2, /* msg_length */
2000 1); /* response_length */
2001 }
2002
2003
2004
2005 void brw_fb_WRITE(struct brw_compile *p,
2006 int dispatch_width,
2007 GLuint msg_reg_nr,
2008 struct brw_reg src0,
2009 GLuint binding_table_index,
2010 GLuint msg_length,
2011 GLuint response_length,
2012 bool eot,
2013 bool header_present)
2014 {
2015 struct intel_context *intel = &p->brw->intel;
2016 struct brw_instruction *insn;
2017 GLuint msg_control, msg_type;
2018 struct brw_reg dest;
2019
2020 if (dispatch_width == 16)
2021 dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
2022 else
2023 dest = retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW);
2024
2025 if (intel->gen >= 6 && binding_table_index == 0) {
2026 insn = next_insn(p, BRW_OPCODE_SENDC);
2027 } else {
2028 insn = next_insn(p, BRW_OPCODE_SEND);
2029 }
2030 /* The execution mask is ignored for render target writes. */
2031 insn->header.predicate_control = 0;
2032 insn->header.compression_control = BRW_COMPRESSION_NONE;
2033
2034 if (intel->gen >= 6) {
2035 /* headerless version, just submit color payload */
2036 src0 = brw_message_reg(msg_reg_nr);
2037
2038 msg_type = GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
2039 } else {
2040 insn->header.destreg__conditionalmod = msg_reg_nr;
2041
2042 msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
2043 }
2044
2045 if (dispatch_width == 16)
2046 msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE;
2047 else
2048 msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01;
2049
2050 brw_set_dest(p, insn, dest);
2051 brw_set_src0(p, insn, src0);
2052 brw_set_dp_write_message(p,
2053 insn,
2054 binding_table_index,
2055 msg_control,
2056 msg_type,
2057 msg_length,
2058 header_present,
2059 1, /* last render target write */
2060 response_length,
2061 eot,
2062 0 /* send_commit_msg */);
2063 }
2064
2065
/**
 * Texture sample instruction.
 * Note: the msg_type plus msg_length values determine exactly what kind
 * of sampling operation is performed.  See volume 4, page 161 of docs.
 *
 * Emits nothing when writemask is 0.  When the writemask is partial,
 * applies a hardware workaround: either shrink the response via the
 * message-header channel mask (contiguous masks) or emit a dependency
 * stall after the send (non-contiguous masks).
 */
void brw_SAMPLE(struct brw_compile *p,
		struct brw_reg dest,
		GLuint msg_reg_nr,
		struct brw_reg src0,
		GLuint binding_table_index,
		GLuint sampler,
		GLuint writemask,
		GLuint msg_type,
		GLuint response_length,
		GLuint msg_length,
		GLuint header_present,
		GLuint simd_mode)
{
   struct intel_context *intel = &p->brw->intel;
   bool need_stall = 0;

   /* No channels enabled: nothing to do, emit no instruction. */
   if (writemask == 0) {
      /*printf("%s: zero writemask??\n", __FUNCTION__); */
      return;
   }

   /* Hardware doesn't do destination dependency checking on send
    * instructions properly.  Add a workaround which generates the
    * dependency by other means.  In practice it seems like this bug
    * only crops up for texture samples, and only where registers are
    * written by the send and then written again later without being
    * read in between.  Luckily for us, we already track that
    * information and use it to modify the writemask for the
    * instruction, so that is a guide for whether a workaround is
    * needed.
    */
   if (writemask != WRITEMASK_XYZW) {
      GLuint dst_offset = 0;
      GLuint i, newmask = 0, len = 0;

      /* Skip leading disabled channels; each one advances the
       * destination offset by 2.
       */
      for (i = 0; i < 4; i++) {
	 if (writemask & (1<<i))
	    break;
	 dst_offset += 2;
      }
      /* Collect the contiguous run of enabled channels that follows. */
      for (; i < 4; i++) {
	 if (!(writemask & (1<<i)))
	    break;
	 newmask |= 1<<i;
	 len++;
      }

      if (newmask != writemask) {
	 /* Non-contiguous writemask: can't be expressed with the
	  * header channel mask, so fall back to stalling on the
	  * result after the send instead.
	  */
	 need_stall = 1;
         /* printf("need stall %x %x\n", newmask , writemask); */
      }
      else {
	 bool dispatch_16 = false;

	 struct brw_reg m1 = brw_message_reg(msg_reg_nr);

	 guess_execution_size(p, p->current, dest);
	 if (p->current->header.execution_size == BRW_EXECUTE_16)
	    dispatch_16 = true;

	 /* Invert: the header field disables the channels that are
	  * set, so mark the channels we do NOT want written.
	  */
	 newmask = ~newmask & WRITEMASK_XYZW;

	 brw_push_insn_state(p);

	 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
	 brw_set_mask_control(p, BRW_MASK_DISABLE);

	 /* Copy r0 into the message header, then write the channel
	  * mask into bits 12..15 of M1.2.
	  */
	 brw_MOV(p, retype(m1, BRW_REGISTER_TYPE_UD),
		 retype(brw_vec8_grf(0,0), BRW_REGISTER_TYPE_UD));
  	 brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12));

	 brw_pop_insn_state(p);

  	 src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
	 dest = offset(dest, dst_offset);

	 /* For 16-wide dispatch, masked channels are skipped in the
	  * response. For 8-wide, masked channels still take up slots,
	  * and are just not written to.
	  */
	 if (dispatch_16)
	    response_length = len * 2;
      }
   }

   {
      struct brw_instruction *insn;

      /* On gen6+ the payload must originate in the GRF; presumably
       * this copies src0 into place when needed — confirm against
       * gen6_resolve_implied_move.
       */
      gen6_resolve_implied_move(p, &src0, msg_reg_nr);

      insn = next_insn(p, BRW_OPCODE_SEND);
      insn->header.predicate_control = 0; /* XXX */
      insn->header.compression_control = BRW_COMPRESSION_NONE;
      if (intel->gen < 6)
	  insn->header.destreg__conditionalmod = msg_reg_nr;

      brw_set_dest(p, insn, dest);
      brw_set_src0(p, insn, src0);
      brw_set_sampler_message(p, insn,
			      binding_table_index,
			      sampler,
			      msg_type,
			      response_length,
			      msg_length,
			      header_present,
			      simd_mode);
   }

   if (need_stall) {
      struct brw_reg reg = vec8(offset(dest, response_length-1));

      /* Touch the last register of the response with a self-MOV to
       * force the dependency on the sampler result:
       *   mov (8) r9.0<1>:f    r9.0<8;8,1>:f    { Align1 }
       */
      brw_push_insn_state(p);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      brw_MOV(p, retype(reg, BRW_REGISTER_TYPE_UD),
	      retype(reg, BRW_REGISTER_TYPE_UD));
      brw_pop_insn_state(p);
   }

}
2192
/* All these variables are pretty confusing - we might be better off
 * using bitmasks and macros for this, in the old style.  Or perhaps
 * just having the caller instantiate the fields in dword3 itself.
 *
 * Emits a URB write SEND: the payload is msg_length MRFs starting at
 * msg_reg_nr; the descriptor fields (allocate/used/eot/offset/swizzle
 * etc.) are packed by brw_set_urb_message().
 */
void brw_urb_WRITE(struct brw_compile *p,
		   struct brw_reg dest,
		   GLuint msg_reg_nr,
		   struct brw_reg src0,
		   bool allocate,
		   bool used,
		   GLuint msg_length,
		   GLuint response_length,
		   bool eot,
		   bool writes_complete,
		   GLuint offset,
		   GLuint swizzle)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;

   /* Presumably copies src0 into the MRF payload on gens where SEND
    * has no implied move — confirm against gen6_resolve_implied_move.
    */
   gen6_resolve_implied_move(p, &src0, msg_reg_nr);

   if (intel->gen == 7) {
      /* Enable Channel Masks in the URB_WRITE_HWORD message header:
       * OR 0xff00 into header dword 5 (copied from r0.5).
       */
      brw_push_insn_state(p);
      brw_set_access_mode(p, BRW_ALIGN_1);
      brw_OR(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 5),
		       BRW_REGISTER_TYPE_UD),
	        retype(brw_vec1_grf(0, 5), BRW_REGISTER_TYPE_UD),
		brw_imm_ud(0xff00));
      brw_pop_insn_state(p);
   }

   insn = next_insn(p, BRW_OPCODE_SEND);

   assert(msg_length < BRW_MAX_MRF);

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, brw_imm_d(0));

   /* Pre-gen6 the base MRF number is encoded in the instruction. */
   if (intel->gen < 6)
      insn->header.destreg__conditionalmod = msg_reg_nr;

   brw_set_urb_message(p,
		       insn,
		       allocate,
		       used,
		       msg_length,
		       response_length,
		       eot,
		       writes_complete,
		       offset,
		       swizzle);
}
2248
2249 static int
2250 brw_find_next_block_end(struct brw_compile *p, int start)
2251 {
2252 int ip;
2253
2254 for (ip = start + 1; ip < p->nr_insn; ip++) {
2255 struct brw_instruction *insn = &p->store[ip];
2256
2257 switch (insn->header.opcode) {
2258 case BRW_OPCODE_ENDIF:
2259 case BRW_OPCODE_ELSE:
2260 case BRW_OPCODE_WHILE:
2261 return ip;
2262 }
2263 }
2264 assert(!"not reached");
2265 return start + 1;
2266 }
2267
2268 /* There is no DO instruction on gen6, so to find the end of the loop
2269 * we have to see if the loop is jumping back before our start
2270 * instruction.
2271 */
2272 static int
2273 brw_find_loop_end(struct brw_compile *p, int start)
2274 {
2275 struct intel_context *intel = &p->brw->intel;
2276 int ip;
2277 int br = 2;
2278
2279 for (ip = start + 1; ip < p->nr_insn; ip++) {
2280 struct brw_instruction *insn = &p->store[ip];
2281
2282 if (insn->header.opcode == BRW_OPCODE_WHILE) {
2283 int jip = intel->gen == 6 ? insn->bits1.branch_gen6.jump_count
2284 : insn->bits3.break_cont.jip;
2285 if (ip + jip / br <= start)
2286 return ip;
2287 }
2288 }
2289 assert(!"not reached");
2290 return start + 1;
2291 }
2292
2293 /* After program generation, go back and update the UIP and JIP of
2294 * BREAK and CONT instructions to their correct locations.
2295 */
2296 void
2297 brw_set_uip_jip(struct brw_compile *p)
2298 {
2299 struct intel_context *intel = &p->brw->intel;
2300 int ip;
2301 int br = 2;
2302
2303 if (intel->gen < 6)
2304 return;
2305
2306 for (ip = 0; ip < p->nr_insn; ip++) {
2307 struct brw_instruction *insn = &p->store[ip];
2308
2309 switch (insn->header.opcode) {
2310 case BRW_OPCODE_BREAK:
2311 insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip);
2312 /* Gen7 UIP points to WHILE; Gen6 points just after it */
2313 insn->bits3.break_cont.uip =
2314 br * (brw_find_loop_end(p, ip) - ip + (intel->gen == 6 ? 1 : 0));
2315 break;
2316 case BRW_OPCODE_CONTINUE:
2317 insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip);
2318 insn->bits3.break_cont.uip = br * (brw_find_loop_end(p, ip) - ip);
2319
2320 assert(insn->bits3.break_cont.uip != 0);
2321 assert(insn->bits3.break_cont.jip != 0);
2322 break;
2323 }
2324 }
2325 }
2326
2327 void brw_ff_sync(struct brw_compile *p,
2328 struct brw_reg dest,
2329 GLuint msg_reg_nr,
2330 struct brw_reg src0,
2331 bool allocate,
2332 GLuint response_length,
2333 bool eot)
2334 {
2335 struct intel_context *intel = &p->brw->intel;
2336 struct brw_instruction *insn;
2337
2338 gen6_resolve_implied_move(p, &src0, msg_reg_nr);
2339
2340 insn = next_insn(p, BRW_OPCODE_SEND);
2341 brw_set_dest(p, insn, dest);
2342 brw_set_src0(p, insn, src0);
2343 brw_set_src1(p, insn, brw_imm_d(0));
2344
2345 if (intel->gen < 6)
2346 insn->header.destreg__conditionalmod = msg_reg_nr;
2347
2348 brw_set_ff_sync_message(p,
2349 insn,
2350 allocate,
2351 response_length,
2352 eot);
2353 }