i965/gen4: Fix sampling from integer textures.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_eu_emit.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32
33 #include "brw_context.h"
34 #include "brw_defines.h"
35 #include "brw_eu.h"
36
37 #include "glsl/ralloc.h"
38
39 /***********************************************************************
40 * Internal helper for constructing instructions
41 */
42
43 static void guess_execution_size(struct brw_compile *p,
44 struct brw_instruction *insn,
45 struct brw_reg reg)
46 {
47 if (reg.width == BRW_WIDTH_8 && p->compressed)
48 insn->header.execution_size = BRW_EXECUTE_16;
49 else
50 insn->header.execution_size = reg.width; /* note - definitions are compatible */
51 }
52
53
54 /**
55 * Prior to Sandybridge, the SEND instruction accepted non-MRF source
56 * registers, implicitly moving the operand to a message register.
57 *
58 * On Sandybridge, this is no longer the case. This function performs the
59 * explicit move; it should be called before emitting a SEND instruction.
60 */
/**
 * Prior to Sandybridge, the SEND instruction accepted non-MRF source
 * registers, implicitly moving the operand to a message register.
 *
 * On Sandybridge, this is no longer the case.  This function performs the
 * explicit move; it should be called before emitting a SEND instruction.
 *
 * On return, *src is rewritten to point at the message register, so the
 * caller encodes the SEND against the moved copy.
 */
void
gen6_resolve_implied_move(struct brw_compile *p,
			  struct brw_reg *src,
			  GLuint msg_reg_nr)
{
   struct intel_context *intel = &p->brw->intel;
   /* Pre-gen6 hardware does the move implicitly; nothing to do. */
   if (intel->gen < 6)
      return;

   /* A null source carries no payload, so only emit the MOV for real
    * registers.  The MOV runs unmasked and uncompressed so the raw bits
    * are copied regardless of current channel-enable state.
    */
   if (src->file != BRW_ARCHITECTURE_REGISTER_FILE || src->nr != BRW_ARF_NULL) {
      brw_push_insn_state(p);
      brw_set_mask_control(p, BRW_MASK_DISABLE);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      brw_MOV(p, retype(brw_message_reg(msg_reg_nr), BRW_REGISTER_TYPE_UD),
	      retype(*src, BRW_REGISTER_TYPE_UD));
      brw_pop_insn_state(p);
   }
   *src = brw_message_reg(msg_reg_nr);
}
80
81 static void
82 gen7_convert_mrf_to_grf(struct brw_compile *p, struct brw_reg *reg)
83 {
84 struct intel_context *intel = &p->brw->intel;
85 if (intel->gen == 7 && reg->file == BRW_MESSAGE_REGISTER_FILE) {
86 reg->file = BRW_GENERAL_REGISTER_FILE;
87 reg->nr += 111;
88 }
89 }
90
91
/**
 * Encode the destination operand of \p insn from \p dest.
 *
 * Handles both direct and register-indirect addressing, and both align1
 * and align16 access modes (whose destination bit layouts differ).  Also
 * derives the instruction's execution size from the register width.
 */
void
brw_set_dest(struct brw_compile *p, struct brw_instruction *insn,
	     struct brw_reg dest)
{
   /* Register numbers are 7 bits; ARF and MRF destinations are exempt. */
   if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE &&
       dest.file != BRW_MESSAGE_REGISTER_FILE)
      assert(dest.nr < 128);

   /* Gen7 has no MRFs; rewrite message registers to high GRFs. */
   gen7_convert_mrf_to_grf(p, &dest);

   insn->bits1.da1.dest_reg_file = dest.file;
   insn->bits1.da1.dest_reg_type = dest.type;
   insn->bits1.da1.dest_address_mode = dest.address_mode;

   if (dest.address_mode == BRW_ADDRESS_DIRECT) {
      insn->bits1.da1.dest_reg_nr = dest.nr;

      if (insn->header.access_mode == BRW_ALIGN_1) {
	 insn->bits1.da1.dest_subreg_nr = dest.subnr;
	 /* hstride 0 is not a valid destination encoding; use 1. */
	 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
	    dest.hstride = BRW_HORIZONTAL_STRIDE_1;
	 insn->bits1.da1.dest_horiz_stride = dest.hstride;
      }
      else {
	 insn->bits1.da16.dest_subreg_nr = dest.subnr / 16;
	 insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask;
	 /* even ignored in da16, still need to set as '01' */
	 insn->bits1.da16.dest_horiz_stride = 1;
      }
   }
   else {
      insn->bits1.ia1.dest_subreg_nr = dest.subnr;

      /* These are different sizes in align1 vs align16:
       */
      if (insn->header.access_mode == BRW_ALIGN_1) {
	 insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset;
	 /* hstride 0 is not a valid destination encoding; use 1. */
	 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
	    dest.hstride = BRW_HORIZONTAL_STRIDE_1;
	 insn->bits1.ia1.dest_horiz_stride = dest.hstride;
      }
      else {
	 insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset;
	 /* hstride is ignored in align16, but must still be set to '01' */
	 insn->bits1.ia16.dest_horiz_stride = 1;
      }
   }

   /* NEW: Set the execution size based on dest.width and
    * insn->compression_control:
    */
   guess_execution_size(p, insn, dest);
}
145
146 extern int reg_type_size[];
147
148 static void
149 validate_reg(struct brw_instruction *insn, struct brw_reg reg)
150 {
151 int hstride_for_reg[] = {0, 1, 2, 4};
152 int vstride_for_reg[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256};
153 int width_for_reg[] = {1, 2, 4, 8, 16};
154 int execsize_for_reg[] = {1, 2, 4, 8, 16};
155 int width, hstride, vstride, execsize;
156
157 if (reg.file == BRW_IMMEDIATE_VALUE) {
158 /* 3.3.6: Region Parameters. Restriction: Immediate vectors
159 * mean the destination has to be 128-bit aligned and the
160 * destination horiz stride has to be a word.
161 */
162 if (reg.type == BRW_REGISTER_TYPE_V) {
163 assert(hstride_for_reg[insn->bits1.da1.dest_horiz_stride] *
164 reg_type_size[insn->bits1.da1.dest_reg_type] == 2);
165 }
166
167 return;
168 }
169
170 if (reg.file == BRW_ARCHITECTURE_REGISTER_FILE &&
171 reg.file == BRW_ARF_NULL)
172 return;
173
174 assert(reg.hstride >= 0 && reg.hstride < Elements(hstride_for_reg));
175 hstride = hstride_for_reg[reg.hstride];
176
177 if (reg.vstride == 0xf) {
178 vstride = -1;
179 } else {
180 assert(reg.vstride >= 0 && reg.vstride < Elements(vstride_for_reg));
181 vstride = vstride_for_reg[reg.vstride];
182 }
183
184 assert(reg.width >= 0 && reg.width < Elements(width_for_reg));
185 width = width_for_reg[reg.width];
186
187 assert(insn->header.execution_size >= 0 &&
188 insn->header.execution_size < Elements(execsize_for_reg));
189 execsize = execsize_for_reg[insn->header.execution_size];
190
191 /* Restrictions from 3.3.10: Register Region Restrictions. */
192 /* 3. */
193 assert(execsize >= width);
194
195 /* 4. */
196 if (execsize == width && hstride != 0) {
197 assert(vstride == -1 || vstride == width * hstride);
198 }
199
200 /* 5. */
201 if (execsize == width && hstride == 0) {
202 /* no restriction on vstride. */
203 }
204
205 /* 6. */
206 if (width == 1) {
207 assert(hstride == 0);
208 }
209
210 /* 7. */
211 if (execsize == 1 && width == 1) {
212 assert(hstride == 0);
213 assert(vstride == 0);
214 }
215
216 /* 8. */
217 if (vstride == 0 && hstride == 0) {
218 assert(width == 1);
219 }
220
221 /* 10. Check destination issues. */
222 }
223
224 void
225 brw_set_src0(struct brw_compile *p, struct brw_instruction *insn,
226 struct brw_reg reg)
227 {
228 if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE)
229 assert(reg.nr < 128);
230
231 gen7_convert_mrf_to_grf(p, &reg);
232
233 validate_reg(insn, reg);
234
235 insn->bits1.da1.src0_reg_file = reg.file;
236 insn->bits1.da1.src0_reg_type = reg.type;
237 insn->bits2.da1.src0_abs = reg.abs;
238 insn->bits2.da1.src0_negate = reg.negate;
239 insn->bits2.da1.src0_address_mode = reg.address_mode;
240
241 if (reg.file == BRW_IMMEDIATE_VALUE) {
242 insn->bits3.ud = reg.dw1.ud;
243
244 /* Required to set some fields in src1 as well:
245 */
246 insn->bits1.da1.src1_reg_file = 0; /* arf */
247 insn->bits1.da1.src1_reg_type = reg.type;
248 }
249 else
250 {
251 if (reg.address_mode == BRW_ADDRESS_DIRECT) {
252 if (insn->header.access_mode == BRW_ALIGN_1) {
253 insn->bits2.da1.src0_subreg_nr = reg.subnr;
254 insn->bits2.da1.src0_reg_nr = reg.nr;
255 }
256 else {
257 insn->bits2.da16.src0_subreg_nr = reg.subnr / 16;
258 insn->bits2.da16.src0_reg_nr = reg.nr;
259 }
260 }
261 else {
262 insn->bits2.ia1.src0_subreg_nr = reg.subnr;
263
264 if (insn->header.access_mode == BRW_ALIGN_1) {
265 insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset;
266 }
267 else {
268 insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset;
269 }
270 }
271
272 if (insn->header.access_mode == BRW_ALIGN_1) {
273 if (reg.width == BRW_WIDTH_1 &&
274 insn->header.execution_size == BRW_EXECUTE_1) {
275 insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
276 insn->bits2.da1.src0_width = BRW_WIDTH_1;
277 insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0;
278 }
279 else {
280 insn->bits2.da1.src0_horiz_stride = reg.hstride;
281 insn->bits2.da1.src0_width = reg.width;
282 insn->bits2.da1.src0_vert_stride = reg.vstride;
283 }
284 }
285 else {
286 insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
287 insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
288 insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
289 insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
290
291 /* This is an oddity of the fact we're using the same
292 * descriptions for registers in align_16 as align_1:
293 */
294 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
295 insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4;
296 else
297 insn->bits2.da16.src0_vert_stride = reg.vstride;
298 }
299 }
300 }
301
302
/**
 * Encode the second source operand of \p insn from \p reg.
 *
 * src1 is the only operand slot that may hold an immediate in a
 * two-source instruction; it may not be an MRF and (hardware
 * restriction) may not use register-indirect addressing.
 */
void brw_set_src1(struct brw_compile *p,
		  struct brw_instruction *insn,
		  struct brw_reg reg)
{
   assert(reg.file != BRW_MESSAGE_REGISTER_FILE);

   /* Register numbers are 7 bits. */
   assert(reg.nr < 128);

   gen7_convert_mrf_to_grf(p, &reg);

   validate_reg(insn, reg);

   insn->bits1.da1.src1_reg_file = reg.file;
   insn->bits1.da1.src1_reg_type = reg.type;
   insn->bits3.da1.src1_abs = reg.abs;
   insn->bits3.da1.src1_negate = reg.negate;

   /* Only src1 can be immediate in two-argument instructions.
    */
   assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE);

   if (reg.file == BRW_IMMEDIATE_VALUE) {
      insn->bits3.ud = reg.dw1.ud;
   }
   else {
      /* This is a hardware restriction, which may or may not be lifted
       * in the future:
       */
      assert (reg.address_mode == BRW_ADDRESS_DIRECT);
      /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */

      if (insn->header.access_mode == BRW_ALIGN_1) {
	 insn->bits3.da1.src1_subreg_nr = reg.subnr;
	 insn->bits3.da1.src1_reg_nr = reg.nr;
      }
      else {
	 insn->bits3.da16.src1_subreg_nr = reg.subnr / 16;
	 insn->bits3.da16.src1_reg_nr = reg.nr;
      }

      if (insn->header.access_mode == BRW_ALIGN_1) {
	 /* A width-1 source in a SIMD1 instruction is the scalar
	  * <0;1,0> region.
	  */
	 if (reg.width == BRW_WIDTH_1 &&
	     insn->header.execution_size == BRW_EXECUTE_1) {
	    insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
	    insn->bits3.da1.src1_width = BRW_WIDTH_1;
	    insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0;
	 }
	 else {
	    insn->bits3.da1.src1_horiz_stride = reg.hstride;
	    insn->bits3.da1.src1_width = reg.width;
	    insn->bits3.da1.src1_vert_stride = reg.vstride;
	 }
      }
      else {
	 insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
	 insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
	 insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
	 insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);

	 /* This is an oddity of the fact we're using the same
	  * descriptions for registers in align_16 as align_1:
	  */
	 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
	    insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4;
	 else
	    insn->bits3.da16.src1_vert_stride = reg.vstride;
      }
   }
}
372
373 /**
374 * Set the Message Descriptor and Extended Message Descriptor fields
375 * for SEND messages.
376 *
377 * \note This zeroes out the Function Control bits, so it must be called
378 * \b before filling out any message-specific data. Callers can
379 * choose not to fill in irrelevant bits; they will be zero.
380 */
/**
 * Set the Message Descriptor and Extended Message Descriptor fields
 * for SEND messages.
 *
 * \note This zeroes out the Function Control bits, so it must be called
 *       \b before filling out any message-specific data.  Callers can
 *       choose not to fill in irrelevant bits; they will be zero.
 */
static void
brw_set_message_descriptor(struct brw_compile *p,
			   struct brw_instruction *inst,
			   enum brw_message_target sfid,
			   unsigned msg_length,
			   unsigned response_length,
			   bool header_present,
			   bool end_of_thread)
{
   struct intel_context *intel = &p->brw->intel;

   /* Zero the whole descriptor dword (bits3) first. */
   brw_set_src1(p, inst, brw_imm_d(0));

   if (intel->gen >= 5) {
      inst->bits3.generic_gen5.header_present = header_present;
      inst->bits3.generic_gen5.response_length = response_length;
      inst->bits3.generic_gen5.msg_length = msg_length;
      inst->bits3.generic_gen5.end_of_thread = end_of_thread;

      if (intel->gen >= 6) {
	 /* On Gen6+ Message target/SFID goes in bits 27:24 of the header */
	 inst->header.destreg__conditionalmod = sfid;
      } else {
	 /* Set Extended Message Descriptor (ex_desc) */
	 inst->bits2.send_gen5.sfid = sfid;
	 inst->bits2.send_gen5.end_of_thread = end_of_thread;
      }
   } else {
      /* Gen4/G4x descriptor layout. */
      inst->bits3.generic.response_length = response_length;
      inst->bits3.generic.msg_length = msg_length;
      inst->bits3.generic.msg_target = sfid;
      inst->bits3.generic.end_of_thread = end_of_thread;
   }
}
415
416 static void brw_set_math_message( struct brw_compile *p,
417 struct brw_instruction *insn,
418 GLuint function,
419 GLuint integer_type,
420 bool low_precision,
421 bool saturate,
422 GLuint dataType )
423 {
424 struct brw_context *brw = p->brw;
425 struct intel_context *intel = &brw->intel;
426 unsigned msg_length;
427 unsigned response_length;
428
429 /* Infer message length from the function */
430 switch (function) {
431 case BRW_MATH_FUNCTION_POW:
432 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT:
433 case BRW_MATH_FUNCTION_INT_DIV_REMAINDER:
434 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER:
435 msg_length = 2;
436 break;
437 default:
438 msg_length = 1;
439 break;
440 }
441
442 /* Infer response length from the function */
443 switch (function) {
444 case BRW_MATH_FUNCTION_SINCOS:
445 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER:
446 response_length = 2;
447 break;
448 default:
449 response_length = 1;
450 break;
451 }
452
453 brw_set_message_descriptor(p, insn, BRW_SFID_MATH,
454 msg_length, response_length, false, false);
455 if (intel->gen == 5) {
456 insn->bits3.math_gen5.function = function;
457 insn->bits3.math_gen5.int_type = integer_type;
458 insn->bits3.math_gen5.precision = low_precision;
459 insn->bits3.math_gen5.saturate = saturate;
460 insn->bits3.math_gen5.data_type = dataType;
461 insn->bits3.math_gen5.snapshot = 0;
462 } else {
463 insn->bits3.math.function = function;
464 insn->bits3.math.int_type = integer_type;
465 insn->bits3.math.precision = low_precision;
466 insn->bits3.math.saturate = saturate;
467 insn->bits3.math.data_type = dataType;
468 }
469 }
470
471
/**
 * Fill in the SEND descriptor for an FF_SYNC URB message.
 *
 * Uses the Gen5 URB descriptor layout; only the allocate bit and
 * response length vary per call — the remaining fields are fixed for
 * FF_SYNC.
 */
static void brw_set_ff_sync_message(struct brw_compile *p,
				    struct brw_instruction *insn,
				    bool allocate,
				    GLuint response_length,
				    bool end_of_thread)
{
   /* FF_SYNC always sends a single (header) register. */
   brw_set_message_descriptor(p, insn, BRW_SFID_URB,
			      1, response_length, true, end_of_thread);
   insn->bits3.urb_gen5.opcode = 1; /* FF_SYNC */
   insn->bits3.urb_gen5.offset = 0; /* Not used by FF_SYNC */
   insn->bits3.urb_gen5.swizzle_control = 0; /* Not used by FF_SYNC */
   insn->bits3.urb_gen5.allocate = allocate;
   insn->bits3.urb_gen5.used = 0; /* Not used by FF_SYNC */
   insn->bits3.urb_gen5.complete = 0; /* Not used by FF_SYNC */
}
487
/**
 * Fill in the SEND descriptor for a URB write message, using the
 * descriptor layout appropriate to the hardware generation.
 */
static void brw_set_urb_message( struct brw_compile *p,
				 struct brw_instruction *insn,
				 bool allocate,
				 bool used,
				 GLuint msg_length,
				 GLuint response_length,
				 bool end_of_thread,
				 bool complete,
				 GLuint offset,
				 GLuint swizzle_control )
{
   struct brw_context *brw = p->brw;
   struct intel_context *intel = &brw->intel;

   brw_set_message_descriptor(p, insn, BRW_SFID_URB,
			      msg_length, response_length, true, end_of_thread);
   if (intel->gen == 7) {
      insn->bits3.urb_gen7.opcode = 0;	/* URB_WRITE_HWORD */
      insn->bits3.urb_gen7.offset = offset;
      /* Gen7 URB writes don't support transposed swizzling. */
      assert(swizzle_control != BRW_URB_SWIZZLE_TRANSPOSE);
      insn->bits3.urb_gen7.swizzle_control = swizzle_control;
      /* per_slot_offset = 0 makes it ignore offsets in message header */
      insn->bits3.urb_gen7.per_slot_offset = 0;
      insn->bits3.urb_gen7.complete = complete;
   } else if (intel->gen >= 5) {
      insn->bits3.urb_gen5.opcode = 0;	/* URB_WRITE */
      insn->bits3.urb_gen5.offset = offset;
      insn->bits3.urb_gen5.swizzle_control = swizzle_control;
      insn->bits3.urb_gen5.allocate = allocate;
      insn->bits3.urb_gen5.used = used;	/* ? */
      insn->bits3.urb_gen5.complete = complete;
   } else {
      insn->bits3.urb.opcode = 0;	/* ? */
      insn->bits3.urb.offset = offset;
      insn->bits3.urb.swizzle_control = swizzle_control;
      insn->bits3.urb.allocate = allocate;
      insn->bits3.urb.used = used;	/* ? */
      insn->bits3.urb.complete = complete;
   }
}
528
/**
 * Fill in the SEND descriptor for a data port write message.
 *
 * Picks the shared function (SFID) appropriate for the generation:
 * Gen7 routes render-target writes to the render cache and everything
 * else to the data cache; Gen6 uses the render cache for all writes;
 * earlier generations have a dedicated dataport-write SFID.
 */
void
brw_set_dp_write_message(struct brw_compile *p,
			 struct brw_instruction *insn,
			 GLuint binding_table_index,
			 GLuint msg_control,
			 GLuint msg_type,
			 GLuint msg_length,
			 bool header_present,
			 GLuint last_render_target,
			 GLuint response_length,
			 GLuint end_of_thread,
			 GLuint send_commit_msg)
{
   struct brw_context *brw = p->brw;
   struct intel_context *intel = &brw->intel;
   unsigned sfid;

   if (intel->gen >= 7) {
      /* Use the Render Cache for RT writes; otherwise use the Data Cache */
      if (msg_type == GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE)
	 sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
      else
	 sfid = GEN7_SFID_DATAPORT_DATA_CACHE;
   } else if (intel->gen == 6) {
      /* Use the render cache for all write messages. */
      sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
   } else {
      sfid = BRW_SFID_DATAPORT_WRITE;
   }

   brw_set_message_descriptor(p, insn, sfid, msg_length, response_length,
			      header_present, end_of_thread);

   /* Per-generation descriptor layouts; same logical fields throughout. */
   if (intel->gen >= 7) {
      insn->bits3.gen7_dp.binding_table_index = binding_table_index;
      insn->bits3.gen7_dp.msg_control = msg_control;
      insn->bits3.gen7_dp.last_render_target = last_render_target;
      insn->bits3.gen7_dp.msg_type = msg_type;
   } else if (intel->gen == 6) {
      insn->bits3.gen6_dp.binding_table_index = binding_table_index;
      insn->bits3.gen6_dp.msg_control = msg_control;
      insn->bits3.gen6_dp.last_render_target = last_render_target;
      insn->bits3.gen6_dp.msg_type = msg_type;
      insn->bits3.gen6_dp.send_commit_msg = send_commit_msg;
   } else if (intel->gen == 5) {
      insn->bits3.dp_write_gen5.binding_table_index = binding_table_index;
      insn->bits3.dp_write_gen5.msg_control = msg_control;
      insn->bits3.dp_write_gen5.last_render_target = last_render_target;
      insn->bits3.dp_write_gen5.msg_type = msg_type;
      insn->bits3.dp_write_gen5.send_commit_msg = send_commit_msg;
   } else {
      insn->bits3.dp_write.binding_table_index = binding_table_index;
      insn->bits3.dp_write.msg_control = msg_control;
      insn->bits3.dp_write.last_render_target = last_render_target;
      insn->bits3.dp_write.msg_type = msg_type;
      insn->bits3.dp_write.send_commit_msg = send_commit_msg;
   }
}
587
/**
 * Fill in the SEND descriptor for a data port read message.
 *
 * Gen7 always reads through the data cache; Gen6 selects between the
 * render and sampler caches based on \p target_cache; earlier
 * generations have a dedicated dataport-read SFID and encode the
 * target cache in the descriptor itself.
 */
void
brw_set_dp_read_message(struct brw_compile *p,
			struct brw_instruction *insn,
			GLuint binding_table_index,
			GLuint msg_control,
			GLuint msg_type,
			GLuint target_cache,
			GLuint msg_length,
			GLuint response_length)
{
   struct brw_context *brw = p->brw;
   struct intel_context *intel = &brw->intel;
   unsigned sfid;

   if (intel->gen >= 7) {
      sfid = GEN7_SFID_DATAPORT_DATA_CACHE;
   } else if (intel->gen == 6) {
      if (target_cache == BRW_DATAPORT_READ_TARGET_RENDER_CACHE)
	 sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
      else
	 sfid = GEN6_SFID_DATAPORT_SAMPLER_CACHE;
   } else {
      sfid = BRW_SFID_DATAPORT_READ;
   }

   /* Reads always carry a header and are never end-of-thread. */
   brw_set_message_descriptor(p, insn, sfid, msg_length, response_length,
			      true, false);

   /* Per-generation descriptor layouts. */
   if (intel->gen >= 7) {
      insn->bits3.gen7_dp.binding_table_index = binding_table_index;
      insn->bits3.gen7_dp.msg_control = msg_control;
      insn->bits3.gen7_dp.last_render_target = 0;
      insn->bits3.gen7_dp.msg_type = msg_type;
   } else if (intel->gen == 6) {
      insn->bits3.gen6_dp.binding_table_index = binding_table_index;
      insn->bits3.gen6_dp.msg_control = msg_control;
      insn->bits3.gen6_dp.last_render_target = 0;
      insn->bits3.gen6_dp.msg_type = msg_type;
      insn->bits3.gen6_dp.send_commit_msg = 0;
   } else if (intel->gen == 5) {
      insn->bits3.dp_read_gen5.binding_table_index = binding_table_index;
      insn->bits3.dp_read_gen5.msg_control = msg_control;
      insn->bits3.dp_read_gen5.msg_type = msg_type;
      insn->bits3.dp_read_gen5.target_cache = target_cache;
   } else if (intel->is_g4x) {
      insn->bits3.dp_read_g4x.binding_table_index = binding_table_index; /*0:7*/
      insn->bits3.dp_read_g4x.msg_control = msg_control;  /*8:10*/
      insn->bits3.dp_read_g4x.msg_type = msg_type;  /*11:13*/
      insn->bits3.dp_read_g4x.target_cache = target_cache;  /*14:15*/
   } else {
      insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/
      insn->bits3.dp_read.msg_control = msg_control;  /*8:11*/
      insn->bits3.dp_read.msg_type = msg_type;  /*12:13*/
      insn->bits3.dp_read.target_cache = target_cache;  /*14:15*/
   }
}
644
/**
 * Fill in the SEND descriptor for a sampler message.
 *
 * \param return_format is only encoded on original Gen4; later
 *        generations (and G4x) dropped the field from the descriptor.
 */
static void brw_set_sampler_message(struct brw_compile *p,
				    struct brw_instruction *insn,
				    GLuint binding_table_index,
				    GLuint sampler,
				    GLuint msg_type,
				    GLuint response_length,
				    GLuint msg_length,
				    GLuint header_present,
				    GLuint simd_mode,
				    GLuint return_format)
{
   struct brw_context *brw = p->brw;
   struct intel_context *intel = &brw->intel;

   brw_set_message_descriptor(p, insn, BRW_SFID_SAMPLER, msg_length,
			      response_length, header_present, false);

   /* Per-generation descriptor layouts. */
   if (intel->gen >= 7) {
      insn->bits3.sampler_gen7.binding_table_index = binding_table_index;
      insn->bits3.sampler_gen7.sampler = sampler;
      insn->bits3.sampler_gen7.msg_type = msg_type;
      insn->bits3.sampler_gen7.simd_mode = simd_mode;
   } else if (intel->gen >= 5) {
      insn->bits3.sampler_gen5.binding_table_index = binding_table_index;
      insn->bits3.sampler_gen5.sampler = sampler;
      insn->bits3.sampler_gen5.msg_type = msg_type;
      insn->bits3.sampler_gen5.simd_mode = simd_mode;
   } else if (intel->is_g4x) {
      insn->bits3.sampler_g4x.binding_table_index = binding_table_index;
      insn->bits3.sampler_g4x.sampler = sampler;
      insn->bits3.sampler_g4x.msg_type = msg_type;
   } else {
      insn->bits3.sampler.binding_table_index = binding_table_index;
      insn->bits3.sampler.sampler = sampler;
      insn->bits3.sampler.msg_type = msg_type;
      insn->bits3.sampler.return_format = return_format;
   }
}
683
684
#define next_insn brw_next_insn
/**
 * Allocate the next instruction slot in the program store, seed it from
 * the current default instruction state (p->current), and set its
 * opcode.  Returns a pointer the caller fills in further.
 */
struct brw_instruction *
brw_next_insn(struct brw_compile *p, GLuint opcode)
{
   struct brw_instruction *insn;

   assert(p->nr_insn + 1 < BRW_EU_MAX_INSN);

   insn = &p->store[p->nr_insn++];
   memcpy(insn, p->current, sizeof(*insn));

   /* Reset this one-shot flag:
    */
   /* destreg__conditionalmod applies only to the next instruction
    * emitted; clear it (and restore normal predication) so it doesn't
    * leak into subsequent instructions.
    */
   if (p->current->header.destreg__conditionalmod) {
      p->current->header.destreg__conditionalmod = 0;
      p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
   }

   insn->header.opcode = opcode;
   return insn;
}
707
708 static struct brw_instruction *brw_alu1( struct brw_compile *p,
709 GLuint opcode,
710 struct brw_reg dest,
711 struct brw_reg src )
712 {
713 struct brw_instruction *insn = next_insn(p, opcode);
714 brw_set_dest(p, insn, dest);
715 brw_set_src0(p, insn, src);
716 return insn;
717 }
718
719 static struct brw_instruction *brw_alu2(struct brw_compile *p,
720 GLuint opcode,
721 struct brw_reg dest,
722 struct brw_reg src0,
723 struct brw_reg src1 )
724 {
725 struct brw_instruction *insn = next_insn(p, opcode);
726 brw_set_dest(p, insn, dest);
727 brw_set_src0(p, insn, src0);
728 brw_set_src1(p, insn, src1);
729 return insn;
730 }
731
732
733 /***********************************************************************
734 * Convenience routines.
735 */
/* Generate the public emitter for a one-source opcode, e.g.
 * ALU1(MOV) defines brw_MOV().
 */
#define ALU1(OP)					\
struct brw_instruction *brw_##OP(struct brw_compile *p,	\
	      struct brw_reg dest,			\
	      struct brw_reg src0)   			\
{							\
   return brw_alu1(p, BRW_OPCODE_##OP, dest, src0);    	\
}
743
/* Generate the public emitter for a two-source opcode, e.g.
 * ALU2(ADD) would define brw_ADD().
 */
#define ALU2(OP)					\
struct brw_instruction *brw_##OP(struct brw_compile *p,	\
	      struct brw_reg dest,			\
	      struct brw_reg src0,			\
	      struct brw_reg src1)   			\
{							\
   return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1);	\
}
752
/* Rounding operations (other than RNDD) require two instructions - the first
 * stores a rounded value (possibly the wrong way) in the dest register, but
 * also sets a per-channel "increment bit" in the flag register.  A predicated
 * add of 1.0 fixes dest to contain the desired result.
 *
 * Sandybridge and later appear to round correctly without an ADD.
 */
#define ROUND(OP)							      \
void brw_##OP(struct brw_compile *p,					      \
	      struct brw_reg dest,					      \
	      struct brw_reg src)					      \
{									      \
   struct brw_instruction *rnd, *add;					      \
   rnd = next_insn(p, BRW_OPCODE_##OP);					      \
   brw_set_dest(p, rnd, dest);						      \
   brw_set_src0(p, rnd, src);						      \
									      \
   if (p->brw->intel.gen < 6) {						      \
      /* turn on round-increments */					      \
      rnd->header.destreg__conditionalmod = BRW_CONDITIONAL_R;		      \
      add = brw_ADD(p, dest, dest, brw_imm_f(1.0f));			      \
      add->header.predicate_control = BRW_PREDICATE_NORMAL;		      \
   }									      \
}
777
778
/* Instantiate the emitters for the simple opcodes.  ADD and MUL are
 * defined by hand below because they carry extra type assertions.
 */
ALU1(MOV)
ALU2(SEL)
ALU1(NOT)
ALU2(AND)
ALU2(OR)
ALU2(XOR)
ALU2(SHR)
ALU2(SHL)
ALU2(RSR)
ALU2(RSL)
ALU2(ASR)
ALU1(FRC)
ALU1(RNDD)
ALU2(MAC)
ALU2(MACH)
ALU1(LZD)
ALU2(DP4)
ALU2(DPH)
ALU2(DP3)
ALU2(DP2)
ALU2(LINE)
ALU2(PLN)


/* Two-instruction rounding sequences (see ROUND above). */
ROUND(RNDZ)
ROUND(RNDE)
805
806
/**
 * Emit an ADD, asserting the PRM's restriction that a float source may
 * not be mixed with a D/UD source in the same instruction.
 */
struct brw_instruction *brw_ADD(struct brw_compile *p,
				struct brw_reg dest,
				struct brw_reg src0,
				struct brw_reg src1)
{
   /* 6.2.2: add */
   if (src0.type == BRW_REGISTER_TYPE_F ||
       (src0.file == BRW_IMMEDIATE_VALUE &&
	src0.type == BRW_REGISTER_TYPE_VF)) {
      assert(src1.type != BRW_REGISTER_TYPE_UD);
      assert(src1.type != BRW_REGISTER_TYPE_D);
   }

   if (src1.type == BRW_REGISTER_TYPE_F ||
       (src1.file == BRW_IMMEDIATE_VALUE &&
	src1.type == BRW_REGISTER_TYPE_VF)) {
      assert(src0.type != BRW_REGISTER_TYPE_UD);
      assert(src0.type != BRW_REGISTER_TYPE_D);
   }

   return brw_alu2(p, BRW_OPCODE_ADD, dest, src0, src1);
}
829
/**
 * Emit a MUL, asserting the PRM's restrictions: integer sources may not
 * target a float destination, float sources may not be mixed with D/UD
 * sources, and neither source may be the accumulator.
 */
struct brw_instruction *brw_MUL(struct brw_compile *p,
				struct brw_reg dest,
				struct brw_reg src0,
				struct brw_reg src1)
{
   /* 6.32.38: mul */
   if (src0.type == BRW_REGISTER_TYPE_D ||
       src0.type == BRW_REGISTER_TYPE_UD ||
       src1.type == BRW_REGISTER_TYPE_D ||
       src1.type == BRW_REGISTER_TYPE_UD) {
      assert(dest.type != BRW_REGISTER_TYPE_F);
   }

   if (src0.type == BRW_REGISTER_TYPE_F ||
       (src0.file == BRW_IMMEDIATE_VALUE &&
	src0.type == BRW_REGISTER_TYPE_VF)) {
      assert(src1.type != BRW_REGISTER_TYPE_UD);
      assert(src1.type != BRW_REGISTER_TYPE_D);
   }

   if (src1.type == BRW_REGISTER_TYPE_F ||
       (src1.file == BRW_IMMEDIATE_VALUE &&
	src1.type == BRW_REGISTER_TYPE_VF)) {
      assert(src0.type != BRW_REGISTER_TYPE_UD);
      assert(src0.type != BRW_REGISTER_TYPE_D);
   }

   assert(src0.file != BRW_ARCHITECTURE_REGISTER_FILE ||
	  src0.nr != BRW_ARF_ACCUMULATOR);
   assert(src1.file != BRW_ARCHITECTURE_REGISTER_FILE ||
	  src1.nr != BRW_ARF_ACCUMULATOR);

   return brw_alu2(p, BRW_OPCODE_MUL, dest, src0, src1);
}
864
865
866 void brw_NOP(struct brw_compile *p)
867 {
868 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP);
869 brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
870 brw_set_src0(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
871 brw_set_src1(p, insn, brw_imm_ud(0x0));
872 }
873
874
875
876
877
878 /***********************************************************************
879 * Comparisons, if/else/endif
880 */
881
/**
 * Emit a JMPI (jump indexed) instruction.
 *
 * JMPI is always scalar (SIMD1), uncompressed, and runs with masking
 * disabled.  Any default predication is cleared afterwards so it only
 * applies to this jump.
 */
struct brw_instruction *brw_JMPI(struct brw_compile *p,
				 struct brw_reg dest,
				 struct brw_reg src0,
				 struct brw_reg src1)
{
   struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1);

   insn->header.execution_size = 1;
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.mask_control = BRW_MASK_DISABLE;

   /* Reset default predication so it doesn't leak past the jump. */
   p->current->header.predicate_control = BRW_PREDICATE_NONE;

   return insn;
}
897
898 static void
899 push_if_stack(struct brw_compile *p, struct brw_instruction *inst)
900 {
901 p->if_stack[p->if_stack_depth] = inst;
902
903 p->if_stack_depth++;
904 if (p->if_stack_array_size <= p->if_stack_depth) {
905 p->if_stack_array_size *= 2;
906 p->if_stack = reralloc(p->mem_ctx, p->if_stack, struct brw_instruction *,
907 p->if_stack_array_size);
908 }
909 }
910
/* EU takes the value from the flag register and pushes it onto some
 * sort of a stack (presumably merging with any flag value already on
 * the stack).  Within an if block, the flags at the top of the stack
 * control execution on each channel of the unit, eg. on each of the
 * 16 pixel values in our wm programs.
 *
 * When the matching 'else' instruction is reached (presumably by
 * countdown of the instruction count patched in by our ELSE/ENDIF
 * functions), the relevant flags are inverted.
 *
 * When the matching 'endif' instruction is reached, the flags are
 * popped off.  If the stack is now empty, normal execution resumes.
 */
struct brw_instruction *
brw_IF(struct brw_compile *p, GLuint execute_size)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_IF);

   /* Override the defaults for this instruction:
    */
   if (intel->gen < 6) {
      /* Pre-gen6: IP-relative jump, patched later via src1 immediate. */
      brw_set_dest(p, insn, brw_ip_reg());
      brw_set_src0(p, insn, brw_ip_reg());
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else if (intel->gen == 6) {
      /* Gen6: jump count lives in bits1; sources are null. */
      brw_set_dest(p, insn, brw_imm_w(0));
      insn->bits1.branch_gen6.jump_count = 0;
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   } else {
      /* Gen7: JIP/UIP offsets in bits3, patched at ELSE/ENDIF time. */
      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_ud(0));
      insn->bits3.break_cont.jip = 0;
      insn->bits3.break_cont.uip = 0;
   }

   insn->header.execution_size = execute_size;
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.predicate_control = BRW_PREDICATE_NORMAL;
   insn->header.mask_control = BRW_MASK_ENABLE;
   if (!p->single_program_flow)
      insn->header.thread_control = BRW_THREAD_SWITCH;

   /* Predication was consumed by the IF; clear the default state. */
   p->current->header.predicate_control = BRW_PREDICATE_NONE;

   push_if_stack(p, insn);
   return insn;
}
963
/* This function is only used for gen6-style IF instructions with an
 * embedded comparison (conditional modifier).  It is not used on gen7.
 */
struct brw_instruction *
gen6_IF(struct brw_compile *p, uint32_t conditional,
	struct brw_reg src0, struct brw_reg src1)
{
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_IF);

   brw_set_dest(p, insn, brw_imm_w(0));
   /* Execution size follows the dispatch width of the shader. */
   if (p->compressed) {
      insn->header.execution_size = BRW_EXECUTE_16;
   } else {
      insn->header.execution_size = BRW_EXECUTE_8;
   }
   /* Jump count is patched later, when the matching ELSE/ENDIF is seen. */
   insn->bits1.branch_gen6.jump_count = 0;
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, src1);

   assert(insn->header.compression_control == BRW_COMPRESSION_NONE);
   assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
   /* The comparison is embedded in the IF via the conditional modifier. */
   insn->header.destreg__conditionalmod = conditional;

   if (!p->single_program_flow)
      insn->header.thread_control = BRW_THREAD_SWITCH;

   push_if_stack(p, insn);
   return insn;
}
995
996 /**
997 * In single-program-flow (SPF) mode, convert IF and ELSE into ADDs.
998 */
static void
convert_IF_ELSE_to_ADD(struct brw_compile *p,
		       struct brw_instruction *if_inst,
		       struct brw_instruction *else_inst)
{
   /* The next instruction (where the ENDIF would be, if it existed) */
   struct brw_instruction *next_inst = &p->store[p->nr_insn];

   assert(p->single_program_flow);
   assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF);
   assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE);
   assert(if_inst->header.execution_size == BRW_EXECUTE_1);

   /* Convert IF to an ADD instruction that moves the instruction pointer
    * to the first instruction of the ELSE block. If there is no ELSE
    * block, point to where ENDIF would be. Reverse the predicate.
    *
    * There's no need to execute an ENDIF since we don't need to do any
    * stack operations, and if we're currently executing, we just want to
    * continue normally.
    */
   if_inst->header.opcode = BRW_OPCODE_ADD;
   if_inst->header.predicate_inverse = 1;

   if (else_inst != NULL) {
      /* Convert ELSE to an ADD instruction that points where the ENDIF
       * would be.
       */
      else_inst->header.opcode = BRW_OPCODE_ADD;

      /* Offsets are in bytes; each instruction is 16 bytes.  The "+ 1"
       * skips past the ELSE itself.
       */
      if_inst->bits3.ud = (else_inst - if_inst + 1) * 16;
      else_inst->bits3.ud = (next_inst - else_inst) * 16;
   } else {
      if_inst->bits3.ud = (next_inst - if_inst) * 16;
   }
}
1035
1036 /**
1037 * Patch IF and ELSE instructions with appropriate jump targets.
1038 */
/**
 * Patch IF and ELSE instructions with appropriate jump targets.
 *
 * Called from brw_ENDIF() once the position of the ENDIF is known.
 * \param if_inst    the IF emitted by brw_IF()/gen6_IF() (required)
 * \param else_inst  the matching ELSE, or NULL if there is none
 * \param endif_inst the just-emitted ENDIF (required)
 */
static void
patch_IF_ELSE(struct brw_compile *p,
	      struct brw_instruction *if_inst,
	      struct brw_instruction *else_inst,
	      struct brw_instruction *endif_inst)
{
   struct intel_context *intel = &p->brw->intel;

   assert(!p->single_program_flow);
   assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF);
   assert(endif_inst != NULL);
   assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE);

   unsigned br = 1;
   /* Jump count is for 64bit data chunk each, so one 128bit instruction
    * requires 2 chunks.
    */
   if (intel->gen >= 5)
      br = 2;

   assert(endif_inst->header.opcode == BRW_OPCODE_ENDIF);
   endif_inst->header.execution_size = if_inst->header.execution_size;

   if (else_inst == NULL) {
      /* Patch IF -> ENDIF */
      if (intel->gen < 6) {
	 /* Turn it into an IFF, which means no mask stack operations for
	  * all-false and jumping past the ENDIF.
	  */
	 if_inst->header.opcode = BRW_OPCODE_IFF;
	 if_inst->bits3.if_else.jump_count = br * (endif_inst - if_inst + 1);
	 if_inst->bits3.if_else.pop_count = 0;
	 if_inst->bits3.if_else.pad0 = 0;
      } else if (intel->gen == 6) {
	 /* As of gen6, there is no IFF and IF must point to the ENDIF. */
	 if_inst->bits1.branch_gen6.jump_count = br * (endif_inst - if_inst);
      } else {
	 if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst);
	 if_inst->bits3.break_cont.jip = br * (endif_inst - if_inst);
      }
   } else {
      else_inst->header.execution_size = if_inst->header.execution_size;

      /* Patch IF -> ELSE */
      if (intel->gen < 6) {
	 if_inst->bits3.if_else.jump_count = br * (else_inst - if_inst);
	 if_inst->bits3.if_else.pop_count = 0;
	 if_inst->bits3.if_else.pad0 = 0;
      } else if (intel->gen == 6) {
	 if_inst->bits1.branch_gen6.jump_count = br * (else_inst - if_inst + 1);
      }

      /* Patch ELSE -> ENDIF */
      if (intel->gen < 6) {
	 /* BRW_OPCODE_ELSE pre-gen6 should point just past the
	  * matching ENDIF.
	  */
	 else_inst->bits3.if_else.jump_count = br*(endif_inst - else_inst + 1);
	 else_inst->bits3.if_else.pop_count = 1;
	 else_inst->bits3.if_else.pad0 = 0;
      } else if (intel->gen == 6) {
	 /* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. */
	 else_inst->bits1.branch_gen6.jump_count = br*(endif_inst - else_inst);
      } else {
	 /* The IF instruction's JIP should point just past the ELSE */
	 if_inst->bits3.break_cont.jip = br * (else_inst - if_inst + 1);
	 /* The IF instruction's UIP and ELSE's JIP should point to ENDIF */
	 if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst);
	 else_inst->bits3.break_cont.jip = br * (endif_inst - else_inst);
      }
   }
}
1111
/**
 * Emit an ELSE instruction with zeroed branch-target fields.
 *
 * Like brw_IF(), the per-gen jump fields are placeholders that
 * patch_IF_ELSE() fills in from brw_ENDIF().  The ELSE is pushed on
 * the if-stack on top of its IF so brw_ENDIF() can pop both.
 */
void
brw_ELSE(struct brw_compile *p)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_ELSE);

   if (intel->gen < 6) {
      brw_set_dest(p, insn, brw_ip_reg());
      brw_set_src0(p, insn, brw_ip_reg());
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else if (intel->gen == 6) {
      brw_set_dest(p, insn, brw_imm_w(0));
      insn->bits1.branch_gen6.jump_count = 0;
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   } else {
      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_ud(0));
      insn->bits3.break_cont.jip = 0;
      insn->bits3.break_cont.uip = 0;
   }

   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.mask_control = BRW_MASK_ENABLE;
   if (!p->single_program_flow)
      insn->header.thread_control = BRW_THREAD_SWITCH;

   push_if_stack(p, insn);
}
1144
/**
 * Terminate an IF/ELSE block.
 *
 * Pops the matching IF (and optional ELSE) from the if-stack.  In
 * single-program-flow mode no ENDIF is emitted at all; the IF/ELSE are
 * rewritten into predicated ADDs to the IP register instead.  Otherwise
 * an ENDIF is emitted and patch_IF_ELSE() fixes up all jump targets.
 */
void
brw_ENDIF(struct brw_compile *p)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;
   struct brw_instruction *else_inst = NULL;
   struct brw_instruction *if_inst = NULL;

   /* Pop the IF and (optional) ELSE instructions from the stack */
   p->if_stack_depth--;
   if (p->if_stack[p->if_stack_depth]->header.opcode == BRW_OPCODE_ELSE) {
      else_inst = p->if_stack[p->if_stack_depth];
      p->if_stack_depth--;
   }
   if_inst = p->if_stack[p->if_stack_depth];

   if (p->single_program_flow) {
      /* ENDIF is useless; don't bother emitting it. */
      convert_IF_ELSE_to_ADD(p, if_inst, else_inst);
      return;
   }

   insn = next_insn(p, BRW_OPCODE_ENDIF);

   if (intel->gen < 6) {
      brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
      brw_set_src0(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else if (intel->gen == 6) {
      brw_set_dest(p, insn, brw_imm_w(0));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   } else {
      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_ud(0));
   }

   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.mask_control = BRW_MASK_ENABLE;
   insn->header.thread_control = BRW_THREAD_SWITCH;

   /* Also pop item off the stack in the endif instruction: */
   if (intel->gen < 6) {
      insn->bits3.if_else.jump_count = 0;
      insn->bits3.if_else.pop_count = 1;
      insn->bits3.if_else.pad0 = 0;
   } else if (intel->gen == 6) {
      insn->bits1.branch_gen6.jump_count = 2;
   } else {
      insn->bits3.break_cont.jip = 2;
   }
   patch_IF_ELSE(p, if_inst, else_inst, insn);
}
1199
/**
 * Emit a BREAK instruction.
 *
 * \param pop_count  number of mask-stack entries to pop on pre-gen6
 *                   hardware (unused on gen6+, which has no mask stack).
 * The jump target fields are left for the caller/loop patching to fill.
 */
struct brw_instruction *brw_BREAK(struct brw_compile *p, int pop_count)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_BREAK);
   if (intel->gen >= 6) {
      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else {
      brw_set_dest(p, insn, brw_ip_reg());
      brw_set_src0(p, insn, brw_ip_reg());
      brw_set_src1(p, insn, brw_imm_d(0x0));
      insn->bits3.if_else.pad0 = 0;
      insn->bits3.if_else.pop_count = pop_count;
   }
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.execution_size = BRW_EXECUTE_8;

   return insn;
}
1222
1223 struct brw_instruction *gen6_CONT(struct brw_compile *p,
1224 struct brw_instruction *do_insn)
1225 {
1226 struct brw_instruction *insn;
1227
1228 insn = next_insn(p, BRW_OPCODE_CONTINUE);
1229 brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1230 brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1231 brw_set_dest(p, insn, brw_ip_reg());
1232 brw_set_src0(p, insn, brw_ip_reg());
1233 brw_set_src1(p, insn, brw_imm_d(0x0));
1234
1235 insn->header.compression_control = BRW_COMPRESSION_NONE;
1236 insn->header.execution_size = BRW_EXECUTE_8;
1237 return insn;
1238 }
1239
/**
 * Emit a pre-gen6 CONTINUE instruction.
 *
 * \param pop_count  number of mask-stack entries to pop.
 * The jump target is filled in later by the loop-emission code.
 */
struct brw_instruction *brw_CONT(struct brw_compile *p, int pop_count)
{
   struct brw_instruction *insn;
   insn = next_insn(p, BRW_OPCODE_CONTINUE);
   brw_set_dest(p, insn, brw_ip_reg());
   brw_set_src0(p, insn, brw_ip_reg());
   brw_set_src1(p, insn, brw_imm_d(0x0));
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.execution_size = BRW_EXECUTE_8;
   /* insn->header.mask_control = BRW_MASK_DISABLE; */
   insn->bits3.if_else.pad0 = 0;
   insn->bits3.if_else.pop_count = pop_count;
   return insn;
}
1254
1255 /* DO/WHILE loop:
1256 *
1257 * The DO/WHILE is just an unterminated loop -- break or continue are
1258 * used for control within the loop. We have a few ways they can be
1259 * done.
1260 *
1261 * For uniform control flow, the WHILE is just a jump, so ADD ip, ip,
1262 * jip and no DO instruction.
1263 *
1264 * For non-uniform control flow pre-gen6, there's a DO instruction to
1265 * push the mask, and a WHILE to jump back, and BREAK to get out and
1266 * pop the mask.
1267 *
1268 * For gen6, there's no more mask stack, so no need for DO. WHILE
1269 * just points back to the first instruction of the loop.
1270 */
/**
 * Emit (or elide) the DO marking the top of a loop.
 *
 * On gen6+ and in single-program-flow mode no DO instruction exists;
 * the returned pointer is simply the next instruction slot, which
 * brw_WHILE() uses as the loop-back target.
 */
struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size)
{
   struct intel_context *intel = &p->brw->intel;

   if (intel->gen >= 6 || p->single_program_flow) {
      return &p->store[p->nr_insn];
   } else {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO);

      /* Override the defaults for this instruction:
       */
      brw_set_dest(p, insn, brw_null_reg());
      brw_set_src0(p, insn, brw_null_reg());
      brw_set_src1(p, insn, brw_null_reg());

      insn->header.compression_control = BRW_COMPRESSION_NONE;
      insn->header.execution_size = execute_size;
      insn->header.predicate_control = BRW_PREDICATE_NONE;
      /* insn->header.mask_control = BRW_MASK_ENABLE; */
      /* insn->header.mask_control = BRW_MASK_DISABLE; */

      return insn;
   }
}
1295
1296
1297
/**
 * Emit the WHILE closing a loop opened by brw_DO().
 *
 * \param do_insn  value returned by brw_DO() -- either a real DO
 *                 instruction (pre-gen6, non-SPF) or the first
 *                 instruction of the loop body (gen6+, SPF).
 * The backwards jump distance is encoded differently per generation;
 * br scales instruction counts to the 64-bit chunks the hardware uses
 * from gen5 on.
 */
struct brw_instruction *brw_WHILE(struct brw_compile *p,
                                  struct brw_instruction *do_insn)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;
   GLuint br = 1;

   if (intel->gen >= 5)
      br = 2;

   if (intel->gen >= 7) {
      insn = next_insn(p, BRW_OPCODE_WHILE);

      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_ud(0));
      /* Negative JIP: jump back to the top of the loop. */
      insn->bits3.break_cont.jip = br * (do_insn - insn);

      insn->header.execution_size = BRW_EXECUTE_8;
   } else if (intel->gen == 6) {
      insn = next_insn(p, BRW_OPCODE_WHILE);

      brw_set_dest(p, insn, brw_imm_w(0));
      insn->bits1.branch_gen6.jump_count = br * (do_insn - insn);
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));

      insn->header.execution_size = BRW_EXECUTE_8;
   } else {
      if (p->single_program_flow) {
	 /* Uniform control flow: just an ADD to IP (16 bytes/insn). */
	 insn = next_insn(p, BRW_OPCODE_ADD);

	 brw_set_dest(p, insn, brw_ip_reg());
	 brw_set_src0(p, insn, brw_ip_reg());
	 brw_set_src1(p, insn, brw_imm_d((do_insn - insn) * 16));
	 insn->header.execution_size = BRW_EXECUTE_1;
      } else {
	 insn = next_insn(p, BRW_OPCODE_WHILE);

	 assert(do_insn->header.opcode == BRW_OPCODE_DO);

	 brw_set_dest(p, insn, brw_ip_reg());
	 brw_set_src0(p, insn, brw_ip_reg());
	 brw_set_src1(p, insn, brw_imm_d(0));

	 insn->header.execution_size = do_insn->header.execution_size;
	 /* "+ 1": jump to the instruction after the DO. */
	 insn->bits3.if_else.jump_count = br * (do_insn - insn + 1);
	 insn->bits3.if_else.pop_count = 0;
	 insn->bits3.if_else.pad0 = 0;
      }
   }
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   p->current->header.predicate_control = BRW_PREDICATE_NONE;

   return insn;
}
1354
1355
1356 /* FORWARD JUMPS:
1357 */
1358 void brw_land_fwd_jump(struct brw_compile *p,
1359 struct brw_instruction *jmp_insn)
1360 {
1361 struct intel_context *intel = &p->brw->intel;
1362 struct brw_instruction *landing = &p->store[p->nr_insn];
1363 GLuint jmpi = 1;
1364
1365 if (intel->gen >= 5)
1366 jmpi = 2;
1367
1368 assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
1369 assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE);
1370
1371 jmp_insn->bits3.ud = jmpi * ((landing - jmp_insn) - 1);
1372 }
1373
1374
1375
1376 /* To integrate with the above, it makes sense that the comparison
1377 * instruction should populate the flag register. It might be simpler
1378 * just to use the flag reg for most WM tasks?
1379 */
/* To integrate with the above, it makes sense that the comparison
 * instruction should populate the flag register.  It might be simpler
 * just to use the flag reg for most WM tasks?
 *
 * Emits a CMP with the given conditional modifier.  If dest is the
 * null architecture register, the compile state is switched to
 * predicate subsequent instructions on the freshly-computed flag.
 */
void brw_CMP(struct brw_compile *p,
	     struct brw_reg dest,
	     GLuint conditional,
	     struct brw_reg src0,
	     struct brw_reg src1)
{
   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP);

   insn->header.destreg__conditionalmod = conditional;
   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, src1);

/*    guess_execution_size(insn, src0); */


   /* Make it so that future instructions will use the computed flag
    * value until brw_set_predicate_control_flag_value() is called
    * again.
    */
   if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
       dest.nr == 0) {
      p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
      p->flag_value = 0xff;
   }
}
1406
1407 /* Issue 'wait' instruction for n1, host could program MMIO
1408 to wake up thread. */
/* Issue 'wait' instruction for n1, host could program MMIO
   to wake up thread. */
void brw_WAIT (struct brw_compile *p)
{
   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_WAIT);
   /* WAIT uses the notification register as both source and dest. */
   struct brw_reg src = brw_notification_1_reg();

   brw_set_dest(p, insn, src);
   brw_set_src0(p, insn, src);
   brw_set_src1(p, insn, brw_null_reg());
   insn->header.execution_size = 0; /* must */
   insn->header.predicate_control = 0;
   insn->header.compression_control = 0;
}
1421
1422
1423 /***********************************************************************
1424 * Helpers for the various SEND message types:
1425 */
1426
1427 /** Extended math function, float[8].
1428 */
/** Extended math function, float[8].
 *
 * On gen6+ this is a native MATH instruction; the function selector is
 * packed into the CondModifier/ThreadCtrl fields.  On earlier parts it
 * is a SEND to the shared math unit, with msg_reg_nr selecting the
 * message register.
 */
void brw_math( struct brw_compile *p,
	       struct brw_reg dest,
	       GLuint function,
	       GLuint saturate,
	       GLuint msg_reg_nr,
	       struct brw_reg src,
	       GLuint data_type,
	       GLuint precision )
{
   struct intel_context *intel = &p->brw->intel;

   if (intel->gen >= 6) {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);

      assert(dest.file == BRW_GENERAL_REGISTER_FILE);
      assert(src.file == BRW_GENERAL_REGISTER_FILE);

      assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
      if (intel->gen == 6)
	 assert(src.hstride == BRW_HORIZONTAL_STRIDE_1);

      /* Source modifiers are ignored for extended math instructions on Gen6. */
      if (intel->gen == 6) {
	 assert(!src.negate);
	 assert(!src.abs);
      }

      /* Integer-divide variants take integer operands; all other math
       * functions operate on floats.
       */
      if (function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||
	  function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER ||
	  function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
	 assert(src.type != BRW_REGISTER_TYPE_F);
      } else {
	 assert(src.type == BRW_REGISTER_TYPE_F);
      }

      /* Math is the same ISA format as other opcodes, except that CondModifier
       * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
       */
      insn->header.destreg__conditionalmod = function;
      insn->header.saturate = saturate;

      brw_set_dest(p, insn, dest);
      brw_set_src0(p, insn, src);
      brw_set_src1(p, insn, brw_null_reg());
   } else {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);

      /* Example code doesn't set predicate_control for send
       * instructions.
       */
      insn->header.predicate_control = 0;
      insn->header.destreg__conditionalmod = msg_reg_nr;

      brw_set_dest(p, insn, dest);
      brw_set_src0(p, insn, src);
      brw_set_math_message(p,
			   insn,
			   function,
			   src.type == BRW_REGISTER_TYPE_D,
			   precision,
			   saturate,
			   data_type);
   }
}
1493
1494 /** Extended math function, float[8].
1495 */
/** Extended math function, float[8].
 *
 * Two-source variant (e.g. POW, integer divide); gen6+ only, since
 * pre-gen6 math is a SEND and cannot take two register sources.
 */
void brw_math2(struct brw_compile *p,
	       struct brw_reg dest,
	       GLuint function,
	       struct brw_reg src0,
	       struct brw_reg src1)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);

   assert(intel->gen >= 6);
   (void) intel;


   assert(dest.file == BRW_GENERAL_REGISTER_FILE);
   assert(src0.file == BRW_GENERAL_REGISTER_FILE);
   assert(src1.file == BRW_GENERAL_REGISTER_FILE);

   assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
   if (intel->gen == 6) {
      assert(src0.hstride == BRW_HORIZONTAL_STRIDE_1);
      assert(src1.hstride == BRW_HORIZONTAL_STRIDE_1);
   }

   /* Integer-divide variants take integer operands; everything else
    * expects floats.
    */
   if (function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||
       function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER ||
       function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
      assert(src0.type != BRW_REGISTER_TYPE_F);
      assert(src1.type != BRW_REGISTER_TYPE_F);
   } else {
      assert(src0.type == BRW_REGISTER_TYPE_F);
      assert(src1.type == BRW_REGISTER_TYPE_F);
   }

   /* Source modifiers are ignored for extended math instructions on Gen6. */
   if (intel->gen == 6) {
      assert(!src0.negate);
      assert(!src0.abs);
      assert(!src1.negate);
      assert(!src1.abs);
   }

   /* Math is the same ISA format as other opcodes, except that CondModifier
    * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
    */
   insn->header.destreg__conditionalmod = function;

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, src1);
}
1546
1547 /**
1548 * Extended math function, float[16].
1549 * Use 2 send instructions.
1550 */
/**
 * Extended math function, float[16].
 * Use 2 send instructions.
 *
 * Pre-gen6, the math unit is addressed with SIMD8 messages, so a
 * 16-wide operation is split into two SENDs: one for the first half
 * and one (marked COMPRESSION_2NDHALF) for the second, using the next
 * message register and the next dest register.  On gen6+ a single
 * native MATH instruction suffices.
 */
void brw_math_16( struct brw_compile *p,
		  struct brw_reg dest,
		  GLuint function,
		  GLuint saturate,
		  GLuint msg_reg_nr,
		  struct brw_reg src,
		  GLuint precision )
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;

   if (intel->gen >= 6) {
      insn = next_insn(p, BRW_OPCODE_MATH);

      /* Math is the same ISA format as other opcodes, except that CondModifier
       * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
       */
      insn->header.destreg__conditionalmod = function;
      insn->header.saturate = saturate;

      /* Source modifiers are ignored for extended math instructions. */
      assert(!src.negate);
      assert(!src.abs);

      brw_set_dest(p, insn, dest);
      brw_set_src0(p, insn, src);
      brw_set_src1(p, insn, brw_null_reg());
      return;
   }

   /* First instruction:
    */
   brw_push_insn_state(p);
   brw_set_predicate_control_flag_value(p, 0xff);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);

   insn = next_insn(p, BRW_OPCODE_SEND);
   insn->header.destreg__conditionalmod = msg_reg_nr;

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src);
   brw_set_math_message(p,
			insn,
			function,
			BRW_MATH_INTEGER_UNSIGNED,
			precision,
			saturate,
			BRW_MATH_DATA_VECTOR);

   /* Second instruction:
    */
   insn = next_insn(p, BRW_OPCODE_SEND);
   insn->header.compression_control = BRW_COMPRESSION_2NDHALF;
   insn->header.destreg__conditionalmod = msg_reg_nr+1;

   brw_set_dest(p, insn, offset(dest,1));
   brw_set_src0(p, insn, src);
   brw_set_math_message(p,
			insn,
			function,
			BRW_MATH_INTEGER_UNSIGNED,
			precision,
			saturate,
			BRW_MATH_DATA_VECTOR);

   brw_pop_insn_state(p);
}
1618
1619
1620 /**
1621 * Write a block of OWORDs (half a GRF each) from the scratch buffer,
1622 * using a constant offset per channel.
1623 *
1624 * The offset must be aligned to oword size (16 bytes). Used for
1625 * register spilling.
1626 */
/**
 * Write a block of OWORDs (half a GRF each) from the scratch buffer,
 * using a constant offset per channel.
 *
 * The offset must be aligned to oword size (16 bytes).  Used for
 * register spilling.
 *
 * \param mrf      message register holding the header; the payload is
 *                 expected in the following register(s).
 * \param num_regs number of GRFs to write (1 or 2 supported here).
 * \param offset   byte offset into the scratch buffer (converted to
 *                 oword units on gen6+).
 */
void brw_oword_block_write_scratch(struct brw_compile *p,
				   struct brw_reg mrf,
				   int num_regs,
				   GLuint offset)
{
   struct intel_context *intel = &p->brw->intel;
   uint32_t msg_control, msg_type;
   int mlen;

   if (intel->gen >= 6)
      offset /= 16;

   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);

   if (num_regs == 1) {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
      mlen = 2;
   } else {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
      mlen = 3;
   }

   /* Set up the message header.  This is g0, with g0.2 filled with
    * the offset.  We don't want to leave our offset around in g0 or
    * it'll screw up texture samples, so set it up inside the message
    * reg.
    */
   {
      brw_push_insn_state(p);
      brw_set_mask_control(p, BRW_MASK_DISABLE);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);

      brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));

      /* set message header global offset field (reg 0, element 2) */
      brw_MOV(p,
	      retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
				  mrf.nr,
				  2), BRW_REGISTER_TYPE_UD),
	      brw_imm_ud(offset));

      brw_pop_insn_state(p);
   }

   {
      struct brw_reg dest;
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
      int send_commit_msg;
      struct brw_reg src_header = retype(brw_vec8_grf(0, 0),
					 BRW_REGISTER_TYPE_UW);

      if (insn->header.compression_control != BRW_COMPRESSION_NONE) {
	 insn->header.compression_control = BRW_COMPRESSION_NONE;
	 src_header = vec16(src_header);
      }
      assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
      insn->header.destreg__conditionalmod = mrf.nr;

      /* Until gen6, writes followed by reads from the same location
       * are not guaranteed to be ordered unless write_commit is set.
       * If set, then a no-op write is issued to the destination
       * register to set a dependency, and a read from the destination
       * can be used to ensure the ordering.
       *
       * For gen6, only writes between different threads need ordering
       * protection.  Our use of DP writes is all about register
       * spilling within a thread.
       */
      if (intel->gen >= 6) {
	 dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
	 send_commit_msg = 0;
      } else {
	 dest = src_header;
	 send_commit_msg = 1;
      }

      brw_set_dest(p, insn, dest);
      if (intel->gen >= 6) {
	 brw_set_src0(p, insn, mrf);
      } else {
	 brw_set_src0(p, insn, brw_null_reg());
      }

      if (intel->gen >= 6)
	 msg_type = GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
      else
	 msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;

      brw_set_dp_write_message(p,
			       insn,
			       255, /* binding table index (255=stateless) */
			       msg_control,
			       msg_type,
			       mlen,
			       true, /* header_present */
			       0, /* not a render target */
			       send_commit_msg, /* response_length */
			       0, /* eot */
			       send_commit_msg);
   }
}
1728
1729
1730 /**
1731 * Read a block of owords (half a GRF each) from the scratch buffer
1732 * using a constant index per channel.
1733 *
1734 * Offset must be aligned to oword size (16 bytes). Used for register
1735 * spilling.
1736 */
/**
 * Read a block of owords (half a GRF each) from the scratch buffer
 * using a constant index per channel.
 *
 * Offset must be aligned to oword size (16 bytes).  Used for register
 * spilling.
 *
 * \param dest     destination GRF(s) for the read data.
 * \param mrf      message register used for the header.
 * \param num_regs number of GRFs to read (1 or 2 supported here).
 * \param offset   byte offset into the scratch buffer (converted to
 *                 oword units on gen6+).
 */
void
brw_oword_block_read_scratch(struct brw_compile *p,
			     struct brw_reg dest,
			     struct brw_reg mrf,
			     int num_regs,
			     GLuint offset)
{
   struct intel_context *intel = &p->brw->intel;
   uint32_t msg_control;
   int rlen;

   if (intel->gen >= 6)
      offset /= 16;

   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
   dest = retype(dest, BRW_REGISTER_TYPE_UW);

   if (num_regs == 1) {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
      rlen = 1;
   } else {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
      rlen = 2;
   }

   /* Build the message header in the MRF: g0 copied over, with the
    * scratch offset written into element 2.
    */
   {
      brw_push_insn_state(p);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      brw_set_mask_control(p, BRW_MASK_DISABLE);

      brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));

      /* set message header global offset field (reg 0, element 2) */
      brw_MOV(p,
	      retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
				  mrf.nr,
				  2), BRW_REGISTER_TYPE_UD),
	      brw_imm_ud(offset));

      brw_pop_insn_state(p);
   }

   {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);

      assert(insn->header.predicate_control == 0);
      insn->header.compression_control = BRW_COMPRESSION_NONE;
      insn->header.destreg__conditionalmod = mrf.nr;

      brw_set_dest(p, insn, dest);	/* UW? */
      if (intel->gen >= 6) {
	 brw_set_src0(p, insn, mrf);
      } else {
	 brw_set_src0(p, insn, brw_null_reg());
      }

      brw_set_dp_read_message(p,
			      insn,
			      255, /* binding table index (255=stateless) */
			      msg_control,
			      BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
			      BRW_DATAPORT_READ_TARGET_RENDER_CACHE,
			      1, /* msg_length */
			      rlen);
   }
}
1803
1804 /**
1805 * Read a float[4] vector from the data port Data Cache (const buffer).
1806 * Location (in buffer) should be a multiple of 16.
1807 * Used for fetching shader constants.
1808 */
/**
 * Read a float[4] vector from the data port Data Cache (const buffer).
 * Location (in buffer) should be a multiple of 16.
 * Used for fetching shader constants.
 *
 * \param offset           byte offset into the buffer (converted to
 *                         oword units on gen6+).
 * \param bind_table_index surface binding table entry for the buffer.
 */
void brw_oword_block_read(struct brw_compile *p,
			  struct brw_reg dest,
			  struct brw_reg mrf,
			  uint32_t offset,
			  uint32_t bind_table_index)
{
   struct intel_context *intel = &p->brw->intel;

   /* On newer hardware, offset is in units of owords. */
   if (intel->gen >= 6)
      offset /= 16;

   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);

   brw_push_insn_state(p);
   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_set_mask_control(p, BRW_MASK_DISABLE);

   brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));

   /* set message header global offset field (reg 0, element 2) */
   brw_MOV(p,
	   retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
			       mrf.nr,
			       2), BRW_REGISTER_TYPE_UD),
	   brw_imm_ud(offset));

   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
   insn->header.destreg__conditionalmod = mrf.nr;

   /* cast dest to a uword[8] vector */
   dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);

   brw_set_dest(p, insn, dest);
   if (intel->gen >= 6) {
      brw_set_src0(p, insn, mrf);
   } else {
      brw_set_src0(p, insn, brw_null_reg());
   }

   brw_set_dp_read_message(p,
			   insn,
			   bind_table_index,
			   BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW,
			   BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ,
			   BRW_DATAPORT_READ_TARGET_DATA_CACHE,
			   1, /* msg_length */
			   1); /* response_length (1 reg, 2 owords!) */

   brw_pop_insn_state(p);
}
1861
1862 /**
1863 * Read a set of dwords from the data port Data Cache (const buffer).
1864 *
1865 * Location (in buffer) appears as UD offsets in the register after
1866 * the provided mrf header reg.
1867 */
/**
 * Read a set of dwords from the data port Data Cache (const buffer).
 *
 * Location (in buffer) appears as UD offsets in the register after
 * the provided mrf header reg.
 */
void brw_dword_scattered_read(struct brw_compile *p,
			      struct brw_reg dest,
			      struct brw_reg mrf,
			      uint32_t bind_table_index)
{
   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);

   /* Header is just a copy of g0; per-channel offsets follow in mrf+1. */
   brw_push_insn_state(p);
   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
   brw_pop_insn_state(p);

   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
   insn->header.destreg__conditionalmod = mrf.nr;

   /* cast dest to a uword[8] vector */
   dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, brw_null_reg());

   brw_set_dp_read_message(p,
			   insn,
			   bind_table_index,
			   BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS,
			   BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ,
			   BRW_DATAPORT_READ_TARGET_DATA_CACHE,
			   2, /* msg_length */
			   1); /* response_length */
}
1900
1901
1902
1903 /**
1904 * Read float[4] constant(s) from VS constant buffer.
1905 * For relative addressing, two float[4] constants will be read into 'dest'.
1906 * Otherwise, one float[4] constant will be read into the lower half of 'dest'.
1907 */
/**
 * Read float[4] constant(s) from VS constant buffer.
 * For relative addressing, two float[4] constants will be read into 'dest'.
 * Otherwise, one float[4] constant will be read into the lower half of 'dest'.
 *
 * \param location         byte offset of the constant (converted to
 *                         oword units on gen6+).
 * \param bind_table_index surface binding table entry for the buffer.
 */
void brw_dp_READ_4_vs(struct brw_compile *p,
                      struct brw_reg dest,
                      GLuint location,
                      GLuint bind_table_index)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;
   GLuint msg_reg_nr = 1;

   if (intel->gen >= 6)
      location /= 16;

   /* Setup MRF[1] with location/offset into const buffer */
   brw_push_insn_state(p);
   brw_set_access_mode(p, BRW_ALIGN_1);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
   brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 2),
		     BRW_REGISTER_TYPE_UD),
	   brw_imm_ud(location));
   brw_pop_insn_state(p);

   insn = next_insn(p, BRW_OPCODE_SEND);

   insn->header.predicate_control = BRW_PREDICATE_NONE;
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.destreg__conditionalmod = msg_reg_nr;
   insn->header.mask_control = BRW_MASK_DISABLE;

   brw_set_dest(p, insn, dest);
   if (intel->gen >= 6) {
      brw_set_src0(p, insn, brw_message_reg(msg_reg_nr));
   } else {
      brw_set_src0(p, insn, brw_null_reg());
   }

   brw_set_dp_read_message(p,
			   insn,
			   bind_table_index,
			   0,
			   BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
			   BRW_DATAPORT_READ_TARGET_DATA_CACHE,
			   1, /* msg_length */
			   1); /* response_length (1 Oword) */
}
1954
1955 /**
1956 * Read a float[4] constant per vertex from VS constant buffer, with
1957 * relative addressing.
1958 */
/**
 * Read a float[4] constant per vertex from VS constant buffer, with
 * relative addressing.
 *
 * \param addr_reg         register holding the per-vertex index.
 * \param offset           constant offset added to the index.
 * \param bind_table_index surface binding table entry for the buffer.
 */
void brw_dp_READ_4_vs_relative(struct brw_compile *p,
			       struct brw_reg dest,
			       struct brw_reg addr_reg,
			       GLuint offset,
			       GLuint bind_table_index)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_reg src = brw_vec8_grf(0, 0);
   int msg_type;

   /* Setup MRF[1] with offset into const buffer */
   brw_push_insn_state(p);
   brw_set_access_mode(p, BRW_ALIGN_1);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   brw_set_predicate_control(p, BRW_PREDICATE_NONE);

   /* M1.0 is block offset 0, M1.4 is block offset 1, all other
    * fields ignored.
    */
   brw_ADD(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_D),
	   addr_reg, brw_imm_d(offset));
   brw_pop_insn_state(p);

   gen6_resolve_implied_move(p, &src, 0);
   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);

   insn->header.predicate_control = BRW_PREDICATE_NONE;
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.destreg__conditionalmod = 0;
   insn->header.mask_control = BRW_MASK_DISABLE;

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src);

   /* The dual-block read message type moved between generations. */
   if (intel->gen >= 6)
      msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
   else if (intel->gen == 5 || intel->is_g4x)
      msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
   else
      msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;

   brw_set_dp_read_message(p,
			   insn,
			   bind_table_index,
			   BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
			   msg_type,
			   BRW_DATAPORT_READ_TARGET_DATA_CACHE,
			   2, /* msg_length */
			   1); /* response_length */
}
2010
2011
2012
/**
 * Emit a render target write message to the data port.
 *
 * \param dispatch_width    8 or 16; selects the SIMD8/SIMD16 message
 *                          variant and the destination register width.
 * \param msg_reg_nr        first MRF of the color payload.
 * \param src0              payload source (replaced by the MRF on gen6+).
 * \param binding_table_index  render target's binding table entry; on
 *                          gen6+ index 0 uses SENDC (see below).
 * \param msg_length        payload length in registers.
 * \param response_length   registers expected back (normally 0).
 * \param eot               true for the shader's final (end-of-thread) write.
 * \param header_present    whether the payload carries a message header.
 */
void brw_fb_WRITE(struct brw_compile *p,
		  int dispatch_width,
                  GLuint msg_reg_nr,
                  struct brw_reg src0,
                  GLuint binding_table_index,
                  GLuint msg_length,
                  GLuint response_length,
                  bool eot,
                  bool header_present)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;
   GLuint msg_control, msg_type;
   struct brw_reg dest;

   if (dispatch_width == 16)
      dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
   else
      dest = retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW);

   /* NOTE(review): SENDC (send-with-dependency-check) is used for binding
    * table index 0 on gen6+ -- presumably to order writes against prior
    * reads of the same render target; confirm against the gen6 PRM.
    */
   if (intel->gen >= 6 && binding_table_index == 0) {
      insn = next_insn(p, BRW_OPCODE_SENDC);
   } else {
      insn = next_insn(p, BRW_OPCODE_SEND);
   }
   /* The execution mask is ignored for render target writes. */
   insn->header.predicate_control = 0;
   insn->header.compression_control = BRW_COMPRESSION_NONE;

   if (intel->gen >= 6) {
       /* headerless version, just submit color payload */
       src0 = brw_message_reg(msg_reg_nr);

       msg_type = GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
   } else {
      insn->header.destreg__conditionalmod = msg_reg_nr;

      msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
   }

   if (dispatch_width == 16)
      msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE;
   else
      msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01;

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_dp_write_message(p,
			    insn,
			    binding_table_index,
			    msg_control,
			    msg_type,
			    msg_length,
			    header_present,
			    1, /* last render target write */
			    response_length,
			    eot,
			    0 /* send_commit_msg */);
}
2072
2073
2074 /**
2075 * Texture sample instruction.
2076 * Note: the msg_type plus msg_length values determine exactly what kind
2077 * of sampling operation is performed. See volume 4, page 161 of docs.
2078 */
2079 void brw_SAMPLE(struct brw_compile *p,
2080 struct brw_reg dest,
2081 GLuint msg_reg_nr,
2082 struct brw_reg src0,
2083 GLuint binding_table_index,
2084 GLuint sampler,
2085 GLuint writemask,
2086 GLuint msg_type,
2087 GLuint response_length,
2088 GLuint msg_length,
2089 GLuint header_present,
2090 GLuint simd_mode,
2091 GLuint return_format)
2092 {
2093 struct intel_context *intel = &p->brw->intel;
2094 bool need_stall = 0;
2095
2096 if (writemask == 0) {
2097 /*printf("%s: zero writemask??\n", __FUNCTION__); */
2098 return;
2099 }
2100
2101 /* Hardware doesn't do destination dependency checking on send
2102 * instructions properly. Add a workaround which generates the
2103 * dependency by other means. In practice it seems like this bug
2104 * only crops up for texture samples, and only where registers are
2105 * written by the send and then written again later without being
2106 * read in between. Luckily for us, we already track that
2107 * information and use it to modify the writemask for the
2108 * instruction, so that is a guide for whether a workaround is
2109 * needed.
2110 */
2111 if (writemask != WRITEMASK_XYZW) {
2112 GLuint dst_offset = 0;
2113 GLuint i, newmask = 0, len = 0;
2114
2115 for (i = 0; i < 4; i++) {
2116 if (writemask & (1<<i))
2117 break;
2118 dst_offset += 2;
2119 }
2120 for (; i < 4; i++) {
2121 if (!(writemask & (1<<i)))
2122 break;
2123 newmask |= 1<<i;
2124 len++;
2125 }
2126
2127 if (newmask != writemask) {
2128 need_stall = 1;
2129 /* printf("need stall %x %x\n", newmask , writemask); */
2130 }
2131 else {
2132 bool dispatch_16 = false;
2133
2134 struct brw_reg m1 = brw_message_reg(msg_reg_nr);
2135
2136 guess_execution_size(p, p->current, dest);
2137 if (p->current->header.execution_size == BRW_EXECUTE_16)
2138 dispatch_16 = true;
2139
2140 newmask = ~newmask & WRITEMASK_XYZW;
2141
2142 brw_push_insn_state(p);
2143
2144 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
2145 brw_set_mask_control(p, BRW_MASK_DISABLE);
2146
2147 brw_MOV(p, retype(m1, BRW_REGISTER_TYPE_UD),
2148 retype(brw_vec8_grf(0,0), BRW_REGISTER_TYPE_UD));
2149 brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12));
2150
2151 brw_pop_insn_state(p);
2152
2153 src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
2154 dest = offset(dest, dst_offset);
2155
2156 /* For 16-wide dispatch, masked channels are skipped in the
2157 * response. For 8-wide, masked channels still take up slots,
2158 * and are just not written to.
2159 */
2160 if (dispatch_16)
2161 response_length = len * 2;
2162 }
2163 }
2164
2165 {
2166 struct brw_instruction *insn;
2167
2168 gen6_resolve_implied_move(p, &src0, msg_reg_nr);
2169
2170 insn = next_insn(p, BRW_OPCODE_SEND);
2171 insn->header.predicate_control = 0; /* XXX */
2172 insn->header.compression_control = BRW_COMPRESSION_NONE;
2173 if (intel->gen < 6)
2174 insn->header.destreg__conditionalmod = msg_reg_nr;
2175
2176 brw_set_dest(p, insn, dest);
2177 brw_set_src0(p, insn, src0);
2178 brw_set_sampler_message(p, insn,
2179 binding_table_index,
2180 sampler,
2181 msg_type,
2182 response_length,
2183 msg_length,
2184 header_present,
2185 simd_mode,
2186 return_format);
2187 }
2188
2189 if (need_stall) {
2190 struct brw_reg reg = vec8(offset(dest, response_length-1));
2191
2192 /* mov (8) r9.0<1>:f r9.0<8;8,1>:f { Align1 }
2193 */
2194 brw_push_insn_state(p);
2195 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
2196 brw_MOV(p, retype(reg, BRW_REGISTER_TYPE_UD),
2197 retype(reg, BRW_REGISTER_TYPE_UD));
2198 brw_pop_insn_state(p);
2199 }
2200
2201 }
2202
/* All these variables are pretty confusing - we might be better off
 * using bitmasks and macros for this, in the old style.  Or perhaps
 * just having the caller instantiate the fields in dword3 itself.
 */
/**
 * Emit a URB write message.
 *
 * \param dest             destination register for any response.
 * \param msg_reg_nr       first MRF of the payload.
 * \param src0             payload source (gen6+ implied move handled).
 * \param allocate         whether the message allocates URB entries.
 * \param used             whether the written data is complete/used.
 * \param msg_length       payload length in registers.
 * \param response_length  registers expected back.
 * \param eot              end-of-thread flag.
 * \param writes_complete  writes-complete flag for the URB message.
 * \param offset           URB destination offset.
 * \param swizzle          URB swizzle control.
 */
void brw_urb_WRITE(struct brw_compile *p,
		   struct brw_reg dest,
		   GLuint msg_reg_nr,
		   struct brw_reg src0,
		   bool allocate,
		   bool used,
		   GLuint msg_length,
		   GLuint response_length,
		   bool eot,
		   bool writes_complete,
		   GLuint offset,
		   GLuint swizzle)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;

   gen6_resolve_implied_move(p, &src0, msg_reg_nr);

   if (intel->gen == 7) {
      /* Enable Channel Masks in the URB_WRITE_HWORD message header */
      brw_push_insn_state(p);
      brw_set_access_mode(p, BRW_ALIGN_1);
      brw_OR(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 5),
		       BRW_REGISTER_TYPE_UD),
	        retype(brw_vec1_grf(0, 5), BRW_REGISTER_TYPE_UD),
		brw_imm_ud(0xff00));
      brw_pop_insn_state(p);
   }

   insn = next_insn(p, BRW_OPCODE_SEND);

   assert(msg_length < BRW_MAX_MRF);

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, brw_imm_d(0));

   /* Pre-gen6, the MRF number rides in the destreg field of a SEND. */
   if (intel->gen < 6)
      insn->header.destreg__conditionalmod = msg_reg_nr;

   brw_set_urb_message(p,
		       insn,
		       allocate,
		       used,
		       msg_length,
		       response_length,
		       eot,
		       writes_complete,
		       offset,
		       swizzle);
}
2258
2259 static int
2260 brw_find_next_block_end(struct brw_compile *p, int start)
2261 {
2262 int ip;
2263
2264 for (ip = start + 1; ip < p->nr_insn; ip++) {
2265 struct brw_instruction *insn = &p->store[ip];
2266
2267 switch (insn->header.opcode) {
2268 case BRW_OPCODE_ENDIF:
2269 case BRW_OPCODE_ELSE:
2270 case BRW_OPCODE_WHILE:
2271 return ip;
2272 }
2273 }
2274 assert(!"not reached");
2275 return start + 1;
2276 }
2277
2278 /* There is no DO instruction on gen6, so to find the end of the loop
2279 * we have to see if the loop is jumping back before our start
2280 * instruction.
2281 */
2282 static int
2283 brw_find_loop_end(struct brw_compile *p, int start)
2284 {
2285 struct intel_context *intel = &p->brw->intel;
2286 int ip;
2287 int br = 2;
2288
2289 for (ip = start + 1; ip < p->nr_insn; ip++) {
2290 struct brw_instruction *insn = &p->store[ip];
2291
2292 if (insn->header.opcode == BRW_OPCODE_WHILE) {
2293 int jip = intel->gen == 6 ? insn->bits1.branch_gen6.jump_count
2294 : insn->bits3.break_cont.jip;
2295 if (ip + jip / br <= start)
2296 return ip;
2297 }
2298 }
2299 assert(!"not reached");
2300 return start + 1;
2301 }
2302
/* After program generation, go back and update the UIP and JIP of
 * BREAK and CONT instructions to their correct locations.
 *
 * BREAK/CONTINUE are emitted with placeholder jump targets because the
 * final instruction positions aren't known until code generation is
 * complete; this pass patches them in.  Only needed on gen6+ (earlier
 * gens don't use UIP/JIP here, hence the early return).
 */
void
brw_set_uip_jip(struct brw_compile *p)
{
   struct intel_context *intel = &p->brw->intel;
   int ip;
   /* NOTE(review): br == 2 scales instruction-index deltas into the
    * hardware's jump-offset units -- confirm unit size against the PRM.
    */
   int br = 2;

   if (intel->gen < 6)
      return;

   for (ip = 0; ip < p->nr_insn; ip++) {
      struct brw_instruction *insn = &p->store[ip];

      switch (insn->header.opcode) {
      case BRW_OPCODE_BREAK:
	 insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip);
	 /* Gen7 UIP points to WHILE; Gen6 points just after it */
	 insn->bits3.break_cont.uip =
	    br * (brw_find_loop_end(p, ip) - ip + (intel->gen == 6 ? 1 : 0));
	 break;
      case BRW_OPCODE_CONTINUE:
	 insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip);
	 insn->bits3.break_cont.uip = br * (brw_find_loop_end(p, ip) - ip);

	 assert(insn->bits3.break_cont.uip != 0);
	 assert(insn->bits3.break_cont.jip != 0);
	 break;
      }
   }
}
2336
/**
 * Emit an FF_SYNC URB message (SEND).
 *
 * \param dest             destination register for the response.
 * \param msg_reg_nr       first MRF of the payload.
 * \param src0             payload source (gen6+ implied move handled).
 * \param allocate         whether the message allocates a URB handle.
 * \param response_length  registers expected back.
 * \param eot              end-of-thread flag.
 */
void brw_ff_sync(struct brw_compile *p,
		   struct brw_reg dest,
		   GLuint msg_reg_nr,
		   struct brw_reg src0,
		   bool allocate,
		   GLuint response_length,
		   bool eot)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;

   gen6_resolve_implied_move(p, &src0, msg_reg_nr);

   insn = next_insn(p, BRW_OPCODE_SEND);
   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, brw_imm_d(0));

   /* Pre-gen6, the MRF number rides in the destreg field of a SEND. */
   if (intel->gen < 6)
      insn->header.destreg__conditionalmod = msg_reg_nr;

   brw_set_ff_sync_message(p,
			   insn,
			   allocate,
			   response_length,
			   eot);
}