i965: Fix GS hang on Sandybridge
[mesa.git] / src / mesa / drivers / dri / i965 / brw_eu_emit.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32
33 #include "brw_context.h"
34 #include "brw_defines.h"
35 #include "brw_eu.h"
36
37
38
39
40 /***********************************************************************
41 * Internal helper for constructing instructions
42 */
43
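/* Pick an execution size from the destination register width: a compressed
 * instruction writing an 8-wide register executes as SIMD16; otherwise the
 * BRW_WIDTH_* encoding doubles as the BRW_EXECUTE_* encoding.
 */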
44 static void guess_execution_size( struct brw_instruction *insn,
45 struct brw_reg reg )
46 {
47 if (reg.width == BRW_WIDTH_8 &&
48 insn->header.compression_control == BRW_COMPRESSION_COMPRESSED)
49 insn->header.execution_size = BRW_EXECUTE_16;
50 else
51 insn->header.execution_size = reg.width; /* note - definitions are compatible */
52 }
53
54
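/* Pack the destination register descriptor into bits1 of the instruction,
 * handling direct vs. register-indirect addressing and align1 vs. align16
 * access modes, then derive the execution size from it.
 */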
55 static void brw_set_dest( struct brw_instruction *insn,
56 struct brw_reg dest )
57 {
58 if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE &&
59 dest.file != BRW_MESSAGE_REGISTER_FILE)
60 assert(dest.nr < 128);
61
62 insn->bits1.da1.dest_reg_file = dest.file;
63 insn->bits1.da1.dest_reg_type = dest.type;
64 insn->bits1.da1.dest_address_mode = dest.address_mode;
65
66 if (dest.address_mode == BRW_ADDRESS_DIRECT) {
67 insn->bits1.da1.dest_reg_nr = dest.nr;
68
69 if (insn->header.access_mode == BRW_ALIGN_1) {
70 insn->bits1.da1.dest_subreg_nr = dest.subnr;
71 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
72 dest.hstride = BRW_HORIZONTAL_STRIDE_1;
73 insn->bits1.da1.dest_horiz_stride = dest.hstride;
74 }
75 else {
76 insn->bits1.da16.dest_subreg_nr = dest.subnr / 16;
77 insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask;
78 /* even though it's ignored in da16, this still needs to be set to '01' */
79 insn->bits1.da16.dest_horiz_stride = 1;
80 }
81 }
82 else {
83 insn->bits1.ia1.dest_subreg_nr = dest.subnr;
84
85 /* These are different sizes in align1 vs align16:
86 */
87 if (insn->header.access_mode == BRW_ALIGN_1) {
88 insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset;
89 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
90 dest.hstride = BRW_HORIZONTAL_STRIDE_1;
91 insn->bits1.ia1.dest_horiz_stride = dest.hstride;
92 }
93 else {
94 insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset;
95 /* even though it's ignored in ia16, this still needs to be set to '01' */
96 insn->bits1.ia16.dest_horiz_stride = 1;
97 }
98 }
99
100 /* Set the execution size based on dest.width and
101 * insn->header.compression_control:
102 */
103 guess_execution_size(insn, dest);
104 }
105
106 extern int reg_type_size[];
107
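/* Decode the encoded region fields back to element counts and sanity-check
 * them; the numbered rules below follow the "Register Region Restrictions"
 * section of the docs cited in the comments.
 */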
108 static void
109 validate_reg(struct brw_instruction *insn, struct brw_reg reg)
110 {
111 int hstride_for_reg[] = {0, 1, 2, 4};
112 int vstride_for_reg[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256};
113 int width_for_reg[] = {1, 2, 4, 8, 16};
114 int execsize_for_reg[] = {1, 2, 4, 8, 16};
115 int width, hstride, vstride, execsize;
116
117 if (reg.file == BRW_IMMEDIATE_VALUE) {
118 /* 3.3.6: Region Parameters. Restriction: Immediate vectors
119 * mean the destination has to be 128-bit aligned and the
120 * destination horiz stride has to be a word.
121 */
122 if (reg.type == BRW_REGISTER_TYPE_V) {
123 assert(hstride_for_reg[insn->bits1.da1.dest_horiz_stride] *
124 reg_type_size[insn->bits1.da1.dest_reg_type] == 2);
125 }
126
127 return;
128 }
129
130 if (reg.file == BRW_ARCHITECTURE_REGISTER_FILE &&
131 reg.nr == BRW_ARF_NULL)
132 return;
133
134 assert(reg.hstride >= 0 && reg.hstride < Elements(hstride_for_reg));
135 hstride = hstride_for_reg[reg.hstride];
136
137 if (reg.vstride == 0xf) {
138 vstride = -1;
139 } else {
140 assert(reg.vstride >= 0 && reg.vstride < Elements(vstride_for_reg));
141 vstride = vstride_for_reg[reg.vstride];
142 }
143
144 assert(reg.width >= 0 && reg.width < Elements(width_for_reg));
145 width = width_for_reg[reg.width];
146
147 assert(insn->header.execution_size >= 0 &&
148 insn->header.execution_size < Elements(execsize_for_reg));
149 execsize = execsize_for_reg[insn->header.execution_size];
150
151 /* Restrictions from 3.3.10: Register Region Restrictions. */
152 /* 3. */
153 assert(execsize >= width);
154
155 /* 4. */
156 if (execsize == width && hstride != 0) {
157 assert(vstride == -1 || vstride == width * hstride);
158 }
159
160 /* 5. */
161 if (execsize == width && hstride == 0) {
162 /* no restriction on vstride. */
163 }
164
165 /* 6. */
166 if (width == 1) {
167 assert(hstride == 0);
168 }
169
170 /* 7. */
171 if (execsize == 1 && width == 1) {
172 assert(hstride == 0);
173 assert(vstride == 0);
174 }
175
176 /* 8. */
177 if (vstride == 0 && hstride == 0) {
178 assert(width == 1);
179 }
180
181 /* 10. Check destination issues. */
182 }
183
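/* Pack the first source operand into bits1/bits2. Immediates land in bits3
 * and also require setting src1's file/type fields; scalar regions (width 1
 * with execution size 1) are normalized to a <0;1,0> region.
 */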
184 static void brw_set_src0( struct brw_instruction *insn,
185 struct brw_reg reg )
186 {
187 if (reg.file != BRW_ARCHITECTURE_REGISTER_FILE)
188 assert(reg.nr < 128);
189
190 validate_reg(insn, reg);
191
192 insn->bits1.da1.src0_reg_file = reg.file;
193 insn->bits1.da1.src0_reg_type = reg.type;
194 insn->bits2.da1.src0_abs = reg.abs;
195 insn->bits2.da1.src0_negate = reg.negate;
196 insn->bits2.da1.src0_address_mode = reg.address_mode;
197
198 if (reg.file == BRW_IMMEDIATE_VALUE) {
199 insn->bits3.ud = reg.dw1.ud;
200
201 /* Required to set some fields in src1 as well:
202 */
203 insn->bits1.da1.src1_reg_file = 0; /* arf */
204 insn->bits1.da1.src1_reg_type = reg.type;
205 }
206 else
207 {
208 if (reg.address_mode == BRW_ADDRESS_DIRECT) {
209 if (insn->header.access_mode == BRW_ALIGN_1) {
210 insn->bits2.da1.src0_subreg_nr = reg.subnr;
211 insn->bits2.da1.src0_reg_nr = reg.nr;
212 }
213 else {
214 insn->bits2.da16.src0_subreg_nr = reg.subnr / 16;
215 insn->bits2.da16.src0_reg_nr = reg.nr;
216 }
217 }
218 else {
219 insn->bits2.ia1.src0_subreg_nr = reg.subnr;
220
221 if (insn->header.access_mode == BRW_ALIGN_1) {
222 insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset;
223 }
224 else {
225 insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset;
226 }
227 }
228
229 if (insn->header.access_mode == BRW_ALIGN_1) {
230 if (reg.width == BRW_WIDTH_1 &&
231 insn->header.execution_size == BRW_EXECUTE_1) {
232 insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
233 insn->bits2.da1.src0_width = BRW_WIDTH_1;
234 insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0;
235 }
236 else {
237 insn->bits2.da1.src0_horiz_stride = reg.hstride;
238 insn->bits2.da1.src0_width = reg.width;
239 insn->bits2.da1.src0_vert_stride = reg.vstride;
240 }
241 }
242 else {
243 insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
244 insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
245 insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
246 insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
247
248 /* This is an oddity arising from using the same register
249 * descriptions in align_16 mode as in align_1:
250 */
251 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
252 insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4;
253 else
254 insn->bits2.da16.src0_vert_stride = reg.vstride;
255 }
256 }
257 }
258
259
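/* Pack the second source operand into bits1/bits3. Unlike src0, src1 can
 * never be a message register, and indirect addressing is not supported.
 */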
260 void brw_set_src1( struct brw_instruction *insn,
261 struct brw_reg reg )
262 {
263 assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
264
265 assert(reg.nr < 128);
266
267 validate_reg(insn, reg);
268
269 insn->bits1.da1.src1_reg_file = reg.file;
270 insn->bits1.da1.src1_reg_type = reg.type;
271 insn->bits3.da1.src1_abs = reg.abs;
272 insn->bits3.da1.src1_negate = reg.negate;
273
274 /* Only src1 can be immediate in two-argument instructions.
275 */
276 assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE);
277
278 if (reg.file == BRW_IMMEDIATE_VALUE) {
279 insn->bits3.ud = reg.dw1.ud;
280 }
281 else {
282 /* This is a hardware restriction, which may or may not be lifted
283 * in the future:
284 */
285 assert (reg.address_mode == BRW_ADDRESS_DIRECT);
286 /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */
287
288 if (insn->header.access_mode == BRW_ALIGN_1) {
289 insn->bits3.da1.src1_subreg_nr = reg.subnr;
290 insn->bits3.da1.src1_reg_nr = reg.nr;
291 }
292 else {
293 insn->bits3.da16.src1_subreg_nr = reg.subnr / 16;
294 insn->bits3.da16.src1_reg_nr = reg.nr;
295 }
296
297 if (insn->header.access_mode == BRW_ALIGN_1) {
298 if (reg.width == BRW_WIDTH_1 &&
299 insn->header.execution_size == BRW_EXECUTE_1) {
300 insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
301 insn->bits3.da1.src1_width = BRW_WIDTH_1;
302 insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0;
303 }
304 else {
305 insn->bits3.da1.src1_horiz_stride = reg.hstride;
306 insn->bits3.da1.src1_width = reg.width;
307 insn->bits3.da1.src1_vert_stride = reg.vstride;
308 }
309 }
310 else {
311 insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
312 insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
313 insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
314 insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
315
316 /* This is an oddity arising from using the same register
317 * descriptions in align_16 mode as in align_1:
318 */
319 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
320 insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4;
321 else
322 insn->bits3.da16.src1_vert_stride = reg.vstride;
323 }
324 }
325 }
326
327
328
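/* Fill in the extended-math SEND message descriptor. The bits3 layout
 * differs between the original gen4 encoding and Ironlake (gen5).
 */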
329 static void brw_set_math_message( struct brw_context *brw,
330 struct brw_instruction *insn,
331 GLuint msg_length,
332 GLuint response_length,
333 GLuint function,
334 GLuint integer_type,
335 GLboolean low_precision,
336 GLboolean saturate,
337 GLuint dataType )
338 {
339 struct intel_context *intel = &brw->intel;
340 brw_set_src1(insn, brw_imm_d(0));
341
342 if (intel->gen == 5) {
343 insn->bits3.math_gen5.function = function;
344 insn->bits3.math_gen5.int_type = integer_type;
345 insn->bits3.math_gen5.precision = low_precision;
346 insn->bits3.math_gen5.saturate = saturate;
347 insn->bits3.math_gen5.data_type = dataType;
348 insn->bits3.math_gen5.snapshot = 0;
349 insn->bits3.math_gen5.header_present = 0;
350 insn->bits3.math_gen5.response_length = response_length;
351 insn->bits3.math_gen5.msg_length = msg_length;
352 insn->bits3.math_gen5.end_of_thread = 0;
353 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_MATH;
354 insn->bits2.send_gen5.end_of_thread = 0;
355 } else {
356 insn->bits3.math.function = function;
357 insn->bits3.math.int_type = integer_type;
358 insn->bits3.math.precision = low_precision;
359 insn->bits3.math.saturate = saturate;
360 insn->bits3.math.data_type = dataType;
361 insn->bits3.math.response_length = response_length;
362 insn->bits3.math.msg_length = msg_length;
363 insn->bits3.math.msg_target = BRW_MESSAGE_TARGET_MATH;
364 insn->bits3.math.end_of_thread = 0;
365 }
366 }
367
368
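/* Fill in an FF_SYNC URB message descriptor. This uses the gen5 URB layout,
 * so it is presumably only emitted on Ironlake and later; FF_SYNC appears to
 * be how the GS/clip fixed-function threads order themselves and (with
 * 'allocate' set) obtain an initial URB handle.
 */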
369 static void brw_set_ff_sync_message(struct brw_context *brw,
370 struct brw_instruction *insn,
371 GLboolean allocate,
372 GLuint response_length,
373 GLboolean end_of_thread)
374 {
375 struct intel_context *intel = &brw->intel;
376 brw_set_src1(insn, brw_imm_d(0));
377
378 insn->bits3.urb_gen5.opcode = 1; /* FF_SYNC */
379 insn->bits3.urb_gen5.offset = 0; /* Not used by FF_SYNC */
380 insn->bits3.urb_gen5.swizzle_control = 0; /* Not used by FF_SYNC */
381 insn->bits3.urb_gen5.allocate = allocate;
382 insn->bits3.urb_gen5.used = 0; /* Not used by FF_SYNC */
383 insn->bits3.urb_gen5.complete = 0; /* Not used by FF_SYNC */
384 insn->bits3.urb_gen5.header_present = 1;
385 insn->bits3.urb_gen5.response_length = response_length; /* may be 1 or 0 */
386 insn->bits3.urb_gen5.msg_length = 1;
387 insn->bits3.urb_gen5.end_of_thread = end_of_thread;
388 if (intel->gen >= 6) {
389 insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_URB;
390 } else {
391 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_URB;
392 insn->bits2.send_gen5.end_of_thread = end_of_thread;
393 }
394 }
395
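/* Fill in a URB write message descriptor, with separate encodings for
 * gen4/g4x, Ironlake, and (partially) Sandybridge, where the SFID has moved
 * into the instruction's condmod field.
 */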
396 static void brw_set_urb_message( struct brw_context *brw,
397 struct brw_instruction *insn,
398 GLboolean allocate,
399 GLboolean used,
400 GLuint msg_length,
401 GLuint response_length,
402 GLboolean end_of_thread,
403 GLboolean complete,
404 GLuint offset,
405 GLuint swizzle_control )
406 {
407 struct intel_context *intel = &brw->intel;
408 brw_set_src1(insn, brw_imm_d(0));
409
410 if (intel->gen >= 5) {
411 insn->bits3.urb_gen5.opcode = 0; /* ? */
412 insn->bits3.urb_gen5.offset = offset;
413 insn->bits3.urb_gen5.swizzle_control = swizzle_control;
414 insn->bits3.urb_gen5.allocate = allocate;
415 insn->bits3.urb_gen5.used = used; /* ? */
416 insn->bits3.urb_gen5.complete = complete;
417 insn->bits3.urb_gen5.header_present = 1;
418 insn->bits3.urb_gen5.response_length = response_length;
419 insn->bits3.urb_gen5.msg_length = msg_length;
420 insn->bits3.urb_gen5.end_of_thread = end_of_thread;
421 if (intel->gen >= 6) {
422 /* For SNB, the SFID bits moved to the condmod bits, and
423 * EOT stayed in bits3 above. Does the EOT bit setting
424 * below on Ironlake even do anything?
425 */
426 insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_URB;
427 } else {
428 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_URB;
429 insn->bits2.send_gen5.end_of_thread = end_of_thread;
430 }
431 } else {
432 insn->bits3.urb.opcode = 0; /* ? */
433 insn->bits3.urb.offset = offset;
434 insn->bits3.urb.swizzle_control = swizzle_control;
435 insn->bits3.urb.allocate = allocate;
436 insn->bits3.urb.used = used; /* ? */
437 insn->bits3.urb.complete = complete;
438 insn->bits3.urb.response_length = response_length;
439 insn->bits3.urb.msg_length = msg_length;
440 insn->bits3.urb.msg_target = BRW_MESSAGE_TARGET_URB;
441 insn->bits3.urb.end_of_thread = end_of_thread;
442 }
443 }
444
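/* Fill in a dataport (render cache) write message descriptor for the three
 * encodings handled here: gen6+, Ironlake, and original gen4.
 */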
445 static void brw_set_dp_write_message( struct brw_context *brw,
446 struct brw_instruction *insn,
447 GLuint binding_table_index,
448 GLuint msg_control,
449 GLuint msg_type,
450 GLuint msg_length,
451 GLboolean header_present,
452 GLuint pixel_scoreboard_clear,
453 GLuint response_length,
454 GLuint end_of_thread,
455 GLuint send_commit_msg)
456 {
457 struct intel_context *intel = &brw->intel;
458 brw_set_src1(insn, brw_imm_ud(0));
459
460 if (intel->gen >= 6) {
461 insn->bits3.dp_render_cache.binding_table_index = binding_table_index;
462 insn->bits3.dp_render_cache.msg_control = msg_control;
463 insn->bits3.dp_render_cache.pixel_scoreboard_clear = pixel_scoreboard_clear;
464 insn->bits3.dp_render_cache.msg_type = msg_type;
465 insn->bits3.dp_render_cache.send_commit_msg = send_commit_msg;
466 insn->bits3.dp_render_cache.header_present = header_present;
467 insn->bits3.dp_render_cache.response_length = response_length;
468 insn->bits3.dp_render_cache.msg_length = msg_length;
469 insn->bits3.dp_render_cache.end_of_thread = end_of_thread;
470 insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
471 /* XXX is the below really needed? */
472 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
473 insn->bits2.send_gen5.end_of_thread = end_of_thread;
474 } else if (intel->gen == 5) {
475 insn->bits3.dp_write_gen5.binding_table_index = binding_table_index;
476 insn->bits3.dp_write_gen5.msg_control = msg_control;
477 insn->bits3.dp_write_gen5.pixel_scoreboard_clear = pixel_scoreboard_clear;
478 insn->bits3.dp_write_gen5.msg_type = msg_type;
479 insn->bits3.dp_write_gen5.send_commit_msg = send_commit_msg;
480 insn->bits3.dp_write_gen5.header_present = header_present;
481 insn->bits3.dp_write_gen5.response_length = response_length;
482 insn->bits3.dp_write_gen5.msg_length = msg_length;
483 insn->bits3.dp_write_gen5.end_of_thread = end_of_thread;
484 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
485 insn->bits2.send_gen5.end_of_thread = end_of_thread;
486 } else {
487 insn->bits3.dp_write.binding_table_index = binding_table_index;
488 insn->bits3.dp_write.msg_control = msg_control;
489 insn->bits3.dp_write.pixel_scoreboard_clear = pixel_scoreboard_clear;
490 insn->bits3.dp_write.msg_type = msg_type;
491 insn->bits3.dp_write.send_commit_msg = send_commit_msg;
492 insn->bits3.dp_write.response_length = response_length;
493 insn->bits3.dp_write.msg_length = msg_length;
494 insn->bits3.dp_write.msg_target = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
495 insn->bits3.dp_write.end_of_thread = end_of_thread;
496 }
497 }
498
499 static void brw_set_dp_read_message( struct brw_context *brw,
500 struct brw_instruction *insn,
501 GLuint binding_table_index,
502 GLuint msg_control,
503 GLuint msg_type,
504 GLuint target_cache,
505 GLuint msg_length,
506 GLuint response_length,
507 GLuint end_of_thread )
508 {
509 struct intel_context *intel = &brw->intel;
510 brw_set_src1(insn, brw_imm_d(0));
511
512 if (intel->gen == 5) {
513 insn->bits3.dp_read_gen5.binding_table_index = binding_table_index;
514 insn->bits3.dp_read_gen5.msg_control = msg_control;
515 insn->bits3.dp_read_gen5.msg_type = msg_type;
516 insn->bits3.dp_read_gen5.target_cache = target_cache;
517 insn->bits3.dp_read_gen5.header_present = 1;
518 insn->bits3.dp_read_gen5.response_length = response_length;
519 insn->bits3.dp_read_gen5.msg_length = msg_length;
520 insn->bits3.dp_read_gen5.pad1 = 0;
521 insn->bits3.dp_read_gen5.end_of_thread = end_of_thread;
522 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_READ;
523 insn->bits2.send_gen5.end_of_thread = end_of_thread;
524 } else {
525 insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/
526 insn->bits3.dp_read.msg_control = msg_control; /*8:11*/
527 insn->bits3.dp_read.msg_type = msg_type; /*12:13*/
528 insn->bits3.dp_read.target_cache = target_cache; /*14:15*/
529 insn->bits3.dp_read.response_length = response_length; /*16:19*/
530 insn->bits3.dp_read.msg_length = msg_length; /*20:23*/
531 insn->bits3.dp_read.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ; /*24:27*/
532 insn->bits3.dp_read.pad1 = 0; /*28:30*/
533 insn->bits3.dp_read.end_of_thread = end_of_thread; /*31*/
534 }
535 }
536
537 static void brw_set_sampler_message(struct brw_context *brw,
538 struct brw_instruction *insn,
539 GLuint binding_table_index,
540 GLuint sampler,
541 GLuint msg_type,
542 GLuint response_length,
543 GLuint msg_length,
544 GLboolean eot,
545 GLuint header_present,
546 GLuint simd_mode)
547 {
548 struct intel_context *intel = &brw->intel;
549 assert(eot == 0);
550 brw_set_src1(insn, brw_imm_d(0));
551
552 if (intel->gen >= 5) {
553 insn->bits3.sampler_gen5.binding_table_index = binding_table_index;
554 insn->bits3.sampler_gen5.sampler = sampler;
555 insn->bits3.sampler_gen5.msg_type = msg_type;
556 insn->bits3.sampler_gen5.simd_mode = simd_mode;
557 insn->bits3.sampler_gen5.header_present = header_present;
558 insn->bits3.sampler_gen5.response_length = response_length;
559 insn->bits3.sampler_gen5.msg_length = msg_length;
560 insn->bits3.sampler_gen5.end_of_thread = eot;
561 if (intel->gen >= 6)
562 insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_SAMPLER;
563 else {
564 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_SAMPLER;
565 insn->bits2.send_gen5.end_of_thread = eot;
566 }
567 } else if (intel->is_g4x) {
568 insn->bits3.sampler_g4x.binding_table_index = binding_table_index;
569 insn->bits3.sampler_g4x.sampler = sampler;
570 insn->bits3.sampler_g4x.msg_type = msg_type;
571 insn->bits3.sampler_g4x.response_length = response_length;
572 insn->bits3.sampler_g4x.msg_length = msg_length;
573 insn->bits3.sampler_g4x.end_of_thread = eot;
574 insn->bits3.sampler_g4x.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
575 } else {
576 insn->bits3.sampler.binding_table_index = binding_table_index;
577 insn->bits3.sampler.sampler = sampler;
578 insn->bits3.sampler.msg_type = msg_type;
579 insn->bits3.sampler.return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32;
580 insn->bits3.sampler.response_length = response_length;
581 insn->bits3.sampler.msg_length = msg_length;
582 insn->bits3.sampler.end_of_thread = eot;
583 insn->bits3.sampler.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
584 }
585 }
586
587
588
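/* Allocate the next instruction slot, seeded from the template instruction
 * in p->current so the current default state (predication, compression,
 * etc.) is inherited, then reset any one-shot fields in the template.
 */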
589 static struct brw_instruction *next_insn( struct brw_compile *p,
590 GLuint opcode )
591 {
592 struct brw_instruction *insn;
593
594 assert(p->nr_insn + 1 < BRW_EU_MAX_INSN);
595
596 insn = &p->store[p->nr_insn++];
597 memcpy(insn, p->current, sizeof(*insn));
598
599 /* Reset this one-shot flag:
600 */
601
602 if (p->current->header.destreg__conditionalmod) {
603 p->current->header.destreg__conditionalmod = 0;
604 p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
605 }
606
607 insn->header.opcode = opcode;
608 return insn;
609 }
610
611
612 static struct brw_instruction *brw_alu1( struct brw_compile *p,
613 GLuint opcode,
614 struct brw_reg dest,
615 struct brw_reg src )
616 {
617 struct brw_instruction *insn = next_insn(p, opcode);
618 brw_set_dest(insn, dest);
619 brw_set_src0(insn, src);
620 return insn;
621 }
622
623 static struct brw_instruction *brw_alu2(struct brw_compile *p,
624 GLuint opcode,
625 struct brw_reg dest,
626 struct brw_reg src0,
627 struct brw_reg src1 )
628 {
629 struct brw_instruction *insn = next_insn(p, opcode);
630 brw_set_dest(insn, dest);
631 brw_set_src0(insn, src0);
632 brw_set_src1(insn, src1);
633 return insn;
634 }
635
636
637 /***********************************************************************
638 * Convenience routines.
639 */
640 #define ALU1(OP) \
641 struct brw_instruction *brw_##OP(struct brw_compile *p, \
642 struct brw_reg dest, \
643 struct brw_reg src0) \
644 { \
645 return brw_alu1(p, BRW_OPCODE_##OP, dest, src0); \
646 }
647
648 #define ALU2(OP) \
649 struct brw_instruction *brw_##OP(struct brw_compile *p, \
650 struct brw_reg dest, \
651 struct brw_reg src0, \
652 struct brw_reg src1) \
653 { \
654 return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1); \
655 }
656
657
658 ALU1(MOV)
659 ALU2(SEL)
660 ALU1(NOT)
661 ALU2(AND)
662 ALU2(OR)
663 ALU2(XOR)
664 ALU2(SHR)
665 ALU2(SHL)
666 ALU2(RSR)
667 ALU2(RSL)
668 ALU2(ASR)
669 ALU1(FRC)
670 ALU1(RNDD)
671 ALU1(RNDZ)
672 ALU2(MAC)
673 ALU2(MACH)
674 ALU1(LZD)
675 ALU2(DP4)
676 ALU2(DPH)
677 ALU2(DP3)
678 ALU2(DP2)
679 ALU2(LINE)
680 ALU2(PLN)
681
682 struct brw_instruction *brw_ADD(struct brw_compile *p,
683 struct brw_reg dest,
684 struct brw_reg src0,
685 struct brw_reg src1)
686 {
687 /* 6.2.2: add */
688 if (src0.type == BRW_REGISTER_TYPE_F ||
689 (src0.file == BRW_IMMEDIATE_VALUE &&
690 src0.type == BRW_REGISTER_TYPE_VF)) {
691 assert(src1.type != BRW_REGISTER_TYPE_UD);
692 assert(src1.type != BRW_REGISTER_TYPE_D);
693 }
694
695 if (src1.type == BRW_REGISTER_TYPE_F ||
696 (src1.file == BRW_IMMEDIATE_VALUE &&
697 src1.type == BRW_REGISTER_TYPE_VF)) {
698 assert(src0.type != BRW_REGISTER_TYPE_UD);
699 assert(src0.type != BRW_REGISTER_TYPE_D);
700 }
701
702 return brw_alu2(p, BRW_OPCODE_ADD, dest, src0, src1);
703 }
704
705 struct brw_instruction *brw_MUL(struct brw_compile *p,
706 struct brw_reg dest,
707 struct brw_reg src0,
708 struct brw_reg src1)
709 {
710 /* 6.32.38: mul */
711 if (src0.type == BRW_REGISTER_TYPE_D ||
712 src0.type == BRW_REGISTER_TYPE_UD ||
713 src1.type == BRW_REGISTER_TYPE_D ||
714 src1.type == BRW_REGISTER_TYPE_UD) {
715 assert(dest.type != BRW_REGISTER_TYPE_F);
716 }
717
718 if (src0.type == BRW_REGISTER_TYPE_F ||
719 (src0.file == BRW_IMMEDIATE_VALUE &&
720 src0.type == BRW_REGISTER_TYPE_VF)) {
721 assert(src1.type != BRW_REGISTER_TYPE_UD);
722 assert(src1.type != BRW_REGISTER_TYPE_D);
723 }
724
725 if (src1.type == BRW_REGISTER_TYPE_F ||
726 (src1.file == BRW_IMMEDIATE_VALUE &&
727 src1.type == BRW_REGISTER_TYPE_VF)) {
728 assert(src0.type != BRW_REGISTER_TYPE_UD);
729 assert(src0.type != BRW_REGISTER_TYPE_D);
730 }
731
732 assert(src0.file != BRW_ARCHITECTURE_REGISTER_FILE ||
733 src0.nr != BRW_ARF_ACCUMULATOR);
734 assert(src1.file != BRW_ARCHITECTURE_REGISTER_FILE ||
735 src1.nr != BRW_ARF_ACCUMULATOR);
736
737 return brw_alu2(p, BRW_OPCODE_MUL, dest, src0, src1);
738 }
739
740
741 void brw_NOP(struct brw_compile *p)
742 {
743 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP);
744 brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
745 brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
746 brw_set_src1(insn, brw_imm_ud(0x0));
747 }
748
749
750
751
752
753 /***********************************************************************
754 * Comparisons, if/else/endif
755 */
756
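/* Emit a JMPI; the actual jump distance is normally patched in afterwards by
 * brw_land_fwd_jump() below, scaled by 2 on Ironlake and later.
 */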
757 struct brw_instruction *brw_JMPI(struct brw_compile *p,
758 struct brw_reg dest,
759 struct brw_reg src0,
760 struct brw_reg src1)
761 {
762 struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1);
763
764 insn->header.execution_size = 1;
765 insn->header.compression_control = BRW_COMPRESSION_NONE;
766 insn->header.mask_control = BRW_MASK_DISABLE;
767
768 p->current->header.predicate_control = BRW_PREDICATE_NONE;
769
770 return insn;
771 }
772
773 /* EU takes the value from the flag register and pushes it onto some
774 * sort of a stack (presumably merging with any flag value already on
775 * the stack). Within an if block, the flags at the top of the stack
776 * control execution on each channel of the unit, e.g. on each of the
777 * 16 pixel values in our wm programs.
778 *
779 * When the matching 'else' instruction is reached (presumably by
780 * countdown of the instruction count patched in by our ELSE/ENDIF
781 * functions), the relevant flags are inverted.
782 *
783 * When the matching 'endif' instruction is reached, the flags are
784 * popped off. If the stack is now empty, normal execution resumes.
785 *
786 * No attempt is made to deal with stack overflow (14 elements?).
787 */
788 struct brw_instruction *brw_IF(struct brw_compile *p, GLuint execute_size)
789 {
790 struct intel_context *intel = &p->brw->intel;
791 struct brw_instruction *insn;
792
793 if (p->single_program_flow) {
794 assert(execute_size == BRW_EXECUTE_1);
795
796 insn = next_insn(p, BRW_OPCODE_ADD);
797 insn->header.predicate_inverse = 1;
798 } else {
799 insn = next_insn(p, BRW_OPCODE_IF);
800 }
801
802 /* Override the defaults for this instruction:
803 */
804 if (intel->gen < 6) {
805 brw_set_dest(insn, brw_ip_reg());
806 brw_set_src0(insn, brw_ip_reg());
807 brw_set_src1(insn, brw_imm_d(0x0));
808 } else {
809 brw_set_dest(insn, brw_imm_w(0));
810 brw_set_src0(insn, brw_null_reg());
811 brw_set_src1(insn, brw_null_reg());
812 }
813
814 insn->header.execution_size = execute_size;
815 insn->header.compression_control = BRW_COMPRESSION_NONE;
816 insn->header.predicate_control = BRW_PREDICATE_NORMAL;
817 insn->header.mask_control = BRW_MASK_ENABLE;
818 if (!p->single_program_flow)
819 insn->header.thread_control = BRW_THREAD_SWITCH;
820
821 p->current->header.predicate_control = BRW_PREDICATE_NONE;
822
823 return insn;
824 }
825
826
827 struct brw_instruction *brw_ELSE(struct brw_compile *p,
828 struct brw_instruction *if_insn)
829 {
830 struct intel_context *intel = &p->brw->intel;
831 struct brw_instruction *insn;
832 GLuint br = 1;
833
834 /* The jump count is in units of 64-bit chunks, so one 128-bit
835 instruction counts as 2 chunks. */
836 if (intel->gen >= 5)
837 br = 2;
838
839 if (p->single_program_flow) {
840 insn = next_insn(p, BRW_OPCODE_ADD);
841 } else {
842 insn = next_insn(p, BRW_OPCODE_ELSE);
843 }
844
845 if (intel->gen < 6) {
846 brw_set_dest(insn, brw_ip_reg());
847 brw_set_src0(insn, brw_ip_reg());
848 brw_set_src1(insn, brw_imm_d(0x0));
849 } else {
850 brw_set_dest(insn, brw_imm_w(0));
851 brw_set_src0(insn, brw_null_reg());
852 brw_set_src1(insn, brw_null_reg());
853 }
854
855 insn->header.compression_control = BRW_COMPRESSION_NONE;
856 insn->header.execution_size = if_insn->header.execution_size;
857 insn->header.mask_control = BRW_MASK_ENABLE;
858 if (!p->single_program_flow)
859 insn->header.thread_control = BRW_THREAD_SWITCH;
860
861 /* Patch the if instruction to point at this instruction.
862 */
863 if (p->single_program_flow) {
864 assert(if_insn->header.opcode == BRW_OPCODE_ADD);
865
866 if_insn->bits3.ud = (insn - if_insn + 1) * 16;
867 } else {
868 assert(if_insn->header.opcode == BRW_OPCODE_IF);
869
870 if (intel->gen < 6) {
871 if_insn->bits3.if_else.jump_count = br * (insn - if_insn);
872 if_insn->bits3.if_else.pop_count = 0;
873 if_insn->bits3.if_else.pad0 = 0;
874 } else {
875 if_insn->bits1.branch_gen6.jump_count = br * (insn - if_insn + 1);
876 }
877 }
878
879 return insn;
880 }
881
882 void brw_ENDIF(struct brw_compile *p,
883 struct brw_instruction *patch_insn)
884 {
885 struct intel_context *intel = &p->brw->intel;
886 GLuint br = 1;
887
888 if (intel->gen >= 5)
889 br = 2;
890
891 if (p->single_program_flow) {
892 /* In single program flow mode, there's no need to execute an ENDIF,
893 * since we don't need to do any stack operations, and if we're executing
894 * currently, we want to just continue executing.
895 */
896 struct brw_instruction *next = &p->store[p->nr_insn];
897
898 assert(patch_insn->header.opcode == BRW_OPCODE_ADD);
899
900 patch_insn->bits3.ud = (next - patch_insn) * 16;
901 } else {
902 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_ENDIF);
903
904 if (intel->gen < 6) {
905 brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
906 brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
907 brw_set_src1(insn, brw_imm_d(0x0));
908 } else {
909 brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_W));
910 brw_set_src0(insn, brw_null_reg());
911 brw_set_src1(insn, brw_null_reg());
912 }
913
914 insn->header.compression_control = BRW_COMPRESSION_NONE;
915 insn->header.execution_size = patch_insn->header.execution_size;
916 insn->header.mask_control = BRW_MASK_ENABLE;
917 insn->header.thread_control = BRW_THREAD_SWITCH;
918
919 assert(patch_insn->bits3.if_else.jump_count == 0);
920
921 /* Patch the if or else instructions to point at this or the next
922 * instruction respectively.
923 */
924 if (patch_insn->header.opcode == BRW_OPCODE_IF) {
925 if (intel->gen < 6) {
926 /* Turn it into an IFF, which means no mask stack operations for
927 * all-false and jumping past the ENDIF.
928 */
929 patch_insn->header.opcode = BRW_OPCODE_IFF;
930 patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1);
931 patch_insn->bits3.if_else.pop_count = 0;
932 patch_insn->bits3.if_else.pad0 = 0;
933 } else {
934 /* As of gen6, there is no IFF and IF must point to the ENDIF. */
935 patch_insn->bits1.branch_gen6.jump_count = br * (insn - patch_insn);
936 }
937 } else {
938 assert(patch_insn->header.opcode == BRW_OPCODE_ELSE);
939 if (intel->gen < 6) {
940 /* BRW_OPCODE_ELSE pre-gen6 should point just past the
941 * matching ENDIF.
942 */
943 patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1);
944 patch_insn->bits3.if_else.pop_count = 1;
945 patch_insn->bits3.if_else.pad0 = 0;
946 } else {
947 /* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. */
948 patch_insn->bits1.branch_gen6.jump_count = br * (insn - patch_insn);
949 }
950 }
951
952 /* Also pop item off the stack in the endif instruction:
953 */
954 if (intel->gen < 6) {
955 insn->bits3.if_else.jump_count = 0;
956 insn->bits3.if_else.pop_count = 1;
957 insn->bits3.if_else.pad0 = 0;
958 } else {
959 insn->bits1.branch_gen6.jump_count = 2;
960 }
961 }
962 }
963
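/* BREAK/CONT: pop_count is presumably the number of if/else mask-stack
 * entries to pop when leaving the enclosing conditionals (see the
 * if/else/endif comment above).
 */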
964 struct brw_instruction *brw_BREAK(struct brw_compile *p, int pop_count)
965 {
966 struct brw_instruction *insn;
967 insn = next_insn(p, BRW_OPCODE_BREAK);
968 brw_set_dest(insn, brw_ip_reg());
969 brw_set_src0(insn, brw_ip_reg());
970 brw_set_src1(insn, brw_imm_d(0x0));
971 insn->header.compression_control = BRW_COMPRESSION_NONE;
972 insn->header.execution_size = BRW_EXECUTE_8;
973 /* insn->header.mask_control = BRW_MASK_DISABLE; */
974 insn->bits3.if_else.pad0 = 0;
975 insn->bits3.if_else.pop_count = pop_count;
976 return insn;
977 }
978
979 struct brw_instruction *brw_CONT(struct brw_compile *p, int pop_count)
980 {
981 struct brw_instruction *insn;
982 insn = next_insn(p, BRW_OPCODE_CONTINUE);
983 brw_set_dest(insn, brw_ip_reg());
984 brw_set_src0(insn, brw_ip_reg());
985 brw_set_src1(insn, brw_imm_d(0x0));
986 insn->header.compression_control = BRW_COMPRESSION_NONE;
987 insn->header.execution_size = BRW_EXECUTE_8;
988 /* insn->header.mask_control = BRW_MASK_DISABLE; */
989 insn->bits3.if_else.pad0 = 0;
990 insn->bits3.if_else.pop_count = pop_count;
991 return insn;
992 }
993
994 /* DO/WHILE loop:
995 */
996 struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size)
997 {
998 if (p->single_program_flow) {
999 return &p->store[p->nr_insn];
1000 } else {
1001 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO);
1002
1003 /* Override the defaults for this instruction:
1004 */
1005 brw_set_dest(insn, brw_null_reg());
1006 brw_set_src0(insn, brw_null_reg());
1007 brw_set_src1(insn, brw_null_reg());
1008
1009 insn->header.compression_control = BRW_COMPRESSION_NONE;
1010 insn->header.execution_size = execute_size;
1011 insn->header.predicate_control = BRW_PREDICATE_NONE;
1012 /* insn->header.mask_control = BRW_MASK_ENABLE; */
1013 /* insn->header.mask_control = BRW_MASK_DISABLE; */
1014
1015 return insn;
1016 }
1017 }
1018
1019
1020
1021 struct brw_instruction *brw_WHILE(struct brw_compile *p,
1022 struct brw_instruction *do_insn)
1023 {
1024 struct intel_context *intel = &p->brw->intel;
1025 struct brw_instruction *insn;
1026 GLuint br = 1;
1027
1028 if (intel->gen >= 5)
1029 br = 2;
1030
1031 if (p->single_program_flow)
1032 insn = next_insn(p, BRW_OPCODE_ADD);
1033 else
1034 insn = next_insn(p, BRW_OPCODE_WHILE);
1035
1036 brw_set_dest(insn, brw_ip_reg());
1037 brw_set_src0(insn, brw_ip_reg());
1038 brw_set_src1(insn, brw_imm_d(0x0));
1039
1040 insn->header.compression_control = BRW_COMPRESSION_NONE;
1041
1042 if (p->single_program_flow) {
1043 insn->header.execution_size = BRW_EXECUTE_1;
1044
1045 insn->bits3.d = (do_insn - insn) * 16;
1046 } else {
1047 insn->header.execution_size = do_insn->header.execution_size;
1048
1049 assert(do_insn->header.opcode == BRW_OPCODE_DO);
1050 insn->bits3.if_else.jump_count = br * (do_insn - insn + 1);
1051 insn->bits3.if_else.pop_count = 0;
1052 insn->bits3.if_else.pad0 = 0;
1053 }
1054
1055 /* insn->header.mask_control = BRW_MASK_ENABLE; */
1056
1057 /* insn->header.mask_control = BRW_MASK_DISABLE; */
1058 p->current->header.predicate_control = BRW_PREDICATE_NONE;
1059 return insn;
1060 }
1061
1062
1063 /* FORWARD JUMPS:
1064 */
1065 void brw_land_fwd_jump(struct brw_compile *p,
1066 struct brw_instruction *jmp_insn)
1067 {
1068 struct intel_context *intel = &p->brw->intel;
1069 struct brw_instruction *landing = &p->store[p->nr_insn];
1070 GLuint jmpi = 1;
1071
1072 if (intel->gen >= 5)
1073 jmpi = 2;
1074
1075 assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
1076 assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE);
1077
1078 jmp_insn->bits3.ud = jmpi * ((landing - jmp_insn) - 1);
1079 }
1080
1081
1082
1083 /* To integrate with the above, it makes sense that the comparison
1084 * instruction should populate the flag register. It might be simpler
1085 * just to use the flag reg for most WM tasks?
1086 */
1087 void brw_CMP(struct brw_compile *p,
1088 struct brw_reg dest,
1089 GLuint conditional,
1090 struct brw_reg src0,
1091 struct brw_reg src1)
1092 {
1093 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP);
1094
1095 insn->header.destreg__conditionalmod = conditional;
1096 brw_set_dest(insn, dest);
1097 brw_set_src0(insn, src0);
1098 brw_set_src1(insn, src1);
1099
1100 /* guess_execution_size(insn, src0); */
1101
1102
1103 /* Make it so that future instructions will use the computed flag
1104 * value until brw_set_predicate_control_flag_value() is called
1105 * again.
1106 */
1107 if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
1108 dest.nr == 0) {
1109 p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
1110 p->flag_value = 0xff;
1111 }
1112 }
1113
1114 /* Issue a 'wait' instruction on notification register n1; the host can
1115 program MMIO to wake the thread back up. */
1116 void brw_WAIT (struct brw_compile *p)
1117 {
1118 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_WAIT);
1119 struct brw_reg src = brw_notification_1_reg();
1120
1121 brw_set_dest(insn, src);
1122 brw_set_src0(insn, src);
1123 brw_set_src1(insn, brw_null_reg());
1124 insn->header.execution_size = 0; /* must */
1125 insn->header.predicate_control = 0;
1126 insn->header.compression_control = 0;
1127 }
1128
1129
1130 /***********************************************************************
1131 * Helpers for the various SEND message types:
1132 */
1133
1134 /** Extended math function, float[8].
1135 */
1136 void brw_math( struct brw_compile *p,
1137 struct brw_reg dest,
1138 GLuint function,
1139 GLuint saturate,
1140 GLuint msg_reg_nr,
1141 struct brw_reg src,
1142 GLuint data_type,
1143 GLuint precision )
1144 {
1145 struct intel_context *intel = &p->brw->intel;
1146
1147 if (intel->gen >= 6) {
1148 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);
1149
1150 assert(dest.file == BRW_GENERAL_REGISTER_FILE);
1151 assert(src.file == BRW_GENERAL_REGISTER_FILE);
1152
1153 assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
1154 assert(src.hstride == BRW_HORIZONTAL_STRIDE_1);
1155
1156 if (function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT &&
1157 function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
1158 assert(src.type == BRW_REGISTER_TYPE_F);
1159 }
1160
1161 /* Math is the same ISA format as other opcodes, except that CondModifier
1162 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1163 */
1164 insn->header.destreg__conditionalmod = function;
1165
1166 brw_set_dest(insn, dest);
1167 brw_set_src0(insn, src);
1168 brw_set_src1(insn, brw_null_reg());
1169 } else {
1170 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1171 GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
1172 GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
1173 /* Example code doesn't set predicate_control for send
1174 * instructions.
1175 */
1176 insn->header.predicate_control = 0;
1177 insn->header.destreg__conditionalmod = msg_reg_nr;
1178
1179 brw_set_dest(insn, dest);
1180 brw_set_src0(insn, src);
1181 brw_set_math_message(p->brw,
1182 insn,
1183 msg_length, response_length,
1184 function,
1185 BRW_MATH_INTEGER_UNSIGNED,
1186 precision,
1187 saturate,
1188 data_type);
1189 }
1190 }
1191
1192 /** Extended math function, float[8].
1193 */
1194 void brw_math2(struct brw_compile *p,
1195 struct brw_reg dest,
1196 GLuint function,
1197 struct brw_reg src0,
1198 struct brw_reg src1)
1199 {
1200 struct intel_context *intel = &p->brw->intel;
1201 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);
1202
1203 assert(intel->gen >= 6);
1204 (void) intel;
1205
1206
1207 assert(dest.file == BRW_GENERAL_REGISTER_FILE);
1208 assert(src0.file == BRW_GENERAL_REGISTER_FILE);
1209 assert(src1.file == BRW_GENERAL_REGISTER_FILE);
1210
1211 assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
1212 assert(src0.hstride == BRW_HORIZONTAL_STRIDE_1);
1213 assert(src1.hstride == BRW_HORIZONTAL_STRIDE_1);
1214
1215 if (function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT &&
1216 function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
1217 assert(src0.type == BRW_REGISTER_TYPE_F);
1218 assert(src1.type == BRW_REGISTER_TYPE_F);
1219 }
1220
1221 /* Math is the same ISA format as other opcodes, except that CondModifier
1222 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1223 */
1224 insn->header.destreg__conditionalmod = function;
1225
1226 brw_set_dest(insn, dest);
1227 brw_set_src0(insn, src0);
1228 brw_set_src1(insn, src1);
1229 }
1230
1231 /**
1232 * Extended math function, float[16].
1233 * Use 2 send instructions.
1234 */
1235 void brw_math_16( struct brw_compile *p,
1236 struct brw_reg dest,
1237 GLuint function,
1238 GLuint saturate,
1239 GLuint msg_reg_nr,
1240 struct brw_reg src,
1241 GLuint precision )
1242 {
1243 struct intel_context *intel = &p->brw->intel;
1244 struct brw_instruction *insn;
1245 GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
1246 GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
1247
1248 if (intel->gen >= 6) {
1249 insn = next_insn(p, BRW_OPCODE_MATH);
1250
1251 /* Math is the same ISA format as other opcodes, except that CondModifier
1252 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1253 */
1254 insn->header.destreg__conditionalmod = function;
1255
1256 brw_set_dest(insn, dest);
1257 brw_set_src0(insn, src);
1258 brw_set_src1(insn, brw_null_reg());
1259 return;
1260 }
1261
1262 /* First instruction:
1263 */
1264 brw_push_insn_state(p);
1265 brw_set_predicate_control_flag_value(p, 0xff);
1266 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1267
1268 insn = next_insn(p, BRW_OPCODE_SEND);
1269 insn->header.destreg__conditionalmod = msg_reg_nr;
1270
1271 brw_set_dest(insn, dest);
1272 brw_set_src0(insn, src);
1273 brw_set_math_message(p->brw,
1274 insn,
1275 msg_length, response_length,
1276 function,
1277 BRW_MATH_INTEGER_UNSIGNED,
1278 precision,
1279 saturate,
1280 BRW_MATH_DATA_VECTOR);
1281
1282 /* Second instruction:
1283 */
1284 insn = next_insn(p, BRW_OPCODE_SEND);
1285 insn->header.compression_control = BRW_COMPRESSION_2NDHALF;
1286 insn->header.destreg__conditionalmod = msg_reg_nr+1;
1287
1288 brw_set_dest(insn, offset(dest,1));
1289 brw_set_src0(insn, src);
1290 brw_set_math_message(p->brw,
1291 insn,
1292 msg_length, response_length,
1293 function,
1294 BRW_MATH_INTEGER_UNSIGNED,
1295 precision,
1296 saturate,
1297 BRW_MATH_DATA_VECTOR);
1298
1299 brw_pop_insn_state(p);
1300 }
1301
1302
1303 /**
1304 * Write block of 16 dwords/floats to the data port Render Cache scratch buffer.
1305 * Scratch offset should be a multiple of 64.
1306 * Used for register spilling.
1307 */
1308 void brw_dp_WRITE_16( struct brw_compile *p,
1309 struct brw_reg src,
1310 GLuint scratch_offset )
1311 {
1312 struct intel_context *intel = &p->brw->intel;
1313 GLuint msg_reg_nr = 1;
1314 {
1315 brw_push_insn_state(p);
1316 brw_set_mask_control(p, BRW_MASK_DISABLE);
1317 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1318
1319 /* set message header global offset field (reg 0, element 2) */
1320 brw_MOV(p,
1321 retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D),
1322 brw_imm_d(scratch_offset));
1323
1324 brw_pop_insn_state(p);
1325 }
1326
1327 {
1328 GLuint msg_length = 3;
1329 struct brw_reg dest;
1330 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1331 int send_commit_msg;
1332
1333 insn->header.predicate_control = 0; /* XXX */
1334 insn->header.compression_control = BRW_COMPRESSION_NONE;
1335 insn->header.destreg__conditionalmod = msg_reg_nr;
1336
1337 /* Until gen6, writes followed by reads from the same location
1338 * are not guaranteed to be ordered unless write_commit is set.
1339 * If set, then a no-op write is issued to the destination
1340 * register to set a dependency, and a read from the destination
1341 * can be used to ensure the ordering.
1342 *
1343 * For gen6, only writes between different threads need ordering
1344 * protection. Our use of DP writes is all about register
1345 * spilling within a thread.
1346 */
1347 if (intel->gen >= 6) {
1348 dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
1349 send_commit_msg = 0;
1350 } else {
1351 dest = brw_uw16_grf(0, 0);
1352 send_commit_msg = 1;
1353 }
1354
1355 brw_set_dest(insn, dest);
1356 brw_set_src0(insn, src);
1357
1358 brw_set_dp_write_message(p->brw,
1359 insn,
1360 255, /* binding table index (255=stateless) */
1361 BRW_DATAPORT_OWORD_BLOCK_4_OWORDS, /* msg_control */
1362 BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE, /* msg_type */
1363 msg_length,
1364 GL_TRUE, /* header_present */
1365 0, /* pixel scoreboard */
1366 send_commit_msg, /* response_length */
1367 0, /* eot */
1368 send_commit_msg);
1369 }
1370 }
1371
1372
1373 /**
1374 * Read block of 16 dwords/floats from the data port Render Cache scratch buffer.
1375 * Scratch offset should be a multiple of 64.
1376 * Used for register spilling.
1377 */
1378 void brw_dp_READ_16( struct brw_compile *p,
1379 struct brw_reg dest,
1380 GLuint scratch_offset )
1381 {
1382 GLuint msg_reg_nr = 1;
1383 {
1384 brw_push_insn_state(p);
1385 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1386 brw_set_mask_control(p, BRW_MASK_DISABLE);
1387
1388 /* set message header global offset field (reg 0, element 2) */
1389 brw_MOV(p,
1390 retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D),
1391 brw_imm_d(scratch_offset));
1392
1393 brw_pop_insn_state(p);
1394 }
1395
1396 {
1397 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1398
1399 insn->header.predicate_control = 0; /* XXX */
1400 insn->header.compression_control = BRW_COMPRESSION_NONE;
1401 insn->header.destreg__conditionalmod = msg_reg_nr;
1402
1403 brw_set_dest(insn, dest); /* UW? */
1404 brw_set_src0(insn, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW));
1405
1406 brw_set_dp_read_message(p->brw,
1407 insn,
1408 255, /* binding table index (255=stateless) */
1409 BRW_DATAPORT_OWORD_BLOCK_4_OWORDS,
1410 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
1411 1, /* target cache (render/scratch) */
1412 1, /* msg_length */
1413 2, /* response_length */
1414 0); /* eot */
1415 }
1416 }
1417
1418
1419 /**
1420 * Read a float[4] vector from the data port Data Cache (const buffer).
1421 * Location (in buffer) should be a multiple of 16.
1422 * Used for fetching shader constants.
1423 * If relAddr is true, we'll do an indirect fetch using the address register.
1424 */
1425 void brw_dp_READ_4( struct brw_compile *p,
1426 struct brw_reg dest,
1427 GLboolean relAddr,
1428 GLuint location,
1429 GLuint bind_table_index )
1430 {
1431 /* XXX: relAddr not implemented */
1432 GLuint msg_reg_nr = 1;
1433 {
1434 struct brw_reg b;
1435 brw_push_insn_state(p);
1436 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1437 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1438 brw_set_mask_control(p, BRW_MASK_DISABLE);
1439
1440 /* Setup MRF[1] with location/offset into const buffer */
1441 b = brw_message_reg(msg_reg_nr);
1442 b = retype(b, BRW_REGISTER_TYPE_UD);
1443 /* XXX I think we're setting all the dwords of MRF[1] to 'location',
1444 * when the docs say only dword[2] should be set. Hmmm. But it works.
1445 */
1446 brw_MOV(p, b, brw_imm_ud(location));
1447 brw_pop_insn_state(p);
1448 }
1449
1450 {
1451 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1452
1453 insn->header.predicate_control = BRW_PREDICATE_NONE;
1454 insn->header.compression_control = BRW_COMPRESSION_NONE;
1455 insn->header.destreg__conditionalmod = msg_reg_nr;
1456 insn->header.mask_control = BRW_MASK_DISABLE;
1457
1458 /* cast dest to a uword[8] vector */
1459 dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);
1460
1461 brw_set_dest(insn, dest);
1462 brw_set_src0(insn, brw_null_reg());
1463
1464 brw_set_dp_read_message(p->brw,
1465 insn,
1466 bind_table_index,
1467 0, /* msg_control (0 means 1 Oword) */
1468 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
1469 0, /* source cache = data cache */
1470 1, /* msg_length */
1471 1, /* response_length (1 Oword) */
1472 0); /* eot */
1473 }
1474 }
1475
1476
1477 /**
1478 * Read float[4] constant(s) from VS constant buffer.
1479 * For relative addressing, two float[4] constants will be read into 'dest'.
1480 * Otherwise, one float[4] constant will be read into the lower half of 'dest'.
1481 */
1482 void brw_dp_READ_4_vs(struct brw_compile *p,
1483 struct brw_reg dest,
1484 GLuint location,
1485 GLuint bind_table_index)
1486 {
1487 struct brw_instruction *insn;
1488 GLuint msg_reg_nr = 1;
1489 struct brw_reg b;
1490
1491 /*
1492 printf("vs const read msg, location %u, msg_reg_nr %d\n",
1493 location, msg_reg_nr);
1494 */
1495
1496 /* Setup MRF[1] with location/offset into const buffer */
1497 brw_push_insn_state(p);
1498 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1499 brw_set_mask_control(p, BRW_MASK_DISABLE);
1500 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1501
1502 /* XXX I think we're setting all the dwords of MRF[1] to 'location',
1503 * when the docs say only dword[2] should be set. Hmmm. But it works.
1504 */
1505 b = brw_message_reg(msg_reg_nr);
1506 b = retype(b, BRW_REGISTER_TYPE_UD);
1507 /*b = get_element_ud(b, 2);*/
1508 brw_MOV(p, b, brw_imm_ud(location));
1509
1510 brw_pop_insn_state(p);
1511
1512 insn = next_insn(p, BRW_OPCODE_SEND);
1513
1514 insn->header.predicate_control = BRW_PREDICATE_NONE;
1515 insn->header.compression_control = BRW_COMPRESSION_NONE;
1516 insn->header.destreg__conditionalmod = msg_reg_nr;
1517 insn->header.mask_control = BRW_MASK_DISABLE;
1518
1519 brw_set_dest(insn, dest);
1520 brw_set_src0(insn, brw_null_reg());
1521
1522 brw_set_dp_read_message(p->brw,
1523 insn,
1524 bind_table_index,
1525 0,
1526 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
1527 0, /* source cache = data cache */
1528 1, /* msg_length */
1529 1, /* response_length (1 Oword) */
1530 0); /* eot */
1531 }
1532
1533 /**
1534 * Read a float[4] constant per vertex from VS constant buffer, with
1535 * relative addressing.
1536 */
1537 void brw_dp_READ_4_vs_relative(struct brw_compile *p,
1538 struct brw_reg dest,
1539 struct brw_reg addr_reg,
1540 GLuint offset,
1541 GLuint bind_table_index)
1542 {
1543 struct intel_context *intel = &p->brw->intel;
1544 int msg_type;
1545
1546 /* Setup MRF[1] with offset into const buffer */
1547 brw_push_insn_state(p);
1548 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1549 brw_set_mask_control(p, BRW_MASK_DISABLE);
1550 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1551
1552 /* M1.0 is block offset 0, M1.4 is block offset 1, all other
1553 * fields ignored.
1554 */
1555 brw_ADD(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_UD),
1556 addr_reg, brw_imm_d(offset));
1557 brw_pop_insn_state(p);
1558
1559 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1560
1561 insn->header.predicate_control = BRW_PREDICATE_NONE;
1562 insn->header.compression_control = BRW_COMPRESSION_NONE;
1563 insn->header.destreg__conditionalmod = 0;
1564 insn->header.mask_control = BRW_MASK_DISABLE;
1565
1566 brw_set_dest(insn, dest);
1567 brw_set_src0(insn, brw_vec8_grf(0, 0));
1568
1569 if (intel->gen == 6)
1570 msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
1571 else if (intel->gen == 5 || intel->is_g4x)
1572 msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
1573 else
1574 msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
1575
1576 brw_set_dp_read_message(p->brw,
1577 insn,
1578 bind_table_index,
1579 BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
1580 msg_type,
1581 0, /* source cache = data cache */
1582 2, /* msg_length */
1583 1, /* response_length */
1584 0); /* eot */
1585 }
1586
1587
1588
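/* Emit a render-target write SEND. On gen6+ the payload is taken directly
 * from the MRFs starting at msg_reg_nr, and the header is dropped when the
 * message is exactly 4 registers long.
 */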
1589 void brw_fb_WRITE(struct brw_compile *p,
1590 int dispatch_width,
1591 struct brw_reg dest,
1592 GLuint msg_reg_nr,
1593 struct brw_reg src0,
1594 GLuint binding_table_index,
1595 GLuint msg_length,
1596 GLuint response_length,
1597 GLboolean eot)
1598 {
1599 struct intel_context *intel = &p->brw->intel;
1600 struct brw_instruction *insn;
1601 GLuint msg_control, msg_type;
1602 GLboolean header_present = GL_TRUE;
1603
1604 insn = next_insn(p, BRW_OPCODE_SEND);
1605 insn->header.predicate_control = 0; /* XXX */
1606 insn->header.compression_control = BRW_COMPRESSION_NONE;
1607
1608 if (intel->gen >= 6) {
1609 if (msg_length == 4)
1610 header_present = GL_FALSE;
1611
1612 /* headerless version, just submit color payload */
1613 src0 = brw_message_reg(msg_reg_nr);
1614
1615 msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE_GEN6;
1616 } else {
1617 insn->header.destreg__conditionalmod = msg_reg_nr;
1618
1619 msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
1620 }
1621
1622 if (dispatch_width == 16)
1623 msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE;
1624 else
1625 msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01;
1626
1627 brw_set_dest(insn, dest);
1628 brw_set_src0(insn, src0);
1629 brw_set_dp_write_message(p->brw,
1630 insn,
1631 binding_table_index,
1632 msg_control,
1633 msg_type,
1634 msg_length,
1635 header_present,
1636 1, /* pixel scoreboard */
1637 response_length,
1638 eot,
1639 0 /* send_commit_msg */);
1640 }
1641
1642
1643 /**
1644 * Texture sample instruction.
1645 * Note: the msg_type plus msg_length values determine exactly what kind
1646 * of sampling operation is performed. See volume 4, page 161 of docs.
1647 */
1648 void brw_SAMPLE(struct brw_compile *p,
1649 struct brw_reg dest,
1650 GLuint msg_reg_nr,
1651 struct brw_reg src0,
1652 GLuint binding_table_index,
1653 GLuint sampler,
1654 GLuint writemask,
1655 GLuint msg_type,
1656 GLuint response_length,
1657 GLuint msg_length,
1658 GLboolean eot,
1659 GLuint header_present,
1660 GLuint simd_mode)
1661 {
1662 struct intel_context *intel = &p->brw->intel;
1663 GLboolean need_stall = 0;
1664
1665 if (writemask == 0) {
1666 /*printf("%s: zero writemask??\n", __FUNCTION__); */
1667 return;
1668 }
1669
1670 /* Hardware doesn't do destination dependency checking on send
1671 * instructions properly. Add a workaround which generates the
1672 * dependency by other means. In practice it seems like this bug
1673 * only crops up for texture samples, and only where registers are
1674 * written by the send and then written again later without being
1675 * read in between. Luckily for us, we already track that
1676 * information and use it to modify the writemask for the
1677 * instruction, so that is a guide for whether a workaround is
1678 * needed.
1679 */
1680 if (writemask != WRITEMASK_XYZW) {
1681 GLuint dst_offset = 0;
1682 GLuint i, newmask = 0, len = 0;
1683
1684 for (i = 0; i < 4; i++) {
1685 if (writemask & (1<<i))
1686 break;
1687 dst_offset += 2;
1688 }
1689 for (; i < 4; i++) {
1690 if (!(writemask & (1<<i)))
1691 break;
1692 newmask |= 1<<i;
1693 len++;
1694 }
1695
1696 if (newmask != writemask) {
1697 need_stall = 1;
1698 /* printf("need stall %x %x\n", newmask , writemask); */
1699 }
1700 else {
1701 GLboolean dispatch_16 = GL_FALSE;
1702
1703 struct brw_reg m1 = brw_message_reg(msg_reg_nr);
1704
1705 guess_execution_size(p->current, dest);
1706 if (p->current->header.execution_size == BRW_EXECUTE_16)
1707 dispatch_16 = GL_TRUE;
1708
1709 newmask = ~newmask & WRITEMASK_XYZW;
1710
1711 brw_push_insn_state(p);
1712
1713 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1714 brw_set_mask_control(p, BRW_MASK_DISABLE);
1715
1716 brw_MOV(p, m1, brw_vec8_grf(0,0));
1717 brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12));
1718
1719 brw_pop_insn_state(p);
1720
1721 src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
1722 dest = offset(dest, dst_offset);
1723
1724 /* For 16-wide dispatch, masked channels are skipped in the
1725 * response. For 8-wide, masked channels still take up slots,
1726 * and are just not written to.
1727 */
1728 if (dispatch_16)
1729 response_length = len * 2;
1730 }
1731 }
1732
1733 {
1734 struct brw_instruction *insn;
1735
1736 /* Sandybridge doesn't have the implied move for SENDs,
1737 * and the first message register index comes from src0.
1738 */
1739 if (intel->gen >= 6) {
1740 brw_push_insn_state(p);
1741 brw_set_mask_control( p, BRW_MASK_DISABLE );
1742 /* m1 contains header? */
1743 brw_MOV(p, brw_message_reg(msg_reg_nr), src0);
1744 brw_pop_insn_state(p);
1745 src0 = brw_message_reg(msg_reg_nr);
1746 }
1747
1748 insn = next_insn(p, BRW_OPCODE_SEND);
1749 insn->header.predicate_control = 0; /* XXX */
1750 insn->header.compression_control = BRW_COMPRESSION_NONE;
1751 if (intel->gen < 6)
1752 insn->header.destreg__conditionalmod = msg_reg_nr;
1753
1754 brw_set_dest(insn, dest);
1755 brw_set_src0(insn, src0);
1756 brw_set_sampler_message(p->brw, insn,
1757 binding_table_index,
1758 sampler,
1759 msg_type,
1760 response_length,
1761 msg_length,
1762 eot,
1763 header_present,
1764 simd_mode);
1765 }
1766
1767 if (need_stall) {
1768 struct brw_reg reg = vec8(offset(dest, response_length-1));
1769
1770 /* mov (8) r9.0<1>:f r9.0<8;8,1>:f { Align1 }
1771 */
1772 brw_push_insn_state(p);
1773 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1774 brw_MOV(p, reg, reg);
1775 brw_pop_insn_state(p);
1776 }
1777
1778 }
1779
1780 /* All these variables are pretty confusing - we might be better off
1781 * using bitmasks and macros for this, in the old style. Or perhaps
1782 * just having the caller instantiate the fields in dword3 itself.
1783 */
1784 void brw_urb_WRITE(struct brw_compile *p,
1785 struct brw_reg dest,
1786 GLuint msg_reg_nr,
1787 struct brw_reg src0,
1788 GLboolean allocate,
1789 GLboolean used,
1790 GLuint msg_length,
1791 GLuint response_length,
1792 GLboolean eot,
1793 GLboolean writes_complete,
1794 GLuint offset,
1795 GLuint swizzle)
1796 {
1797 struct intel_context *intel = &p->brw->intel;
1798 struct brw_instruction *insn;
1799
1800 /* Sandybridge doesn't have the implied move for SENDs,
1801 * and the first message register index comes from src0.
1802 */
1803 if (intel->gen >= 6) {
1804 brw_push_insn_state(p);
1805 brw_set_mask_control( p, BRW_MASK_DISABLE );
1806 brw_MOV(p, brw_message_reg(msg_reg_nr), src0);
1807 brw_pop_insn_state(p);
1808 src0 = brw_message_reg(msg_reg_nr);
1809 }
1810
1811 insn = next_insn(p, BRW_OPCODE_SEND);
1812
1813 assert(msg_length < BRW_MAX_MRF);
1814
1815 brw_set_dest(insn, dest);
1816 brw_set_src0(insn, src0);
1817 brw_set_src1(insn, brw_imm_d(0));
1818
1819 if (intel->gen < 6)
1820 insn->header.destreg__conditionalmod = msg_reg_nr;
1821
1822 brw_set_urb_message(p->brw,
1823 insn,
1824 allocate,
1825 used,
1826 msg_length,
1827 response_length,
1828 eot,
1829 writes_complete,
1830 offset,
1831 swizzle);
1832 }
1833
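/* Emit the FF_SYNC message. As with other SENDs, Sandybridge has no implied
 * move from the MRF, so the payload is copied to the message register
 * explicitly first.
 */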
1834 void brw_ff_sync(struct brw_compile *p,
1835 struct brw_reg dest,
1836 GLuint msg_reg_nr,
1837 struct brw_reg src0,
1838 GLboolean allocate,
1839 GLuint response_length,
1840 GLboolean eot)
1841 {
1842 struct intel_context *intel = &p->brw->intel;
1843 struct brw_instruction *insn;
1844
1845 /* Sandybridge doesn't have the implied move for SENDs,
1846 * and the first message register index comes from src0.
1847 */
1848 if (intel->gen >= 6) {
1849 brw_push_insn_state(p);
1850 brw_set_mask_control( p, BRW_MASK_DISABLE );
1851 brw_MOV(p, retype(brw_message_reg(msg_reg_nr), BRW_REGISTER_TYPE_UD),
1852 retype(src0, BRW_REGISTER_TYPE_UD));
1853 brw_pop_insn_state(p);
1854 src0 = brw_message_reg(msg_reg_nr);
1855 }
1856
1857 insn = next_insn(p, BRW_OPCODE_SEND);
1858 brw_set_dest(insn, dest);
1859 brw_set_src0(insn, src0);
1860 brw_set_src1(insn, brw_imm_d(0));
1861
1862 if (intel->gen < 6)
1863 insn->header.destreg__conditionalmod = msg_reg_nr;
1864
1865 brw_set_ff_sync_message(p->brw,
1866 insn,
1867 allocate,
1868 response_length,
1869 eot);
1870 }