i965: Correctly emit the RNDZ instruction.
[mesa.git] src/mesa/drivers/dri/i965/brw_eu_emit.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32
33 #include "brw_context.h"
34 #include "brw_defines.h"
35 #include "brw_eu.h"
36
37
38
39
40 /***********************************************************************
41 * Internal helper for constructing instructions
42 */
43
44 static void guess_execution_size( struct brw_instruction *insn,
45 struct brw_reg reg )
46 {
47 if (reg.width == BRW_WIDTH_8 &&
48 insn->header.compression_control == BRW_COMPRESSION_COMPRESSED)
49 insn->header.execution_size = BRW_EXECUTE_16;
50 else
51 insn->header.execution_size = reg.width; /* note - definitions are compatible */
52 }
53
54
55 static void brw_set_dest( struct brw_instruction *insn,
56 struct brw_reg dest )
57 {
58 if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE &&
59 dest.file != BRW_MESSAGE_REGISTER_FILE)
60 assert(dest.nr < 128);
61
62 insn->bits1.da1.dest_reg_file = dest.file;
63 insn->bits1.da1.dest_reg_type = dest.type;
64 insn->bits1.da1.dest_address_mode = dest.address_mode;
65
66 if (dest.address_mode == BRW_ADDRESS_DIRECT) {
67 insn->bits1.da1.dest_reg_nr = dest.nr;
68
69 if (insn->header.access_mode == BRW_ALIGN_1) {
70 insn->bits1.da1.dest_subreg_nr = dest.subnr;
71 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
72 dest.hstride = BRW_HORIZONTAL_STRIDE_1;
73 insn->bits1.da1.dest_horiz_stride = dest.hstride;
74 }
75 else {
76 insn->bits1.da16.dest_subreg_nr = dest.subnr / 16;
77 insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask;
78 /* ignored in da16, but still needs to be set to '01' */
79 insn->bits1.da16.dest_horiz_stride = 1;
80 }
81 }
82 else {
83 insn->bits1.ia1.dest_subreg_nr = dest.subnr;
84
85 /* These are different sizes in align1 vs align16:
86 */
87 if (insn->header.access_mode == BRW_ALIGN_1) {
88 insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset;
89 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
90 dest.hstride = BRW_HORIZONTAL_STRIDE_1;
91 insn->bits1.ia1.dest_horiz_stride = dest.hstride;
92 }
93 else {
94 insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset;
95 /* ignored here too, but still needs to be set to '01' */
96 insn->bits1.ia16.dest_horiz_stride = 1;
97 }
98 }
99
100 /* NEW: Set the execution size based on dest.width and
101 * insn->compression_control:
102 */
103 guess_execution_size(insn, dest);
104 }
105
106 extern int reg_type_size[];
107
108 static void
109 validate_reg(struct brw_instruction *insn, struct brw_reg reg)
110 {
111 int hstride_for_reg[] = {0, 1, 2, 4};
112 int vstride_for_reg[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256};
113 int width_for_reg[] = {1, 2, 4, 8, 16};
114 int execsize_for_reg[] = {1, 2, 4, 8, 16};
115 int width, hstride, vstride, execsize;
116
117 if (reg.file == BRW_IMMEDIATE_VALUE) {
118 /* 3.3.6: Region Parameters. Restriction: Immediate vectors
119 * mean the destination has to be 128-bit aligned and the
120 * destination horiz stride has to be a word.
121 */
122 if (reg.type == BRW_REGISTER_TYPE_V) {
123 assert(hstride_for_reg[insn->bits1.da1.dest_horiz_stride] *
124 reg_type_size[insn->bits1.da1.dest_reg_type] == 2);
125 }
126
127 return;
128 }
129
130 if (reg.file == BRW_ARCHITECTURE_REGISTER_FILE &&
131 reg.nr == BRW_ARF_NULL)
132 return;
133
134 assert(reg.hstride >= 0 && reg.hstride < Elements(hstride_for_reg));
135 hstride = hstride_for_reg[reg.hstride];
136
137 if (reg.vstride == 0xf) {
138 vstride = -1;
139 } else {
140 assert(reg.vstride >= 0 && reg.vstride < Elements(vstride_for_reg));
141 vstride = vstride_for_reg[reg.vstride];
142 }
143
144 assert(reg.width >= 0 && reg.width < Elements(width_for_reg));
145 width = width_for_reg[reg.width];
146
147 assert(insn->header.execution_size >= 0 &&
148 insn->header.execution_size < Elements(execsize_for_reg));
149 execsize = execsize_for_reg[insn->header.execution_size];
150
151 /* Restrictions from 3.3.10: Register Region Restrictions. */
152 /* 3. */
153 assert(execsize >= width);
154
155 /* 4. */
156 if (execsize == width && hstride != 0) {
157 assert(vstride == -1 || vstride == width * hstride);
158 }
159
160 /* 5. */
161 if (execsize == width && hstride == 0) {
162 /* no restriction on vstride. */
163 }
164
165 /* 6. */
166 if (width == 1) {
167 assert(hstride == 0);
168 }
169
170 /* 7. */
171 if (execsize == 1 && width == 1) {
172 assert(hstride == 0);
173 assert(vstride == 0);
174 }
175
176 /* 8. */
177 if (vstride == 0 && hstride == 0) {
178 assert(width == 1);
179 }
180
181 /* 10. Check destination issues. */
182 }
183
184 static void brw_set_src0( struct brw_instruction *insn,
185 struct brw_reg reg )
186 {
187 if (reg.file != BRW_ARCHITECTURE_REGISTER_FILE)
188 assert(reg.nr < 128);
189
190 validate_reg(insn, reg);
191
192 insn->bits1.da1.src0_reg_file = reg.file;
193 insn->bits1.da1.src0_reg_type = reg.type;
194 insn->bits2.da1.src0_abs = reg.abs;
195 insn->bits2.da1.src0_negate = reg.negate;
196 insn->bits2.da1.src0_address_mode = reg.address_mode;
197
198 if (reg.file == BRW_IMMEDIATE_VALUE) {
199 insn->bits3.ud = reg.dw1.ud;
200
201 /* Required to set some fields in src1 as well:
202 */
203 insn->bits1.da1.src1_reg_file = 0; /* arf */
204 insn->bits1.da1.src1_reg_type = reg.type;
205 }
206 else
207 {
208 if (reg.address_mode == BRW_ADDRESS_DIRECT) {
209 if (insn->header.access_mode == BRW_ALIGN_1) {
210 insn->bits2.da1.src0_subreg_nr = reg.subnr;
211 insn->bits2.da1.src0_reg_nr = reg.nr;
212 }
213 else {
214 insn->bits2.da16.src0_subreg_nr = reg.subnr / 16;
215 insn->bits2.da16.src0_reg_nr = reg.nr;
216 }
217 }
218 else {
219 insn->bits2.ia1.src0_subreg_nr = reg.subnr;
220
221 if (insn->header.access_mode == BRW_ALIGN_1) {
222 insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset;
223 }
224 else {
225 insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset;
226 }
227 }
228
229 if (insn->header.access_mode == BRW_ALIGN_1) {
230 if (reg.width == BRW_WIDTH_1 &&
231 insn->header.execution_size == BRW_EXECUTE_1) {
232 insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
233 insn->bits2.da1.src0_width = BRW_WIDTH_1;
234 insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0;
235 }
236 else {
237 insn->bits2.da1.src0_horiz_stride = reg.hstride;
238 insn->bits2.da1.src0_width = reg.width;
239 insn->bits2.da1.src0_vert_stride = reg.vstride;
240 }
241 }
242 else {
243 insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
244 insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
245 insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
246 insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
247
248 /* This is an oddity of the fact that we use the same register
249 * descriptions for align_16 as for align_1:
250 */
251 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
252 insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4;
253 else
254 insn->bits2.da16.src0_vert_stride = reg.vstride;
255 }
256 }
257 }
258
259
260 void brw_set_src1( struct brw_instruction *insn,
261 struct brw_reg reg )
262 {
263 assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
264
265 assert(reg.nr < 128);
266
267 validate_reg(insn, reg);
268
269 insn->bits1.da1.src1_reg_file = reg.file;
270 insn->bits1.da1.src1_reg_type = reg.type;
271 insn->bits3.da1.src1_abs = reg.abs;
272 insn->bits3.da1.src1_negate = reg.negate;
273
274 /* Only src1 can be immediate in two-argument instructions.
275 */
276 assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE);
277
278 if (reg.file == BRW_IMMEDIATE_VALUE) {
279 insn->bits3.ud = reg.dw1.ud;
280 }
281 else {
282 /* This is a hardware restriction, which may or may not be lifted
283 * in the future:
284 */
285 assert (reg.address_mode == BRW_ADDRESS_DIRECT);
286 /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */
287
288 if (insn->header.access_mode == BRW_ALIGN_1) {
289 insn->bits3.da1.src1_subreg_nr = reg.subnr;
290 insn->bits3.da1.src1_reg_nr = reg.nr;
291 }
292 else {
293 insn->bits3.da16.src1_subreg_nr = reg.subnr / 16;
294 insn->bits3.da16.src1_reg_nr = reg.nr;
295 }
296
297 if (insn->header.access_mode == BRW_ALIGN_1) {
298 if (reg.width == BRW_WIDTH_1 &&
299 insn->header.execution_size == BRW_EXECUTE_1) {
300 insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
301 insn->bits3.da1.src1_width = BRW_WIDTH_1;
302 insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0;
303 }
304 else {
305 insn->bits3.da1.src1_horiz_stride = reg.hstride;
306 insn->bits3.da1.src1_width = reg.width;
307 insn->bits3.da1.src1_vert_stride = reg.vstride;
308 }
309 }
310 else {
311 insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
312 insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
313 insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
314 insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
315
316 /* This is an oddity of the fact that we use the same register
317 * descriptions for align_16 as for align_1:
318 */
319 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
320 insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4;
321 else
322 insn->bits3.da16.src1_vert_stride = reg.vstride;
323 }
324 }
325 }
326
327
328
329 static void brw_set_math_message( struct brw_context *brw,
330 struct brw_instruction *insn,
331 GLuint msg_length,
332 GLuint response_length,
333 GLuint function,
334 GLuint integer_type,
335 GLboolean low_precision,
336 GLboolean saturate,
337 GLuint dataType )
338 {
339 struct intel_context *intel = &brw->intel;
340 brw_set_src1(insn, brw_imm_d(0));
341
342 if (intel->gen == 5) {
343 insn->bits3.math_gen5.function = function;
344 insn->bits3.math_gen5.int_type = integer_type;
345 insn->bits3.math_gen5.precision = low_precision;
346 insn->bits3.math_gen5.saturate = saturate;
347 insn->bits3.math_gen5.data_type = dataType;
348 insn->bits3.math_gen5.snapshot = 0;
349 insn->bits3.math_gen5.header_present = 0;
350 insn->bits3.math_gen5.response_length = response_length;
351 insn->bits3.math_gen5.msg_length = msg_length;
352 insn->bits3.math_gen5.end_of_thread = 0;
353 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_MATH;
354 insn->bits2.send_gen5.end_of_thread = 0;
355 } else {
356 insn->bits3.math.function = function;
357 insn->bits3.math.int_type = integer_type;
358 insn->bits3.math.precision = low_precision;
359 insn->bits3.math.saturate = saturate;
360 insn->bits3.math.data_type = dataType;
361 insn->bits3.math.response_length = response_length;
362 insn->bits3.math.msg_length = msg_length;
363 insn->bits3.math.msg_target = BRW_MESSAGE_TARGET_MATH;
364 insn->bits3.math.end_of_thread = 0;
365 }
366 }
367
368
369 static void brw_set_ff_sync_message(struct brw_context *brw,
370 struct brw_instruction *insn,
371 GLboolean allocate,
372 GLuint response_length,
373 GLboolean end_of_thread)
374 {
375 struct intel_context *intel = &brw->intel;
376 brw_set_src1(insn, brw_imm_d(0));
377
378 insn->bits3.urb_gen5.opcode = 1; /* FF_SYNC */
379 insn->bits3.urb_gen5.offset = 0; /* Not used by FF_SYNC */
380 insn->bits3.urb_gen5.swizzle_control = 0; /* Not used by FF_SYNC */
381 insn->bits3.urb_gen5.allocate = allocate;
382 insn->bits3.urb_gen5.used = 0; /* Not used by FF_SYNC */
383 insn->bits3.urb_gen5.complete = 0; /* Not used by FF_SYNC */
384 insn->bits3.urb_gen5.header_present = 1;
385 insn->bits3.urb_gen5.response_length = response_length; /* may be 1 or 0 */
386 insn->bits3.urb_gen5.msg_length = 1;
387 insn->bits3.urb_gen5.end_of_thread = end_of_thread;
388 if (intel->gen >= 6) {
389 insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_URB;
390 } else {
391 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_URB;
392 insn->bits2.send_gen5.end_of_thread = end_of_thread;
393 }
394 }
395
396 static void brw_set_urb_message( struct brw_context *brw,
397 struct brw_instruction *insn,
398 GLboolean allocate,
399 GLboolean used,
400 GLuint msg_length,
401 GLuint response_length,
402 GLboolean end_of_thread,
403 GLboolean complete,
404 GLuint offset,
405 GLuint swizzle_control )
406 {
407 struct intel_context *intel = &brw->intel;
408 brw_set_src1(insn, brw_imm_d(0));
409
410 if (intel->gen >= 5) {
411 insn->bits3.urb_gen5.opcode = 0; /* ? */
412 insn->bits3.urb_gen5.offset = offset;
413 insn->bits3.urb_gen5.swizzle_control = swizzle_control;
414 insn->bits3.urb_gen5.allocate = allocate;
415 insn->bits3.urb_gen5.used = used; /* ? */
416 insn->bits3.urb_gen5.complete = complete;
417 insn->bits3.urb_gen5.header_present = 1;
418 insn->bits3.urb_gen5.response_length = response_length;
419 insn->bits3.urb_gen5.msg_length = msg_length;
420 insn->bits3.urb_gen5.end_of_thread = end_of_thread;
421 if (intel->gen >= 6) {
422 /* For SNB, the SFID bits moved to the condmod bits, and
423 * EOT stayed in bits3 above. Does the EOT bit setting
424 * below on Ironlake even do anything?
425 */
426 insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_URB;
427 } else {
428 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_URB;
429 insn->bits2.send_gen5.end_of_thread = end_of_thread;
430 }
431 } else {
432 insn->bits3.urb.opcode = 0; /* ? */
433 insn->bits3.urb.offset = offset;
434 insn->bits3.urb.swizzle_control = swizzle_control;
435 insn->bits3.urb.allocate = allocate;
436 insn->bits3.urb.used = used; /* ? */
437 insn->bits3.urb.complete = complete;
438 insn->bits3.urb.response_length = response_length;
439 insn->bits3.urb.msg_length = msg_length;
440 insn->bits3.urb.msg_target = BRW_MESSAGE_TARGET_URB;
441 insn->bits3.urb.end_of_thread = end_of_thread;
442 }
443 }
444
445 static void brw_set_dp_write_message( struct brw_context *brw,
446 struct brw_instruction *insn,
447 GLuint binding_table_index,
448 GLuint msg_control,
449 GLuint msg_type,
450 GLuint msg_length,
451 GLboolean header_present,
452 GLuint pixel_scoreboard_clear,
453 GLuint response_length,
454 GLuint end_of_thread,
455 GLuint send_commit_msg)
456 {
457 struct intel_context *intel = &brw->intel;
458 brw_set_src1(insn, brw_imm_ud(0));
459
460 if (intel->gen >= 6) {
461 insn->bits3.dp_render_cache.binding_table_index = binding_table_index;
462 insn->bits3.dp_render_cache.msg_control = msg_control;
463 insn->bits3.dp_render_cache.pixel_scoreboard_clear = pixel_scoreboard_clear;
464 insn->bits3.dp_render_cache.msg_type = msg_type;
465 insn->bits3.dp_render_cache.send_commit_msg = send_commit_msg;
466 insn->bits3.dp_render_cache.header_present = header_present;
467 insn->bits3.dp_render_cache.response_length = response_length;
468 insn->bits3.dp_render_cache.msg_length = msg_length;
469 insn->bits3.dp_render_cache.end_of_thread = end_of_thread;
470 insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
471 /* XXX is the below really needed? */
472 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
473 insn->bits2.send_gen5.end_of_thread = end_of_thread;
474 } else if (intel->gen == 5) {
475 insn->bits3.dp_write_gen5.binding_table_index = binding_table_index;
476 insn->bits3.dp_write_gen5.msg_control = msg_control;
477 insn->bits3.dp_write_gen5.pixel_scoreboard_clear = pixel_scoreboard_clear;
478 insn->bits3.dp_write_gen5.msg_type = msg_type;
479 insn->bits3.dp_write_gen5.send_commit_msg = send_commit_msg;
480 insn->bits3.dp_write_gen5.header_present = header_present;
481 insn->bits3.dp_write_gen5.response_length = response_length;
482 insn->bits3.dp_write_gen5.msg_length = msg_length;
483 insn->bits3.dp_write_gen5.end_of_thread = end_of_thread;
484 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
485 insn->bits2.send_gen5.end_of_thread = end_of_thread;
486 } else {
487 insn->bits3.dp_write.binding_table_index = binding_table_index;
488 insn->bits3.dp_write.msg_control = msg_control;
489 insn->bits3.dp_write.pixel_scoreboard_clear = pixel_scoreboard_clear;
490 insn->bits3.dp_write.msg_type = msg_type;
491 insn->bits3.dp_write.send_commit_msg = send_commit_msg;
492 insn->bits3.dp_write.response_length = response_length;
493 insn->bits3.dp_write.msg_length = msg_length;
494 insn->bits3.dp_write.msg_target = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
495 insn->bits3.dp_write.end_of_thread = end_of_thread;
496 }
497 }
498
499 static void brw_set_dp_read_message( struct brw_context *brw,
500 struct brw_instruction *insn,
501 GLuint binding_table_index,
502 GLuint msg_control,
503 GLuint msg_type,
504 GLuint target_cache,
505 GLuint msg_length,
506 GLuint response_length,
507 GLuint end_of_thread )
508 {
509 struct intel_context *intel = &brw->intel;
510 brw_set_src1(insn, brw_imm_d(0));
511
512 if (intel->gen == 5) {
513 insn->bits3.dp_read_gen5.binding_table_index = binding_table_index;
514 insn->bits3.dp_read_gen5.msg_control = msg_control;
515 insn->bits3.dp_read_gen5.msg_type = msg_type;
516 insn->bits3.dp_read_gen5.target_cache = target_cache;
517 insn->bits3.dp_read_gen5.header_present = 1;
518 insn->bits3.dp_read_gen5.response_length = response_length;
519 insn->bits3.dp_read_gen5.msg_length = msg_length;
520 insn->bits3.dp_read_gen5.pad1 = 0;
521 insn->bits3.dp_read_gen5.end_of_thread = end_of_thread;
522 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_READ;
523 insn->bits2.send_gen5.end_of_thread = end_of_thread;
524 } else {
525 insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/
526 insn->bits3.dp_read.msg_control = msg_control; /*8:11*/
527 insn->bits3.dp_read.msg_type = msg_type; /*12:13*/
528 insn->bits3.dp_read.target_cache = target_cache; /*14:15*/
529 insn->bits3.dp_read.response_length = response_length; /*16:19*/
530 insn->bits3.dp_read.msg_length = msg_length; /*20:23*/
531 insn->bits3.dp_read.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ; /*24:27*/
532 insn->bits3.dp_read.pad1 = 0; /*28:30*/
533 insn->bits3.dp_read.end_of_thread = end_of_thread; /*31*/
534 }
535 }
536
537 static void brw_set_sampler_message(struct brw_context *brw,
538 struct brw_instruction *insn,
539 GLuint binding_table_index,
540 GLuint sampler,
541 GLuint msg_type,
542 GLuint response_length,
543 GLuint msg_length,
544 GLboolean eot,
545 GLuint header_present,
546 GLuint simd_mode)
547 {
548 struct intel_context *intel = &brw->intel;
549 assert(eot == 0);
550 brw_set_src1(insn, brw_imm_d(0));
551
552 if (intel->gen >= 5) {
553 insn->bits3.sampler_gen5.binding_table_index = binding_table_index;
554 insn->bits3.sampler_gen5.sampler = sampler;
555 insn->bits3.sampler_gen5.msg_type = msg_type;
556 insn->bits3.sampler_gen5.simd_mode = simd_mode;
557 insn->bits3.sampler_gen5.header_present = header_present;
558 insn->bits3.sampler_gen5.response_length = response_length;
559 insn->bits3.sampler_gen5.msg_length = msg_length;
560 insn->bits3.sampler_gen5.end_of_thread = eot;
561 if (intel->gen >= 6)
562 insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_SAMPLER;
563 else {
564 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_SAMPLER;
565 insn->bits2.send_gen5.end_of_thread = eot;
566 }
567 } else if (intel->is_g4x) {
568 insn->bits3.sampler_g4x.binding_table_index = binding_table_index;
569 insn->bits3.sampler_g4x.sampler = sampler;
570 insn->bits3.sampler_g4x.msg_type = msg_type;
571 insn->bits3.sampler_g4x.response_length = response_length;
572 insn->bits3.sampler_g4x.msg_length = msg_length;
573 insn->bits3.sampler_g4x.end_of_thread = eot;
574 insn->bits3.sampler_g4x.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
575 } else {
576 insn->bits3.sampler.binding_table_index = binding_table_index;
577 insn->bits3.sampler.sampler = sampler;
578 insn->bits3.sampler.msg_type = msg_type;
579 insn->bits3.sampler.return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32;
580 insn->bits3.sampler.response_length = response_length;
581 insn->bits3.sampler.msg_length = msg_length;
582 insn->bits3.sampler.end_of_thread = eot;
583 insn->bits3.sampler.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
584 }
585 }
586
587
588
589 static struct brw_instruction *next_insn( struct brw_compile *p,
590 GLuint opcode )
591 {
592 struct brw_instruction *insn;
593
594 assert(p->nr_insn + 1 < BRW_EU_MAX_INSN);
595
596 insn = &p->store[p->nr_insn++];
597 memcpy(insn, p->current, sizeof(*insn));
598
599 /* Reset this one-shot flag:
600 */
601
602 if (p->current->header.destreg__conditionalmod) {
603 p->current->header.destreg__conditionalmod = 0;
604 p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
605 }
606
607 insn->header.opcode = opcode;
608 return insn;
609 }
610
611
612 static struct brw_instruction *brw_alu1( struct brw_compile *p,
613 GLuint opcode,
614 struct brw_reg dest,
615 struct brw_reg src )
616 {
617 struct brw_instruction *insn = next_insn(p, opcode);
618 brw_set_dest(insn, dest);
619 brw_set_src0(insn, src);
620 return insn;
621 }
622
623 static struct brw_instruction *brw_alu2(struct brw_compile *p,
624 GLuint opcode,
625 struct brw_reg dest,
626 struct brw_reg src0,
627 struct brw_reg src1 )
628 {
629 struct brw_instruction *insn = next_insn(p, opcode);
630 brw_set_dest(insn, dest);
631 brw_set_src0(insn, src0);
632 brw_set_src1(insn, src1);
633 return insn;
634 }
635
636
637 /***********************************************************************
638 * Convenience routines.
639 */
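/* The ALU1/ALU2 macros below expand to thin wrappers around brw_alu1/brw_alu2.
 * For example (a sketch of the expansion), ALU1(MOV) defines:
 *
 *    struct brw_instruction *brw_MOV(struct brw_compile *p,
 *                                    struct brw_reg dest,
 *                                    struct brw_reg src0)
 *    {
 *       return brw_alu1(p, BRW_OPCODE_MOV, dest, src0);
 *    }
 */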
640 #define ALU1(OP) \
641 struct brw_instruction *brw_##OP(struct brw_compile *p, \
642 struct brw_reg dest, \
643 struct brw_reg src0) \
644 { \
645 return brw_alu1(p, BRW_OPCODE_##OP, dest, src0); \
646 }
647
648 #define ALU2(OP) \
649 struct brw_instruction *brw_##OP(struct brw_compile *p, \
650 struct brw_reg dest, \
651 struct brw_reg src0, \
652 struct brw_reg src1) \
653 { \
654 return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1); \
655 }
656
657 /* Rounding operations (other than RNDD) require two instructions - the first
658 * stores a rounded value (possibly the wrong way) in the dest register, but
659 * also sets a per-channel "increment bit" in the flag register. A predicated
660 * add of 1.0 fixes dest to contain the desired result.
661 */
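/* For example, brw_RNDZ(p, dest, src) emits roughly the following sequence
 * (an illustrative sketch, not actual disassembler output):
 *
 *    rndz  dest, src          with destreg__conditionalmod = 0x7
 *                             (round-increment bits written to the flag reg)
 *    (+f0) add dest, dest, 1.0F   predicated on the per-channel increment bit
 */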
662 #define ROUND(OP) \
663 void brw_##OP(struct brw_compile *p, \
664 struct brw_reg dest, \
665 struct brw_reg src) \
666 { \
667 struct brw_instruction *rnd, *add; \
668 rnd = next_insn(p, BRW_OPCODE_##OP); \
669 brw_set_dest(rnd, dest); \
670 brw_set_src0(rnd, src); \
671 rnd->header.destreg__conditionalmod = 0x7; /* turn on round-increments */ \
672 \
673 add = brw_ADD(p, dest, dest, brw_imm_f(1.0f)); \
674 add->header.predicate_control = BRW_PREDICATE_NORMAL; \
675 }
676
677
678 ALU1(MOV)
679 ALU2(SEL)
680 ALU1(NOT)
681 ALU2(AND)
682 ALU2(OR)
683 ALU2(XOR)
684 ALU2(SHR)
685 ALU2(SHL)
686 ALU2(RSR)
687 ALU2(RSL)
688 ALU2(ASR)
689 ALU1(FRC)
690 ALU1(RNDD)
691 ALU2(MAC)
692 ALU2(MACH)
693 ALU1(LZD)
694 ALU2(DP4)
695 ALU2(DPH)
696 ALU2(DP3)
697 ALU2(DP2)
698 ALU2(LINE)
699 ALU2(PLN)
700
701
702 ROUND(RNDZ)
703
704
705 struct brw_instruction *brw_ADD(struct brw_compile *p,
706 struct brw_reg dest,
707 struct brw_reg src0,
708 struct brw_reg src1)
709 {
710 /* 6.2.2: add */
711 if (src0.type == BRW_REGISTER_TYPE_F ||
712 (src0.file == BRW_IMMEDIATE_VALUE &&
713 src0.type == BRW_REGISTER_TYPE_VF)) {
714 assert(src1.type != BRW_REGISTER_TYPE_UD);
715 assert(src1.type != BRW_REGISTER_TYPE_D);
716 }
717
718 if (src1.type == BRW_REGISTER_TYPE_F ||
719 (src1.file == BRW_IMMEDIATE_VALUE &&
720 src1.type == BRW_REGISTER_TYPE_VF)) {
721 assert(src0.type != BRW_REGISTER_TYPE_UD);
722 assert(src0.type != BRW_REGISTER_TYPE_D);
723 }
724
725 return brw_alu2(p, BRW_OPCODE_ADD, dest, src0, src1);
726 }
727
728 struct brw_instruction *brw_MUL(struct brw_compile *p,
729 struct brw_reg dest,
730 struct brw_reg src0,
731 struct brw_reg src1)
732 {
733 /* 6.32.38: mul */
734 if (src0.type == BRW_REGISTER_TYPE_D ||
735 src0.type == BRW_REGISTER_TYPE_UD ||
736 src1.type == BRW_REGISTER_TYPE_D ||
737 src1.type == BRW_REGISTER_TYPE_UD) {
738 assert(dest.type != BRW_REGISTER_TYPE_F);
739 }
740
741 if (src0.type == BRW_REGISTER_TYPE_F ||
742 (src0.file == BRW_IMMEDIATE_VALUE &&
743 src0.type == BRW_REGISTER_TYPE_VF)) {
744 assert(src1.type != BRW_REGISTER_TYPE_UD);
745 assert(src1.type != BRW_REGISTER_TYPE_D);
746 }
747
748 if (src1.type == BRW_REGISTER_TYPE_F ||
749 (src1.file == BRW_IMMEDIATE_VALUE &&
750 src1.type == BRW_REGISTER_TYPE_VF)) {
751 assert(src0.type != BRW_REGISTER_TYPE_UD);
752 assert(src0.type != BRW_REGISTER_TYPE_D);
753 }
754
755 assert(src0.file != BRW_ARCHITECTURE_REGISTER_FILE ||
756 src0.nr != BRW_ARF_ACCUMULATOR);
757 assert(src1.file != BRW_ARCHITECTURE_REGISTER_FILE ||
758 src1.nr != BRW_ARF_ACCUMULATOR);
759
760 return brw_alu2(p, BRW_OPCODE_MUL, dest, src0, src1);
761 }
762
763
764 void brw_NOP(struct brw_compile *p)
765 {
766 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP);
767 brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
768 brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
769 brw_set_src1(insn, brw_imm_ud(0x0));
770 }
771
772
773
774
775
776 /***********************************************************************
777 * Comparisons, if/else/endif
778 */
779
780 struct brw_instruction *brw_JMPI(struct brw_compile *p,
781 struct brw_reg dest,
782 struct brw_reg src0,
783 struct brw_reg src1)
784 {
785 struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1);
786
787 insn->header.execution_size = 1;
788 insn->header.compression_control = BRW_COMPRESSION_NONE;
789 insn->header.mask_control = BRW_MASK_DISABLE;
790
791 p->current->header.predicate_control = BRW_PREDICATE_NONE;
792
793 return insn;
794 }
795
796 /* EU takes the value from the flag register and pushes it onto some
797 * sort of a stack (presumably merging with any flag value already on
798 * the stack). Within an if block, the flags at the top of the stack
799 * control execution on each channel of the unit, eg. on each of the
800 * 16 pixel values in our wm programs.
801 *
802 * When the matching 'else' instruction is reached (presumably by
803 * countdown of the instruction count patched in by our ELSE/ENDIF
804 * functions), the relevant flags are inverted.
805 *
806 * When the matching 'endif' instruction is reached, the flags are
807 * popped off. If the stack is now empty, normal execution resumes.
808 *
809 * No attempt is made to deal with stack overflow (14 elements?).
810 */
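/* Typical usage by the code generators in this driver (a sketch):
 *
 *    struct brw_instruction *if_insn = brw_IF(p, BRW_EXECUTE_8);
 *       ... emit "then" instructions ...
 *    if_insn = brw_ELSE(p, if_insn);
 *       ... emit "else" instructions ...
 *    brw_ENDIF(p, if_insn);
 *
 * brw_ELSE() returns the ELSE instruction so that brw_ENDIF() can patch
 * whichever of IF/ELSE was emitted last.
 */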
811 struct brw_instruction *brw_IF(struct brw_compile *p, GLuint execute_size)
812 {
813 struct intel_context *intel = &p->brw->intel;
814 struct brw_instruction *insn;
815
816 if (p->single_program_flow) {
817 assert(execute_size == BRW_EXECUTE_1);
818
819 insn = next_insn(p, BRW_OPCODE_ADD);
820 insn->header.predicate_inverse = 1;
821 } else {
822 insn = next_insn(p, BRW_OPCODE_IF);
823 }
824
825 /* Override the defaults for this instruction:
826 */
827 if (intel->gen < 6) {
828 brw_set_dest(insn, brw_ip_reg());
829 brw_set_src0(insn, brw_ip_reg());
830 brw_set_src1(insn, brw_imm_d(0x0));
831 } else {
832 brw_set_dest(insn, brw_imm_w(0));
833 brw_set_src0(insn, brw_null_reg());
834 brw_set_src1(insn, brw_null_reg());
835 }
836
837 insn->header.execution_size = execute_size;
838 insn->header.compression_control = BRW_COMPRESSION_NONE;
839 insn->header.predicate_control = BRW_PREDICATE_NORMAL;
840 insn->header.mask_control = BRW_MASK_ENABLE;
841 if (!p->single_program_flow)
842 insn->header.thread_control = BRW_THREAD_SWITCH;
843
844 p->current->header.predicate_control = BRW_PREDICATE_NONE;
845
846 return insn;
847 }
848
849
850 struct brw_instruction *brw_ELSE(struct brw_compile *p,
851 struct brw_instruction *if_insn)
852 {
853 struct intel_context *intel = &p->brw->intel;
854 struct brw_instruction *insn;
855 GLuint br = 1;
856
857 /* The jump count is in units of 64-bit chunks, so one 128-bit
858 instruction requires 2 chunks. */
859 if (intel->gen >= 5)
860 br = 2;
861
862 if (p->single_program_flow) {
863 insn = next_insn(p, BRW_OPCODE_ADD);
864 } else {
865 insn = next_insn(p, BRW_OPCODE_ELSE);
866 }
867
868 if (intel->gen < 6) {
869 brw_set_dest(insn, brw_ip_reg());
870 brw_set_src0(insn, brw_ip_reg());
871 brw_set_src1(insn, brw_imm_d(0x0));
872 } else {
873 brw_set_dest(insn, brw_imm_w(0));
874 brw_set_src0(insn, brw_null_reg());
875 brw_set_src1(insn, brw_null_reg());
876 }
877
878 insn->header.compression_control = BRW_COMPRESSION_NONE;
879 insn->header.execution_size = if_insn->header.execution_size;
880 insn->header.mask_control = BRW_MASK_ENABLE;
881 if (!p->single_program_flow)
882 insn->header.thread_control = BRW_THREAD_SWITCH;
883
884 /* Patch the if instruction to point at this instruction.
885 */
886 if (p->single_program_flow) {
887 assert(if_insn->header.opcode == BRW_OPCODE_ADD);
888
889 if_insn->bits3.ud = (insn - if_insn + 1) * 16;
890 } else {
891 assert(if_insn->header.opcode == BRW_OPCODE_IF);
892
893 if (intel->gen < 6) {
894 if_insn->bits3.if_else.jump_count = br * (insn - if_insn);
895 if_insn->bits3.if_else.pop_count = 0;
896 if_insn->bits3.if_else.pad0 = 0;
897 } else {
898 if_insn->bits1.branch_gen6.jump_count = br * (insn - if_insn + 1);
899 }
900 }
901
902 return insn;
903 }
904
905 void brw_ENDIF(struct brw_compile *p,
906 struct brw_instruction *patch_insn)
907 {
908 struct intel_context *intel = &p->brw->intel;
909 GLuint br = 1;
910
911 if (intel->gen >= 5)
912 br = 2;
913
914 if (p->single_program_flow) {
915 /* In single program flow mode, there's no need to execute an ENDIF,
916 * since we don't need to do any stack operations, and if we're executing
917 * currently, we want to just continue executing.
918 */
919 struct brw_instruction *next = &p->store[p->nr_insn];
920
921 assert(patch_insn->header.opcode == BRW_OPCODE_ADD);
922
923 patch_insn->bits3.ud = (next - patch_insn) * 16;
924 } else {
925 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_ENDIF);
926
927 if (intel->gen < 6) {
928 brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
929 brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
930 brw_set_src1(insn, brw_imm_d(0x0));
931 } else {
932 brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_W));
933 brw_set_src0(insn, brw_null_reg());
934 brw_set_src1(insn, brw_null_reg());
935 }
936
937 insn->header.compression_control = BRW_COMPRESSION_NONE;
938 insn->header.execution_size = patch_insn->header.execution_size;
939 insn->header.mask_control = BRW_MASK_ENABLE;
940 insn->header.thread_control = BRW_THREAD_SWITCH;
941
942 assert(patch_insn->bits3.if_else.jump_count == 0);
943
944 /* Patch the if or else instructions to point at this or the next
945 * instruction respectively.
946 */
947 if (patch_insn->header.opcode == BRW_OPCODE_IF) {
948 if (intel->gen < 6) {
949 /* Turn it into an IFF, which means no mask stack operations for
950 * all-false and jumping past the ENDIF.
951 */
952 patch_insn->header.opcode = BRW_OPCODE_IFF;
953 patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1);
954 patch_insn->bits3.if_else.pop_count = 0;
955 patch_insn->bits3.if_else.pad0 = 0;
956 } else {
957 /* As of gen6, there is no IFF and IF must point to the ENDIF. */
958 patch_insn->bits1.branch_gen6.jump_count = br * (insn - patch_insn);
959 }
960 } else {
961 assert(patch_insn->header.opcode == BRW_OPCODE_ELSE);
962 if (intel->gen < 6) {
963 /* BRW_OPCODE_ELSE pre-gen6 should point just past the
964 * matching ENDIF.
965 */
966 patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1);
967 patch_insn->bits3.if_else.pop_count = 1;
968 patch_insn->bits3.if_else.pad0 = 0;
969 } else {
970 /* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. */
971 patch_insn->bits1.branch_gen6.jump_count = br * (insn - patch_insn);
972 }
973 }
974
975 /* Also pop item off the stack in the endif instruction:
976 */
977 if (intel->gen < 6) {
978 insn->bits3.if_else.jump_count = 0;
979 insn->bits3.if_else.pop_count = 1;
980 insn->bits3.if_else.pad0 = 0;
981 } else {
982 insn->bits1.branch_gen6.jump_count = 2;
983 }
984 }
985 }
986
987 struct brw_instruction *brw_BREAK(struct brw_compile *p, int pop_count)
988 {
989 struct brw_instruction *insn;
990 insn = next_insn(p, BRW_OPCODE_BREAK);
991 brw_set_dest(insn, brw_ip_reg());
992 brw_set_src0(insn, brw_ip_reg());
993 brw_set_src1(insn, brw_imm_d(0x0));
994 insn->header.compression_control = BRW_COMPRESSION_NONE;
995 insn->header.execution_size = BRW_EXECUTE_8;
996 /* insn->header.mask_control = BRW_MASK_DISABLE; */
997 insn->bits3.if_else.pad0 = 0;
998 insn->bits3.if_else.pop_count = pop_count;
999 return insn;
1000 }
1001
1002 struct brw_instruction *brw_CONT(struct brw_compile *p, int pop_count)
1003 {
1004 struct brw_instruction *insn;
1005 insn = next_insn(p, BRW_OPCODE_CONTINUE);
1006 brw_set_dest(insn, brw_ip_reg());
1007 brw_set_src0(insn, brw_ip_reg());
1008 brw_set_src1(insn, brw_imm_d(0x0));
1009 insn->header.compression_control = BRW_COMPRESSION_NONE;
1010 insn->header.execution_size = BRW_EXECUTE_8;
1011 /* insn->header.mask_control = BRW_MASK_DISABLE; */
1012 insn->bits3.if_else.pad0 = 0;
1013 insn->bits3.if_else.pop_count = pop_count;
1014 return insn;
1015 }
1016
1017 /* DO/WHILE loop:
1018 */
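/* Typical usage (a sketch):
 *
 *    struct brw_instruction *do_insn = brw_DO(p, BRW_EXECUTE_8);
 *       ... loop body, with brw_BREAK()/brw_CONT() calls as needed ...
 *    brw_WHILE(p, do_insn);
 */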
1019 struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size)
1020 {
1021 if (p->single_program_flow) {
1022 return &p->store[p->nr_insn];
1023 } else {
1024 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO);
1025
1026 /* Override the defaults for this instruction:
1027 */
1028 brw_set_dest(insn, brw_null_reg());
1029 brw_set_src0(insn, brw_null_reg());
1030 brw_set_src1(insn, brw_null_reg());
1031
1032 insn->header.compression_control = BRW_COMPRESSION_NONE;
1033 insn->header.execution_size = execute_size;
1034 insn->header.predicate_control = BRW_PREDICATE_NONE;
1035 /* insn->header.mask_control = BRW_MASK_ENABLE; */
1036 /* insn->header.mask_control = BRW_MASK_DISABLE; */
1037
1038 return insn;
1039 }
1040 }
1041
1042
1043
1044 struct brw_instruction *brw_WHILE(struct brw_compile *p,
1045 struct brw_instruction *do_insn)
1046 {
1047 struct intel_context *intel = &p->brw->intel;
1048 struct brw_instruction *insn;
1049 GLuint br = 1;
1050
1051 if (intel->gen >= 5)
1052 br = 2;
1053
1054 if (p->single_program_flow)
1055 insn = next_insn(p, BRW_OPCODE_ADD);
1056 else
1057 insn = next_insn(p, BRW_OPCODE_WHILE);
1058
1059 brw_set_dest(insn, brw_ip_reg());
1060 brw_set_src0(insn, brw_ip_reg());
1061 brw_set_src1(insn, brw_imm_d(0x0));
1062
1063 insn->header.compression_control = BRW_COMPRESSION_NONE;
1064
1065 if (p->single_program_flow) {
1066 insn->header.execution_size = BRW_EXECUTE_1;
1067
1068 insn->bits3.d = (do_insn - insn) * 16;
1069 } else {
1070 insn->header.execution_size = do_insn->header.execution_size;
1071
1072 assert(do_insn->header.opcode == BRW_OPCODE_DO);
1073 insn->bits3.if_else.jump_count = br * (do_insn - insn + 1);
1074 insn->bits3.if_else.pop_count = 0;
1075 insn->bits3.if_else.pad0 = 0;
1076 }
1077
1078 /* insn->header.mask_control = BRW_MASK_ENABLE; */
1079
1080 /* insn->header.mask_control = BRW_MASK_DISABLE; */
1081 p->current->header.predicate_control = BRW_PREDICATE_NONE;
1082 return insn;
1083 }
1084
1085
1086 /* FORWARD JUMPS:
1087 */
1088 void brw_land_fwd_jump(struct brw_compile *p,
1089 struct brw_instruction *jmp_insn)
1090 {
1091 struct intel_context *intel = &p->brw->intel;
1092 struct brw_instruction *landing = &p->store[p->nr_insn];
1093 GLuint jmpi = 1;
1094
1095 if (intel->gen >= 5)
1096 jmpi = 2;
1097
1098 assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
1099 assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE);
1100
1101 jmp_insn->bits3.ud = jmpi * ((landing - jmp_insn) - 1);
1102 }
1103
1104
1105
1106 /* To integrate with the above, it makes sense that the comparison
1107 * instruction should populate the flag register. It might be simpler
1108 * just to use the flag reg for most WM tasks?
1109 */
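/* A sketch of the intended pattern (hypothetical src_x/src_y operands):
 *
 *    brw_CMP(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_F),
 *            BRW_CONDITIONAL_L, src_x, src_y);
 *
 * writes the per-channel result to the flag register, and subsequent
 * instructions are then emitted with predicate_control =
 * BRW_PREDICATE_NORMAL (see the null-destination check at the end of
 * brw_CMP below).
 */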
1110 void brw_CMP(struct brw_compile *p,
1111 struct brw_reg dest,
1112 GLuint conditional,
1113 struct brw_reg src0,
1114 struct brw_reg src1)
1115 {
1116 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP);
1117
1118 insn->header.destreg__conditionalmod = conditional;
1119 brw_set_dest(insn, dest);
1120 brw_set_src0(insn, src0);
1121 brw_set_src1(insn, src1);
1122
1123 /* guess_execution_size(insn, src0); */
1124
1125
1126 /* Make it so that future instructions will use the computed flag
1127 * value until brw_set_predicate_control_flag_value() is called
1128 * again.
1129 */
1130 if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
1131 dest.nr == 0) {
1132 p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
1133 p->flag_value = 0xff;
1134 }
1135 }
1136
1137 /* Issue a 'wait' instruction on notification register n1; the host can
1138 program MMIO to wake up the thread. */
1139 void brw_WAIT (struct brw_compile *p)
1140 {
1141 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_WAIT);
1142 struct brw_reg src = brw_notification_1_reg();
1143
1144 brw_set_dest(insn, src);
1145 brw_set_src0(insn, src);
1146 brw_set_src1(insn, brw_null_reg());
1147 insn->header.execution_size = 0; /* must */
1148 insn->header.predicate_control = 0;
1149 insn->header.compression_control = 0;
1150 }
1151
1152
1153 /***********************************************************************
1154 * Helpers for the various SEND message types:
1155 */
1156
1157 /** Extended math function, float[8].
1158 */
1159 void brw_math( struct brw_compile *p,
1160 struct brw_reg dest,
1161 GLuint function,
1162 GLuint saturate,
1163 GLuint msg_reg_nr,
1164 struct brw_reg src,
1165 GLuint data_type,
1166 GLuint precision )
1167 {
1168 struct intel_context *intel = &p->brw->intel;
1169
1170 if (intel->gen >= 6) {
1171 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);
1172
1173 assert(dest.file == BRW_GENERAL_REGISTER_FILE);
1174 assert(src.file == BRW_GENERAL_REGISTER_FILE);
1175
1176 assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
1177 assert(src.hstride == BRW_HORIZONTAL_STRIDE_1);
1178
1179 if (function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT &&
1180 function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
1181 assert(src.type == BRW_REGISTER_TYPE_F);
1182 }
1183
1184 /* Math is the same ISA format as other opcodes, except that CondModifier
1185 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1186 */
1187 insn->header.destreg__conditionalmod = function;
1188
1189 brw_set_dest(insn, dest);
1190 brw_set_src0(insn, src);
1191 brw_set_src1(insn, brw_null_reg());
1192 } else {
1193 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1194 GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
1195 GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
1196 /* Example code doesn't set predicate_control for send
1197 * instructions.
1198 */
1199 insn->header.predicate_control = 0;
1200 insn->header.destreg__conditionalmod = msg_reg_nr;
1201
1202 brw_set_dest(insn, dest);
1203 brw_set_src0(insn, src);
1204 brw_set_math_message(p->brw,
1205 insn,
1206 msg_length, response_length,
1207 function,
1208 BRW_MATH_INTEGER_UNSIGNED,
1209 precision,
1210 saturate,
1211 data_type);
1212 }
1213 }
1214
1215 /** Extended math function, float[8].
1216 */
1217 void brw_math2(struct brw_compile *p,
1218 struct brw_reg dest,
1219 GLuint function,
1220 struct brw_reg src0,
1221 struct brw_reg src1)
1222 {
1223 struct intel_context *intel = &p->brw->intel;
1224 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);
1225
1226 assert(intel->gen >= 6);
1227 (void) intel;
1228
1229
1230 assert(dest.file == BRW_GENERAL_REGISTER_FILE);
1231 assert(src0.file == BRW_GENERAL_REGISTER_FILE);
1232 assert(src1.file == BRW_GENERAL_REGISTER_FILE);
1233
1234 assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
1235 assert(src0.hstride == BRW_HORIZONTAL_STRIDE_1);
1236 assert(src1.hstride == BRW_HORIZONTAL_STRIDE_1);
1237
1238 if (function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT &&
1239 function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
1240 assert(src0.type == BRW_REGISTER_TYPE_F);
1241 assert(src1.type == BRW_REGISTER_TYPE_F);
1242 }
1243
1244 /* Math is the same ISA format as other opcodes, except that CondModifier
1245 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1246 */
1247 insn->header.destreg__conditionalmod = function;
1248
1249 brw_set_dest(insn, dest);
1250 brw_set_src0(insn, src0);
1251 brw_set_src1(insn, src1);
1252 }
1253
1254 /**
1255 * Extended math function, float[16].
1256 * Use 2 send instructions.
1257 */
1258 void brw_math_16( struct brw_compile *p,
1259 struct brw_reg dest,
1260 GLuint function,
1261 GLuint saturate,
1262 GLuint msg_reg_nr,
1263 struct brw_reg src,
1264 GLuint precision )
1265 {
1266 struct intel_context *intel = &p->brw->intel;
1267 struct brw_instruction *insn;
1268 GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
1269 GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
1270
1271 if (intel->gen >= 6) {
1272 insn = next_insn(p, BRW_OPCODE_MATH);
1273
1274 /* Math is the same ISA format as other opcodes, except that CondModifier
1275 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1276 */
1277 insn->header.destreg__conditionalmod = function;
1278
1279 brw_set_dest(insn, dest);
1280 brw_set_src0(insn, src);
1281 brw_set_src1(insn, brw_null_reg());
1282 return;
1283 }
1284
1285 /* First instruction:
1286 */
1287 brw_push_insn_state(p);
1288 brw_set_predicate_control_flag_value(p, 0xff);
1289 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1290
1291 insn = next_insn(p, BRW_OPCODE_SEND);
1292 insn->header.destreg__conditionalmod = msg_reg_nr;
1293
1294 brw_set_dest(insn, dest);
1295 brw_set_src0(insn, src);
1296 brw_set_math_message(p->brw,
1297 insn,
1298 msg_length, response_length,
1299 function,
1300 BRW_MATH_INTEGER_UNSIGNED,
1301 precision,
1302 saturate,
1303 BRW_MATH_DATA_VECTOR);
1304
1305 /* Second instruction:
1306 */
1307 insn = next_insn(p, BRW_OPCODE_SEND);
1308 insn->header.compression_control = BRW_COMPRESSION_2NDHALF;
1309 insn->header.destreg__conditionalmod = msg_reg_nr+1;
1310
1311 brw_set_dest(insn, offset(dest,1));
1312 brw_set_src0(insn, src);
1313 brw_set_math_message(p->brw,
1314 insn,
1315 msg_length, response_length,
1316 function,
1317 BRW_MATH_INTEGER_UNSIGNED,
1318 precision,
1319 saturate,
1320 BRW_MATH_DATA_VECTOR);
1321
1322 brw_pop_insn_state(p);
1323 }
1324
1325
1326 /**
1327 * Write block of 16 dwords/floats to the data port Render Cache scratch buffer.
1328 * Scratch offset should be a multiple of 64.
1329 * Used for register spilling.
1330 */
1331 void brw_dp_WRITE_16( struct brw_compile *p,
1332 struct brw_reg src,
1333 GLuint scratch_offset )
1334 {
1335 struct intel_context *intel = &p->brw->intel;
1336 GLuint msg_reg_nr = 1;
1337 {
1338 brw_push_insn_state(p);
1339 brw_set_mask_control(p, BRW_MASK_DISABLE);
1340 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1341
1342 /* set message header global offset field (reg 0, element 2) */
1343 brw_MOV(p,
1344 retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D),
1345 brw_imm_d(scratch_offset));
1346
1347 brw_pop_insn_state(p);
1348 }
1349
1350 {
1351 GLuint msg_length = 3;
1352 struct brw_reg dest;
1353 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1354 int send_commit_msg;
1355
1356 insn->header.predicate_control = 0; /* XXX */
1357 insn->header.compression_control = BRW_COMPRESSION_NONE;
1358 insn->header.destreg__conditionalmod = msg_reg_nr;
1359
1360 /* Until gen6, writes followed by reads from the same location
1361 * are not guaranteed to be ordered unless write_commit is set.
1362 * If set, then a no-op write is issued to the destination
1363 * register to set a dependency, and a read from the destination
1364 * can be used to ensure the ordering.
1365 *
1366 * For gen6, only writes between different threads need ordering
1367 * protection. Our use of DP writes is all about register
1368 * spilling within a thread.
1369 */
1370 if (intel->gen >= 6) {
1371 dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
1372 send_commit_msg = 0;
1373 } else {
1374 dest = brw_uw16_grf(0, 0);
1375 send_commit_msg = 1;
1376 }
1377
1378 brw_set_dest(insn, dest);
1379 brw_set_src0(insn, src);
1380
1381 brw_set_dp_write_message(p->brw,
1382 insn,
1383 255, /* binding table index (255=stateless) */
1384 BRW_DATAPORT_OWORD_BLOCK_4_OWORDS, /* msg_control */
1385 BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE, /* msg_type */
1386 msg_length,
1387 GL_TRUE, /* header_present */
1388 0, /* pixel scoreboard */
1389 send_commit_msg, /* response_length */
1390 0, /* eot */
1391 send_commit_msg);
1392 }
1393 }
1394
1395
1396 /**
1397 * Read block of 16 dwords/floats from the data port Render Cache scratch buffer.
1398 * Scratch offset should be a multiple of 64.
1399 * Used for register spilling.
1400 */
1401 void brw_dp_READ_16( struct brw_compile *p,
1402 struct brw_reg dest,
1403 GLuint scratch_offset )
1404 {
1405 GLuint msg_reg_nr = 1;
1406 {
1407 brw_push_insn_state(p);
1408 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1409 brw_set_mask_control(p, BRW_MASK_DISABLE);
1410
1411 /* set message header global offset field (reg 0, element 2) */
1412 brw_MOV(p,
1413 retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D),
1414 brw_imm_d(scratch_offset));
1415
1416 brw_pop_insn_state(p);
1417 }
1418
1419 {
1420 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1421
1422 insn->header.predicate_control = 0; /* XXX */
1423 insn->header.compression_control = BRW_COMPRESSION_NONE;
1424 insn->header.destreg__conditionalmod = msg_reg_nr;
1425
1426 brw_set_dest(insn, dest); /* UW? */
1427 brw_set_src0(insn, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW));
1428
1429 brw_set_dp_read_message(p->brw,
1430 insn,
1431 255, /* binding table index (255=stateless) */
1432 BRW_DATAPORT_OWORD_BLOCK_4_OWORDS,
1433 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
1434 1, /* target cache (render/scratch) */
1435 1, /* msg_length */
1436 2, /* response_length */
1437 0); /* eot */
1438 }
1439 }
1440
1441
1442 /**
1443 * Read a float[4] vector from the data port Data Cache (const buffer).
1444 * Location (in buffer) should be a multiple of 16.
1445 * Used for fetching shader constants.
1446 * If relAddr is true, we'll do an indirect fetch using the address register.
1447 */
1448 void brw_dp_READ_4( struct brw_compile *p,
1449 struct brw_reg dest,
1450 GLboolean relAddr,
1451 GLuint location,
1452 GLuint bind_table_index )
1453 {
1454 /* XXX: relAddr not implemented */
1455 GLuint msg_reg_nr = 1;
1456 {
1457 struct brw_reg b;
1458 brw_push_insn_state(p);
1459 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1460 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1461 brw_set_mask_control(p, BRW_MASK_DISABLE);
1462
1463 /* Setup MRF[1] with location/offset into const buffer */
1464 b = brw_message_reg(msg_reg_nr);
1465 b = retype(b, BRW_REGISTER_TYPE_UD);
1466 /* XXX I think we're setting all the dwords of MRF[1] to 'location',
1467 * when the docs say only dword[2] should be set. Hmmm. But it works.
1468 */
1469 brw_MOV(p, b, brw_imm_ud(location));
1470 brw_pop_insn_state(p);
1471 }
1472
1473 {
1474 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1475
1476 insn->header.predicate_control = BRW_PREDICATE_NONE;
1477 insn->header.compression_control = BRW_COMPRESSION_NONE;
1478 insn->header.destreg__conditionalmod = msg_reg_nr;
1479 insn->header.mask_control = BRW_MASK_DISABLE;
1480
1481 /* cast dest to a uword[8] vector */
1482 dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);
1483
1484 brw_set_dest(insn, dest);
1485 brw_set_src0(insn, brw_null_reg());
1486
1487 brw_set_dp_read_message(p->brw,
1488 insn,
1489 bind_table_index,
1490 0, /* msg_control (0 means 1 Oword) */
1491 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
1492 0, /* source cache = data cache */
1493 1, /* msg_length */
1494 1, /* response_length (1 Oword) */
1495 0); /* eot */
1496 }
1497 }
1498
1499
1500 /**
1501 * Read float[4] constant(s) from VS constant buffer.
1502 * For relative addressing, two float[4] constants will be read into 'dest'.
1503 * Otherwise, one float[4] constant will be read into the lower half of 'dest'.
1504 */
1505 void brw_dp_READ_4_vs(struct brw_compile *p,
1506 struct brw_reg dest,
1507 GLuint location,
1508 GLuint bind_table_index)
1509 {
1510 struct brw_instruction *insn;
1511 GLuint msg_reg_nr = 1;
1512 struct brw_reg b;
1513
1514 /*
1515 printf("vs const read msg, location %u, msg_reg_nr %d\n",
1516 location, msg_reg_nr);
1517 */
1518
1519 /* Setup MRF[1] with location/offset into const buffer */
1520 brw_push_insn_state(p);
1521 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1522 brw_set_mask_control(p, BRW_MASK_DISABLE);
1523 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1524
1525 /* XXX I think we're setting all the dwords of MRF[1] to 'location',
1526 * when the docs say only dword[2] should be set. Hmmm. But it works.
1527 */
1528 b = brw_message_reg(msg_reg_nr);
1529 b = retype(b, BRW_REGISTER_TYPE_UD);
1530 /*b = get_element_ud(b, 2);*/
1531 brw_MOV(p, b, brw_imm_ud(location));
1532
1533 brw_pop_insn_state(p);
1534
1535 insn = next_insn(p, BRW_OPCODE_SEND);
1536
1537 insn->header.predicate_control = BRW_PREDICATE_NONE;
1538 insn->header.compression_control = BRW_COMPRESSION_NONE;
1539 insn->header.destreg__conditionalmod = msg_reg_nr;
1540 insn->header.mask_control = BRW_MASK_DISABLE;
1541
1542 brw_set_dest(insn, dest);
1543 brw_set_src0(insn, brw_null_reg());
1544
1545 brw_set_dp_read_message(p->brw,
1546 insn,
1547 bind_table_index,
1548 0,
1549 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
1550 0, /* source cache = data cache */
1551 1, /* msg_length */
1552 1, /* response_length (1 Oword) */
1553 0); /* eot */
1554 }
1555
1556 /**
1557 * Read a float[4] constant per vertex from VS constant buffer, with
1558 * relative addressing.
1559 */
1560 void brw_dp_READ_4_vs_relative(struct brw_compile *p,
1561 struct brw_reg dest,
1562 struct brw_reg addr_reg,
1563 GLuint offset,
1564 GLuint bind_table_index)
1565 {
1566 struct intel_context *intel = &p->brw->intel;
1567 int msg_type;
1568
1569 /* Setup MRF[1] with offset into const buffer */
1570 brw_push_insn_state(p);
1571 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1572 brw_set_mask_control(p, BRW_MASK_DISABLE);
1573 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1574
1575 /* M1.0 is block offset 0, M1.4 is block offset 1, all other
1576 * fields ignored.
1577 */
1578 brw_ADD(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_UD),
1579 addr_reg, brw_imm_d(offset));
1580 brw_pop_insn_state(p);
1581
1582 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1583
1584 insn->header.predicate_control = BRW_PREDICATE_NONE;
1585 insn->header.compression_control = BRW_COMPRESSION_NONE;
1586 insn->header.destreg__conditionalmod = 0;
1587 insn->header.mask_control = BRW_MASK_DISABLE;
1588
1589 brw_set_dest(insn, dest);
1590 brw_set_src0(insn, brw_vec8_grf(0, 0));
1591
1592 if (intel->gen == 6)
1593 msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
1594 else if (intel->gen == 5 || intel->is_g4x)
1595 msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
1596 else
1597 msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
1598
1599 brw_set_dp_read_message(p->brw,
1600 insn,
1601 bind_table_index,
1602 BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
1603 msg_type,
1604 0, /* source cache = data cache */
1605 2, /* msg_length */
1606 1, /* response_length */
1607 0); /* eot */
1608 }
1609
1610
1611
1612 void brw_fb_WRITE(struct brw_compile *p,
1613 int dispatch_width,
1614 struct brw_reg dest,
1615 GLuint msg_reg_nr,
1616 struct brw_reg src0,
1617 GLuint binding_table_index,
1618 GLuint msg_length,
1619 GLuint response_length,
1620 GLboolean eot)
1621 {
1622 struct intel_context *intel = &p->brw->intel;
1623 struct brw_instruction *insn;
1624 GLuint msg_control, msg_type;
1625 GLboolean header_present = GL_TRUE;
1626
1627 insn = next_insn(p, BRW_OPCODE_SEND);
1628 insn->header.predicate_control = 0; /* XXX */
1629 insn->header.compression_control = BRW_COMPRESSION_NONE;
1630
1631 if (intel->gen >= 6) {
1632 if (msg_length == 4)
1633 header_present = GL_FALSE;
1634
1635 /* headerless version, just submit color payload */
1636 src0 = brw_message_reg(msg_reg_nr);
1637
1638 msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE_GEN6;
1639 } else {
1640 insn->header.destreg__conditionalmod = msg_reg_nr;
1641
1642 msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
1643 }
1644
1645 if (dispatch_width == 16)
1646 msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE;
1647 else
1648 msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01;
1649
1650 brw_set_dest(insn, dest);
1651 brw_set_src0(insn, src0);
1652 brw_set_dp_write_message(p->brw,
1653 insn,
1654 binding_table_index,
1655 msg_control,
1656 msg_type,
1657 msg_length,
1658 header_present,
1659 1, /* pixel scoreboard */
1660 response_length,
1661 eot,
1662 0 /* send_commit_msg */);
1663 }
1664
1665
1666 /**
1667 * Texture sample instruction.
1668 * Note: the msg_type plus msg_length values determine exactly what kind
1669 * of sampling operation is performed. See volume 4, page 161 of docs.
1670 */
1671 void brw_SAMPLE(struct brw_compile *p,
1672 struct brw_reg dest,
1673 GLuint msg_reg_nr,
1674 struct brw_reg src0,
1675 GLuint binding_table_index,
1676 GLuint sampler,
1677 GLuint writemask,
1678 GLuint msg_type,
1679 GLuint response_length,
1680 GLuint msg_length,
1681 GLboolean eot,
1682 GLuint header_present,
1683 GLuint simd_mode)
1684 {
1685 struct intel_context *intel = &p->brw->intel;
1686 GLboolean need_stall = 0;
1687
1688 if (writemask == 0) {
1689 /*printf("%s: zero writemask??\n", __FUNCTION__); */
1690 return;
1691 }
1692
1693 /* Hardware doesn't do destination dependency checking on send
1694 * instructions properly. Add a workaround which generates the
1695 * dependency by other means. In practice it seems like this bug
1696 * only crops up for texture samples, and only where registers are
1697 * written by the send and then written again later without being
1698 * read in between. Luckily for us, we already track that
1699 * information and use it to modify the writemask for the
1700 * instruction, so that is a guide for whether a workaround is
1701 * needed.
1702 */
1703 if (writemask != WRITEMASK_XYZW) {
1704 GLuint dst_offset = 0;
1705 GLuint i, newmask = 0, len = 0;
1706
1707 for (i = 0; i < 4; i++) {
1708 if (writemask & (1<<i))
1709 break;
1710 dst_offset += 2;
1711 }
1712 for (; i < 4; i++) {
1713 if (!(writemask & (1<<i)))
1714 break;
1715 newmask |= 1<<i;
1716 len++;
1717 }
1718
1719 if (newmask != writemask) {
1720 need_stall = 1;
1721 /* printf("need stall %x %x\n", newmask , writemask); */
1722 }
1723 else {
1724 GLboolean dispatch_16 = GL_FALSE;
1725
1726 struct brw_reg m1 = brw_message_reg(msg_reg_nr);
1727
1728 guess_execution_size(p->current, dest);
1729 if (p->current->header.execution_size == BRW_EXECUTE_16)
1730 dispatch_16 = GL_TRUE;
1731
1732 newmask = ~newmask & WRITEMASK_XYZW;
1733
1734 brw_push_insn_state(p);
1735
1736 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1737 brw_set_mask_control(p, BRW_MASK_DISABLE);
1738
1739 brw_MOV(p, m1, brw_vec8_grf(0,0));
1740 brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12));
1741
1742 brw_pop_insn_state(p);
1743
1744 src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
1745 dest = offset(dest, dst_offset);
1746
1747 /* For 16-wide dispatch, masked channels are skipped in the
1748 * response. For 8-wide, masked channels still take up slots,
1749 * and are just not written to.
1750 */
1751 if (dispatch_16)
1752 response_length = len * 2;
1753 }
1754 }
1755
1756 {
1757 struct brw_instruction *insn;
1758
1759 /* Sandybridge doesn't have the implied move for SENDs,
1760 * and the first message register index comes from src0.
1761 */
1762 if (intel->gen >= 6) {
1763 brw_push_insn_state(p);
1764 brw_set_mask_control( p, BRW_MASK_DISABLE );
1765 /* m1 contains header? */
1766 brw_MOV(p, brw_message_reg(msg_reg_nr), src0);
1767 brw_pop_insn_state(p);
1768 src0 = brw_message_reg(msg_reg_nr);
1769 }
1770
1771 insn = next_insn(p, BRW_OPCODE_SEND);
1772 insn->header.predicate_control = 0; /* XXX */
1773 insn->header.compression_control = BRW_COMPRESSION_NONE;
1774 if (intel->gen < 6)
1775 insn->header.destreg__conditionalmod = msg_reg_nr;
1776
1777 brw_set_dest(insn, dest);
1778 brw_set_src0(insn, src0);
1779 brw_set_sampler_message(p->brw, insn,
1780 binding_table_index,
1781 sampler,
1782 msg_type,
1783 response_length,
1784 msg_length,
1785 eot,
1786 header_present,
1787 simd_mode);
1788 }
1789
1790 if (need_stall) {
1791 struct brw_reg reg = vec8(offset(dest, response_length-1));
1792
1793 /* mov (8) r9.0<1>:f r9.0<8;8,1>:f { Align1 }
1794 */
1795 brw_push_insn_state(p);
1796 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1797 brw_MOV(p, reg, reg);
1798 brw_pop_insn_state(p);
1799 }
1800
1801 }
1802
1803 /* All these variables are pretty confusing - we might be better off
1804 * using bitmasks and macros for this, in the old style. Or perhaps
1805 * just having the caller instantiate the fields in dword3 itself.
1806 */
1807 void brw_urb_WRITE(struct brw_compile *p,
1808 struct brw_reg dest,
1809 GLuint msg_reg_nr,
1810 struct brw_reg src0,
1811 GLboolean allocate,
1812 GLboolean used,
1813 GLuint msg_length,
1814 GLuint response_length,
1815 GLboolean eot,
1816 GLboolean writes_complete,
1817 GLuint offset,
1818 GLuint swizzle)
1819 {
1820 struct intel_context *intel = &p->brw->intel;
1821 struct brw_instruction *insn;
1822
1823 /* Sandybridge doesn't have the implied move for SENDs,
1824 * and the first message register index comes from src0.
1825 */
1826 if (intel->gen >= 6) {
1827 brw_push_insn_state(p);
1828 brw_set_mask_control( p, BRW_MASK_DISABLE );
1829 brw_MOV(p, brw_message_reg(msg_reg_nr), src0);
1830 brw_pop_insn_state(p);
1831 src0 = brw_message_reg(msg_reg_nr);
1832 }
1833
1834 insn = next_insn(p, BRW_OPCODE_SEND);
1835
1836 assert(msg_length < BRW_MAX_MRF);
1837
1838 brw_set_dest(insn, dest);
1839 brw_set_src0(insn, src0);
1840 brw_set_src1(insn, brw_imm_d(0));
1841
1842 if (intel->gen < 6)
1843 insn->header.destreg__conditionalmod = msg_reg_nr;
1844
1845 brw_set_urb_message(p->brw,
1846 insn,
1847 allocate,
1848 used,
1849 msg_length,
1850 response_length,
1851 eot,
1852 writes_complete,
1853 offset,
1854 swizzle);
1855 }
1856
1857 void brw_ff_sync(struct brw_compile *p,
1858 struct brw_reg dest,
1859 GLuint msg_reg_nr,
1860 struct brw_reg src0,
1861 GLboolean allocate,
1862 GLuint response_length,
1863 GLboolean eot)
1864 {
1865 struct intel_context *intel = &p->brw->intel;
1866 struct brw_instruction *insn;
1867
1868 /* Sandybridge doesn't have the implied move for SENDs,
1869 * and the first message register index comes from src0.
1870 */
1871 if (intel->gen >= 6) {
1872 brw_push_insn_state(p);
1873 brw_set_mask_control( p, BRW_MASK_DISABLE );
1874 brw_MOV(p, retype(brw_message_reg(msg_reg_nr), BRW_REGISTER_TYPE_UD),
1875 retype(src0, BRW_REGISTER_TYPE_UD));
1876 brw_pop_insn_state(p);
1877 src0 = brw_message_reg(msg_reg_nr);
1878 }
1879
1880 insn = next_insn(p, BRW_OPCODE_SEND);
1881 brw_set_dest(insn, dest);
1882 brw_set_src0(insn, src0);
1883 brw_set_src1(insn, brw_imm_d(0));
1884
1885 if (intel->gen < 6)
1886 insn->header.destreg__conditionalmod = msg_reg_nr;
1887
1888 brw_set_ff_sync_message(p->brw,
1889 insn,
1890 allocate,
1891 response_length,
1892 eot);
1893 }