i965: Set the source operand types for gen6 if/else/endif to integer.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_eu_emit.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32
33 #include "brw_context.h"
34 #include "brw_defines.h"
35 #include "brw_eu.h"
36
37
38
39
40 /***********************************************************************
41 * Internal helper for constructing instructions
42 */
43
44 static void guess_execution_size( struct brw_instruction *insn,
45 struct brw_reg reg )
46 {
47 if (reg.width == BRW_WIDTH_8 &&
48 insn->header.compression_control == BRW_COMPRESSION_COMPRESSED)
49 insn->header.execution_size = BRW_EXECUTE_16;
50 else
51 insn->header.execution_size = reg.width; /* note - definitions are compatible */
52 }
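
/* For example (a sketch, relying on the note above that the width and
 * execution-size encodings are compatible): an uncompressed instruction
 * whose register has width BRW_WIDTH_4 ends up with execution_size
 * BRW_EXECUTE_4, while a BRW_WIDTH_8 register in a compressed instruction
 * is widened to BRW_EXECUTE_16.
 */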
53
54
55 static void brw_set_dest( struct brw_instruction *insn,
56 struct brw_reg dest )
57 {
58 if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE &&
59 dest.file != BRW_MESSAGE_REGISTER_FILE)
60 assert(dest.nr < 128);
61
62 insn->bits1.da1.dest_reg_file = dest.file;
63 insn->bits1.da1.dest_reg_type = dest.type;
64 insn->bits1.da1.dest_address_mode = dest.address_mode;
65
66 if (dest.address_mode == BRW_ADDRESS_DIRECT) {
67 insn->bits1.da1.dest_reg_nr = dest.nr;
68
69 if (insn->header.access_mode == BRW_ALIGN_1) {
70 insn->bits1.da1.dest_subreg_nr = dest.subnr;
71 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
72 dest.hstride = BRW_HORIZONTAL_STRIDE_1;
73 insn->bits1.da1.dest_horiz_stride = dest.hstride;
74 }
75 else {
76 insn->bits1.da16.dest_subreg_nr = dest.subnr / 16;
77 insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask;
78 /* even though it's ignored in da16, this still needs to be set to '01' */
79 insn->bits1.da16.dest_horiz_stride = 1;
80 }
81 }
82 else {
83 insn->bits1.ia1.dest_subreg_nr = dest.subnr;
84
85 /* These are different sizes in align1 vs align16:
86 */
87 if (insn->header.access_mode == BRW_ALIGN_1) {
88 insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset;
89 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
90 dest.hstride = BRW_HORIZONTAL_STRIDE_1;
91 insn->bits1.ia1.dest_horiz_stride = dest.hstride;
92 }
93 else {
94 insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset;
95 /* even though it's ignored in da16, this still needs to be set to '01' */
96 insn->bits1.ia16.dest_horiz_stride = 1;
97 }
98 }
99
100 /* NEW: Set the execution size based on dest.width and
101 * insn->header.compression_control:
102 */
103 guess_execution_size(insn, dest);
104 }
105
106 extern int reg_type_size[];
107
108 static void
109 validate_reg(struct brw_instruction *insn, struct brw_reg reg)
110 {
111 int hstride_for_reg[] = {0, 1, 2, 4};
112 int vstride_for_reg[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256};
113 int width_for_reg[] = {1, 2, 4, 8, 16};
114 int execsize_for_reg[] = {1, 2, 4, 8, 16};
115 int width, hstride, vstride, execsize;
116
117 if (reg.file == BRW_IMMEDIATE_VALUE) {
118 /* 3.3.6: Region Parameters. Restriction: Immediate vectors
119 * mean the destination has to be 128-bit aligned and the
120 * destination horiz stride has to be a word.
121 */
122 if (reg.type == BRW_REGISTER_TYPE_V) {
123 assert(hstride_for_reg[insn->bits1.da1.dest_horiz_stride] *
124 reg_type_size[insn->bits1.da1.dest_reg_type] == 2);
125 }
126
127 return;
128 }
129
130 if (reg.file == BRW_ARCHITECTURE_REGISTER_FILE &&
131 reg.nr == BRW_ARF_NULL)
132 return;
133
134 assert(reg.hstride >= 0 && reg.hstride < Elements(hstride_for_reg));
135 hstride = hstride_for_reg[reg.hstride];
136
137 if (reg.vstride == 0xf) {
138 vstride = -1;
139 } else {
140 assert(reg.vstride >= 0 && reg.vstride < Elements(vstride_for_reg));
141 vstride = vstride_for_reg[reg.vstride];
142 }
143
144 assert(reg.width >= 0 && reg.width < Elements(width_for_reg));
145 width = width_for_reg[reg.width];
146
147 assert(insn->header.execution_size >= 0 &&
148 insn->header.execution_size < Elements(execsize_for_reg));
149 execsize = execsize_for_reg[insn->header.execution_size];
150
151 /* Restrictions from 3.3.10: Register Region Restrictions. */
152 /* 3. */
153 assert(execsize >= width);
154
155 /* 4. */
156 if (execsize == width && hstride != 0) {
157 assert(vstride == -1 || vstride == width * hstride);
158 }
159
160 /* 5. */
161 if (execsize == width && hstride == 0) {
162 /* no restriction on vstride. */
163 }
164
165 /* 6. */
166 if (width == 1) {
167 assert(hstride == 0);
168 }
169
170 /* 7. */
171 if (execsize == 1 && width == 1) {
172 assert(hstride == 0);
173 assert(vstride == 0);
174 }
175
176 /* 8. */
177 if (vstride == 0 && hstride == 0) {
178 assert(width == 1);
179 }
180
181 /* 10. Check destination issues. */
182 }
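
/* A worked example of the restrictions checked above (illustrative only):
 * an <8;8,1>:f source under an execsize-8 instruction decodes to vstride 8,
 * width 8, hstride 1, so rule 3 holds (execsize 8 >= width 8) and rule 4
 * holds (vstride 8 == width * hstride).  A scalar <0;1,0> region has
 * width 1 and hstride 0, which is exactly what rules 6 and 8 require.
 */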
183
184 static void brw_set_src0( struct brw_instruction *insn,
185 struct brw_reg reg )
186 {
187 if (reg.file != BRW_ARCHITECTURE_REGISTER_FILE)
188 assert(reg.nr < 128);
189
190 validate_reg(insn, reg);
191
192 insn->bits1.da1.src0_reg_file = reg.file;
193 insn->bits1.da1.src0_reg_type = reg.type;
194 insn->bits2.da1.src0_abs = reg.abs;
195 insn->bits2.da1.src0_negate = reg.negate;
196 insn->bits2.da1.src0_address_mode = reg.address_mode;
197
198 if (reg.file == BRW_IMMEDIATE_VALUE) {
199 insn->bits3.ud = reg.dw1.ud;
200
201 /* Required to set some fields in src1 as well:
202 */
203 insn->bits1.da1.src1_reg_file = 0; /* arf */
204 insn->bits1.da1.src1_reg_type = reg.type;
205 }
206 else
207 {
208 if (reg.address_mode == BRW_ADDRESS_DIRECT) {
209 if (insn->header.access_mode == BRW_ALIGN_1) {
210 insn->bits2.da1.src0_subreg_nr = reg.subnr;
211 insn->bits2.da1.src0_reg_nr = reg.nr;
212 }
213 else {
214 insn->bits2.da16.src0_subreg_nr = reg.subnr / 16;
215 insn->bits2.da16.src0_reg_nr = reg.nr;
216 }
217 }
218 else {
219 insn->bits2.ia1.src0_subreg_nr = reg.subnr;
220
221 if (insn->header.access_mode == BRW_ALIGN_1) {
222 insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset;
223 }
224 else {
225 insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset;
226 }
227 }
228
229 if (insn->header.access_mode == BRW_ALIGN_1) {
230 if (reg.width == BRW_WIDTH_1 &&
231 insn->header.execution_size == BRW_EXECUTE_1) {
232 insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
233 insn->bits2.da1.src0_width = BRW_WIDTH_1;
234 insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0;
235 }
236 else {
237 insn->bits2.da1.src0_horiz_stride = reg.hstride;
238 insn->bits2.da1.src0_width = reg.width;
239 insn->bits2.da1.src0_vert_stride = reg.vstride;
240 }
241 }
242 else {
243 insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
244 insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
245 insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
246 insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
247
248 /* This is an oddity of the fact that we're using the same
249 * register descriptions for align_16 as for align_1:
250 */
251 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
252 insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4;
253 else
254 insn->bits2.da16.src0_vert_stride = reg.vstride;
255 }
256 }
257 }
258
259
260 void brw_set_src1( struct brw_instruction *insn,
261 struct brw_reg reg )
262 {
263 assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
264
265 assert(reg.nr < 128);
266
267 validate_reg(insn, reg);
268
269 insn->bits1.da1.src1_reg_file = reg.file;
270 insn->bits1.da1.src1_reg_type = reg.type;
271 insn->bits3.da1.src1_abs = reg.abs;
272 insn->bits3.da1.src1_negate = reg.negate;
273
274 /* Only src1 can be immediate in two-argument instructions.
275 */
276 assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE);
277
278 if (reg.file == BRW_IMMEDIATE_VALUE) {
279 insn->bits3.ud = reg.dw1.ud;
280 }
281 else {
282 /* This is a hardware restriction, which may or may not be lifted
283 * in the future:
284 */
285 assert (reg.address_mode == BRW_ADDRESS_DIRECT);
286 /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */
287
288 if (insn->header.access_mode == BRW_ALIGN_1) {
289 insn->bits3.da1.src1_subreg_nr = reg.subnr;
290 insn->bits3.da1.src1_reg_nr = reg.nr;
291 }
292 else {
293 insn->bits3.da16.src1_subreg_nr = reg.subnr / 16;
294 insn->bits3.da16.src1_reg_nr = reg.nr;
295 }
296
297 if (insn->header.access_mode == BRW_ALIGN_1) {
298 if (reg.width == BRW_WIDTH_1 &&
299 insn->header.execution_size == BRW_EXECUTE_1) {
300 insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
301 insn->bits3.da1.src1_width = BRW_WIDTH_1;
302 insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0;
303 }
304 else {
305 insn->bits3.da1.src1_horiz_stride = reg.hstride;
306 insn->bits3.da1.src1_width = reg.width;
307 insn->bits3.da1.src1_vert_stride = reg.vstride;
308 }
309 }
310 else {
311 insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
312 insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
313 insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
314 insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
315
316 /* This is an oddity of the fact that we're using the same
317 * register descriptions for align_16 as for align_1:
318 */
319 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
320 insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4;
321 else
322 insn->bits3.da16.src1_vert_stride = reg.vstride;
323 }
324 }
325 }
326
327
328
329 static void brw_set_math_message( struct brw_context *brw,
330 struct brw_instruction *insn,
331 GLuint msg_length,
332 GLuint response_length,
333 GLuint function,
334 GLuint integer_type,
335 GLboolean low_precision,
336 GLboolean saturate,
337 GLuint dataType )
338 {
339 struct intel_context *intel = &brw->intel;
340 brw_set_src1(insn, brw_imm_d(0));
341
342 if (intel->gen == 5) {
343 insn->bits3.math_gen5.function = function;
344 insn->bits3.math_gen5.int_type = integer_type;
345 insn->bits3.math_gen5.precision = low_precision;
346 insn->bits3.math_gen5.saturate = saturate;
347 insn->bits3.math_gen5.data_type = dataType;
348 insn->bits3.math_gen5.snapshot = 0;
349 insn->bits3.math_gen5.header_present = 0;
350 insn->bits3.math_gen5.response_length = response_length;
351 insn->bits3.math_gen5.msg_length = msg_length;
352 insn->bits3.math_gen5.end_of_thread = 0;
353 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_MATH;
354 insn->bits2.send_gen5.end_of_thread = 0;
355 } else {
356 insn->bits3.math.function = function;
357 insn->bits3.math.int_type = integer_type;
358 insn->bits3.math.precision = low_precision;
359 insn->bits3.math.saturate = saturate;
360 insn->bits3.math.data_type = dataType;
361 insn->bits3.math.response_length = response_length;
362 insn->bits3.math.msg_length = msg_length;
363 insn->bits3.math.msg_target = BRW_MESSAGE_TARGET_MATH;
364 insn->bits3.math.end_of_thread = 0;
365 }
366 }
367
368
369 static void brw_set_ff_sync_message(struct brw_context *brw,
370 struct brw_instruction *insn,
371 GLboolean allocate,
372 GLuint response_length,
373 GLboolean end_of_thread)
374 {
375 struct intel_context *intel = &brw->intel;
376 brw_set_src1(insn, brw_imm_d(0));
377
378 insn->bits3.urb_gen5.opcode = 1; /* FF_SYNC */
379 insn->bits3.urb_gen5.offset = 0; /* Not used by FF_SYNC */
380 insn->bits3.urb_gen5.swizzle_control = 0; /* Not used by FF_SYNC */
381 insn->bits3.urb_gen5.allocate = allocate;
382 insn->bits3.urb_gen5.used = 0; /* Not used by FF_SYNC */
383 insn->bits3.urb_gen5.complete = 0; /* Not used by FF_SYNC */
384 insn->bits3.urb_gen5.header_present = 1;
385 insn->bits3.urb_gen5.response_length = response_length; /* may be 1 or 0 */
386 insn->bits3.urb_gen5.msg_length = 1;
387 insn->bits3.urb_gen5.end_of_thread = end_of_thread;
388 if (intel->gen >= 6) {
389 insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_URB;
390 } else {
391 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_URB;
392 insn->bits2.send_gen5.end_of_thread = end_of_thread;
393 }
394 }
395
396 static void brw_set_urb_message( struct brw_context *brw,
397 struct brw_instruction *insn,
398 GLboolean allocate,
399 GLboolean used,
400 GLuint msg_length,
401 GLuint response_length,
402 GLboolean end_of_thread,
403 GLboolean complete,
404 GLuint offset,
405 GLuint swizzle_control )
406 {
407 struct intel_context *intel = &brw->intel;
408 brw_set_src1(insn, brw_imm_d(0));
409
410 if (intel->gen >= 5) {
411 insn->bits3.urb_gen5.opcode = 0; /* ? */
412 insn->bits3.urb_gen5.offset = offset;
413 insn->bits3.urb_gen5.swizzle_control = swizzle_control;
414 insn->bits3.urb_gen5.allocate = allocate;
415 insn->bits3.urb_gen5.used = used; /* ? */
416 insn->bits3.urb_gen5.complete = complete;
417 insn->bits3.urb_gen5.header_present = 1;
418 insn->bits3.urb_gen5.response_length = response_length;
419 insn->bits3.urb_gen5.msg_length = msg_length;
420 insn->bits3.urb_gen5.end_of_thread = end_of_thread;
421 if (intel->gen >= 6) {
422 /* For SNB, the SFID bits moved to the condmod bits, and
423 * EOT stayed in bits3 above. Does the EOT bit setting
424 * below on Ironlake even do anything?
425 */
426 insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_URB;
427 } else {
428 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_URB;
429 insn->bits2.send_gen5.end_of_thread = end_of_thread;
430 }
431 } else {
432 insn->bits3.urb.opcode = 0; /* ? */
433 insn->bits3.urb.offset = offset;
434 insn->bits3.urb.swizzle_control = swizzle_control;
435 insn->bits3.urb.allocate = allocate;
436 insn->bits3.urb.used = used; /* ? */
437 insn->bits3.urb.complete = complete;
438 insn->bits3.urb.response_length = response_length;
439 insn->bits3.urb.msg_length = msg_length;
440 insn->bits3.urb.msg_target = BRW_MESSAGE_TARGET_URB;
441 insn->bits3.urb.end_of_thread = end_of_thread;
442 }
443 }
444
445 static void brw_set_dp_write_message( struct brw_context *brw,
446 struct brw_instruction *insn,
447 GLuint binding_table_index,
448 GLuint msg_control,
449 GLuint msg_type,
450 GLuint msg_length,
451 GLboolean header_present,
452 GLuint pixel_scoreboard_clear,
453 GLuint response_length,
454 GLuint end_of_thread,
455 GLuint send_commit_msg)
456 {
457 struct intel_context *intel = &brw->intel;
458 brw_set_src1(insn, brw_imm_ud(0));
459
460 if (intel->gen >= 6) {
461 insn->bits3.dp_render_cache.binding_table_index = binding_table_index;
462 insn->bits3.dp_render_cache.msg_control = msg_control;
463 insn->bits3.dp_render_cache.pixel_scoreboard_clear = pixel_scoreboard_clear;
464 insn->bits3.dp_render_cache.msg_type = msg_type;
465 insn->bits3.dp_render_cache.send_commit_msg = send_commit_msg;
466 insn->bits3.dp_render_cache.header_present = header_present;
467 insn->bits3.dp_render_cache.response_length = response_length;
468 insn->bits3.dp_render_cache.msg_length = msg_length;
469 insn->bits3.dp_render_cache.end_of_thread = end_of_thread;
470 insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
471 /* XXX really need below? */
472 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
473 insn->bits2.send_gen5.end_of_thread = end_of_thread;
474 } else if (intel->gen == 5) {
475 insn->bits3.dp_write_gen5.binding_table_index = binding_table_index;
476 insn->bits3.dp_write_gen5.msg_control = msg_control;
477 insn->bits3.dp_write_gen5.pixel_scoreboard_clear = pixel_scoreboard_clear;
478 insn->bits3.dp_write_gen5.msg_type = msg_type;
479 insn->bits3.dp_write_gen5.send_commit_msg = send_commit_msg;
480 insn->bits3.dp_write_gen5.header_present = header_present;
481 insn->bits3.dp_write_gen5.response_length = response_length;
482 insn->bits3.dp_write_gen5.msg_length = msg_length;
483 insn->bits3.dp_write_gen5.end_of_thread = end_of_thread;
484 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
485 insn->bits2.send_gen5.end_of_thread = end_of_thread;
486 } else {
487 insn->bits3.dp_write.binding_table_index = binding_table_index;
488 insn->bits3.dp_write.msg_control = msg_control;
489 insn->bits3.dp_write.pixel_scoreboard_clear = pixel_scoreboard_clear;
490 insn->bits3.dp_write.msg_type = msg_type;
491 insn->bits3.dp_write.send_commit_msg = send_commit_msg;
492 insn->bits3.dp_write.response_length = response_length;
493 insn->bits3.dp_write.msg_length = msg_length;
494 insn->bits3.dp_write.msg_target = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
495 insn->bits3.dp_write.end_of_thread = end_of_thread;
496 }
497 }
498
499 static void brw_set_dp_read_message( struct brw_context *brw,
500 struct brw_instruction *insn,
501 GLuint binding_table_index,
502 GLuint msg_control,
503 GLuint msg_type,
504 GLuint target_cache,
505 GLuint msg_length,
506 GLuint response_length,
507 GLuint end_of_thread )
508 {
509 struct intel_context *intel = &brw->intel;
510 brw_set_src1(insn, brw_imm_d(0));
511
512 if (intel->gen == 5) {
513 insn->bits3.dp_read_gen5.binding_table_index = binding_table_index;
514 insn->bits3.dp_read_gen5.msg_control = msg_control;
515 insn->bits3.dp_read_gen5.msg_type = msg_type;
516 insn->bits3.dp_read_gen5.target_cache = target_cache;
517 insn->bits3.dp_read_gen5.header_present = 1;
518 insn->bits3.dp_read_gen5.response_length = response_length;
519 insn->bits3.dp_read_gen5.msg_length = msg_length;
520 insn->bits3.dp_read_gen5.pad1 = 0;
521 insn->bits3.dp_read_gen5.end_of_thread = end_of_thread;
522 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_READ;
523 insn->bits2.send_gen5.end_of_thread = end_of_thread;
524 } else {
525 insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/
526 insn->bits3.dp_read.msg_control = msg_control; /*8:11*/
527 insn->bits3.dp_read.msg_type = msg_type; /*12:13*/
528 insn->bits3.dp_read.target_cache = target_cache; /*14:15*/
529 insn->bits3.dp_read.response_length = response_length; /*16:19*/
530 insn->bits3.dp_read.msg_length = msg_length; /*20:23*/
531 insn->bits3.dp_read.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ; /*24:27*/
532 insn->bits3.dp_read.pad1 = 0; /*28:30*/
533 insn->bits3.dp_read.end_of_thread = end_of_thread; /*31*/
534 }
535 }
536
537 static void brw_set_sampler_message(struct brw_context *brw,
538 struct brw_instruction *insn,
539 GLuint binding_table_index,
540 GLuint sampler,
541 GLuint msg_type,
542 GLuint response_length,
543 GLuint msg_length,
544 GLboolean eot,
545 GLuint header_present,
546 GLuint simd_mode)
547 {
548 struct intel_context *intel = &brw->intel;
549 assert(eot == 0);
550 brw_set_src1(insn, brw_imm_d(0));
551
552 if (intel->gen >= 5) {
553 insn->bits3.sampler_gen5.binding_table_index = binding_table_index;
554 insn->bits3.sampler_gen5.sampler = sampler;
555 insn->bits3.sampler_gen5.msg_type = msg_type;
556 insn->bits3.sampler_gen5.simd_mode = simd_mode;
557 insn->bits3.sampler_gen5.header_present = header_present;
558 insn->bits3.sampler_gen5.response_length = response_length;
559 insn->bits3.sampler_gen5.msg_length = msg_length;
560 insn->bits3.sampler_gen5.end_of_thread = eot;
561 if (intel->gen >= 6)
562 insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_SAMPLER;
563 else {
564 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_SAMPLER;
565 insn->bits2.send_gen5.end_of_thread = eot;
566 }
567 } else if (intel->is_g4x) {
568 insn->bits3.sampler_g4x.binding_table_index = binding_table_index;
569 insn->bits3.sampler_g4x.sampler = sampler;
570 insn->bits3.sampler_g4x.msg_type = msg_type;
571 insn->bits3.sampler_g4x.response_length = response_length;
572 insn->bits3.sampler_g4x.msg_length = msg_length;
573 insn->bits3.sampler_g4x.end_of_thread = eot;
574 insn->bits3.sampler_g4x.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
575 } else {
576 insn->bits3.sampler.binding_table_index = binding_table_index;
577 insn->bits3.sampler.sampler = sampler;
578 insn->bits3.sampler.msg_type = msg_type;
579 insn->bits3.sampler.return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32;
580 insn->bits3.sampler.response_length = response_length;
581 insn->bits3.sampler.msg_length = msg_length;
582 insn->bits3.sampler.end_of_thread = eot;
583 insn->bits3.sampler.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
584 }
585 }
586
587
588
589 static struct brw_instruction *next_insn( struct brw_compile *p,
590 GLuint opcode )
591 {
592 struct brw_instruction *insn;
593
594 assert(p->nr_insn + 1 < BRW_EU_MAX_INSN);
595
596 insn = &p->store[p->nr_insn++];
597 memcpy(insn, p->current, sizeof(*insn));
598
599 /* Reset this one-shot flag:
600 */
601
602 if (p->current->header.destreg__conditionalmod) {
603 p->current->header.destreg__conditionalmod = 0;
604 p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
605 }
606
607 insn->header.opcode = opcode;
608 return insn;
609 }
610
611
612 static struct brw_instruction *brw_alu1( struct brw_compile *p,
613 GLuint opcode,
614 struct brw_reg dest,
615 struct brw_reg src )
616 {
617 struct brw_instruction *insn = next_insn(p, opcode);
618 brw_set_dest(insn, dest);
619 brw_set_src0(insn, src);
620 return insn;
621 }
622
623 static struct brw_instruction *brw_alu2(struct brw_compile *p,
624 GLuint opcode,
625 struct brw_reg dest,
626 struct brw_reg src0,
627 struct brw_reg src1 )
628 {
629 struct brw_instruction *insn = next_insn(p, opcode);
630 brw_set_dest(insn, dest);
631 brw_set_src0(insn, src0);
632 brw_set_src1(insn, src1);
633 return insn;
634 }
635
636
637 /***********************************************************************
638 * Convenience routines.
639 */
640 #define ALU1(OP) \
641 struct brw_instruction *brw_##OP(struct brw_compile *p, \
642 struct brw_reg dest, \
643 struct brw_reg src0) \
644 { \
645 return brw_alu1(p, BRW_OPCODE_##OP, dest, src0); \
646 }
647
648 #define ALU2(OP) \
649 struct brw_instruction *brw_##OP(struct brw_compile *p, \
650 struct brw_reg dest, \
651 struct brw_reg src0, \
652 struct brw_reg src1) \
653 { \
654 return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1); \
655 }
656
657 /* Rounding operations (other than RNDD) require two instructions - the first
658 * stores a rounded value (possibly the wrong way) in the dest register, but
659 * also sets a per-channel "increment bit" in the flag register. A predicated
660 * add of 1.0 fixes dest to contain the desired result.
661 */
662 #define ROUND(OP) \
663 void brw_##OP(struct brw_compile *p, \
664 struct brw_reg dest, \
665 struct brw_reg src) \
666 { \
667 struct brw_instruction *rnd, *add; \
668 rnd = next_insn(p, BRW_OPCODE_##OP); \
669 brw_set_dest(rnd, dest); \
670 brw_set_src0(rnd, src); \
671 rnd->header.destreg__conditionalmod = 0x7; /* turn on round-increments */ \
672 \
673 add = brw_ADD(p, dest, dest, brw_imm_f(1.0f)); \
674 add->header.predicate_control = BRW_PREDICATE_NORMAL; \
675 }
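
/* As a sketch, the ROUND macro above emits a sequence along these lines
 * (register names are placeholders):
 *
 *          rndz(8)  dst  src        ; also sets the per-channel increment flag
 *    (+f0) add(8)   dst  dst  1.0F  ; predicated fix-up of the rounded value
 */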
676
677
678 ALU1(MOV)
679 ALU2(SEL)
680 ALU1(NOT)
681 ALU2(AND)
682 ALU2(OR)
683 ALU2(XOR)
684 ALU2(SHR)
685 ALU2(SHL)
686 ALU2(RSR)
687 ALU2(RSL)
688 ALU2(ASR)
689 ALU1(FRC)
690 ALU1(RNDD)
691 ALU2(MAC)
692 ALU2(MACH)
693 ALU1(LZD)
694 ALU2(DP4)
695 ALU2(DPH)
696 ALU2(DP3)
697 ALU2(DP2)
698 ALU2(LINE)
699 ALU2(PLN)
700
701
702 ROUND(RNDZ)
703 ROUND(RNDE)
704
705
706 struct brw_instruction *brw_ADD(struct brw_compile *p,
707 struct brw_reg dest,
708 struct brw_reg src0,
709 struct brw_reg src1)
710 {
711 /* 6.2.2: add */
712 if (src0.type == BRW_REGISTER_TYPE_F ||
713 (src0.file == BRW_IMMEDIATE_VALUE &&
714 src0.type == BRW_REGISTER_TYPE_VF)) {
715 assert(src1.type != BRW_REGISTER_TYPE_UD);
716 assert(src1.type != BRW_REGISTER_TYPE_D);
717 }
718
719 if (src1.type == BRW_REGISTER_TYPE_F ||
720 (src1.file == BRW_IMMEDIATE_VALUE &&
721 src1.type == BRW_REGISTER_TYPE_VF)) {
722 assert(src0.type != BRW_REGISTER_TYPE_UD);
723 assert(src0.type != BRW_REGISTER_TYPE_D);
724 }
725
726 return brw_alu2(p, BRW_OPCODE_ADD, dest, src0, src1);
727 }
728
729 struct brw_instruction *brw_MUL(struct brw_compile *p,
730 struct brw_reg dest,
731 struct brw_reg src0,
732 struct brw_reg src1)
733 {
734 /* 6.32.38: mul */
735 if (src0.type == BRW_REGISTER_TYPE_D ||
736 src0.type == BRW_REGISTER_TYPE_UD ||
737 src1.type == BRW_REGISTER_TYPE_D ||
738 src1.type == BRW_REGISTER_TYPE_UD) {
739 assert(dest.type != BRW_REGISTER_TYPE_F);
740 }
741
742 if (src0.type == BRW_REGISTER_TYPE_F ||
743 (src0.file == BRW_IMMEDIATE_VALUE &&
744 src0.type == BRW_REGISTER_TYPE_VF)) {
745 assert(src1.type != BRW_REGISTER_TYPE_UD);
746 assert(src1.type != BRW_REGISTER_TYPE_D);
747 }
748
749 if (src1.type == BRW_REGISTER_TYPE_F ||
750 (src1.file == BRW_IMMEDIATE_VALUE &&
751 src1.type == BRW_REGISTER_TYPE_VF)) {
752 assert(src0.type != BRW_REGISTER_TYPE_UD);
753 assert(src0.type != BRW_REGISTER_TYPE_D);
754 }
755
756 assert(src0.file != BRW_ARCHITECTURE_REGISTER_FILE ||
757 src0.nr != BRW_ARF_ACCUMULATOR);
758 assert(src1.file != BRW_ARCHITECTURE_REGISTER_FILE ||
759 src1.nr != BRW_ARF_ACCUMULATOR);
760
761 return brw_alu2(p, BRW_OPCODE_MUL, dest, src0, src1);
762 }
763
764
765 void brw_NOP(struct brw_compile *p)
766 {
767 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP);
768 brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
769 brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
770 brw_set_src1(insn, brw_imm_ud(0x0));
771 }
772
773
774
775
776
777 /***********************************************************************
778 * Comparisons, if/else/endif
779 */
780
781 struct brw_instruction *brw_JMPI(struct brw_compile *p,
782 struct brw_reg dest,
783 struct brw_reg src0,
784 struct brw_reg src1)
785 {
786 struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1);
787
788 insn->header.execution_size = 1;
789 insn->header.compression_control = BRW_COMPRESSION_NONE;
790 insn->header.mask_control = BRW_MASK_DISABLE;
791
792 p->current->header.predicate_control = BRW_PREDICATE_NONE;
793
794 return insn;
795 }
796
797 /* EU takes the value from the flag register and pushes it onto some
798 * sort of a stack (presumably merging with any flag value already on
799 * the stack). Within an if block, the flags at the top of the stack
800 * control execution on each channel of the unit, e.g. on each of the
801 * 16 pixel values in our wm programs.
802 *
803 * When the matching 'else' instruction is reached (presumably by
804 * countdown of the instruction count patched in by our ELSE/ENDIF
805 * functions), the relevant flags are inverted.
806 *
807 * When the matching 'endif' instruction is reached, the flags are
808 * popped off. If the stack is now empty, normal execution resumes.
809 *
810 * No attempt is made to deal with stack overflow (14 elements?).
811 */
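
/* A sketch of how these helpers are typically paired by a code generator
 * (the execute size and the block bodies are placeholders):
 *
 *    struct brw_instruction *if_insn = brw_IF(p, BRW_EXECUTE_8);
 *    ... emit the "then" block ...
 *    if_insn = brw_ELSE(p, if_insn);
 *    ... emit the "else" block ...
 *    brw_ENDIF(p, if_insn);
 *
 * brw_ELSE() and brw_ENDIF() patch the jump counts of the earlier
 * instructions, which is why their pointers must be threaded through.
 */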
812 struct brw_instruction *brw_IF(struct brw_compile *p, GLuint execute_size)
813 {
814 struct intel_context *intel = &p->brw->intel;
815 struct brw_instruction *insn;
816
817 if (p->single_program_flow) {
818 assert(execute_size == BRW_EXECUTE_1);
819
820 insn = next_insn(p, BRW_OPCODE_ADD);
821 insn->header.predicate_inverse = 1;
822 } else {
823 insn = next_insn(p, BRW_OPCODE_IF);
824 }
825
826 /* Override the defaults for this instruction:
827 */
828 if (intel->gen < 6) {
829 brw_set_dest(insn, brw_ip_reg());
830 brw_set_src0(insn, brw_ip_reg());
831 brw_set_src1(insn, brw_imm_d(0x0));
832 } else {
833 brw_set_dest(insn, brw_imm_w(0));
834 insn->bits1.branch_gen6.jump_count = 0;
835 brw_set_src0(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
836 brw_set_src1(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
837 }
838
839 insn->header.execution_size = execute_size;
840 insn->header.compression_control = BRW_COMPRESSION_NONE;
841 insn->header.predicate_control = BRW_PREDICATE_NORMAL;
842 insn->header.mask_control = BRW_MASK_ENABLE;
843 if (!p->single_program_flow)
844 insn->header.thread_control = BRW_THREAD_SWITCH;
845
846 p->current->header.predicate_control = BRW_PREDICATE_NONE;
847
848 return insn;
849 }
850
851 struct brw_instruction *
852 brw_IF_gen6(struct brw_compile *p, uint32_t conditional,
853 struct brw_reg src0, struct brw_reg src1)
854 {
855 struct brw_instruction *insn;
856
857 insn = next_insn(p, BRW_OPCODE_IF);
858
859 brw_set_dest(insn, brw_imm_w(0));
860 insn->header.execution_size = BRW_EXECUTE_8;
861 insn->bits1.branch_gen6.jump_count = 0;
862 brw_set_src0(insn, src0);
863 brw_set_src1(insn, src1);
864
865 assert(insn->header.compression_control == BRW_COMPRESSION_NONE);
866 assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
867 insn->header.destreg__conditionalmod = conditional;
868
869 if (!p->single_program_flow)
870 insn->header.thread_control = BRW_THREAD_SWITCH;
871
872 return insn;
873 }
874
875 struct brw_instruction *brw_ELSE(struct brw_compile *p,
876 struct brw_instruction *if_insn)
877 {
878 struct intel_context *intel = &p->brw->intel;
879 struct brw_instruction *insn;
880 GLuint br = 1;
881
882 /* The jump count is in units of 64-bit chunks, so one 128-bit
883 instruction counts as 2 chunks. */
884 if (intel->gen >= 5)
885 br = 2;
886
887 if (p->single_program_flow) {
888 insn = next_insn(p, BRW_OPCODE_ADD);
889 } else {
890 insn = next_insn(p, BRW_OPCODE_ELSE);
891 }
892
893 if (intel->gen < 6) {
894 brw_set_dest(insn, brw_ip_reg());
895 brw_set_src0(insn, brw_ip_reg());
896 brw_set_src1(insn, brw_imm_d(0x0));
897 } else {
898 brw_set_dest(insn, brw_imm_w(0));
899 insn->bits1.branch_gen6.jump_count = 0;
900 brw_set_src0(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
901 brw_set_src1(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
902 }
903
904 insn->header.compression_control = BRW_COMPRESSION_NONE;
905 insn->header.execution_size = if_insn->header.execution_size;
906 insn->header.mask_control = BRW_MASK_ENABLE;
907 if (!p->single_program_flow)
908 insn->header.thread_control = BRW_THREAD_SWITCH;
909
910 /* Patch the if instruction to point at this instruction.
911 */
912 if (p->single_program_flow) {
913 assert(if_insn->header.opcode == BRW_OPCODE_ADD);
914
915 if_insn->bits3.ud = (insn - if_insn + 1) * 16;
916 } else {
917 assert(if_insn->header.opcode == BRW_OPCODE_IF);
918
919 if (intel->gen < 6) {
920 if_insn->bits3.if_else.jump_count = br * (insn - if_insn);
921 if_insn->bits3.if_else.pop_count = 0;
922 if_insn->bits3.if_else.pad0 = 0;
923 } else {
924 if_insn->bits1.branch_gen6.jump_count = br * (insn - if_insn + 1);
925 }
926 }
927
928 return insn;
929 }
930
931 void brw_ENDIF(struct brw_compile *p,
932 struct brw_instruction *patch_insn)
933 {
934 struct intel_context *intel = &p->brw->intel;
935 GLuint br = 1;
936
937 if (intel->gen >= 5)
938 br = 2;
939
940 if (p->single_program_flow) {
941 /* In single program flow mode, there's no need to execute an ENDIF,
942 * since we don't need to do any stack operations, and if we're executing
943 * currently, we want to just continue executing.
944 */
945 struct brw_instruction *next = &p->store[p->nr_insn];
946
947 assert(patch_insn->header.opcode == BRW_OPCODE_ADD);
948
949 patch_insn->bits3.ud = (next - patch_insn) * 16;
950 } else {
951 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_ENDIF);
952
953 if (intel->gen < 6) {
954 brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
955 brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
956 brw_set_src1(insn, brw_imm_d(0x0));
957 } else {
958 brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_W));
959 brw_set_src0(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
960 brw_set_src1(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
961 }
962
963 insn->header.compression_control = BRW_COMPRESSION_NONE;
964 insn->header.execution_size = patch_insn->header.execution_size;
965 insn->header.mask_control = BRW_MASK_ENABLE;
966 insn->header.thread_control = BRW_THREAD_SWITCH;
967
968 if (intel->gen < 6)
969 assert(patch_insn->bits3.if_else.jump_count == 0);
970 else
971 assert(patch_insn->bits1.branch_gen6.jump_count == 0);
972
973 /* Patch the if or else instructions to point at this or the next
974 * instruction respectively.
975 */
976 if (patch_insn->header.opcode == BRW_OPCODE_IF) {
977 if (intel->gen < 6) {
978 /* Turn it into an IFF, which means no mask stack operations for
979 * all-false and jumping past the ENDIF.
980 */
981 patch_insn->header.opcode = BRW_OPCODE_IFF;
982 patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1);
983 patch_insn->bits3.if_else.pop_count = 0;
984 patch_insn->bits3.if_else.pad0 = 0;
985 } else {
986 /* As of gen6, there is no IFF and IF must point to the ENDIF. */
987 patch_insn->bits1.branch_gen6.jump_count = br * (insn - patch_insn);
988 }
989 } else {
990 assert(patch_insn->header.opcode == BRW_OPCODE_ELSE);
991 if (intel->gen < 6) {
992 /* BRW_OPCODE_ELSE pre-gen6 should point just past the
993 * matching ENDIF.
994 */
995 patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1);
996 patch_insn->bits3.if_else.pop_count = 1;
997 patch_insn->bits3.if_else.pad0 = 0;
998 } else {
999 /* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. */
1000 patch_insn->bits1.branch_gen6.jump_count = br * (insn - patch_insn);
1001 }
1002 }
1003
1004 /* Also pop item off the stack in the endif instruction:
1005 */
1006 if (intel->gen < 6) {
1007 insn->bits3.if_else.jump_count = 0;
1008 insn->bits3.if_else.pop_count = 1;
1009 insn->bits3.if_else.pad0 = 0;
1010 } else {
1011 insn->bits1.branch_gen6.jump_count = 2;
1012 }
1013 }
1014 }
1015
1016 struct brw_instruction *brw_BREAK(struct brw_compile *p, int pop_count)
1017 {
1018 struct brw_instruction *insn;
1019 insn = next_insn(p, BRW_OPCODE_BREAK);
1020 brw_set_dest(insn, brw_ip_reg());
1021 brw_set_src0(insn, brw_ip_reg());
1022 brw_set_src1(insn, brw_imm_d(0x0));
1023 insn->header.compression_control = BRW_COMPRESSION_NONE;
1024 insn->header.execution_size = BRW_EXECUTE_8;
1025 /* insn->header.mask_control = BRW_MASK_DISABLE; */
1026 insn->bits3.if_else.pad0 = 0;
1027 insn->bits3.if_else.pop_count = pop_count;
1028 return insn;
1029 }
1030
1031 struct brw_instruction *brw_CONT(struct brw_compile *p, int pop_count)
1032 {
1033 struct brw_instruction *insn;
1034 insn = next_insn(p, BRW_OPCODE_CONTINUE);
1035 brw_set_dest(insn, brw_ip_reg());
1036 brw_set_src0(insn, brw_ip_reg());
1037 brw_set_src1(insn, brw_imm_d(0x0));
1038 insn->header.compression_control = BRW_COMPRESSION_NONE;
1039 insn->header.execution_size = BRW_EXECUTE_8;
1040 /* insn->header.mask_control = BRW_MASK_DISABLE; */
1041 insn->bits3.if_else.pad0 = 0;
1042 insn->bits3.if_else.pop_count = pop_count;
1043 return insn;
1044 }
1045
1046 /* DO/WHILE loop:
1047 */
1048 struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size)
1049 {
1050 if (p->single_program_flow) {
1051 return &p->store[p->nr_insn];
1052 } else {
1053 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO);
1054
1055 /* Override the defaults for this instruction:
1056 */
1057 brw_set_dest(insn, brw_null_reg());
1058 brw_set_src0(insn, brw_null_reg());
1059 brw_set_src1(insn, brw_null_reg());
1060
1061 insn->header.compression_control = BRW_COMPRESSION_NONE;
1062 insn->header.execution_size = execute_size;
1063 insn->header.predicate_control = BRW_PREDICATE_NONE;
1064 /* insn->header.mask_control = BRW_MASK_ENABLE; */
1065 /* insn->header.mask_control = BRW_MASK_DISABLE; */
1066
1067 return insn;
1068 }
1069 }
1070
1071
1072
1073 struct brw_instruction *brw_WHILE(struct brw_compile *p,
1074 struct brw_instruction *do_insn)
1075 {
1076 struct intel_context *intel = &p->brw->intel;
1077 struct brw_instruction *insn;
1078 GLuint br = 1;
1079
1080 if (intel->gen >= 5)
1081 br = 2;
1082
1083 if (p->single_program_flow)
1084 insn = next_insn(p, BRW_OPCODE_ADD);
1085 else
1086 insn = next_insn(p, BRW_OPCODE_WHILE);
1087
1088 brw_set_dest(insn, brw_ip_reg());
1089 brw_set_src0(insn, brw_ip_reg());
1090 brw_set_src1(insn, brw_imm_d(0x0));
1091
1092 insn->header.compression_control = BRW_COMPRESSION_NONE;
1093
1094 if (p->single_program_flow) {
1095 insn->header.execution_size = BRW_EXECUTE_1;
1096
1097 insn->bits3.d = (do_insn - insn) * 16;
1098 } else {
1099 insn->header.execution_size = do_insn->header.execution_size;
1100
1101 assert(do_insn->header.opcode == BRW_OPCODE_DO);
1102 insn->bits3.if_else.jump_count = br * (do_insn - insn + 1);
1103 insn->bits3.if_else.pop_count = 0;
1104 insn->bits3.if_else.pad0 = 0;
1105 }
1106
1107 /* insn->header.mask_control = BRW_MASK_ENABLE; */
1108
1109 /* insn->header.mask_control = BRW_MASK_DISABLE; */
1110 p->current->header.predicate_control = BRW_PREDICATE_NONE;
1111 return insn;
1112 }
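
/* A sketch of how a loop is assembled with these helpers (the body and
 * execute size are placeholders):
 *
 *    struct brw_instruction *do_insn = brw_DO(p, BRW_EXECUTE_8);
 *    ... emit the loop body; brw_BREAK()/brw_CONT() may appear inside ...
 *    brw_WHILE(p, do_insn);
 *
 * brw_WHILE() computes its backward jump count from the distance back to
 * the matching DO instruction.
 */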
1113
1114
1115 /* FORWARD JUMPS:
1116 */
1117 void brw_land_fwd_jump(struct brw_compile *p,
1118 struct brw_instruction *jmp_insn)
1119 {
1120 struct intel_context *intel = &p->brw->intel;
1121 struct brw_instruction *landing = &p->store[p->nr_insn];
1122 GLuint jmpi = 1;
1123
1124 if (intel->gen >= 5)
1125 jmpi = 2;
1126
1127 assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
1128 assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE);
1129
1130 jmp_insn->bits3.ud = jmpi * ((landing - jmp_insn) - 1);
1131 }
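
/* A sketch of the intended pairing (operand choices are illustrative):
 *
 *    struct brw_instruction *jmp =
 *       brw_JMPI(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(0));
 *    ... emit the instructions to be jumped over ...
 *    brw_land_fwd_jump(p, jmp);
 *
 * brw_land_fwd_jump() patches the JMPI immediate with the distance from
 * the jump to the current end of the instruction store.
 */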
1132
1133
1134
1135 /* To integrate with the above, it makes sense that the comparison
1136 * instruction should populate the flag register. It might be simpler
1137 * just to use the flag reg for most WM tasks?
1138 */
1139 void brw_CMP(struct brw_compile *p,
1140 struct brw_reg dest,
1141 GLuint conditional,
1142 struct brw_reg src0,
1143 struct brw_reg src1)
1144 {
1145 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP);
1146
1147 insn->header.destreg__conditionalmod = conditional;
1148 brw_set_dest(insn, dest);
1149 brw_set_src0(insn, src0);
1150 brw_set_src1(insn, src1);
1151
1152 /* guess_execution_size(insn, src0); */
1153
1154
1155 /* Make it so that future instructions will use the computed flag
1156 * value until brw_set_predicate_control_flag_value() is called
1157 * again.
1158 */
1159 if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
1160 dest.nr == 0) {
1161 p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
1162 p->flag_value = 0xff;
1163 }
1164 }
1165
1166 /* Issue a 'wait' instruction on notification register n1; the host can
1167 program MMIO to wake the thread up. */
1168 void brw_WAIT (struct brw_compile *p)
1169 {
1170 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_WAIT);
1171 struct brw_reg src = brw_notification_1_reg();
1172
1173 brw_set_dest(insn, src);
1174 brw_set_src0(insn, src);
1175 brw_set_src1(insn, brw_null_reg());
1176 insn->header.execution_size = 0; /* must */
1177 insn->header.predicate_control = 0;
1178 insn->header.compression_control = 0;
1179 }
1180
1181
1182 /***********************************************************************
1183 * Helpers for the various SEND message types:
1184 */
1185
1186 /** Extended math function, float[8].
1187 */
1188 void brw_math( struct brw_compile *p,
1189 struct brw_reg dest,
1190 GLuint function,
1191 GLuint saturate,
1192 GLuint msg_reg_nr,
1193 struct brw_reg src,
1194 GLuint data_type,
1195 GLuint precision )
1196 {
1197 struct intel_context *intel = &p->brw->intel;
1198
1199 if (intel->gen >= 6) {
1200 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);
1201
1202 assert(dest.file == BRW_GENERAL_REGISTER_FILE);
1203 assert(src.file == BRW_GENERAL_REGISTER_FILE);
1204
1205 assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
1206 assert(src.hstride == BRW_HORIZONTAL_STRIDE_1);
1207
1208 if (function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT &&
1209 function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
1210 assert(src.type == BRW_REGISTER_TYPE_F);
1211 }
1212
1213 /* Math is the same ISA format as other opcodes, except that CondModifier
1214 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1215 */
1216 insn->header.destreg__conditionalmod = function;
1217
1218 brw_set_dest(insn, dest);
1219 brw_set_src0(insn, src);
1220 brw_set_src1(insn, brw_null_reg());
1221 } else {
1222 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1223 GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
1224 GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
1225 /* Example code doesn't set predicate_control for send
1226 * instructions.
1227 */
1228 insn->header.predicate_control = 0;
1229 insn->header.destreg__conditionalmod = msg_reg_nr;
1230
1231 brw_set_dest(insn, dest);
1232 brw_set_src0(insn, src);
1233 brw_set_math_message(p->brw,
1234 insn,
1235 msg_length, response_length,
1236 function,
1237 BRW_MATH_INTEGER_UNSIGNED,
1238 precision,
1239 saturate,
1240 data_type);
1241 }
1242 }
1243
1244 /** Extended math function, float[8].
1245 */
1246 void brw_math2(struct brw_compile *p,
1247 struct brw_reg dest,
1248 GLuint function,
1249 struct brw_reg src0,
1250 struct brw_reg src1)
1251 {
1252 struct intel_context *intel = &p->brw->intel;
1253 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);
1254
1255 assert(intel->gen >= 6);
1256 (void) intel;
1257
1258
1259 assert(dest.file == BRW_GENERAL_REGISTER_FILE);
1260 assert(src0.file == BRW_GENERAL_REGISTER_FILE);
1261 assert(src1.file == BRW_GENERAL_REGISTER_FILE);
1262
1263 assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
1264 assert(src0.hstride == BRW_HORIZONTAL_STRIDE_1);
1265 assert(src1.hstride == BRW_HORIZONTAL_STRIDE_1);
1266
1267 if (function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT &&
1268 function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
1269 assert(src0.type == BRW_REGISTER_TYPE_F);
1270 assert(src1.type == BRW_REGISTER_TYPE_F);
1271 }
1272
1273 /* Math is the same ISA format as other opcodes, except that CondModifier
1274 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1275 */
1276 insn->header.destreg__conditionalmod = function;
1277
1278 brw_set_dest(insn, dest);
1279 brw_set_src0(insn, src0);
1280 brw_set_src1(insn, src1);
1281 }
1282
1283 /**
1284 * Extended math function, float[16].
1285 * Use 2 send instructions.
1286 */
1287 void brw_math_16( struct brw_compile *p,
1288 struct brw_reg dest,
1289 GLuint function,
1290 GLuint saturate,
1291 GLuint msg_reg_nr,
1292 struct brw_reg src,
1293 GLuint precision )
1294 {
1295 struct intel_context *intel = &p->brw->intel;
1296 struct brw_instruction *insn;
1297 GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
1298 GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
1299
1300 if (intel->gen >= 6) {
1301 insn = next_insn(p, BRW_OPCODE_MATH);
1302
1303 /* Math is the same ISA format as other opcodes, except that CondModifier
1304 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1305 */
1306 insn->header.destreg__conditionalmod = function;
1307
1308 brw_set_dest(insn, dest);
1309 brw_set_src0(insn, src);
1310 brw_set_src1(insn, brw_null_reg());
1311 return;
1312 }
1313
1314 /* First instruction:
1315 */
1316 brw_push_insn_state(p);
1317 brw_set_predicate_control_flag_value(p, 0xff);
1318 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1319
1320 insn = next_insn(p, BRW_OPCODE_SEND);
1321 insn->header.destreg__conditionalmod = msg_reg_nr;
1322
1323 brw_set_dest(insn, dest);
1324 brw_set_src0(insn, src);
1325 brw_set_math_message(p->brw,
1326 insn,
1327 msg_length, response_length,
1328 function,
1329 BRW_MATH_INTEGER_UNSIGNED,
1330 precision,
1331 saturate,
1332 BRW_MATH_DATA_VECTOR);
1333
1334 /* Second instruction:
1335 */
1336 insn = next_insn(p, BRW_OPCODE_SEND);
1337 insn->header.compression_control = BRW_COMPRESSION_2NDHALF;
1338 insn->header.destreg__conditionalmod = msg_reg_nr+1;
1339
1340 brw_set_dest(insn, offset(dest,1));
1341 brw_set_src0(insn, src);
1342 brw_set_math_message(p->brw,
1343 insn,
1344 msg_length, response_length,
1345 function,
1346 BRW_MATH_INTEGER_UNSIGNED,
1347 precision,
1348 saturate,
1349 BRW_MATH_DATA_VECTOR);
1350
1351 brw_pop_insn_state(p);
1352 }
1353
1354
1355 /**
1356 * Write block of 16 dwords/floats to the data port Render Cache scratch buffer.
1357 * Scratch offset should be a multiple of 64.
1358 * Used for register spilling.
1359 */
1360 void brw_dp_WRITE_16( struct brw_compile *p,
1361 struct brw_reg src,
1362 GLuint scratch_offset )
1363 {
1364 struct intel_context *intel = &p->brw->intel;
1365 GLuint msg_reg_nr = 1;
1366 {
1367 brw_push_insn_state(p);
1368 brw_set_mask_control(p, BRW_MASK_DISABLE);
1369 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1370
1371 /* set message header global offset field (reg 0, element 2) */
1372 brw_MOV(p,
1373 retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D),
1374 brw_imm_d(scratch_offset));
1375
1376 brw_pop_insn_state(p);
1377 }
1378
1379 {
1380 GLuint msg_length = 3;
1381 struct brw_reg dest;
1382 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1383 int send_commit_msg;
1384
1385 insn->header.predicate_control = 0; /* XXX */
1386 insn->header.compression_control = BRW_COMPRESSION_NONE;
1387 insn->header.destreg__conditionalmod = msg_reg_nr;
1388
1389 /* Until gen6, writes followed by reads from the same location
1390 * are not guaranteed to be ordered unless write_commit is set.
1391 * If set, then a no-op write is issued to the destination
1392 * register to set a dependency, and a read from the destination
1393 * can be used to ensure the ordering.
1394 *
1395 * For gen6, only writes between different threads need ordering
1396 * protection. Our use of DP writes is all about register
1397 * spilling within a thread.
1398 */
1399 if (intel->gen >= 6) {
1400 dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
1401 send_commit_msg = 0;
1402 } else {
1403 dest = brw_uw16_grf(0, 0);
1404 send_commit_msg = 1;
1405 }
1406
1407 brw_set_dest(insn, dest);
1408 brw_set_src0(insn, src);
1409
1410 brw_set_dp_write_message(p->brw,
1411 insn,
1412 255, /* binding table index (255=stateless) */
1413 BRW_DATAPORT_OWORD_BLOCK_4_OWORDS, /* msg_control */
1414 BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE, /* msg_type */
1415 msg_length,
1416 GL_TRUE, /* header_present */
1417 0, /* pixel scoreboard */
1418 send_commit_msg, /* response_length */
1419 0, /* eot */
1420 send_commit_msg);
1421 }
1422 }
1423
1424
1425 /**
1426 * Read block of 16 dwords/floats from the data port Render Cache scratch buffer.
1427 * Scratch offset should be a multiple of 64.
1428 * Used for register spilling.
1429 */
1430 void brw_dp_READ_16( struct brw_compile *p,
1431 struct brw_reg dest,
1432 GLuint scratch_offset )
1433 {
1434 GLuint msg_reg_nr = 1;
1435 {
1436 brw_push_insn_state(p);
1437 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1438 brw_set_mask_control(p, BRW_MASK_DISABLE);
1439
1440 /* set message header global offset field (reg 0, element 2) */
1441 brw_MOV(p,
1442 retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D),
1443 brw_imm_d(scratch_offset));
1444
1445 brw_pop_insn_state(p);
1446 }
1447
1448 {
1449 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1450
1451 insn->header.predicate_control = 0; /* XXX */
1452 insn->header.compression_control = BRW_COMPRESSION_NONE;
1453 insn->header.destreg__conditionalmod = msg_reg_nr;
1454
1455 brw_set_dest(insn, dest); /* UW? */
1456 brw_set_src0(insn, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW));
1457
1458 brw_set_dp_read_message(p->brw,
1459 insn,
1460 255, /* binding table index (255=stateless) */
1461 BRW_DATAPORT_OWORD_BLOCK_4_OWORDS,
1462 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
1463 1, /* target cache (render/scratch) */
1464 1, /* msg_length */
1465 2, /* response_length */
1466 0); /* eot */
1467 }
1468 }
1469
1470
1471 /**
1472 * Read a float[4] vector from the data port Data Cache (const buffer).
1473 * Location (in buffer) should be a multiple of 16.
1474 * Used for fetching shader constants.
1475 * If relAddr is true, we'll do an indirect fetch using the address register.
1476 */
1477 void brw_dp_READ_4( struct brw_compile *p,
1478 struct brw_reg dest,
1479 GLboolean relAddr,
1480 GLuint location,
1481 GLuint bind_table_index )
1482 {
1483 /* XXX: relAddr not implemented */
1484 GLuint msg_reg_nr = 1;
1485 {
1486 struct brw_reg b;
1487 brw_push_insn_state(p);
1488 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1489 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1490 brw_set_mask_control(p, BRW_MASK_DISABLE);
1491
1492 /* Setup MRF[1] with location/offset into const buffer */
1493 b = brw_message_reg(msg_reg_nr);
1494 b = retype(b, BRW_REGISTER_TYPE_UD);
1495 /* XXX I think we're setting all the dwords of MRF[1] to 'location',
1496 * when the docs say only dword[2] should be set. Hmmm. But it works.
1497 */
1498 brw_MOV(p, b, brw_imm_ud(location));
1499 brw_pop_insn_state(p);
1500 }
1501
1502 {
1503 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1504
1505 insn->header.predicate_control = BRW_PREDICATE_NONE;
1506 insn->header.compression_control = BRW_COMPRESSION_NONE;
1507 insn->header.destreg__conditionalmod = msg_reg_nr;
1508 insn->header.mask_control = BRW_MASK_DISABLE;
1509
1510 /* cast dest to a uword[8] vector */
1511 dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);
1512
1513 brw_set_dest(insn, dest);
1514 brw_set_src0(insn, brw_null_reg());
1515
1516 brw_set_dp_read_message(p->brw,
1517 insn,
1518 bind_table_index,
1519 0, /* msg_control (0 means 1 Oword) */
1520 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
1521 0, /* source cache = data cache */
1522 1, /* msg_length */
1523 1, /* response_length (1 Oword) */
1524 0); /* eot */
1525 }
1526 }
1527
1528
1529 /**
1530 * Read float[4] constant(s) from VS constant buffer.
1531 * For relative addressing, two float[4] constants will be read into 'dest'.
1532 * Otherwise, one float[4] constant will be read into the lower half of 'dest'.
1533 */
1534 void brw_dp_READ_4_vs(struct brw_compile *p,
1535 struct brw_reg dest,
1536 GLuint location,
1537 GLuint bind_table_index)
1538 {
1539 struct brw_instruction *insn;
1540 GLuint msg_reg_nr = 1;
1541 struct brw_reg b;
1542
1543 /*
1544 printf("vs const read msg, location %u, msg_reg_nr %d\n",
1545 location, msg_reg_nr);
1546 */
1547
1548 /* Setup MRF[1] with location/offset into const buffer */
1549 brw_push_insn_state(p);
1550 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1551 brw_set_mask_control(p, BRW_MASK_DISABLE);
1552 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1553
1554 /* XXX I think we're setting all the dwords of MRF[1] to 'location',
1555 * when the docs say only dword[2] should be set. Hmmm. But it works.
1556 */
1557 b = brw_message_reg(msg_reg_nr);
1558 b = retype(b, BRW_REGISTER_TYPE_UD);
1559 /*b = get_element_ud(b, 2);*/
1560 brw_MOV(p, b, brw_imm_ud(location));
1561
1562 brw_pop_insn_state(p);
1563
1564 insn = next_insn(p, BRW_OPCODE_SEND);
1565
1566 insn->header.predicate_control = BRW_PREDICATE_NONE;
1567 insn->header.compression_control = BRW_COMPRESSION_NONE;
1568 insn->header.destreg__conditionalmod = msg_reg_nr;
1569 insn->header.mask_control = BRW_MASK_DISABLE;
1570
1571 brw_set_dest(insn, dest);
1572 brw_set_src0(insn, brw_null_reg());
1573
1574 brw_set_dp_read_message(p->brw,
1575 insn,
1576 bind_table_index,
1577 0,
1578 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
1579 0, /* source cache = data cache */
1580 1, /* msg_length */
1581 1, /* response_length (1 Oword) */
1582 0); /* eot */
1583 }
1584
1585 /**
1586 * Read a float[4] constant per vertex from VS constant buffer, with
1587 * relative addressing.
1588 */
1589 void brw_dp_READ_4_vs_relative(struct brw_compile *p,
1590 struct brw_reg dest,
1591 struct brw_reg addr_reg,
1592 GLuint offset,
1593 GLuint bind_table_index)
1594 {
1595 struct intel_context *intel = &p->brw->intel;
1596 int msg_type;
1597
1598 /* Setup MRF[1] with offset into const buffer */
1599 brw_push_insn_state(p);
1600 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1601 brw_set_mask_control(p, BRW_MASK_DISABLE);
1602 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1603
1604 /* M1.0 is block offset 0, M1.4 is block offset 1, all other
1605 * fields ignored.
1606 */
1607 brw_ADD(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_UD),
1608 addr_reg, brw_imm_d(offset));
1609 brw_pop_insn_state(p);
1610
1611 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1612
1613 insn->header.predicate_control = BRW_PREDICATE_NONE;
1614 insn->header.compression_control = BRW_COMPRESSION_NONE;
1615 insn->header.destreg__conditionalmod = 0;
1616 insn->header.mask_control = BRW_MASK_DISABLE;
1617
1618 brw_set_dest(insn, dest);
1619 brw_set_src0(insn, brw_vec8_grf(0, 0));
1620
1621 if (intel->gen == 6)
1622 msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
1623 else if (intel->gen == 5 || intel->is_g4x)
1624 msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
1625 else
1626 msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
1627
1628 brw_set_dp_read_message(p->brw,
1629 insn,
1630 bind_table_index,
1631 BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
1632 msg_type,
1633 0, /* source cache = data cache */
1634 2, /* msg_length */
1635 1, /* response_length */
1636 0); /* eot */
1637 }
1638
1639
1640
1641 void brw_fb_WRITE(struct brw_compile *p,
1642 int dispatch_width,
1643 struct brw_reg dest,
1644 GLuint msg_reg_nr,
1645 struct brw_reg src0,
1646 GLuint binding_table_index,
1647 GLuint msg_length,
1648 GLuint response_length,
1649 GLboolean eot)
1650 {
1651 struct intel_context *intel = &p->brw->intel;
1652 struct brw_instruction *insn;
1653 GLuint msg_control, msg_type;
1654 GLboolean header_present = GL_TRUE;
1655
1656 insn = next_insn(p, BRW_OPCODE_SEND);
1657 insn->header.predicate_control = 0; /* XXX */
1658 insn->header.compression_control = BRW_COMPRESSION_NONE;
1659
1660 if (intel->gen >= 6) {
1661 if (msg_length == 4)
1662 header_present = GL_FALSE;
1663
1664 /* headerless version, just submit color payload */
1665 src0 = brw_message_reg(msg_reg_nr);
1666
1667 msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE_GEN6;
1668 } else {
1669 insn->header.destreg__conditionalmod = msg_reg_nr;
1670
1671 msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
1672 }
1673
1674 if (dispatch_width == 16)
1675 msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE;
1676 else
1677 msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01;
1678
1679 brw_set_dest(insn, dest);
1680 brw_set_src0(insn, src0);
1681 brw_set_dp_write_message(p->brw,
1682 insn,
1683 binding_table_index,
1684 msg_control,
1685 msg_type,
1686 msg_length,
1687 header_present,
1688 1, /* pixel scoreboard */
1689 response_length,
1690 eot,
1691 0 /* send_commit_msg */);
1692 }
1693
1694
1695 /**
1696 * Texture sample instruction.
1697 * Note: the msg_type plus msg_length values determine exactly what kind
1698 * of sampling operation is performed. See volume 4, page 161 of docs.
1699 */
1700 void brw_SAMPLE(struct brw_compile *p,
1701 struct brw_reg dest,
1702 GLuint msg_reg_nr,
1703 struct brw_reg src0,
1704 GLuint binding_table_index,
1705 GLuint sampler,
1706 GLuint writemask,
1707 GLuint msg_type,
1708 GLuint response_length,
1709 GLuint msg_length,
1710 GLboolean eot,
1711 GLuint header_present,
1712 GLuint simd_mode)
1713 {
1714 struct intel_context *intel = &p->brw->intel;
1715 GLboolean need_stall = 0;
1716
1717 if (writemask == 0) {
1718 /*printf("%s: zero writemask??\n", __FUNCTION__); */
1719 return;
1720 }
1721
1722 /* Hardware doesn't do destination dependency checking on send
1723 * instructions properly. Add a workaround which generates the
1724 * dependency by other means. In practice it seems like this bug
1725 * only crops up for texture samples, and only where registers are
1726 * written by the send and then written again later without being
1727 * read in between. Luckily for us, we already track that
1728 * information and use it to modify the writemask for the
1729 * instruction, so that is a guide for whether a workaround is
1730 * needed.
1731 */
1732 if (writemask != WRITEMASK_XYZW) {
1733 GLuint dst_offset = 0;
1734 GLuint i, newmask = 0, len = 0;
1735
1736 for (i = 0; i < 4; i++) {
1737 if (writemask & (1<<i))
1738 break;
1739 dst_offset += 2;
1740 }
1741 for (; i < 4; i++) {
1742 if (!(writemask & (1<<i)))
1743 break;
1744 newmask |= 1<<i;
1745 len++;
1746 }
1747
1748 if (newmask != writemask) {
1749 need_stall = 1;
1750 /* printf("need stall %x %x\n", newmask , writemask); */
1751 }
1752 else {
1753 GLboolean dispatch_16 = GL_FALSE;
1754
1755 struct brw_reg m1 = brw_message_reg(msg_reg_nr);
1756
1757 guess_execution_size(p->current, dest);
1758 if (p->current->header.execution_size == BRW_EXECUTE_16)
1759 dispatch_16 = GL_TRUE;
1760
1761 newmask = ~newmask & WRITEMASK_XYZW;
1762
1763 brw_push_insn_state(p);
1764
1765 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1766 brw_set_mask_control(p, BRW_MASK_DISABLE);
1767
1768 brw_MOV(p, m1, brw_vec8_grf(0,0));
1769 brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12));
1770
1771 brw_pop_insn_state(p);
1772
1773 src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
1774 dest = offset(dest, dst_offset);
1775
1776 /* For 16-wide dispatch, masked channels are skipped in the
1777 * response. For 8-wide, masked channels still take up slots,
1778 * and are just not written to.
1779 */
1780 if (dispatch_16)
1781 response_length = len * 2;
1782 }
1783 }
1784
1785 {
1786 struct brw_instruction *insn;
1787
1788 /* Sandybridge doesn't have the implied move for SENDs,
1789 * and the first message register index comes from src0.
1790 */
1791 if (intel->gen >= 6) {
1792 brw_push_insn_state(p);
1793 brw_set_mask_control( p, BRW_MASK_DISABLE );
1794 /* m1 contains header? */
1795 brw_MOV(p, brw_message_reg(msg_reg_nr), src0);
1796 brw_pop_insn_state(p);
1797 src0 = brw_message_reg(msg_reg_nr);
1798 }
1799
1800 insn = next_insn(p, BRW_OPCODE_SEND);
1801 insn->header.predicate_control = 0; /* XXX */
1802 insn->header.compression_control = BRW_COMPRESSION_NONE;
1803 if (intel->gen < 6)
1804 insn->header.destreg__conditionalmod = msg_reg_nr;
1805
1806 brw_set_dest(insn, dest);
1807 brw_set_src0(insn, src0);
1808 brw_set_sampler_message(p->brw, insn,
1809 binding_table_index,
1810 sampler,
1811 msg_type,
1812 response_length,
1813 msg_length,
1814 eot,
1815 header_present,
1816 simd_mode);
1817 }
1818
1819 if (need_stall) {
1820 struct brw_reg reg = vec8(offset(dest, response_length-1));
1821
1822 /* mov (8) r9.0<1>:f r9.0<8;8,1>:f { Align1 }
1823 */
1824 brw_push_insn_state(p);
1825 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1826 brw_MOV(p, reg, reg);
1827 brw_pop_insn_state(p);
1828 }
1829
1830 }
1831
1832 /* All these variables are pretty confusing - we might be better off
1833 * using bitmasks and macros for this, in the old style. Or perhaps
1834 * just having the caller instantiate the fields in dword3 itself.
1835 */
1836 void brw_urb_WRITE(struct brw_compile *p,
1837 struct brw_reg dest,
1838 GLuint msg_reg_nr,
1839 struct brw_reg src0,
1840 GLboolean allocate,
1841 GLboolean used,
1842 GLuint msg_length,
1843 GLuint response_length,
1844 GLboolean eot,
1845 GLboolean writes_complete,
1846 GLuint offset,
1847 GLuint swizzle)
1848 {
1849 struct intel_context *intel = &p->brw->intel;
1850 struct brw_instruction *insn;
1851
1852 /* Sandybridge doesn't have the implied move for SENDs,
1853 * and the first message register index comes from src0.
1854 */
1855 if (intel->gen >= 6) {
1856 brw_push_insn_state(p);
1857 brw_set_mask_control( p, BRW_MASK_DISABLE );
1858 brw_MOV(p, brw_message_reg(msg_reg_nr), src0);
1859 brw_pop_insn_state(p);
1860 src0 = brw_message_reg(msg_reg_nr);
1861 }
1862
1863 insn = next_insn(p, BRW_OPCODE_SEND);
1864
1865 assert(msg_length < BRW_MAX_MRF);
1866
1867 brw_set_dest(insn, dest);
1868 brw_set_src0(insn, src0);
1869 brw_set_src1(insn, brw_imm_d(0));
1870
1871 if (intel->gen < 6)
1872 insn->header.destreg__conditionalmod = msg_reg_nr;
1873
1874 brw_set_urb_message(p->brw,
1875 insn,
1876 allocate,
1877 used,
1878 msg_length,
1879 response_length,
1880 eot,
1881 writes_complete,
1882 offset,
1883 swizzle);
1884 }
1885
1886 void brw_ff_sync(struct brw_compile *p,
1887 struct brw_reg dest,
1888 GLuint msg_reg_nr,
1889 struct brw_reg src0,
1890 GLboolean allocate,
1891 GLuint response_length,
1892 GLboolean eot)
1893 {
1894 struct intel_context *intel = &p->brw->intel;
1895 struct brw_instruction *insn;
1896
1897 /* Sandybridge doesn't have the implied move for SENDs,
1898 * and the first message register index comes from src0.
1899 */
1900 if (intel->gen >= 6) {
1901 brw_push_insn_state(p);
1902 brw_set_mask_control( p, BRW_MASK_DISABLE );
1903 brw_MOV(p, retype(brw_message_reg(msg_reg_nr), BRW_REGISTER_TYPE_UD),
1904 retype(src0, BRW_REGISTER_TYPE_UD));
1905 brw_pop_insn_state(p);
1906 src0 = brw_message_reg(msg_reg_nr);
1907 }
1908
1909 insn = next_insn(p, BRW_OPCODE_SEND);
1910 brw_set_dest(insn, dest);
1911 brw_set_src0(insn, src0);
1912 brw_set_src1(insn, brw_imm_d(0));
1913
1914 if (intel->gen < 6)
1915 insn->header.destreg__conditionalmod = msg_reg_nr;
1916
1917 brw_set_ff_sync_message(p->brw,
1918 insn,
1919 allocate,
1920 response_length,
1921 eot);
1922 }