1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32
33 #include "brw_context.h"
34 #include "brw_defines.h"
35 #include "brw_eu.h"
36
37
38
39
40 /***********************************************************************
41 * Internal helper for constructing instructions
42 */
43
44 static void guess_execution_size( struct brw_instruction *insn,
45 struct brw_reg reg )
46 {
47 if (reg.width == BRW_WIDTH_8 &&
48 insn->header.compression_control == BRW_COMPRESSION_COMPRESSED)
49 insn->header.execution_size = BRW_EXECUTE_16;
50 else
51 insn->header.execution_size = reg.width; /* note - definitions are compatible */
52 }
53
54
55 static void brw_set_dest( struct brw_instruction *insn,
56 struct brw_reg dest )
57 {
58 if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE &&
59 dest.file != BRW_MESSAGE_REGISTER_FILE)
60 assert(dest.nr < 128);
61
62 insn->bits1.da1.dest_reg_file = dest.file;
63 insn->bits1.da1.dest_reg_type = dest.type;
64 insn->bits1.da1.dest_address_mode = dest.address_mode;
65
66 if (dest.address_mode == BRW_ADDRESS_DIRECT) {
67 insn->bits1.da1.dest_reg_nr = dest.nr;
68
69 if (insn->header.access_mode == BRW_ALIGN_1) {
70 insn->bits1.da1.dest_subreg_nr = dest.subnr;
71 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
72 dest.hstride = BRW_HORIZONTAL_STRIDE_1;
73 insn->bits1.da1.dest_horiz_stride = dest.hstride;
74 }
75 else {
76 insn->bits1.da16.dest_subreg_nr = dest.subnr / 16;
77 insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask;
78 /* even though this is ignored in align16 mode, it still needs to be set to '01' */
79 insn->bits1.da16.dest_horiz_stride = 1;
80 }
81 }
82 else {
83 insn->bits1.ia1.dest_subreg_nr = dest.subnr;
84
85 /* These are different sizes in align1 vs align16:
86 */
87 if (insn->header.access_mode == BRW_ALIGN_1) {
88 insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset;
89 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
90 dest.hstride = BRW_HORIZONTAL_STRIDE_1;
91 insn->bits1.ia1.dest_horiz_stride = dest.hstride;
92 }
93 else {
94 insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset;
95 /* even though this is ignored in align16 mode, it still needs to be set to '01' */
96 insn->bits1.ia16.dest_horiz_stride = 1;
97 }
98 }
99
100 /* NEW: Set the execution size based on dest.width and
101 * insn->compression_control:
102 */
103 guess_execution_size(insn, dest);
104 }
105
106 extern int reg_type_size[];
107
108 static void
109 validate_reg(struct brw_instruction *insn, struct brw_reg reg)
110 {
111 int hstride_for_reg[] = {0, 1, 2, 4};
112 int vstride_for_reg[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256};
113 int width_for_reg[] = {1, 2, 4, 8, 16};
114 int execsize_for_reg[] = {1, 2, 4, 8, 16};
115 int width, hstride, vstride, execsize;
116
117 if (reg.file == BRW_IMMEDIATE_VALUE) {
118 /* 3.3.6: Region Parameters. Restriction: Immediate vectors
119 * mean the destination has to be 128-bit aligned and the
120 * destination horiz stride has to be a word.
121 */
122 if (reg.type == BRW_REGISTER_TYPE_V) {
123 assert(hstride_for_reg[insn->bits1.da1.dest_horiz_stride] *
124 reg_type_size[insn->bits1.da1.dest_reg_type] == 2);
125 }
126
127 return;
128 }
129
130 if (reg.file == BRW_ARCHITECTURE_REGISTER_FILE &&
131 reg.nr == BRW_ARF_NULL)
132 return;
133
134 assert(reg.hstride >= 0 && reg.hstride < Elements(hstride_for_reg));
135 hstride = hstride_for_reg[reg.hstride];
136
137 if (reg.vstride == 0xf) {
138 vstride = -1;
139 } else {
140 assert(reg.vstride >= 0 && reg.vstride < Elements(vstride_for_reg));
141 vstride = vstride_for_reg[reg.vstride];
142 }
143
144 assert(reg.width >= 0 && reg.width < Elements(width_for_reg));
145 width = width_for_reg[reg.width];
146
147 assert(insn->header.execution_size >= 0 &&
148 insn->header.execution_size < Elements(execsize_for_reg));
149 execsize = execsize_for_reg[insn->header.execution_size];
150
151 /* Restrictions from 3.3.10: Register Region Restrictions. */
152 /* 3. */
153 assert(execsize >= width);
154
155 /* 4. */
156 if (execsize == width && hstride != 0) {
157 assert(vstride == -1 || vstride == width * hstride);
158 }
159
160 /* 5. */
161 if (execsize == width && hstride == 0) {
162 /* no restriction on vstride. */
163 }
164
165 /* 6. */
166 if (width == 1) {
167 assert(hstride == 0);
168 }
169
170 /* 7. */
171 if (execsize == 1 && width == 1) {
172 assert(hstride == 0);
173 assert(vstride == 0);
174 }
175
176 /* 8. */
177 if (vstride == 0 && hstride == 0) {
178 assert(width == 1);
179 }
180
181 /* 10. Check destination issues. */
182 }
183
184 static void brw_set_src0( struct brw_instruction *insn,
185 struct brw_reg reg )
186 {
187 if (reg.file != BRW_ARCHITECTURE_REGISTER_FILE)
188 assert(reg.nr < 128);
189
190 validate_reg(insn, reg);
191
192 insn->bits1.da1.src0_reg_file = reg.file;
193 insn->bits1.da1.src0_reg_type = reg.type;
194 insn->bits2.da1.src0_abs = reg.abs;
195 insn->bits2.da1.src0_negate = reg.negate;
196 insn->bits2.da1.src0_address_mode = reg.address_mode;
197
198 if (reg.file == BRW_IMMEDIATE_VALUE) {
199 insn->bits3.ud = reg.dw1.ud;
200
201 /* Required to set some fields in src1 as well:
202 */
203 insn->bits1.da1.src1_reg_file = 0; /* arf */
204 insn->bits1.da1.src1_reg_type = reg.type;
205 }
206 else
207 {
208 if (reg.address_mode == BRW_ADDRESS_DIRECT) {
209 if (insn->header.access_mode == BRW_ALIGN_1) {
210 insn->bits2.da1.src0_subreg_nr = reg.subnr;
211 insn->bits2.da1.src0_reg_nr = reg.nr;
212 }
213 else {
214 insn->bits2.da16.src0_subreg_nr = reg.subnr / 16;
215 insn->bits2.da16.src0_reg_nr = reg.nr;
216 }
217 }
218 else {
219 insn->bits2.ia1.src0_subreg_nr = reg.subnr;
220
221 if (insn->header.access_mode == BRW_ALIGN_1) {
222 insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset;
223 }
224 else {
225 insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset;
226 }
227 }
228
229 if (insn->header.access_mode == BRW_ALIGN_1) {
230 if (reg.width == BRW_WIDTH_1 &&
231 insn->header.execution_size == BRW_EXECUTE_1) {
232 insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
233 insn->bits2.da1.src0_width = BRW_WIDTH_1;
234 insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0;
235 }
236 else {
237 insn->bits2.da1.src0_horiz_stride = reg.hstride;
238 insn->bits2.da1.src0_width = reg.width;
239 insn->bits2.da1.src0_vert_stride = reg.vstride;
240 }
241 }
242 else {
243 insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
244 insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
245 insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
246 insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
247
248 /* This is an oddity arising from the fact that we use the same
249 * register descriptions in align_16 mode as in align_1:
250 */
251 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
252 insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4;
253 else
254 insn->bits2.da16.src0_vert_stride = reg.vstride;
255 }
256 }
257 }
258
259
260 void brw_set_src1( struct brw_instruction *insn,
261 struct brw_reg reg )
262 {
263 assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
264
265 assert(reg.nr < 128);
266
267 validate_reg(insn, reg);
268
269 insn->bits1.da1.src1_reg_file = reg.file;
270 insn->bits1.da1.src1_reg_type = reg.type;
271 insn->bits3.da1.src1_abs = reg.abs;
272 insn->bits3.da1.src1_negate = reg.negate;
273
274 /* Only src1 can be immediate in two-argument instructions.
275 */
276 assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE);
277
278 if (reg.file == BRW_IMMEDIATE_VALUE) {
279 insn->bits3.ud = reg.dw1.ud;
280 }
281 else {
282 /* This is a hardware restriction, which may or may not be lifted
283 * in the future:
284 */
285 assert (reg.address_mode == BRW_ADDRESS_DIRECT);
286 /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */
287
288 if (insn->header.access_mode == BRW_ALIGN_1) {
289 insn->bits3.da1.src1_subreg_nr = reg.subnr;
290 insn->bits3.da1.src1_reg_nr = reg.nr;
291 }
292 else {
293 insn->bits3.da16.src1_subreg_nr = reg.subnr / 16;
294 insn->bits3.da16.src1_reg_nr = reg.nr;
295 }
296
297 if (insn->header.access_mode == BRW_ALIGN_1) {
298 if (reg.width == BRW_WIDTH_1 &&
299 insn->header.execution_size == BRW_EXECUTE_1) {
300 insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
301 insn->bits3.da1.src1_width = BRW_WIDTH_1;
302 insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0;
303 }
304 else {
305 insn->bits3.da1.src1_horiz_stride = reg.hstride;
306 insn->bits3.da1.src1_width = reg.width;
307 insn->bits3.da1.src1_vert_stride = reg.vstride;
308 }
309 }
310 else {
311 insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
312 insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
313 insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
314 insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
315
316 /* This is an oddity arising from the fact that we use the same
317 * register descriptions in align_16 mode as in align_1:
318 */
319 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
320 insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4;
321 else
322 insn->bits3.da16.src1_vert_stride = reg.vstride;
323 }
324 }
325 }
326
327
328
329 static void brw_set_math_message( struct brw_context *brw,
330 struct brw_instruction *insn,
331 GLuint msg_length,
332 GLuint response_length,
333 GLuint function,
334 GLuint integer_type,
335 GLboolean low_precision,
336 GLboolean saturate,
337 GLuint dataType )
338 {
339 struct intel_context *intel = &brw->intel;
340 brw_set_src1(insn, brw_imm_d(0));
341
342 if (intel->gen == 5) {
343 insn->bits3.math_gen5.function = function;
344 insn->bits3.math_gen5.int_type = integer_type;
345 insn->bits3.math_gen5.precision = low_precision;
346 insn->bits3.math_gen5.saturate = saturate;
347 insn->bits3.math_gen5.data_type = dataType;
348 insn->bits3.math_gen5.snapshot = 0;
349 insn->bits3.math_gen5.header_present = 0;
350 insn->bits3.math_gen5.response_length = response_length;
351 insn->bits3.math_gen5.msg_length = msg_length;
352 insn->bits3.math_gen5.end_of_thread = 0;
353 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_MATH;
354 insn->bits2.send_gen5.end_of_thread = 0;
355 } else {
356 insn->bits3.math.function = function;
357 insn->bits3.math.int_type = integer_type;
358 insn->bits3.math.precision = low_precision;
359 insn->bits3.math.saturate = saturate;
360 insn->bits3.math.data_type = dataType;
361 insn->bits3.math.response_length = response_length;
362 insn->bits3.math.msg_length = msg_length;
363 insn->bits3.math.msg_target = BRW_MESSAGE_TARGET_MATH;
364 insn->bits3.math.end_of_thread = 0;
365 }
366 }
367
368
369 static void brw_set_ff_sync_message(struct brw_context *brw,
370 struct brw_instruction *insn,
371 GLboolean allocate,
372 GLuint response_length,
373 GLboolean end_of_thread)
374 {
375 struct intel_context *intel = &brw->intel;
376 brw_set_src1(insn, brw_imm_d(0));
377
378 insn->bits3.urb_gen5.opcode = 1; /* FF_SYNC */
379 insn->bits3.urb_gen5.offset = 0; /* Not used by FF_SYNC */
380 insn->bits3.urb_gen5.swizzle_control = 0; /* Not used by FF_SYNC */
381 insn->bits3.urb_gen5.allocate = allocate;
382 insn->bits3.urb_gen5.used = 0; /* Not used by FF_SYNC */
383 insn->bits3.urb_gen5.complete = 0; /* Not used by FF_SYNC */
384 insn->bits3.urb_gen5.header_present = 1;
385 insn->bits3.urb_gen5.response_length = response_length; /* may be 1 or 0 */
386 insn->bits3.urb_gen5.msg_length = 1;
387 insn->bits3.urb_gen5.end_of_thread = end_of_thread;
388 if (intel->gen >= 6) {
389 insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_URB;
390 } else {
391 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_URB;
392 insn->bits2.send_gen5.end_of_thread = end_of_thread;
393 }
394 }
395
396 static void brw_set_urb_message( struct brw_context *brw,
397 struct brw_instruction *insn,
398 GLboolean allocate,
399 GLboolean used,
400 GLuint msg_length,
401 GLuint response_length,
402 GLboolean end_of_thread,
403 GLboolean complete,
404 GLuint offset,
405 GLuint swizzle_control )
406 {
407 struct intel_context *intel = &brw->intel;
408 brw_set_src1(insn, brw_imm_d(0));
409
410 if (intel->gen >= 5) {
411 insn->bits3.urb_gen5.opcode = 0; /* ? */
412 insn->bits3.urb_gen5.offset = offset;
413 insn->bits3.urb_gen5.swizzle_control = swizzle_control;
414 insn->bits3.urb_gen5.allocate = allocate;
415 insn->bits3.urb_gen5.used = used; /* ? */
416 insn->bits3.urb_gen5.complete = complete;
417 insn->bits3.urb_gen5.header_present = 1;
418 insn->bits3.urb_gen5.response_length = response_length;
419 insn->bits3.urb_gen5.msg_length = msg_length;
420 insn->bits3.urb_gen5.end_of_thread = end_of_thread;
421 if (intel->gen >= 6) {
422 /* For SNB, the SFID bits moved to the condmod bits, and
423 * EOT stayed in bits3 above. Does the EOT bit setting
424 * below on Ironlake even do anything?
425 */
426 insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_URB;
427 } else {
428 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_URB;
429 insn->bits2.send_gen5.end_of_thread = end_of_thread;
430 }
431 } else {
432 insn->bits3.urb.opcode = 0; /* ? */
433 insn->bits3.urb.offset = offset;
434 insn->bits3.urb.swizzle_control = swizzle_control;
435 insn->bits3.urb.allocate = allocate;
436 insn->bits3.urb.used = used; /* ? */
437 insn->bits3.urb.complete = complete;
438 insn->bits3.urb.response_length = response_length;
439 insn->bits3.urb.msg_length = msg_length;
440 insn->bits3.urb.msg_target = BRW_MESSAGE_TARGET_URB;
441 insn->bits3.urb.end_of_thread = end_of_thread;
442 }
443 }
444
445 static void brw_set_dp_write_message( struct brw_context *brw,
446 struct brw_instruction *insn,
447 GLuint binding_table_index,
448 GLuint msg_control,
449 GLuint msg_type,
450 GLuint msg_length,
451 GLboolean header_present,
452 GLuint pixel_scoreboard_clear,
453 GLuint response_length,
454 GLuint end_of_thread,
455 GLuint send_commit_msg)
456 {
457 struct intel_context *intel = &brw->intel;
458 brw_set_src1(insn, brw_imm_ud(0));
459
460 if (intel->gen >= 6) {
461 insn->bits3.dp_render_cache.binding_table_index = binding_table_index;
462 insn->bits3.dp_render_cache.msg_control = msg_control;
463 insn->bits3.dp_render_cache.pixel_scoreboard_clear = pixel_scoreboard_clear;
464 insn->bits3.dp_render_cache.msg_type = msg_type;
465 insn->bits3.dp_render_cache.send_commit_msg = send_commit_msg;
466 insn->bits3.dp_render_cache.header_present = header_present;
467 insn->bits3.dp_render_cache.response_length = response_length;
468 insn->bits3.dp_render_cache.msg_length = msg_length;
469 insn->bits3.dp_render_cache.end_of_thread = end_of_thread;
470 insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
471 /* XXX really need below? */
472 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
473 insn->bits2.send_gen5.end_of_thread = end_of_thread;
474 } else if (intel->gen == 5) {
475 insn->bits3.dp_write_gen5.binding_table_index = binding_table_index;
476 insn->bits3.dp_write_gen5.msg_control = msg_control;
477 insn->bits3.dp_write_gen5.pixel_scoreboard_clear = pixel_scoreboard_clear;
478 insn->bits3.dp_write_gen5.msg_type = msg_type;
479 insn->bits3.dp_write_gen5.send_commit_msg = send_commit_msg;
480 insn->bits3.dp_write_gen5.header_present = header_present;
481 insn->bits3.dp_write_gen5.response_length = response_length;
482 insn->bits3.dp_write_gen5.msg_length = msg_length;
483 insn->bits3.dp_write_gen5.end_of_thread = end_of_thread;
484 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
485 insn->bits2.send_gen5.end_of_thread = end_of_thread;
486 } else {
487 insn->bits3.dp_write.binding_table_index = binding_table_index;
488 insn->bits3.dp_write.msg_control = msg_control;
489 insn->bits3.dp_write.pixel_scoreboard_clear = pixel_scoreboard_clear;
490 insn->bits3.dp_write.msg_type = msg_type;
491 insn->bits3.dp_write.send_commit_msg = send_commit_msg;
492 insn->bits3.dp_write.response_length = response_length;
493 insn->bits3.dp_write.msg_length = msg_length;
494 insn->bits3.dp_write.msg_target = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
495 insn->bits3.dp_write.end_of_thread = end_of_thread;
496 }
497 }
498
499 static void brw_set_dp_read_message( struct brw_context *brw,
500 struct brw_instruction *insn,
501 GLuint binding_table_index,
502 GLuint msg_control,
503 GLuint msg_type,
504 GLuint target_cache,
505 GLuint msg_length,
506 GLuint response_length,
507 GLuint end_of_thread )
508 {
509 struct intel_context *intel = &brw->intel;
510 brw_set_src1(insn, brw_imm_d(0));
511
512 if (intel->gen == 5) {
513 insn->bits3.dp_read_gen5.binding_table_index = binding_table_index;
514 insn->bits3.dp_read_gen5.msg_control = msg_control;
515 insn->bits3.dp_read_gen5.msg_type = msg_type;
516 insn->bits3.dp_read_gen5.target_cache = target_cache;
517 insn->bits3.dp_read_gen5.header_present = 1;
518 insn->bits3.dp_read_gen5.response_length = response_length;
519 insn->bits3.dp_read_gen5.msg_length = msg_length;
520 insn->bits3.dp_read_gen5.pad1 = 0;
521 insn->bits3.dp_read_gen5.end_of_thread = end_of_thread;
522 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_READ;
523 insn->bits2.send_gen5.end_of_thread = end_of_thread;
524 } else {
525 insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/
526 insn->bits3.dp_read.msg_control = msg_control; /*8:11*/
527 insn->bits3.dp_read.msg_type = msg_type; /*12:13*/
528 insn->bits3.dp_read.target_cache = target_cache; /*14:15*/
529 insn->bits3.dp_read.response_length = response_length; /*16:19*/
530 insn->bits3.dp_read.msg_length = msg_length; /*20:23*/
531 insn->bits3.dp_read.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ; /*24:27*/
532 insn->bits3.dp_read.pad1 = 0; /*28:30*/
533 insn->bits3.dp_read.end_of_thread = end_of_thread; /*31*/
534 }
535 }
536
537 static void brw_set_sampler_message(struct brw_context *brw,
538 struct brw_instruction *insn,
539 GLuint binding_table_index,
540 GLuint sampler,
541 GLuint msg_type,
542 GLuint response_length,
543 GLuint msg_length,
544 GLboolean eot,
545 GLuint header_present,
546 GLuint simd_mode)
547 {
548 struct intel_context *intel = &brw->intel;
549 assert(eot == 0);
550 brw_set_src1(insn, brw_imm_d(0));
551
552 if (intel->gen >= 5) {
553 insn->bits3.sampler_gen5.binding_table_index = binding_table_index;
554 insn->bits3.sampler_gen5.sampler = sampler;
555 insn->bits3.sampler_gen5.msg_type = msg_type;
556 insn->bits3.sampler_gen5.simd_mode = simd_mode;
557 insn->bits3.sampler_gen5.header_present = header_present;
558 insn->bits3.sampler_gen5.response_length = response_length;
559 insn->bits3.sampler_gen5.msg_length = msg_length;
560 insn->bits3.sampler_gen5.end_of_thread = eot;
561 if (intel->gen >= 6)
562 insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_SAMPLER;
563 else {
564 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_SAMPLER;
565 insn->bits2.send_gen5.end_of_thread = eot;
566 }
567 } else if (intel->is_g4x) {
568 insn->bits3.sampler_g4x.binding_table_index = binding_table_index;
569 insn->bits3.sampler_g4x.sampler = sampler;
570 insn->bits3.sampler_g4x.msg_type = msg_type;
571 insn->bits3.sampler_g4x.response_length = response_length;
572 insn->bits3.sampler_g4x.msg_length = msg_length;
573 insn->bits3.sampler_g4x.end_of_thread = eot;
574 insn->bits3.sampler_g4x.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
575 } else {
576 insn->bits3.sampler.binding_table_index = binding_table_index;
577 insn->bits3.sampler.sampler = sampler;
578 insn->bits3.sampler.msg_type = msg_type;
579 insn->bits3.sampler.return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32;
580 insn->bits3.sampler.response_length = response_length;
581 insn->bits3.sampler.msg_length = msg_length;
582 insn->bits3.sampler.end_of_thread = eot;
583 insn->bits3.sampler.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
584 }
585 }
586
587
588
589 static struct brw_instruction *next_insn( struct brw_compile *p,
590 GLuint opcode )
591 {
592 struct brw_instruction *insn;
593
594 assert(p->nr_insn + 1 < BRW_EU_MAX_INSN);
595
596 insn = &p->store[p->nr_insn++];
597 memcpy(insn, p->current, sizeof(*insn));
598
599 /* Reset this one-shot flag:
600 */
601
602 if (p->current->header.destreg__conditionalmod) {
603 p->current->header.destreg__conditionalmod = 0;
604 p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
605 }
606
607 insn->header.opcode = opcode;
608 return insn;
609 }
610
611
612 static struct brw_instruction *brw_alu1( struct brw_compile *p,
613 GLuint opcode,
614 struct brw_reg dest,
615 struct brw_reg src )
616 {
617 struct brw_instruction *insn = next_insn(p, opcode);
618 brw_set_dest(insn, dest);
619 brw_set_src0(insn, src);
620 return insn;
621 }
622
623 static struct brw_instruction *brw_alu2(struct brw_compile *p,
624 GLuint opcode,
625 struct brw_reg dest,
626 struct brw_reg src0,
627 struct brw_reg src1 )
628 {
629 struct brw_instruction *insn = next_insn(p, opcode);
630 brw_set_dest(insn, dest);
631 brw_set_src0(insn, src0);
632 brw_set_src1(insn, src1);
633 return insn;
634 }
635
636
637 /***********************************************************************
638 * Convenience routines.
639 */
640 #define ALU1(OP) \
641 struct brw_instruction *brw_##OP(struct brw_compile *p, \
642 struct brw_reg dest, \
643 struct brw_reg src0) \
644 { \
645 return brw_alu1(p, BRW_OPCODE_##OP, dest, src0); \
646 }
647
648 #define ALU2(OP) \
649 struct brw_instruction *brw_##OP(struct brw_compile *p, \
650 struct brw_reg dest, \
651 struct brw_reg src0, \
652 struct brw_reg src1) \
653 { \
654 return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1); \
655 }
656
657 /* Rounding operations (other than RNDD) require two instructions - the first
658 * stores a rounded value (possibly the wrong way) in the dest register, but
659 * also sets a per-channel "increment bit" in the flag register. A predicated
660 * add of 1.0 fixes dest to contain the desired result.
661 */
662 #define ROUND(OP) \
663 void brw_##OP(struct brw_compile *p, \
664 struct brw_reg dest, \
665 struct brw_reg src) \
666 { \
667 struct brw_instruction *rnd, *add; \
668 rnd = next_insn(p, BRW_OPCODE_##OP); \
669 brw_set_dest(rnd, dest); \
670 brw_set_src0(rnd, src); \
671 rnd->header.destreg__conditionalmod = 0x7; /* turn on round-increments */ \
672 \
673 add = brw_ADD(p, dest, dest, brw_imm_f(1.0f)); \
674 add->header.predicate_control = BRW_PREDICATE_NORMAL; \
675 }
676
677
678 ALU1(MOV)
679 ALU2(SEL)
680 ALU1(NOT)
681 ALU2(AND)
682 ALU2(OR)
683 ALU2(XOR)
684 ALU2(SHR)
685 ALU2(SHL)
686 ALU2(RSR)
687 ALU2(RSL)
688 ALU2(ASR)
689 ALU1(FRC)
690 ALU1(RNDD)
691 ALU2(MAC)
692 ALU2(MACH)
693 ALU1(LZD)
694 ALU2(DP4)
695 ALU2(DPH)
696 ALU2(DP3)
697 ALU2(DP2)
698 ALU2(LINE)
699 ALU2(PLN)
700
701
702 ROUND(RNDZ)
703 ROUND(RNDE)
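/* A minimal usage sketch (illustrative only, not compiled into the driver)
 * of the two-instruction rounding sequence described above the ROUND()
 * macro: brw_RNDZ() emits the RNDZ itself plus the predicated ADD of 1.0
 * that applies the per-channel "increment bit".  The compile state 'p' and
 * the registers 'dst'/'val' are assumed to be set up by the caller.
 */
#if 0
static void example_round_toward_zero(struct brw_compile *p,
                                      struct brw_reg dst,
                                      struct brw_reg val)
{
   /* Emits:  rndz dst, val        (also sets the round-increment flags)
    *         (+f0) add dst, dst, 1.0F
    */
   brw_RNDZ(p, dst, val);
}
#endif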
704
705
706 struct brw_instruction *brw_ADD(struct brw_compile *p,
707 struct brw_reg dest,
708 struct brw_reg src0,
709 struct brw_reg src1)
710 {
711 /* 6.2.2: add */
712 if (src0.type == BRW_REGISTER_TYPE_F ||
713 (src0.file == BRW_IMMEDIATE_VALUE &&
714 src0.type == BRW_REGISTER_TYPE_VF)) {
715 assert(src1.type != BRW_REGISTER_TYPE_UD);
716 assert(src1.type != BRW_REGISTER_TYPE_D);
717 }
718
719 if (src1.type == BRW_REGISTER_TYPE_F ||
720 (src1.file == BRW_IMMEDIATE_VALUE &&
721 src1.type == BRW_REGISTER_TYPE_VF)) {
722 assert(src0.type != BRW_REGISTER_TYPE_UD);
723 assert(src0.type != BRW_REGISTER_TYPE_D);
724 }
725
726 return brw_alu2(p, BRW_OPCODE_ADD, dest, src0, src1);
727 }
728
729 struct brw_instruction *brw_MUL(struct brw_compile *p,
730 struct brw_reg dest,
731 struct brw_reg src0,
732 struct brw_reg src1)
733 {
734 /* 6.32.38: mul */
735 if (src0.type == BRW_REGISTER_TYPE_D ||
736 src0.type == BRW_REGISTER_TYPE_UD ||
737 src1.type == BRW_REGISTER_TYPE_D ||
738 src1.type == BRW_REGISTER_TYPE_UD) {
739 assert(dest.type != BRW_REGISTER_TYPE_F);
740 }
741
742 if (src0.type == BRW_REGISTER_TYPE_F ||
743 (src0.file == BRW_IMMEDIATE_VALUE &&
744 src0.type == BRW_REGISTER_TYPE_VF)) {
745 assert(src1.type != BRW_REGISTER_TYPE_UD);
746 assert(src1.type != BRW_REGISTER_TYPE_D);
747 }
748
749 if (src1.type == BRW_REGISTER_TYPE_F ||
750 (src1.file == BRW_IMMEDIATE_VALUE &&
751 src1.type == BRW_REGISTER_TYPE_VF)) {
752 assert(src0.type != BRW_REGISTER_TYPE_UD);
753 assert(src0.type != BRW_REGISTER_TYPE_D);
754 }
755
756 assert(src0.file != BRW_ARCHITECTURE_REGISTER_FILE ||
757 src0.nr != BRW_ARF_ACCUMULATOR);
758 assert(src1.file != BRW_ARCHITECTURE_REGISTER_FILE ||
759 src1.nr != BRW_ARF_ACCUMULATOR);
760
761 return brw_alu2(p, BRW_OPCODE_MUL, dest, src0, src1);
762 }
763
764
765 void brw_NOP(struct brw_compile *p)
766 {
767 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP);
768 brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
769 brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
770 brw_set_src1(insn, brw_imm_ud(0x0));
771 }
772
773
774
775
776
777 /***********************************************************************
778 * Comparisons, if/else/endif
779 */
780
781 struct brw_instruction *brw_JMPI(struct brw_compile *p,
782 struct brw_reg dest,
783 struct brw_reg src0,
784 struct brw_reg src1)
785 {
786 struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1);
787
788 insn->header.execution_size = 1;
789 insn->header.compression_control = BRW_COMPRESSION_NONE;
790 insn->header.mask_control = BRW_MASK_DISABLE;
791
792 p->current->header.predicate_control = BRW_PREDICATE_NONE;
793
794 return insn;
795 }
796
797 /* EU takes the value from the flag register and pushes it onto some
798 * sort of a stack (presumably merging with any flag value already on
799 * the stack). Within an if block, the flags at the top of the stack
800 * control execution on each channel of the unit, e.g. on each of the
801 * 16 pixel values in our wm programs.
802 *
803 * When the matching 'else' instruction is reached (presumably by
804 * countdown of the instruction count patched in by our ELSE/ENDIF
805 * functions), the relevant flags are inverted.
806 *
807 * When the matching 'endif' instruction is reached, the flags are
808 * popped off. If the stack is now empty, normal execution resumes.
809 *
810 * No attempt is made to deal with stack overflow (14 elements?).
811 */
812 struct brw_instruction *brw_IF(struct brw_compile *p, GLuint execute_size)
813 {
814 struct intel_context *intel = &p->brw->intel;
815 struct brw_instruction *insn;
816
817 if (p->single_program_flow) {
818 assert(execute_size == BRW_EXECUTE_1);
819
820 insn = next_insn(p, BRW_OPCODE_ADD);
821 insn->header.predicate_inverse = 1;
822 } else {
823 insn = next_insn(p, BRW_OPCODE_IF);
824 }
825
826 /* Override the defaults for this instruction:
827 */
828 if (intel->gen < 6) {
829 brw_set_dest(insn, brw_ip_reg());
830 brw_set_src0(insn, brw_ip_reg());
831 brw_set_src1(insn, brw_imm_d(0x0));
832 } else {
833 brw_set_dest(insn, brw_imm_w(0));
834 brw_set_src0(insn, brw_null_reg());
835 brw_set_src1(insn, brw_null_reg());
836 }
837
838 insn->header.execution_size = execute_size;
839 insn->header.compression_control = BRW_COMPRESSION_NONE;
840 insn->header.predicate_control = BRW_PREDICATE_NORMAL;
841 insn->header.mask_control = BRW_MASK_ENABLE;
842 if (!p->single_program_flow)
843 insn->header.thread_control = BRW_THREAD_SWITCH;
844
845 p->current->header.predicate_control = BRW_PREDICATE_NONE;
846
847 return insn;
848 }
849
850
851 struct brw_instruction *brw_ELSE(struct brw_compile *p,
852 struct brw_instruction *if_insn)
853 {
854 struct intel_context *intel = &p->brw->intel;
855 struct brw_instruction *insn;
856 GLuint br = 1;
857
858 /* The jump count is in units of 64-bit chunks, so one 128-bit
859 instruction requires 2 chunks. */
860 if (intel->gen >= 5)
861 br = 2;
862
863 if (p->single_program_flow) {
864 insn = next_insn(p, BRW_OPCODE_ADD);
865 } else {
866 insn = next_insn(p, BRW_OPCODE_ELSE);
867 }
868
869 if (intel->gen < 6) {
870 brw_set_dest(insn, brw_ip_reg());
871 brw_set_src0(insn, brw_ip_reg());
872 brw_set_src1(insn, brw_imm_d(0x0));
873 } else {
874 brw_set_dest(insn, brw_imm_w(0));
875 brw_set_src0(insn, brw_null_reg());
876 brw_set_src1(insn, brw_null_reg());
877 }
878
879 insn->header.compression_control = BRW_COMPRESSION_NONE;
880 insn->header.execution_size = if_insn->header.execution_size;
881 insn->header.mask_control = BRW_MASK_ENABLE;
882 if (!p->single_program_flow)
883 insn->header.thread_control = BRW_THREAD_SWITCH;
884
885 /* Patch the if instruction to point at this instruction.
886 */
887 if (p->single_program_flow) {
888 assert(if_insn->header.opcode == BRW_OPCODE_ADD);
889
890 if_insn->bits3.ud = (insn - if_insn + 1) * 16;
891 } else {
892 assert(if_insn->header.opcode == BRW_OPCODE_IF);
893
894 if (intel->gen < 6) {
895 if_insn->bits3.if_else.jump_count = br * (insn - if_insn);
896 if_insn->bits3.if_else.pop_count = 0;
897 if_insn->bits3.if_else.pad0 = 0;
898 } else {
899 if_insn->bits1.branch_gen6.jump_count = br * (insn - if_insn + 1);
900 }
901 }
902
903 return insn;
904 }
905
906 void brw_ENDIF(struct brw_compile *p,
907 struct brw_instruction *patch_insn)
908 {
909 struct intel_context *intel = &p->brw->intel;
910 GLuint br = 1;
911
912 if (intel->gen >= 5)
913 br = 2;
914
915 if (p->single_program_flow) {
916 /* In single program flow mode, there's no need to execute an ENDIF,
917 * since we don't need to do any stack operations, and if this channel is
918 * currently executing, we simply want it to keep executing.
919 */
920 struct brw_instruction *next = &p->store[p->nr_insn];
921
922 assert(patch_insn->header.opcode == BRW_OPCODE_ADD);
923
924 patch_insn->bits3.ud = (next - patch_insn) * 16;
925 } else {
926 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_ENDIF);
927
928 if (intel->gen < 6) {
929 brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
930 brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
931 brw_set_src1(insn, brw_imm_d(0x0));
932 } else {
933 brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_W));
934 brw_set_src0(insn, brw_null_reg());
935 brw_set_src1(insn, brw_null_reg());
936 }
937
938 insn->header.compression_control = BRW_COMPRESSION_NONE;
939 insn->header.execution_size = patch_insn->header.execution_size;
940 insn->header.mask_control = BRW_MASK_ENABLE;
941 insn->header.thread_control = BRW_THREAD_SWITCH;
942
943 assert(patch_insn->bits3.if_else.jump_count == 0);
944
945 /* Patch the if or else instructions to point at this or the next
946 * instruction respectively.
947 */
948 if (patch_insn->header.opcode == BRW_OPCODE_IF) {
949 if (intel->gen < 6) {
950 /* Turn it into an IFF, which means no mask stack operations for
951 * all-false and jumping past the ENDIF.
952 */
953 patch_insn->header.opcode = BRW_OPCODE_IFF;
954 patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1);
955 patch_insn->bits3.if_else.pop_count = 0;
956 patch_insn->bits3.if_else.pad0 = 0;
957 } else {
958 /* As of gen6, there is no IFF and IF must point to the ENDIF. */
959 patch_insn->bits1.branch_gen6.jump_count = br * (insn - patch_insn);
960 }
961 } else {
962 assert(patch_insn->header.opcode == BRW_OPCODE_ELSE);
963 if (intel->gen < 6) {
964 /* BRW_OPCODE_ELSE pre-gen6 should point just past the
965 * matching ENDIF.
966 */
967 patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1);
968 patch_insn->bits3.if_else.pop_count = 1;
969 patch_insn->bits3.if_else.pad0 = 0;
970 } else {
971 /* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. */
972 patch_insn->bits1.branch_gen6.jump_count = br * (insn - patch_insn);
973 }
974 }
975
976 /* Also pop an item off the stack in the endif instruction:
977 */
978 if (intel->gen < 6) {
979 insn->bits3.if_else.jump_count = 0;
980 insn->bits3.if_else.pop_count = 1;
981 insn->bits3.if_else.pad0 = 0;
982 } else {
983 insn->bits1.branch_gen6.jump_count = 2;
984 }
985 }
986 }
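/* Illustrative sketch (not compiled into the driver) of the usual emission
 * pattern for the structured control-flow helpers above: the CMP populates
 * the flag register, brw_IF() consumes it, and brw_ELSE()/brw_ENDIF() patch
 * the earlier instructions' jump counts.  'p', 'dst', 'a' and 'b' are
 * assumed to be provided by the caller.
 */
#if 0
static void example_if_else(struct brw_compile *p,
                            struct brw_reg dst,
                            struct brw_reg a,
                            struct brw_reg b)
{
   struct brw_instruction *if_insn, *else_insn;

   brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_G, a, b);

   if_insn = brw_IF(p, BRW_EXECUTE_8);
   brw_MOV(p, dst, a);              /* "then" block: dst = a */
   else_insn = brw_ELSE(p, if_insn);
   brw_MOV(p, dst, b);              /* "else" block: dst = b */
   brw_ENDIF(p, else_insn);         /* patches the ELSE jump count */
}
#endif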
987
988 struct brw_instruction *brw_BREAK(struct brw_compile *p, int pop_count)
989 {
990 struct brw_instruction *insn;
991 insn = next_insn(p, BRW_OPCODE_BREAK);
992 brw_set_dest(insn, brw_ip_reg());
993 brw_set_src0(insn, brw_ip_reg());
994 brw_set_src1(insn, brw_imm_d(0x0));
995 insn->header.compression_control = BRW_COMPRESSION_NONE;
996 insn->header.execution_size = BRW_EXECUTE_8;
997 /* insn->header.mask_control = BRW_MASK_DISABLE; */
998 insn->bits3.if_else.pad0 = 0;
999 insn->bits3.if_else.pop_count = pop_count;
1000 return insn;
1001 }
1002
1003 struct brw_instruction *brw_CONT(struct brw_compile *p, int pop_count)
1004 {
1005 struct brw_instruction *insn;
1006 insn = next_insn(p, BRW_OPCODE_CONTINUE);
1007 brw_set_dest(insn, brw_ip_reg());
1008 brw_set_src0(insn, brw_ip_reg());
1009 brw_set_src1(insn, brw_imm_d(0x0));
1010 insn->header.compression_control = BRW_COMPRESSION_NONE;
1011 insn->header.execution_size = BRW_EXECUTE_8;
1012 /* insn->header.mask_control = BRW_MASK_DISABLE; */
1013 insn->bits3.if_else.pad0 = 0;
1014 insn->bits3.if_else.pop_count = pop_count;
1015 return insn;
1016 }
1017
1018 /* DO/WHILE loop:
1019 */
1020 struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size)
1021 {
1022 if (p->single_program_flow) {
1023 return &p->store[p->nr_insn];
1024 } else {
1025 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO);
1026
1027 /* Override the defaults for this instruction:
1028 */
1029 brw_set_dest(insn, brw_null_reg());
1030 brw_set_src0(insn, brw_null_reg());
1031 brw_set_src1(insn, brw_null_reg());
1032
1033 insn->header.compression_control = BRW_COMPRESSION_NONE;
1034 insn->header.execution_size = execute_size;
1035 insn->header.predicate_control = BRW_PREDICATE_NONE;
1036 /* insn->header.mask_control = BRW_MASK_ENABLE; */
1037 /* insn->header.mask_control = BRW_MASK_DISABLE; */
1038
1039 return insn;
1040 }
1041 }
1042
1043
1044
1045 struct brw_instruction *brw_WHILE(struct brw_compile *p,
1046 struct brw_instruction *do_insn)
1047 {
1048 struct intel_context *intel = &p->brw->intel;
1049 struct brw_instruction *insn;
1050 GLuint br = 1;
1051
1052 if (intel->gen >= 5)
1053 br = 2;
1054
1055 if (p->single_program_flow)
1056 insn = next_insn(p, BRW_OPCODE_ADD);
1057 else
1058 insn = next_insn(p, BRW_OPCODE_WHILE);
1059
1060 brw_set_dest(insn, brw_ip_reg());
1061 brw_set_src0(insn, brw_ip_reg());
1062 brw_set_src1(insn, brw_imm_d(0x0));
1063
1064 insn->header.compression_control = BRW_COMPRESSION_NONE;
1065
1066 if (p->single_program_flow) {
1067 insn->header.execution_size = BRW_EXECUTE_1;
1068
1069 insn->bits3.d = (do_insn - insn) * 16;
1070 } else {
1071 insn->header.execution_size = do_insn->header.execution_size;
1072
1073 assert(do_insn->header.opcode == BRW_OPCODE_DO);
1074 insn->bits3.if_else.jump_count = br * (do_insn - insn + 1);
1075 insn->bits3.if_else.pop_count = 0;
1076 insn->bits3.if_else.pad0 = 0;
1077 }
1078
1079 /* insn->header.mask_control = BRW_MASK_ENABLE; */
1080
1081 /* insn->header.mask_control = BRW_MASK_DISABLE; */
1082 p->current->header.predicate_control = BRW_PREDICATE_NONE;
1083 return insn;
1084 }
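/* Illustrative sketch (not compiled into the driver) of a loop built with
 * brw_DO()/brw_WHILE().  Predicating the WHILE on a flag computed inside
 * the body is one possible way to terminate the loop; 'p', 'counter' and
 * 'limit' are assumed to come from the caller.
 */
#if 0
static void example_loop(struct brw_compile *p,
                         struct brw_reg counter,
                         struct brw_reg limit)
{
   struct brw_instruction *do_insn, *while_insn;

   do_insn = brw_DO(p, BRW_EXECUTE_8);
   {
      /* ... loop body ... */
      brw_ADD(p, counter, counter, brw_imm_d(1));
      brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, counter, limit);
   }
   while_insn = brw_WHILE(p, do_insn);
   while_insn->header.predicate_control = BRW_PREDICATE_NORMAL;
}
#endif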
1085
1086
1087 /* FORWARD JUMPS:
1088 */
1089 void brw_land_fwd_jump(struct brw_compile *p,
1090 struct brw_instruction *jmp_insn)
1091 {
1092 struct intel_context *intel = &p->brw->intel;
1093 struct brw_instruction *landing = &p->store[p->nr_insn];
1094 GLuint jmpi = 1;
1095
1096 if (intel->gen >= 5)
1097 jmpi = 2;
1098
1099 assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
1100 assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE);
1101
1102 jmp_insn->bits3.ud = jmpi * ((landing - jmp_insn) - 1);
1103 }
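/* Illustrative sketch (not compiled into the driver): a forward jump is
 * emitted as a predicated JMPI with a zero immediate and patched once the
 * landing point is known.  'p' and the preceding flag setup are assumed.
 */
#if 0
static void example_forward_jump(struct brw_compile *p)
{
   struct brw_instruction *jmp;

   jmp = brw_JMPI(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(0));
   jmp->header.predicate_control = BRW_PREDICATE_NORMAL;

   /* ... instructions skipped when the predicate passes ... */

   brw_land_fwd_jump(p, jmp);   /* patch the JMPI to land here */
}
#endif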
1104
1105
1106
1107 /* To integrate with the above, it makes sense that the comparison
1108 * instruction should populate the flag register. It might be simpler
1109 * just to use the flag reg for most WM tasks?
1110 */
1111 void brw_CMP(struct brw_compile *p,
1112 struct brw_reg dest,
1113 GLuint conditional,
1114 struct brw_reg src0,
1115 struct brw_reg src1)
1116 {
1117 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP);
1118
1119 insn->header.destreg__conditionalmod = conditional;
1120 brw_set_dest(insn, dest);
1121 brw_set_src0(insn, src0);
1122 brw_set_src1(insn, src1);
1123
1124 /* guess_execution_size(insn, src0); */
1125
1126
1127 /* Make it so that future instructions will use the computed flag
1128 * value until brw_set_predicate_control_flag_value() is called
1129 * again.
1130 */
1131 if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
1132 dest.nr == 0) {
1133 p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
1134 p->flag_value = 0xff;
1135 }
1136 }
1137
1138 /* Issue a 'wait' instruction on notification register n1; the host can
1139 program MMIO to wake up the thread. */
1140 void brw_WAIT (struct brw_compile *p)
1141 {
1142 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_WAIT);
1143 struct brw_reg src = brw_notification_1_reg();
1144
1145 brw_set_dest(insn, src);
1146 brw_set_src0(insn, src);
1147 brw_set_src1(insn, brw_null_reg());
1148 insn->header.execution_size = 0; /* must */
1149 insn->header.predicate_control = 0;
1150 insn->header.compression_control = 0;
1151 }
1152
1153
1154 /***********************************************************************
1155 * Helpers for the various SEND message types:
1156 */
1157
1158 /** Extended math function, float[8].
1159 */
1160 void brw_math( struct brw_compile *p,
1161 struct brw_reg dest,
1162 GLuint function,
1163 GLuint saturate,
1164 GLuint msg_reg_nr,
1165 struct brw_reg src,
1166 GLuint data_type,
1167 GLuint precision )
1168 {
1169 struct intel_context *intel = &p->brw->intel;
1170
1171 if (intel->gen >= 6) {
1172 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);
1173
1174 assert(dest.file == BRW_GENERAL_REGISTER_FILE);
1175 assert(src.file == BRW_GENERAL_REGISTER_FILE);
1176
1177 assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
1178 assert(src.hstride == BRW_HORIZONTAL_STRIDE_1);
1179
1180 if (function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT &&
1181 function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
1182 assert(src.type == BRW_REGISTER_TYPE_F);
1183 }
1184
1185 /* Math is the same ISA format as other opcodes, except that CondModifier
1186 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1187 */
1188 insn->header.destreg__conditionalmod = function;
1189
1190 brw_set_dest(insn, dest);
1191 brw_set_src0(insn, src);
1192 brw_set_src1(insn, brw_null_reg());
1193 } else {
1194 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1195 GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
1196 GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
1197 /* Example code doesn't set predicate_control for send
1198 * instructions.
1199 */
1200 insn->header.predicate_control = 0;
1201 insn->header.destreg__conditionalmod = msg_reg_nr;
1202
1203 brw_set_dest(insn, dest);
1204 brw_set_src0(insn, src);
1205 brw_set_math_message(p->brw,
1206 insn,
1207 msg_length, response_length,
1208 function,
1209 BRW_MATH_INTEGER_UNSIGNED,
1210 precision,
1211 saturate,
1212 data_type);
1213 }
1214 }
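/* Illustrative sketch (not compiled into the driver) of emitting a
 * reciprocal through the extended math unit with brw_math().  The message
 * register number (2) and the full-precision setting are assumptions of
 * this example, not requirements of the helper.
 */
#if 0
static void example_emit_rcp(struct brw_compile *p,
                             struct brw_reg dst,
                             struct brw_reg src)
{
   brw_math(p,
            dst,
            BRW_MATH_FUNCTION_INV,       /* 1/x */
            BRW_MATH_SATURATE_NONE,
            2,                           /* msg_reg_nr (pre-gen6 only) */
            src,
            BRW_MATH_DATA_VECTOR,
            BRW_MATH_PRECISION_FULL);
}
#endif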
1215
1216 /** Extended math function, float[8].
1217 */
1218 void brw_math2(struct brw_compile *p,
1219 struct brw_reg dest,
1220 GLuint function,
1221 struct brw_reg src0,
1222 struct brw_reg src1)
1223 {
1224 struct intel_context *intel = &p->brw->intel;
1225 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);
1226
1227 assert(intel->gen >= 6);
1228 (void) intel;
1229
1230
1231 assert(dest.file == BRW_GENERAL_REGISTER_FILE);
1232 assert(src0.file == BRW_GENERAL_REGISTER_FILE);
1233 assert(src1.file == BRW_GENERAL_REGISTER_FILE);
1234
1235 assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
1236 assert(src0.hstride == BRW_HORIZONTAL_STRIDE_1);
1237 assert(src1.hstride == BRW_HORIZONTAL_STRIDE_1);
1238
1239 if (function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT &&
1240 function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
1241 assert(src0.type == BRW_REGISTER_TYPE_F);
1242 assert(src1.type == BRW_REGISTER_TYPE_F);
1243 }
1244
1245 /* Math is the same ISA format as other opcodes, except that CondModifier
1246 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1247 */
1248 insn->header.destreg__conditionalmod = function;
1249
1250 brw_set_dest(insn, dest);
1251 brw_set_src0(insn, src0);
1252 brw_set_src1(insn, src1);
1253 }
1254
1255 /**
1256 * Extended math function, float[16].
1257 * Use 2 send instructions.
1258 */
1259 void brw_math_16( struct brw_compile *p,
1260 struct brw_reg dest,
1261 GLuint function,
1262 GLuint saturate,
1263 GLuint msg_reg_nr,
1264 struct brw_reg src,
1265 GLuint precision )
1266 {
1267 struct intel_context *intel = &p->brw->intel;
1268 struct brw_instruction *insn;
1269 GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
1270 GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
1271
1272 if (intel->gen >= 6) {
1273 insn = next_insn(p, BRW_OPCODE_MATH);
1274
1275 /* Math is the same ISA format as other opcodes, except that CondModifier
1276 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1277 */
1278 insn->header.destreg__conditionalmod = function;
1279
1280 brw_set_dest(insn, dest);
1281 brw_set_src0(insn, src);
1282 brw_set_src1(insn, brw_null_reg());
1283 return;
1284 }
1285
1286 /* First instruction:
1287 */
1288 brw_push_insn_state(p);
1289 brw_set_predicate_control_flag_value(p, 0xff);
1290 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1291
1292 insn = next_insn(p, BRW_OPCODE_SEND);
1293 insn->header.destreg__conditionalmod = msg_reg_nr;
1294
1295 brw_set_dest(insn, dest);
1296 brw_set_src0(insn, src);
1297 brw_set_math_message(p->brw,
1298 insn,
1299 msg_length, response_length,
1300 function,
1301 BRW_MATH_INTEGER_UNSIGNED,
1302 precision,
1303 saturate,
1304 BRW_MATH_DATA_VECTOR);
1305
1306 /* Second instruction:
1307 */
1308 insn = next_insn(p, BRW_OPCODE_SEND);
1309 insn->header.compression_control = BRW_COMPRESSION_2NDHALF;
1310 insn->header.destreg__conditionalmod = msg_reg_nr+1;
1311
1312 brw_set_dest(insn, offset(dest,1));
1313 brw_set_src0(insn, src);
1314 brw_set_math_message(p->brw,
1315 insn,
1316 msg_length, response_length,
1317 function,
1318 BRW_MATH_INTEGER_UNSIGNED,
1319 precision,
1320 saturate,
1321 BRW_MATH_DATA_VECTOR);
1322
1323 brw_pop_insn_state(p);
1324 }
1325
1326
1327 /**
1328 * Write block of 16 dwords/floats to the data port Render Cache scratch buffer.
1329 * Scratch offset should be a multiple of 64.
1330 * Used for register spilling.
1331 */
1332 void brw_dp_WRITE_16( struct brw_compile *p,
1333 struct brw_reg src,
1334 GLuint scratch_offset )
1335 {
1336 struct intel_context *intel = &p->brw->intel;
1337 GLuint msg_reg_nr = 1;
1338 {
1339 brw_push_insn_state(p);
1340 brw_set_mask_control(p, BRW_MASK_DISABLE);
1341 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1342
1343 /* set message header global offset field (reg 0, element 2) */
1344 brw_MOV(p,
1345 retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D),
1346 brw_imm_d(scratch_offset));
1347
1348 brw_pop_insn_state(p);
1349 }
1350
1351 {
1352 GLuint msg_length = 3;
1353 struct brw_reg dest;
1354 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1355 int send_commit_msg;
1356
1357 insn->header.predicate_control = 0; /* XXX */
1358 insn->header.compression_control = BRW_COMPRESSION_NONE;
1359 insn->header.destreg__conditionalmod = msg_reg_nr;
1360
1361 /* Until gen6, writes followed by reads from the same location
1362 * are not guaranteed to be ordered unless write_commit is set.
1363 * If set, then a no-op write is issued to the destination
1364 * register to set a dependency, and a read from the destination
1365 * can be used to ensure the ordering.
1366 *
1367 * For gen6, only writes between different threads need ordering
1368 * protection. Our use of DP writes is all about register
1369 * spilling within a thread.
1370 */
1371 if (intel->gen >= 6) {
1372 dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
1373 send_commit_msg = 0;
1374 } else {
1375 dest = brw_uw16_grf(0, 0);
1376 send_commit_msg = 1;
1377 }
1378
1379 brw_set_dest(insn, dest);
1380 brw_set_src0(insn, src);
1381
1382 brw_set_dp_write_message(p->brw,
1383 insn,
1384 255, /* binding table index (255=stateless) */
1385 BRW_DATAPORT_OWORD_BLOCK_4_OWORDS, /* msg_control */
1386 BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE, /* msg_type */
1387 msg_length,
1388 GL_TRUE, /* header_present */
1389 0, /* pixel scoreboard */
1390 send_commit_msg, /* response_length */
1391 0, /* eot */
1392 send_commit_msg);
1393 }
1394 }
1395
1396
1397 /**
1398 * Read block of 16 dwords/floats from the data port Render Cache scratch buffer.
1399 * Scratch offset should be a multiple of 64.
1400 * Used for register spilling.
1401 */
1402 void brw_dp_READ_16( struct brw_compile *p,
1403 struct brw_reg dest,
1404 GLuint scratch_offset )
1405 {
1406 GLuint msg_reg_nr = 1;
1407 {
1408 brw_push_insn_state(p);
1409 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1410 brw_set_mask_control(p, BRW_MASK_DISABLE);
1411
1412 /* set message header global offset field (reg 0, element 2) */
1413 brw_MOV(p,
1414 retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D),
1415 brw_imm_d(scratch_offset));
1416
1417 brw_pop_insn_state(p);
1418 }
1419
1420 {
1421 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1422
1423 insn->header.predicate_control = 0; /* XXX */
1424 insn->header.compression_control = BRW_COMPRESSION_NONE;
1425 insn->header.destreg__conditionalmod = msg_reg_nr;
1426
1427 brw_set_dest(insn, dest); /* UW? */
1428 brw_set_src0(insn, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW));
1429
1430 brw_set_dp_read_message(p->brw,
1431 insn,
1432 255, /* binding table index (255=stateless) */
1433 BRW_DATAPORT_OWORD_BLOCK_4_OWORDS,
1434 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
1435 1, /* target cache (render/scratch) */
1436 1, /* msg_length */
1437 2, /* response_length */
1438 0); /* eot */
1439 }
1440 }
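/* Illustrative sketch (not compiled into the driver), loosely modeled on
 * how the WM register allocator spills a GRF: the choice of MRF 2 for the
 * payload and the 64-byte slot size are assumptions of this example.
 */
#if 0
static void example_spill_unspill(struct brw_compile *p,
                                  struct brw_reg reg, GLuint slot)
{
   GLuint scratch_offset = slot * 64;   /* must be a multiple of 64 */

   /* Copy the GRF into the message payload, then write it to scratch. */
   brw_MOV(p, retype(vec16(brw_message_reg(2)), BRW_REGISTER_TYPE_UW),
           retype(vec16(reg), BRW_REGISTER_TYPE_UW));
   brw_dp_WRITE_16(p, retype(vec16(brw_message_reg(2)), BRW_REGISTER_TYPE_UW),
                   scratch_offset);

   /* Later, read the 16 dwords back into the same GRF. */
   brw_dp_READ_16(p, retype(vec16(reg), BRW_REGISTER_TYPE_UW), scratch_offset);
}
#endif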
1441
1442
1443 /**
1444 * Read a float[4] vector from the data port Data Cache (const buffer).
1445 * Location (in buffer) should be a multiple of 16.
1446 * Used for fetching shader constants.
1447 * If relAddr is true, we'll do an indirect fetch using the address register.
1448 */
1449 void brw_dp_READ_4( struct brw_compile *p,
1450 struct brw_reg dest,
1451 GLboolean relAddr,
1452 GLuint location,
1453 GLuint bind_table_index )
1454 {
1455 /* XXX: relAddr not implemented */
1456 GLuint msg_reg_nr = 1;
1457 {
1458 struct brw_reg b;
1459 brw_push_insn_state(p);
1460 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1461 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1462 brw_set_mask_control(p, BRW_MASK_DISABLE);
1463
1464 /* Setup MRF[1] with location/offset into const buffer */
1465 b = brw_message_reg(msg_reg_nr);
1466 b = retype(b, BRW_REGISTER_TYPE_UD);
1467 /* XXX I think we're setting all the dwords of MRF[1] to 'location',
1468 * when the docs say only dword[2] should be set. Hmmm. But it works.
1469 */
1470 brw_MOV(p, b, brw_imm_ud(location));
1471 brw_pop_insn_state(p);
1472 }
1473
1474 {
1475 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1476
1477 insn->header.predicate_control = BRW_PREDICATE_NONE;
1478 insn->header.compression_control = BRW_COMPRESSION_NONE;
1479 insn->header.destreg__conditionalmod = msg_reg_nr;
1480 insn->header.mask_control = BRW_MASK_DISABLE;
1481
1482 /* cast dest to a uword[8] vector */
1483 dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);
1484
1485 brw_set_dest(insn, dest);
1486 brw_set_src0(insn, brw_null_reg());
1487
1488 brw_set_dp_read_message(p->brw,
1489 insn,
1490 bind_table_index,
1491 0, /* msg_control (0 means 1 Oword) */
1492 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
1493 0, /* source cache = data cache */
1494 1, /* msg_length */
1495 1, /* response_length (1 Oword) */
1496 0); /* eot */
1497 }
1498 }
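/* Illustrative sketch (not compiled into the driver): fetch one float[4]
 * constant from the constant buffer.  SURF_INDEX_EXAMPLE_CONST is a
 * hypothetical binding-table slot, not a real define in this driver.
 */
#if 0
#define SURF_INDEX_EXAMPLE_CONST 2   /* hypothetical binding table slot */

static void example_fetch_constant(struct brw_compile *p,
                                   struct brw_reg dst,
                                   GLuint const_index)
{
   brw_dp_READ_4(p,
                 dst,
                 GL_FALSE,             /* no relative addressing */
                 16 * const_index,     /* byte offset, multiple of 16 */
                 SURF_INDEX_EXAMPLE_CONST);
}
#endif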
1499
1500
1501 /**
1502 * Read float[4] constant(s) from VS constant buffer.
1503 * For relative addressing, two float[4] constants will be read into 'dest'.
1504 * Otherwise, one float[4] constant will be read into the lower half of 'dest'.
1505 */
1506 void brw_dp_READ_4_vs(struct brw_compile *p,
1507 struct brw_reg dest,
1508 GLuint location,
1509 GLuint bind_table_index)
1510 {
1511 struct brw_instruction *insn;
1512 GLuint msg_reg_nr = 1;
1513 struct brw_reg b;
1514
1515 /*
1516 printf("vs const read msg, location %u, msg_reg_nr %d\n",
1517 location, msg_reg_nr);
1518 */
1519
1520 /* Setup MRF[1] with location/offset into const buffer */
1521 brw_push_insn_state(p);
1522 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1523 brw_set_mask_control(p, BRW_MASK_DISABLE);
1524 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1525
1526 /* XXX I think we're setting all the dwords of MRF[1] to 'location',
1527 * when the docs say only dword[2] should be set. Hmmm. But it works.
1528 */
1529 b = brw_message_reg(msg_reg_nr);
1530 b = retype(b, BRW_REGISTER_TYPE_UD);
1531 /*b = get_element_ud(b, 2);*/
1532 brw_MOV(p, b, brw_imm_ud(location));
1533
1534 brw_pop_insn_state(p);
1535
1536 insn = next_insn(p, BRW_OPCODE_SEND);
1537
1538 insn->header.predicate_control = BRW_PREDICATE_NONE;
1539 insn->header.compression_control = BRW_COMPRESSION_NONE;
1540 insn->header.destreg__conditionalmod = msg_reg_nr;
1541 insn->header.mask_control = BRW_MASK_DISABLE;
1542
1543 brw_set_dest(insn, dest);
1544 brw_set_src0(insn, brw_null_reg());
1545
1546 brw_set_dp_read_message(p->brw,
1547 insn,
1548 bind_table_index,
1549 0,
1550 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
1551 0, /* source cache = data cache */
1552 1, /* msg_length */
1553 1, /* response_length (1 Oword) */
1554 0); /* eot */
1555 }
1556
1557 /**
1558 * Read a float[4] constant per vertex from VS constant buffer, with
1559 * relative addressing.
1560 */
1561 void brw_dp_READ_4_vs_relative(struct brw_compile *p,
1562 struct brw_reg dest,
1563 struct brw_reg addr_reg,
1564 GLuint offset,
1565 GLuint bind_table_index)
1566 {
1567 struct intel_context *intel = &p->brw->intel;
1568 int msg_type;
1569
1570 /* Setup MRF[1] with offset into const buffer */
1571 brw_push_insn_state(p);
1572 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1573 brw_set_mask_control(p, BRW_MASK_DISABLE);
1574 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1575
1576 /* M1.0 is block offset 0, M1.4 is block offset 1, all other
1577 * fields ignored.
1578 */
1579 brw_ADD(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_UD),
1580 addr_reg, brw_imm_d(offset));
1581 brw_pop_insn_state(p);
1582
1583 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1584
1585 insn->header.predicate_control = BRW_PREDICATE_NONE;
1586 insn->header.compression_control = BRW_COMPRESSION_NONE;
1587 insn->header.destreg__conditionalmod = 0;
1588 insn->header.mask_control = BRW_MASK_DISABLE;
1589
1590 brw_set_dest(insn, dest);
1591 brw_set_src0(insn, brw_vec8_grf(0, 0));
1592
1593 if (intel->gen == 6)
1594 msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
1595 else if (intel->gen == 5 || intel->is_g4x)
1596 msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
1597 else
1598 msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
1599
1600 brw_set_dp_read_message(p->brw,
1601 insn,
1602 bind_table_index,
1603 BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
1604 msg_type,
1605 0, /* source cache = data cache */
1606 2, /* msg_length */
1607 1, /* response_length */
1608 0); /* eot */
1609 }
1610
1611
1612
1613 void brw_fb_WRITE(struct brw_compile *p,
1614 int dispatch_width,
1615 struct brw_reg dest,
1616 GLuint msg_reg_nr,
1617 struct brw_reg src0,
1618 GLuint binding_table_index,
1619 GLuint msg_length,
1620 GLuint response_length,
1621 GLboolean eot)
1622 {
1623 struct intel_context *intel = &p->brw->intel;
1624 struct brw_instruction *insn;
1625 GLuint msg_control, msg_type;
1626 GLboolean header_present = GL_TRUE;
1627
1628 insn = next_insn(p, BRW_OPCODE_SEND);
1629 insn->header.predicate_control = 0; /* XXX */
1630 insn->header.compression_control = BRW_COMPRESSION_NONE;
1631
1632 if (intel->gen >= 6) {
1633 if (msg_length == 4)
1634 header_present = GL_FALSE;
1635
1636 /* headerless version, just submit color payload */
1637 src0 = brw_message_reg(msg_reg_nr);
1638
1639 msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE_GEN6;
1640 } else {
1641 insn->header.destreg__conditionalmod = msg_reg_nr;
1642
1643 msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
1644 }
1645
1646 if (dispatch_width == 16)
1647 msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE;
1648 else
1649 msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01;
1650
1651 brw_set_dest(insn, dest);
1652 brw_set_src0(insn, src0);
1653 brw_set_dp_write_message(p->brw,
1654 insn,
1655 binding_table_index,
1656 msg_control,
1657 msg_type,
1658 msg_length,
1659 header_present,
1660 1, /* pixel scoreboard */
1661 response_length,
1662 eot,
1663 0 /* send_commit_msg */);
1664 }
1665
1666
1667 /**
1668 * Texture sample instruction.
1669 * Note: the msg_type plus msg_length values determine exactly what kind
1670 * of sampling operation is performed. See volume 4, page 161 of docs.
1671 */
1672 void brw_SAMPLE(struct brw_compile *p,
1673 struct brw_reg dest,
1674 GLuint msg_reg_nr,
1675 struct brw_reg src0,
1676 GLuint binding_table_index,
1677 GLuint sampler,
1678 GLuint writemask,
1679 GLuint msg_type,
1680 GLuint response_length,
1681 GLuint msg_length,
1682 GLboolean eot,
1683 GLuint header_present,
1684 GLuint simd_mode)
1685 {
1686 struct intel_context *intel = &p->brw->intel;
1687 GLboolean need_stall = 0;
1688
1689 if (writemask == 0) {
1690 /*printf("%s: zero writemask??\n", __FUNCTION__); */
1691 return;
1692 }
1693
1694 /* Hardware doesn't do destination dependency checking on send
1695 * instructions properly. Add a workaround which generates the
1696 * dependency by other means. In practice it seems like this bug
1697 * only crops up for texture samples, and only where registers are
1698 * written by the send and then written again later without being
1699 * read in between. Luckily for us, we already track that
1700 * information and use it to modify the writemask for the
1701 * instruction, so that is a guide for whether a workaround is
1702 * needed.
1703 */
1704 if (writemask != WRITEMASK_XYZW) {
1705 GLuint dst_offset = 0;
1706 GLuint i, newmask = 0, len = 0;
1707
1708 for (i = 0; i < 4; i++) {
1709 if (writemask & (1<<i))
1710 break;
1711 dst_offset += 2;
1712 }
1713 for (; i < 4; i++) {
1714 if (!(writemask & (1<<i)))
1715 break;
1716 newmask |= 1<<i;
1717 len++;
1718 }
1719
1720 if (newmask != writemask) {
1721 need_stall = 1;
1722 /* printf("need stall %x %x\n", newmask , writemask); */
1723 }
1724 else {
1725 GLboolean dispatch_16 = GL_FALSE;
1726
1727 struct brw_reg m1 = brw_message_reg(msg_reg_nr);
1728
1729 guess_execution_size(p->current, dest);
1730 if (p->current->header.execution_size == BRW_EXECUTE_16)
1731 dispatch_16 = GL_TRUE;
1732
1733 newmask = ~newmask & WRITEMASK_XYZW;
1734
1735 brw_push_insn_state(p);
1736
1737 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1738 brw_set_mask_control(p, BRW_MASK_DISABLE);
1739
1740 brw_MOV(p, m1, brw_vec8_grf(0,0));
1741 brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12));
1742
1743 brw_pop_insn_state(p);
1744
1745 src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
1746 dest = offset(dest, dst_offset);
1747
1748 /* For 16-wide dispatch, masked channels are skipped in the
1749 * response. For 8-wide, masked channels still take up slots,
1750 * and are just not written to.
1751 */
1752 if (dispatch_16)
1753 response_length = len * 2;
1754 }
1755 }
1756
1757 {
1758 struct brw_instruction *insn;
1759
1760 /* Sandybridge doesn't have the implied move for SENDs,
1761 * and the first message register index comes from src0.
1762 */
1763 if (intel->gen >= 6) {
1764 brw_push_insn_state(p);
1765 brw_set_mask_control( p, BRW_MASK_DISABLE );
1766 /* m1 contains header? */
1767 brw_MOV(p, brw_message_reg(msg_reg_nr), src0);
1768 brw_pop_insn_state(p);
1769 src0 = brw_message_reg(msg_reg_nr);
1770 }
1771
1772 insn = next_insn(p, BRW_OPCODE_SEND);
1773 insn->header.predicate_control = 0; /* XXX */
1774 insn->header.compression_control = BRW_COMPRESSION_NONE;
1775 if (intel->gen < 6)
1776 insn->header.destreg__conditionalmod = msg_reg_nr;
1777
1778 brw_set_dest(insn, dest);
1779 brw_set_src0(insn, src0);
1780 brw_set_sampler_message(p->brw, insn,
1781 binding_table_index,
1782 sampler,
1783 msg_type,
1784 response_length,
1785 msg_length,
1786 eot,
1787 header_present,
1788 simd_mode);
1789 }
1790
1791 if (need_stall) {
1792 struct brw_reg reg = vec8(offset(dest, response_length-1));
1793
1794 /* mov (8) r9.0<1>:f r9.0<8;8,1>:f { Align1 }
1795 */
1796 brw_push_insn_state(p);
1797 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1798 brw_MOV(p, reg, reg);
1799 brw_pop_insn_state(p);
1800 }
1801
1802 }
1803
1804 /* All these variables are pretty confusing - we might be better off
1805 * using bitmasks and macros for this, in the old style. Or perhaps
1806 * just having the caller instantiate the fields in dword3 itself.
1807 */
1808 void brw_urb_WRITE(struct brw_compile *p,
1809 struct brw_reg dest,
1810 GLuint msg_reg_nr,
1811 struct brw_reg src0,
1812 GLboolean allocate,
1813 GLboolean used,
1814 GLuint msg_length,
1815 GLuint response_length,
1816 GLboolean eot,
1817 GLboolean writes_complete,
1818 GLuint offset,
1819 GLuint swizzle)
1820 {
1821 struct intel_context *intel = &p->brw->intel;
1822 struct brw_instruction *insn;
1823
1824 /* Sandybridge doesn't have the implied move for SENDs,
1825 * and the first message register index comes from src0.
1826 */
1827 if (intel->gen >= 6) {
1828 brw_push_insn_state(p);
1829 brw_set_mask_control( p, BRW_MASK_DISABLE );
1830 brw_MOV(p, brw_message_reg(msg_reg_nr), src0);
1831 brw_pop_insn_state(p);
1832 src0 = brw_message_reg(msg_reg_nr);
1833 }
1834
1835 insn = next_insn(p, BRW_OPCODE_SEND);
1836
1837 assert(msg_length < BRW_MAX_MRF);
1838
1839 brw_set_dest(insn, dest);
1840 brw_set_src0(insn, src0);
1841 brw_set_src1(insn, brw_imm_d(0));
1842
1843 if (intel->gen < 6)
1844 insn->header.destreg__conditionalmod = msg_reg_nr;
1845
1846 brw_set_urb_message(p->brw,
1847 insn,
1848 allocate,
1849 used,
1850 msg_length,
1851 response_length,
1852 eot,
1853 writes_complete,
1854 offset,
1855 swizzle);
1856 }
1857
1858 void brw_ff_sync(struct brw_compile *p,
1859 struct brw_reg dest,
1860 GLuint msg_reg_nr,
1861 struct brw_reg src0,
1862 GLboolean allocate,
1863 GLuint response_length,
1864 GLboolean eot)
1865 {
1866 struct intel_context *intel = &p->brw->intel;
1867 struct brw_instruction *insn;
1868
1869 /* Sandybridge doesn't have the implied move for SENDs,
1870 * and the first message register index comes from src0.
1871 */
1872 if (intel->gen >= 6) {
1873 brw_push_insn_state(p);
1874 brw_set_mask_control( p, BRW_MASK_DISABLE );
1875 brw_MOV(p, retype(brw_message_reg(msg_reg_nr), BRW_REGISTER_TYPE_UD),
1876 retype(src0, BRW_REGISTER_TYPE_UD));
1877 brw_pop_insn_state(p);
1878 src0 = brw_message_reg(msg_reg_nr);
1879 }
1880
1881 insn = next_insn(p, BRW_OPCODE_SEND);
1882 brw_set_dest(insn, dest);
1883 brw_set_src0(insn, src0);
1884 brw_set_src1(insn, brw_imm_d(0));
1885
1886 if (intel->gen < 6)
1887 insn->header.destreg__conditionalmod = msg_reg_nr;
1888
1889 brw_set_ff_sync_message(p->brw,
1890 insn,
1891 allocate,
1892 response_length,
1893 eot);
1894 }