Merge branch 'lp-offset-twoside'
[mesa.git] / src / mesa / drivers / dri / i965 / brw_eu_emit.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32
33 #include "brw_context.h"
34 #include "brw_defines.h"
35 #include "brw_eu.h"
36
37
38
39
40 /***********************************************************************
41 * Internal helper for constructing instructions
42 */
43
44 static void guess_execution_size( struct brw_instruction *insn,
45 struct brw_reg reg )
46 {
47 if (reg.width == BRW_WIDTH_8 &&
48 insn->header.compression_control == BRW_COMPRESSION_COMPRESSED)
49 insn->header.execution_size = BRW_EXECUTE_16;
50 else
51 insn->header.execution_size = reg.width; /* note - definitions are compatible */
52 }
53
54
/* Encode the destination operand of INSN from DEST.
 *
 * Writes the destination file/type/address-mode fields and then the
 * direct- or indirect-addressing payload, honoring the instruction's
 * access mode (align1 vs align16).  DEST is passed by value, so the
 * hstride fixups below do not affect the caller's struct.
 */
static void brw_set_dest( struct brw_instruction *insn,
                          struct brw_reg dest )
{
   /* Register numbers are limited to 7 bits outside the ARF/MRF files. */
   if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE &&
       dest.file != BRW_MESSAGE_REGISTER_FILE)
      assert(dest.nr < 128);

   insn->bits1.da1.dest_reg_file = dest.file;
   insn->bits1.da1.dest_reg_type = dest.type;
   insn->bits1.da1.dest_address_mode = dest.address_mode;

   if (dest.address_mode == BRW_ADDRESS_DIRECT) {
      insn->bits1.da1.dest_reg_nr = dest.nr;

      if (insn->header.access_mode == BRW_ALIGN_1) {
         insn->bits1.da1.dest_subreg_nr = dest.subnr;
         /* A stride-0 destination is promoted to stride 1. */
         if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
            dest.hstride = BRW_HORIZONTAL_STRIDE_1;
         insn->bits1.da1.dest_horiz_stride = dest.hstride;
      }
      else {
         /* Align16 subregister numbers are in units of 16 bytes. */
         insn->bits1.da16.dest_subreg_nr = dest.subnr / 16;
         insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask;
         /* even ignored in da16, still need to set as '01' */
         insn->bits1.da16.dest_horiz_stride = 1;
      }
   }
   else {
      insn->bits1.ia1.dest_subreg_nr = dest.subnr;

      /* These are different sizes in align1 vs align16:
       */
      if (insn->header.access_mode == BRW_ALIGN_1) {
         insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset;
         /* Same stride-0 -> stride-1 promotion as the direct case. */
         if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
            dest.hstride = BRW_HORIZONTAL_STRIDE_1;
         insn->bits1.ia1.dest_horiz_stride = dest.hstride;
      }
      else {
         insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset;
         /* even ignored in da16, still need to set as '01' */
         insn->bits1.ia16.dest_horiz_stride = 1;
      }
   }

   /* NEW: Set the execution size based on dest.width and
    * insn->compression_control:
    */
   guess_execution_size(insn, dest);
}
105
106 extern int reg_type_size[];
107
108 static void
109 validate_reg(struct brw_instruction *insn, struct brw_reg reg)
110 {
111 int hstride_for_reg[] = {0, 1, 2, 4};
112 int vstride_for_reg[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256};
113 int width_for_reg[] = {1, 2, 4, 8, 16};
114 int execsize_for_reg[] = {1, 2, 4, 8, 16};
115 int width, hstride, vstride, execsize;
116
117 if (reg.file == BRW_IMMEDIATE_VALUE) {
118 /* 3.3.6: Region Parameters. Restriction: Immediate vectors
119 * mean the destination has to be 128-bit aligned and the
120 * destination horiz stride has to be a word.
121 */
122 if (reg.type == BRW_REGISTER_TYPE_V) {
123 assert(hstride_for_reg[insn->bits1.da1.dest_horiz_stride] *
124 reg_type_size[insn->bits1.da1.dest_reg_type] == 2);
125 }
126
127 return;
128 }
129
130 if (reg.file == BRW_ARCHITECTURE_REGISTER_FILE &&
131 reg.file == BRW_ARF_NULL)
132 return;
133
134 assert(reg.hstride >= 0 && reg.hstride < Elements(hstride_for_reg));
135 hstride = hstride_for_reg[reg.hstride];
136
137 if (reg.vstride == 0xf) {
138 vstride = -1;
139 } else {
140 assert(reg.vstride >= 0 && reg.vstride < Elements(vstride_for_reg));
141 vstride = vstride_for_reg[reg.vstride];
142 }
143
144 assert(reg.width >= 0 && reg.width < Elements(width_for_reg));
145 width = width_for_reg[reg.width];
146
147 assert(insn->header.execution_size >= 0 &&
148 insn->header.execution_size < Elements(execsize_for_reg));
149 execsize = execsize_for_reg[insn->header.execution_size];
150
151 /* Restrictions from 3.3.10: Register Region Restrictions. */
152 /* 3. */
153 assert(execsize >= width);
154
155 /* 4. */
156 if (execsize == width && hstride != 0) {
157 assert(vstride == -1 || vstride == width * hstride);
158 }
159
160 /* 5. */
161 if (execsize == width && hstride == 0) {
162 /* no restriction on vstride. */
163 }
164
165 /* 6. */
166 if (width == 1) {
167 assert(hstride == 0);
168 }
169
170 /* 7. */
171 if (execsize == 1 && width == 1) {
172 assert(hstride == 0);
173 assert(vstride == 0);
174 }
175
176 /* 8. */
177 if (vstride == 0 && hstride == 0) {
178 assert(width == 1);
179 }
180
181 /* 10. Check destination issues. */
182 }
183
184 static void brw_set_src0( struct brw_instruction *insn,
185 struct brw_reg reg )
186 {
187 if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE)
188 assert(reg.nr < 128);
189
190 validate_reg(insn, reg);
191
192 insn->bits1.da1.src0_reg_file = reg.file;
193 insn->bits1.da1.src0_reg_type = reg.type;
194 insn->bits2.da1.src0_abs = reg.abs;
195 insn->bits2.da1.src0_negate = reg.negate;
196 insn->bits2.da1.src0_address_mode = reg.address_mode;
197
198 if (reg.file == BRW_IMMEDIATE_VALUE) {
199 insn->bits3.ud = reg.dw1.ud;
200
201 /* Required to set some fields in src1 as well:
202 */
203 insn->bits1.da1.src1_reg_file = 0; /* arf */
204 insn->bits1.da1.src1_reg_type = reg.type;
205 }
206 else
207 {
208 if (reg.address_mode == BRW_ADDRESS_DIRECT) {
209 if (insn->header.access_mode == BRW_ALIGN_1) {
210 insn->bits2.da1.src0_subreg_nr = reg.subnr;
211 insn->bits2.da1.src0_reg_nr = reg.nr;
212 }
213 else {
214 insn->bits2.da16.src0_subreg_nr = reg.subnr / 16;
215 insn->bits2.da16.src0_reg_nr = reg.nr;
216 }
217 }
218 else {
219 insn->bits2.ia1.src0_subreg_nr = reg.subnr;
220
221 if (insn->header.access_mode == BRW_ALIGN_1) {
222 insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset;
223 }
224 else {
225 insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset;
226 }
227 }
228
229 if (insn->header.access_mode == BRW_ALIGN_1) {
230 if (reg.width == BRW_WIDTH_1 &&
231 insn->header.execution_size == BRW_EXECUTE_1) {
232 insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
233 insn->bits2.da1.src0_width = BRW_WIDTH_1;
234 insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0;
235 }
236 else {
237 insn->bits2.da1.src0_horiz_stride = reg.hstride;
238 insn->bits2.da1.src0_width = reg.width;
239 insn->bits2.da1.src0_vert_stride = reg.vstride;
240 }
241 }
242 else {
243 insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
244 insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
245 insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
246 insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
247
248 /* This is an oddity of the fact we're using the same
249 * descriptions for registers in align_16 as align_1:
250 */
251 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
252 insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4;
253 else
254 insn->bits2.da16.src0_vert_stride = reg.vstride;
255 }
256 }
257 }
258
259
/* Encode the second source operand of INSN from REG.
 *
 * src1 is the only slot of a two-source instruction that may hold an
 * immediate; register operands must use direct addressing (hardware
 * restriction) and may not come from the MRF.
 */
void brw_set_src1( struct brw_instruction *insn,
                   struct brw_reg reg )
{
   assert(reg.file != BRW_MESSAGE_REGISTER_FILE);

   assert(reg.nr < 128);

   validate_reg(insn, reg);

   insn->bits1.da1.src1_reg_file = reg.file;
   insn->bits1.da1.src1_reg_type = reg.type;
   insn->bits3.da1.src1_abs = reg.abs;
   insn->bits3.da1.src1_negate = reg.negate;

   /* Only src1 can be immediate in two-argument instructions.
    */
   assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE);

   if (reg.file == BRW_IMMEDIATE_VALUE) {
      insn->bits3.ud = reg.dw1.ud;
   }
   else {
      /* This is a hardware restriction, which may or may not be lifted
       * in the future:
       */
      assert (reg.address_mode == BRW_ADDRESS_DIRECT);
      /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */

      if (insn->header.access_mode == BRW_ALIGN_1) {
         insn->bits3.da1.src1_subreg_nr = reg.subnr;
         insn->bits3.da1.src1_reg_nr = reg.nr;
      }
      else {
         /* Align16 subregister numbers are in units of 16 bytes. */
         insn->bits3.da16.src1_subreg_nr = reg.subnr / 16;
         insn->bits3.da16.src1_reg_nr = reg.nr;
      }

      if (insn->header.access_mode == BRW_ALIGN_1) {
         /* A scalar operand of a SIMD1 instruction gets the canonical
          * <0,1,0> region regardless of what the caller described.
          */
         if (reg.width == BRW_WIDTH_1 &&
             insn->header.execution_size == BRW_EXECUTE_1) {
            insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
            insn->bits3.da1.src1_width = BRW_WIDTH_1;
            insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0;
         }
         else {
            insn->bits3.da1.src1_horiz_stride = reg.hstride;
            insn->bits3.da1.src1_width = reg.width;
            insn->bits3.da1.src1_vert_stride = reg.vstride;
         }
      }
      else {
         insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
         insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
         insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
         insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);

         /* This is an oddity of the fact we're using the same
          * descriptions for registers in align_16 as align_1:
          */
         if (reg.vstride == BRW_VERTICAL_STRIDE_8)
            insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4;
         else
            insn->bits3.da16.src1_vert_stride = reg.vstride;
      }
   }
}
326
327
328
/* Fill in the SEND message descriptor (src1/bits3) for an extended
 * math message.
 *
 * brw_set_src1 with a zero immediate clears bits3 before the
 * descriptor fields are written.  Ironlake (gen5) uses a different
 * descriptor layout and carries the SFID in bits2.
 */
static void brw_set_math_message( struct brw_context *brw,
                                  struct brw_instruction *insn,
                                  GLuint msg_length,
                                  GLuint response_length,
                                  GLuint function,
                                  GLuint integer_type,
                                  GLboolean low_precision,
                                  GLboolean saturate,
                                  GLuint dataType )
{
   struct intel_context *intel = &brw->intel;
   brw_set_src1(insn, brw_imm_d(0));

   if (intel->gen == 5) {
      insn->bits3.math_gen5.function = function;
      insn->bits3.math_gen5.int_type = integer_type;
      insn->bits3.math_gen5.precision = low_precision;
      insn->bits3.math_gen5.saturate = saturate;
      insn->bits3.math_gen5.data_type = dataType;
      insn->bits3.math_gen5.snapshot = 0;
      insn->bits3.math_gen5.header_present = 0;
      insn->bits3.math_gen5.response_length = response_length;
      insn->bits3.math_gen5.msg_length = msg_length;
      insn->bits3.math_gen5.end_of_thread = 0;
      insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_MATH;
      insn->bits2.send_gen5.end_of_thread = 0;
   } else {
      insn->bits3.math.function = function;
      insn->bits3.math.int_type = integer_type;
      insn->bits3.math.precision = low_precision;
      insn->bits3.math.saturate = saturate;
      insn->bits3.math.data_type = dataType;
      insn->bits3.math.response_length = response_length;
      insn->bits3.math.msg_length = msg_length;
      insn->bits3.math.msg_target = BRW_MESSAGE_TARGET_MATH;
      insn->bits3.math.end_of_thread = 0;
   }
}
367
368
/* Fill in the message descriptor for an FF_SYNC URB message.
 *
 * Writes the urb_gen5 descriptor layout unconditionally, so this is
 * presumably only reached on gen5+ hardware — confirm against callers.
 * Most URB fields are unused by FF_SYNC and cleared; only allocate,
 * response_length and end_of_thread vary.
 */
static void brw_set_ff_sync_message(struct brw_context *brw,
                                    struct brw_instruction *insn,
                                    GLboolean allocate,
                                    GLuint response_length,
                                    GLboolean end_of_thread)
{
   struct intel_context *intel = &brw->intel;
   brw_set_src1(insn, brw_imm_d(0));

   insn->bits3.urb_gen5.opcode = 1; /* FF_SYNC */
   insn->bits3.urb_gen5.offset = 0; /* Not used by FF_SYNC */
   insn->bits3.urb_gen5.swizzle_control = 0; /* Not used by FF_SYNC */
   insn->bits3.urb_gen5.allocate = allocate;
   insn->bits3.urb_gen5.used = 0; /* Not used by FF_SYNC */
   insn->bits3.urb_gen5.complete = 0; /* Not used by FF_SYNC */
   insn->bits3.urb_gen5.header_present = 1;
   insn->bits3.urb_gen5.response_length = response_length; /* may be 1 or 0 */
   insn->bits3.urb_gen5.msg_length = 1;
   insn->bits3.urb_gen5.end_of_thread = end_of_thread;
   if (intel->gen >= 6) {
      /* On gen6+ the SFID lives in the destreg/conditionalmod slot. */
      insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_URB;
   } else {
      insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_URB;
      insn->bits2.send_gen5.end_of_thread = end_of_thread;
   }
}
395
/* Fill in the SEND message descriptor for a URB write.
 *
 * Chooses between the gen5+ descriptor layout (with header_present and
 * the SFID in bits2, or on gen6+ in the destreg/conditionalmod slot)
 * and the original gen4 layout.
 */
static void brw_set_urb_message( struct brw_context *brw,
                                 struct brw_instruction *insn,
                                 GLboolean allocate,
                                 GLboolean used,
                                 GLuint msg_length,
                                 GLuint response_length,
                                 GLboolean end_of_thread,
                                 GLboolean complete,
                                 GLuint offset,
                                 GLuint swizzle_control )
{
   struct intel_context *intel = &brw->intel;
   brw_set_src1(insn, brw_imm_d(0));

   if (intel->gen >= 5) {
      insn->bits3.urb_gen5.opcode = 0; /* ? */
      insn->bits3.urb_gen5.offset = offset;
      insn->bits3.urb_gen5.swizzle_control = swizzle_control;
      insn->bits3.urb_gen5.allocate = allocate;
      insn->bits3.urb_gen5.used = used; /* ? */
      insn->bits3.urb_gen5.complete = complete;
      insn->bits3.urb_gen5.header_present = 1;
      insn->bits3.urb_gen5.response_length = response_length;
      insn->bits3.urb_gen5.msg_length = msg_length;
      insn->bits3.urb_gen5.end_of_thread = end_of_thread;
      if (intel->gen >= 6) {
         /* For SNB, the SFID bits moved to the condmod bits, and
          * EOT stayed in bits3 above.  Does the EOT bit setting
          * below on Ironlake even do anything?
          */
         insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_URB;
      } else {
         insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_URB;
         insn->bits2.send_gen5.end_of_thread = end_of_thread;
      }
   } else {
      insn->bits3.urb.opcode = 0; /* ? */
      insn->bits3.urb.offset = offset;
      insn->bits3.urb.swizzle_control = swizzle_control;
      insn->bits3.urb.allocate = allocate;
      insn->bits3.urb.used = used; /* ? */
      insn->bits3.urb.complete = complete;
      insn->bits3.urb.response_length = response_length;
      insn->bits3.urb.msg_length = msg_length;
      insn->bits3.urb.msg_target = BRW_MESSAGE_TARGET_URB;
      insn->bits3.urb.end_of_thread = end_of_thread;
   }
}
444
/* Fill in the SEND message descriptor for a data port write.
 *
 * Three descriptor layouts are handled: gen6+ (render cache, SFID in
 * the destreg/conditionalmod slot), gen5 (dp_write_gen5 with SFID in
 * bits2), and the original gen4 layout (dp_write with msg_target in
 * bits3).
 */
static void brw_set_dp_write_message( struct brw_context *brw,
                                      struct brw_instruction *insn,
                                      GLuint binding_table_index,
                                      GLuint msg_control,
                                      GLuint msg_type,
                                      GLuint msg_length,
                                      GLboolean header_present,
                                      GLuint pixel_scoreboard_clear,
                                      GLuint response_length,
                                      GLuint end_of_thread,
                                      GLuint send_commit_msg)
{
   struct intel_context *intel = &brw->intel;
   brw_set_src1(insn, brw_imm_ud(0));

   if (intel->gen >= 6) {
      insn->bits3.dp_render_cache.binding_table_index = binding_table_index;
      insn->bits3.dp_render_cache.msg_control = msg_control;
      insn->bits3.dp_render_cache.pixel_scoreboard_clear = pixel_scoreboard_clear;
      insn->bits3.dp_render_cache.msg_type = msg_type;
      insn->bits3.dp_render_cache.send_commit_msg = send_commit_msg;
      insn->bits3.dp_render_cache.header_present = header_present;
      insn->bits3.dp_render_cache.response_length = response_length;
      insn->bits3.dp_render_cache.msg_length = msg_length;
      insn->bits3.dp_render_cache.end_of_thread = end_of_thread;
      insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
      /* XXX really need below? */
      insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
      insn->bits2.send_gen5.end_of_thread = end_of_thread;
   } else if (intel->gen == 5) {
      insn->bits3.dp_write_gen5.binding_table_index = binding_table_index;
      insn->bits3.dp_write_gen5.msg_control = msg_control;
      insn->bits3.dp_write_gen5.pixel_scoreboard_clear = pixel_scoreboard_clear;
      insn->bits3.dp_write_gen5.msg_type = msg_type;
      insn->bits3.dp_write_gen5.send_commit_msg = send_commit_msg;
      insn->bits3.dp_write_gen5.header_present = header_present;
      insn->bits3.dp_write_gen5.response_length = response_length;
      insn->bits3.dp_write_gen5.msg_length = msg_length;
      insn->bits3.dp_write_gen5.end_of_thread = end_of_thread;
      insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
      insn->bits2.send_gen5.end_of_thread = end_of_thread;
   } else {
      insn->bits3.dp_write.binding_table_index = binding_table_index;
      insn->bits3.dp_write.msg_control = msg_control;
      insn->bits3.dp_write.pixel_scoreboard_clear = pixel_scoreboard_clear;
      insn->bits3.dp_write.msg_type = msg_type;
      insn->bits3.dp_write.send_commit_msg = send_commit_msg;
      insn->bits3.dp_write.response_length = response_length;
      insn->bits3.dp_write.msg_length = msg_length;
      insn->bits3.dp_write.msg_target = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
      insn->bits3.dp_write.end_of_thread = end_of_thread;
   }
}
498
/* Fill in the SEND message descriptor for a data port read.
 *
 * As with the write path, three layouts are handled (gen6+, gen5,
 * gen4).  Reads never terminate the thread, so end_of_thread is
 * always cleared here.
 */
static void
brw_set_dp_read_message(struct brw_context *brw,
                        struct brw_instruction *insn,
                        GLuint binding_table_index,
                        GLuint msg_control,
                        GLuint msg_type,
                        GLuint target_cache,
                        GLuint msg_length,
                        GLuint response_length)
{
   struct intel_context *intel = &brw->intel;
   brw_set_src1(insn, brw_imm_d(0));

   if (intel->gen >= 6) {
      insn->bits3.dp_render_cache.binding_table_index = binding_table_index;
      insn->bits3.dp_render_cache.msg_control = msg_control;
      insn->bits3.dp_render_cache.pixel_scoreboard_clear = 0;
      insn->bits3.dp_render_cache.msg_type = msg_type;
      insn->bits3.dp_render_cache.send_commit_msg = 0;
      insn->bits3.dp_render_cache.header_present = 1;
      insn->bits3.dp_render_cache.response_length = response_length;
      insn->bits3.dp_render_cache.msg_length = msg_length;
      insn->bits3.dp_render_cache.end_of_thread = 0;
      insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_DATAPORT_READ;
      /* XXX really need below? */
      insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_READ;
      insn->bits2.send_gen5.end_of_thread = 0;
   } else if (intel->gen == 5) {
      insn->bits3.dp_read_gen5.binding_table_index = binding_table_index;
      insn->bits3.dp_read_gen5.msg_control = msg_control;
      insn->bits3.dp_read_gen5.msg_type = msg_type;
      insn->bits3.dp_read_gen5.target_cache = target_cache;
      insn->bits3.dp_read_gen5.header_present = 1;
      insn->bits3.dp_read_gen5.response_length = response_length;
      insn->bits3.dp_read_gen5.msg_length = msg_length;
      insn->bits3.dp_read_gen5.pad1 = 0;
      insn->bits3.dp_read_gen5.end_of_thread = 0;
      insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_READ;
      insn->bits2.send_gen5.end_of_thread = 0;
   } else {
      insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/
      insn->bits3.dp_read.msg_control = msg_control;  /*8:11*/
      insn->bits3.dp_read.msg_type = msg_type;  /*12:13*/
      insn->bits3.dp_read.target_cache = target_cache;  /*14:15*/
      insn->bits3.dp_read.response_length = response_length;  /*16:19*/
      insn->bits3.dp_read.msg_length = msg_length;  /*20:23*/
      insn->bits3.dp_read.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ; /*24:27*/
      insn->bits3.dp_read.pad1 = 0;  /*28:30*/
      insn->bits3.dp_read.end_of_thread = 0;  /*31*/
   }
}
550
/* Fill in the SEND message descriptor for a sampler message.
 *
 * Handles the gen5+, G4X, and original gen4 descriptor layouts.  The
 * eot parameter is asserted to be 0 — sampler messages never end the
 * thread here.
 */
static void brw_set_sampler_message(struct brw_context *brw,
                                    struct brw_instruction *insn,
                                    GLuint binding_table_index,
                                    GLuint sampler,
                                    GLuint msg_type,
                                    GLuint response_length,
                                    GLuint msg_length,
                                    GLboolean eot,
                                    GLuint header_present,
                                    GLuint simd_mode)
{
   struct intel_context *intel = &brw->intel;
   assert(eot == 0);
   brw_set_src1(insn, brw_imm_d(0));

   if (intel->gen >= 5) {
      insn->bits3.sampler_gen5.binding_table_index = binding_table_index;
      insn->bits3.sampler_gen5.sampler = sampler;
      insn->bits3.sampler_gen5.msg_type = msg_type;
      insn->bits3.sampler_gen5.simd_mode = simd_mode;
      insn->bits3.sampler_gen5.header_present = header_present;
      insn->bits3.sampler_gen5.response_length = response_length;
      insn->bits3.sampler_gen5.msg_length = msg_length;
      insn->bits3.sampler_gen5.end_of_thread = eot;
      if (intel->gen >= 6)
         /* On gen6+ the SFID lives in the destreg/conditionalmod slot. */
         insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_SAMPLER;
      else {
         insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_SAMPLER;
         insn->bits2.send_gen5.end_of_thread = eot;
      }
   } else if (intel->is_g4x) {
      insn->bits3.sampler_g4x.binding_table_index = binding_table_index;
      insn->bits3.sampler_g4x.sampler = sampler;
      insn->bits3.sampler_g4x.msg_type = msg_type;
      insn->bits3.sampler_g4x.response_length = response_length;
      insn->bits3.sampler_g4x.msg_length = msg_length;
      insn->bits3.sampler_g4x.end_of_thread = eot;
      insn->bits3.sampler_g4x.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
   } else {
      insn->bits3.sampler.binding_table_index = binding_table_index;
      insn->bits3.sampler.sampler = sampler;
      insn->bits3.sampler.msg_type = msg_type;
      insn->bits3.sampler.return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32;
      insn->bits3.sampler.response_length = response_length;
      insn->bits3.sampler.msg_length = msg_length;
      insn->bits3.sampler.end_of_thread = eot;
      insn->bits3.sampler.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
   }
}
600
601
602
603 static struct brw_instruction *next_insn( struct brw_compile *p,
604 GLuint opcode )
605 {
606 struct brw_instruction *insn;
607
608 assert(p->nr_insn + 1 < BRW_EU_MAX_INSN);
609
610 insn = &p->store[p->nr_insn++];
611 memcpy(insn, p->current, sizeof(*insn));
612
613 /* Reset this one-shot flag:
614 */
615
616 if (p->current->header.destreg__conditionalmod) {
617 p->current->header.destreg__conditionalmod = 0;
618 p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
619 }
620
621 insn->header.opcode = opcode;
622 return insn;
623 }
624
625
626 static struct brw_instruction *brw_alu1( struct brw_compile *p,
627 GLuint opcode,
628 struct brw_reg dest,
629 struct brw_reg src )
630 {
631 struct brw_instruction *insn = next_insn(p, opcode);
632 brw_set_dest(insn, dest);
633 brw_set_src0(insn, src);
634 return insn;
635 }
636
637 static struct brw_instruction *brw_alu2(struct brw_compile *p,
638 GLuint opcode,
639 struct brw_reg dest,
640 struct brw_reg src0,
641 struct brw_reg src1 )
642 {
643 struct brw_instruction *insn = next_insn(p, opcode);
644 brw_set_dest(insn, dest);
645 brw_set_src0(insn, src0);
646 brw_set_src1(insn, src1);
647 return insn;
648 }
649
650
651 /***********************************************************************
652 * Convenience routines.
653 */
/* Generate a public one-source emitter brw_<OP> wrapping brw_alu1. */
#define ALU1(OP)					\
struct brw_instruction *brw_##OP(struct brw_compile *p,	\
	      struct brw_reg dest,			\
	      struct brw_reg src0)			\
{							\
   return brw_alu1(p, BRW_OPCODE_##OP, dest, src0);	\
}

/* Generate a public two-source emitter brw_<OP> wrapping brw_alu2. */
#define ALU2(OP)					\
struct brw_instruction *brw_##OP(struct brw_compile *p,	\
	      struct brw_reg dest,			\
	      struct brw_reg src0,			\
	      struct brw_reg src1)			\
{							\
   return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1);	\
}

/* Rounding operations (other than RNDD) require two instructions - the first
 * stores a rounded value (possibly the wrong way) in the dest register, but
 * also sets a per-channel "increment bit" in the flag register.  A predicated
 * add of 1.0 fixes dest to contain the desired result.
 */
#define ROUND(OP)							      \
void brw_##OP(struct brw_compile *p,					      \
	      struct brw_reg dest,					      \
	      struct brw_reg src)					      \
{									      \
   struct brw_instruction *rnd, *add;					      \
   rnd = next_insn(p, BRW_OPCODE_##OP);					      \
   brw_set_dest(rnd, dest);						      \
   brw_set_src0(rnd, src);						      \
   rnd->header.destreg__conditionalmod = 0x7; /* turn on round-increments */ \
									      \
   add = brw_ADD(p, dest, dest, brw_imm_f(1.0f));			      \
   add->header.predicate_control = BRW_PREDICATE_NORMAL;		      \
}

/* Instantiate the emitters for the simple ALU opcodes. */
ALU1(MOV)
ALU2(SEL)
ALU1(NOT)
ALU2(AND)
ALU2(OR)
ALU2(XOR)
ALU2(SHR)
ALU2(SHL)
ALU2(RSR)
ALU2(RSL)
ALU2(ASR)
ALU1(FRC)
ALU1(RNDD)
ALU2(MAC)
ALU2(MACH)
ALU1(LZD)
ALU2(DP4)
ALU2(DPH)
ALU2(DP3)
ALU2(DP2)
ALU2(LINE)
ALU2(PLN)


ROUND(RNDZ)
ROUND(RNDE)
718
719
720 struct brw_instruction *brw_ADD(struct brw_compile *p,
721 struct brw_reg dest,
722 struct brw_reg src0,
723 struct brw_reg src1)
724 {
725 /* 6.2.2: add */
726 if (src0.type == BRW_REGISTER_TYPE_F ||
727 (src0.file == BRW_IMMEDIATE_VALUE &&
728 src0.type == BRW_REGISTER_TYPE_VF)) {
729 assert(src1.type != BRW_REGISTER_TYPE_UD);
730 assert(src1.type != BRW_REGISTER_TYPE_D);
731 }
732
733 if (src1.type == BRW_REGISTER_TYPE_F ||
734 (src1.file == BRW_IMMEDIATE_VALUE &&
735 src1.type == BRW_REGISTER_TYPE_VF)) {
736 assert(src0.type != BRW_REGISTER_TYPE_UD);
737 assert(src0.type != BRW_REGISTER_TYPE_D);
738 }
739
740 return brw_alu2(p, BRW_OPCODE_ADD, dest, src0, src1);
741 }
742
743 struct brw_instruction *brw_MUL(struct brw_compile *p,
744 struct brw_reg dest,
745 struct brw_reg src0,
746 struct brw_reg src1)
747 {
748 /* 6.32.38: mul */
749 if (src0.type == BRW_REGISTER_TYPE_D ||
750 src0.type == BRW_REGISTER_TYPE_UD ||
751 src1.type == BRW_REGISTER_TYPE_D ||
752 src1.type == BRW_REGISTER_TYPE_UD) {
753 assert(dest.type != BRW_REGISTER_TYPE_F);
754 }
755
756 if (src0.type == BRW_REGISTER_TYPE_F ||
757 (src0.file == BRW_IMMEDIATE_VALUE &&
758 src0.type == BRW_REGISTER_TYPE_VF)) {
759 assert(src1.type != BRW_REGISTER_TYPE_UD);
760 assert(src1.type != BRW_REGISTER_TYPE_D);
761 }
762
763 if (src1.type == BRW_REGISTER_TYPE_F ||
764 (src1.file == BRW_IMMEDIATE_VALUE &&
765 src1.type == BRW_REGISTER_TYPE_VF)) {
766 assert(src0.type != BRW_REGISTER_TYPE_UD);
767 assert(src0.type != BRW_REGISTER_TYPE_D);
768 }
769
770 assert(src0.file != BRW_ARCHITECTURE_REGISTER_FILE ||
771 src0.nr != BRW_ARF_ACCUMULATOR);
772 assert(src1.file != BRW_ARCHITECTURE_REGISTER_FILE ||
773 src1.nr != BRW_ARF_ACCUMULATOR);
774
775 return brw_alu2(p, BRW_OPCODE_MUL, dest, src0, src1);
776 }
777
778
779 void brw_NOP(struct brw_compile *p)
780 {
781 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP);
782 brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
783 brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
784 brw_set_src1(insn, brw_imm_ud(0x0));
785 }
786
787
788
789
790
791 /***********************************************************************
792 * Comparisons, if/else/endif
793 */
794
/* Emit a JMPI (jump indexed) instruction.
 *
 * JMPI is forced to scalar (SIMD1), uncompressed, mask-disabled
 * execution.  The pending default predication is also cleared so that
 * following instructions are not accidentally predicated.
 */
struct brw_instruction *brw_JMPI(struct brw_compile *p,
                                 struct brw_reg dest,
                                 struct brw_reg src0,
                                 struct brw_reg src1)
{
   struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1);

   insn->header.execution_size = 1;
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.mask_control = BRW_MASK_DISABLE;

   /* Consume the predicate default for subsequent instructions. */
   p->current->header.predicate_control = BRW_PREDICATE_NONE;

   return insn;
}
810
811 /* EU takes the value from the flag register and pushes it onto some
812 * sort of a stack (presumably merging with any flag value already on
813 * the stack). Within an if block, the flags at the top of the stack
814 * control execution on each channel of the unit, eg. on each of the
815 * 16 pixel values in our wm programs.
816 *
817 * When the matching 'else' instruction is reached (presumably by
818 * countdown of the instruction count patched in by our ELSE/ENDIF
 * functions), the relevant flags are inverted.
820 *
821 * When the matching 'endif' instruction is reached, the flags are
822 * popped off. If the stack is now empty, normal execution resumes.
823 *
824 * No attempt is made to deal with stack overflow (14 elements?).
825 */
/* Emit an IF opening a conditional block; the jump target is left 0
 * here and patched later by brw_ELSE / brw_ENDIF.
 *
 * In single-program-flow mode a predicate-inverted ADD is emitted
 * instead, which brw_ELSE/brw_ENDIF later turn into an IP-relative
 * jump over the block.  Pre-gen6 branches through the IP register;
 * gen6 uses the bits1 branch_gen6.jump_count field.
 */
struct brw_instruction *brw_IF(struct brw_compile *p, GLuint execute_size)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;

   if (p->single_program_flow) {
      assert(execute_size == BRW_EXECUTE_1);

      insn = next_insn(p, BRW_OPCODE_ADD);
      insn->header.predicate_inverse = 1;
   } else {
      insn = next_insn(p, BRW_OPCODE_IF);
   }

   /* Override the defaults for this instruction:
    */
   if (intel->gen < 6) {
      brw_set_dest(insn, brw_ip_reg());
      brw_set_src0(insn, brw_ip_reg());
      brw_set_src1(insn, brw_imm_d(0x0));
   } else {
      brw_set_dest(insn, brw_imm_w(0));
      insn->bits1.branch_gen6.jump_count = 0;
      brw_set_src0(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   }

   insn->header.execution_size = execute_size;
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.predicate_control = BRW_PREDICATE_NORMAL;
   insn->header.mask_control = BRW_MASK_ENABLE;
   if (!p->single_program_flow)
      insn->header.thread_control = BRW_THREAD_SWITCH;

   /* The IF consumes the pending predicate; clear the default so the
    * body is not predicated.
    */
   p->current->header.predicate_control = BRW_PREDICATE_NONE;

   return insn;
}
864
/* Emit a gen6-style IF that performs its own comparison of src0 and
 * src1 under CONDITIONAL (the condmod field), rather than consuming a
 * previously-set predicate.  The jump count is left 0 for later
 * patching.
 */
struct brw_instruction *
brw_IF_gen6(struct brw_compile *p, uint32_t conditional,
            struct brw_reg src0, struct brw_reg src1)
{
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_IF);

   brw_set_dest(insn, brw_imm_w(0));
   insn->header.execution_size = BRW_EXECUTE_8;
   insn->bits1.branch_gen6.jump_count = 0;
   brw_set_src0(insn, src0);
   brw_set_src1(insn, src1);

   assert(insn->header.compression_control == BRW_COMPRESSION_NONE);
   assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
   insn->header.destreg__conditionalmod = conditional;

   if (!p->single_program_flow)
      insn->header.thread_control = BRW_THREAD_SWITCH;

   return insn;
}
888
/* Emit an ELSE and patch the matching IF (or its single-program-flow
 * ADD stand-in) to jump here.
 *
 * Returns the ELSE instruction, whose own jump target is patched later
 * by brw_ENDIF.
 */
struct brw_instruction *brw_ELSE(struct brw_compile *p,
                                 struct brw_instruction *if_insn)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;
   GLuint br = 1;

   /* jump count is for 64bit data chunk each, so one 128bit
      instruction requires 2 chunks. */
   if (intel->gen >= 5)
      br = 2;

   if (p->single_program_flow) {
      insn = next_insn(p, BRW_OPCODE_ADD);
   } else {
      insn = next_insn(p, BRW_OPCODE_ELSE);
   }

   if (intel->gen < 6) {
      brw_set_dest(insn, brw_ip_reg());
      brw_set_src0(insn, brw_ip_reg());
      brw_set_src1(insn, brw_imm_d(0x0));
   } else {
      brw_set_dest(insn, brw_imm_w(0));
      insn->bits1.branch_gen6.jump_count = 0;
      brw_set_src0(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   }

   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.execution_size = if_insn->header.execution_size;
   insn->header.mask_control = BRW_MASK_ENABLE;
   if (!p->single_program_flow)
      insn->header.thread_control = BRW_THREAD_SWITCH;

   /* Patch the if instruction to point at this instruction.
    */
   if (p->single_program_flow) {
      assert(if_insn->header.opcode == BRW_OPCODE_ADD);

      /* IP offset in bytes: 16 bytes per instruction. */
      if_insn->bits3.ud = (insn - if_insn + 1) * 16;
   } else {
      assert(if_insn->header.opcode == BRW_OPCODE_IF);

      if (intel->gen < 6) {
         if_insn->bits3.if_else.jump_count = br * (insn - if_insn);
         if_insn->bits3.if_else.pop_count = 0;
         if_insn->bits3.if_else.pad0 = 0;
      } else {
         if_insn->bits1.branch_gen6.jump_count = br * (insn - if_insn + 1);
      }
   }

   return insn;
}
944
/**
 * Close an IF/ELSE/ENDIF construct, back-patching \p patch_insn (the
 * matching IF or ELSE) to jump to this point.
 *
 * In single-program-flow mode no ENDIF instruction is emitted at all:
 * the ADD standing in for the IF/ELSE is simply patched with the byte
 * offset to the next instruction.
 */
void brw_ENDIF(struct brw_compile *p,
	       struct brw_instruction *patch_insn)
{
   struct intel_context *intel = &p->brw->intel;
   /* jump counts are in 64-bit chunks on gen5+ (two per 128-bit
    * instruction), whole instructions before that -- see brw_ELSE().
    */
   GLuint br = 1;

   if (intel->gen >= 5)
      br = 2;

   if (p->single_program_flow) {
      /* In single program flow mode, there's no need to execute an ENDIF,
       * since we don't need to do any stack operations, and if we're executing
       * currently, we want to just continue executing.
       */
      struct brw_instruction *next = &p->store[p->nr_insn];

      assert(patch_insn->header.opcode == BRW_OPCODE_ADD);

      /* Byte offset from the patched ADD to the next instruction. */
      patch_insn->bits3.ud = (next - patch_insn) * 16;
   } else {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_ENDIF);

      if (intel->gen < 6) {
	 brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
	 brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
	 brw_set_src1(insn, brw_imm_d(0x0));
      } else {
	 /* Gen6 branch instructions carry the jump target in bits1;
	  * dest/src operands are unused (null).
	  */
	 brw_set_dest(insn, brw_imm_w(0));
	 brw_set_src0(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
	 brw_set_src1(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      }

      insn->header.compression_control = BRW_COMPRESSION_NONE;
      insn->header.execution_size = patch_insn->header.execution_size;
      insn->header.mask_control = BRW_MASK_ENABLE;
      insn->header.thread_control = BRW_THREAD_SWITCH;

      /* The instruction being patched must not have been patched
       * already.
       */
      if (intel->gen < 6)
	 assert(patch_insn->bits3.if_else.jump_count == 0);
      else
	 assert(patch_insn->bits1.branch_gen6.jump_count == 0);

      /* Patch the if or else instructions to point at this or the next
       * instruction respectively.
       */
      if (patch_insn->header.opcode == BRW_OPCODE_IF) {
	 if (intel->gen < 6) {
	    /* Turn it into an IFF, which means no mask stack operations for
	     * all-false and jumping past the ENDIF.
	     */
	    patch_insn->header.opcode = BRW_OPCODE_IFF;
	    patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1);
	    patch_insn->bits3.if_else.pop_count = 0;
	    patch_insn->bits3.if_else.pad0 = 0;
	 } else {
	    /* As of gen6, there is no IFF and IF must point to the ENDIF. */
	    patch_insn->bits1.branch_gen6.jump_count = br * (insn - patch_insn);
	 }
      } else {
	 assert(patch_insn->header.opcode == BRW_OPCODE_ELSE);
	 if (intel->gen < 6) {
	    /* BRW_OPCODE_ELSE pre-gen6 should point just past the
	     * matching ENDIF.
	     */
	    patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1);
	    patch_insn->bits3.if_else.pop_count = 1;
	    patch_insn->bits3.if_else.pad0 = 0;
	 } else {
	    /* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. */
	    patch_insn->bits1.branch_gen6.jump_count = br * (insn - patch_insn);
	 }
      }

      /* Also pop item off the stack in the endif instruction:
       */
      if (intel->gen < 6) {
	 insn->bits3.if_else.jump_count = 0;
	 insn->bits3.if_else.pop_count = 1;
	 insn->bits3.if_else.pad0 = 0;
      } else {
	 insn->bits1.branch_gen6.jump_count = 2;
      }
   }
}
1029
1030 struct brw_instruction *brw_BREAK(struct brw_compile *p, int pop_count)
1031 {
1032 struct brw_instruction *insn;
1033 insn = next_insn(p, BRW_OPCODE_BREAK);
1034 brw_set_dest(insn, brw_ip_reg());
1035 brw_set_src0(insn, brw_ip_reg());
1036 brw_set_src1(insn, brw_imm_d(0x0));
1037 insn->header.compression_control = BRW_COMPRESSION_NONE;
1038 insn->header.execution_size = BRW_EXECUTE_8;
1039 /* insn->header.mask_control = BRW_MASK_DISABLE; */
1040 insn->bits3.if_else.pad0 = 0;
1041 insn->bits3.if_else.pop_count = pop_count;
1042 return insn;
1043 }
1044
1045 struct brw_instruction *brw_CONT(struct brw_compile *p, int pop_count)
1046 {
1047 struct brw_instruction *insn;
1048 insn = next_insn(p, BRW_OPCODE_CONTINUE);
1049 brw_set_dest(insn, brw_ip_reg());
1050 brw_set_src0(insn, brw_ip_reg());
1051 brw_set_src1(insn, brw_imm_d(0x0));
1052 insn->header.compression_control = BRW_COMPRESSION_NONE;
1053 insn->header.execution_size = BRW_EXECUTE_8;
1054 /* insn->header.mask_control = BRW_MASK_DISABLE; */
1055 insn->bits3.if_else.pad0 = 0;
1056 insn->bits3.if_else.pop_count = pop_count;
1057 return insn;
1058 }
1059
1060 /* DO/WHILE loop:
1061 */
1062 struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size)
1063 {
1064 if (p->single_program_flow) {
1065 return &p->store[p->nr_insn];
1066 } else {
1067 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO);
1068
1069 /* Override the defaults for this instruction:
1070 */
1071 brw_set_dest(insn, brw_null_reg());
1072 brw_set_src0(insn, brw_null_reg());
1073 brw_set_src1(insn, brw_null_reg());
1074
1075 insn->header.compression_control = BRW_COMPRESSION_NONE;
1076 insn->header.execution_size = execute_size;
1077 insn->header.predicate_control = BRW_PREDICATE_NONE;
1078 /* insn->header.mask_control = BRW_MASK_ENABLE; */
1079 /* insn->header.mask_control = BRW_MASK_DISABLE; */
1080
1081 return insn;
1082 }
1083 }
1084
1085
1086
/**
 * Emit the WHILE closing a DO/WHILE loop, pointing back at \p do_insn.
 *
 * In single-program-flow mode the branch is an ADD to the IP register
 * with a (negative) byte offset; otherwise a real WHILE is emitted
 * with a backward jump count.
 */
struct brw_instruction *brw_WHILE(struct brw_compile *p,
                                  struct brw_instruction *do_insn)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;
   /* jump counts are in 64-bit chunks on gen5+ (two per 128-bit
    * instruction), whole instructions before that -- see brw_ELSE().
    */
   GLuint br = 1;

   if (intel->gen >= 5)
      br = 2;

   if (p->single_program_flow)
      insn = next_insn(p, BRW_OPCODE_ADD);
   else
      insn = next_insn(p, BRW_OPCODE_WHILE);

   brw_set_dest(insn, brw_ip_reg());
   brw_set_src0(insn, brw_ip_reg());
   brw_set_src1(insn, brw_imm_d(0x0));

   insn->header.compression_control = BRW_COMPRESSION_NONE;

   if (p->single_program_flow) {
      insn->header.execution_size = BRW_EXECUTE_1;

      /* Byte offset back to the top of the loop (negative, since
       * do_insn precedes this instruction).
       */
      insn->bits3.d = (do_insn - insn) * 16;
   } else {
      insn->header.execution_size = do_insn->header.execution_size;

      assert(do_insn->header.opcode == BRW_OPCODE_DO);
      insn->bits3.if_else.jump_count = br * (do_insn - insn + 1);
      insn->bits3.if_else.pop_count = 0;
      insn->bits3.if_else.pad0 = 0;
   }

   /* Reset the default predication so it doesn't leak past the loop;
    * presumably it was set up for the loop condition -- NOTE(review):
    * confirm against callers.
    */
   p->current->header.predicate_control = BRW_PREDICATE_NONE;
   return insn;
}
1127
1128
1129 /* FORWARD JUMPS:
1130 */
1131 void brw_land_fwd_jump(struct brw_compile *p,
1132 struct brw_instruction *jmp_insn)
1133 {
1134 struct intel_context *intel = &p->brw->intel;
1135 struct brw_instruction *landing = &p->store[p->nr_insn];
1136 GLuint jmpi = 1;
1137
1138 if (intel->gen >= 5)
1139 jmpi = 2;
1140
1141 assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
1142 assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE);
1143
1144 jmp_insn->bits3.ud = jmpi * ((landing - jmp_insn) - 1);
1145 }
1146
1147
1148
1149 /* To integrate with the above, it makes sense that the comparison
1150 * instruction should populate the flag register. It might be simpler
1151 * just to use the flag reg for most WM tasks?
1152 */
1153 void brw_CMP(struct brw_compile *p,
1154 struct brw_reg dest,
1155 GLuint conditional,
1156 struct brw_reg src0,
1157 struct brw_reg src1)
1158 {
1159 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP);
1160
1161 insn->header.destreg__conditionalmod = conditional;
1162 brw_set_dest(insn, dest);
1163 brw_set_src0(insn, src0);
1164 brw_set_src1(insn, src1);
1165
1166 /* guess_execution_size(insn, src0); */
1167
1168
1169 /* Make it so that future instructions will use the computed flag
1170 * value until brw_set_predicate_control_flag_value() is called
1171 * again.
1172 */
1173 if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
1174 dest.nr == 0) {
1175 p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
1176 p->flag_value = 0xff;
1177 }
1178 }
1179
/* Issue a 'wait' instruction on notification register n1; the host can
 * program MMIO to signal the notification and wake the thread.
 */
1182 void brw_WAIT (struct brw_compile *p)
1183 {
1184 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_WAIT);
1185 struct brw_reg src = brw_notification_1_reg();
1186
1187 brw_set_dest(insn, src);
1188 brw_set_src0(insn, src);
1189 brw_set_src1(insn, brw_null_reg());
1190 insn->header.execution_size = 0; /* must */
1191 insn->header.predicate_control = 0;
1192 insn->header.compression_control = 0;
1193 }
1194
1195
1196 /***********************************************************************
1197 * Helpers for the various SEND message types:
1198 */
1199
1200 /** Extended math function, float[8].
1201 */
1202 void brw_math( struct brw_compile *p,
1203 struct brw_reg dest,
1204 GLuint function,
1205 GLuint saturate,
1206 GLuint msg_reg_nr,
1207 struct brw_reg src,
1208 GLuint data_type,
1209 GLuint precision )
1210 {
1211 struct intel_context *intel = &p->brw->intel;
1212
1213 if (intel->gen >= 6) {
1214 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);
1215
1216 assert(dest.file == BRW_GENERAL_REGISTER_FILE);
1217 assert(src.file == BRW_GENERAL_REGISTER_FILE);
1218
1219 assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
1220 assert(src.hstride == BRW_HORIZONTAL_STRIDE_1);
1221
1222 if (function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT &&
1223 function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
1224 assert(src.type == BRW_REGISTER_TYPE_F);
1225 }
1226
1227 /* Math is the same ISA format as other opcodes, except that CondModifier
1228 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1229 */
1230 insn->header.destreg__conditionalmod = function;
1231
1232 brw_set_dest(insn, dest);
1233 brw_set_src0(insn, src);
1234 brw_set_src1(insn, brw_null_reg());
1235 } else {
1236 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1237 GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
1238 GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
1239 /* Example code doesn't set predicate_control for send
1240 * instructions.
1241 */
1242 insn->header.predicate_control = 0;
1243 insn->header.destreg__conditionalmod = msg_reg_nr;
1244
1245 brw_set_dest(insn, dest);
1246 brw_set_src0(insn, src);
1247 brw_set_math_message(p->brw,
1248 insn,
1249 msg_length, response_length,
1250 function,
1251 BRW_MATH_INTEGER_UNSIGNED,
1252 precision,
1253 saturate,
1254 data_type);
1255 }
1256 }
1257
1258 /** Extended math function, float[8].
1259 */
1260 void brw_math2(struct brw_compile *p,
1261 struct brw_reg dest,
1262 GLuint function,
1263 struct brw_reg src0,
1264 struct brw_reg src1)
1265 {
1266 struct intel_context *intel = &p->brw->intel;
1267 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);
1268
1269 assert(intel->gen >= 6);
1270 (void) intel;
1271
1272
1273 assert(dest.file == BRW_GENERAL_REGISTER_FILE);
1274 assert(src0.file == BRW_GENERAL_REGISTER_FILE);
1275 assert(src1.file == BRW_GENERAL_REGISTER_FILE);
1276
1277 assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
1278 assert(src0.hstride == BRW_HORIZONTAL_STRIDE_1);
1279 assert(src1.hstride == BRW_HORIZONTAL_STRIDE_1);
1280
1281 if (function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT &&
1282 function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
1283 assert(src0.type == BRW_REGISTER_TYPE_F);
1284 assert(src1.type == BRW_REGISTER_TYPE_F);
1285 }
1286
1287 /* Math is the same ISA format as other opcodes, except that CondModifier
1288 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1289 */
1290 insn->header.destreg__conditionalmod = function;
1291
1292 brw_set_dest(insn, dest);
1293 brw_set_src0(insn, src0);
1294 brw_set_src1(insn, src1);
1295 }
1296
1297 /**
1298 * Extended math function, float[16].
1299 * Use 2 send instructions.
1300 */
void brw_math_16( struct brw_compile *p,
		  struct brw_reg dest,
		  GLuint function,
		  GLuint saturate,
		  GLuint msg_reg_nr,
		  struct brw_reg src,
		  GLuint precision )
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;
   /* POW carries a second operand in an extra message register;
    * SINCOS returns two results, needing two response registers.
    */
   GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
   GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;

   /* On gen6+ a single MATH opcode suffices; the message parameters
    * are not used.
    */
   if (intel->gen >= 6) {
      insn = next_insn(p, BRW_OPCODE_MATH);

      /* Math is the same ISA format as other opcodes, except that CondModifier
       * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
       */
      insn->header.destreg__conditionalmod = function;

      brw_set_dest(insn, dest);
      brw_set_src0(insn, src);
      brw_set_src1(insn, brw_null_reg());
      return;
   }

   /* Pre-gen6 the math message is issued as two uncompressed SENDs.
    *
    * First instruction: lower half.
    */
   brw_push_insn_state(p);
   brw_set_predicate_control_flag_value(p, 0xff);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);

   insn = next_insn(p, BRW_OPCODE_SEND);
   insn->header.destreg__conditionalmod = msg_reg_nr;

   brw_set_dest(insn, dest);
   brw_set_src0(insn, src);
   brw_set_math_message(p->brw,
			insn,
			msg_length, response_length,
			function,
			BRW_MATH_INTEGER_UNSIGNED,
			precision,
			saturate,
			BRW_MATH_DATA_VECTOR);

   /* Second instruction: the second half of the channels, writing one
    * register further on from the following message register.
    */
   insn = next_insn(p, BRW_OPCODE_SEND);
   insn->header.compression_control = BRW_COMPRESSION_2NDHALF;
   insn->header.destreg__conditionalmod = msg_reg_nr+1;

   brw_set_dest(insn, offset(dest,1));
   brw_set_src0(insn, src);
   brw_set_math_message(p->brw,
			insn,
			msg_length, response_length,
			function,
			BRW_MATH_INTEGER_UNSIGNED,
			precision,
			saturate,
			BRW_MATH_DATA_VECTOR);

   brw_pop_insn_state(p);
}
1367
1368
1369 /**
1370 * Write a block of OWORDs (half a GRF each) from the scratch buffer,
1371 * using a constant offset per channel.
1372 *
1373 * The offset must be aligned to oword size (16 bytes). Used for
1374 * register spilling.
1375 */
void brw_oword_block_write_scratch(struct brw_compile *p,
				   struct brw_reg mrf,
				   int num_regs,
				   GLuint offset)
{
   struct intel_context *intel = &p->brw->intel;
   uint32_t msg_control;
   int mlen;

   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);

   /* One GRF of data is two owords; message length is the header
    * register plus the data registers.
    */
   if (num_regs == 1) {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
      mlen = 2;
   } else {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
      mlen = 3;
   }

   /* Set up the message header.  This is g0, with g0.2 filled with
    * the offset.  We don't want to leave our offset around in g0 or
    * it'll screw up texture samples, so set it up inside the message
    * reg.
    */
   {
      brw_push_insn_state(p);
      brw_set_mask_control(p, BRW_MASK_DISABLE);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);

      brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));

      /* set message header global offset field (reg 0, element 2) */
      brw_MOV(p,
	      retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
				  mrf.nr,
				  2), BRW_REGISTER_TYPE_UD),
	      brw_imm_ud(offset));

      brw_pop_insn_state(p);
   }

   {
      struct brw_reg dest;
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
      int send_commit_msg;
      struct brw_reg src_header = retype(brw_vec8_grf(0, 0),
					 BRW_REGISTER_TYPE_UW);

      /* The send must be uncompressed; widen the commit-write source
       * to cover the full register if it wasn't.
       */
      if (insn->header.compression_control != BRW_COMPRESSION_NONE) {
	 insn->header.compression_control = BRW_COMPRESSION_NONE;
	 src_header = vec16(src_header);
      }
      assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
      insn->header.destreg__conditionalmod = mrf.nr;

      /* Until gen6, writes followed by reads from the same location
       * are not guaranteed to be ordered unless write_commit is set.
       * If set, then a no-op write is issued to the destination
       * register to set a dependency, and a read from the destination
       * can be used to ensure the ordering.
       *
       * For gen6, only writes between different threads need ordering
       * protection.  Our use of DP writes is all about register
       * spilling within a thread.
       */
      if (intel->gen >= 6) {
	 dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
	 send_commit_msg = 0;
      } else {
	 dest = src_header;
	 send_commit_msg = 1;
      }

      brw_set_dest(insn, dest);
      brw_set_src0(insn, brw_null_reg());

      brw_set_dp_write_message(p->brw,
			       insn,
			       255, /* binding table index (255=stateless) */
			       msg_control,
			       BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE, /* msg_type */
			       mlen,
			       GL_TRUE, /* header_present */
			       0, /* pixel scoreboard */
			       send_commit_msg, /* response_length */
			       0, /* eot */
			       send_commit_msg);
   }
}
1465
1466
1467 /**
1468 * Read a block of owords (half a GRF each) from the scratch buffer
1469 * using a constant index per channel.
1470 *
1471 * Offset must be aligned to oword size (16 bytes). Used for register
1472 * spilling.
1473 */
void
brw_oword_block_read_scratch(struct brw_compile *p,
			     struct brw_reg dest,
			     struct brw_reg mrf,
			     int num_regs,
			     GLuint offset)
{
   uint32_t msg_control;
   int rlen;

   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
   dest = retype(dest, BRW_REGISTER_TYPE_UW);

   /* One GRF of data is two owords; the response is one register per
    * GRF read.
    */
   if (num_regs == 1) {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
      rlen = 1;
   } else {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
      rlen = 2;
   }

   /* Build the message header in the MRF: g0 with the scratch offset
    * in element 2 (see brw_oword_block_write_scratch()).
    */
   {
      brw_push_insn_state(p);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      brw_set_mask_control(p, BRW_MASK_DISABLE);

      brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));

      /* set message header global offset field (reg 0, element 2) */
      brw_MOV(p,
	      retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
				  mrf.nr,
				  2), BRW_REGISTER_TYPE_UD),
	      brw_imm_ud(offset));

      brw_pop_insn_state(p);
   }

   {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);

      assert(insn->header.predicate_control == 0);
      insn->header.compression_control = BRW_COMPRESSION_NONE;
      insn->header.destreg__conditionalmod = mrf.nr;

      brw_set_dest(insn, dest);	/* UW? */
      brw_set_src0(insn, brw_null_reg());

      brw_set_dp_read_message(p->brw,
			      insn,
			      255, /* binding table index (255=stateless) */
			      msg_control,
			      BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
			      1, /* target cache (render/scratch) */
			      1, /* msg_length */
			      rlen);
   }
}
1532
1533 /**
1534 * Read a float[4] vector from the data port Data Cache (const buffer).
1535 * Location (in buffer) should be a multiple of 16.
1536 * Used for fetching shader constants.
1537 */
void brw_oword_block_read(struct brw_compile *p,
			  struct brw_reg dest,
			  struct brw_reg mrf,
			  uint32_t offset,
			  uint32_t bind_table_index)
{
   struct intel_context *intel = &p->brw->intel;

   /* On newer hardware, offset is in units of owords. */
   if (intel->gen >= 6)
      offset /= 16;

   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);

   /* Build the message header in the MRF with predication,
    * compression, and the execution mask all disabled.
    */
   brw_push_insn_state(p);
   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_set_mask_control(p, BRW_MASK_DISABLE);

   brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));

   /* set message header global offset field (reg 0, element 2) */
   brw_MOV(p,
	   retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
			       mrf.nr,
			       2), BRW_REGISTER_TYPE_UD),
	   brw_imm_ud(offset));

   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
   insn->header.destreg__conditionalmod = mrf.nr;

   /* cast dest to a uword[8] vector */
   dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);

   brw_set_dest(insn, dest);
   if (intel->gen >= 6) {
      /* Gen6 has no implied move for SENDs; the message source is the
       * MRF itself.
       */
      brw_set_src0(insn, mrf);
   } else {
      brw_set_src0(insn, brw_null_reg());
   }

   brw_set_dp_read_message(p->brw,
			   insn,
			   bind_table_index,
			   BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW,
			   BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ,
			   0, /* source cache = data cache */
			   1, /* msg_length */
			   1); /* response_length (1 reg, 2 owords!) */

   brw_pop_insn_state(p);
}
1590
1591 /**
1592 * Read a set of dwords from the data port Data Cache (const buffer).
1593 *
1594 * Location (in buffer) appears as UD offsets in the register after
1595 * the provided mrf header reg.
1596 */
1597 void brw_dword_scattered_read(struct brw_compile *p,
1598 struct brw_reg dest,
1599 struct brw_reg mrf,
1600 uint32_t bind_table_index)
1601 {
1602 mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
1603
1604 brw_push_insn_state(p);
1605 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1606 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1607 brw_set_mask_control(p, BRW_MASK_DISABLE);
1608 brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
1609 brw_pop_insn_state(p);
1610
1611 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1612 insn->header.destreg__conditionalmod = mrf.nr;
1613
1614 /* cast dest to a uword[8] vector */
1615 dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);
1616
1617 brw_set_dest(insn, dest);
1618 brw_set_src0(insn, brw_null_reg());
1619
1620 brw_set_dp_read_message(p->brw,
1621 insn,
1622 bind_table_index,
1623 BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS,
1624 BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ,
1625 0, /* source cache = data cache */
1626 2, /* msg_length */
1627 1); /* response_length */
1628 }
1629
1630
1631
1632 /**
1633 * Read float[4] constant(s) from VS constant buffer.
1634 * For relative addressing, two float[4] constants will be read into 'dest'.
1635 * Otherwise, one float[4] constant will be read into the lower half of 'dest'.
1636 */
void brw_dp_READ_4_vs(struct brw_compile *p,
                      struct brw_reg dest,
                      GLuint location,
                      GLuint bind_table_index)
{
   struct brw_instruction *insn;
   GLuint msg_reg_nr = 1;
   struct brw_reg b;

   /* Setup MRF[1] with location/offset into const buffer */
   brw_push_insn_state(p);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   brw_set_predicate_control(p, BRW_PREDICATE_NONE);

   /* XXX I think we're setting all the dwords of MRF[1] to 'location'.
    * when the docs say only dword[2] should be set.  Hmmm.  But it works.
    */
   b = brw_message_reg(msg_reg_nr);
   b = retype(b, BRW_REGISTER_TYPE_UD);
   /*b = get_element_ud(b, 2);*/
   brw_MOV(p, b, brw_imm_ud(location));

   brw_pop_insn_state(p);

   insn = next_insn(p, BRW_OPCODE_SEND);

   /* Execute the read unpredicated and uncompressed, with the
    * execution mask disabled.
    */
   insn->header.predicate_control = BRW_PREDICATE_NONE;
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.destreg__conditionalmod = msg_reg_nr;
   insn->header.mask_control = BRW_MASK_DISABLE;

   brw_set_dest(insn, dest);
   brw_set_src0(insn, brw_null_reg());

   brw_set_dp_read_message(p->brw,
			   insn,
			   bind_table_index,
			   0,
			   BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
			   0, /* source cache = data cache */
			   1, /* msg_length */
			   1); /* response_length (1 Oword) */
}
1686
1687 /**
1688 * Read a float[4] constant per vertex from VS constant buffer, with
1689 * relative addressing.
1690 */
void brw_dp_READ_4_vs_relative(struct brw_compile *p,
			       struct brw_reg dest,
			       struct brw_reg addr_reg,
			       GLuint offset,
			       GLuint bind_table_index)
{
   struct intel_context *intel = &p->brw->intel;
   int msg_type;

   /* Setup MRF[1] with offset into const buffer */
   brw_push_insn_state(p);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   brw_set_predicate_control(p, BRW_PREDICATE_NONE);

   /* M1.0 is block offset 0, M1.4 is block offset 1, all other
    * fields ignored.
    */
   brw_ADD(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_UD),
	   addr_reg, brw_imm_d(offset));
   brw_pop_insn_state(p);

   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);

   /* Execute the read unpredicated and uncompressed, with the
    * execution mask disabled.
    */
   insn->header.predicate_control = BRW_PREDICATE_NONE;
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.destreg__conditionalmod = 0;
   insn->header.mask_control = BRW_MASK_DISABLE;

   brw_set_dest(insn, dest);
   brw_set_src0(insn, brw_vec8_grf(0, 0));

   /* The oword dual-block read message type encoding differs per
    * generation.
    */
   if (intel->gen == 6)
      msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
   else if (intel->gen == 5 || intel->is_g4x)
      msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
   else
      msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;

   brw_set_dp_read_message(p->brw,
			   insn,
			   bind_table_index,
			   BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
			   msg_type,
			   0, /* source cache = data cache */
			   2, /* msg_length */
			   1); /* response_length */
}
1739
1740
1741
/**
 * Emit a render target write dataport message.
 *
 * On gen6+ a SENDC is used when writing binding table index 0, and the
 * message may be headerless (msg_length == 4 means just the color
 * payload).
 */
void brw_fb_WRITE(struct brw_compile *p,
		  int dispatch_width,
                  struct brw_reg dest,
                  GLuint msg_reg_nr,
                  struct brw_reg src0,
                  GLuint binding_table_index,
                  GLuint msg_length,
                  GLuint response_length,
                  GLboolean eot)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;
   GLuint msg_control, msg_type;
   GLboolean header_present = GL_TRUE;

   if (intel->gen >= 6 && binding_table_index == 0) {
      insn = next_insn(p, BRW_OPCODE_SENDC);
   } else {
      insn = next_insn(p, BRW_OPCODE_SEND);
   }
   /* The execution mask is ignored for render target writes. */
   insn->header.predicate_control = 0;
   insn->header.compression_control = BRW_COMPRESSION_NONE;

   if (intel->gen >= 6) {
      if (msg_length == 4)
	 header_present = GL_FALSE;

      /* headerless version, just submit color payload */
      src0 = brw_message_reg(msg_reg_nr);

      msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE_GEN6;
   } else {
      /* Pre-gen6 the message register number is encoded in the
       * destreg/conditionalmod field.
       */
      insn->header.destreg__conditionalmod = msg_reg_nr;

      msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
   }

   if (dispatch_width == 16)
      msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE;
   else
      msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01;

   brw_set_dest(insn, dest);
   brw_set_src0(insn, src0);
   brw_set_dp_write_message(p->brw,
			    insn,
			    binding_table_index,
			    msg_control,
			    msg_type,
			    msg_length,
			    header_present,
			    1,	/* pixel scoreboard */
			    response_length,
			    eot,
			    0 /* send_commit_msg */);
}
1799
1800
1801 /**
1802 * Texture sample instruction.
1803 * Note: the msg_type plus msg_length values determine exactly what kind
1804 * of sampling operation is performed. See volume 4, page 161 of docs.
1805 */
void brw_SAMPLE(struct brw_compile *p,
		struct brw_reg dest,
		GLuint msg_reg_nr,
		struct brw_reg src0,
		GLuint binding_table_index,
		GLuint sampler,
		GLuint writemask,
		GLuint msg_type,
		GLuint response_length,
		GLuint msg_length,
		GLboolean eot,
		GLuint header_present,
		GLuint simd_mode)
{
   struct intel_context *intel = &p->brw->intel;
   GLboolean need_stall = 0;

   /* Nothing enabled: skip the sample entirely. */
   if (writemask == 0) {
      /*printf("%s: zero writemask??\n", __FUNCTION__); */
      return;
   }

   /* Hardware doesn't do destination dependency checking on send
    * instructions properly.  Add a workaround which generates the
    * dependency by other means.  In practice it seems like this bug
    * only crops up for texture samples, and only where registers are
    * written by the send and then written again later without being
    * read in between.  Luckily for us, we already track that
    * information and use it to modify the writemask for the
    * instruction, so that is a guide for whether a workaround is
    * needed.
    */
   if (writemask != WRITEMASK_XYZW) {
      GLuint dst_offset = 0;
      GLuint i, newmask = 0, len = 0;

      /* Count leading disabled channels (each shifts the destination
       * by 2 registers), then collect the following contiguous run of
       * enabled channels into newmask/len.
       */
      for (i = 0; i < 4; i++) {
	 if (writemask & (1<<i))
	    break;
	 dst_offset += 2;
      }
      for (; i < 4; i++) {
	 if (!(writemask & (1<<i)))
	    break;
	 newmask |= 1<<i;
	 len++;
      }

      if (newmask != writemask) {
	 /* The enabled channels are not contiguous; fall back to a
	  * read-after-write stall after the sample instead (below).
	  */
	 need_stall = 1;
         /* printf("need stall %x %x\n", newmask , writemask); */
      }
      else {
	 GLboolean dispatch_16 = GL_FALSE;

	 struct brw_reg m1 = brw_message_reg(msg_reg_nr);

	 guess_execution_size(p->current, dest);
	 if (p->current->header.execution_size == BRW_EXECUTE_16)
	    dispatch_16 = GL_TRUE;

	 /* Invert: the header field holds the channels NOT to write. */
	 newmask = ~newmask & WRITEMASK_XYZW;

	 brw_push_insn_state(p);

	 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
	 brw_set_mask_control(p, BRW_MASK_DISABLE);

	 /* Build a message header in m1 from g0, placing the inverted
	  * channel mask at bits 15:12 of dword 2.
	  */
	 brw_MOV(p, m1, brw_vec8_grf(0,0));
  	 brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12));

	 brw_pop_insn_state(p);

  	 src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
	 dest = offset(dest, dst_offset);

	 /* For 16-wide dispatch, masked channels are skipped in the
	  * response.  For 8-wide, masked channels still take up slots,
	  * and are just not written to.
	  */
	 if (dispatch_16)
	    response_length = len * 2;
      }
   }

   {
      struct brw_instruction *insn;

      /* Sandybridge doesn't have the implied move for SENDs,
       * and the first message register index comes from src0.
       */
      if (intel->gen >= 6) {
	 brw_push_insn_state(p);
	 brw_set_mask_control( p, BRW_MASK_DISABLE );
	 /* m1 contains header? */
	 brw_MOV(p, brw_message_reg(msg_reg_nr), src0);
	 brw_pop_insn_state(p);
	 src0 = brw_message_reg(msg_reg_nr);
      }

      insn = next_insn(p, BRW_OPCODE_SEND);
      insn->header.predicate_control = 0; /* XXX */
      insn->header.compression_control = BRW_COMPRESSION_NONE;
      if (intel->gen < 6)
	  insn->header.destreg__conditionalmod = msg_reg_nr;

      brw_set_dest(insn, dest);
      brw_set_src0(insn, src0);
      brw_set_sampler_message(p->brw, insn,
			      binding_table_index,
			      sampler,
			      msg_type,
			      response_length,
			      msg_length,
			      eot,
			      header_present,
			      simd_mode);
   }

   if (need_stall) {
      /* Self-move of the last response register forces a dependency
       * on the sample results, stalling until they land.
       */
      struct brw_reg reg = vec8(offset(dest, response_length-1));

      /*  mov (8) r9.0<1>:f    r9.0<8;8,1>:f    { Align1 }
       */
      brw_push_insn_state(p);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      brw_MOV(p, reg, reg);
      brw_pop_insn_state(p);
   }

}
1937
1938 /* All these variables are pretty confusing - we might be better off
1939 * using bitmasks and macros for this, in the old style. Or perhaps
1940 * just having the caller instantiate the fields in dword3 itself.
1941 */
void brw_urb_WRITE(struct brw_compile *p,
		   struct brw_reg dest,
		   GLuint msg_reg_nr,
		   struct brw_reg src0,
		   GLboolean allocate,
		   GLboolean used,
		   GLuint msg_length,
		   GLuint response_length,
		   GLboolean eot,
		   GLboolean writes_complete,
		   GLuint offset,
		   GLuint swizzle)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;

   /* Sandybridge doesn't have the implied move for SENDs,
    * and the first message register index comes from src0.
    */
   if (intel->gen >= 6) {
      brw_push_insn_state(p);
      brw_set_mask_control( p, BRW_MASK_DISABLE );
      brw_MOV(p, brw_message_reg(msg_reg_nr), src0);
      brw_pop_insn_state(p);
      src0 = brw_message_reg(msg_reg_nr);
   }

   insn = next_insn(p, BRW_OPCODE_SEND);

   assert(msg_length < BRW_MAX_MRF);

   brw_set_dest(insn, dest);
   brw_set_src0(insn, src0);
   brw_set_src1(insn, brw_imm_d(0));

   /* Pre-gen6 the message register number is encoded in the
    * destreg/conditionalmod field of the SEND.
    */
   if (intel->gen < 6)
      insn->header.destreg__conditionalmod = msg_reg_nr;

   /* All remaining URB semantics (allocate, used, complete, offset,
    * swizzle) are encoded into the message descriptor.
    */
   brw_set_urb_message(p->brw,
		       insn,
		       allocate,
		       used,
		       msg_length,
		       response_length,
		       eot,
		       writes_complete,
		       offset,
		       swizzle);
}
1991
/**
 * Emit a SEND carrying an ff_sync message; see
 * brw_set_ff_sync_message() for the descriptor details.
 */
void brw_ff_sync(struct brw_compile *p,
		   struct brw_reg dest,
		   GLuint msg_reg_nr,
		   struct brw_reg src0,
		   GLboolean allocate,
		   GLuint response_length,
		   GLboolean eot)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;

   /* Sandybridge doesn't have the implied move for SENDs,
    * and the first message register index comes from src0.
    */
   if (intel->gen >= 6) {
      brw_push_insn_state(p);
      brw_set_mask_control( p, BRW_MASK_DISABLE );
      brw_MOV(p, retype(brw_message_reg(msg_reg_nr), BRW_REGISTER_TYPE_UD),
	      retype(src0, BRW_REGISTER_TYPE_UD));
      brw_pop_insn_state(p);
      src0 = brw_message_reg(msg_reg_nr);
   }

   insn = next_insn(p, BRW_OPCODE_SEND);
   brw_set_dest(insn, dest);
   brw_set_src0(insn, src0);
   brw_set_src1(insn, brw_imm_d(0));

   /* Pre-gen6 the message register number is encoded in the
    * destreg/conditionalmod field of the SEND.
    */
   if (intel->gen < 6)
      insn->header.destreg__conditionalmod = msg_reg_nr;

   brw_set_ff_sync_message(p->brw,
			   insn,
			   allocate,
			   response_length,
			   eot);
}