i965: Add support for POW in gen6 FS.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_eu_emit.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32
33 #include "brw_context.h"
34 #include "brw_defines.h"
35 #include "brw_eu.h"
36
37
38
39
40 /***********************************************************************
41 * Internal helper for constructing instructions
42 */
43
44 static void guess_execution_size( struct brw_instruction *insn,
45 struct brw_reg reg )
46 {
47 if (reg.width == BRW_WIDTH_8 &&
48 insn->header.compression_control == BRW_COMPRESSION_COMPRESSED)
49 insn->header.execution_size = BRW_EXECUTE_16;
50 else
51 insn->header.execution_size = reg.width; /* note - definitions are compatible */
52 }
53
54
55 static void brw_set_dest( struct brw_instruction *insn,
56 struct brw_reg dest )
57 {
58 if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE &&
59 dest.file != BRW_MESSAGE_REGISTER_FILE)
60 assert(dest.nr < 128);
61
62 insn->bits1.da1.dest_reg_file = dest.file;
63 insn->bits1.da1.dest_reg_type = dest.type;
64 insn->bits1.da1.dest_address_mode = dest.address_mode;
65
66 if (dest.address_mode == BRW_ADDRESS_DIRECT) {
67 insn->bits1.da1.dest_reg_nr = dest.nr;
68
69 if (insn->header.access_mode == BRW_ALIGN_1) {
70 insn->bits1.da1.dest_subreg_nr = dest.subnr;
71 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
72 dest.hstride = BRW_HORIZONTAL_STRIDE_1;
73 insn->bits1.da1.dest_horiz_stride = dest.hstride;
74 }
75 else {
76 insn->bits1.da16.dest_subreg_nr = dest.subnr / 16;
77 insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask;
78 /* even ignored in da16, still need to set as '01' */
79 insn->bits1.da16.dest_horiz_stride = 1;
80 }
81 }
82 else {
83 insn->bits1.ia1.dest_subreg_nr = dest.subnr;
84
85 /* These are different sizes in align1 vs align16:
86 */
87 if (insn->header.access_mode == BRW_ALIGN_1) {
88 insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset;
89 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
90 dest.hstride = BRW_HORIZONTAL_STRIDE_1;
91 insn->bits1.ia1.dest_horiz_stride = dest.hstride;
92 }
93 else {
94 insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset;
95 /* even ignored in da16, still need to set as '01' */
96 insn->bits1.ia16.dest_horiz_stride = 1;
97 }
98 }
99
100 /* NEW: Set the execution size based on dest.width and
101 * insn->compression_control:
102 */
103 guess_execution_size(insn, dest);
104 }
105
106 extern int reg_type_size[];
107
108 static void
109 validate_reg(struct brw_instruction *insn, struct brw_reg reg)
110 {
111 int hstride_for_reg[] = {0, 1, 2, 4};
112 int vstride_for_reg[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256};
113 int width_for_reg[] = {1, 2, 4, 8, 16};
114 int execsize_for_reg[] = {1, 2, 4, 8, 16};
115 int width, hstride, vstride, execsize;
116
117 if (reg.file == BRW_IMMEDIATE_VALUE) {
118 /* 3.3.6: Region Parameters. Restriction: Immediate vectors
119 * mean the destination has to be 128-bit aligned and the
120 * destination horiz stride has to be a word.
121 */
122 if (reg.type == BRW_REGISTER_TYPE_V) {
123 assert(hstride_for_reg[insn->bits1.da1.dest_horiz_stride] *
124 reg_type_size[insn->bits1.da1.dest_reg_type] == 2);
125 }
126
127 return;
128 }
129
130 if (reg.file == BRW_ARCHITECTURE_REGISTER_FILE &&
131 reg.file == BRW_ARF_NULL)
132 return;
133
134 assert(reg.hstride >= 0 && reg.hstride < Elements(hstride_for_reg));
135 hstride = hstride_for_reg[reg.hstride];
136
137 if (reg.vstride == 0xf) {
138 vstride = -1;
139 } else {
140 assert(reg.vstride >= 0 && reg.vstride < Elements(vstride_for_reg));
141 vstride = vstride_for_reg[reg.vstride];
142 }
143
144 assert(reg.width >= 0 && reg.width < Elements(width_for_reg));
145 width = width_for_reg[reg.width];
146
147 assert(insn->header.execution_size >= 0 &&
148 insn->header.execution_size < Elements(execsize_for_reg));
149 execsize = execsize_for_reg[insn->header.execution_size];
150
151 /* Restrictions from 3.3.10: Register Region Restrictions. */
152 /* 3. */
153 assert(execsize >= width);
154
155 /* 4. */
156 if (execsize == width && hstride != 0) {
157 assert(vstride == -1 || vstride == width * hstride);
158 }
159
160 /* 5. */
161 if (execsize == width && hstride == 0) {
162 /* no restriction on vstride. */
163 }
164
165 /* 6. */
166 if (width == 1) {
167 assert(hstride == 0);
168 }
169
170 /* 7. */
171 if (execsize == 1 && width == 1) {
172 assert(hstride == 0);
173 assert(vstride == 0);
174 }
175
176 /* 8. */
177 if (vstride == 0 && hstride == 0) {
178 assert(width == 1);
179 }
180
181 /* 10. Check destination issues. */
182 }
183
184 static void brw_set_src0( struct brw_instruction *insn,
185 struct brw_reg reg )
186 {
187 if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE)
188 assert(reg.nr < 128);
189
190 validate_reg(insn, reg);
191
192 insn->bits1.da1.src0_reg_file = reg.file;
193 insn->bits1.da1.src0_reg_type = reg.type;
194 insn->bits2.da1.src0_abs = reg.abs;
195 insn->bits2.da1.src0_negate = reg.negate;
196 insn->bits2.da1.src0_address_mode = reg.address_mode;
197
198 if (reg.file == BRW_IMMEDIATE_VALUE) {
199 insn->bits3.ud = reg.dw1.ud;
200
201 /* Required to set some fields in src1 as well:
202 */
203 insn->bits1.da1.src1_reg_file = 0; /* arf */
204 insn->bits1.da1.src1_reg_type = reg.type;
205 }
206 else
207 {
208 if (reg.address_mode == BRW_ADDRESS_DIRECT) {
209 if (insn->header.access_mode == BRW_ALIGN_1) {
210 insn->bits2.da1.src0_subreg_nr = reg.subnr;
211 insn->bits2.da1.src0_reg_nr = reg.nr;
212 }
213 else {
214 insn->bits2.da16.src0_subreg_nr = reg.subnr / 16;
215 insn->bits2.da16.src0_reg_nr = reg.nr;
216 }
217 }
218 else {
219 insn->bits2.ia1.src0_subreg_nr = reg.subnr;
220
221 if (insn->header.access_mode == BRW_ALIGN_1) {
222 insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset;
223 }
224 else {
225 insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset;
226 }
227 }
228
229 if (insn->header.access_mode == BRW_ALIGN_1) {
230 if (reg.width == BRW_WIDTH_1 &&
231 insn->header.execution_size == BRW_EXECUTE_1) {
232 insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
233 insn->bits2.da1.src0_width = BRW_WIDTH_1;
234 insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0;
235 }
236 else {
237 insn->bits2.da1.src0_horiz_stride = reg.hstride;
238 insn->bits2.da1.src0_width = reg.width;
239 insn->bits2.da1.src0_vert_stride = reg.vstride;
240 }
241 }
242 else {
243 insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
244 insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
245 insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
246 insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
247
248 /* This is an oddity of the fact we're using the same
249 * descriptions for registers in align_16 as align_1:
250 */
251 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
252 insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4;
253 else
254 insn->bits2.da16.src0_vert_stride = reg.vstride;
255 }
256 }
257 }
258
259
260 void brw_set_src1( struct brw_instruction *insn,
261 struct brw_reg reg )
262 {
263 assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
264
265 assert(reg.nr < 128);
266
267 validate_reg(insn, reg);
268
269 insn->bits1.da1.src1_reg_file = reg.file;
270 insn->bits1.da1.src1_reg_type = reg.type;
271 insn->bits3.da1.src1_abs = reg.abs;
272 insn->bits3.da1.src1_negate = reg.negate;
273
274 /* Only src1 can be immediate in two-argument instructions.
275 */
276 assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE);
277
278 if (reg.file == BRW_IMMEDIATE_VALUE) {
279 insn->bits3.ud = reg.dw1.ud;
280 }
281 else {
282 /* This is a hardware restriction, which may or may not be lifted
283 * in the future:
284 */
285 assert (reg.address_mode == BRW_ADDRESS_DIRECT);
286 /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */
287
288 if (insn->header.access_mode == BRW_ALIGN_1) {
289 insn->bits3.da1.src1_subreg_nr = reg.subnr;
290 insn->bits3.da1.src1_reg_nr = reg.nr;
291 }
292 else {
293 insn->bits3.da16.src1_subreg_nr = reg.subnr / 16;
294 insn->bits3.da16.src1_reg_nr = reg.nr;
295 }
296
297 if (insn->header.access_mode == BRW_ALIGN_1) {
298 if (reg.width == BRW_WIDTH_1 &&
299 insn->header.execution_size == BRW_EXECUTE_1) {
300 insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
301 insn->bits3.da1.src1_width = BRW_WIDTH_1;
302 insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0;
303 }
304 else {
305 insn->bits3.da1.src1_horiz_stride = reg.hstride;
306 insn->bits3.da1.src1_width = reg.width;
307 insn->bits3.da1.src1_vert_stride = reg.vstride;
308 }
309 }
310 else {
311 insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
312 insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
313 insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
314 insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
315
316 /* This is an oddity of the fact we're using the same
317 * descriptions for registers in align_16 as align_1:
318 */
319 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
320 insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4;
321 else
322 insn->bits3.da16.src1_vert_stride = reg.vstride;
323 }
324 }
325 }
326
327
328
329 static void brw_set_math_message( struct brw_context *brw,
330 struct brw_instruction *insn,
331 GLuint msg_length,
332 GLuint response_length,
333 GLuint function,
334 GLuint integer_type,
335 GLboolean low_precision,
336 GLboolean saturate,
337 GLuint dataType )
338 {
339 struct intel_context *intel = &brw->intel;
340 brw_set_src1(insn, brw_imm_d(0));
341
342 if (intel->gen == 5) {
343 insn->bits3.math_gen5.function = function;
344 insn->bits3.math_gen5.int_type = integer_type;
345 insn->bits3.math_gen5.precision = low_precision;
346 insn->bits3.math_gen5.saturate = saturate;
347 insn->bits3.math_gen5.data_type = dataType;
348 insn->bits3.math_gen5.snapshot = 0;
349 insn->bits3.math_gen5.header_present = 0;
350 insn->bits3.math_gen5.response_length = response_length;
351 insn->bits3.math_gen5.msg_length = msg_length;
352 insn->bits3.math_gen5.end_of_thread = 0;
353 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_MATH;
354 insn->bits2.send_gen5.end_of_thread = 0;
355 } else {
356 insn->bits3.math.function = function;
357 insn->bits3.math.int_type = integer_type;
358 insn->bits3.math.precision = low_precision;
359 insn->bits3.math.saturate = saturate;
360 insn->bits3.math.data_type = dataType;
361 insn->bits3.math.response_length = response_length;
362 insn->bits3.math.msg_length = msg_length;
363 insn->bits3.math.msg_target = BRW_MESSAGE_TARGET_MATH;
364 insn->bits3.math.end_of_thread = 0;
365 }
366 }
367
368
369 static void brw_set_ff_sync_message(struct brw_context *brw,
370 struct brw_instruction *insn,
371 GLboolean allocate,
372 GLuint response_length,
373 GLboolean end_of_thread)
374 {
375 struct intel_context *intel = &brw->intel;
376 brw_set_src1(insn, brw_imm_d(0));
377
378 insn->bits3.urb_gen5.opcode = 1; /* FF_SYNC */
379 insn->bits3.urb_gen5.offset = 0; /* Not used by FF_SYNC */
380 insn->bits3.urb_gen5.swizzle_control = 0; /* Not used by FF_SYNC */
381 insn->bits3.urb_gen5.allocate = allocate;
382 insn->bits3.urb_gen5.used = 0; /* Not used by FF_SYNC */
383 insn->bits3.urb_gen5.complete = 0; /* Not used by FF_SYNC */
384 insn->bits3.urb_gen5.header_present = 1;
385 insn->bits3.urb_gen5.response_length = response_length; /* may be 1 or 0 */
386 insn->bits3.urb_gen5.msg_length = 1;
387 insn->bits3.urb_gen5.end_of_thread = end_of_thread;
388 if (intel->gen >= 6) {
389 insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_URB;
390 } else {
391 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_URB;
392 insn->bits2.send_gen5.end_of_thread = end_of_thread;
393 }
394 }
395
396 static void brw_set_urb_message( struct brw_context *brw,
397 struct brw_instruction *insn,
398 GLboolean allocate,
399 GLboolean used,
400 GLuint msg_length,
401 GLuint response_length,
402 GLboolean end_of_thread,
403 GLboolean complete,
404 GLuint offset,
405 GLuint swizzle_control )
406 {
407 struct intel_context *intel = &brw->intel;
408 brw_set_src1(insn, brw_imm_d(0));
409
410 if (intel->gen >= 5) {
411 insn->bits3.urb_gen5.opcode = 0; /* ? */
412 insn->bits3.urb_gen5.offset = offset;
413 insn->bits3.urb_gen5.swizzle_control = swizzle_control;
414 insn->bits3.urb_gen5.allocate = allocate;
415 insn->bits3.urb_gen5.used = used; /* ? */
416 insn->bits3.urb_gen5.complete = complete;
417 insn->bits3.urb_gen5.header_present = 1;
418 insn->bits3.urb_gen5.response_length = response_length;
419 insn->bits3.urb_gen5.msg_length = msg_length;
420 insn->bits3.urb_gen5.end_of_thread = end_of_thread;
421 if (intel->gen >= 6) {
422 /* For SNB, the SFID bits moved to the condmod bits, and
423 * EOT stayed in bits3 above. Does the EOT bit setting
424 * below on Ironlake even do anything?
425 */
426 insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_URB;
427 } else {
428 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_URB;
429 insn->bits2.send_gen5.end_of_thread = end_of_thread;
430 }
431 } else {
432 insn->bits3.urb.opcode = 0; /* ? */
433 insn->bits3.urb.offset = offset;
434 insn->bits3.urb.swizzle_control = swizzle_control;
435 insn->bits3.urb.allocate = allocate;
436 insn->bits3.urb.used = used; /* ? */
437 insn->bits3.urb.complete = complete;
438 insn->bits3.urb.response_length = response_length;
439 insn->bits3.urb.msg_length = msg_length;
440 insn->bits3.urb.msg_target = BRW_MESSAGE_TARGET_URB;
441 insn->bits3.urb.end_of_thread = end_of_thread;
442 }
443 }
444
445 static void brw_set_dp_write_message( struct brw_context *brw,
446 struct brw_instruction *insn,
447 GLuint binding_table_index,
448 GLuint msg_control,
449 GLuint msg_type,
450 GLuint msg_length,
451 GLuint pixel_scoreboard_clear,
452 GLuint response_length,
453 GLuint end_of_thread,
454 GLuint send_commit_msg)
455 {
456 struct intel_context *intel = &brw->intel;
457 brw_set_src1(insn, brw_imm_ud(0));
458
459 if (intel->gen >= 6) {
460 insn->bits3.dp_render_cache.binding_table_index = binding_table_index;
461 insn->bits3.dp_render_cache.msg_control = msg_control;
462 insn->bits3.dp_render_cache.pixel_scoreboard_clear = pixel_scoreboard_clear;
463 insn->bits3.dp_render_cache.msg_type = msg_type;
464 insn->bits3.dp_render_cache.send_commit_msg = send_commit_msg;
465 insn->bits3.dp_render_cache.header_present = 0; /* XXX */
466 insn->bits3.dp_render_cache.response_length = response_length;
467 insn->bits3.dp_render_cache.msg_length = msg_length;
468 insn->bits3.dp_render_cache.end_of_thread = end_of_thread;
469 insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
470 /* XXX really need below? */
471 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
472 insn->bits2.send_gen5.end_of_thread = end_of_thread;
473 } else if (intel->gen == 5) {
474 insn->bits3.dp_write_gen5.binding_table_index = binding_table_index;
475 insn->bits3.dp_write_gen5.msg_control = msg_control;
476 insn->bits3.dp_write_gen5.pixel_scoreboard_clear = pixel_scoreboard_clear;
477 insn->bits3.dp_write_gen5.msg_type = msg_type;
478 insn->bits3.dp_write_gen5.send_commit_msg = send_commit_msg;
479 insn->bits3.dp_write_gen5.header_present = 1;
480 insn->bits3.dp_write_gen5.response_length = response_length;
481 insn->bits3.dp_write_gen5.msg_length = msg_length;
482 insn->bits3.dp_write_gen5.end_of_thread = end_of_thread;
483 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
484 insn->bits2.send_gen5.end_of_thread = end_of_thread;
485 } else {
486 insn->bits3.dp_write.binding_table_index = binding_table_index;
487 insn->bits3.dp_write.msg_control = msg_control;
488 insn->bits3.dp_write.pixel_scoreboard_clear = pixel_scoreboard_clear;
489 insn->bits3.dp_write.msg_type = msg_type;
490 insn->bits3.dp_write.send_commit_msg = send_commit_msg;
491 insn->bits3.dp_write.response_length = response_length;
492 insn->bits3.dp_write.msg_length = msg_length;
493 insn->bits3.dp_write.msg_target = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
494 insn->bits3.dp_write.end_of_thread = end_of_thread;
495 }
496 }
497
498 static void brw_set_dp_read_message( struct brw_context *brw,
499 struct brw_instruction *insn,
500 GLuint binding_table_index,
501 GLuint msg_control,
502 GLuint msg_type,
503 GLuint target_cache,
504 GLuint msg_length,
505 GLuint response_length,
506 GLuint end_of_thread )
507 {
508 struct intel_context *intel = &brw->intel;
509 brw_set_src1(insn, brw_imm_d(0));
510
511 if (intel->gen == 5) {
512 insn->bits3.dp_read_gen5.binding_table_index = binding_table_index;
513 insn->bits3.dp_read_gen5.msg_control = msg_control;
514 insn->bits3.dp_read_gen5.msg_type = msg_type;
515 insn->bits3.dp_read_gen5.target_cache = target_cache;
516 insn->bits3.dp_read_gen5.header_present = 1;
517 insn->bits3.dp_read_gen5.response_length = response_length;
518 insn->bits3.dp_read_gen5.msg_length = msg_length;
519 insn->bits3.dp_read_gen5.pad1 = 0;
520 insn->bits3.dp_read_gen5.end_of_thread = end_of_thread;
521 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_READ;
522 insn->bits2.send_gen5.end_of_thread = end_of_thread;
523 } else {
524 insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/
525 insn->bits3.dp_read.msg_control = msg_control; /*8:11*/
526 insn->bits3.dp_read.msg_type = msg_type; /*12:13*/
527 insn->bits3.dp_read.target_cache = target_cache; /*14:15*/
528 insn->bits3.dp_read.response_length = response_length; /*16:19*/
529 insn->bits3.dp_read.msg_length = msg_length; /*20:23*/
530 insn->bits3.dp_read.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ; /*24:27*/
531 insn->bits3.dp_read.pad1 = 0; /*28:30*/
532 insn->bits3.dp_read.end_of_thread = end_of_thread; /*31*/
533 }
534 }
535
536 static void brw_set_sampler_message(struct brw_context *brw,
537 struct brw_instruction *insn,
538 GLuint binding_table_index,
539 GLuint sampler,
540 GLuint msg_type,
541 GLuint response_length,
542 GLuint msg_length,
543 GLboolean eot,
544 GLuint header_present,
545 GLuint simd_mode)
546 {
547 struct intel_context *intel = &brw->intel;
548 assert(eot == 0);
549 brw_set_src1(insn, brw_imm_d(0));
550
551 if (intel->gen == 5) {
552 insn->bits3.sampler_gen5.binding_table_index = binding_table_index;
553 insn->bits3.sampler_gen5.sampler = sampler;
554 insn->bits3.sampler_gen5.msg_type = msg_type;
555 insn->bits3.sampler_gen5.simd_mode = simd_mode;
556 insn->bits3.sampler_gen5.header_present = header_present;
557 insn->bits3.sampler_gen5.response_length = response_length;
558 insn->bits3.sampler_gen5.msg_length = msg_length;
559 insn->bits3.sampler_gen5.end_of_thread = eot;
560 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_SAMPLER;
561 insn->bits2.send_gen5.end_of_thread = eot;
562 } else if (intel->is_g4x) {
563 insn->bits3.sampler_g4x.binding_table_index = binding_table_index;
564 insn->bits3.sampler_g4x.sampler = sampler;
565 insn->bits3.sampler_g4x.msg_type = msg_type;
566 insn->bits3.sampler_g4x.response_length = response_length;
567 insn->bits3.sampler_g4x.msg_length = msg_length;
568 insn->bits3.sampler_g4x.end_of_thread = eot;
569 insn->bits3.sampler_g4x.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
570 } else {
571 insn->bits3.sampler.binding_table_index = binding_table_index;
572 insn->bits3.sampler.sampler = sampler;
573 insn->bits3.sampler.msg_type = msg_type;
574 insn->bits3.sampler.return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32;
575 insn->bits3.sampler.response_length = response_length;
576 insn->bits3.sampler.msg_length = msg_length;
577 insn->bits3.sampler.end_of_thread = eot;
578 insn->bits3.sampler.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
579 }
580 }
581
582
583
584 static struct brw_instruction *next_insn( struct brw_compile *p,
585 GLuint opcode )
586 {
587 struct brw_instruction *insn;
588
589 assert(p->nr_insn + 1 < BRW_EU_MAX_INSN);
590
591 insn = &p->store[p->nr_insn++];
592 memcpy(insn, p->current, sizeof(*insn));
593
594 /* Reset this one-shot flag:
595 */
596
597 if (p->current->header.destreg__conditionalmod) {
598 p->current->header.destreg__conditionalmod = 0;
599 p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
600 }
601
602 insn->header.opcode = opcode;
603 return insn;
604 }
605
606
607 static struct brw_instruction *brw_alu1( struct brw_compile *p,
608 GLuint opcode,
609 struct brw_reg dest,
610 struct brw_reg src )
611 {
612 struct brw_instruction *insn = next_insn(p, opcode);
613 brw_set_dest(insn, dest);
614 brw_set_src0(insn, src);
615 return insn;
616 }
617
618 static struct brw_instruction *brw_alu2(struct brw_compile *p,
619 GLuint opcode,
620 struct brw_reg dest,
621 struct brw_reg src0,
622 struct brw_reg src1 )
623 {
624 struct brw_instruction *insn = next_insn(p, opcode);
625 brw_set_dest(insn, dest);
626 brw_set_src0(insn, src0);
627 brw_set_src1(insn, src1);
628 return insn;
629 }
630
631
632 /***********************************************************************
633 * Convenience routines.
634 */
635 #define ALU1(OP) \
636 struct brw_instruction *brw_##OP(struct brw_compile *p, \
637 struct brw_reg dest, \
638 struct brw_reg src0) \
639 { \
640 return brw_alu1(p, BRW_OPCODE_##OP, dest, src0); \
641 }
642
643 #define ALU2(OP) \
644 struct brw_instruction *brw_##OP(struct brw_compile *p, \
645 struct brw_reg dest, \
646 struct brw_reg src0, \
647 struct brw_reg src1) \
648 { \
649 return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1); \
650 }
651
652
653 ALU1(MOV)
654 ALU2(SEL)
655 ALU1(NOT)
656 ALU2(AND)
657 ALU2(OR)
658 ALU2(XOR)
659 ALU2(SHR)
660 ALU2(SHL)
661 ALU2(RSR)
662 ALU2(RSL)
663 ALU2(ASR)
664 ALU1(FRC)
665 ALU1(RNDD)
666 ALU1(RNDZ)
667 ALU2(MAC)
668 ALU2(MACH)
669 ALU1(LZD)
670 ALU2(DP4)
671 ALU2(DPH)
672 ALU2(DP3)
673 ALU2(DP2)
674 ALU2(LINE)
675 ALU2(PLN)
676
677 struct brw_instruction *brw_ADD(struct brw_compile *p,
678 struct brw_reg dest,
679 struct brw_reg src0,
680 struct brw_reg src1)
681 {
682 /* 6.2.2: add */
683 if (src0.type == BRW_REGISTER_TYPE_F ||
684 (src0.file == BRW_IMMEDIATE_VALUE &&
685 src0.type == BRW_REGISTER_TYPE_VF)) {
686 assert(src1.type != BRW_REGISTER_TYPE_UD);
687 assert(src1.type != BRW_REGISTER_TYPE_D);
688 }
689
690 if (src1.type == BRW_REGISTER_TYPE_F ||
691 (src1.file == BRW_IMMEDIATE_VALUE &&
692 src1.type == BRW_REGISTER_TYPE_VF)) {
693 assert(src0.type != BRW_REGISTER_TYPE_UD);
694 assert(src0.type != BRW_REGISTER_TYPE_D);
695 }
696
697 return brw_alu2(p, BRW_OPCODE_ADD, dest, src0, src1);
698 }
699
700 struct brw_instruction *brw_MUL(struct brw_compile *p,
701 struct brw_reg dest,
702 struct brw_reg src0,
703 struct brw_reg src1)
704 {
705 /* 6.32.38: mul */
706 if (src0.type == BRW_REGISTER_TYPE_D ||
707 src0.type == BRW_REGISTER_TYPE_UD ||
708 src1.type == BRW_REGISTER_TYPE_D ||
709 src1.type == BRW_REGISTER_TYPE_UD) {
710 assert(dest.type != BRW_REGISTER_TYPE_F);
711 }
712
713 if (src0.type == BRW_REGISTER_TYPE_F ||
714 (src0.file == BRW_IMMEDIATE_VALUE &&
715 src0.type == BRW_REGISTER_TYPE_VF)) {
716 assert(src1.type != BRW_REGISTER_TYPE_UD);
717 assert(src1.type != BRW_REGISTER_TYPE_D);
718 }
719
720 if (src1.type == BRW_REGISTER_TYPE_F ||
721 (src1.file == BRW_IMMEDIATE_VALUE &&
722 src1.type == BRW_REGISTER_TYPE_VF)) {
723 assert(src0.type != BRW_REGISTER_TYPE_UD);
724 assert(src0.type != BRW_REGISTER_TYPE_D);
725 }
726
727 assert(src0.file != BRW_ARCHITECTURE_REGISTER_FILE ||
728 src0.nr != BRW_ARF_ACCUMULATOR);
729 assert(src1.file != BRW_ARCHITECTURE_REGISTER_FILE ||
730 src1.nr != BRW_ARF_ACCUMULATOR);
731
732 return brw_alu2(p, BRW_OPCODE_MUL, dest, src0, src1);
733 }
734
735
736 void brw_NOP(struct brw_compile *p)
737 {
738 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP);
739 brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
740 brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
741 brw_set_src1(insn, brw_imm_ud(0x0));
742 }
743
744
745
746
747
748 /***********************************************************************
749 * Comparisons, if/else/endif
750 */
751
752 struct brw_instruction *brw_JMPI(struct brw_compile *p,
753 struct brw_reg dest,
754 struct brw_reg src0,
755 struct brw_reg src1)
756 {
757 struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1);
758
759 insn->header.execution_size = 1;
760 insn->header.compression_control = BRW_COMPRESSION_NONE;
761 insn->header.mask_control = BRW_MASK_DISABLE;
762
763 p->current->header.predicate_control = BRW_PREDICATE_NONE;
764
765 return insn;
766 }
767
768 /* EU takes the value from the flag register and pushes it onto some
769 * sort of a stack (presumably merging with any flag value already on
770 * the stack). Within an if block, the flags at the top of the stack
771 * control execution on each channel of the unit, eg. on each of the
772 * 16 pixel values in our wm programs.
773 *
774 * When the matching 'else' instruction is reached (presumably by
775 * countdown of the instruction count patched in by our ELSE/ENDIF
776 * functions), the relevent flags are inverted.
777 *
778 * When the matching 'endif' instruction is reached, the flags are
779 * popped off. If the stack is now empty, normal execution resumes.
780 *
781 * No attempt is made to deal with stack overflow (14 elements?).
782 */
783 struct brw_instruction *brw_IF(struct brw_compile *p, GLuint execute_size)
784 {
785 struct brw_instruction *insn;
786
787 if (p->single_program_flow) {
788 assert(execute_size == BRW_EXECUTE_1);
789
790 insn = next_insn(p, BRW_OPCODE_ADD);
791 insn->header.predicate_inverse = 1;
792 } else {
793 insn = next_insn(p, BRW_OPCODE_IF);
794 }
795
796 /* Override the defaults for this instruction:
797 */
798 brw_set_dest(insn, brw_ip_reg());
799 brw_set_src0(insn, brw_ip_reg());
800 brw_set_src1(insn, brw_imm_d(0x0));
801
802 insn->header.execution_size = execute_size;
803 insn->header.compression_control = BRW_COMPRESSION_NONE;
804 insn->header.predicate_control = BRW_PREDICATE_NORMAL;
805 insn->header.mask_control = BRW_MASK_ENABLE;
806 if (!p->single_program_flow)
807 insn->header.thread_control = BRW_THREAD_SWITCH;
808
809 p->current->header.predicate_control = BRW_PREDICATE_NONE;
810
811 return insn;
812 }
813
814
815 struct brw_instruction *brw_ELSE(struct brw_compile *p,
816 struct brw_instruction *if_insn)
817 {
818 struct intel_context *intel = &p->brw->intel;
819 struct brw_instruction *insn;
820 GLuint br = 1;
821
822 if (intel->gen == 5)
823 br = 2;
824
825 if (p->single_program_flow) {
826 insn = next_insn(p, BRW_OPCODE_ADD);
827 } else {
828 insn = next_insn(p, BRW_OPCODE_ELSE);
829 }
830
831 brw_set_dest(insn, brw_ip_reg());
832 brw_set_src0(insn, brw_ip_reg());
833 brw_set_src1(insn, brw_imm_d(0x0));
834
835 insn->header.compression_control = BRW_COMPRESSION_NONE;
836 insn->header.execution_size = if_insn->header.execution_size;
837 insn->header.mask_control = BRW_MASK_ENABLE;
838 if (!p->single_program_flow)
839 insn->header.thread_control = BRW_THREAD_SWITCH;
840
841 /* Patch the if instruction to point at this instruction.
842 */
843 if (p->single_program_flow) {
844 assert(if_insn->header.opcode == BRW_OPCODE_ADD);
845
846 if_insn->bits3.ud = (insn - if_insn + 1) * 16;
847 } else {
848 assert(if_insn->header.opcode == BRW_OPCODE_IF);
849
850 if_insn->bits3.if_else.jump_count = br * (insn - if_insn);
851 if_insn->bits3.if_else.pop_count = 0;
852 if_insn->bits3.if_else.pad0 = 0;
853 }
854
855 return insn;
856 }
857
858 void brw_ENDIF(struct brw_compile *p,
859 struct brw_instruction *patch_insn)
860 {
861 struct intel_context *intel = &p->brw->intel;
862 GLuint br = 1;
863
864 if (intel->gen == 5)
865 br = 2;
866
867 if (p->single_program_flow) {
868 /* In single program flow mode, there's no need to execute an ENDIF,
869 * since we don't need to do any stack operations, and if we're executing
870 * currently, we want to just continue executing.
871 */
872 struct brw_instruction *next = &p->store[p->nr_insn];
873
874 assert(patch_insn->header.opcode == BRW_OPCODE_ADD);
875
876 patch_insn->bits3.ud = (next - patch_insn) * 16;
877 } else {
878 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_ENDIF);
879
880 brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
881 brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
882 brw_set_src1(insn, brw_imm_d(0x0));
883
884 insn->header.compression_control = BRW_COMPRESSION_NONE;
885 insn->header.execution_size = patch_insn->header.execution_size;
886 insn->header.mask_control = BRW_MASK_ENABLE;
887 insn->header.thread_control = BRW_THREAD_SWITCH;
888
889 assert(patch_insn->bits3.if_else.jump_count == 0);
890
891 /* Patch the if or else instructions to point at this or the next
892 * instruction respectively.
893 */
894 if (patch_insn->header.opcode == BRW_OPCODE_IF) {
895 /* Automagically turn it into an IFF:
896 */
897 patch_insn->header.opcode = BRW_OPCODE_IFF;
898 patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1);
899 patch_insn->bits3.if_else.pop_count = 0;
900 patch_insn->bits3.if_else.pad0 = 0;
901 } else if (patch_insn->header.opcode == BRW_OPCODE_ELSE) {
902 patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1);
903 patch_insn->bits3.if_else.pop_count = 1;
904 patch_insn->bits3.if_else.pad0 = 0;
905 } else {
906 assert(0);
907 }
908
909 /* Also pop item off the stack in the endif instruction:
910 */
911 insn->bits3.if_else.jump_count = 0;
912 insn->bits3.if_else.pop_count = 1;
913 insn->bits3.if_else.pad0 = 0;
914 }
915 }
916
917 struct brw_instruction *brw_BREAK(struct brw_compile *p, int pop_count)
918 {
919 struct brw_instruction *insn;
920 insn = next_insn(p, BRW_OPCODE_BREAK);
921 brw_set_dest(insn, brw_ip_reg());
922 brw_set_src0(insn, brw_ip_reg());
923 brw_set_src1(insn, brw_imm_d(0x0));
924 insn->header.compression_control = BRW_COMPRESSION_NONE;
925 insn->header.execution_size = BRW_EXECUTE_8;
926 /* insn->header.mask_control = BRW_MASK_DISABLE; */
927 insn->bits3.if_else.pad0 = 0;
928 insn->bits3.if_else.pop_count = pop_count;
929 return insn;
930 }
931
932 struct brw_instruction *brw_CONT(struct brw_compile *p, int pop_count)
933 {
934 struct brw_instruction *insn;
935 insn = next_insn(p, BRW_OPCODE_CONTINUE);
936 brw_set_dest(insn, brw_ip_reg());
937 brw_set_src0(insn, brw_ip_reg());
938 brw_set_src1(insn, brw_imm_d(0x0));
939 insn->header.compression_control = BRW_COMPRESSION_NONE;
940 insn->header.execution_size = BRW_EXECUTE_8;
941 /* insn->header.mask_control = BRW_MASK_DISABLE; */
942 insn->bits3.if_else.pad0 = 0;
943 insn->bits3.if_else.pop_count = pop_count;
944 return insn;
945 }
946
947 /* DO/WHILE loop:
948 */
949 struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size)
950 {
951 if (p->single_program_flow) {
952 return &p->store[p->nr_insn];
953 } else {
954 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO);
955
956 /* Override the defaults for this instruction:
957 */
958 brw_set_dest(insn, brw_null_reg());
959 brw_set_src0(insn, brw_null_reg());
960 brw_set_src1(insn, brw_null_reg());
961
962 insn->header.compression_control = BRW_COMPRESSION_NONE;
963 insn->header.execution_size = execute_size;
964 insn->header.predicate_control = BRW_PREDICATE_NONE;
965 /* insn->header.mask_control = BRW_MASK_ENABLE; */
966 /* insn->header.mask_control = BRW_MASK_DISABLE; */
967
968 return insn;
969 }
970 }
971
972
973
974 struct brw_instruction *brw_WHILE(struct brw_compile *p,
975 struct brw_instruction *do_insn)
976 {
977 struct intel_context *intel = &p->brw->intel;
978 struct brw_instruction *insn;
979 GLuint br = 1;
980
981 if (intel->gen == 5)
982 br = 2;
983
984 if (p->single_program_flow)
985 insn = next_insn(p, BRW_OPCODE_ADD);
986 else
987 insn = next_insn(p, BRW_OPCODE_WHILE);
988
989 brw_set_dest(insn, brw_ip_reg());
990 brw_set_src0(insn, brw_ip_reg());
991 brw_set_src1(insn, brw_imm_d(0x0));
992
993 insn->header.compression_control = BRW_COMPRESSION_NONE;
994
995 if (p->single_program_flow) {
996 insn->header.execution_size = BRW_EXECUTE_1;
997
998 insn->bits3.d = (do_insn - insn) * 16;
999 } else {
1000 insn->header.execution_size = do_insn->header.execution_size;
1001
1002 assert(do_insn->header.opcode == BRW_OPCODE_DO);
1003 insn->bits3.if_else.jump_count = br * (do_insn - insn + 1);
1004 insn->bits3.if_else.pop_count = 0;
1005 insn->bits3.if_else.pad0 = 0;
1006 }
1007
1008 /* insn->header.mask_control = BRW_MASK_ENABLE; */
1009
1010 /* insn->header.mask_control = BRW_MASK_DISABLE; */
1011 p->current->header.predicate_control = BRW_PREDICATE_NONE;
1012 return insn;
1013 }
1014
1015
1016 /* FORWARD JUMPS:
1017 */
1018 void brw_land_fwd_jump(struct brw_compile *p,
1019 struct brw_instruction *jmp_insn)
1020 {
1021 struct intel_context *intel = &p->brw->intel;
1022 struct brw_instruction *landing = &p->store[p->nr_insn];
1023 GLuint jmpi = 1;
1024
1025 if (intel->gen == 5)
1026 jmpi = 2;
1027
1028 assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
1029 assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE);
1030
1031 jmp_insn->bits3.ud = jmpi * ((landing - jmp_insn) - 1);
1032 }
1033
1034
1035
1036 /* To integrate with the above, it makes sense that the comparison
1037 * instruction should populate the flag register. It might be simpler
1038 * just to use the flag reg for most WM tasks?
1039 */
1040 void brw_CMP(struct brw_compile *p,
1041 struct brw_reg dest,
1042 GLuint conditional,
1043 struct brw_reg src0,
1044 struct brw_reg src1)
1045 {
1046 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP);
1047
1048 insn->header.destreg__conditionalmod = conditional;
1049 brw_set_dest(insn, dest);
1050 brw_set_src0(insn, src0);
1051 brw_set_src1(insn, src1);
1052
1053 /* guess_execution_size(insn, src0); */
1054
1055
1056 /* Make it so that future instructions will use the computed flag
1057 * value until brw_set_predicate_control_flag_value() is called
1058 * again.
1059 */
1060 if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
1061 dest.nr == 0) {
1062 p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
1063 p->flag_value = 0xff;
1064 }
1065 }
1066
1067 /* Issue 'wait' instruction for n1, host could program MMIO
1068 to wake up thread. */
1069 void brw_WAIT (struct brw_compile *p)
1070 {
1071 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_WAIT);
1072 struct brw_reg src = brw_notification_1_reg();
1073
1074 brw_set_dest(insn, src);
1075 brw_set_src0(insn, src);
1076 brw_set_src1(insn, brw_null_reg());
1077 insn->header.execution_size = 0; /* must */
1078 insn->header.predicate_control = 0;
1079 insn->header.compression_control = 0;
1080 }
1081
1082
1083 /***********************************************************************
1084 * Helpers for the various SEND message types:
1085 */
1086
1087 /** Extended math function, float[8].
1088 */
1089 void brw_math( struct brw_compile *p,
1090 struct brw_reg dest,
1091 GLuint function,
1092 GLuint saturate,
1093 GLuint msg_reg_nr,
1094 struct brw_reg src,
1095 GLuint data_type,
1096 GLuint precision )
1097 {
1098 struct intel_context *intel = &p->brw->intel;
1099
1100 if (intel->gen >= 6) {
1101 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);
1102
1103 /* Math is the same ISA format as other opcodes, except that CondModifier
1104 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1105 */
1106 insn->header.destreg__conditionalmod = function;
1107
1108 brw_set_dest(insn, dest);
1109 brw_set_src0(insn, src);
1110 brw_set_src1(insn, brw_null_reg());
1111 } else {
1112 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1113 GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
1114 GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
1115 /* Example code doesn't set predicate_control for send
1116 * instructions.
1117 */
1118 insn->header.predicate_control = 0;
1119 insn->header.destreg__conditionalmod = msg_reg_nr;
1120
1121 brw_set_dest(insn, dest);
1122 brw_set_src0(insn, src);
1123 brw_set_math_message(p->brw,
1124 insn,
1125 msg_length, response_length,
1126 function,
1127 BRW_MATH_INTEGER_UNSIGNED,
1128 precision,
1129 saturate,
1130 data_type);
1131 }
1132 }
1133
1134 /** Extended math function, float[8].
1135 */
1136 void brw_math2(struct brw_compile *p,
1137 struct brw_reg dest,
1138 GLuint function,
1139 struct brw_reg src0,
1140 struct brw_reg src1)
1141 {
1142 struct intel_context *intel = &p->brw->intel;
1143 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);
1144
1145 assert(intel->gen >= 6);
1146
1147 /* Math is the same ISA format as other opcodes, except that CondModifier
1148 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1149 */
1150 insn->header.destreg__conditionalmod = function;
1151
1152 brw_set_dest(insn, dest);
1153 brw_set_src0(insn, src0);
1154 brw_set_src1(insn, src1);
1155 }
1156
1157 /**
1158 * Extended math function, float[16].
1159 * Use 2 send instructions.
1160 */
1161 void brw_math_16( struct brw_compile *p,
1162 struct brw_reg dest,
1163 GLuint function,
1164 GLuint saturate,
1165 GLuint msg_reg_nr,
1166 struct brw_reg src,
1167 GLuint precision )
1168 {
1169 struct intel_context *intel = &p->brw->intel;
1170 struct brw_instruction *insn;
1171 GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
1172 GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
1173
1174 if (intel->gen >= 6) {
1175 insn = next_insn(p, BRW_OPCODE_MATH);
1176
1177 /* Math is the same ISA format as other opcodes, except that CondModifier
1178 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1179 */
1180 insn->header.destreg__conditionalmod = function;
1181
1182 brw_set_dest(insn, dest);
1183 brw_set_src0(insn, src);
1184 brw_set_src1(insn, brw_null_reg());
1185 return;
1186 }
1187
1188 /* First instruction:
1189 */
1190 brw_push_insn_state(p);
1191 brw_set_predicate_control_flag_value(p, 0xff);
1192 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1193
1194 insn = next_insn(p, BRW_OPCODE_SEND);
1195 insn->header.destreg__conditionalmod = msg_reg_nr;
1196
1197 brw_set_dest(insn, dest);
1198 brw_set_src0(insn, src);
1199 brw_set_math_message(p->brw,
1200 insn,
1201 msg_length, response_length,
1202 function,
1203 BRW_MATH_INTEGER_UNSIGNED,
1204 precision,
1205 saturate,
1206 BRW_MATH_DATA_VECTOR);
1207
1208 /* Second instruction:
1209 */
1210 insn = next_insn(p, BRW_OPCODE_SEND);
1211 insn->header.compression_control = BRW_COMPRESSION_2NDHALF;
1212 insn->header.destreg__conditionalmod = msg_reg_nr+1;
1213
1214 brw_set_dest(insn, offset(dest,1));
1215 brw_set_src0(insn, src);
1216 brw_set_math_message(p->brw,
1217 insn,
1218 msg_length, response_length,
1219 function,
1220 BRW_MATH_INTEGER_UNSIGNED,
1221 precision,
1222 saturate,
1223 BRW_MATH_DATA_VECTOR);
1224
1225 brw_pop_insn_state(p);
1226 }
1227
1228
1229 /**
1230 * Write block of 16 dwords/floats to the data port Render Cache scratch buffer.
1231 * Scratch offset should be a multiple of 64.
1232 * Used for register spilling.
1233 */
1234 void brw_dp_WRITE_16( struct brw_compile *p,
1235 struct brw_reg src,
1236 GLuint scratch_offset )
1237 {
1238 struct intel_context *intel = &p->brw->intel;
1239 GLuint msg_reg_nr = 1;
1240 {
1241 brw_push_insn_state(p);
1242 brw_set_mask_control(p, BRW_MASK_DISABLE);
1243 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1244
1245 /* set message header global offset field (reg 0, element 2) */
1246 brw_MOV(p,
1247 retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D),
1248 brw_imm_d(scratch_offset));
1249
1250 brw_pop_insn_state(p);
1251 }
1252
1253 {
1254 GLuint msg_length = 3;
1255 struct brw_reg dest;
1256 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1257 int send_commit_msg;
1258
1259 insn->header.predicate_control = 0; /* XXX */
1260 insn->header.compression_control = BRW_COMPRESSION_NONE;
1261 insn->header.destreg__conditionalmod = msg_reg_nr;
1262
1263 /* Until gen6, writes followed by reads from the same location
1264 * are not guaranteed to be ordered unless write_commit is set.
1265 * If set, then a no-op write is issued to the destination
1266 * register to set a dependency, and a read from the destination
1267 * can be used to ensure the ordering.
1268 *
1269 * For gen6, only writes between different threads need ordering
1270 * protection. Our use of DP writes is all about register
1271 * spilling within a thread.
1272 */
1273 if (intel->gen >= 6) {
1274 dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
1275 send_commit_msg = 0;
1276 } else {
1277 dest = brw_uw16_grf(0, 0);
1278 send_commit_msg = 1;
1279 }
1280
1281 brw_set_dest(insn, dest);
1282 brw_set_src0(insn, src);
1283
1284 brw_set_dp_write_message(p->brw,
1285 insn,
1286 255, /* binding table index (255=stateless) */
1287 BRW_DATAPORT_OWORD_BLOCK_4_OWORDS, /* msg_control */
1288 BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE, /* msg_type */
1289 msg_length,
1290 0, /* pixel scoreboard */
1291 send_commit_msg, /* response_length */
1292 0, /* eot */
1293 send_commit_msg);
1294 }
1295 }
1296
1297
1298 /**
1299 * Read block of 16 dwords/floats from the data port Render Cache scratch buffer.
1300 * Scratch offset should be a multiple of 64.
1301 * Used for register spilling.
1302 */
1303 void brw_dp_READ_16( struct brw_compile *p,
1304 struct brw_reg dest,
1305 GLuint scratch_offset )
1306 {
1307 GLuint msg_reg_nr = 1;
1308 {
1309 brw_push_insn_state(p);
1310 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1311 brw_set_mask_control(p, BRW_MASK_DISABLE);
1312
1313 /* set message header global offset field (reg 0, element 2) */
1314 brw_MOV(p,
1315 retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D),
1316 brw_imm_d(scratch_offset));
1317
1318 brw_pop_insn_state(p);
1319 }
1320
1321 {
1322 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1323
1324 insn->header.predicate_control = 0; /* XXX */
1325 insn->header.compression_control = BRW_COMPRESSION_NONE;
1326 insn->header.destreg__conditionalmod = msg_reg_nr;
1327
1328 brw_set_dest(insn, dest); /* UW? */
1329 brw_set_src0(insn, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW));
1330
1331 brw_set_dp_read_message(p->brw,
1332 insn,
1333 255, /* binding table index (255=stateless) */
1334 BRW_DATAPORT_OWORD_BLOCK_4_OWORDS,
1335 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
1336 1, /* target cache (render/scratch) */
1337 1, /* msg_length */
1338 2, /* response_length */
1339 0); /* eot */
1340 }
1341 }
1342
1343
1344 /**
1345 * Read a float[4] vector from the data port Data Cache (const buffer).
1346 * Location (in buffer) should be a multiple of 16.
1347 * Used for fetching shader constants.
1348 * If relAddr is true, we'll do an indirect fetch using the address register.
1349 */
1350 void brw_dp_READ_4( struct brw_compile *p,
1351 struct brw_reg dest,
1352 GLboolean relAddr,
1353 GLuint location,
1354 GLuint bind_table_index )
1355 {
1356 /* XXX: relAddr not implemented */
1357 GLuint msg_reg_nr = 1;
1358 {
1359 struct brw_reg b;
1360 brw_push_insn_state(p);
1361 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1362 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1363 brw_set_mask_control(p, BRW_MASK_DISABLE);
1364
1365 /* Setup MRF[1] with location/offset into const buffer */
1366 b = brw_message_reg(msg_reg_nr);
1367 b = retype(b, BRW_REGISTER_TYPE_UD);
1368 /* XXX I think we're setting all the dwords of MRF[1] to 'location'.
1369 * when the docs say only dword[2] should be set. Hmmm. But it works.
1370 */
1371 brw_MOV(p, b, brw_imm_ud(location));
1372 brw_pop_insn_state(p);
1373 }
1374
1375 {
1376 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1377
1378 insn->header.predicate_control = BRW_PREDICATE_NONE;
1379 insn->header.compression_control = BRW_COMPRESSION_NONE;
1380 insn->header.destreg__conditionalmod = msg_reg_nr;
1381 insn->header.mask_control = BRW_MASK_DISABLE;
1382
1383 /* cast dest to a uword[8] vector */
1384 dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);
1385
1386 brw_set_dest(insn, dest);
1387 brw_set_src0(insn, brw_null_reg());
1388
1389 brw_set_dp_read_message(p->brw,
1390 insn,
1391 bind_table_index,
1392 0, /* msg_control (0 means 1 Oword) */
1393 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
1394 0, /* source cache = data cache */
1395 1, /* msg_length */
1396 1, /* response_length (1 Oword) */
1397 0); /* eot */
1398 }
1399 }
1400
1401
1402 /**
1403 * Read float[4] constant(s) from VS constant buffer.
1404 * For relative addressing, two float[4] constants will be read into 'dest'.
1405 * Otherwise, one float[4] constant will be read into the lower half of 'dest'.
1406 */
1407 void brw_dp_READ_4_vs(struct brw_compile *p,
1408 struct brw_reg dest,
1409 GLuint location,
1410 GLuint bind_table_index)
1411 {
1412 struct brw_instruction *insn;
1413 GLuint msg_reg_nr = 1;
1414 struct brw_reg b;
1415
1416 /*
1417 printf("vs const read msg, location %u, msg_reg_nr %d\n",
1418 location, msg_reg_nr);
1419 */
1420
1421 /* Setup MRF[1] with location/offset into const buffer */
1422 brw_push_insn_state(p);
1423 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1424 brw_set_mask_control(p, BRW_MASK_DISABLE);
1425 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1426
1427 /* XXX I think we're setting all the dwords of MRF[1] to 'location'.
1428 * when the docs say only dword[2] should be set. Hmmm. But it works.
1429 */
1430 b = brw_message_reg(msg_reg_nr);
1431 b = retype(b, BRW_REGISTER_TYPE_UD);
1432 /*b = get_element_ud(b, 2);*/
1433 brw_MOV(p, b, brw_imm_ud(location));
1434
1435 brw_pop_insn_state(p);
1436
1437 insn = next_insn(p, BRW_OPCODE_SEND);
1438
1439 insn->header.predicate_control = BRW_PREDICATE_NONE;
1440 insn->header.compression_control = BRW_COMPRESSION_NONE;
1441 insn->header.destreg__conditionalmod = msg_reg_nr;
1442 insn->header.mask_control = BRW_MASK_DISABLE;
1443
1444 brw_set_dest(insn, dest);
1445 brw_set_src0(insn, brw_null_reg());
1446
1447 brw_set_dp_read_message(p->brw,
1448 insn,
1449 bind_table_index,
1450 0,
1451 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
1452 0, /* source cache = data cache */
1453 1, /* msg_length */
1454 1, /* response_length (1 Oword) */
1455 0); /* eot */
1456 }
1457
1458 /**
1459 * Read a float[4] constant per vertex from VS constant buffer, with
1460 * relative addressing.
1461 */
1462 void brw_dp_READ_4_vs_relative(struct brw_compile *p,
1463 struct brw_reg dest,
1464 struct brw_reg addr_reg,
1465 GLuint offset,
1466 GLuint bind_table_index)
1467 {
1468 struct intel_context *intel = &p->brw->intel;
1469 int msg_type;
1470
1471 /* Setup MRF[1] with offset into const buffer */
1472 brw_push_insn_state(p);
1473 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1474 brw_set_mask_control(p, BRW_MASK_DISABLE);
1475 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1476
1477 /* M1.0 is block offset 0, M1.4 is block offset 1, all other
1478 * fields ignored.
1479 */
1480 brw_ADD(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_UD),
1481 addr_reg, brw_imm_d(offset));
1482 brw_pop_insn_state(p);
1483
1484 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1485
1486 insn->header.predicate_control = BRW_PREDICATE_NONE;
1487 insn->header.compression_control = BRW_COMPRESSION_NONE;
1488 insn->header.destreg__conditionalmod = 0;
1489 insn->header.mask_control = BRW_MASK_DISABLE;
1490
1491 brw_set_dest(insn, dest);
1492 brw_set_src0(insn, brw_vec8_grf(0, 0));
1493
1494 if (intel->gen == 6)
1495 msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
1496 else if (intel->gen == 5 || intel->is_g4x)
1497 msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
1498 else
1499 msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
1500
1501 brw_set_dp_read_message(p->brw,
1502 insn,
1503 bind_table_index,
1504 BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
1505 msg_type,
1506 0, /* source cache = data cache */
1507 2, /* msg_length */
1508 1, /* response_length */
1509 0); /* eot */
1510 }
1511
1512
1513
1514 void brw_fb_WRITE(struct brw_compile *p,
1515 int dispatch_width,
1516 struct brw_reg dest,
1517 GLuint msg_reg_nr,
1518 struct brw_reg src0,
1519 GLuint binding_table_index,
1520 GLuint msg_length,
1521 GLuint response_length,
1522 GLboolean eot)
1523 {
1524 struct intel_context *intel = &p->brw->intel;
1525 struct brw_instruction *insn;
1526 GLuint msg_control, msg_type;
1527
1528 insn = next_insn(p, BRW_OPCODE_SEND);
1529 insn->header.predicate_control = 0; /* XXX */
1530 insn->header.compression_control = BRW_COMPRESSION_NONE;
1531
1532 if (intel->gen >= 6) {
1533 /* headerless version, just submit color payload */
1534 src0 = brw_message_reg(msg_reg_nr);
1535
1536 msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE_GEN6;
1537 } else {
1538 insn->header.destreg__conditionalmod = msg_reg_nr;
1539
1540 msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
1541 }
1542
1543 if (dispatch_width == 16)
1544 msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE;
1545 else
1546 msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01;
1547
1548 brw_set_dest(insn, dest);
1549 brw_set_src0(insn, src0);
1550 brw_set_dp_write_message(p->brw,
1551 insn,
1552 binding_table_index,
1553 msg_control,
1554 msg_type,
1555 msg_length,
1556 1, /* pixel scoreboard */
1557 response_length,
1558 eot,
1559 0 /* send_commit_msg */);
1560 }
1561
1562
1563 /**
1564 * Texture sample instruction.
1565 * Note: the msg_type plus msg_length values determine exactly what kind
1566 * of sampling operation is performed. See volume 4, page 161 of docs.
1567 */
1568 void brw_SAMPLE(struct brw_compile *p,
1569 struct brw_reg dest,
1570 GLuint msg_reg_nr,
1571 struct brw_reg src0,
1572 GLuint binding_table_index,
1573 GLuint sampler,
1574 GLuint writemask,
1575 GLuint msg_type,
1576 GLuint response_length,
1577 GLuint msg_length,
1578 GLboolean eot,
1579 GLuint header_present,
1580 GLuint simd_mode)
1581 {
1582 GLboolean need_stall = 0;
1583
1584 if (writemask == 0) {
1585 /*printf("%s: zero writemask??\n", __FUNCTION__); */
1586 return;
1587 }
1588
1589 /* Hardware doesn't do destination dependency checking on send
1590 * instructions properly. Add a workaround which generates the
1591 * dependency by other means. In practice it seems like this bug
1592 * only crops up for texture samples, and only where registers are
1593 * written by the send and then written again later without being
1594 * read in between. Luckily for us, we already track that
1595 * information and use it to modify the writemask for the
1596 * instruction, so that is a guide for whether a workaround is
1597 * needed.
1598 */
1599 if (writemask != WRITEMASK_XYZW) {
1600 GLuint dst_offset = 0;
1601 GLuint i, newmask = 0, len = 0;
1602
1603 for (i = 0; i < 4; i++) {
1604 if (writemask & (1<<i))
1605 break;
1606 dst_offset += 2;
1607 }
1608 for (; i < 4; i++) {
1609 if (!(writemask & (1<<i)))
1610 break;
1611 newmask |= 1<<i;
1612 len++;
1613 }
1614
1615 if (newmask != writemask) {
1616 need_stall = 1;
1617 /* printf("need stall %x %x\n", newmask , writemask); */
1618 }
1619 else {
1620 GLboolean dispatch_16 = GL_FALSE;
1621
1622 struct brw_reg m1 = brw_message_reg(msg_reg_nr);
1623
1624 guess_execution_size(p->current, dest);
1625 if (p->current->header.execution_size == BRW_EXECUTE_16)
1626 dispatch_16 = GL_TRUE;
1627
1628 newmask = ~newmask & WRITEMASK_XYZW;
1629
1630 brw_push_insn_state(p);
1631
1632 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1633 brw_set_mask_control(p, BRW_MASK_DISABLE);
1634
1635 brw_MOV(p, m1, brw_vec8_grf(0,0));
1636 brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12));
1637
1638 brw_pop_insn_state(p);
1639
1640 src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
1641 dest = offset(dest, dst_offset);
1642
1643 /* For 16-wide dispatch, masked channels are skipped in the
1644 * response. For 8-wide, masked channels still take up slots,
1645 * and are just not written to.
1646 */
1647 if (dispatch_16)
1648 response_length = len * 2;
1649 }
1650 }
1651
1652 {
1653 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1654
1655 insn->header.predicate_control = 0; /* XXX */
1656 insn->header.compression_control = BRW_COMPRESSION_NONE;
1657 insn->header.destreg__conditionalmod = msg_reg_nr;
1658
1659 brw_set_dest(insn, dest);
1660 brw_set_src0(insn, src0);
1661 brw_set_sampler_message(p->brw, insn,
1662 binding_table_index,
1663 sampler,
1664 msg_type,
1665 response_length,
1666 msg_length,
1667 eot,
1668 header_present,
1669 simd_mode);
1670 }
1671
1672 if (need_stall) {
1673 struct brw_reg reg = vec8(offset(dest, response_length-1));
1674
1675 /* mov (8) r9.0<1>:f r9.0<8;8,1>:f { Align1 }
1676 */
1677 brw_push_insn_state(p);
1678 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1679 brw_MOV(p, reg, reg);
1680 brw_pop_insn_state(p);
1681 }
1682
1683 }
1684
1685 /* All these variables are pretty confusing - we might be better off
1686 * using bitmasks and macros for this, in the old style. Or perhaps
1687 * just having the caller instantiate the fields in dword3 itself.
1688 */
1689 void brw_urb_WRITE(struct brw_compile *p,
1690 struct brw_reg dest,
1691 GLuint msg_reg_nr,
1692 struct brw_reg src0,
1693 GLboolean allocate,
1694 GLboolean used,
1695 GLuint msg_length,
1696 GLuint response_length,
1697 GLboolean eot,
1698 GLboolean writes_complete,
1699 GLuint offset,
1700 GLuint swizzle)
1701 {
1702 struct intel_context *intel = &p->brw->intel;
1703 struct brw_instruction *insn;
1704
1705 /* Sandybridge doesn't have the implied move for SENDs,
1706 * and the first message register index comes from src0.
1707 */
1708 if (intel->gen >= 6) {
1709 brw_push_insn_state(p);
1710 brw_set_mask_control( p, BRW_MASK_DISABLE );
1711 brw_MOV(p, brw_message_reg(msg_reg_nr), src0);
1712 brw_pop_insn_state(p);
1713 src0 = brw_message_reg(msg_reg_nr);
1714 }
1715
1716 insn = next_insn(p, BRW_OPCODE_SEND);
1717
1718 assert(msg_length < BRW_MAX_MRF);
1719
1720 brw_set_dest(insn, dest);
1721 brw_set_src0(insn, src0);
1722 brw_set_src1(insn, brw_imm_d(0));
1723
1724 if (intel->gen < 6)
1725 insn->header.destreg__conditionalmod = msg_reg_nr;
1726
1727 brw_set_urb_message(p->brw,
1728 insn,
1729 allocate,
1730 used,
1731 msg_length,
1732 response_length,
1733 eot,
1734 writes_complete,
1735 offset,
1736 swizzle);
1737 }
1738
1739 void brw_ff_sync(struct brw_compile *p,
1740 struct brw_reg dest,
1741 GLuint msg_reg_nr,
1742 struct brw_reg src0,
1743 GLboolean allocate,
1744 GLuint response_length,
1745 GLboolean eot)
1746 {
1747 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1748
1749 brw_set_dest(insn, dest);
1750 brw_set_src0(insn, src0);
1751 brw_set_src1(insn, brw_imm_d(0));
1752
1753 insn->header.destreg__conditionalmod = msg_reg_nr;
1754
1755 brw_set_ff_sync_message(p->brw,
1756 insn,
1757 allocate,
1758 response_length,
1759 eot);
1760 }