2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **********************************************************************/
29 * Keith Whitwell <keith@tungstengraphics.com>
33 #include "brw_context.h"
34 #include "brw_defines.h"
40 /***********************************************************************
41 * Internal helper for constructing instructions
44 static void guess_execution_size( struct brw_instruction
*insn
,
47 if (reg
.width
== BRW_WIDTH_8
&&
48 insn
->header
.compression_control
== BRW_COMPRESSION_COMPRESSED
)
49 insn
->header
.execution_size
= BRW_EXECUTE_16
;
51 insn
->header
.execution_size
= reg
.width
; /* note - definitions are compatible */
55 static void brw_set_dest( struct brw_instruction
*insn
,
58 if (dest
.file
!= BRW_ARCHITECTURE_REGISTER_FILE
&&
59 dest
.file
!= BRW_MESSAGE_REGISTER_FILE
)
60 assert(dest
.nr
< 128);
62 insn
->bits1
.da1
.dest_reg_file
= dest
.file
;
63 insn
->bits1
.da1
.dest_reg_type
= dest
.type
;
64 insn
->bits1
.da1
.dest_address_mode
= dest
.address_mode
;
66 if (dest
.address_mode
== BRW_ADDRESS_DIRECT
) {
67 insn
->bits1
.da1
.dest_reg_nr
= dest
.nr
;
69 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
70 insn
->bits1
.da1
.dest_subreg_nr
= dest
.subnr
;
71 if (dest
.hstride
== BRW_HORIZONTAL_STRIDE_0
)
72 dest
.hstride
= BRW_HORIZONTAL_STRIDE_1
;
73 insn
->bits1
.da1
.dest_horiz_stride
= dest
.hstride
;
76 insn
->bits1
.da16
.dest_subreg_nr
= dest
.subnr
/ 16;
77 insn
->bits1
.da16
.dest_writemask
= dest
.dw1
.bits
.writemask
;
81 insn
->bits1
.ia1
.dest_subreg_nr
= dest
.subnr
;
83 /* These are different sizes in align1 vs align16:
85 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
86 insn
->bits1
.ia1
.dest_indirect_offset
= dest
.dw1
.bits
.indirect_offset
;
87 if (dest
.hstride
== BRW_HORIZONTAL_STRIDE_0
)
88 dest
.hstride
= BRW_HORIZONTAL_STRIDE_1
;
89 insn
->bits1
.ia1
.dest_horiz_stride
= dest
.hstride
;
92 insn
->bits1
.ia16
.dest_indirect_offset
= dest
.dw1
.bits
.indirect_offset
;
96 /* NEW: Set the execution size based on dest.width and
97 * insn->compression_control:
99 guess_execution_size(insn
, dest
);
102 static void brw_set_src0( struct brw_instruction
*insn
,
105 if (reg
.type
!= BRW_ARCHITECTURE_REGISTER_FILE
)
106 assert(reg
.nr
< 128);
108 insn
->bits1
.da1
.src0_reg_file
= reg
.file
;
109 insn
->bits1
.da1
.src0_reg_type
= reg
.type
;
110 insn
->bits2
.da1
.src0_abs
= reg
.abs
;
111 insn
->bits2
.da1
.src0_negate
= reg
.negate
;
112 insn
->bits2
.da1
.src0_address_mode
= reg
.address_mode
;
114 if (reg
.file
== BRW_IMMEDIATE_VALUE
) {
115 insn
->bits3
.ud
= reg
.dw1
.ud
;
117 /* Required to set some fields in src1 as well:
119 insn
->bits1
.da1
.src1_reg_file
= 0; /* arf */
120 insn
->bits1
.da1
.src1_reg_type
= reg
.type
;
124 if (reg
.address_mode
== BRW_ADDRESS_DIRECT
) {
125 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
126 insn
->bits2
.da1
.src0_subreg_nr
= reg
.subnr
;
127 insn
->bits2
.da1
.src0_reg_nr
= reg
.nr
;
130 insn
->bits2
.da16
.src0_subreg_nr
= reg
.subnr
/ 16;
131 insn
->bits2
.da16
.src0_reg_nr
= reg
.nr
;
135 insn
->bits2
.ia1
.src0_subreg_nr
= reg
.subnr
;
137 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
138 insn
->bits2
.ia1
.src0_indirect_offset
= reg
.dw1
.bits
.indirect_offset
;
141 insn
->bits2
.ia16
.src0_subreg_nr
= reg
.dw1
.bits
.indirect_offset
;
145 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
146 if (reg
.width
== BRW_WIDTH_1
&&
147 insn
->header
.execution_size
== BRW_EXECUTE_1
) {
148 insn
->bits2
.da1
.src0_horiz_stride
= BRW_HORIZONTAL_STRIDE_0
;
149 insn
->bits2
.da1
.src0_width
= BRW_WIDTH_1
;
150 insn
->bits2
.da1
.src0_vert_stride
= BRW_VERTICAL_STRIDE_0
;
153 insn
->bits2
.da1
.src0_horiz_stride
= reg
.hstride
;
154 insn
->bits2
.da1
.src0_width
= reg
.width
;
155 insn
->bits2
.da1
.src0_vert_stride
= reg
.vstride
;
159 insn
->bits2
.da16
.src0_swz_x
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_X
);
160 insn
->bits2
.da16
.src0_swz_y
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Y
);
161 insn
->bits2
.da16
.src0_swz_z
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Z
);
162 insn
->bits2
.da16
.src0_swz_w
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_W
);
164 /* This is an oddity of the fact we're using the same
165 * descriptions for registers in align_16 as align_1:
167 if (reg
.vstride
== BRW_VERTICAL_STRIDE_8
)
168 insn
->bits2
.da16
.src0_vert_stride
= BRW_VERTICAL_STRIDE_4
;
170 insn
->bits2
.da16
.src0_vert_stride
= reg
.vstride
;
176 void brw_set_src1( struct brw_instruction
*insn
,
179 assert(reg
.file
!= BRW_MESSAGE_REGISTER_FILE
);
181 assert(reg
.nr
< 128);
183 insn
->bits1
.da1
.src1_reg_file
= reg
.file
;
184 insn
->bits1
.da1
.src1_reg_type
= reg
.type
;
185 insn
->bits3
.da1
.src1_abs
= reg
.abs
;
186 insn
->bits3
.da1
.src1_negate
= reg
.negate
;
188 /* Only src1 can be immediate in two-argument instructions.
190 assert(insn
->bits1
.da1
.src0_reg_file
!= BRW_IMMEDIATE_VALUE
);
192 if (reg
.file
== BRW_IMMEDIATE_VALUE
) {
193 insn
->bits3
.ud
= reg
.dw1
.ud
;
196 /* This is a hardware restriction, which may or may not be lifted
199 assert (reg
.address_mode
== BRW_ADDRESS_DIRECT
);
200 /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */
202 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
203 insn
->bits3
.da1
.src1_subreg_nr
= reg
.subnr
;
204 insn
->bits3
.da1
.src1_reg_nr
= reg
.nr
;
207 insn
->bits3
.da16
.src1_subreg_nr
= reg
.subnr
/ 16;
208 insn
->bits3
.da16
.src1_reg_nr
= reg
.nr
;
211 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
212 if (reg
.width
== BRW_WIDTH_1
&&
213 insn
->header
.execution_size
== BRW_EXECUTE_1
) {
214 insn
->bits3
.da1
.src1_horiz_stride
= BRW_HORIZONTAL_STRIDE_0
;
215 insn
->bits3
.da1
.src1_width
= BRW_WIDTH_1
;
216 insn
->bits3
.da1
.src1_vert_stride
= BRW_VERTICAL_STRIDE_0
;
219 insn
->bits3
.da1
.src1_horiz_stride
= reg
.hstride
;
220 insn
->bits3
.da1
.src1_width
= reg
.width
;
221 insn
->bits3
.da1
.src1_vert_stride
= reg
.vstride
;
225 insn
->bits3
.da16
.src1_swz_x
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_X
);
226 insn
->bits3
.da16
.src1_swz_y
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Y
);
227 insn
->bits3
.da16
.src1_swz_z
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Z
);
228 insn
->bits3
.da16
.src1_swz_w
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_W
);
230 /* This is an oddity of the fact we're using the same
231 * descriptions for registers in align_16 as align_1:
233 if (reg
.vstride
== BRW_VERTICAL_STRIDE_8
)
234 insn
->bits3
.da16
.src1_vert_stride
= BRW_VERTICAL_STRIDE_4
;
236 insn
->bits3
.da16
.src1_vert_stride
= reg
.vstride
;
243 static void brw_set_math_message( struct brw_context
*brw
,
244 struct brw_instruction
*insn
,
246 GLuint response_length
,
249 GLboolean low_precision
,
253 struct intel_context
*intel
= &brw
->intel
;
254 brw_set_src1(insn
, brw_imm_d(0));
256 if (intel
->gen
== 5) {
257 insn
->bits3
.math_gen5
.function
= function
;
258 insn
->bits3
.math_gen5
.int_type
= integer_type
;
259 insn
->bits3
.math_gen5
.precision
= low_precision
;
260 insn
->bits3
.math_gen5
.saturate
= saturate
;
261 insn
->bits3
.math_gen5
.data_type
= dataType
;
262 insn
->bits3
.math_gen5
.snapshot
= 0;
263 insn
->bits3
.math_gen5
.header_present
= 0;
264 insn
->bits3
.math_gen5
.response_length
= response_length
;
265 insn
->bits3
.math_gen5
.msg_length
= msg_length
;
266 insn
->bits3
.math_gen5
.end_of_thread
= 0;
267 insn
->bits2
.send_gen5
.sfid
= BRW_MESSAGE_TARGET_MATH
;
268 insn
->bits2
.send_gen5
.end_of_thread
= 0;
270 insn
->bits3
.math
.function
= function
;
271 insn
->bits3
.math
.int_type
= integer_type
;
272 insn
->bits3
.math
.precision
= low_precision
;
273 insn
->bits3
.math
.saturate
= saturate
;
274 insn
->bits3
.math
.data_type
= dataType
;
275 insn
->bits3
.math
.response_length
= response_length
;
276 insn
->bits3
.math
.msg_length
= msg_length
;
277 insn
->bits3
.math
.msg_target
= BRW_MESSAGE_TARGET_MATH
;
278 insn
->bits3
.math
.end_of_thread
= 0;
283 static void brw_set_ff_sync_message(struct brw_context
*brw
,
284 struct brw_instruction
*insn
,
286 GLuint response_length
,
287 GLboolean end_of_thread
)
289 struct intel_context
*intel
= &brw
->intel
;
290 brw_set_src1(insn
, brw_imm_d(0));
292 insn
->bits3
.urb_gen5
.opcode
= 1; /* FF_SYNC */
293 insn
->bits3
.urb_gen5
.offset
= 0; /* Not used by FF_SYNC */
294 insn
->bits3
.urb_gen5
.swizzle_control
= 0; /* Not used by FF_SYNC */
295 insn
->bits3
.urb_gen5
.allocate
= allocate
;
296 insn
->bits3
.urb_gen5
.used
= 0; /* Not used by FF_SYNC */
297 insn
->bits3
.urb_gen5
.complete
= 0; /* Not used by FF_SYNC */
298 insn
->bits3
.urb_gen5
.header_present
= 1;
299 insn
->bits3
.urb_gen5
.response_length
= response_length
; /* may be 1 or 0 */
300 insn
->bits3
.urb_gen5
.msg_length
= 1;
301 insn
->bits3
.urb_gen5
.end_of_thread
= end_of_thread
;
302 if (intel
->gen
>= 6) {
303 insn
->header
.destreg__conditionalmod
= BRW_MESSAGE_TARGET_URB
;
305 insn
->bits2
.send_gen5
.sfid
= BRW_MESSAGE_TARGET_URB
;
306 insn
->bits2
.send_gen5
.end_of_thread
= end_of_thread
;
310 static void brw_set_urb_message( struct brw_context
*brw
,
311 struct brw_instruction
*insn
,
315 GLuint response_length
,
316 GLboolean end_of_thread
,
319 GLuint swizzle_control
)
321 struct intel_context
*intel
= &brw
->intel
;
322 brw_set_src1(insn
, brw_imm_d(0));
324 if (intel
->gen
>= 5) {
325 insn
->bits3
.urb_gen5
.opcode
= 0; /* ? */
326 insn
->bits3
.urb_gen5
.offset
= offset
;
327 insn
->bits3
.urb_gen5
.swizzle_control
= swizzle_control
;
328 insn
->bits3
.urb_gen5
.allocate
= allocate
;
329 insn
->bits3
.urb_gen5
.used
= used
; /* ? */
330 insn
->bits3
.urb_gen5
.complete
= complete
;
331 insn
->bits3
.urb_gen5
.header_present
= 1;
332 insn
->bits3
.urb_gen5
.response_length
= response_length
;
333 insn
->bits3
.urb_gen5
.msg_length
= msg_length
;
334 insn
->bits3
.urb_gen5
.end_of_thread
= end_of_thread
;
335 if (intel
->gen
>= 6) {
336 /* For SNB, the SFID bits moved to the condmod bits, and
337 * EOT stayed in bits3 above. Does the EOT bit setting
338 * below on Ironlake even do anything?
340 insn
->header
.destreg__conditionalmod
= BRW_MESSAGE_TARGET_URB
;
342 insn
->bits2
.send_gen5
.sfid
= BRW_MESSAGE_TARGET_URB
;
343 insn
->bits2
.send_gen5
.end_of_thread
= end_of_thread
;
346 insn
->bits3
.urb
.opcode
= 0; /* ? */
347 insn
->bits3
.urb
.offset
= offset
;
348 insn
->bits3
.urb
.swizzle_control
= swizzle_control
;
349 insn
->bits3
.urb
.allocate
= allocate
;
350 insn
->bits3
.urb
.used
= used
; /* ? */
351 insn
->bits3
.urb
.complete
= complete
;
352 insn
->bits3
.urb
.response_length
= response_length
;
353 insn
->bits3
.urb
.msg_length
= msg_length
;
354 insn
->bits3
.urb
.msg_target
= BRW_MESSAGE_TARGET_URB
;
355 insn
->bits3
.urb
.end_of_thread
= end_of_thread
;
359 static void brw_set_dp_write_message( struct brw_context
*brw
,
360 struct brw_instruction
*insn
,
361 GLuint binding_table_index
,
365 GLuint pixel_scoreboard_clear
,
366 GLuint response_length
,
367 GLuint end_of_thread
)
369 struct intel_context
*intel
= &brw
->intel
;
370 brw_set_src1(insn
, brw_imm_d(0));
372 if (intel
->gen
== 5) {
373 insn
->bits3
.dp_write_gen5
.binding_table_index
= binding_table_index
;
374 insn
->bits3
.dp_write_gen5
.msg_control
= msg_control
;
375 insn
->bits3
.dp_write_gen5
.pixel_scoreboard_clear
= pixel_scoreboard_clear
;
376 insn
->bits3
.dp_write_gen5
.msg_type
= msg_type
;
377 insn
->bits3
.dp_write_gen5
.send_commit_msg
= 0;
378 insn
->bits3
.dp_write_gen5
.header_present
= 1;
379 insn
->bits3
.dp_write_gen5
.response_length
= response_length
;
380 insn
->bits3
.dp_write_gen5
.msg_length
= msg_length
;
381 insn
->bits3
.dp_write_gen5
.end_of_thread
= end_of_thread
;
382 insn
->bits2
.send_gen5
.sfid
= BRW_MESSAGE_TARGET_DATAPORT_WRITE
;
383 insn
->bits2
.send_gen5
.end_of_thread
= end_of_thread
;
385 insn
->bits3
.dp_write
.binding_table_index
= binding_table_index
;
386 insn
->bits3
.dp_write
.msg_control
= msg_control
;
387 insn
->bits3
.dp_write
.pixel_scoreboard_clear
= pixel_scoreboard_clear
;
388 insn
->bits3
.dp_write
.msg_type
= msg_type
;
389 insn
->bits3
.dp_write
.send_commit_msg
= 0;
390 insn
->bits3
.dp_write
.response_length
= response_length
;
391 insn
->bits3
.dp_write
.msg_length
= msg_length
;
392 insn
->bits3
.dp_write
.msg_target
= BRW_MESSAGE_TARGET_DATAPORT_WRITE
;
393 insn
->bits3
.dp_write
.end_of_thread
= end_of_thread
;
397 static void brw_set_dp_read_message( struct brw_context
*brw
,
398 struct brw_instruction
*insn
,
399 GLuint binding_table_index
,
404 GLuint response_length
,
405 GLuint end_of_thread
)
407 struct intel_context
*intel
= &brw
->intel
;
408 brw_set_src1(insn
, brw_imm_d(0));
410 if (intel
->gen
== 5) {
411 insn
->bits3
.dp_read_gen5
.binding_table_index
= binding_table_index
;
412 insn
->bits3
.dp_read_gen5
.msg_control
= msg_control
;
413 insn
->bits3
.dp_read_gen5
.msg_type
= msg_type
;
414 insn
->bits3
.dp_read_gen5
.target_cache
= target_cache
;
415 insn
->bits3
.dp_read_gen5
.header_present
= 1;
416 insn
->bits3
.dp_read_gen5
.response_length
= response_length
;
417 insn
->bits3
.dp_read_gen5
.msg_length
= msg_length
;
418 insn
->bits3
.dp_read_gen5
.pad1
= 0;
419 insn
->bits3
.dp_read_gen5
.end_of_thread
= end_of_thread
;
420 insn
->bits2
.send_gen5
.sfid
= BRW_MESSAGE_TARGET_DATAPORT_READ
;
421 insn
->bits2
.send_gen5
.end_of_thread
= end_of_thread
;
423 insn
->bits3
.dp_read
.binding_table_index
= binding_table_index
; /*0:7*/
424 insn
->bits3
.dp_read
.msg_control
= msg_control
; /*8:11*/
425 insn
->bits3
.dp_read
.msg_type
= msg_type
; /*12:13*/
426 insn
->bits3
.dp_read
.target_cache
= target_cache
; /*14:15*/
427 insn
->bits3
.dp_read
.response_length
= response_length
; /*16:19*/
428 insn
->bits3
.dp_read
.msg_length
= msg_length
; /*20:23*/
429 insn
->bits3
.dp_read
.msg_target
= BRW_MESSAGE_TARGET_DATAPORT_READ
; /*24:27*/
430 insn
->bits3
.dp_read
.pad1
= 0; /*28:30*/
431 insn
->bits3
.dp_read
.end_of_thread
= end_of_thread
; /*31*/
435 static void brw_set_sampler_message(struct brw_context
*brw
,
436 struct brw_instruction
*insn
,
437 GLuint binding_table_index
,
440 GLuint response_length
,
443 GLuint header_present
,
446 struct intel_context
*intel
= &brw
->intel
;
448 brw_set_src1(insn
, brw_imm_d(0));
450 if (intel
->gen
== 5) {
451 insn
->bits3
.sampler_gen5
.binding_table_index
= binding_table_index
;
452 insn
->bits3
.sampler_gen5
.sampler
= sampler
;
453 insn
->bits3
.sampler_gen5
.msg_type
= msg_type
;
454 insn
->bits3
.sampler_gen5
.simd_mode
= simd_mode
;
455 insn
->bits3
.sampler_gen5
.header_present
= header_present
;
456 insn
->bits3
.sampler_gen5
.response_length
= response_length
;
457 insn
->bits3
.sampler_gen5
.msg_length
= msg_length
;
458 insn
->bits3
.sampler_gen5
.end_of_thread
= eot
;
459 insn
->bits2
.send_gen5
.sfid
= BRW_MESSAGE_TARGET_SAMPLER
;
460 insn
->bits2
.send_gen5
.end_of_thread
= eot
;
461 } else if (intel
->is_g4x
) {
462 insn
->bits3
.sampler_g4x
.binding_table_index
= binding_table_index
;
463 insn
->bits3
.sampler_g4x
.sampler
= sampler
;
464 insn
->bits3
.sampler_g4x
.msg_type
= msg_type
;
465 insn
->bits3
.sampler_g4x
.response_length
= response_length
;
466 insn
->bits3
.sampler_g4x
.msg_length
= msg_length
;
467 insn
->bits3
.sampler_g4x
.end_of_thread
= eot
;
468 insn
->bits3
.sampler_g4x
.msg_target
= BRW_MESSAGE_TARGET_SAMPLER
;
470 insn
->bits3
.sampler
.binding_table_index
= binding_table_index
;
471 insn
->bits3
.sampler
.sampler
= sampler
;
472 insn
->bits3
.sampler
.msg_type
= msg_type
;
473 insn
->bits3
.sampler
.return_format
= BRW_SAMPLER_RETURN_FORMAT_FLOAT32
;
474 insn
->bits3
.sampler
.response_length
= response_length
;
475 insn
->bits3
.sampler
.msg_length
= msg_length
;
476 insn
->bits3
.sampler
.end_of_thread
= eot
;
477 insn
->bits3
.sampler
.msg_target
= BRW_MESSAGE_TARGET_SAMPLER
;
483 static struct brw_instruction
*next_insn( struct brw_compile
*p
,
486 struct brw_instruction
*insn
;
488 assert(p
->nr_insn
+ 1 < BRW_EU_MAX_INSN
);
490 insn
= &p
->store
[p
->nr_insn
++];
491 memcpy(insn
, p
->current
, sizeof(*insn
));
493 /* Reset this one-shot flag:
496 if (p
->current
->header
.destreg__conditionalmod
) {
497 p
->current
->header
.destreg__conditionalmod
= 0;
498 p
->current
->header
.predicate_control
= BRW_PREDICATE_NORMAL
;
501 insn
->header
.opcode
= opcode
;
506 static struct brw_instruction
*brw_alu1( struct brw_compile
*p
,
511 struct brw_instruction
*insn
= next_insn(p
, opcode
);
512 brw_set_dest(insn
, dest
);
513 brw_set_src0(insn
, src
);
517 static struct brw_instruction
*brw_alu2(struct brw_compile
*p
,
521 struct brw_reg src1
)
523 struct brw_instruction
*insn
= next_insn(p
, opcode
);
524 brw_set_dest(insn
, dest
);
525 brw_set_src0(insn
, src0
);
526 brw_set_src1(insn
, src1
);
531 /***********************************************************************
532 * Convenience routines.
535 struct brw_instruction *brw_##OP(struct brw_compile *p, \
536 struct brw_reg dest, \
537 struct brw_reg src0) \
539 return brw_alu1(p, BRW_OPCODE_##OP, dest, src0); \
543 struct brw_instruction *brw_##OP(struct brw_compile *p, \
544 struct brw_reg dest, \
545 struct brw_reg src0, \
546 struct brw_reg src1) \
548 return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1); \
580 void brw_NOP(struct brw_compile
*p
)
582 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_NOP
);
583 brw_set_dest(insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
584 brw_set_src0(insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
585 brw_set_src1(insn
, brw_imm_ud(0x0));
592 /***********************************************************************
593 * Comparisons, if/else/endif
596 struct brw_instruction
*brw_JMPI(struct brw_compile
*p
,
601 struct brw_instruction
*insn
= brw_alu2(p
, BRW_OPCODE_JMPI
, dest
, src0
, src1
);
603 insn
->header
.execution_size
= 1;
604 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
605 insn
->header
.mask_control
= BRW_MASK_DISABLE
;
607 p
->current
->header
.predicate_control
= BRW_PREDICATE_NONE
;
612 /* EU takes the value from the flag register and pushes it onto some
613 * sort of a stack (presumably merging with any flag value already on
614 * the stack). Within an if block, the flags at the top of the stack
615 * control execution on each channel of the unit, eg. on each of the
616 * 16 pixel values in our wm programs.
618 * When the matching 'else' instruction is reached (presumably by
619 * countdown of the instruction count patched in by our ELSE/ENDIF
620 * functions), the relevant flags are inverted.
622 * When the matching 'endif' instruction is reached, the flags are
623 * popped off. If the stack is now empty, normal execution resumes.
625 * No attempt is made to deal with stack overflow (14 elements?).
627 struct brw_instruction
*brw_IF(struct brw_compile
*p
, GLuint execute_size
)
629 struct brw_instruction
*insn
;
631 if (p
->single_program_flow
) {
632 assert(execute_size
== BRW_EXECUTE_1
);
634 insn
= next_insn(p
, BRW_OPCODE_ADD
);
635 insn
->header
.predicate_inverse
= 1;
637 insn
= next_insn(p
, BRW_OPCODE_IF
);
640 /* Override the defaults for this instruction:
642 brw_set_dest(insn
, brw_ip_reg());
643 brw_set_src0(insn
, brw_ip_reg());
644 brw_set_src1(insn
, brw_imm_d(0x0));
646 insn
->header
.execution_size
= execute_size
;
647 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
648 insn
->header
.predicate_control
= BRW_PREDICATE_NORMAL
;
649 insn
->header
.mask_control
= BRW_MASK_ENABLE
;
650 if (!p
->single_program_flow
)
651 insn
->header
.thread_control
= BRW_THREAD_SWITCH
;
653 p
->current
->header
.predicate_control
= BRW_PREDICATE_NONE
;
659 struct brw_instruction
*brw_ELSE(struct brw_compile
*p
,
660 struct brw_instruction
*if_insn
)
662 struct intel_context
*intel
= &p
->brw
->intel
;
663 struct brw_instruction
*insn
;
669 if (p
->single_program_flow
) {
670 insn
= next_insn(p
, BRW_OPCODE_ADD
);
672 insn
= next_insn(p
, BRW_OPCODE_ELSE
);
675 brw_set_dest(insn
, brw_ip_reg());
676 brw_set_src0(insn
, brw_ip_reg());
677 brw_set_src1(insn
, brw_imm_d(0x0));
679 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
680 insn
->header
.execution_size
= if_insn
->header
.execution_size
;
681 insn
->header
.mask_control
= BRW_MASK_ENABLE
;
682 if (!p
->single_program_flow
)
683 insn
->header
.thread_control
= BRW_THREAD_SWITCH
;
685 /* Patch the if instruction to point at this instruction.
687 if (p
->single_program_flow
) {
688 assert(if_insn
->header
.opcode
== BRW_OPCODE_ADD
);
690 if_insn
->bits3
.ud
= (insn
- if_insn
+ 1) * 16;
692 assert(if_insn
->header
.opcode
== BRW_OPCODE_IF
);
694 if_insn
->bits3
.if_else
.jump_count
= br
* (insn
- if_insn
);
695 if_insn
->bits3
.if_else
.pop_count
= 0;
696 if_insn
->bits3
.if_else
.pad0
= 0;
702 void brw_ENDIF(struct brw_compile
*p
,
703 struct brw_instruction
*patch_insn
)
705 struct intel_context
*intel
= &p
->brw
->intel
;
711 if (p
->single_program_flow
) {
712 /* In single program flow mode, there's no need to execute an ENDIF,
713 * since we don't need to do any stack operations, and if we're executing
714 * currently, we want to just continue executing.
716 struct brw_instruction
*next
= &p
->store
[p
->nr_insn
];
718 assert(patch_insn
->header
.opcode
== BRW_OPCODE_ADD
);
720 patch_insn
->bits3
.ud
= (next
- patch_insn
) * 16;
722 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_ENDIF
);
724 brw_set_dest(insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
725 brw_set_src0(insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
726 brw_set_src1(insn
, brw_imm_d(0x0));
728 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
729 insn
->header
.execution_size
= patch_insn
->header
.execution_size
;
730 insn
->header
.mask_control
= BRW_MASK_ENABLE
;
731 insn
->header
.thread_control
= BRW_THREAD_SWITCH
;
733 assert(patch_insn
->bits3
.if_else
.jump_count
== 0);
735 /* Patch the if or else instructions to point at this or the next
736 * instruction respectively.
738 if (patch_insn
->header
.opcode
== BRW_OPCODE_IF
) {
739 /* Automagically turn it into an IFF:
741 patch_insn
->header
.opcode
= BRW_OPCODE_IFF
;
742 patch_insn
->bits3
.if_else
.jump_count
= br
* (insn
- patch_insn
+ 1);
743 patch_insn
->bits3
.if_else
.pop_count
= 0;
744 patch_insn
->bits3
.if_else
.pad0
= 0;
745 } else if (patch_insn
->header
.opcode
== BRW_OPCODE_ELSE
) {
746 patch_insn
->bits3
.if_else
.jump_count
= br
* (insn
- patch_insn
+ 1);
747 patch_insn
->bits3
.if_else
.pop_count
= 1;
748 patch_insn
->bits3
.if_else
.pad0
= 0;
753 /* Also pop item off the stack in the endif instruction:
755 insn
->bits3
.if_else
.jump_count
= 0;
756 insn
->bits3
.if_else
.pop_count
= 1;
757 insn
->bits3
.if_else
.pad0
= 0;
761 struct brw_instruction
*brw_BREAK(struct brw_compile
*p
)
763 struct brw_instruction
*insn
;
764 insn
= next_insn(p
, BRW_OPCODE_BREAK
);
765 brw_set_dest(insn
, brw_ip_reg());
766 brw_set_src0(insn
, brw_ip_reg());
767 brw_set_src1(insn
, brw_imm_d(0x0));
768 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
769 insn
->header
.execution_size
= BRW_EXECUTE_8
;
770 /* insn->header.mask_control = BRW_MASK_DISABLE; */
771 insn
->bits3
.if_else
.pad0
= 0;
775 struct brw_instruction
*brw_CONT(struct brw_compile
*p
)
777 struct brw_instruction
*insn
;
778 insn
= next_insn(p
, BRW_OPCODE_CONTINUE
);
779 brw_set_dest(insn
, brw_ip_reg());
780 brw_set_src0(insn
, brw_ip_reg());
781 brw_set_src1(insn
, brw_imm_d(0x0));
782 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
783 insn
->header
.execution_size
= BRW_EXECUTE_8
;
784 /* insn->header.mask_control = BRW_MASK_DISABLE; */
785 insn
->bits3
.if_else
.pad0
= 0;
791 struct brw_instruction
*brw_DO(struct brw_compile
*p
, GLuint execute_size
)
793 if (p
->single_program_flow
) {
794 return &p
->store
[p
->nr_insn
];
796 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_DO
);
798 /* Override the defaults for this instruction:
800 brw_set_dest(insn
, brw_null_reg());
801 brw_set_src0(insn
, brw_null_reg());
802 brw_set_src1(insn
, brw_null_reg());
804 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
805 insn
->header
.execution_size
= execute_size
;
806 insn
->header
.predicate_control
= BRW_PREDICATE_NONE
;
807 /* insn->header.mask_control = BRW_MASK_ENABLE; */
808 /* insn->header.mask_control = BRW_MASK_DISABLE; */
816 struct brw_instruction
*brw_WHILE(struct brw_compile
*p
,
817 struct brw_instruction
*do_insn
)
819 struct intel_context
*intel
= &p
->brw
->intel
;
820 struct brw_instruction
*insn
;
826 if (p
->single_program_flow
)
827 insn
= next_insn(p
, BRW_OPCODE_ADD
);
829 insn
= next_insn(p
, BRW_OPCODE_WHILE
);
831 brw_set_dest(insn
, brw_ip_reg());
832 brw_set_src0(insn
, brw_ip_reg());
833 brw_set_src1(insn
, brw_imm_d(0x0));
835 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
837 if (p
->single_program_flow
) {
838 insn
->header
.execution_size
= BRW_EXECUTE_1
;
840 insn
->bits3
.d
= (do_insn
- insn
) * 16;
842 insn
->header
.execution_size
= do_insn
->header
.execution_size
;
844 assert(do_insn
->header
.opcode
== BRW_OPCODE_DO
);
845 insn
->bits3
.if_else
.jump_count
= br
* (do_insn
- insn
+ 1);
846 insn
->bits3
.if_else
.pop_count
= 0;
847 insn
->bits3
.if_else
.pad0
= 0;
850 /* insn->header.mask_control = BRW_MASK_ENABLE; */
852 /* insn->header.mask_control = BRW_MASK_DISABLE; */
853 p
->current
->header
.predicate_control
= BRW_PREDICATE_NONE
;
860 void brw_land_fwd_jump(struct brw_compile
*p
,
861 struct brw_instruction
*jmp_insn
)
863 struct intel_context
*intel
= &p
->brw
->intel
;
864 struct brw_instruction
*landing
= &p
->store
[p
->nr_insn
];
870 assert(jmp_insn
->header
.opcode
== BRW_OPCODE_JMPI
);
871 assert(jmp_insn
->bits1
.da1
.src1_reg_file
== BRW_IMMEDIATE_VALUE
);
873 jmp_insn
->bits3
.ud
= jmpi
* ((landing
- jmp_insn
) - 1);
878 /* To integrate with the above, it makes sense that the comparison
879 * instruction should populate the flag register. It might be simpler
880 * just to use the flag reg for most WM tasks?
882 void brw_CMP(struct brw_compile
*p
,
888 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_CMP
);
890 insn
->header
.destreg__conditionalmod
= conditional
;
891 brw_set_dest(insn
, dest
);
892 brw_set_src0(insn
, src0
);
893 brw_set_src1(insn
, src1
);
895 /* guess_execution_size(insn, src0); */
898 /* Make it so that future instructions will use the computed flag
899 * value until brw_set_predicate_control_flag_value() is called
902 if (dest
.file
== BRW_ARCHITECTURE_REGISTER_FILE
&&
904 p
->current
->header
.predicate_control
= BRW_PREDICATE_NORMAL
;
905 p
->flag_value
= 0xff;
909 /* Issue 'wait' instruction for n1, host could program MMIO
910 to wake up thread. */
911 void brw_WAIT (struct brw_compile
*p
)
913 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_WAIT
);
914 struct brw_reg src
= brw_notification_1_reg();
916 brw_set_dest(insn
, src
);
917 brw_set_src0(insn
, src
);
918 brw_set_src1(insn
, brw_null_reg());
919 insn
->header
.execution_size
= 0; /* must */
920 insn
->header
.predicate_control
= 0;
921 insn
->header
.compression_control
= 0;
925 /***********************************************************************
926 * Helpers for the various SEND message types:
929 /** Extended math function, float[8].
931 void brw_math( struct brw_compile
*p
,
940 struct intel_context
*intel
= &p
->brw
->intel
;
942 if (intel
->gen
>= 6) {
943 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_MATH
);
945 /* Math is the same ISA format as other opcodes, except that CondModifier
946 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
948 insn
->header
.destreg__conditionalmod
= function
;
950 brw_set_dest(insn
, dest
);
951 brw_set_src0(insn
, src
);
952 brw_set_src1(insn
, brw_null_reg());
954 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
955 GLuint msg_length
= (function
== BRW_MATH_FUNCTION_POW
) ? 2 : 1;
956 GLuint response_length
= (function
== BRW_MATH_FUNCTION_SINCOS
) ? 2 : 1;
957 /* Example code doesn't set predicate_control for send
960 insn
->header
.predicate_control
= 0;
961 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
963 brw_set_dest(insn
, dest
);
964 brw_set_src0(insn
, src
);
965 brw_set_math_message(p
->brw
,
967 msg_length
, response_length
,
969 BRW_MATH_INTEGER_UNSIGNED
,
977 * Extended math function, float[16].
978 * Use 2 send instructions.
980 void brw_math_16( struct brw_compile
*p
,
988 struct intel_context
*intel
= &p
->brw
->intel
;
989 struct brw_instruction
*insn
;
990 GLuint msg_length
= (function
== BRW_MATH_FUNCTION_POW
) ? 2 : 1;
991 GLuint response_length
= (function
== BRW_MATH_FUNCTION_SINCOS
) ? 2 : 1;
993 if (intel
->gen
>= 6) {
994 insn
= next_insn(p
, BRW_OPCODE_MATH
);
996 /* Math is the same ISA format as other opcodes, except that CondModifier
997 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
999 insn
->header
.destreg__conditionalmod
= function
;
1001 brw_set_dest(insn
, dest
);
1002 brw_set_src0(insn
, src
);
1003 brw_set_src1(insn
, brw_null_reg());
1007 /* First instruction:
1009 brw_push_insn_state(p
);
1010 brw_set_predicate_control_flag_value(p
, 0xff);
1011 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1013 insn
= next_insn(p
, BRW_OPCODE_SEND
);
1014 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
1016 brw_set_dest(insn
, dest
);
1017 brw_set_src0(insn
, src
);
1018 brw_set_math_message(p
->brw
,
1020 msg_length
, response_length
,
1022 BRW_MATH_INTEGER_UNSIGNED
,
1025 BRW_MATH_DATA_VECTOR
);
1027 /* Second instruction:
1029 insn
= next_insn(p
, BRW_OPCODE_SEND
);
1030 insn
->header
.compression_control
= BRW_COMPRESSION_2NDHALF
;
1031 insn
->header
.destreg__conditionalmod
= msg_reg_nr
+1;
1033 brw_set_dest(insn
, offset(dest
,1));
1034 brw_set_src0(insn
, src
);
1035 brw_set_math_message(p
->brw
,
1037 msg_length
, response_length
,
1039 BRW_MATH_INTEGER_UNSIGNED
,
1042 BRW_MATH_DATA_VECTOR
);
1044 brw_pop_insn_state(p
);
1049 * Write block of 16 dwords/floats to the data port Render Cache scratch buffer.
1050 * Scratch offset should be a multiple of 64.
1051 * Used for register spilling.
/*
 * brw_dp_WRITE_16: emit the instructions for a data port block write.
 *
 * Steps visible in this listing:
 *  1. Between brw_push_insn_state and brw_pop_insn_state, with mask
 *     control disabled and compression off, the immediate scratch_offset
 *     is paired with element 2 of GRF 0 retyped to D (the message header
 *     global offset field, according to the existing comment).
 *  2. A SEND is allocated with next_insn.  Its predicate control is set
 *     to 0, compression is off, and destreg__conditionalmod carries the
 *     message register number, which is fixed at 1.  The destination is
 *     the null register retyped to UW and source 0 is src.
 *  3. brw_set_dp_write_message is given binding table index 255
 *     (stateless), a 4 OWord block size as msg_control, the OWord block
 *     write message type, pixel scoreboard 0 and response length 0.
 *
 * NOTE(review): this listing appears to have lost lines.  src is used but
 * not declared in the visible parameter list, msg_length is set to 3 but
 * its use is not shown, and the call that receives the retype/brw_imm_d
 * operands is not visible.  Confirm against the upstream source.
 */
1053 void brw_dp_WRITE_16( struct brw_compile
*p
,
1055 GLuint scratch_offset
)
1057 GLuint msg_reg_nr
= 1;
/* Header setup runs with its own temporary instruction state. */
1059 brw_push_insn_state(p
);
1060 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1061 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1063 /* set message header global offset field (reg 0, element 2) */
1065 retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D
),
1066 brw_imm_d(scratch_offset
));
1068 brw_pop_insn_state(p
);
/* The write itself: a SEND with a null destination. */
1072 GLuint msg_length
= 3;
1073 struct brw_reg dest
= retype(brw_null_reg(), BRW_REGISTER_TYPE_UW
);
1074 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1076 insn
->header
.predicate_control
= 0; /* XXX */
1077 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
/* For this SEND the field below holds the message register number. */
1078 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
1080 brw_set_dest(insn
, dest
);
1081 brw_set_src0(insn
, src
);
1083 brw_set_dp_write_message(p
->brw
,
1085 255, /* binding table index (255=stateless) */
1086 BRW_DATAPORT_OWORD_BLOCK_4_OWORDS
, /* msg_control */
1087 BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE
, /* msg_type */
1089 0, /* pixel scoreboard */
1090 0, /* response_length */
/**
 * Read block of 16 dwords/floats from the data port Render Cache scratch buffer.
 * Scratch offset should be a multiple of 64.
 * Used for register spilling.
 */
/*
 * brw_dp_READ_16: emit the instructions for a data port block read
 * into dest.
 *
 * Steps visible in this listing:
 *  1. Between brw_push_insn_state and brw_pop_insn_state, with
 *     compression off and mask control disabled, the immediate
 *     scratch_offset is paired with element 2 of GRF 0 retyped to D (the
 *     message header global offset field, according to the existing
 *     comment).
 *  2. A SEND is allocated with next_insn.  Predicate control is set to
 *     0, compression is off, and destreg__conditionalmod carries the
 *     message register number, fixed at 1.  The destination is dest and
 *     source 0 is GRF 0 as a vec8 retyped to UW.
 *  3. brw_set_dp_read_message is given binding table index 255
 *     (stateless), msg_control 3 (4 OWords), the OWord block read message
 *     type, target cache 1 (render/scratch) and response length 2.
 *
 * NOTE(review): the call that receives the retype/brw_imm_d operands and
 * some arguments of brw_set_dp_read_message are not visible in this
 * listing; confirm against the upstream source.
 */
1101 void brw_dp_READ_16( struct brw_compile
*p
,
1102 struct brw_reg dest
,
1103 GLuint scratch_offset
)
1105 GLuint msg_reg_nr
= 1;
/* Header setup runs with its own temporary instruction state. */
1107 brw_push_insn_state(p
);
1108 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1109 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1111 /* set message header global offset field (reg 0, element 2) */
1113 retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D
),
1114 brw_imm_d(scratch_offset
));
1116 brw_pop_insn_state(p
);
/* The read itself: a SEND whose result lands in dest. */
1120 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1122 insn
->header
.predicate_control
= 0; /* XXX */
1123 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
/* For this SEND the field below holds the message register number. */
1124 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
1126 brw_set_dest(insn
, dest
); /* UW? */
1127 brw_set_src0(insn
, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW
));
1129 brw_set_dp_read_message(p
->brw
,
1131 255, /* binding table index (255=stateless) */
1132 3, /* msg_control (3 means 4 Owords) */
1133 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ
, /* msg_type */
1134 1, /* target cache (render/scratch) */
1136 2, /* response_length */
/**
 * Read a float[4] vector from the data port Data Cache (const buffer).
 * Location (in buffer) should be a multiple of 16.
 * Used for fetching shader constants.
 * If relAddr is true, we'll do an indirect fetch using the address register.
 */
/*
 * brw_dp_READ_4: emit the instructions for a one OWord data port read
 * into dest.
 *
 * Steps visible in this listing:
 *  1. Between brw_push_insn_state and brw_pop_insn_state, with
 *     predication off, compression off and mask control disabled, message
 *     register 1 (retyped to UD) is loaded with the immediate location by
 *     a MOV.
 *  2. A SEND is allocated with next_insn.  Predication and compression
 *     are off, mask control is disabled, and destreg__conditionalmod
 *     carries the message register number, fixed at 1.  dest is widened
 *     with vec8 and retyped to UW before being set as the destination;
 *     source 0 is the null register.
 *  3. brw_set_dp_read_message is given msg_control 0 (1 OWord), the
 *     OWord block read message type, source cache 0 (data cache) and
 *     response length 1.
 *
 * Relative addressing is marked as not implemented by the existing XXX
 * comment.
 *
 * NOTE(review): the declarations of location and b, and the place where
 * bind_table_index is passed on, are not visible in this listing; confirm
 * against the upstream source.
 */
1148 void brw_dp_READ_4( struct brw_compile
*p
,
1149 struct brw_reg dest
,
1152 GLuint bind_table_index
)
1154 /* XXX: relAddr not implemented */
1155 GLuint msg_reg_nr
= 1;
1158 brw_push_insn_state(p
);
1159 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
1160 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1161 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1163 /* Setup MRF[1] with location/offset into const buffer */
1164 b
= brw_message_reg(msg_reg_nr
);
1165 b
= retype(b
, BRW_REGISTER_TYPE_UD
);
1166 /* XXX I think we're setting all the dwords of MRF[1] to 'location'.
1167 * when the docs say only dword[2] should be set. Hmmm. But it works.
1169 brw_MOV(p
, b
, brw_imm_ud(location
));
1170 brw_pop_insn_state(p
);
1174 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1176 insn
->header
.predicate_control
= BRW_PREDICATE_NONE
;
1177 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1178 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
1179 insn
->header
.mask_control
= BRW_MASK_DISABLE
;
1181 /* cast dest to a uword[8] vector */
1182 dest
= retype(vec8(dest
), BRW_REGISTER_TYPE_UW
);
1184 brw_set_dest(insn
, dest
);
1185 brw_set_src0(insn
, brw_null_reg());
1187 brw_set_dp_read_message(p
->brw
,
1190 0, /* msg_control (0 means 1 Oword) */
1191 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ
, /* msg_type */
1192 0, /* source cache = data cache */
1194 1, /* response_length (1 Oword) */
/**
 * Read float[4] constant(s) from VS constant buffer.
 * For relative addressing, two float[4] constants will be read into 'dest'.
 * Otherwise, one float[4] constant will be read into the lower half of 'dest'.
 */
/*
 * brw_dp_READ_4_vs: emit the instructions for a one OWord data port
 * read into dest, with the buffer offset optionally derived from addrReg.
 *
 * Steps visible in this listing:
 *  1. A printf reports location and msg_reg_nr.
 *  2. Between brw_push_insn_state and brw_pop_insn_state, with
 *     compression off, mask control disabled and predication off, message
 *     register 1 (retyped to UD) is written either by an ADD of addrReg
 *     and the immediate location, or by a MOV of the immediate location.
 *  3. A SEND is allocated with next_insn.  Predication and compression
 *     are off, mask control is disabled, and destreg__conditionalmod
 *     carries the message register number, fixed at 1.  The destination
 *     is dest and source 0 is the null register.
 *  4. brw_set_dp_read_message is given oword as msg_control (0 = lower
 *     OWord, 1 = upper OWord), the OWord block read message type, source
 *     cache 0 (data cache) and response length 1.
 *
 * NOTE(review): the ADD and the MOV appear back to back here; presumably
 * they sit in alternative branches whose conditions were lost from this
 * listing, and the printf is presumably debug-only.  The declarations of
 * location, oword and b are not visible either.  Confirm against the
 * upstream source.
 */
1205 void brw_dp_READ_4_vs(struct brw_compile
*p
,
1206 struct brw_reg dest
,
1209 struct brw_reg addrReg
,
1211 GLuint bind_table_index
)
1213 GLuint msg_reg_nr
= 1;
1217 printf("vs const read msg, location %u, msg_reg_nr %d\n",
1218 location, msg_reg_nr);
1221 /* Setup MRF[1] with location/offset into const buffer */
1225 brw_push_insn_state(p
);
1226 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1227 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1228 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
1229 /*brw_set_access_mode(p, BRW_ALIGN_16);*/
1231 /* XXX I think we're setting all the dwords of MRF[1] to 'location'.
1232 * when the docs say only dword[2] should be set. Hmmm. But it works.
1234 b
= brw_message_reg(msg_reg_nr
);
1235 b
= retype(b
, BRW_REGISTER_TYPE_UD
);
1236 /*b = get_element_ud(b, 2);*/
1238 brw_ADD(p
, b
, addrReg
, brw_imm_ud(location
));
1241 brw_MOV(p
, b
, brw_imm_ud(location
));
1244 brw_pop_insn_state(p
);
/* The read itself: a SEND whose result lands in dest. */
1248 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1250 insn
->header
.predicate_control
= BRW_PREDICATE_NONE
;
1251 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1252 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
1253 insn
->header
.mask_control
= BRW_MASK_DISABLE
;
1254 /*insn->header.access_mode = BRW_ALIGN_16;*/
1256 brw_set_dest(insn
, dest
);
1257 brw_set_src0(insn
, brw_null_reg());
1259 brw_set_dp_read_message(p
->brw
,
1262 oword
, /* 0 = lower Oword, 1 = upper Oword */
1263 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ
, /* msg_type */
1264 0, /* source cache = data cache */
1266 1, /* response_length (1 Oword) */
/*
 * brw_fb_WRITE: emit a single SEND that carries a render target write
 * message.
 *
 * Visible behaviour: a SEND is allocated with next_insn, predicate
 * control is set to 0, compression is off, and destreg__conditionalmod
 * carries msg_reg_nr.  dest and src0 become the destination and source 0.
 * brw_set_dp_write_message is then given binding_table_index, the SIMD16
 * single source render target write msg_control, the render target write
 * message type, and pixel scoreboard 1.
 *
 * NOTE(review): msg_reg_nr is used but not declared in the visible
 * parameter list, and the remaining message arguments (including where
 * response_length goes) are not visible in this listing; confirm against
 * the upstream source.
 */
1273 void brw_fb_WRITE(struct brw_compile
*p
,
1274 struct brw_reg dest
,
1276 struct brw_reg src0
,
1277 GLuint binding_table_index
,
1279 GLuint response_length
,
1282 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1284 insn
->header
.predicate_control
= 0; /* XXX */
1285 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
/* For this SEND the field below holds the message register number. */
1286 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
1288 brw_set_dest(insn
, dest
);
1289 brw_set_src0(insn
, src0
);
1290 brw_set_dp_write_message(p
->brw
,
1292 binding_table_index
,
1293 BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE
, /* msg_control */
1294 BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE
, /* msg_type */
1296 1, /* pixel scoreboard */
/**
 * Texture sample instruction.
 * Note: the msg_type plus msg_length values determine exactly what kind
 * of sampling operation is performed. See volume 4, page 161 of docs.
 */
/*
 * brw_SAMPLE: emit a sampler SEND, plus extra instructions around it for
 * partial writemasks.
 *
 * Structure visible in this listing:
 *  - need_stall starts at 0, and writemask is first compared against 0.
 *  - When writemask differs from WRITEMASK_XYZW, two loops walk the four
 *    channel bits of writemask with a shared index i, and the result
 *    newmask is compared against writemask.  The loop bodies are not
 *    visible here.
 *  - One path builds a message header: dispatch_16 is derived from the
 *    execution size that guess_execution_size picks for dest, newmask is
 *    inverted within WRITEMASK_XYZW, and, with compression off and mask
 *    control disabled, GRF 0 is copied into message register msg_reg_nr
 *    and element 2 of that register is overwritten with newmask shifted
 *    left by 12.  src0 is then replaced by the null register (UW), dest
 *    is advanced by dst_offset, and response_length is set to len * 2.
 *  - The SEND itself gets predicate control 0, compression off,
 *    destreg__conditionalmod = msg_reg_nr, dest and src0, and is
 *    described with brw_set_sampler_message.
 *  - Finally a MOV of a register onto itself is emitted, compression
 *    off, for the last register of the response (dest offset by
 *    response_length - 1).
 *
 * NOTE(review): several parameters (msg_reg_nr, writemask and others),
 * the loop bodies and the conditions selecting these paths are missing
 * from this listing, so which path runs when cannot be stated from here.
 * Presumably the final MOV is tied to need_stall; confirm against the
 * upstream source.
 */
1307 void brw_SAMPLE(struct brw_compile
*p
,
1308 struct brw_reg dest
,
1310 struct brw_reg src0
,
1311 GLuint binding_table_index
,
1315 GLuint response_length
,
1318 GLuint header_present
,
1321 GLboolean need_stall
= 0;
1323 if (writemask
== 0) {
1324 /*printf("%s: zero writemask??\n", __FUNCTION__); */
1328 /* Hardware doesn't do destination dependency checking on send
1329 * instructions properly. Add a workaround which generates the
1330 * dependency by other means. In practice it seems like this bug
1331 * only crops up for texture samples, and only where registers are
1332 * written by the send and then written again later without being
1333 * read in between. Luckily for us, we already track that
1334 * information and use it to modify the writemask for the
1335 * instruction, so that is a guide for whether a workaround is
1338 if (writemask
!= WRITEMASK_XYZW
) {
1339 GLuint dst_offset
= 0;
1340 GLuint i
, newmask
= 0, len
= 0;
1342 for (i
= 0; i
< 4; i
++) {
1343 if (writemask
& (1<<i
))
1347 for (; i
< 4; i
++) {
1348 if (!(writemask
& (1<<i
)))
1354 if (newmask
!= writemask
) {
1356 /* printf("need stall %x %x\n", newmask , writemask); */
1359 GLboolean dispatch_16
= GL_FALSE
;
1361 struct brw_reg m1
= brw_message_reg(msg_reg_nr
);
1363 guess_execution_size(p
->current
, dest
);
1364 if (p
->current
->header
.execution_size
== BRW_EXECUTE_16
)
1365 dispatch_16
= GL_TRUE
;
1367 newmask
= ~newmask
& WRITEMASK_XYZW
;
1369 brw_push_insn_state(p
);
1371 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1372 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1374 brw_MOV(p
, m1
, brw_vec8_grf(0,0));
1375 brw_MOV(p
, get_element_ud(m1
, 2), brw_imm_ud(newmask
<< 12));
1377 brw_pop_insn_state(p
);
1379 src0
= retype(brw_null_reg(), BRW_REGISTER_TYPE_UW
);
1380 dest
= offset(dest
, dst_offset
);
1382 /* For 16-wide dispatch, masked channels are skipped in the
1383 * response. For 8-wide, masked channels still take up slots,
1384 * and are just not written to.
1387 response_length
= len
* 2;
1392 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1394 insn
->header
.predicate_control
= 0; /* XXX */
1395 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1396 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
1398 brw_set_dest(insn
, dest
);
1399 brw_set_src0(insn
, src0
);
1400 brw_set_sampler_message(p
->brw
, insn
,
1401 binding_table_index
,
1412 struct brw_reg reg
= vec8(offset(dest
, response_length
-1));
1414 /* mov (8) r9.0<1>:f r9.0<8;8,1>:f { Align1 }
1416 brw_push_insn_state(p
);
1417 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1418 brw_MOV(p
, reg
, reg
);
1419 brw_pop_insn_state(p
);
/* All these variables are pretty confusing - we might be better off
 * using bitmasks and macros for this, in the old style.  Or perhaps
 * just having the caller instantiate the fields in dword3 itself.
 */
/*
 * brw_urb_WRITE: emit a SEND that carries a URB message.
 *
 * Visible behaviour:
 *  - intel is taken from p->brw.  When intel->gen is 6 or higher, an
 *    explicit MOV copies src0 into message register msg_reg_nr with mask
 *    control disabled (inside a push/pop of the instruction state), and
 *    src0 is then replaced by that message register.  The existing
 *    comment gives the reason: on Sandybridge SEND has no implied move and
 *    takes the first message register index from src0.
 *  - A SEND is allocated with next_insn, and msg_length is asserted to
 *    be below BRW_MAX_MRF.
 *  - dest, src0 and an immediate 0 (as source 1) are set on the
 *    instruction, destreg__conditionalmod carries msg_reg_nr, and
 *    brw_set_urb_message describes the message.
 *
 * NOTE(review): msg_reg_nr and msg_length are used but not declared in
 * the visible parameter list, and the arguments passed on to
 * brw_set_urb_message (including writes_complete and response_length)
 * are not visible in this listing; confirm against the upstream source.
 */
1428 void brw_urb_WRITE(struct brw_compile
*p
,
1429 struct brw_reg dest
,
1431 struct brw_reg src0
,
1435 GLuint response_length
,
1437 GLboolean writes_complete
,
1441 struct intel_context
*intel
= &p
->brw
->intel
;
1442 struct brw_instruction
*insn
;
1444 /* Sandybridge doesn't have the implied move for SENDs,
1445 * and the first message register index comes from src0.
1447 if (intel
->gen
>= 6) {
1448 brw_push_insn_state(p
);
1449 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1450 brw_MOV(p
, brw_message_reg(msg_reg_nr
), src0
);
1451 brw_pop_insn_state(p
);
1452 src0
= brw_message_reg(msg_reg_nr
);
1455 insn
= next_insn(p
, BRW_OPCODE_SEND
);
1457 assert(msg_length
< BRW_MAX_MRF
);
1459 brw_set_dest(insn
, dest
);
1460 brw_set_src0(insn
, src0
);
1461 brw_set_src1(insn
, brw_imm_d(0));
1464 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
1466 brw_set_urb_message(p
->brw
,
1478 void brw_ff_sync(struct brw_compile
*p
,
1479 struct brw_reg dest
,
1481 struct brw_reg src0
,
1483 GLuint response_length
,
1486 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1488 brw_set_dest(insn
, dest
);
1489 brw_set_src0(insn
, src0
);
1490 brw_set_src1(insn
, brw_imm_d(0));
1492 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
1494 brw_set_ff_sync_message(p
->brw
,