/*
 * Copyright © 2011 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
28 #include "main/macros.h"
29 #include "program/prog_print.h"
30 #include "program/prog_parameter.h"
35 gen8_vec4_generator::gen8_vec4_generator(struct brw_context
*brw
,
36 struct gl_shader_program
*shader_prog
,
37 struct gl_program
*prog
,
38 struct brw_vec4_prog_data
*prog_data
,
41 : gen8_generator(brw
, shader_prog
, prog
, mem_ctx
),
43 debug_flag(debug_flag
)
47 gen8_vec4_generator::~gen8_vec4_generator()
52 gen8_vec4_generator::generate_tex(vec4_instruction
*ir
, struct brw_reg dst
)
57 case SHADER_OPCODE_TEX
:
58 case SHADER_OPCODE_TXL
:
59 if (ir
->shadow_compare
) {
60 msg_type
= GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE
;
62 msg_type
= GEN5_SAMPLER_MESSAGE_SAMPLE_LOD
;
65 case SHADER_OPCODE_TXD
:
66 if (ir
->shadow_compare
) {
67 msg_type
= HSW_SAMPLER_MESSAGE_SAMPLE_DERIV_COMPARE
;
69 msg_type
= GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS
;
72 case SHADER_OPCODE_TXF
:
73 msg_type
= GEN5_SAMPLER_MESSAGE_SAMPLE_LD
;
75 case SHADER_OPCODE_TXF_CMS
:
76 msg_type
= GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DMS
;
78 case SHADER_OPCODE_TXF_MCS
:
79 msg_type
= GEN7_SAMPLER_MESSAGE_SAMPLE_LD_MCS
;
81 case SHADER_OPCODE_TXS
:
82 msg_type
= GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO
;
84 case SHADER_OPCODE_TG4
:
85 if (ir
->shadow_compare
) {
86 msg_type
= GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_C
;
88 msg_type
= GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4
;
91 case SHADER_OPCODE_TG4_OFFSET
:
92 if (ir
->shadow_compare
) {
93 msg_type
= GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_C
;
95 msg_type
= GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO
;
99 assert(!"should not get here: invalid VS texture opcode");
103 if (ir
->header_present
) {
104 MOV_RAW(retype(brw_message_reg(ir
->base_mrf
), BRW_REGISTER_TYPE_UD
),
105 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD
));
107 default_state
.access_mode
= BRW_ALIGN_1
;
109 if (ir
->texture_offset
) {
110 /* Set the offset bits in DWord 2. */
111 MOV_RAW(retype(brw_vec1_reg(MRF
, ir
->base_mrf
, 2),
112 BRW_REGISTER_TYPE_UD
),
113 brw_imm_ud(ir
->texture_offset
));
116 if (ir
->sampler
>= 16) {
117 /* The "Sampler Index" field can only store values between 0 and 15.
118 * However, we can add an offset to the "Sampler State Pointer"
119 * field, effectively selecting a different set of 16 samplers.
121 * The "Sampler State Pointer" needs to be aligned to a 32-byte
122 * offset, and each sampler state is only 16-bytes, so we can't
123 * exclusively use the offset - we have to use both.
125 gen8_instruction
*add
=
126 ADD(get_element_ud(brw_message_reg(ir
->base_mrf
), 3),
127 get_element_ud(brw_vec8_grf(0, 0), 3),
128 brw_imm_ud(16 * (ir
->sampler
/ 16) *
129 sizeof(gen7_sampler_state
)));
130 gen8_set_mask_control(add
, BRW_MASK_DISABLE
);
133 default_state
.access_mode
= BRW_ALIGN_16
;
136 uint32_t surf_index
=
137 prog_data
->base
.binding_table
.texture_start
+ ir
->sampler
;
139 gen8_instruction
*inst
= next_inst(BRW_OPCODE_SEND
);
140 gen8_set_dst(brw
, inst
, dst
);
141 gen8_set_src0(brw
, inst
, brw_message_reg(ir
->base_mrf
));
142 gen8_set_sampler_message(brw
, inst
,
149 BRW_SAMPLER_SIMD_MODE_SIMD4X2
);
151 brw_mark_surface_used(&prog_data
->base
, surf_index
);
155 gen8_vec4_generator::generate_urb_write(vec4_instruction
*ir
, bool vs
)
157 struct brw_reg header
= brw_vec8_grf(GEN7_MRF_HACK_START
+ ir
->base_mrf
, 0);
161 MOV_RAW(header
, brw_vec8_grf(0, 0));
163 gen8_instruction
*inst
;
164 if (!(ir
->urb_write_flags
& BRW_URB_WRITE_USE_CHANNEL_MASKS
)) {
165 /* Enable Channel Masks in the URB_WRITE_OWORD message header */
166 default_state
.access_mode
= BRW_ALIGN_1
;
167 MOV_RAW(brw_vec1_grf(GEN7_MRF_HACK_START
+ ir
->base_mrf
, 5),
169 default_state
.access_mode
= BRW_ALIGN_16
;
172 inst
= next_inst(BRW_OPCODE_SEND
);
173 gen8_set_urb_message(brw
, inst
, ir
->urb_write_flags
, ir
->mlen
, 0, ir
->offset
,
175 gen8_set_dst(brw
, inst
, brw_null_reg());
176 gen8_set_src0(brw
, inst
, header
);
180 gen8_vec4_generator::generate_gs_set_vertex_count(struct brw_reg eot_mrf_header
,
183 /* Move the vertex count into the second MRF for the EOT write. */
184 assert(eot_mrf_header
.file
== BRW_MESSAGE_REGISTER_FILE
);
185 int dst_nr
= GEN7_MRF_HACK_START
+ eot_mrf_header
.nr
+ 1;
186 MOV(retype(brw_vec8_grf(dst_nr
, 0), BRW_REGISTER_TYPE_UD
), src
);
190 gen8_vec4_generator::generate_gs_thread_end(vec4_instruction
*ir
)
192 struct brw_reg src
= brw_vec8_grf(GEN7_MRF_HACK_START
+ ir
->base_mrf
, 0);
193 gen8_instruction
*inst
;
195 /* Enable Channel Masks in the URB_WRITE_HWORD message header */
196 default_state
.access_mode
= BRW_ALIGN_1
;
197 inst
= OR(retype(brw_vec1_grf(GEN7_MRF_HACK_START
+ ir
->base_mrf
, 5),
198 BRW_REGISTER_TYPE_UD
),
199 retype(brw_vec1_grf(0, 5), BRW_REGISTER_TYPE_UD
),
200 brw_imm_ud(0xff00)); /* could be 0x1100 but shouldn't matter */
201 gen8_set_mask_control(inst
, BRW_MASK_DISABLE
);
202 default_state
.access_mode
= BRW_ALIGN_16
;
204 /* mlen = 2: g0 header + vertex count */
205 inst
= next_inst(BRW_OPCODE_SEND
);
206 gen8_set_urb_message(brw
, inst
, BRW_URB_WRITE_EOT
, 2, 0, 0, true);
207 gen8_set_dst(brw
, inst
, brw_null_reg());
208 gen8_set_src0(brw
, inst
, src
);
212 gen8_vec4_generator::generate_gs_set_write_offset(struct brw_reg dst
,
216 /* From p22 of volume 4 part 2 of the Ivy Bridge PRM (2.4.3.1 Message
219 * Slot 0 Offset. This field, after adding to the Global Offset field
220 * in the message descriptor, specifies the offset (in 256-bit units)
221 * from the start of the URB entry, as referenced by URB Handle 0, at
222 * which the data will be accessed.
224 * Similar text describes DWORD M0.4, which is slot 1 offset.
226 * Therefore, we want to multiply DWORDs 0 and 4 of src0 (the x components
227 * of the register for geometry shader invocations 0 and 1) by the
228 * immediate value in src1, and store the result in DWORDs 3 and 4 of dst.
230 * We can do this with the following EU instruction:
232 * mul(2) dst.3<1>UD src0<8;2,4>UD src1 { Align1 WE_all }
234 default_state
.access_mode
= BRW_ALIGN_1
;
235 gen8_instruction
*inst
=
236 MUL(suboffset(stride(dst
, 2, 2, 1), 3), stride(src0
, 8, 2, 4), src1
);
237 gen8_set_mask_control(inst
, BRW_MASK_DISABLE
);
238 default_state
.access_mode
= BRW_ALIGN_16
;
242 gen8_vec4_generator::generate_gs_set_dword_2_immed(struct brw_reg dst
,
245 assert(src
.file
== BRW_IMMEDIATE_VALUE
);
247 default_state
.access_mode
= BRW_ALIGN_1
;
249 gen8_instruction
*inst
= MOV(suboffset(vec1(dst
), 2), src
);
250 gen8_set_mask_control(inst
, BRW_MASK_DISABLE
);
252 default_state
.access_mode
= BRW_ALIGN_16
;
256 gen8_vec4_generator::generate_gs_prepare_channel_masks(struct brw_reg dst
)
258 /* We want to left shift just DWORD 4 (the x component belonging to the
259 * second geometry shader invocation) by 4 bits. So generate the
262 * shl(1) dst.4<1>UD dst.4<0,1,0>UD 4UD { align1 WE_all }
264 dst
= suboffset(vec1(dst
), 4);
265 default_state
.access_mode
= BRW_ALIGN_1
;
266 gen8_instruction
*inst
= SHL(dst
, dst
, brw_imm_ud(4));
267 gen8_set_mask_control(inst
, BRW_MASK_DISABLE
);
268 default_state
.access_mode
= BRW_ALIGN_16
;
272 gen8_vec4_generator::generate_gs_set_channel_masks(struct brw_reg dst
,
275 /* From p21 of volume 4 part 2 of the Ivy Bridge PRM (2.4.3.1 Message
278 * 15 Vertex 1 DATA [3] / Vertex 0 DATA[7] Channel Mask
280 * When Swizzle Control = URB_INTERLEAVED this bit controls Vertex 1
281 * DATA[3], when Swizzle Control = URB_NOSWIZZLE this bit controls
282 * Vertex 0 DATA[7]. This bit is ANDed with the corresponding
283 * channel enable to determine the final channel enable. For the
284 * URB_READ_OWORD & URB_READ_HWORD messages, when final channel
285 * enable is 1 it indicates that Vertex 1 DATA [3] will be included
286 * in the writeback message. For the URB_WRITE_OWORD &
287 * URB_WRITE_HWORD messages, when final channel enable is 1 it
288 * indicates that Vertex 1 DATA [3] will be written to the surface.
290 * 0: Vertex 1 DATA [3] / Vertex 0 DATA[7] channel not included
291 * 1: Vertex DATA [3] / Vertex 0 DATA[7] channel included
293 * 14 Vertex 1 DATA [2] Channel Mask
294 * 13 Vertex 1 DATA [1] Channel Mask
295 * 12 Vertex 1 DATA [0] Channel Mask
296 * 11 Vertex 0 DATA [3] Channel Mask
297 * 10 Vertex 0 DATA [2] Channel Mask
298 * 9 Vertex 0 DATA [1] Channel Mask
299 * 8 Vertex 0 DATA [0] Channel Mask
301 * (This is from a section of the PRM that is agnostic to the particular
302 * type of shader being executed, so "Vertex 0" and "Vertex 1" refer to
303 * geometry shader invocations 0 and 1, respectively). Since we have the
304 * enable flags for geometry shader invocation 0 in bits 3:0 of DWORD 0,
305 * and the enable flags for geometry shader invocation 1 in bits 7:0 of
306 * DWORD 4, we just need to OR them together and store the result in bits
309 * It's easier to get the EU to do this if we think of the src and dst
310 * registers as composed of 32 bytes each; then, we want to pick up the
311 * contents of bytes 0 and 16 from src, OR them together, and store them in
314 * We can do that by the following EU instruction:
316 * or(1) dst.21<1>UB src<0,1,0>UB src.16<0,1,0>UB { align1 WE_all }
318 * Note: this relies on the source register having zeros in (a) bits 7:4 of
319 * DWORD 0 and (b) bits 3:0 of DWORD 4. We can rely on (b) because the
320 * source register was prepared by GS_OPCODE_PREPARE_CHANNEL_MASKS (which
321 * shifts DWORD 4 left by 4 bits), and we can rely on (a) because prior to
322 * the execution of GS_OPCODE_PREPARE_CHANNEL_MASKS, DWORDs 0 and 4 need to
323 * contain valid channel mask values (which are in the range 0x0-0xf).
325 dst
= retype(dst
, BRW_REGISTER_TYPE_UB
);
326 src
= retype(src
, BRW_REGISTER_TYPE_UB
);
328 default_state
.access_mode
= BRW_ALIGN_1
;
330 gen8_instruction
*inst
=
331 OR(suboffset(vec1(dst
), 21), vec1(src
), suboffset(vec1(src
), 16));
332 gen8_set_mask_control(inst
, BRW_MASK_DISABLE
);
334 default_state
.access_mode
= BRW_ALIGN_16
;
338 gen8_vec4_generator::generate_oword_dual_block_offsets(struct brw_reg m1
,
339 struct brw_reg index
)
341 int second_vertex_offset
= 1;
343 m1
= retype(m1
, BRW_REGISTER_TYPE_D
);
345 /* Set up M1 (message payload). Only the block offsets in M1.0 and
346 * M1.4 are used, and the rest are ignored.
348 struct brw_reg m1_0
= suboffset(vec1(m1
), 0);
349 struct brw_reg m1_4
= suboffset(vec1(m1
), 4);
350 struct brw_reg index_0
= suboffset(vec1(index
), 0);
351 struct brw_reg index_4
= suboffset(vec1(index
), 4);
353 default_state
.mask_control
= BRW_MASK_DISABLE
;
354 default_state
.access_mode
= BRW_ALIGN_1
;
358 if (index
.file
== BRW_IMMEDIATE_VALUE
) {
359 index_4
.dw1
.ud
+= second_vertex_offset
;
362 ADD(m1_4
, index_4
, brw_imm_d(second_vertex_offset
));
365 default_state
.mask_control
= BRW_MASK_ENABLE
;
366 default_state
.access_mode
= BRW_ALIGN_16
;
370 gen8_vec4_generator::generate_scratch_read(vec4_instruction
*ir
,
372 struct brw_reg index
)
374 struct brw_reg header
= brw_vec8_grf(GEN7_MRF_HACK_START
+ ir
->base_mrf
, 0);
376 MOV_RAW(header
, brw_vec8_grf(0, 0));
378 generate_oword_dual_block_offsets(brw_message_reg(ir
->base_mrf
+ 1), index
);
380 /* Each of the 8 channel enables is considered for whether each
383 gen8_instruction
*send
= next_inst(BRW_OPCODE_SEND
);
384 gen8_set_dst(brw
, send
, dst
);
385 gen8_set_src0(brw
, send
, header
);
386 gen8_set_dp_message(brw
, send
, GEN7_SFID_DATAPORT_DATA_CACHE
,
387 255, /* binding table index: stateless access */
388 GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ
,
389 BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD
,
392 true, /* header present */
397 gen8_vec4_generator::generate_scratch_write(vec4_instruction
*ir
,
400 struct brw_reg index
)
402 struct brw_reg header
= brw_vec8_grf(GEN7_MRF_HACK_START
+ ir
->base_mrf
, 0);
404 MOV_RAW(header
, brw_vec8_grf(0, 0));
406 generate_oword_dual_block_offsets(brw_message_reg(ir
->base_mrf
+ 1), index
);
408 MOV(retype(brw_message_reg(ir
->base_mrf
+ 2), BRW_REGISTER_TYPE_D
),
409 retype(src
, BRW_REGISTER_TYPE_D
));
411 /* Each of the 8 channel enables is considered for whether each
414 gen8_instruction
*send
= next_inst(BRW_OPCODE_SEND
);
415 gen8_set_dst(brw
, send
, dst
);
416 gen8_set_src0(brw
, send
, header
);
417 gen8_set_pred_control(send
, ir
->predicate
);
418 gen8_set_dp_message(brw
, send
, GEN7_SFID_DATAPORT_DATA_CACHE
,
419 255, /* binding table index: stateless access */
420 GEN7_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE
,
421 BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD
,
424 true, /* header present */
429 gen8_vec4_generator::generate_pull_constant_load(vec4_instruction
*inst
,
431 struct brw_reg index
,
432 struct brw_reg offset
)
434 assert(index
.file
== BRW_IMMEDIATE_VALUE
&&
435 index
.type
== BRW_REGISTER_TYPE_UD
);
436 uint32_t surf_index
= index
.dw1
.ud
;
438 assert(offset
.file
== BRW_GENERAL_REGISTER_FILE
);
440 /* Each of the 8 channel enables is considered for whether each
443 gen8_instruction
*send
= next_inst(BRW_OPCODE_SEND
);
444 gen8_set_dst(brw
, send
, dst
);
445 gen8_set_src0(brw
, send
, offset
);
446 gen8_set_dp_message(brw
, send
, GEN7_SFID_DATAPORT_DATA_CACHE
,
448 GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ
,
449 0, /* message control */
452 false, /* no header */
455 brw_mark_surface_used(&prog_data
->base
, surf_index
);
459 gen8_vec4_generator::generate_untyped_atomic(vec4_instruction
*ir
,
461 struct brw_reg atomic_op
,
462 struct brw_reg surf_index
)
464 assert(atomic_op
.file
== BRW_IMMEDIATE_VALUE
&&
465 atomic_op
.type
== BRW_REGISTER_TYPE_UD
&&
466 surf_index
.file
== BRW_IMMEDIATE_VALUE
&&
467 surf_index
.type
== BRW_REGISTER_TYPE_UD
);
468 assert((atomic_op
.dw1
.ud
& ~0xf) == 0);
470 unsigned msg_control
=
471 atomic_op
.dw1
.ud
| /* Atomic Operation Type: BRW_AOP_* */
472 (1 << 5); /* Return data expected */
474 gen8_instruction
*inst
= next_inst(BRW_OPCODE_SEND
);
475 gen8_set_dst(brw
, inst
, retype(dst
, BRW_REGISTER_TYPE_UD
));
476 gen8_set_src0(brw
, inst
, brw_message_reg(ir
->base_mrf
));
477 gen8_set_dp_message(brw
, inst
, HSW_SFID_DATAPORT_DATA_CACHE_1
,
479 HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP_SIMD4X2
,
486 brw_mark_surface_used(&prog_data
->base
, surf_index
.dw1
.ud
);
492 gen8_vec4_generator::generate_untyped_surface_read(vec4_instruction
*ir
,
494 struct brw_reg surf_index
)
496 assert(surf_index
.file
== BRW_IMMEDIATE_VALUE
&&
497 surf_index
.type
== BRW_REGISTER_TYPE_UD
);
499 gen8_instruction
*inst
= next_inst(BRW_OPCODE_SEND
);
500 gen8_set_dst(brw
, inst
, retype(dst
, BRW_REGISTER_TYPE_UD
));
501 gen8_set_src0(brw
, inst
, brw_message_reg(ir
->base_mrf
));
502 gen8_set_dp_message(brw
, inst
, HSW_SFID_DATAPORT_DATA_CACHE_1
,
504 HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_READ
,
505 0xe, /* enable only the R channel */
511 brw_mark_surface_used(&prog_data
->base
, surf_index
.dw1
.ud
);
516 gen8_vec4_generator::generate_vec4_instruction(vec4_instruction
*instruction
,
520 vec4_instruction
*ir
= (vec4_instruction
*) instruction
;
522 if (dst
.width
== BRW_WIDTH_4
) {
523 /* This happens in attribute fixups for "dual instanced" geometry
524 * shaders, since they use attributes that are vec4's. Since the exec
525 * width is only 4, it's essential that the caller set
526 * force_writemask_all in order to make sure the instruction is executed
527 * regardless of which channels are enabled.
529 assert(ir
->force_writemask_all
);
531 /* Fix up any <8;8,1> or <0;4,1> source registers to <4;4,1> to satisfy
532 * the following register region restrictions (from Graphics BSpec:
533 * 3D-Media-GPGPU Engine > EU Overview > Registers and Register Regions
534 * > Register Region Restrictions)
536 * 1. ExecSize must be greater than or equal to Width.
538 * 2. If ExecSize = Width and HorzStride != 0, VertStride must be set
539 * to Width * HorzStride."
541 for (int i
= 0; i
< 3; i
++) {
542 if (src
[i
].file
== BRW_GENERAL_REGISTER_FILE
)
543 src
[i
] = stride(src
[i
], 4, 4, 1);
547 switch (ir
->opcode
) {
553 ADD(dst
, src
[0], src
[1]);
557 MUL(dst
, src
[0], src
[1]);
560 case BRW_OPCODE_MACH
:
561 MACH(dst
, src
[0], src
[1]);
565 MAD(dst
, src
[0], src
[1], src
[2]);
572 case BRW_OPCODE_RNDD
:
576 case BRW_OPCODE_RNDE
:
580 case BRW_OPCODE_RNDZ
:
585 AND(dst
, src
[0], src
[1]);
589 OR(dst
, src
[0], src
[1]);
593 XOR(dst
, src
[0], src
[1]);
601 ASR(dst
, src
[0], src
[1]);
605 SHR(dst
, src
[0], src
[1]);
609 SHL(dst
, src
[0], src
[1]);
613 CMP(dst
, ir
->conditional_mod
, src
[0], src
[1]);
617 SEL(dst
, src
[0], src
[1]);
621 DPH(dst
, src
[0], src
[1]);
625 DP4(dst
, src
[0], src
[1]);
629 DP3(dst
, src
[0], src
[1]);
633 DP2(dst
, src
[0], src
[1]);
636 case BRW_OPCODE_F32TO16
:
637 /* Emulate the Gen7 zeroing bug. */
638 MOV(retype(dst
, BRW_REGISTER_TYPE_UD
), brw_imm_ud(0u));
639 MOV(retype(dst
, BRW_REGISTER_TYPE_HF
), src
[0]);
642 case BRW_OPCODE_F16TO32
:
643 MOV(dst
, retype(src
[0], BRW_REGISTER_TYPE_HF
));
647 LRP(dst
, src
[0], src
[1], src
[2]);
650 case BRW_OPCODE_BFREV
:
651 /* BFREV only supports UD type for src and dst. */
652 BFREV(retype(dst
, BRW_REGISTER_TYPE_UD
),
653 retype(src
[0], BRW_REGISTER_TYPE_UD
));
657 /* FBH only supports UD type for dst. */
658 FBH(retype(dst
, BRW_REGISTER_TYPE_UD
), src
[0]);
662 /* FBL only supports UD type for dst. */
663 FBL(retype(dst
, BRW_REGISTER_TYPE_UD
), src
[0]);
666 case BRW_OPCODE_CBIT
:
667 /* CBIT only supports UD type for dst. */
668 CBIT(retype(dst
, BRW_REGISTER_TYPE_UD
), src
[0]);
671 case BRW_OPCODE_ADDC
:
672 ADDC(dst
, src
[0], src
[1]);
675 case BRW_OPCODE_SUBB
:
676 SUBB(dst
, src
[0], src
[1]);
680 BFE(dst
, src
[0], src
[1], src
[2]);
683 case BRW_OPCODE_BFI1
:
684 BFI1(dst
, src
[0], src
[1]);
687 case BRW_OPCODE_BFI2
:
688 BFI2(dst
, src
[0], src
[1], src
[2]);
695 case BRW_OPCODE_ELSE
:
699 case BRW_OPCODE_ENDIF
:
707 case BRW_OPCODE_BREAK
:
711 case BRW_OPCODE_CONTINUE
:
715 case BRW_OPCODE_WHILE
:
719 case SHADER_OPCODE_RCP
:
720 MATH(BRW_MATH_FUNCTION_INV
, dst
, src
[0]);
723 case SHADER_OPCODE_RSQ
:
724 MATH(BRW_MATH_FUNCTION_RSQ
, dst
, src
[0]);
727 case SHADER_OPCODE_SQRT
:
728 MATH(BRW_MATH_FUNCTION_SQRT
, dst
, src
[0]);
731 case SHADER_OPCODE_EXP2
:
732 MATH(BRW_MATH_FUNCTION_EXP
, dst
, src
[0]);
735 case SHADER_OPCODE_LOG2
:
736 MATH(BRW_MATH_FUNCTION_LOG
, dst
, src
[0]);
739 case SHADER_OPCODE_SIN
:
740 MATH(BRW_MATH_FUNCTION_SIN
, dst
, src
[0]);
743 case SHADER_OPCODE_COS
:
744 MATH(BRW_MATH_FUNCTION_COS
, dst
, src
[0]);
747 case SHADER_OPCODE_POW
:
748 MATH(BRW_MATH_FUNCTION_POW
, dst
, src
[0], src
[1]);
751 case SHADER_OPCODE_INT_QUOTIENT
:
752 MATH(BRW_MATH_FUNCTION_INT_DIV_QUOTIENT
, dst
, src
[0], src
[1]);
755 case SHADER_OPCODE_INT_REMAINDER
:
756 MATH(BRW_MATH_FUNCTION_INT_DIV_REMAINDER
, dst
, src
[0], src
[1]);
759 case SHADER_OPCODE_TEX
:
760 case SHADER_OPCODE_TXD
:
761 case SHADER_OPCODE_TXF
:
762 case SHADER_OPCODE_TXF_CMS
:
763 case SHADER_OPCODE_TXF_MCS
:
764 case SHADER_OPCODE_TXL
:
765 case SHADER_OPCODE_TXS
:
766 case SHADER_OPCODE_TG4
:
767 case SHADER_OPCODE_TG4_OFFSET
:
768 generate_tex(ir
, dst
);
771 case VS_OPCODE_URB_WRITE
:
772 generate_urb_write(ir
, true);
775 case SHADER_OPCODE_GEN4_SCRATCH_READ
:
776 generate_scratch_read(ir
, dst
, src
[0]);
779 case SHADER_OPCODE_GEN4_SCRATCH_WRITE
:
780 generate_scratch_write(ir
, dst
, src
[0], src
[1]);
783 case VS_OPCODE_PULL_CONSTANT_LOAD
:
784 case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7
:
785 generate_pull_constant_load(ir
, dst
, src
[0], src
[1]);
788 case GS_OPCODE_URB_WRITE
:
789 generate_urb_write(ir
, false);
792 case GS_OPCODE_THREAD_END
:
793 generate_gs_thread_end(ir
);
796 case GS_OPCODE_SET_WRITE_OFFSET
:
797 generate_gs_set_write_offset(dst
, src
[0], src
[1]);
800 case GS_OPCODE_SET_VERTEX_COUNT
:
801 generate_gs_set_vertex_count(dst
, src
[0]);
804 case GS_OPCODE_SET_DWORD_2_IMMED
:
805 generate_gs_set_dword_2_immed(dst
, src
[0]);
808 case GS_OPCODE_PREPARE_CHANNEL_MASKS
:
809 generate_gs_prepare_channel_masks(dst
);
812 case GS_OPCODE_SET_CHANNEL_MASKS
:
813 generate_gs_set_channel_masks(dst
, src
[0]);
816 case SHADER_OPCODE_SHADER_TIME_ADD
:
817 assert(!"XXX: Missing Gen8 vec4 support for INTEL_DEBUG=shader_time");
820 case SHADER_OPCODE_UNTYPED_ATOMIC
:
821 generate_untyped_atomic(ir
, dst
, src
[0], src
[1]);
824 case SHADER_OPCODE_UNTYPED_SURFACE_READ
:
825 generate_untyped_surface_read(ir
, dst
, src
[0]);
828 case VS_OPCODE_UNPACK_FLAGS_SIMD4X2
:
829 assert(!"VS_OPCODE_UNPACK_FLAGS_SIMD4X2 should not be used on Gen8+.");
833 if (ir
->opcode
< (int) ARRAY_SIZE(opcode_descs
)) {
834 _mesa_problem(ctx
, "Unsupported opcode in `%s' in VS\n",
835 opcode_descs
[ir
->opcode
].name
);
837 _mesa_problem(ctx
, "Unsupported opcode %d in VS", ir
->opcode
);
844 gen8_vec4_generator::generate_code(exec_list
*instructions
)
846 int last_native_inst_offset
= 0;
847 const char *last_annotation_string
= NULL
;
848 const void *last_annotation_ir
= NULL
;
850 if (unlikely(debug_flag
)) {
852 fprintf(stderr
, "Native code for %s vertex shader %d:\n",
853 shader_prog
->Label
? shader_prog
->Label
: "unnamed",
856 fprintf(stderr
, "Native code for vertex program %d:\n", prog
->Id
);
860 foreach_list(node
, instructions
) {
861 vec4_instruction
*ir
= (vec4_instruction
*) node
;
862 struct brw_reg src
[3], dst
;
864 if (unlikely(debug_flag
)) {
865 if (last_annotation_ir
!= ir
->ir
) {
866 last_annotation_ir
= ir
->ir
;
867 if (last_annotation_ir
) {
868 fprintf(stderr
, " ");
870 ((ir_instruction
*) last_annotation_ir
)->fprint(stderr
);
872 const prog_instruction
*vpi
;
873 vpi
= (const prog_instruction
*) ir
->ir
;
874 fprintf(stderr
, "%d: ", (int)(vpi
- prog
->Instructions
));
875 _mesa_fprint_instruction_opt(stderr
, vpi
, 0,
876 PROG_PRINT_DEBUG
, NULL
);
878 fprintf(stderr
, "\n");
881 if (last_annotation_string
!= ir
->annotation
) {
882 last_annotation_string
= ir
->annotation
;
883 if (last_annotation_string
)
884 fprintf(stderr
, " %s\n", last_annotation_string
);
888 for (unsigned int i
= 0; i
< 3; i
++) {
889 src
[i
] = ir
->get_src(prog_data
, i
);
893 default_state
.conditional_mod
= ir
->conditional_mod
;
894 default_state
.predicate
= ir
->predicate
;
895 default_state
.predicate_inverse
= ir
->predicate_inverse
;
896 default_state
.saturate
= ir
->saturate
;
898 const unsigned pre_emit_nr_inst
= nr_inst
;
900 generate_vec4_instruction(ir
, dst
, src
);
902 if (ir
->no_dd_clear
|| ir
->no_dd_check
) {
903 assert(nr_inst
== pre_emit_nr_inst
+ 1 ||
904 !"no_dd_check or no_dd_clear set for IR emitting more "
905 "than 1 instruction");
907 gen8_instruction
*last
= &store
[pre_emit_nr_inst
];
908 gen8_set_no_dd_clear(last
, ir
->no_dd_clear
);
909 gen8_set_no_dd_check(last
, ir
->no_dd_check
);
912 if (unlikely(debug_flag
)) {
913 gen8_disassemble(brw
, store
, last_native_inst_offset
, next_inst_offset
, stderr
);
916 last_native_inst_offset
= next_inst_offset
;
919 if (unlikely(debug_flag
)) {
920 fprintf(stderr
, "\n");
923 patch_jump_targets();
925 /* OK, while the INTEL_DEBUG=vs above is very nice for debugging VS
926 * emit issues, it doesn't get the jump distances into the output,
927 * which is often something we want to debug. So this is here in
928 * case you're doing that.
930 if (0 && unlikely(debug_flag
)) {
931 gen8_disassemble(brw
, store
, 0, next_inst_offset
, stderr
);
936 gen8_vec4_generator::generate_assembly(exec_list
*instructions
,
937 unsigned *assembly_size
)
939 default_state
.access_mode
= BRW_ALIGN_16
;
940 default_state
.exec_size
= BRW_EXECUTE_8
;
941 generate_code(instructions
);
942 *assembly_size
= next_inst_offset
;
943 return (const unsigned *) store
;
946 } /* namespace brw */