/*
 * Copyright © 2011 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
29 #include "main/macros.h"
30 #include "program/prog_print.h"
31 #include "program/prog_parameter.h"
36 gen8_vec4_generator::gen8_vec4_generator(struct brw_context
*brw
,
37 struct gl_shader_program
*shader_prog
,
38 struct gl_program
*prog
,
39 struct brw_vec4_prog_data
*prog_data
,
42 : gen8_generator(brw
, shader_prog
, prog
, mem_ctx
),
44 debug_flag(debug_flag
)
48 gen8_vec4_generator::~gen8_vec4_generator()
53 gen8_vec4_generator::generate_tex(vec4_instruction
*ir
, struct brw_reg dst
)
58 case SHADER_OPCODE_TEX
:
59 case SHADER_OPCODE_TXL
:
60 if (ir
->shadow_compare
) {
61 msg_type
= GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE
;
63 msg_type
= GEN5_SAMPLER_MESSAGE_SAMPLE_LOD
;
66 case SHADER_OPCODE_TXD
:
67 if (ir
->shadow_compare
) {
68 msg_type
= HSW_SAMPLER_MESSAGE_SAMPLE_DERIV_COMPARE
;
70 msg_type
= GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS
;
73 case SHADER_OPCODE_TXF
:
74 msg_type
= GEN5_SAMPLER_MESSAGE_SAMPLE_LD
;
76 case SHADER_OPCODE_TXF_CMS
:
77 msg_type
= GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DMS
;
79 case SHADER_OPCODE_TXF_MCS
:
80 msg_type
= GEN7_SAMPLER_MESSAGE_SAMPLE_LD_MCS
;
82 case SHADER_OPCODE_TXS
:
83 msg_type
= GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO
;
85 case SHADER_OPCODE_TG4
:
86 if (ir
->shadow_compare
) {
87 msg_type
= GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_C
;
89 msg_type
= GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4
;
92 case SHADER_OPCODE_TG4_OFFSET
:
93 if (ir
->shadow_compare
) {
94 msg_type
= GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_C
;
96 msg_type
= GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO
;
100 assert(!"should not get here: invalid VS texture opcode");
104 if (ir
->header_present
) {
105 MOV_RAW(retype(brw_message_reg(ir
->base_mrf
), BRW_REGISTER_TYPE_UD
),
106 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD
));
108 default_state
.access_mode
= BRW_ALIGN_1
;
110 if (ir
->texture_offset
) {
111 /* Set the offset bits in DWord 2. */
112 MOV_RAW(retype(brw_vec1_reg(MRF
, ir
->base_mrf
, 2),
113 BRW_REGISTER_TYPE_UD
),
114 brw_imm_ud(ir
->texture_offset
));
117 if (ir
->sampler
>= 16) {
118 /* The "Sampler Index" field can only store values between 0 and 15.
119 * However, we can add an offset to the "Sampler State Pointer"
120 * field, effectively selecting a different set of 16 samplers.
122 * The "Sampler State Pointer" needs to be aligned to a 32-byte
123 * offset, and each sampler state is only 16-bytes, so we can't
124 * exclusively use the offset - we have to use both.
126 gen8_instruction
*add
=
127 ADD(get_element_ud(brw_message_reg(ir
->base_mrf
), 3),
128 get_element_ud(brw_vec8_grf(0, 0), 3),
129 brw_imm_ud(16 * (ir
->sampler
/ 16) *
130 sizeof(gen7_sampler_state
)));
131 gen8_set_mask_control(add
, BRW_MASK_DISABLE
);
134 default_state
.access_mode
= BRW_ALIGN_16
;
137 uint32_t surf_index
=
138 prog_data
->base
.binding_table
.texture_start
+ ir
->sampler
;
140 gen8_instruction
*inst
= next_inst(BRW_OPCODE_SEND
);
141 gen8_set_dst(brw
, inst
, dst
);
142 gen8_set_src0(brw
, inst
, brw_message_reg(ir
->base_mrf
));
143 gen8_set_sampler_message(brw
, inst
,
150 BRW_SAMPLER_SIMD_MODE_SIMD4X2
);
152 brw_mark_surface_used(&prog_data
->base
, surf_index
);
156 gen8_vec4_generator::generate_urb_write(vec4_instruction
*ir
, bool vs
)
158 struct brw_reg header
= brw_vec8_grf(GEN7_MRF_HACK_START
+ ir
->base_mrf
, 0);
162 MOV_RAW(header
, brw_vec8_grf(0, 0));
164 gen8_instruction
*inst
;
165 if (!(ir
->urb_write_flags
& BRW_URB_WRITE_USE_CHANNEL_MASKS
)) {
166 /* Enable Channel Masks in the URB_WRITE_OWORD message header */
167 default_state
.access_mode
= BRW_ALIGN_1
;
168 MOV_RAW(brw_vec1_grf(GEN7_MRF_HACK_START
+ ir
->base_mrf
, 5),
170 default_state
.access_mode
= BRW_ALIGN_16
;
173 inst
= next_inst(BRW_OPCODE_SEND
);
174 gen8_set_urb_message(brw
, inst
, ir
->urb_write_flags
, ir
->mlen
, 0, ir
->offset
,
176 gen8_set_dst(brw
, inst
, brw_null_reg());
177 gen8_set_src0(brw
, inst
, header
);
181 gen8_vec4_generator::generate_gs_set_vertex_count(struct brw_reg eot_mrf_header
,
184 /* Move the vertex count into the second MRF for the EOT write. */
185 assert(eot_mrf_header
.file
== BRW_MESSAGE_REGISTER_FILE
);
186 int dst_nr
= GEN7_MRF_HACK_START
+ eot_mrf_header
.nr
+ 1;
187 MOV(retype(brw_vec8_grf(dst_nr
, 0), BRW_REGISTER_TYPE_UD
), src
);
191 gen8_vec4_generator::generate_gs_thread_end(vec4_instruction
*ir
)
193 struct brw_reg src
= brw_vec8_grf(GEN7_MRF_HACK_START
+ ir
->base_mrf
, 0);
194 gen8_instruction
*inst
;
196 /* Enable Channel Masks in the URB_WRITE_HWORD message header */
197 default_state
.access_mode
= BRW_ALIGN_1
;
198 inst
= OR(retype(brw_vec1_grf(GEN7_MRF_HACK_START
+ ir
->base_mrf
, 5),
199 BRW_REGISTER_TYPE_UD
),
200 retype(brw_vec1_grf(0, 5), BRW_REGISTER_TYPE_UD
),
201 brw_imm_ud(0xff00)); /* could be 0x1100 but shouldn't matter */
202 gen8_set_mask_control(inst
, BRW_MASK_DISABLE
);
203 default_state
.access_mode
= BRW_ALIGN_16
;
205 /* mlen = 2: g0 header + vertex count */
206 inst
= next_inst(BRW_OPCODE_SEND
);
207 gen8_set_urb_message(brw
, inst
, BRW_URB_WRITE_EOT
, 2, 0, 0, true);
208 gen8_set_dst(brw
, inst
, brw_null_reg());
209 gen8_set_src0(brw
, inst
, src
);
213 gen8_vec4_generator::generate_gs_set_write_offset(struct brw_reg dst
,
217 /* From p22 of volume 4 part 2 of the Ivy Bridge PRM (2.4.3.1 Message
220 * Slot 0 Offset. This field, after adding to the Global Offset field
221 * in the message descriptor, specifies the offset (in 256-bit units)
222 * from the start of the URB entry, as referenced by URB Handle 0, at
223 * which the data will be accessed.
225 * Similar text describes DWORD M0.4, which is slot 1 offset.
227 * Therefore, we want to multiply DWORDs 0 and 4 of src0 (the x components
228 * of the register for geometry shader invocations 0 and 1) by the
229 * immediate value in src1, and store the result in DWORDs 3 and 4 of dst.
231 * We can do this with the following EU instruction:
233 * mul(2) dst.3<1>UD src0<8;2,4>UD src1 { Align1 WE_all }
235 default_state
.access_mode
= BRW_ALIGN_1
;
236 gen8_instruction
*inst
=
237 MUL(suboffset(stride(dst
, 2, 2, 1), 3), stride(src0
, 8, 2, 4), src1
);
238 gen8_set_mask_control(inst
, BRW_MASK_DISABLE
);
239 default_state
.access_mode
= BRW_ALIGN_16
;
243 gen8_vec4_generator::generate_gs_set_dword_2_immed(struct brw_reg dst
,
246 assert(src
.file
== BRW_IMMEDIATE_VALUE
);
248 default_state
.access_mode
= BRW_ALIGN_1
;
250 gen8_instruction
*inst
= MOV(suboffset(vec1(dst
), 2), src
);
251 gen8_set_mask_control(inst
, BRW_MASK_DISABLE
);
253 default_state
.access_mode
= BRW_ALIGN_16
;
257 gen8_vec4_generator::generate_gs_prepare_channel_masks(struct brw_reg dst
)
259 /* We want to left shift just DWORD 4 (the x component belonging to the
260 * second geometry shader invocation) by 4 bits. So generate the
263 * shl(1) dst.4<1>UD dst.4<0,1,0>UD 4UD { align1 WE_all }
265 dst
= suboffset(vec1(dst
), 4);
266 default_state
.access_mode
= BRW_ALIGN_1
;
267 gen8_instruction
*inst
= SHL(dst
, dst
, brw_imm_ud(4));
268 gen8_set_mask_control(inst
, BRW_MASK_DISABLE
);
269 default_state
.access_mode
= BRW_ALIGN_16
;
273 gen8_vec4_generator::generate_gs_set_channel_masks(struct brw_reg dst
,
276 /* From p21 of volume 4 part 2 of the Ivy Bridge PRM (2.4.3.1 Message
279 * 15 Vertex 1 DATA [3] / Vertex 0 DATA[7] Channel Mask
281 * When Swizzle Control = URB_INTERLEAVED this bit controls Vertex 1
282 * DATA[3], when Swizzle Control = URB_NOSWIZZLE this bit controls
283 * Vertex 0 DATA[7]. This bit is ANDed with the corresponding
284 * channel enable to determine the final channel enable. For the
285 * URB_READ_OWORD & URB_READ_HWORD messages, when final channel
286 * enable is 1 it indicates that Vertex 1 DATA [3] will be included
287 * in the writeback message. For the URB_WRITE_OWORD &
288 * URB_WRITE_HWORD messages, when final channel enable is 1 it
289 * indicates that Vertex 1 DATA [3] will be written to the surface.
291 * 0: Vertex 1 DATA [3] / Vertex 0 DATA[7] channel not included
292 * 1: Vertex DATA [3] / Vertex 0 DATA[7] channel included
294 * 14 Vertex 1 DATA [2] Channel Mask
295 * 13 Vertex 1 DATA [1] Channel Mask
296 * 12 Vertex 1 DATA [0] Channel Mask
297 * 11 Vertex 0 DATA [3] Channel Mask
298 * 10 Vertex 0 DATA [2] Channel Mask
299 * 9 Vertex 0 DATA [1] Channel Mask
300 * 8 Vertex 0 DATA [0] Channel Mask
302 * (This is from a section of the PRM that is agnostic to the particular
303 * type of shader being executed, so "Vertex 0" and "Vertex 1" refer to
304 * geometry shader invocations 0 and 1, respectively). Since we have the
305 * enable flags for geometry shader invocation 0 in bits 3:0 of DWORD 0,
306 * and the enable flags for geometry shader invocation 1 in bits 7:0 of
307 * DWORD 4, we just need to OR them together and store the result in bits
310 * It's easier to get the EU to do this if we think of the src and dst
311 * registers as composed of 32 bytes each; then, we want to pick up the
312 * contents of bytes 0 and 16 from src, OR them together, and store them in
315 * We can do that by the following EU instruction:
317 * or(1) dst.21<1>UB src<0,1,0>UB src.16<0,1,0>UB { align1 WE_all }
319 * Note: this relies on the source register having zeros in (a) bits 7:4 of
320 * DWORD 0 and (b) bits 3:0 of DWORD 4. We can rely on (b) because the
321 * source register was prepared by GS_OPCODE_PREPARE_CHANNEL_MASKS (which
322 * shifts DWORD 4 left by 4 bits), and we can rely on (a) because prior to
323 * the execution of GS_OPCODE_PREPARE_CHANNEL_MASKS, DWORDs 0 and 4 need to
324 * contain valid channel mask values (which are in the range 0x0-0xf).
326 dst
= retype(dst
, BRW_REGISTER_TYPE_UB
);
327 src
= retype(src
, BRW_REGISTER_TYPE_UB
);
329 default_state
.access_mode
= BRW_ALIGN_1
;
331 gen8_instruction
*inst
=
332 OR(suboffset(vec1(dst
), 21), vec1(src
), suboffset(vec1(src
), 16));
333 gen8_set_mask_control(inst
, BRW_MASK_DISABLE
);
335 default_state
.access_mode
= BRW_ALIGN_16
;
339 gen8_vec4_generator::generate_oword_dual_block_offsets(struct brw_reg m1
,
340 struct brw_reg index
)
342 int second_vertex_offset
= 1;
344 m1
= retype(m1
, BRW_REGISTER_TYPE_D
);
346 /* Set up M1 (message payload). Only the block offsets in M1.0 and
347 * M1.4 are used, and the rest are ignored.
349 struct brw_reg m1_0
= suboffset(vec1(m1
), 0);
350 struct brw_reg m1_4
= suboffset(vec1(m1
), 4);
351 struct brw_reg index_0
= suboffset(vec1(index
), 0);
352 struct brw_reg index_4
= suboffset(vec1(index
), 4);
354 default_state
.mask_control
= BRW_MASK_DISABLE
;
355 default_state
.access_mode
= BRW_ALIGN_1
;
359 if (index
.file
== BRW_IMMEDIATE_VALUE
) {
360 index_4
.dw1
.ud
+= second_vertex_offset
;
363 ADD(m1_4
, index_4
, brw_imm_d(second_vertex_offset
));
366 default_state
.mask_control
= BRW_MASK_ENABLE
;
367 default_state
.access_mode
= BRW_ALIGN_16
;
371 gen8_vec4_generator::generate_scratch_read(vec4_instruction
*ir
,
373 struct brw_reg index
)
375 struct brw_reg header
= brw_vec8_grf(GEN7_MRF_HACK_START
+ ir
->base_mrf
, 0);
377 MOV_RAW(header
, brw_vec8_grf(0, 0));
379 generate_oword_dual_block_offsets(brw_message_reg(ir
->base_mrf
+ 1), index
);
381 /* Each of the 8 channel enables is considered for whether each
384 gen8_instruction
*send
= next_inst(BRW_OPCODE_SEND
);
385 gen8_set_dst(brw
, send
, dst
);
386 gen8_set_src0(brw
, send
, header
);
387 gen8_set_dp_message(brw
, send
, GEN7_SFID_DATAPORT_DATA_CACHE
,
388 255, /* binding table index: stateless access */
389 GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ
,
390 BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD
,
393 true, /* header present */
398 gen8_vec4_generator::generate_scratch_write(vec4_instruction
*ir
,
401 struct brw_reg index
)
403 struct brw_reg header
= brw_vec8_grf(GEN7_MRF_HACK_START
+ ir
->base_mrf
, 0);
405 MOV_RAW(header
, brw_vec8_grf(0, 0));
407 generate_oword_dual_block_offsets(brw_message_reg(ir
->base_mrf
+ 1), index
);
409 MOV(retype(brw_message_reg(ir
->base_mrf
+ 2), BRW_REGISTER_TYPE_D
),
410 retype(src
, BRW_REGISTER_TYPE_D
));
412 /* Each of the 8 channel enables is considered for whether each
415 gen8_instruction
*send
= next_inst(BRW_OPCODE_SEND
);
416 gen8_set_dst(brw
, send
, dst
);
417 gen8_set_src0(brw
, send
, header
);
418 gen8_set_pred_control(send
, ir
->predicate
);
419 gen8_set_dp_message(brw
, send
, GEN7_SFID_DATAPORT_DATA_CACHE
,
420 255, /* binding table index: stateless access */
421 GEN7_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE
,
422 BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD
,
425 true, /* header present */
430 gen8_vec4_generator::generate_pull_constant_load(vec4_instruction
*inst
,
432 struct brw_reg index
,
433 struct brw_reg offset
)
435 assert(index
.file
== BRW_IMMEDIATE_VALUE
&&
436 index
.type
== BRW_REGISTER_TYPE_UD
);
437 uint32_t surf_index
= index
.dw1
.ud
;
439 assert(offset
.file
== BRW_GENERAL_REGISTER_FILE
);
441 /* Each of the 8 channel enables is considered for whether each
444 gen8_instruction
*send
= next_inst(BRW_OPCODE_SEND
);
445 gen8_set_dst(brw
, send
, dst
);
446 gen8_set_src0(brw
, send
, offset
);
447 gen8_set_sampler_message(brw
, send
,
449 0, /* The LD message ignores the sampler unit. */
450 GEN5_SAMPLER_MESSAGE_SAMPLE_LD
,
453 false, /* no header */
454 BRW_SAMPLER_SIMD_MODE_SIMD4X2
);
456 brw_mark_surface_used(&prog_data
->base
, surf_index
);
460 gen8_vec4_generator::generate_untyped_atomic(vec4_instruction
*ir
,
462 struct brw_reg atomic_op
,
463 struct brw_reg surf_index
)
465 assert(atomic_op
.file
== BRW_IMMEDIATE_VALUE
&&
466 atomic_op
.type
== BRW_REGISTER_TYPE_UD
&&
467 surf_index
.file
== BRW_IMMEDIATE_VALUE
&&
468 surf_index
.type
== BRW_REGISTER_TYPE_UD
);
469 assert((atomic_op
.dw1
.ud
& ~0xf) == 0);
471 unsigned msg_control
=
472 atomic_op
.dw1
.ud
| /* Atomic Operation Type: BRW_AOP_* */
473 (1 << 5); /* Return data expected */
475 gen8_instruction
*inst
= next_inst(BRW_OPCODE_SEND
);
476 gen8_set_dst(brw
, inst
, retype(dst
, BRW_REGISTER_TYPE_UD
));
477 gen8_set_src0(brw
, inst
, brw_message_reg(ir
->base_mrf
));
478 gen8_set_dp_message(brw
, inst
, HSW_SFID_DATAPORT_DATA_CACHE_1
,
480 HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP_SIMD4X2
,
487 brw_mark_surface_used(&prog_data
->base
, surf_index
.dw1
.ud
);
493 gen8_vec4_generator::generate_untyped_surface_read(vec4_instruction
*ir
,
495 struct brw_reg surf_index
)
497 assert(surf_index
.file
== BRW_IMMEDIATE_VALUE
&&
498 surf_index
.type
== BRW_REGISTER_TYPE_UD
);
500 gen8_instruction
*inst
= next_inst(BRW_OPCODE_SEND
);
501 gen8_set_dst(brw
, inst
, retype(dst
, BRW_REGISTER_TYPE_UD
));
502 gen8_set_src0(brw
, inst
, brw_message_reg(ir
->base_mrf
));
503 gen8_set_dp_message(brw
, inst
, HSW_SFID_DATAPORT_DATA_CACHE_1
,
505 HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_READ
,
506 0xe, /* enable only the R channel */
512 brw_mark_surface_used(&prog_data
->base
, surf_index
.dw1
.ud
);
517 gen8_vec4_generator::generate_vec4_instruction(vec4_instruction
*instruction
,
521 vec4_instruction
*ir
= (vec4_instruction
*) instruction
;
523 if (dst
.width
== BRW_WIDTH_4
) {
524 /* This happens in attribute fixups for "dual instanced" geometry
525 * shaders, since they use attributes that are vec4's. Since the exec
526 * width is only 4, it's essential that the caller set
527 * force_writemask_all in order to make sure the instruction is executed
528 * regardless of which channels are enabled.
530 assert(ir
->force_writemask_all
);
532 /* Fix up any <8;8,1> or <0;4,1> source registers to <4;4,1> to satisfy
533 * the following register region restrictions (from Graphics BSpec:
534 * 3D-Media-GPGPU Engine > EU Overview > Registers and Register Regions
535 * > Register Region Restrictions)
537 * 1. ExecSize must be greater than or equal to Width.
539 * 2. If ExecSize = Width and HorzStride != 0, VertStride must be set
540 * to Width * HorzStride."
542 for (int i
= 0; i
< 3; i
++) {
543 if (src
[i
].file
== BRW_GENERAL_REGISTER_FILE
)
544 src
[i
] = stride(src
[i
], 4, 4, 1);
548 switch (ir
->opcode
) {
554 ADD(dst
, src
[0], src
[1]);
558 MUL(dst
, src
[0], src
[1]);
561 case BRW_OPCODE_MACH
:
562 MACH(dst
, src
[0], src
[1]);
566 MAD(dst
, src
[0], src
[1], src
[2]);
573 case BRW_OPCODE_RNDD
:
577 case BRW_OPCODE_RNDE
:
581 case BRW_OPCODE_RNDZ
:
586 AND(dst
, src
[0], src
[1]);
590 OR(dst
, src
[0], src
[1]);
594 XOR(dst
, src
[0], src
[1]);
602 ASR(dst
, src
[0], src
[1]);
606 SHR(dst
, src
[0], src
[1]);
610 SHL(dst
, src
[0], src
[1]);
614 CMP(dst
, ir
->conditional_mod
, src
[0], src
[1]);
618 SEL(dst
, src
[0], src
[1]);
622 DPH(dst
, src
[0], src
[1]);
626 DP4(dst
, src
[0], src
[1]);
630 DP3(dst
, src
[0], src
[1]);
634 DP2(dst
, src
[0], src
[1]);
637 case BRW_OPCODE_F32TO16
:
638 /* Emulate the Gen7 zeroing bug. */
639 MOV(retype(dst
, BRW_REGISTER_TYPE_UD
), brw_imm_ud(0u));
640 MOV(retype(dst
, BRW_REGISTER_TYPE_HF
), src
[0]);
643 case BRW_OPCODE_F16TO32
:
644 MOV(dst
, retype(src
[0], BRW_REGISTER_TYPE_HF
));
648 LRP(dst
, src
[0], src
[1], src
[2]);
651 case BRW_OPCODE_BFREV
:
652 /* BFREV only supports UD type for src and dst. */
653 BFREV(retype(dst
, BRW_REGISTER_TYPE_UD
),
654 retype(src
[0], BRW_REGISTER_TYPE_UD
));
658 /* FBH only supports UD type for dst. */
659 FBH(retype(dst
, BRW_REGISTER_TYPE_UD
), src
[0]);
663 /* FBL only supports UD type for dst. */
664 FBL(retype(dst
, BRW_REGISTER_TYPE_UD
), src
[0]);
667 case BRW_OPCODE_CBIT
:
668 /* CBIT only supports UD type for dst. */
669 CBIT(retype(dst
, BRW_REGISTER_TYPE_UD
), src
[0]);
672 case BRW_OPCODE_ADDC
:
673 ADDC(dst
, src
[0], src
[1]);
676 case BRW_OPCODE_SUBB
:
677 SUBB(dst
, src
[0], src
[1]);
681 BFE(dst
, src
[0], src
[1], src
[2]);
684 case BRW_OPCODE_BFI1
:
685 BFI1(dst
, src
[0], src
[1]);
688 case BRW_OPCODE_BFI2
:
689 BFI2(dst
, src
[0], src
[1], src
[2]);
696 case BRW_OPCODE_ELSE
:
700 case BRW_OPCODE_ENDIF
:
708 case BRW_OPCODE_BREAK
:
712 case BRW_OPCODE_CONTINUE
:
716 case BRW_OPCODE_WHILE
:
720 case SHADER_OPCODE_RCP
:
721 MATH(BRW_MATH_FUNCTION_INV
, dst
, src
[0]);
724 case SHADER_OPCODE_RSQ
:
725 MATH(BRW_MATH_FUNCTION_RSQ
, dst
, src
[0]);
728 case SHADER_OPCODE_SQRT
:
729 MATH(BRW_MATH_FUNCTION_SQRT
, dst
, src
[0]);
732 case SHADER_OPCODE_EXP2
:
733 MATH(BRW_MATH_FUNCTION_EXP
, dst
, src
[0]);
736 case SHADER_OPCODE_LOG2
:
737 MATH(BRW_MATH_FUNCTION_LOG
, dst
, src
[0]);
740 case SHADER_OPCODE_SIN
:
741 MATH(BRW_MATH_FUNCTION_SIN
, dst
, src
[0]);
744 case SHADER_OPCODE_COS
:
745 MATH(BRW_MATH_FUNCTION_COS
, dst
, src
[0]);
748 case SHADER_OPCODE_POW
:
749 MATH(BRW_MATH_FUNCTION_POW
, dst
, src
[0], src
[1]);
752 case SHADER_OPCODE_INT_QUOTIENT
:
753 MATH(BRW_MATH_FUNCTION_INT_DIV_QUOTIENT
, dst
, src
[0], src
[1]);
756 case SHADER_OPCODE_INT_REMAINDER
:
757 MATH(BRW_MATH_FUNCTION_INT_DIV_REMAINDER
, dst
, src
[0], src
[1]);
760 case SHADER_OPCODE_TEX
:
761 case SHADER_OPCODE_TXD
:
762 case SHADER_OPCODE_TXF
:
763 case SHADER_OPCODE_TXF_CMS
:
764 case SHADER_OPCODE_TXF_MCS
:
765 case SHADER_OPCODE_TXL
:
766 case SHADER_OPCODE_TXS
:
767 case SHADER_OPCODE_TG4
:
768 case SHADER_OPCODE_TG4_OFFSET
:
769 generate_tex(ir
, dst
);
772 case VS_OPCODE_URB_WRITE
:
773 generate_urb_write(ir
, true);
776 case SHADER_OPCODE_GEN4_SCRATCH_READ
:
777 generate_scratch_read(ir
, dst
, src
[0]);
780 case SHADER_OPCODE_GEN4_SCRATCH_WRITE
:
781 generate_scratch_write(ir
, dst
, src
[0], src
[1]);
784 case VS_OPCODE_PULL_CONSTANT_LOAD
:
785 case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7
:
786 generate_pull_constant_load(ir
, dst
, src
[0], src
[1]);
789 case GS_OPCODE_URB_WRITE
:
790 generate_urb_write(ir
, false);
793 case GS_OPCODE_THREAD_END
:
794 generate_gs_thread_end(ir
);
797 case GS_OPCODE_SET_WRITE_OFFSET
:
798 generate_gs_set_write_offset(dst
, src
[0], src
[1]);
801 case GS_OPCODE_SET_VERTEX_COUNT
:
802 generate_gs_set_vertex_count(dst
, src
[0]);
805 case GS_OPCODE_SET_DWORD_2_IMMED
:
806 generate_gs_set_dword_2_immed(dst
, src
[0]);
809 case GS_OPCODE_PREPARE_CHANNEL_MASKS
:
810 generate_gs_prepare_channel_masks(dst
);
813 case GS_OPCODE_SET_CHANNEL_MASKS
:
814 generate_gs_set_channel_masks(dst
, src
[0]);
817 case SHADER_OPCODE_SHADER_TIME_ADD
:
818 assert(!"XXX: Missing Gen8 vec4 support for INTEL_DEBUG=shader_time");
821 case SHADER_OPCODE_UNTYPED_ATOMIC
:
822 generate_untyped_atomic(ir
, dst
, src
[0], src
[1]);
825 case SHADER_OPCODE_UNTYPED_SURFACE_READ
:
826 generate_untyped_surface_read(ir
, dst
, src
[0]);
829 case VS_OPCODE_UNPACK_FLAGS_SIMD4X2
:
830 assert(!"VS_OPCODE_UNPACK_FLAGS_SIMD4X2 should not be used on Gen8+.");
834 if (ir
->opcode
< (int) ARRAY_SIZE(opcode_descs
)) {
835 _mesa_problem(ctx
, "Unsupported opcode in `%s' in VS\n",
836 opcode_descs
[ir
->opcode
].name
);
838 _mesa_problem(ctx
, "Unsupported opcode %d in VS", ir
->opcode
);
845 gen8_vec4_generator::generate_code(exec_list
*instructions
)
847 struct annotation_info annotation
;
848 memset(&annotation
, 0, sizeof(annotation
));
851 if (unlikely(debug_flag
))
852 cfg
= new(mem_ctx
) cfg_t(instructions
);
854 foreach_in_list(vec4_instruction
, ir
, instructions
) {
855 struct brw_reg src
[3], dst
;
857 if (unlikely(debug_flag
))
858 annotate(brw
, &annotation
, cfg
, ir
, next_inst_offset
);
860 for (unsigned int i
= 0; i
< 3; i
++) {
861 src
[i
] = ir
->get_src(prog_data
, i
);
865 default_state
.conditional_mod
= ir
->conditional_mod
;
866 default_state
.predicate
= ir
->predicate
;
867 default_state
.predicate_inverse
= ir
->predicate_inverse
;
868 default_state
.saturate
= ir
->saturate
;
870 const unsigned pre_emit_nr_inst
= nr_inst
;
872 generate_vec4_instruction(ir
, dst
, src
);
874 if (ir
->no_dd_clear
|| ir
->no_dd_check
) {
875 assert(nr_inst
== pre_emit_nr_inst
+ 1 ||
876 !"no_dd_check or no_dd_clear set for IR emitting more "
877 "than 1 instruction");
879 gen8_instruction
*last
= &store
[pre_emit_nr_inst
];
880 gen8_set_no_dd_clear(last
, ir
->no_dd_clear
);
881 gen8_set_no_dd_check(last
, ir
->no_dd_check
);
885 patch_jump_targets();
886 annotation_finalize(&annotation
, next_inst_offset
);
888 int before_size
= next_inst_offset
;
890 if (unlikely(debug_flag
)) {
892 fprintf(stderr
, "Native code for %s vertex shader %d:\n",
893 shader_prog
->Label
? shader_prog
->Label
: "unnamed",
896 fprintf(stderr
, "Native code for vertex program %d:\n", prog
->Id
);
898 fprintf(stderr
, "vec4 shader: %d instructions.\n", before_size
/ 16);
900 dump_assembly(store
, annotation
.ann_count
, annotation
.ann
, brw
, prog
);
901 ralloc_free(annotation
.ann
);
906 gen8_vec4_generator::generate_assembly(exec_list
*instructions
,
907 unsigned *assembly_size
)
909 default_state
.access_mode
= BRW_ALIGN_16
;
910 default_state
.exec_size
= BRW_EXECUTE_8
;
911 generate_code(instructions
);
913 *assembly_size
= next_inst_offset
;
914 return (const unsigned *) store
;
917 } /* namespace brw */