2 * Copyright © 2011 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
28 #include "main/macros.h"
29 #include "program/prog_print.h"
30 #include "program/prog_parameter.h"
/* Constructor: forwards brw, shader_prog, prog, and mem_ctx to the
 * gen8_generator base class and stores the debug flag used by
 * generate_code() to decide whether to print disassembly.
 * NOTE(review): this extract is missing interior lines (the trailing
 * parameters and, presumably, a prog_data initializer) -- confirm against
 * the complete file.
 */
35 gen8_vec4_generator::gen8_vec4_generator(struct brw_context
*brw
,
36 struct gl_shader_program
*shader_prog
,
37 struct gl_program
*prog
,
38 struct brw_vec4_prog_data
*prog_data
,
41 : gen8_generator(brw
, shader_prog
, prog
, mem_ctx
),
43 debug_flag(debug_flag
)
/* Destructor (body not visible in this extract). */
47 gen8_vec4_generator::~gen8_vec4_generator()
52 gen8_vec4_generator::mark_surface_used(unsigned surf_index
)
54 assert(surf_index
< BRW_MAX_SURFACES
);
56 prog_data
->base
.binding_table
.size_bytes
=
57 MAX2(prog_data
->base
.binding_table
.size_bytes
, (surf_index
+ 1) * 4);
/* Emit a sampler SEND for a vec4 texture IR instruction.
 * NOTE(review): this extract is missing punctuation-only lines (braces,
 * break statements) and the sampler-message argument lines -- verify the
 * exact structure against the complete file.
 */
61 gen8_vec4_generator::generate_tex(vec4_instruction
*ir
, struct brw_reg dst
)
/* Select the sampler message type from the IR opcode; shadow comparisons
 * use the *_COMPARE message variants.
 */
66 case SHADER_OPCODE_TEX
:
67 case SHADER_OPCODE_TXL
:
68 if (ir
->shadow_compare
) {
69 msg_type
= GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE
;
71 msg_type
= GEN5_SAMPLER_MESSAGE_SAMPLE_LOD
;
74 case SHADER_OPCODE_TXD
:
75 if (ir
->shadow_compare
) {
76 msg_type
= HSW_SAMPLER_MESSAGE_SAMPLE_DERIV_COMPARE
;
78 msg_type
= GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS
;
81 case SHADER_OPCODE_TXF
:
82 msg_type
= GEN5_SAMPLER_MESSAGE_SAMPLE_LD
;
84 case SHADER_OPCODE_TXF_CMS
:
85 msg_type
= GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DMS
;
87 case SHADER_OPCODE_TXF_MCS
:
88 msg_type
= GEN7_SAMPLER_MESSAGE_SAMPLE_LD_MCS
;
90 case SHADER_OPCODE_TXS
:
91 msg_type
= GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO
;
93 case SHADER_OPCODE_TG4
:
94 if (ir
->shadow_compare
) {
95 msg_type
= GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_C
;
97 msg_type
= GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4
;
100 case SHADER_OPCODE_TG4_OFFSET
:
101 if (ir
->shadow_compare
) {
102 msg_type
= GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_C
;
104 msg_type
= GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO
;
108 assert(!"should not get here: invalid VS texture opcode");
/* If a message header is needed, it starts as a raw copy of g0. */
112 if (ir
->header_present
) {
113 MOV_RAW(retype(brw_message_reg(ir
->base_mrf
), BRW_REGISTER_TYPE_UD
),
114 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD
));
/* Header fields are scalar DWords, so switch to Align1 while writing them. */
116 default_state
.access_mode
= BRW_ALIGN_1
;
118 if (ir
->texture_offset
) {
119 /* Set the offset bits in DWord 2. */
120 MOV_RAW(retype(brw_vec1_reg(MRF
, ir
->base_mrf
, 2),
121 BRW_REGISTER_TYPE_UD
),
122 brw_imm_ud(ir
->texture_offset
));
125 if (ir
->sampler
>= 16) {
126 /* The "Sampler Index" field can only store values between 0 and 15.
127 * However, we can add an offset to the "Sampler State Pointer"
128 * field, effectively selecting a different set of 16 samplers.
130 * The "Sampler State Pointer" needs to be aligned to a 32-byte
131 * offset, and each sampler state is only 16-bytes, so we can't
132 * exclusively use the offset - we have to use both.
134 gen8_instruction
*add
=
135 ADD(get_element_ud(brw_message_reg(ir
->base_mrf
), 3),
136 get_element_ud(brw_vec8_grf(0, 0), 3),
137 brw_imm_ud(16 * (ir
->sampler
/ 16) *
138 sizeof(gen7_sampler_state
)));
139 gen8_set_mask_control(add
, BRW_MASK_DISABLE
);
142 default_state
.access_mode
= BRW_ALIGN_16
;
/* Texture surfaces follow binding_table.texture_start. */
145 uint32_t surf_index
=
146 prog_data
->base
.binding_table
.texture_start
+ ir
->sampler
;
/* Emit the SEND: destination, payload, and SIMD4x2 sampler descriptor.
 * NOTE(review): the descriptor argument lines are missing from this
 * extract.
 */
148 gen8_instruction
*inst
= next_inst(BRW_OPCODE_SEND
);
149 gen8_set_dst(brw
, inst
, dst
);
150 gen8_set_src0(brw
, inst
, brw_message_reg(ir
->base_mrf
));
151 gen8_set_sampler_message(brw
, inst
,
158 BRW_SAMPLER_SIMD_MODE_SIMD4X2
);
/* Track binding-table usage for state upload. */
160 mark_surface_used(surf_index
);
/* Emit a URB write for the VS/GS: copy g0 into the message header,
 * optionally enable channel masks in header DWord 5, then SEND.
 * NOTE(review): several argument/continuation lines are missing from this
 * extract (e.g. the second MOV_RAW operand and the tail of the
 * gen8_set_urb_message() call).
 */
164 gen8_vec4_generator::generate_urb_write(vec4_instruction
*ir
, bool vs
)
166 struct brw_reg header
= brw_vec8_grf(GEN7_MRF_HACK_START
+ ir
->base_mrf
, 0);
/* The header starts as a raw copy of g0. */
170 MOV_RAW(header
, brw_vec8_grf(0, 0));
172 gen8_instruction
*inst
;
173 if (!(ir
->urb_write_flags
& BRW_URB_WRITE_USE_CHANNEL_MASKS
)) {
174 /* Enable Channel Masks in the URB_WRITE_OWORD message header */
175 default_state
.access_mode
= BRW_ALIGN_1
;
176 MOV_RAW(brw_vec1_grf(GEN7_MRF_HACK_START
+ ir
->base_mrf
, 5),
178 default_state
.access_mode
= BRW_ALIGN_16
;
/* Emit the SEND with the IR's URB write flags, message length and offset. */
181 inst
= next_inst(BRW_OPCODE_SEND
);
182 gen8_set_urb_message(brw
, inst
, ir
->urb_write_flags
, ir
->mlen
, 0, ir
->offset
,
184 gen8_set_dst(brw
, inst
, brw_null_reg());
185 gen8_set_src0(brw
, inst
, header
);
189 gen8_vec4_generator::generate_gs_set_vertex_count(struct brw_reg eot_mrf_header
,
192 /* Move the vertex count into the second MRF for the EOT write. */
193 assert(eot_mrf_header
.file
== BRW_MESSAGE_REGISTER_FILE
);
194 int dst_nr
= GEN7_MRF_HACK_START
+ eot_mrf_header
.nr
+ 1;
195 MOV(retype(brw_vec8_grf(dst_nr
, 0), BRW_REGISTER_TYPE_UD
), src
);
199 gen8_vec4_generator::generate_gs_thread_end(vec4_instruction
*ir
)
201 struct brw_reg src
= brw_vec8_grf(GEN7_MRF_HACK_START
+ ir
->base_mrf
, 0);
202 gen8_instruction
*inst
;
204 /* Enable Channel Masks in the URB_WRITE_HWORD message header */
205 default_state
.access_mode
= BRW_ALIGN_1
;
206 inst
= OR(retype(brw_vec1_grf(GEN7_MRF_HACK_START
+ ir
->base_mrf
, 5),
207 BRW_REGISTER_TYPE_UD
),
208 retype(brw_vec1_grf(0, 5), BRW_REGISTER_TYPE_UD
),
209 brw_imm_ud(0xff00)); /* could be 0x1100 but shouldn't matter */
210 gen8_set_mask_control(inst
, BRW_MASK_DISABLE
);
211 default_state
.access_mode
= BRW_ALIGN_16
;
213 /* mlen = 2: g0 header + vertex count */
214 inst
= next_inst(BRW_OPCODE_SEND
);
215 gen8_set_urb_message(brw
, inst
, BRW_URB_WRITE_EOT
, 2, 0, 0, true);
216 gen8_set_dst(brw
, inst
, brw_null_reg());
217 gen8_set_src0(brw
, inst
, src
);
221 gen8_vec4_generator::generate_gs_set_write_offset(struct brw_reg dst
,
225 /* From p22 of volume 4 part 2 of the Ivy Bridge PRM (2.4.3.1 Message
228 * Slot 0 Offset. This field, after adding to the Global Offset field
229 * in the message descriptor, specifies the offset (in 256-bit units)
230 * from the start of the URB entry, as referenced by URB Handle 0, at
231 * which the data will be accessed.
233 * Similar text describes DWORD M0.4, which is slot 1 offset.
235 * Therefore, we want to multiply DWORDs 0 and 4 of src0 (the x components
236 * of the register for geometry shader invocations 0 and 1) by the
237 * immediate value in src1, and store the result in DWORDs 3 and 4 of dst.
239 * We can do this with the following EU instruction:
241 * mul(2) dst.3<1>UD src0<8;2,4>UD src1 { Align1 WE_all }
243 default_state
.access_mode
= BRW_ALIGN_1
;
244 gen8_instruction
*inst
=
245 MUL(suboffset(stride(dst
, 2, 2, 1), 3), stride(src0
, 8, 2, 4), src1
);
246 gen8_set_mask_control(inst
, BRW_MASK_DISABLE
);
247 default_state
.access_mode
= BRW_ALIGN_16
;
251 gen8_vec4_generator::generate_gs_set_dword_2_immed(struct brw_reg dst
,
254 assert(src
.file
== BRW_IMMEDIATE_VALUE
);
256 default_state
.access_mode
= BRW_ALIGN_1
;
258 gen8_instruction
*inst
= MOV(suboffset(vec1(dst
), 2), src
);
259 gen8_set_mask_control(inst
, BRW_MASK_DISABLE
);
261 default_state
.access_mode
= BRW_ALIGN_16
;
265 gen8_vec4_generator::generate_gs_prepare_channel_masks(struct brw_reg dst
)
267 /* We want to left shift just DWORD 4 (the x component belonging to the
268 * second geometry shader invocation) by 4 bits. So generate the
271 * shl(1) dst.4<1>UD dst.4<0,1,0>UD 4UD { align1 WE_all }
273 dst
= suboffset(vec1(dst
), 4);
274 default_state
.access_mode
= BRW_ALIGN_1
;
275 gen8_instruction
*inst
= SHL(dst
, dst
, brw_imm_ud(4));
276 gen8_set_mask_control(inst
, BRW_MASK_DISABLE
);
277 default_state
.access_mode
= BRW_ALIGN_16
;
281 gen8_vec4_generator::generate_gs_set_channel_masks(struct brw_reg dst
,
284 /* From p21 of volume 4 part 2 of the Ivy Bridge PRM (2.4.3.1 Message
287 * 15 Vertex 1 DATA [3] / Vertex 0 DATA[7] Channel Mask
289 * When Swizzle Control = URB_INTERLEAVED this bit controls Vertex 1
290 * DATA[3], when Swizzle Control = URB_NOSWIZZLE this bit controls
291 * Vertex 0 DATA[7]. This bit is ANDed with the corresponding
292 * channel enable to determine the final channel enable. For the
293 * URB_READ_OWORD & URB_READ_HWORD messages, when final channel
294 * enable is 1 it indicates that Vertex 1 DATA [3] will be included
295 * in the writeback message. For the URB_WRITE_OWORD &
296 * URB_WRITE_HWORD messages, when final channel enable is 1 it
297 * indicates that Vertex 1 DATA [3] will be written to the surface.
299 * 0: Vertex 1 DATA [3] / Vertex 0 DATA[7] channel not included
300 * 1: Vertex DATA [3] / Vertex 0 DATA[7] channel included
302 * 14 Vertex 1 DATA [2] Channel Mask
303 * 13 Vertex 1 DATA [1] Channel Mask
304 * 12 Vertex 1 DATA [0] Channel Mask
305 * 11 Vertex 0 DATA [3] Channel Mask
306 * 10 Vertex 0 DATA [2] Channel Mask
307 * 9 Vertex 0 DATA [1] Channel Mask
308 * 8 Vertex 0 DATA [0] Channel Mask
310 * (This is from a section of the PRM that is agnostic to the particular
311 * type of shader being executed, so "Vertex 0" and "Vertex 1" refer to
312 * geometry shader invocations 0 and 1, respectively). Since we have the
313 * enable flags for geometry shader invocation 0 in bits 3:0 of DWORD 0,
314 * and the enable flags for geometry shader invocation 1 in bits 7:0 of
315 * DWORD 4, we just need to OR them together and store the result in bits
318 * It's easier to get the EU to do this if we think of the src and dst
319 * registers as composed of 32 bytes each; then, we want to pick up the
320 * contents of bytes 0 and 16 from src, OR them together, and store them in
323 * We can do that by the following EU instruction:
325 * or(1) dst.21<1>UB src<0,1,0>UB src.16<0,1,0>UB { align1 WE_all }
327 * Note: this relies on the source register having zeros in (a) bits 7:4 of
328 * DWORD 0 and (b) bits 3:0 of DWORD 4. We can rely on (b) because the
329 * source register was prepared by GS_OPCODE_PREPARE_CHANNEL_MASKS (which
330 * shifts DWORD 4 left by 4 bits), and we can rely on (a) because prior to
331 * the execution of GS_OPCODE_PREPARE_CHANNEL_MASKS, DWORDs 0 and 4 need to
332 * contain valid channel mask values (which are in the range 0x0-0xf).
334 dst
= retype(dst
, BRW_REGISTER_TYPE_UB
);
335 src
= retype(src
, BRW_REGISTER_TYPE_UB
);
337 default_state
.access_mode
= BRW_ALIGN_1
;
339 gen8_instruction
*inst
=
340 OR(suboffset(vec1(dst
), 21), vec1(src
), suboffset(vec1(src
), 16));
341 gen8_set_mask_control(inst
, BRW_MASK_DISABLE
);
343 default_state
.access_mode
= BRW_ALIGN_16
;
/* Set up the two OWord block offsets (M1.0 and M1.4) of a dual-block
 * scratch message; the second slot is offset by one from the first.
 * NOTE(review): this extract is missing lines (at least the MOVs that fill
 * m1_0, and the else-branch structure around the ADD) -- confirm against
 * the complete file.
 */
347 gen8_vec4_generator::generate_oword_dual_block_offsets(struct brw_reg m1
,
348 struct brw_reg index
)
350 int second_vertex_offset
= 1;
352 m1
= retype(m1
, BRW_REGISTER_TYPE_D
);
354 /* Set up M1 (message payload). Only the block offsets in M1.0 and
355 * M1.4 are used, and the rest are ignored.
357 struct brw_reg m1_0
= suboffset(vec1(m1
), 0);
358 struct brw_reg m1_4
= suboffset(vec1(m1
), 4);
359 struct brw_reg index_0
= suboffset(vec1(index
), 0);
360 struct brw_reg index_4
= suboffset(vec1(index
), 4);
/* Scalar header writes: Align1, execution mask off. */
362 default_state
.mask_control
= BRW_MASK_DISABLE
;
363 default_state
.access_mode
= BRW_ALIGN_1
;
/* An immediate index can be adjusted at compile time; otherwise emit an
 * ADD to compute the second slot's offset.
 */
367 if (index
.file
== BRW_IMMEDIATE_VALUE
) {
368 index_4
.dw1
.ud
+= second_vertex_offset
;
371 ADD(m1_4
, index_4
, brw_imm_d(second_vertex_offset
));
/* Restore the default emission state. */
374 default_state
.mask_control
= BRW_MASK_ENABLE
;
375 default_state
.access_mode
= BRW_ALIGN_16
;
/* Read one vec4 from the scratch buffer via an OWord dual-block read
 * (stateless access, binding table index 255).
 * NOTE(review): a parameter line (dst) and the tail arguments of
 * gen8_set_dp_message() are missing from this extract.
 */
379 gen8_vec4_generator::generate_scratch_read(vec4_instruction
*ir
,
381 struct brw_reg index
)
383 struct brw_reg header
= brw_vec8_grf(GEN7_MRF_HACK_START
+ ir
->base_mrf
, 0);
/* Message header starts as a raw copy of g0. */
385 MOV_RAW(header
, brw_vec8_grf(0, 0));
/* Fill in the per-slot block offsets in M1. */
387 generate_oword_dual_block_offsets(brw_message_reg(ir
->base_mrf
+ 1), index
);
389 /* Each of the 8 channel enables is considered for whether each
392 gen8_instruction
*send
= next_inst(BRW_OPCODE_SEND
);
393 gen8_set_dst(brw
, send
, dst
);
394 gen8_set_src0(brw
, send
, header
);
395 gen8_set_dp_message(brw
, send
, GEN7_SFID_DATAPORT_DATA_CACHE
,
396 255, /* binding table index: stateless access */
397 GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ
,
398 BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD
,
401 true, /* header present */
/* Write one vec4 to the scratch buffer via an OWord dual-block write
 * (stateless access, binding table index 255), predicated on the IR's
 * predicate.
 * NOTE(review): parameter lines (dst, src) and the tail arguments of
 * gen8_set_dp_message() are missing from this extract.
 */
406 gen8_vec4_generator::generate_scratch_write(vec4_instruction
*ir
,
409 struct brw_reg index
)
411 struct brw_reg header
= brw_vec8_grf(GEN7_MRF_HACK_START
+ ir
->base_mrf
, 0);
/* Message header starts as a raw copy of g0. */
413 MOV_RAW(header
, brw_vec8_grf(0, 0));
/* Fill in the per-slot block offsets in M1. */
415 generate_oword_dual_block_offsets(brw_message_reg(ir
->base_mrf
+ 1), index
);
/* Payload: the data to write goes in the third message register. */
417 MOV(retype(brw_message_reg(ir
->base_mrf
+ 2), BRW_REGISTER_TYPE_D
),
418 retype(src
, BRW_REGISTER_TYPE_D
));
420 /* Each of the 8 channel enables is considered for whether each
423 gen8_instruction
*send
= next_inst(BRW_OPCODE_SEND
);
424 gen8_set_dst(brw
, send
, dst
);
425 gen8_set_src0(brw
, send
, header
);
426 gen8_set_pred_control(send
, ir
->predicate
);
427 gen8_set_dp_message(brw
, send
, GEN7_SFID_DATAPORT_DATA_CACHE
,
428 255, /* binding table index: stateless access */
429 GEN7_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE
,
430 BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD
,
433 true, /* header present */
/* Load a vec4 from the pull-constant buffer: 'index' is an immediate
 * surface index and 'offset' is a GRF holding the read offset; the result
 * lands in dst via a headerless OWord dual-block read.
 * NOTE(review): a parameter line (dst) and the tail arguments of
 * gen8_set_dp_message() are missing from this extract.
 */
438 gen8_vec4_generator::generate_pull_constant_load(vec4_instruction
*inst
,
440 struct brw_reg index
,
441 struct brw_reg offset
)
443 assert(index
.file
== BRW_IMMEDIATE_VALUE
&&
444 index
.type
== BRW_REGISTER_TYPE_UD
);
445 uint32_t surf_index
= index
.dw1
.ud
;
447 assert(offset
.file
== BRW_GENERAL_REGISTER_FILE
);
449 /* Each of the 8 channel enables is considered for whether each
452 gen8_instruction
*send
= next_inst(BRW_OPCODE_SEND
);
453 gen8_set_dst(brw
, send
, dst
);
454 gen8_set_src0(brw
, send
, offset
);
455 gen8_set_dp_message(brw
, send
, GEN7_SFID_DATAPORT_DATA_CACHE
,
457 GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ
,
458 0, /* message control */
461 false, /* no header */
/* Track binding-table usage for state upload. */
464 mark_surface_used(surf_index
);
/* Dispatch a single vec4 IR instruction to the matching instruction
 * emitter (ALU macro, math function, sampler/URB/scratch message, or GS
 * helper).
 * NOTE(review): many lines are missing from this extract -- several case
 * labels and bodies (e.g. BRW_OPCODE_MOV, RNDD/RNDE/RNDZ bodies, the flow
 * control cases) and every break statement -- confirm against the complete
 * file before relying on exact structure.
 */
468 gen8_vec4_generator::generate_vec4_instruction(vec4_instruction
*instruction
,
472 vec4_instruction
*ir
= (vec4_instruction
*) instruction
;
474 if (dst
.width
== BRW_WIDTH_4
) {
475 /* This happens in attribute fixups for "dual instanced" geometry
476 * shaders, since they use attributes that are vec4's. Since the exec
477 * width is only 4, it's essential that the caller set
478 * force_writemask_all in order to make sure the instruction is executed
479 * regardless of which channels are enabled.
481 assert(ir
->force_writemask_all
);
483 /* Fix up any <8;8,1> or <0;4,1> source registers to <4;4,1> to satisfy
484 * the following register region restrictions (from Graphics BSpec:
485 * 3D-Media-GPGPU Engine > EU Overview > Registers and Register Regions
486 * > Register Region Restrictions)
488 * 1. ExecSize must be greater than or equal to Width.
490 * 2. If ExecSize = Width and HorzStride != 0, VertStride must be set
491 * to Width * HorzStride."
493 for (int i
= 0; i
< 3; i
++) {
494 if (src
[i
].file
== BRW_GENERAL_REGISTER_FILE
)
495 src
[i
] = stride(src
[i
], 4, 4, 1);
/* Main opcode dispatch. */
499 switch (ir
->opcode
) {
505 ADD(dst
, src
[0], src
[1]);
509 MUL(dst
, src
[0], src
[1]);
512 case BRW_OPCODE_MACH
:
513 MACH(dst
, src
[0], src
[1]);
517 MAD(dst
, src
[0], src
[1], src
[2]);
524 case BRW_OPCODE_RNDD
:
528 case BRW_OPCODE_RNDE
:
532 case BRW_OPCODE_RNDZ
:
537 AND(dst
, src
[0], src
[1]);
541 OR(dst
, src
[0], src
[1]);
545 XOR(dst
, src
[0], src
[1]);
553 ASR(dst
, src
[0], src
[1]);
557 SHR(dst
, src
[0], src
[1]);
561 SHL(dst
, src
[0], src
[1]);
565 CMP(dst
, ir
->conditional_mod
, src
[0], src
[1]);
569 SEL(dst
, src
[0], src
[1]);
573 DPH(dst
, src
[0], src
[1]);
577 DP4(dst
, src
[0], src
[1]);
581 DP3(dst
, src
[0], src
[1]);
585 DP2(dst
, src
[0], src
[1]);
588 case BRW_OPCODE_F32TO16
:
589 /* Emulate the Gen7 zeroing bug. */
590 MOV(retype(dst
, BRW_REGISTER_TYPE_UD
), brw_imm_ud(0u));
591 MOV(retype(dst
, BRW_REGISTER_TYPE_HF
), src
[0]);
594 case BRW_OPCODE_F16TO32
:
595 MOV(dst
, retype(src
[0], BRW_REGISTER_TYPE_HF
));
599 LRP(dst
, src
[0], src
[1], src
[2]);
602 case BRW_OPCODE_BFREV
:
603 /* BFREV only supports UD type for src and dst. */
604 BFREV(retype(dst
, BRW_REGISTER_TYPE_UD
),
605 retype(src
[0], BRW_REGISTER_TYPE_UD
));
609 /* FBH only supports UD type for dst. */
610 FBH(retype(dst
, BRW_REGISTER_TYPE_UD
), src
[0]);
614 /* FBL only supports UD type for dst. */
615 FBL(retype(dst
, BRW_REGISTER_TYPE_UD
), src
[0]);
618 case BRW_OPCODE_CBIT
:
619 /* CBIT only supports UD type for dst. */
620 CBIT(retype(dst
, BRW_REGISTER_TYPE_UD
), src
[0]);
623 case BRW_OPCODE_ADDC
:
624 ADDC(dst
, src
[0], src
[1]);
627 case BRW_OPCODE_SUBB
:
628 SUBB(dst
, src
[0], src
[1]);
632 BFE(dst
, src
[0], src
[1], src
[2]);
635 case BRW_OPCODE_BFI1
:
636 BFI1(dst
, src
[0], src
[1]);
639 case BRW_OPCODE_BFI2
:
640 BFI2(dst
, src
[0], src
[1], src
[2]);
647 case BRW_OPCODE_ELSE
:
651 case BRW_OPCODE_ENDIF
:
659 case BRW_OPCODE_BREAK
:
663 case BRW_OPCODE_CONTINUE
:
667 case BRW_OPCODE_WHILE
:
671 case SHADER_OPCODE_RCP
:
672 MATH(BRW_MATH_FUNCTION_INV
, dst
, src
[0]);
675 case SHADER_OPCODE_RSQ
:
676 MATH(BRW_MATH_FUNCTION_RSQ
, dst
, src
[0]);
679 case SHADER_OPCODE_SQRT
:
680 MATH(BRW_MATH_FUNCTION_SQRT
, dst
, src
[0]);
683 case SHADER_OPCODE_EXP2
:
684 MATH(BRW_MATH_FUNCTION_EXP
, dst
, src
[0]);
687 case SHADER_OPCODE_LOG2
:
688 MATH(BRW_MATH_FUNCTION_LOG
, dst
, src
[0]);
691 case SHADER_OPCODE_SIN
:
692 MATH(BRW_MATH_FUNCTION_SIN
, dst
, src
[0]);
695 case SHADER_OPCODE_COS
:
696 MATH(BRW_MATH_FUNCTION_COS
, dst
, src
[0]);
699 case SHADER_OPCODE_POW
:
700 MATH(BRW_MATH_FUNCTION_POW
, dst
, src
[0], src
[1]);
703 case SHADER_OPCODE_INT_QUOTIENT
:
704 MATH(BRW_MATH_FUNCTION_INT_DIV_QUOTIENT
, dst
, src
[0], src
[1]);
707 case SHADER_OPCODE_INT_REMAINDER
:
708 MATH(BRW_MATH_FUNCTION_INT_DIV_REMAINDER
, dst
, src
[0], src
[1]);
/* All sampler operations funnel through generate_tex(). */
711 case SHADER_OPCODE_TEX
:
712 case SHADER_OPCODE_TXD
:
713 case SHADER_OPCODE_TXF
:
714 case SHADER_OPCODE_TXF_CMS
:
715 case SHADER_OPCODE_TXF_MCS
:
716 case SHADER_OPCODE_TXL
:
717 case SHADER_OPCODE_TXS
:
718 case SHADER_OPCODE_TG4
:
719 case SHADER_OPCODE_TG4_OFFSET
:
720 generate_tex(ir
, dst
);
723 case VS_OPCODE_URB_WRITE
:
724 generate_urb_write(ir
, true);
727 case SHADER_OPCODE_GEN4_SCRATCH_READ
:
728 generate_scratch_read(ir
, dst
, src
[0]);
731 case SHADER_OPCODE_GEN4_SCRATCH_WRITE
:
732 generate_scratch_write(ir
, dst
, src
[0], src
[1]);
735 case VS_OPCODE_PULL_CONSTANT_LOAD
:
736 case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7
:
737 generate_pull_constant_load(ir
, dst
, src
[0], src
[1]);
740 case GS_OPCODE_URB_WRITE
:
741 generate_urb_write(ir
, false);
744 case GS_OPCODE_THREAD_END
:
745 generate_gs_thread_end(ir
);
748 case GS_OPCODE_SET_WRITE_OFFSET
:
749 generate_gs_set_write_offset(dst
, src
[0], src
[1]);
752 case GS_OPCODE_SET_VERTEX_COUNT
:
753 generate_gs_set_vertex_count(dst
, src
[0]);
756 case GS_OPCODE_SET_DWORD_2_IMMED
:
757 generate_gs_set_dword_2_immed(dst
, src
[0]);
760 case GS_OPCODE_PREPARE_CHANNEL_MASKS
:
761 generate_gs_prepare_channel_masks(dst
);
764 case GS_OPCODE_SET_CHANNEL_MASKS
:
765 generate_gs_set_channel_masks(dst
, src
[0]);
768 case SHADER_OPCODE_SHADER_TIME_ADD
:
769 assert(!"XXX: Missing Gen8 vec4 support for INTEL_DEBUG=shader_time");
772 case SHADER_OPCODE_UNTYPED_ATOMIC
:
773 assert(!"XXX: Missing Gen8 vec4 support for UNTYPED_ATOMIC");
776 case SHADER_OPCODE_UNTYPED_SURFACE_READ
:
777 assert(!"XXX: Missing Gen8 vec4 support for UNTYPED_SURFACE_READ");
780 case VS_OPCODE_UNPACK_FLAGS_SIMD4X2
:
781 assert(!"VS_OPCODE_UNPACK_FLAGS_SIMD4X2 should not be used on Gen8+.");
/* Unknown opcode: report it by name when an opcode description exists. */
785 if (ir
->opcode
< (int) ARRAY_SIZE(opcode_descs
)) {
786 _mesa_problem(ctx
, "Unsupported opcode in `%s' in VS\n",
787 opcode_descs
[ir
->opcode
].name
);
789 _mesa_problem(ctx
, "Unsupported opcode %d in VS", ir
->opcode
);
/* Generate native code for the whole IR instruction list, printing
 * annotated disassembly as it goes when debug_flag is set.
 * NOTE(review): structural lines are missing from this extract (e.g. the
 * assignment of dst, several closing braces, and else branches) -- confirm
 * against the complete file.
 */
796 gen8_vec4_generator::generate_code(exec_list
*instructions
)
798 int last_native_inst_offset
= 0;
799 const char *last_annotation_string
= NULL
;
800 const void *last_annotation_ir
= NULL
;
/* Print a banner identifying the shader being compiled. */
802 if (unlikely(debug_flag
)) {
804 fprintf(stderr
, "Native code for %s vertex shader %d:\n",
805 shader_prog
->Label
? shader_prog
->Label
: "unnamed",
808 fprintf(stderr
, "Native code for vertex program %d:\n", prog
->Id
);
/* Emit each IR instruction in order. */
812 foreach_list(node
, instructions
) {
813 vec4_instruction
*ir
= (vec4_instruction
*) node
;
814 struct brw_reg src
[3], dst
;
/* In debug mode, print the source IR (GLSL or ARB program) whenever the
 * annotation changes, so the disassembly can be matched back to it.
 */
816 if (unlikely(debug_flag
)) {
817 if (last_annotation_ir
!= ir
->ir
) {
818 last_annotation_ir
= ir
->ir
;
819 if (last_annotation_ir
) {
820 fprintf(stderr
, " ");
822 ((ir_instruction
*) last_annotation_ir
)->fprint(stderr
);
824 const prog_instruction
*vpi
;
825 vpi
= (const prog_instruction
*) ir
->ir
;
826 fprintf(stderr
, "%d: ", (int)(vpi
- prog
->Instructions
));
827 _mesa_fprint_instruction_opt(stderr
, vpi
, 0,
828 PROG_PRINT_DEBUG
, NULL
);
830 fprintf(stderr
, "\n");
833 if (last_annotation_string
!= ir
->annotation
) {
834 last_annotation_string
= ir
->annotation
;
835 if (last_annotation_string
)
836 fprintf(stderr
, " %s\n", last_annotation_string
);
/* Resolve the IR's register assignments into hardware registers. */
840 for (unsigned int i
= 0; i
< 3; i
++) {
841 src
[i
] = ir
->get_src(prog_data
, i
);
/* Load per-instruction modifiers into the default emission state. */
845 default_state
.conditional_mod
= ir
->conditional_mod
;
846 default_state
.predicate
= ir
->predicate
;
847 default_state
.predicate_inverse
= ir
->predicate_inverse
;
848 default_state
.saturate
= ir
->saturate
;
850 const unsigned pre_emit_nr_inst
= nr_inst
;
852 generate_vec4_instruction(ir
, dst
, src
);
/* Dependency-control hints only make sense when exactly one hardware
 * instruction was emitted; propagate them to it.
 */
854 if (ir
->no_dd_clear
|| ir
->no_dd_check
) {
855 assert(nr_inst
== pre_emit_nr_inst
+ 1 ||
856 !"no_dd_check or no_dd_clear set for IR emitting more "
857 "than 1 instruction");
859 gen8_instruction
*last
= &store
[pre_emit_nr_inst
];
860 gen8_set_no_dd_clear(last
, ir
->no_dd_clear
);
861 gen8_set_no_dd_check(last
, ir
->no_dd_check
);
864 if (unlikely(debug_flag
)) {
865 disassemble(stderr
, last_native_inst_offset
, next_inst_offset
);
868 last_native_inst_offset
= next_inst_offset
;
871 if (unlikely(debug_flag
)) {
872 fprintf(stderr
, "\n");
/* Resolve jump distances now that all instructions are emitted. */
875 patch_jump_targets();
877 /* OK, while the INTEL_DEBUG=vs above is very nice for debugging VS
878 * emit issues, it doesn't get the jump distances into the output,
879 * which is often something we want to debug. So this is here in
880 * case you're doing that.
882 if (0 && unlikely(debug_flag
)) {
883 disassemble(stderr
, 0, next_inst_offset
);
888 gen8_vec4_generator::generate_assembly(exec_list
*instructions
,
889 unsigned *assembly_size
)
891 default_state
.access_mode
= BRW_ALIGN_16
;
892 default_state
.exec_size
= BRW_EXECUTE_8
;
893 generate_code(instructions
);
894 *assembly_size
= next_inst_offset
;
895 return (const unsigned *) store
;
898 } /* namespace brw */