/*
 * Copyright © 2011 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
28 #include "main/macros.h"
29 #include "program/prog_print.h"
30 #include "program/prog_parameter.h"
35 gen8_vec4_generator::gen8_vec4_generator(struct brw_context
*brw
,
36 struct gl_shader_program
*shader_prog
,
37 struct gl_program
*prog
,
38 struct brw_vec4_prog_data
*prog_data
,
41 : gen8_generator(brw
, shader_prog
, prog
, mem_ctx
),
43 debug_flag(debug_flag
)
45 shader
= shader_prog
? shader_prog
->_LinkedShaders
[MESA_SHADER_VERTEX
] : NULL
;
48 gen8_vec4_generator::~gen8_vec4_generator()
53 gen8_vec4_generator::mark_surface_used(unsigned surf_index
)
55 assert(surf_index
< BRW_MAX_SURFACES
);
57 prog_data
->base
.binding_table
.size_bytes
=
58 MAX2(prog_data
->base
.binding_table
.size_bytes
, (surf_index
+ 1) * 4);
62 gen8_vec4_generator::generate_tex(vec4_instruction
*ir
, struct brw_reg dst
)
67 case SHADER_OPCODE_TEX
:
68 case SHADER_OPCODE_TXL
:
69 if (ir
->shadow_compare
) {
70 msg_type
= GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE
;
72 msg_type
= GEN5_SAMPLER_MESSAGE_SAMPLE_LOD
;
75 case SHADER_OPCODE_TXD
:
76 if (ir
->shadow_compare
) {
77 msg_type
= HSW_SAMPLER_MESSAGE_SAMPLE_DERIV_COMPARE
;
79 msg_type
= GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS
;
82 case SHADER_OPCODE_TXF
:
83 msg_type
= GEN5_SAMPLER_MESSAGE_SAMPLE_LD
;
85 case SHADER_OPCODE_TXF_MS
:
86 msg_type
= GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DMS
;
88 case SHADER_OPCODE_TXF_MCS
:
89 msg_type
= GEN7_SAMPLER_MESSAGE_SAMPLE_LD_MCS
;
91 case SHADER_OPCODE_TXS
:
92 msg_type
= GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO
;
94 case SHADER_OPCODE_TG4
:
95 if (ir
->shadow_compare
) {
96 msg_type
= GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_C
;
98 msg_type
= GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4
;
101 case SHADER_OPCODE_TG4_OFFSET
:
102 if (ir
->shadow_compare
) {
103 msg_type
= GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_C
;
105 msg_type
= GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO
;
109 assert(!"should not get here: invalid VS texture opcode");
113 if (ir
->header_present
) {
114 MOV_RAW(retype(brw_message_reg(ir
->base_mrf
), BRW_REGISTER_TYPE_UD
),
115 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD
));
117 if (ir
->texture_offset
) {
118 /* Set the offset bits in DWord 2. */
119 default_state
.access_mode
= BRW_ALIGN_1
;
120 MOV_RAW(retype(brw_vec1_reg(MRF
, ir
->base_mrf
, 2),
121 BRW_REGISTER_TYPE_UD
),
122 brw_imm_ud(ir
->texture_offset
));
123 default_state
.access_mode
= BRW_ALIGN_16
;
127 uint32_t surf_index
=
128 prog_data
->base
.binding_table
.texture_start
+ ir
->sampler
;
130 gen8_instruction
*inst
= next_inst(BRW_OPCODE_SEND
);
131 gen8_set_dst(brw
, inst
, dst
);
132 gen8_set_src0(brw
, inst
, brw_message_reg(ir
->base_mrf
));
133 gen8_set_sampler_message(brw
, inst
,
140 BRW_SAMPLER_SIMD_MODE_SIMD4X2
);
142 mark_surface_used(surf_index
);
146 gen8_vec4_generator::generate_urb_write(vec4_instruction
*ir
, bool vs
)
148 struct brw_reg header
= brw_vec8_grf(GEN7_MRF_HACK_START
+ ir
->base_mrf
, 0);
152 MOV_RAW(header
, brw_vec8_grf(0, 0));
154 gen8_instruction
*inst
;
155 if (!(ir
->urb_write_flags
& BRW_URB_WRITE_USE_CHANNEL_MASKS
)) {
156 /* Enable Channel Masks in the URB_WRITE_OWORD message header */
157 default_state
.access_mode
= BRW_ALIGN_1
;
158 inst
= OR(retype(brw_vec1_grf(GEN7_MRF_HACK_START
+ ir
->base_mrf
, 5),
159 BRW_REGISTER_TYPE_UD
),
160 retype(brw_vec1_grf(0, 5), BRW_REGISTER_TYPE_UD
),
162 gen8_set_mask_control(inst
, BRW_MASK_DISABLE
);
163 default_state
.access_mode
= BRW_ALIGN_16
;
166 inst
= next_inst(BRW_OPCODE_SEND
);
167 gen8_set_urb_message(brw
, inst
, ir
->urb_write_flags
, ir
->mlen
, 0, ir
->offset
,
169 gen8_set_dst(brw
, inst
, brw_null_reg());
170 gen8_set_src0(brw
, inst
, header
);
174 gen8_vec4_generator::generate_gs_set_vertex_count(struct brw_reg eot_mrf_header
,
177 /* Move the vertex count into the second MRF for the EOT write. */
178 assert(eot_mrf_header
.file
== BRW_MESSAGE_REGISTER_FILE
);
179 int dst_nr
= GEN7_MRF_HACK_START
+ eot_mrf_header
.nr
+ 1;
180 MOV(retype(brw_vec8_grf(dst_nr
, 0), BRW_REGISTER_TYPE_UD
), src
);
184 gen8_vec4_generator::generate_gs_thread_end(vec4_instruction
*ir
)
186 struct brw_reg src
= brw_vec8_grf(GEN7_MRF_HACK_START
+ ir
->base_mrf
, 0);
187 gen8_instruction
*inst
;
189 /* Enable Channel Masks in the URB_WRITE_HWORD message header */
190 default_state
.access_mode
= BRW_ALIGN_1
;
191 inst
= OR(retype(brw_vec1_grf(GEN7_MRF_HACK_START
+ ir
->base_mrf
, 5),
192 BRW_REGISTER_TYPE_UD
),
193 retype(brw_vec1_grf(0, 5), BRW_REGISTER_TYPE_UD
),
194 brw_imm_ud(0xff00)); /* could be 0x1100 but shouldn't matter */
195 gen8_set_mask_control(inst
, BRW_MASK_DISABLE
);
196 default_state
.access_mode
= BRW_ALIGN_16
;
198 /* mlen = 2: g0 header + vertex count */
199 inst
= next_inst(BRW_OPCODE_SEND
);
200 gen8_set_urb_message(brw
, inst
, BRW_URB_WRITE_EOT
, 2, 0, 0, true);
201 gen8_set_dst(brw
, inst
, brw_null_reg());
202 gen8_set_src0(brw
, inst
, src
);
206 gen8_vec4_generator::generate_gs_set_write_offset(struct brw_reg dst
,
210 /* From p22 of volume 4 part 2 of the Ivy Bridge PRM (2.4.3.1 Message
213 * Slot 0 Offset. This field, after adding to the Global Offset field
214 * in the message descriptor, specifies the offset (in 256-bit units)
215 * from the start of the URB entry, as referenced by URB Handle 0, at
216 * which the data will be accessed.
218 * Similar text describes DWORD M0.4, which is slot 1 offset.
220 * Therefore, we want to multiply DWORDs 0 and 4 of src0 (the x components
221 * of the register for geometry shader invocations 0 and 1) by the
222 * immediate value in src1, and store the result in DWORDs 3 and 4 of dst.
224 * We can do this with the following EU instruction:
226 * mul(2) dst.3<1>UD src0<8;2,4>UD src1 { Align1 WE_all }
228 default_state
.access_mode
= BRW_ALIGN_1
;
229 gen8_instruction
*inst
=
230 MUL(suboffset(stride(dst
, 2, 2, 1), 3), stride(src0
, 8, 2, 4), src1
);
231 gen8_set_mask_control(inst
, BRW_MASK_DISABLE
);
232 default_state
.access_mode
= BRW_ALIGN_16
;
236 gen8_vec4_generator::generate_gs_set_dword_2_immed(struct brw_reg dst
,
239 assert(src
.file
== BRW_IMMEDIATE_VALUE
);
241 default_state
.access_mode
= BRW_ALIGN_1
;
243 gen8_instruction
*inst
= MOV(suboffset(vec1(dst
), 2), src
);
244 gen8_set_mask_control(inst
, BRW_MASK_DISABLE
);
246 default_state
.access_mode
= BRW_ALIGN_16
;
250 gen8_vec4_generator::generate_gs_prepare_channel_masks(struct brw_reg dst
)
252 /* We want to left shift just DWORD 4 (the x component belonging to the
253 * second geometry shader invocation) by 4 bits. So generate the
256 * shl(1) dst.4<1>UD dst.4<0,1,0>UD 4UD { align1 WE_all }
258 dst
= suboffset(vec1(dst
), 4);
259 default_state
.access_mode
= BRW_ALIGN_1
;
260 gen8_instruction
*inst
= SHL(dst
, dst
, brw_imm_ud(4));
261 gen8_set_mask_control(inst
, BRW_MASK_DISABLE
);
262 default_state
.access_mode
= BRW_ALIGN_16
;
266 gen8_vec4_generator::generate_gs_set_channel_masks(struct brw_reg dst
,
269 /* From p21 of volume 4 part 2 of the Ivy Bridge PRM (2.4.3.1 Message
272 * 15 Vertex 1 DATA [3] / Vertex 0 DATA[7] Channel Mask
274 * When Swizzle Control = URB_INTERLEAVED this bit controls Vertex 1
275 * DATA[3], when Swizzle Control = URB_NOSWIZZLE this bit controls
276 * Vertex 0 DATA[7]. This bit is ANDed with the corresponding
277 * channel enable to determine the final channel enable. For the
278 * URB_READ_OWORD & URB_READ_HWORD messages, when final channel
279 * enable is 1 it indicates that Vertex 1 DATA [3] will be included
280 * in the writeback message. For the URB_WRITE_OWORD &
281 * URB_WRITE_HWORD messages, when final channel enable is 1 it
282 * indicates that Vertex 1 DATA [3] will be written to the surface.
284 * 0: Vertex 1 DATA [3] / Vertex 0 DATA[7] channel not included
285 * 1: Vertex DATA [3] / Vertex 0 DATA[7] channel included
287 * 14 Vertex 1 DATA [2] Channel Mask
288 * 13 Vertex 1 DATA [1] Channel Mask
289 * 12 Vertex 1 DATA [0] Channel Mask
290 * 11 Vertex 0 DATA [3] Channel Mask
291 * 10 Vertex 0 DATA [2] Channel Mask
292 * 9 Vertex 0 DATA [1] Channel Mask
293 * 8 Vertex 0 DATA [0] Channel Mask
295 * (This is from a section of the PRM that is agnostic to the particular
296 * type of shader being executed, so "Vertex 0" and "Vertex 1" refer to
297 * geometry shader invocations 0 and 1, respectively). Since we have the
298 * enable flags for geometry shader invocation 0 in bits 3:0 of DWORD 0,
299 * and the enable flags for geometry shader invocation 1 in bits 7:0 of
300 * DWORD 4, we just need to OR them together and store the result in bits
303 * It's easier to get the EU to do this if we think of the src and dst
304 * registers as composed of 32 bytes each; then, we want to pick up the
305 * contents of bytes 0 and 16 from src, OR them together, and store them in
308 * We can do that by the following EU instruction:
310 * or(1) dst.21<1>UB src<0,1,0>UB src.16<0,1,0>UB { align1 WE_all }
312 * Note: this relies on the source register having zeros in (a) bits 7:4 of
313 * DWORD 0 and (b) bits 3:0 of DWORD 4. We can rely on (b) because the
314 * source register was prepared by GS_OPCODE_PREPARE_CHANNEL_MASKS (which
315 * shifts DWORD 4 left by 4 bits), and we can rely on (a) because prior to
316 * the execution of GS_OPCODE_PREPARE_CHANNEL_MASKS, DWORDs 0 and 4 need to
317 * contain valid channel mask values (which are in the range 0x0-0xf).
319 dst
= retype(dst
, BRW_REGISTER_TYPE_UB
);
320 src
= retype(src
, BRW_REGISTER_TYPE_UB
);
322 default_state
.access_mode
= BRW_ALIGN_1
;
324 gen8_instruction
*inst
=
325 OR(suboffset(vec1(dst
), 21), vec1(src
), suboffset(vec1(src
), 16));
326 gen8_set_mask_control(inst
, BRW_MASK_DISABLE
);
328 default_state
.access_mode
= BRW_ALIGN_16
;
332 gen8_vec4_generator::generate_oword_dual_block_offsets(struct brw_reg m1
,
333 struct brw_reg index
)
335 int second_vertex_offset
= 1;
337 m1
= retype(m1
, BRW_REGISTER_TYPE_D
);
339 /* Set up M1 (message payload). Only the block offsets in M1.0 and
340 * M1.4 are used, and the rest are ignored.
342 struct brw_reg m1_0
= suboffset(vec1(m1
), 0);
343 struct brw_reg m1_4
= suboffset(vec1(m1
), 4);
344 struct brw_reg index_0
= suboffset(vec1(index
), 0);
345 struct brw_reg index_4
= suboffset(vec1(index
), 4);
347 default_state
.mask_control
= BRW_MASK_DISABLE
;
348 default_state
.access_mode
= BRW_ALIGN_1
;
352 if (index
.file
== BRW_IMMEDIATE_VALUE
) {
353 index_4
.dw1
.ud
+= second_vertex_offset
;
356 ADD(m1_4
, index_4
, brw_imm_d(second_vertex_offset
));
359 default_state
.mask_control
= BRW_MASK_ENABLE
;
360 default_state
.access_mode
= BRW_ALIGN_16
;
364 gen8_vec4_generator::generate_scratch_read(vec4_instruction
*ir
,
366 struct brw_reg index
)
368 struct brw_reg header
= brw_vec8_grf(GEN7_MRF_HACK_START
+ ir
->base_mrf
, 0);
370 MOV_RAW(header
, brw_vec8_grf(0, 0));
372 generate_oword_dual_block_offsets(brw_message_reg(ir
->base_mrf
+ 1), index
);
374 /* Each of the 8 channel enables is considered for whether each
377 gen8_instruction
*send
= next_inst(BRW_OPCODE_SEND
);
378 gen8_set_dst(brw
, send
, dst
);
379 gen8_set_src0(brw
, send
, header
);
380 gen8_set_dp_message(brw
, send
, GEN7_SFID_DATAPORT_DATA_CACHE
,
381 255, /* binding table index: stateless access */
382 GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ
,
383 BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD
,
386 true, /* header present */
391 gen8_vec4_generator::generate_scratch_write(vec4_instruction
*ir
,
394 struct brw_reg index
)
396 struct brw_reg header
= brw_vec8_grf(GEN7_MRF_HACK_START
+ ir
->base_mrf
, 0);
398 MOV_RAW(header
, brw_vec8_grf(0, 0));
400 generate_oword_dual_block_offsets(brw_message_reg(ir
->base_mrf
+ 1), index
);
402 MOV(retype(brw_message_reg(ir
->base_mrf
+ 2), BRW_REGISTER_TYPE_D
),
403 retype(src
, BRW_REGISTER_TYPE_D
));
405 /* Each of the 8 channel enables is considered for whether each
408 gen8_instruction
*send
= next_inst(BRW_OPCODE_SEND
);
409 gen8_set_dst(brw
, send
, dst
);
410 gen8_set_src0(brw
, send
, header
);
411 gen8_set_pred_control(send
, ir
->predicate
);
412 gen8_set_dp_message(brw
, send
, GEN7_SFID_DATAPORT_DATA_CACHE
,
413 255, /* binding table index: stateless access */
414 GEN7_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE
,
415 BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD
,
418 true, /* header present */
423 gen8_vec4_generator::generate_pull_constant_load(vec4_instruction
*inst
,
425 struct brw_reg index
,
426 struct brw_reg offset
)
428 assert(index
.file
== BRW_IMMEDIATE_VALUE
&&
429 index
.type
== BRW_REGISTER_TYPE_UD
);
430 uint32_t surf_index
= index
.dw1
.ud
;
432 assert(offset
.file
== BRW_GENERAL_REGISTER_FILE
);
434 /* Each of the 8 channel enables is considered for whether each
437 gen8_instruction
*send
= next_inst(BRW_OPCODE_SEND
);
438 gen8_set_dst(brw
, send
, dst
);
439 gen8_set_src0(brw
, send
, offset
);
440 gen8_set_dp_message(brw
, send
, GEN7_SFID_DATAPORT_DATA_CACHE
,
442 GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ
,
443 0, /* message control */
446 false, /* no header */
449 mark_surface_used(surf_index
);
453 gen8_vec4_generator::generate_vec4_instruction(vec4_instruction
*instruction
,
457 vec4_instruction
*ir
= (vec4_instruction
*) instruction
;
459 if (dst
.width
== BRW_WIDTH_4
) {
460 /* This happens in attribute fixups for "dual instanced" geometry
461 * shaders, since they use attributes that are vec4's. Since the exec
462 * width is only 4, it's essential that the caller set
463 * force_writemask_all in order to make sure the instruction is executed
464 * regardless of which channels are enabled.
466 assert(ir
->force_writemask_all
);
468 /* Fix up any <8;8,1> or <0;4,1> source registers to <4;4,1> to satisfy
469 * the following register region restrictions (from Graphics BSpec:
470 * 3D-Media-GPGPU Engine > EU Overview > Registers and Register Regions
471 * > Register Region Restrictions)
473 * 1. ExecSize must be greater than or equal to Width.
475 * 2. If ExecSize = Width and HorzStride != 0, VertStride must be set
476 * to Width * HorzStride."
478 for (int i
= 0; i
< 3; i
++) {
479 if (src
[i
].file
== BRW_GENERAL_REGISTER_FILE
)
480 src
[i
] = stride(src
[i
], 4, 4, 1);
484 switch (ir
->opcode
) {
490 ADD(dst
, src
[0], src
[1]);
494 MUL(dst
, src
[0], src
[1]);
497 case BRW_OPCODE_MACH
:
498 MACH(dst
, src
[0], src
[1]);
502 MAD(dst
, src
[0], src
[1], src
[2]);
509 case BRW_OPCODE_RNDD
:
513 case BRW_OPCODE_RNDE
:
517 case BRW_OPCODE_RNDZ
:
522 AND(dst
, src
[0], src
[1]);
526 OR(dst
, src
[0], src
[1]);
530 XOR(dst
, src
[0], src
[1]);
538 ASR(dst
, src
[0], src
[1]);
542 SHR(dst
, src
[0], src
[1]);
546 SHL(dst
, src
[0], src
[1]);
550 CMP(dst
, ir
->conditional_mod
, src
[0], src
[1]);
554 SEL(dst
, src
[0], src
[1]);
558 DPH(dst
, src
[0], src
[1]);
562 DP4(dst
, src
[0], src
[1]);
566 DP3(dst
, src
[0], src
[1]);
570 DP2(dst
, src
[0], src
[1]);
573 case BRW_OPCODE_F32TO16
:
574 F32TO16(dst
, src
[0]);
577 case BRW_OPCODE_F16TO32
:
578 F16TO32(dst
, src
[0]);
582 LRP(dst
, src
[0], src
[1], src
[2]);
585 case BRW_OPCODE_BFREV
:
586 /* BFREV only supports UD type for src and dst. */
587 BFREV(retype(dst
, BRW_REGISTER_TYPE_UD
),
588 retype(src
[0], BRW_REGISTER_TYPE_UD
));
592 /* FBH only supports UD type for dst. */
593 FBH(retype(dst
, BRW_REGISTER_TYPE_UD
), src
[0]);
597 /* FBL only supports UD type for dst. */
598 FBL(retype(dst
, BRW_REGISTER_TYPE_UD
), src
[0]);
601 case BRW_OPCODE_CBIT
:
602 /* CBIT only supports UD type for dst. */
603 CBIT(retype(dst
, BRW_REGISTER_TYPE_UD
), src
[0]);
606 case BRW_OPCODE_ADDC
:
607 ADDC(dst
, src
[0], src
[1]);
610 case BRW_OPCODE_SUBB
:
611 SUBB(dst
, src
[0], src
[1]);
615 BFE(dst
, src
[0], src
[1], src
[2]);
618 case BRW_OPCODE_BFI1
:
619 BFI1(dst
, src
[0], src
[1]);
622 case BRW_OPCODE_BFI2
:
623 BFI2(dst
, src
[0], src
[1], src
[2]);
630 case BRW_OPCODE_ELSE
:
634 case BRW_OPCODE_ENDIF
:
642 case BRW_OPCODE_BREAK
:
646 case BRW_OPCODE_CONTINUE
:
650 case BRW_OPCODE_WHILE
:
654 case SHADER_OPCODE_RCP
:
655 MATH(BRW_MATH_FUNCTION_INV
, dst
, src
[0]);
658 case SHADER_OPCODE_RSQ
:
659 MATH(BRW_MATH_FUNCTION_RSQ
, dst
, src
[0]);
662 case SHADER_OPCODE_SQRT
:
663 MATH(BRW_MATH_FUNCTION_SQRT
, dst
, src
[0]);
666 case SHADER_OPCODE_EXP2
:
667 MATH(BRW_MATH_FUNCTION_EXP
, dst
, src
[0]);
670 case SHADER_OPCODE_LOG2
:
671 MATH(BRW_MATH_FUNCTION_LOG
, dst
, src
[0]);
674 case SHADER_OPCODE_SIN
:
675 MATH(BRW_MATH_FUNCTION_SIN
, dst
, src
[0]);
678 case SHADER_OPCODE_COS
:
679 MATH(BRW_MATH_FUNCTION_COS
, dst
, src
[0]);
682 case SHADER_OPCODE_POW
:
683 MATH(BRW_MATH_FUNCTION_POW
, dst
, src
[0], src
[1]);
686 case SHADER_OPCODE_INT_QUOTIENT
:
687 MATH(BRW_MATH_FUNCTION_INT_DIV_QUOTIENT
, dst
, src
[0], src
[1]);
690 case SHADER_OPCODE_INT_REMAINDER
:
691 MATH(BRW_MATH_FUNCTION_INT_DIV_REMAINDER
, dst
, src
[0], src
[1]);
694 case SHADER_OPCODE_TEX
:
695 case SHADER_OPCODE_TXD
:
696 case SHADER_OPCODE_TXF
:
697 case SHADER_OPCODE_TXF_MS
:
698 case SHADER_OPCODE_TXF_MCS
:
699 case SHADER_OPCODE_TXL
:
700 case SHADER_OPCODE_TXS
:
701 case SHADER_OPCODE_TG4
:
702 case SHADER_OPCODE_TG4_OFFSET
:
703 generate_tex(ir
, dst
);
706 case VS_OPCODE_URB_WRITE
:
707 generate_urb_write(ir
, true);
710 case SHADER_OPCODE_GEN4_SCRATCH_READ
:
711 generate_scratch_read(ir
, dst
, src
[0]);
714 case SHADER_OPCODE_GEN4_SCRATCH_WRITE
:
715 generate_scratch_write(ir
, dst
, src
[0], src
[1]);
718 case VS_OPCODE_PULL_CONSTANT_LOAD
:
719 case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7
:
720 generate_pull_constant_load(ir
, dst
, src
[0], src
[1]);
723 case GS_OPCODE_URB_WRITE
:
724 generate_urb_write(ir
, false);
727 case GS_OPCODE_THREAD_END
:
728 generate_gs_thread_end(ir
);
731 case GS_OPCODE_SET_WRITE_OFFSET
:
732 generate_gs_set_write_offset(dst
, src
[0], src
[1]);
735 case GS_OPCODE_SET_VERTEX_COUNT
:
736 generate_gs_set_vertex_count(dst
, src
[0]);
739 case GS_OPCODE_SET_DWORD_2_IMMED
:
740 generate_gs_set_dword_2_immed(dst
, src
[0]);
743 case GS_OPCODE_PREPARE_CHANNEL_MASKS
:
744 generate_gs_prepare_channel_masks(dst
);
747 case GS_OPCODE_SET_CHANNEL_MASKS
:
748 generate_gs_set_channel_masks(dst
, src
[0]);
751 case SHADER_OPCODE_SHADER_TIME_ADD
:
752 assert(!"XXX: Missing Gen8 vec4 support for INTEL_DEBUG=shader_time");
755 case SHADER_OPCODE_UNTYPED_ATOMIC
:
756 assert(!"XXX: Missing Gen8 vec4 support for UNTYPED_ATOMIC");
759 case SHADER_OPCODE_UNTYPED_SURFACE_READ
:
760 assert(!"XXX: Missing Gen8 vec4 support for UNTYPED_SURFACE_READ");
763 case VS_OPCODE_UNPACK_FLAGS_SIMD4X2
:
764 assert(!"VS_OPCODE_UNPACK_FLAGS_SIMD4X2 should not be used on Gen8+.");
768 if (ir
->opcode
< (int) ARRAY_SIZE(opcode_descs
)) {
769 _mesa_problem(ctx
, "Unsupported opcode in `%s' in VS\n",
770 opcode_descs
[ir
->opcode
].name
);
772 _mesa_problem(ctx
, "Unsupported opcode %d in VS", ir
->opcode
);
779 gen8_vec4_generator::generate_code(exec_list
*instructions
)
781 int last_native_inst_offset
= 0;
782 const char *last_annotation_string
= NULL
;
783 const void *last_annotation_ir
= NULL
;
785 if (unlikely(debug_flag
)) {
787 printf("Native code for vertex shader %d:\n", shader_prog
->Name
);
789 printf("Native code for vertex program %d:\n", prog
->Id
);
793 foreach_list(node
, instructions
) {
794 vec4_instruction
*ir
= (vec4_instruction
*) node
;
795 struct brw_reg src
[3], dst
;
797 if (unlikely(debug_flag
)) {
798 if (last_annotation_ir
!= ir
->ir
) {
799 last_annotation_ir
= ir
->ir
;
800 if (last_annotation_ir
) {
803 ((ir_instruction
*) last_annotation_ir
)->print();
805 const prog_instruction
*vpi
;
806 vpi
= (const prog_instruction
*) ir
->ir
;
807 printf("%d: ", (int)(vpi
- prog
->Instructions
));
808 _mesa_fprint_instruction_opt(stdout
, vpi
, 0,
809 PROG_PRINT_DEBUG
, NULL
);
814 if (last_annotation_string
!= ir
->annotation
) {
815 last_annotation_string
= ir
->annotation
;
816 if (last_annotation_string
)
817 printf(" %s\n", last_annotation_string
);
821 for (unsigned int i
= 0; i
< 3; i
++) {
822 src
[i
] = ir
->get_src(prog_data
, i
);
826 default_state
.conditional_mod
= ir
->conditional_mod
;
827 default_state
.predicate
= ir
->predicate
;
828 default_state
.predicate_inverse
= ir
->predicate_inverse
;
829 default_state
.saturate
= ir
->saturate
;
831 const unsigned pre_emit_nr_inst
= nr_inst
;
833 generate_vec4_instruction(ir
, dst
, src
);
835 if (ir
->no_dd_clear
|| ir
->no_dd_check
) {
836 assert(nr_inst
== pre_emit_nr_inst
+ 1 ||
837 !"no_dd_check or no_dd_clear set for IR emitting more "
838 "than 1 instruction");
840 gen8_instruction
*last
= &store
[pre_emit_nr_inst
];
841 gen8_set_no_dd_clear(last
, ir
->no_dd_clear
);
842 gen8_set_no_dd_check(last
, ir
->no_dd_check
);
845 if (unlikely(debug_flag
)) {
846 disassemble(stdout
, last_native_inst_offset
, next_inst_offset
);
849 last_native_inst_offset
= next_inst_offset
;
852 if (unlikely(debug_flag
)) {
856 patch_jump_targets();
858 /* OK, while the INTEL_DEBUG=vs above is very nice for debugging VS
859 * emit issues, it doesn't get the jump distances into the output,
860 * which is often something we want to debug. So this is here in
861 * case you're doing that.
863 if (0 && unlikely(debug_flag
)) {
864 disassemble(stdout
, 0, next_inst_offset
);
869 gen8_vec4_generator::generate_assembly(exec_list
*instructions
,
870 unsigned *assembly_size
)
872 default_state
.access_mode
= BRW_ALIGN_16
;
873 default_state
.exec_size
= BRW_EXECUTE_8
;
874 generate_code(instructions
);
875 *assembly_size
= next_inst_offset
;
876 return (const unsigned *) store
;
879 } /* namespace brw */