/* Copyright © 2011 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
24 #include "glsl/ir_print_visitor.h"
28 #include "main/macros.h"
29 #include "program/prog_print.h"
30 #include "program/prog_parameter.h"
38 vec4_visitor::setup_attributes(int payload_reg
)
41 int attribute_map
[VERT_ATTRIB_MAX
+ 1];
44 for (int i
= 0; i
< VERT_ATTRIB_MAX
; i
++) {
45 if (prog_data
->inputs_read
& BITFIELD64_BIT(i
)) {
46 attribute_map
[i
] = payload_reg
+ nr_attributes
;
51 /* VertexID is stored by the VF as the last vertex element, but we
52 * don't represent it with a flag in inputs_read, so we call it
55 if (prog_data
->uses_vertexid
) {
56 attribute_map
[VERT_ATTRIB_MAX
] = payload_reg
+ nr_attributes
;
60 foreach_list(node
, &this->instructions
) {
61 vec4_instruction
*inst
= (vec4_instruction
*)node
;
63 /* We have to support ATTR as a destination for GL_FIXED fixup. */
64 if (inst
->dst
.file
== ATTR
) {
65 int grf
= attribute_map
[inst
->dst
.reg
+ inst
->dst
.reg_offset
];
67 struct brw_reg reg
= brw_vec8_grf(grf
, 0);
68 reg
.type
= inst
->dst
.type
;
69 reg
.dw1
.bits
.writemask
= inst
->dst
.writemask
;
71 inst
->dst
.file
= HW_REG
;
72 inst
->dst
.fixed_hw_reg
= reg
;
75 for (int i
= 0; i
< 3; i
++) {
76 if (inst
->src
[i
].file
!= ATTR
)
79 int grf
= attribute_map
[inst
->src
[i
].reg
+ inst
->src
[i
].reg_offset
];
81 struct brw_reg reg
= brw_vec8_grf(grf
, 0);
82 reg
.dw1
.bits
.swizzle
= inst
->src
[i
].swizzle
;
83 reg
.type
= inst
->src
[i
].type
;
86 if (inst
->src
[i
].negate
)
89 inst
->src
[i
].file
= HW_REG
;
90 inst
->src
[i
].fixed_hw_reg
= reg
;
94 /* The BSpec says we always have to read at least one thing from
95 * the VF, and it appears that the hardware wedges otherwise.
97 if (nr_attributes
== 0)
100 prog_data
->urb_read_length
= (nr_attributes
+ 1) / 2;
102 unsigned vue_entries
= MAX2(nr_attributes
, c
->prog_data
.vue_map
.num_slots
);
105 c
->prog_data
.urb_entry_size
= ALIGN(vue_entries
, 8) / 8;
107 c
->prog_data
.urb_entry_size
= ALIGN(vue_entries
, 4) / 4;
109 return payload_reg
+ nr_attributes
;
113 vec4_visitor::setup_uniforms(int reg
)
115 /* The pre-gen6 VS requires that some push constants get loaded no
116 * matter what, or the GPU would hang.
118 if (intel
->gen
< 6 && this->uniforms
== 0) {
119 this->uniform_vector_size
[this->uniforms
] = 1;
121 for (unsigned int i
= 0; i
< 4; i
++) {
122 unsigned int slot
= this->uniforms
* 4 + i
;
123 static float zero
= 0.0;
124 c
->prog_data
.param
[slot
] = &zero
;
130 reg
+= ALIGN(uniforms
, 2) / 2;
133 c
->prog_data
.nr_params
= this->uniforms
* 4;
135 c
->prog_data
.curb_read_length
= reg
- 1;
141 vec4_visitor::setup_payload(void)
145 /* The payload always contains important data in g0, which contains
146 * the URB handles that are passed on to the URB write at the end
147 * of the thread. So, we always start push constants at g1.
151 reg
= setup_uniforms(reg
);
153 reg
= setup_attributes(reg
);
155 this->first_non_payload_grf
= reg
;
159 vec4_instruction::get_dst(void)
161 struct brw_reg brw_reg
;
165 brw_reg
= brw_vec8_grf(dst
.reg
+ dst
.reg_offset
, 0);
166 brw_reg
= retype(brw_reg
, dst
.type
);
167 brw_reg
.dw1
.bits
.writemask
= dst
.writemask
;
171 brw_reg
= brw_message_reg(dst
.reg
+ dst
.reg_offset
);
172 brw_reg
= retype(brw_reg
, dst
.type
);
173 brw_reg
.dw1
.bits
.writemask
= dst
.writemask
;
177 brw_reg
= dst
.fixed_hw_reg
;
181 brw_reg
= brw_null_reg();
185 assert(!"not reached");
186 brw_reg
= brw_null_reg();
193 vec4_instruction::get_src(int i
)
195 struct brw_reg brw_reg
;
197 switch (src
[i
].file
) {
199 brw_reg
= brw_vec8_grf(src
[i
].reg
+ src
[i
].reg_offset
, 0);
200 brw_reg
= retype(brw_reg
, src
[i
].type
);
201 brw_reg
.dw1
.bits
.swizzle
= src
[i
].swizzle
;
203 brw_reg
= brw_abs(brw_reg
);
205 brw_reg
= negate(brw_reg
);
209 switch (src
[i
].type
) {
210 case BRW_REGISTER_TYPE_F
:
211 brw_reg
= brw_imm_f(src
[i
].imm
.f
);
213 case BRW_REGISTER_TYPE_D
:
214 brw_reg
= brw_imm_d(src
[i
].imm
.i
);
216 case BRW_REGISTER_TYPE_UD
:
217 brw_reg
= brw_imm_ud(src
[i
].imm
.u
);
220 assert(!"not reached");
221 brw_reg
= brw_null_reg();
227 brw_reg
= stride(brw_vec4_grf(1 + (src
[i
].reg
+ src
[i
].reg_offset
) / 2,
228 ((src
[i
].reg
+ src
[i
].reg_offset
) % 2) * 4),
230 brw_reg
= retype(brw_reg
, src
[i
].type
);
231 brw_reg
.dw1
.bits
.swizzle
= src
[i
].swizzle
;
233 brw_reg
= brw_abs(brw_reg
);
235 brw_reg
= negate(brw_reg
);
237 /* This should have been moved to pull constants. */
238 assert(!src
[i
].reladdr
);
242 brw_reg
= src
[i
].fixed_hw_reg
;
246 /* Probably unused. */
247 brw_reg
= brw_null_reg();
251 assert(!"not reached");
252 brw_reg
= brw_null_reg();
260 vec4_visitor::generate_math1_gen4(vec4_instruction
*inst
,
266 brw_math_function(inst
->opcode
),
269 BRW_MATH_DATA_VECTOR
,
270 BRW_MATH_PRECISION_FULL
);
274 check_gen6_math_src_arg(struct brw_reg src
)
276 /* Source swizzles are ignored. */
279 assert(src
.dw1
.bits
.swizzle
== BRW_SWIZZLE_XYZW
);
283 vec4_visitor::generate_math1_gen6(vec4_instruction
*inst
,
287 /* Can't do writemask because math can't be align16. */
288 assert(dst
.dw1
.bits
.writemask
== WRITEMASK_XYZW
);
289 check_gen6_math_src_arg(src
);
291 brw_set_access_mode(p
, BRW_ALIGN_1
);
294 brw_math_function(inst
->opcode
),
297 BRW_MATH_DATA_SCALAR
,
298 BRW_MATH_PRECISION_FULL
);
299 brw_set_access_mode(p
, BRW_ALIGN_16
);
303 vec4_visitor::generate_math2_gen7(vec4_instruction
*inst
,
310 brw_math_function(inst
->opcode
),
315 vec4_visitor::generate_math2_gen6(vec4_instruction
*inst
,
320 /* Can't do writemask because math can't be align16. */
321 assert(dst
.dw1
.bits
.writemask
== WRITEMASK_XYZW
);
322 /* Source swizzles are ignored. */
323 check_gen6_math_src_arg(src0
);
324 check_gen6_math_src_arg(src1
);
326 brw_set_access_mode(p
, BRW_ALIGN_1
);
329 brw_math_function(inst
->opcode
),
331 brw_set_access_mode(p
, BRW_ALIGN_16
);
335 vec4_visitor::generate_math2_gen4(vec4_instruction
*inst
,
340 /* From the Ironlake PRM, Volume 4, Part 1, Section 6.1.13
343 * "Operand0[7]. For the INT DIV functions, this operand is the
346 * "Operand1[7]. For the INT DIV functions, this operand is the
349 bool is_int_div
= inst
->opcode
!= SHADER_OPCODE_POW
;
350 struct brw_reg
&op0
= is_int_div
? src1
: src0
;
351 struct brw_reg
&op1
= is_int_div
? src0
: src1
;
353 brw_push_insn_state(p
);
354 brw_set_saturate(p
, false);
355 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
356 brw_MOV(p
, retype(brw_message_reg(inst
->base_mrf
+ 1), op1
.type
), op1
);
357 brw_pop_insn_state(p
);
361 brw_math_function(inst
->opcode
),
364 BRW_MATH_DATA_VECTOR
,
365 BRW_MATH_PRECISION_FULL
);
369 vec4_visitor::generate_tex(vec4_instruction
*inst
,
375 if (intel
->gen
>= 5) {
376 switch (inst
->opcode
) {
377 case SHADER_OPCODE_TEX
:
378 case SHADER_OPCODE_TXL
:
379 if (inst
->shadow_compare
) {
380 msg_type
= GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE
;
382 msg_type
= GEN5_SAMPLER_MESSAGE_SAMPLE_LOD
;
385 case SHADER_OPCODE_TXD
:
386 /* There is no sample_d_c message; comparisons are done manually. */
387 msg_type
= GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS
;
389 case SHADER_OPCODE_TXF
:
390 msg_type
= GEN5_SAMPLER_MESSAGE_SAMPLE_LD
;
392 case SHADER_OPCODE_TXS
:
393 msg_type
= GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO
;
396 assert(!"should not get here: invalid VS texture opcode");
400 switch (inst
->opcode
) {
401 case SHADER_OPCODE_TEX
:
402 case SHADER_OPCODE_TXL
:
403 if (inst
->shadow_compare
) {
404 msg_type
= BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD_COMPARE
;
405 assert(inst
->mlen
== 3);
407 msg_type
= BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD
;
408 assert(inst
->mlen
== 2);
411 case SHADER_OPCODE_TXD
:
412 /* There is no sample_d_c message; comparisons are done manually. */
413 msg_type
= BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_GRADIENTS
;
414 assert(inst
->mlen
== 4);
416 case SHADER_OPCODE_TXF
:
417 msg_type
= BRW_SAMPLER_MESSAGE_SIMD4X2_LD
;
418 assert(inst
->mlen
== 2);
420 case SHADER_OPCODE_TXS
:
421 msg_type
= BRW_SAMPLER_MESSAGE_SIMD4X2_RESINFO
;
422 assert(inst
->mlen
== 2);
425 assert(!"should not get here: invalid VS texture opcode");
430 assert(msg_type
!= -1);
432 /* Load the message header if present. If there's a texture offset, we need
433 * to set it up explicitly and load the offset bitfield. Otherwise, we can
434 * use an implied move from g0 to the first message register.
436 if (inst
->texture_offset
) {
437 /* Explicitly set up the message header by copying g0 to the MRF. */
438 brw_MOV(p
, retype(brw_message_reg(inst
->base_mrf
), BRW_REGISTER_TYPE_UD
),
439 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD
));
441 /* Then set the offset bits in DWord 2. */
442 brw_set_access_mode(p
, BRW_ALIGN_1
);
444 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE
, inst
->base_mrf
, 2),
445 BRW_REGISTER_TYPE_UD
),
446 brw_imm_uw(inst
->texture_offset
));
447 brw_set_access_mode(p
, BRW_ALIGN_16
);
448 } else if (inst
->header_present
) {
449 /* Set up an implied move from g0 to the MRF. */
450 src
= brw_vec8_grf(0, 0);
453 uint32_t return_format
;
456 case BRW_REGISTER_TYPE_D
:
457 return_format
= BRW_SAMPLER_RETURN_FORMAT_SINT32
;
459 case BRW_REGISTER_TYPE_UD
:
460 return_format
= BRW_SAMPLER_RETURN_FORMAT_UINT32
;
463 return_format
= BRW_SAMPLER_RETURN_FORMAT_FLOAT32
;
471 SURF_INDEX_VS_TEXTURE(inst
->sampler
),
475 1, /* response length */
477 inst
->header_present
,
478 BRW_SAMPLER_SIMD_MODE_SIMD4X2
,
483 vec4_visitor::generate_urb_write(vec4_instruction
*inst
)
486 brw_null_reg(), /* dest */
487 inst
->base_mrf
, /* starting mrf reg nr */
488 brw_vec8_grf(0, 0), /* src */
489 false, /* allocate */
492 0, /* response len */
494 inst
->eot
, /* writes complete */
495 inst
->offset
, /* urb destination offset */
496 BRW_URB_SWIZZLE_INTERLEAVE
);
500 vec4_visitor::generate_oword_dual_block_offsets(struct brw_reg m1
,
501 struct brw_reg index
)
503 int second_vertex_offset
;
506 second_vertex_offset
= 1;
508 second_vertex_offset
= 16;
510 m1
= retype(m1
, BRW_REGISTER_TYPE_D
);
512 /* Set up M1 (message payload). Only the block offsets in M1.0 and
513 * M1.4 are used, and the rest are ignored.
515 struct brw_reg m1_0
= suboffset(vec1(m1
), 0);
516 struct brw_reg m1_4
= suboffset(vec1(m1
), 4);
517 struct brw_reg index_0
= suboffset(vec1(index
), 0);
518 struct brw_reg index_4
= suboffset(vec1(index
), 4);
520 brw_push_insn_state(p
);
521 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
522 brw_set_access_mode(p
, BRW_ALIGN_1
);
524 brw_MOV(p
, m1_0
, index_0
);
526 if (index
.file
== BRW_IMMEDIATE_VALUE
) {
527 index_4
.dw1
.ud
+= second_vertex_offset
;
528 brw_MOV(p
, m1_4
, index_4
);
530 brw_ADD(p
, m1_4
, index_4
, brw_imm_d(second_vertex_offset
));
533 brw_pop_insn_state(p
);
537 vec4_visitor::generate_scratch_read(vec4_instruction
*inst
,
539 struct brw_reg index
)
541 struct brw_reg header
= brw_vec8_grf(0, 0);
543 gen6_resolve_implied_move(p
, &header
, inst
->base_mrf
);
545 generate_oword_dual_block_offsets(brw_message_reg(inst
->base_mrf
+ 1),
551 msg_type
= GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ
;
552 else if (intel
->gen
== 5 || intel
->is_g4x
)
553 msg_type
= G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ
;
555 msg_type
= BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ
;
557 /* Each of the 8 channel enables is considered for whether each
560 struct brw_instruction
*send
= brw_next_insn(p
, BRW_OPCODE_SEND
);
561 brw_set_dest(p
, send
, dst
);
562 brw_set_src0(p
, send
, header
);
564 send
->header
.destreg__conditionalmod
= inst
->base_mrf
;
565 brw_set_dp_read_message(p
, send
,
566 255, /* binding table index: stateless access */
567 BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD
,
569 BRW_DATAPORT_READ_TARGET_RENDER_CACHE
,
575 vec4_visitor::generate_scratch_write(vec4_instruction
*inst
,
578 struct brw_reg index
)
580 struct brw_reg header
= brw_vec8_grf(0, 0);
583 /* If the instruction is predicated, we'll predicate the send, not
586 brw_set_predicate_control(p
, false);
588 gen6_resolve_implied_move(p
, &header
, inst
->base_mrf
);
590 generate_oword_dual_block_offsets(brw_message_reg(inst
->base_mrf
+ 1),
594 retype(brw_message_reg(inst
->base_mrf
+ 2), BRW_REGISTER_TYPE_D
),
595 retype(src
, BRW_REGISTER_TYPE_D
));
600 msg_type
= GEN7_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE
;
601 else if (intel
->gen
== 6)
602 msg_type
= GEN6_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE
;
604 msg_type
= BRW_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE
;
606 brw_set_predicate_control(p
, inst
->predicate
);
608 /* Pre-gen6, we have to specify write commits to ensure ordering
609 * between reads and writes within a thread. Afterwards, that's
610 * guaranteed and write commits only matter for inter-thread
613 if (intel
->gen
>= 6) {
614 write_commit
= false;
616 /* The visitor set up our destination register to be g0. This
617 * means that when the next read comes along, we will end up
618 * reading from g0 and causing a block on the write commit. For
619 * write-after-read, we are relying on the value of the previous
620 * read being used (and thus blocking on completion) before our
621 * write is executed. This means we have to be careful in
622 * instruction scheduling to not violate this assumption.
627 /* Each of the 8 channel enables is considered for whether each
630 struct brw_instruction
*send
= brw_next_insn(p
, BRW_OPCODE_SEND
);
631 brw_set_dest(p
, send
, dst
);
632 brw_set_src0(p
, send
, header
);
634 send
->header
.destreg__conditionalmod
= inst
->base_mrf
;
635 brw_set_dp_write_message(p
, send
,
636 255, /* binding table index: stateless access */
637 BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD
,
640 true, /* header present */
641 false, /* not a render target write */
642 write_commit
, /* rlen */
648 vec4_visitor::generate_pull_constant_load(vec4_instruction
*inst
,
650 struct brw_reg index
,
651 struct brw_reg offset
)
653 assert(index
.file
== BRW_IMMEDIATE_VALUE
&&
654 index
.type
== BRW_REGISTER_TYPE_UD
);
655 uint32_t surf_index
= index
.dw1
.ud
;
657 if (intel
->gen
== 7) {
658 gen6_resolve_implied_move(p
, &offset
, inst
->base_mrf
);
659 brw_instruction
*insn
= brw_next_insn(p
, BRW_OPCODE_SEND
);
660 brw_set_dest(p
, insn
, dst
);
661 brw_set_src0(p
, insn
, offset
);
662 brw_set_sampler_message(p
, insn
,
664 0, /* LD message ignores sampler unit */
665 GEN5_SAMPLER_MESSAGE_SAMPLE_LD
,
668 false, /* no header */
669 BRW_SAMPLER_SIMD_MODE_SIMD4X2
,
674 struct brw_reg header
= brw_vec8_grf(0, 0);
676 gen6_resolve_implied_move(p
, &header
, inst
->base_mrf
);
678 brw_MOV(p
, retype(brw_message_reg(inst
->base_mrf
+ 1), BRW_REGISTER_TYPE_D
),
684 msg_type
= GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ
;
685 else if (intel
->gen
== 5 || intel
->is_g4x
)
686 msg_type
= G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ
;
688 msg_type
= BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ
;
690 /* Each of the 8 channel enables is considered for whether each
693 struct brw_instruction
*send
= brw_next_insn(p
, BRW_OPCODE_SEND
);
694 brw_set_dest(p
, send
, dst
);
695 brw_set_src0(p
, send
, header
);
697 send
->header
.destreg__conditionalmod
= inst
->base_mrf
;
698 brw_set_dp_read_message(p
, send
,
700 BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD
,
702 BRW_DATAPORT_READ_TARGET_DATA_CACHE
,
708 vec4_visitor::generate_vs_instruction(vec4_instruction
*instruction
,
712 vec4_instruction
*inst
= (vec4_instruction
*)instruction
;
714 switch (inst
->opcode
) {
715 case SHADER_OPCODE_RCP
:
716 case SHADER_OPCODE_RSQ
:
717 case SHADER_OPCODE_SQRT
:
718 case SHADER_OPCODE_EXP2
:
719 case SHADER_OPCODE_LOG2
:
720 case SHADER_OPCODE_SIN
:
721 case SHADER_OPCODE_COS
:
722 if (intel
->gen
== 6) {
723 generate_math1_gen6(inst
, dst
, src
[0]);
725 /* Also works for Gen7. */
726 generate_math1_gen4(inst
, dst
, src
[0]);
730 case SHADER_OPCODE_POW
:
731 case SHADER_OPCODE_INT_QUOTIENT
:
732 case SHADER_OPCODE_INT_REMAINDER
:
733 if (intel
->gen
>= 7) {
734 generate_math2_gen7(inst
, dst
, src
[0], src
[1]);
735 } else if (intel
->gen
== 6) {
736 generate_math2_gen6(inst
, dst
, src
[0], src
[1]);
738 generate_math2_gen4(inst
, dst
, src
[0], src
[1]);
742 case SHADER_OPCODE_TEX
:
743 case SHADER_OPCODE_TXD
:
744 case SHADER_OPCODE_TXF
:
745 case SHADER_OPCODE_TXL
:
746 case SHADER_OPCODE_TXS
:
747 generate_tex(inst
, dst
, src
[0]);
750 case VS_OPCODE_URB_WRITE
:
751 generate_urb_write(inst
);
754 case VS_OPCODE_SCRATCH_READ
:
755 generate_scratch_read(inst
, dst
, src
[0]);
758 case VS_OPCODE_SCRATCH_WRITE
:
759 generate_scratch_write(inst
, dst
, src
[0], src
[1]);
762 case VS_OPCODE_PULL_CONSTANT_LOAD
:
763 generate_pull_constant_load(inst
, dst
, src
[0], src
[1]);
767 if (inst
->opcode
< (int) ARRAY_SIZE(opcode_descs
)) {
768 fail("unsupported opcode in `%s' in VS\n",
769 opcode_descs
[inst
->opcode
].name
);
771 fail("Unsupported opcode %d in VS", inst
->opcode
);
779 /* Generate VS IR for main(). (the visitor only descends into
780 * functions called "main").
783 visit_instructions(shader
->ir
);
785 emit_vertex_program_code();
788 if (c
->key
.userclip_active
&& !c
->key
.uses_clip_distance
)
789 setup_uniform_clipplane_values();
793 /* Before any optimization, push array accesses out to scratch
794 * space where we need them to be. This pass may allocate new
795 * virtual GRFs, so we want to do it early. It also makes sure
796 * that we have reladdr computations available for CSE, since we'll
797 * often do repeated subexpressions for those.
800 move_grf_array_access_to_scratch();
801 move_uniform_array_access_to_pull_constants();
803 /* The ARB_vertex_program frontend emits pull constant loads directly
804 * rather than using reladdr, so we don't need to walk through all the
805 * instructions looking for things to move. There isn't anything.
807 * We do still need to split things to vec4 size.
809 split_uniform_registers();
811 pack_uniform_registers();
812 move_push_constants_to_pull_constants();
813 split_virtual_grfs();
818 progress
= dead_code_eliminate() || progress
;
819 progress
= opt_copy_propagation() || progress
;
820 progress
= opt_algebraic() || progress
;
821 progress
= opt_compute_to_mrf() || progress
;
831 /* Debug of register spilling: Go spill everything. */
832 const int grf_count
= virtual_grf_count
;
833 float spill_costs
[virtual_grf_count
];
834 bool no_spill
[virtual_grf_count
];
835 evaluate_spill_costs(spill_costs
, no_spill
);
836 for (int i
= 0; i
< grf_count
; i
++) {
843 while (!reg_allocate()) {
851 brw_set_access_mode(p
, BRW_ALIGN_16
);
859 vec4_visitor::generate_code()
861 int last_native_insn_offset
= 0;
862 const char *last_annotation_string
= NULL
;
863 const void *last_annotation_ir
= NULL
;
865 if (unlikely(INTEL_DEBUG
& DEBUG_VS
)) {
867 printf("Native code for vertex shader %d:\n", prog
->Name
);
869 printf("Native code for vertex program %d:\n", c
->vp
->program
.Base
.Id
);
873 foreach_list(node
, &this->instructions
) {
874 vec4_instruction
*inst
= (vec4_instruction
*)node
;
875 struct brw_reg src
[3], dst
;
877 if (unlikely(INTEL_DEBUG
& DEBUG_VS
)) {
878 if (last_annotation_ir
!= inst
->ir
) {
879 last_annotation_ir
= inst
->ir
;
880 if (last_annotation_ir
) {
883 ((ir_instruction
*) last_annotation_ir
)->print();
885 const prog_instruction
*vpi
;
886 vpi
= (const prog_instruction
*) inst
->ir
;
887 printf("%d: ", (int)(vpi
- vp
->Base
.Instructions
));
888 _mesa_fprint_instruction_opt(stdout
, vpi
, 0,
889 PROG_PRINT_DEBUG
, NULL
);
894 if (last_annotation_string
!= inst
->annotation
) {
895 last_annotation_string
= inst
->annotation
;
896 if (last_annotation_string
)
897 printf(" %s\n", last_annotation_string
);
901 for (unsigned int i
= 0; i
< 3; i
++) {
902 src
[i
] = inst
->get_src(i
);
904 dst
= inst
->get_dst();
906 brw_set_conditionalmod(p
, inst
->conditional_mod
);
907 brw_set_predicate_control(p
, inst
->predicate
);
908 brw_set_predicate_inverse(p
, inst
->predicate_inverse
);
909 brw_set_saturate(p
, inst
->saturate
);
911 switch (inst
->opcode
) {
913 brw_MOV(p
, dst
, src
[0]);
916 brw_ADD(p
, dst
, src
[0], src
[1]);
919 brw_MUL(p
, dst
, src
[0], src
[1]);
921 case BRW_OPCODE_MACH
:
922 brw_set_acc_write_control(p
, 1);
923 brw_MACH(p
, dst
, src
[0], src
[1]);
924 brw_set_acc_write_control(p
, 0);
928 brw_FRC(p
, dst
, src
[0]);
930 case BRW_OPCODE_RNDD
:
931 brw_RNDD(p
, dst
, src
[0]);
933 case BRW_OPCODE_RNDE
:
934 brw_RNDE(p
, dst
, src
[0]);
936 case BRW_OPCODE_RNDZ
:
937 brw_RNDZ(p
, dst
, src
[0]);
941 brw_AND(p
, dst
, src
[0], src
[1]);
944 brw_OR(p
, dst
, src
[0], src
[1]);
947 brw_XOR(p
, dst
, src
[0], src
[1]);
950 brw_NOT(p
, dst
, src
[0]);
953 brw_ASR(p
, dst
, src
[0], src
[1]);
956 brw_SHR(p
, dst
, src
[0], src
[1]);
959 brw_SHL(p
, dst
, src
[0], src
[1]);
963 brw_CMP(p
, dst
, inst
->conditional_mod
, src
[0], src
[1]);
966 brw_SEL(p
, dst
, src
[0], src
[1]);
970 brw_DPH(p
, dst
, src
[0], src
[1]);
974 brw_DP4(p
, dst
, src
[0], src
[1]);
978 brw_DP3(p
, dst
, src
[0], src
[1]);
982 brw_DP2(p
, dst
, src
[0], src
[1]);
986 if (inst
->src
[0].file
!= BAD_FILE
) {
987 /* The instruction has an embedded compare (only allowed on gen6) */
988 assert(intel
->gen
== 6);
989 gen6_IF(p
, inst
->conditional_mod
, src
[0], src
[1]);
991 struct brw_instruction
*brw_inst
= brw_IF(p
, BRW_EXECUTE_8
);
992 brw_inst
->header
.predicate_control
= inst
->predicate
;
996 case BRW_OPCODE_ELSE
:
999 case BRW_OPCODE_ENDIF
:
1004 brw_DO(p
, BRW_EXECUTE_8
);
1007 case BRW_OPCODE_BREAK
:
1009 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
1011 case BRW_OPCODE_CONTINUE
:
1012 /* FINISHME: We need to write the loop instruction support still. */
1013 if (intel
->gen
>= 6)
1017 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
1020 case BRW_OPCODE_WHILE
:
1025 generate_vs_instruction(inst
, dst
, src
);
1029 if (unlikely(INTEL_DEBUG
& DEBUG_VS
)) {
1030 brw_dump_compile(p
, stdout
,
1031 last_native_insn_offset
, p
->next_insn_offset
);
1034 last_native_insn_offset
= p
->next_insn_offset
;
1037 if (unlikely(INTEL_DEBUG
& DEBUG_VS
)) {
1043 /* OK, while the INTEL_DEBUG=vs above is very nice for debugging VS
1044 * emit issues, it doesn't get the jump distances into the output,
1045 * which is often something we want to debug. So this is here in
1046 * case you're doing that.
1048 if (0 && unlikely(INTEL_DEBUG
& DEBUG_VS
)) {
1049 brw_dump_compile(p
, stdout
, 0, p
->next_insn_offset
);
1056 brw_vs_emit(struct gl_shader_program
*prog
, struct brw_vs_compile
*c
)
1058 struct brw_context
*brw
= c
->func
.brw
;
1059 struct intel_context
*intel
= &c
->func
.brw
->intel
;
1060 bool start_busy
= false;
1061 float start_time
= 0;
1063 if (unlikely(INTEL_DEBUG
& DEBUG_PERF
)) {
1064 start_busy
= (intel
->batch
.last_bo
&&
1065 drm_intel_bo_busy(intel
->batch
.last_bo
));
1066 start_time
= get_time();
1069 struct brw_shader
*shader
= NULL
;
1071 shader
= (brw_shader
*) prog
->_LinkedShaders
[MESA_SHADER_VERTEX
];
1073 if (unlikely(INTEL_DEBUG
& DEBUG_VS
)) {
1075 printf("GLSL IR for native vertex shader %d:\n", prog
->Name
);
1076 _mesa_print_ir(shader
->ir
, NULL
);
1079 printf("ARB_vertex_program %d for native vertex shader\n",
1080 c
->vp
->program
.Base
.Id
);
1081 _mesa_print_program(&c
->vp
->program
.Base
);
1085 if (unlikely(INTEL_DEBUG
& DEBUG_PERF
) && shader
) {
1086 if (shader
->compiled_once
) {
1087 brw_vs_debug_recompile(brw
, prog
, &c
->key
);
1089 if (start_busy
&& !drm_intel_bo_busy(intel
->batch
.last_bo
)) {
1090 perf_debug("VS compile took %.03f ms and stalled the GPU\n",
1091 (get_time() - start_time
) * 1000);
1093 shader
->compiled_once
= true;
1096 vec4_visitor
v(c
, prog
, shader
);
1098 prog
->LinkStatus
= false;
1099 ralloc_strcat(&prog
->InfoLog
, v
.fail_msg
);
1108 } /* namespace brw */