1 /* Copyright © 2011 Intel Corporation
3 * Permission is hereby granted, free of charge, to any person obtaining a
4 * copy of this software and associated documentation files (the "Software"),
5 * to deal in the Software without restriction, including without limitation
6 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
7 * and/or sell copies of the Software, and to permit persons to whom the
8 * Software is furnished to do so, subject to the following conditions:
10 * The above copyright notice and this permission notice (including the next
11 * paragraph) shall be included in all copies or substantial portions of the
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
24 #include "glsl/ir_print_visitor.h"
28 #include "main/macros.h"
29 #include "program/prog_print.h"
30 #include "program/prog_parameter.h"
/* Maps the visitor's virtual ATTR file references onto concrete hardware
 * GRFs in the thread payload.  Builds attribute_map[] from the bits set in
 * prog_data->inputs_read (plus an extra slot for VertexID), then walks the
 * instruction list rewriting ATTR dst/src operands to HW_REG fixed
 * registers.  Returns the first GRF past the attribute payload.
 * NOTE(review): this chunk is missing several original lines (braces,
 * nr_attributes increments) — confirm against the full file.
 */
38 vec4_visitor::setup_attributes(int payload_reg
)
/* One extra entry beyond VERT_ATTRIB_MAX reserves a slot for VertexID. */
41 int attribute_map
[VERT_ATTRIB_MAX
+ 1];
/* Assign a payload GRF to each attribute actually read by the program. */
44 for (int i
= 0; i
< VERT_ATTRIB_MAX
; i
++) {
45 if (prog_data
->inputs_read
& BITFIELD64_BIT(i
)) {
46 attribute_map
[i
] = payload_reg
+ nr_attributes
;
51 /* VertexID is stored by the VF as the last vertex element, but we
52 * don't represent it with a flag in inputs_read, so we call it
55 if (prog_data
->uses_vertexid
) {
56 attribute_map
[VERT_ATTRIB_MAX
] = payload_reg
+ nr_attributes
;
/* Rewrite every ATTR operand in the IR to the hardware register chosen
 * above. */
60 foreach_list(node
, &this->instructions
) {
61 vec4_instruction
*inst
= (vec4_instruction
*)node
;
63 /* We have to support ATTR as a destination for GL_FIXED fixup. */
64 if (inst
->dst
.file
== ATTR
) {
65 int grf
= attribute_map
[inst
->dst
.reg
+ inst
->dst
.reg_offset
];
67 struct brw_reg reg
= brw_vec8_grf(grf
, 0);
/* Preserve the IR destination's writemask on the hardware register. */
68 reg
.dw1
.bits
.writemask
= inst
->dst
.writemask
;
70 inst
->dst
.file
= HW_REG
;
71 inst
->dst
.fixed_hw_reg
= reg
;
/* Same rewrite for each of the (up to 3) sources. */
74 for (int i
= 0; i
< 3; i
++) {
75 if (inst
->src
[i
].file
!= ATTR
)
78 int grf
= attribute_map
[inst
->src
[i
].reg
+ inst
->src
[i
].reg_offset
];
80 struct brw_reg reg
= brw_vec8_grf(grf
, 0);
/* Carry the source swizzle and type over to the hardware register. */
81 reg
.dw1
.bits
.swizzle
= inst
->src
[i
].swizzle
;
82 reg
.type
= inst
->src
[i
].type
;
85 if (inst
->src
[i
].negate
)
88 inst
->src
[i
].file
= HW_REG
;
89 inst
->src
[i
].fixed_hw_reg
= reg
;
93 /* The BSpec says we always have to read at least one thing from
94 * the VF, and it appears that the hardware wedges otherwise.
96 if (nr_attributes
== 0)
/* URB reads are counted in pairs of attributes (rounded up). */
99 prog_data
->urb_read_length
= (nr_attributes
+ 1) / 2;
101 unsigned vue_entries
= MAX2(nr_attributes
, c
->prog_data
.vue_map
.num_slots
);
/* Two URB entry-size granularities are computed here; presumably one per
 * hardware generation (8-slot vs 4-slot alignment) — the selecting branch
 * is not visible in this chunk, confirm against the full file. */
104 c
->prog_data
.urb_entry_size
= ALIGN(vue_entries
, 8) / 8;
106 c
->prog_data
.urb_entry_size
= ALIGN(vue_entries
, 4) / 4;
108 return payload_reg
+ nr_attributes
;
/* Lays out push constants (uniforms) in the payload starting at `reg`,
 * fills in c->prog_data's param/nr_params/curb_read_length bookkeeping,
 * and returns the first register past the uniform section (return path not
 * fully visible in this chunk).
 */
112 vec4_visitor::setup_uniforms(int reg
)
114 /* The pre-gen6 VS requires that some push constants get loaded no
115 * matter what, or the GPU would hang.
117 if (intel
->gen
< 6 && this->uniforms
== 0) {
/* Fabricate one dummy vec4 uniform so the CURB read is never empty. */
118 this->uniform_vector_size
[this->uniforms
] = 1;
120 for (unsigned int i
= 0; i
< 4; i
++) {
121 unsigned int slot
= this->uniforms
* 4 + i
;
/* `static` so the pointer stored in param[] stays valid after return. */
122 static float zero
= 0.0;
123 c
->prog_data
.param
[slot
] = &zero
;
/* Uniforms are packed two vec4s per register (rounded up). */
129 reg
+= ALIGN(uniforms
, 2) / 2;
/* Each uniform is a vec4, hence 4 params per uniform. */
132 c
->prog_data
.nr_params
= this->uniforms
* 4;
/* curb_read_length counts registers consumed after the g0 header. */
134 c
->prog_data
.curb_read_length
= reg
- 1;
/* Lays out the whole thread payload: g0 header, then push constants, then
 * vertex attributes.  Records the first free GRF for register allocation.
 */
140 vec4_visitor::setup_payload(void)
144 /* The payload always contains important data in g0, which contains
145 * the URB handles that are passed on to the URB write at the end
146 * of the thread. So, we always start push constants at g1.
150 reg
= setup_uniforms(reg
);
152 reg
= setup_attributes(reg
);
154 this->first_non_payload_grf
= reg
;
/* Converts this instruction's IR destination into a hardware brw_reg,
 * dispatching on dst.file (case labels are missing from this chunk —
 * presumably GRF, MRF, HW_REG, BAD_FILE, default; confirm against the
 * full file).  Type and writemask are carried over from the IR register.
 */
158 vec4_instruction::get_dst(void)
160 struct brw_reg brw_reg
;
/* GRF destination: a vec8 GRF retyped and writemasked like the IR dst. */
164 brw_reg
= brw_vec8_grf(dst
.reg
+ dst
.reg_offset
, 0);
165 brw_reg
= retype(brw_reg
, dst
.type
);
166 brw_reg
.dw1
.bits
.writemask
= dst
.writemask
;
/* MRF destination: same treatment, but in the message register file. */
170 brw_reg
= brw_message_reg(dst
.reg
+ dst
.reg_offset
);
171 brw_reg
= retype(brw_reg
, dst
.type
);
172 brw_reg
.dw1
.bits
.writemask
= dst
.writemask
;
/* HW_REG: the fixed register was already fully described by the IR. */
176 brw_reg
= dst
.fixed_hw_reg
;
/* No destination: write to the null register. */
180 brw_reg
= brw_null_reg();
/* Unknown file: programming error. */
184 assert(!"not reached");
185 brw_reg
= brw_null_reg();
/* Converts source operand i of this instruction into a hardware brw_reg.
 * Dispatches on src[i].file (several case labels are missing from this
 * chunk — presumably GRF, IMM, UNIFORM, HW_REG, BAD_FILE, default).
 * Applies retype, swizzle, and (where visible) abs/negate modifiers.
 */
192 vec4_instruction::get_src(int i
)
194 struct brw_reg brw_reg
;
196 switch (src
[i
].file
) {
/* GRF source: vec8 GRF with the IR's type and swizzle applied. */
198 brw_reg
= brw_vec8_grf(src
[i
].reg
+ src
[i
].reg_offset
, 0);
199 brw_reg
= retype(brw_reg
, src
[i
].type
);
200 brw_reg
.dw1
.bits
.swizzle
= src
[i
].swizzle
;
/* abs/negate source modifiers (their guarding conditions are missing
 * from this chunk). */
202 brw_reg
= brw_abs(brw_reg
);
204 brw_reg
= negate(brw_reg
);
/* Immediate source: pick the immediate constructor matching the type. */
208 switch (src
[i
].type
) {
209 case BRW_REGISTER_TYPE_F
:
210 brw_reg
= brw_imm_f(src
[i
].imm
.f
);
212 case BRW_REGISTER_TYPE_D
:
213 brw_reg
= brw_imm_d(src
[i
].imm
.i
);
215 case BRW_REGISTER_TYPE_UD
:
216 brw_reg
= brw_imm_ud(src
[i
].imm
.u
);
219 assert(!"not reached");
220 brw_reg
= brw_null_reg();
/* Uniform source: push constants start at g1 and pack two vec4s per
 * register, hence the /2 register and %2*4 subregister arithmetic. */
226 brw_reg
= stride(brw_vec4_grf(1 + (src
[i
].reg
+ src
[i
].reg_offset
) / 2,
227 ((src
[i
].reg
+ src
[i
].reg_offset
) % 2) * 4),
229 brw_reg
= retype(brw_reg
, src
[i
].type
);
230 brw_reg
.dw1
.bits
.swizzle
= src
[i
].swizzle
;
232 brw_reg
= brw_abs(brw_reg
);
234 brw_reg
= negate(brw_reg
);
236 /* This should have been moved to pull constants. */
237 assert(!src
[i
].reladdr
);
/* HW_REG: already a fully-specified hardware register. */
241 brw_reg
= src
[i
].fixed_hw_reg
;
245 /* Probably unused. */
246 brw_reg
= brw_null_reg();
250 assert(!"not reached");
251 brw_reg
= brw_null_reg();
/* Emits a one-source math instruction for gen4/gen5 (also used on gen7;
 * see the caller) via the shared math helper, in vector mode at full
 * precision.  Most of the call's argument list is missing from this chunk.
 */
259 vec4_visitor::generate_math1_gen4(vec4_instruction
*inst
,
/* Translate the IR opcode to the hardware math function encoding. */
265 brw_math_function(inst
->opcode
),
268 BRW_MATH_DATA_VECTOR
,
269 BRW_MATH_PRECISION_FULL
);
/* Debug helper: asserts that a source operand for gen6 math has the
 * identity swizzle, since the math unit would silently ignore any other.
 */
273 check_gen6_math_src_arg(struct brw_reg src
)
275 /* Source swizzles are ignored. */
278 assert(src
.dw1
.bits
.swizzle
== BRW_SWIZZLE_XYZW
);
/* Emits a one-source math instruction for gen6.  Gen6 math must run in
 * align1 mode, so writemask/swizzle cannot be honored — asserted away —
 * and the access mode is temporarily switched around the emit.
 */
282 vec4_visitor::generate_math1_gen6(vec4_instruction
*inst
,
286 /* Can't do writemask because math can't be align16. */
287 assert(dst
.dw1
.bits
.writemask
== WRITEMASK_XYZW
);
288 check_gen6_math_src_arg(src
);
/* Drop to align1 for the math instruction, restore align16 after. */
290 brw_set_access_mode(p
, BRW_ALIGN_1
);
293 brw_math_function(inst
->opcode
),
296 BRW_MATH_DATA_SCALAR
,
297 BRW_MATH_PRECISION_FULL
);
298 brw_set_access_mode(p
, BRW_ALIGN_16
);
/* Emits a two-source math instruction for gen7.  Only a fragment of the
 * body is visible in this chunk; the opcode is translated to the hardware
 * math function encoding.
 */
302 vec4_visitor::generate_math2_gen7(vec4_instruction
*inst
,
309 brw_math_function(inst
->opcode
),
/* Emits a two-source math instruction for gen6.  As with the one-source
 * case, gen6 math requires align1, so writemask and source swizzles must
 * be identity (asserted), and access mode is toggled around the emit.
 */
314 vec4_visitor::generate_math2_gen6(vec4_instruction
*inst
,
319 /* Can't do writemask because math can't be align16. */
320 assert(dst
.dw1
.bits
.writemask
== WRITEMASK_XYZW
);
321 /* Source swizzles are ignored. */
322 check_gen6_math_src_arg(src0
);
323 check_gen6_math_src_arg(src1
);
325 brw_set_access_mode(p
, BRW_ALIGN_1
);
328 brw_math_function(inst
->opcode
),
330 brw_set_access_mode(p
, BRW_ALIGN_16
);
/* Emits a two-source math instruction for gen4/gen5, where the second
 * operand travels via an MRF.  For the INT DIV functions the hardware's
 * operand order is swapped relative to POW (see the Ironlake PRM quote
 * below), so op0/op1 select between src0 and src1 accordingly.
 */
334 vec4_visitor::generate_math2_gen4(vec4_instruction
*inst
,
339 /* From the Ironlake PRM, Volume 4, Part 1, Section 6.1.13
342 * "Operand0[7]. For the INT DIV functions, this operand is the
345 * "Operand1[7]. For the INT DIV functions, this operand is the
/* Everything reaching this path except POW is an INT DIV variant. */
348 bool is_int_div
= inst
->opcode
!= SHADER_OPCODE_POW
;
349 struct brw_reg
&op0
= is_int_div
? src1
: src0
;
350 struct brw_reg
&op1
= is_int_div
? src0
: src1
;
/* Move op1 into the message register without inheriting the caller's
 * saturate/predication state. */
352 brw_push_insn_state(p
);
353 brw_set_saturate(p
, false);
354 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
355 brw_MOV(p
, retype(brw_message_reg(inst
->base_mrf
+ 1), op1
.type
), op1
);
356 brw_pop_insn_state(p
);
360 brw_math_function(inst
->opcode
),
363 BRW_MATH_DATA_VECTOR
,
364 BRW_MATH_PRECISION_FULL
);
/* Emits a sampler SEND for a VS texture instruction.  Chooses the message
 * type from the opcode (gen5+ encodings vs. pre-gen5 SIMD4x2 encodings),
 * sets up the message header (explicitly when a texture offset must be
 * written into it, otherwise by implied move from g0), picks the sampler
 * return format from the destination type, and issues the message.
 * Several `break`s/`else`s from the original are missing in this chunk.
 */
368 vec4_visitor::generate_tex(vec4_instruction
*inst
,
/* gen5+ uses the unified sampler message encodings. */
374 if (intel
->gen
>= 5) {
375 switch (inst
->opcode
) {
376 case SHADER_OPCODE_TEX
:
377 case SHADER_OPCODE_TXL
:
378 if (inst
->shadow_compare
) {
379 msg_type
= GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE
;
381 msg_type
= GEN5_SAMPLER_MESSAGE_SAMPLE_LOD
;
384 case SHADER_OPCODE_TXD
:
385 /* There is no sample_d_c message; comparisons are done manually. */
386 msg_type
= GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS
;
388 case SHADER_OPCODE_TXF
:
389 msg_type
= GEN5_SAMPLER_MESSAGE_SAMPLE_LD
;
391 case SHADER_OPCODE_TXS
:
392 msg_type
= GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO
;
395 assert(!"should not get here: invalid VS texture opcode");
/* Pre-gen5: SIMD4x2 message encodings, with expected message lengths
 * asserted for each opcode. */
399 switch (inst
->opcode
) {
400 case SHADER_OPCODE_TEX
:
401 case SHADER_OPCODE_TXL
:
402 if (inst
->shadow_compare
) {
403 msg_type
= BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD_COMPARE
;
404 assert(inst
->mlen
== 3);
406 msg_type
= BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD
;
407 assert(inst
->mlen
== 2);
410 case SHADER_OPCODE_TXD
:
411 /* There is no sample_d_c message; comparisons are done manually. */
412 msg_type
= BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_GRADIENTS
;
413 assert(inst
->mlen
== 4);
415 case SHADER_OPCODE_TXF
:
416 msg_type
= BRW_SAMPLER_MESSAGE_SIMD4X2_LD
;
417 assert(inst
->mlen
== 2);
419 case SHADER_OPCODE_TXS
:
420 msg_type
= BRW_SAMPLER_MESSAGE_SIMD4X2_RESINFO
;
421 assert(inst
->mlen
== 2);
424 assert(!"should not get here: invalid VS texture opcode");
/* Every path above must have picked a message type. */
429 assert(msg_type
!= -1);
431 /* Load the message header if present. If there's a texture offset, we need
432 * to set it up explicitly and load the offset bitfield. Otherwise, we can
433 * use an implied move from g0 to the first message register.
435 if (inst
->texture_offset
) {
436 /* Explicitly set up the message header by copying g0 to the MRF. */
437 brw_MOV(p
, retype(brw_message_reg(inst
->base_mrf
), BRW_REGISTER_TYPE_UD
),
438 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD
));
440 /* Then set the offset bits in DWord 2. */
441 brw_set_access_mode(p
, BRW_ALIGN_1
);
443 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE
, inst
->base_mrf
, 2),
444 BRW_REGISTER_TYPE_UD
),
445 brw_imm_uw(inst
->texture_offset
));
446 brw_set_access_mode(p
, BRW_ALIGN_16
);
447 } else if (inst
->header_present
) {
448 /* Set up an implied move from g0 to the MRF. */
449 src
= brw_vec8_grf(0, 0);
/* Choose the sampler return format from the destination's register
 * type (the switch subject line is missing from this chunk). */
452 uint32_t return_format
;
455 case BRW_REGISTER_TYPE_D
:
456 return_format
= BRW_SAMPLER_RETURN_FORMAT_SINT32
;
458 case BRW_REGISTER_TYPE_UD
:
459 return_format
= BRW_SAMPLER_RETURN_FORMAT_UINT32
;
462 return_format
= BRW_SAMPLER_RETURN_FORMAT_FLOAT32
;
/* Issue the sampler message; surface index derives from the sampler. */
470 SURF_INDEX_VS_TEXTURE(inst
->sampler
),
474 1, /* response length */
476 inst
->header_present
,
477 BRW_SAMPLER_SIMD_MODE_SIMD4X2
,
/* Emits the URB write that delivers the shaded vertex outputs.  The call
 * (its function name line is missing from this chunk — presumably
 * brw_urb_WRITE) uses interleaved swizzling, no response, and marks
 * end-of-thread per inst->eot.
 */
482 vec4_visitor::generate_urb_write(vec4_instruction
*inst
)
485 brw_null_reg(), /* dest */
486 inst
->base_mrf
, /* starting mrf reg nr */
487 brw_vec8_grf(0, 0), /* src */
488 false, /* allocate */
491 0, /* response len */
493 inst
->eot
, /* writes complete */
494 inst
->offset
, /* urb destination offset */
495 BRW_URB_SWIZZLE_INTERLEAVE
);
/* Fills M1 of an oword dual-block scratch message with the per-vertex
 * block offsets: the first vertex's index in M1.0 and the second vertex's
 * index (first + second_vertex_offset) in M1.4.  The offset delta differs
 * by generation (the guarding condition line is missing from this chunk:
 * 1 vs. 16 — presumably gen6+ vs. earlier; confirm against the full file).
 */
499 vec4_visitor::generate_oword_dual_block_offsets(struct brw_reg m1
,
500 struct brw_reg index
)
502 int second_vertex_offset
;
505 second_vertex_offset
= 1;
507 second_vertex_offset
= 16;
509 m1
= retype(m1
, BRW_REGISTER_TYPE_D
);
511 /* Set up M1 (message payload). Only the block offsets in M1.0 and
512 * M1.4 are used, and the rest are ignored.
514 struct brw_reg m1_0
= suboffset(vec1(m1
), 0);
515 struct brw_reg m1_4
= suboffset(vec1(m1
), 4);
516 struct brw_reg index_0
= suboffset(vec1(index
), 0);
517 struct brw_reg index_4
= suboffset(vec1(index
), 4);
/* Scalar writes into the header: disable masking, use align1. */
519 brw_push_insn_state(p
);
520 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
521 brw_set_access_mode(p
, BRW_ALIGN_1
);
523 brw_MOV(p
, m1_0
, index_0
);
/* For an immediate index we can fold the offset in at compile time;
 * otherwise emit an ADD. */
525 if (index
.file
== BRW_IMMEDIATE_VALUE
) {
526 index_4
.dw1
.ud
+= second_vertex_offset
;
527 brw_MOV(p
, m1_4
, index_4
);
529 brw_ADD(p
, m1_4
, index_4
, brw_imm_d(second_vertex_offset
));
532 brw_pop_insn_state(p
);
/* Emits an oword dual-block read from scratch space (stateless surface
 * 255) into dst.  Builds the g0-based header, the per-vertex offsets in
 * M1, selects the generation-specific message type, and issues the SEND.
 */
536 vec4_visitor::generate_scratch_read(vec4_instruction
*inst
,
538 struct brw_reg index
)
540 struct brw_reg header
= brw_vec8_grf(0, 0);
542 gen6_resolve_implied_move(p
, &header
, inst
->base_mrf
);
/* M1 carries the two vertices' block offsets. */
544 generate_oword_dual_block_offsets(brw_message_reg(inst
->base_mrf
+ 1),
/* Message type encoding changed across generations (the first guarding
 * condition line is missing from this chunk — presumably gen >= 6). */
550 msg_type
= GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ
;
551 else if (intel
->gen
== 5 || intel
->is_g4x
)
552 msg_type
= G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ
;
554 msg_type
= BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ
;
556 /* Each of the 8 channel enables is considered for whether each
559 struct brw_instruction
*send
= brw_next_insn(p
, BRW_OPCODE_SEND
);
560 brw_set_dest(p
, send
, dst
);
561 brw_set_src0(p
, send
, header
);
/* For SENDs this field holds the base MRF, not a conditional mod. */
563 send
->header
.destreg__conditionalmod
= inst
->base_mrf
;
564 brw_set_dp_read_message(p
, send
,
565 255, /* binding table index: stateless access */
566 BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD
,
568 BRW_DATAPORT_READ_TARGET_RENDER_CACHE
,
/* Emits an oword dual-block write of src to scratch space (stateless
 * surface 255).  Header/offset setup mirrors generate_scratch_read; the
 * data payload goes in base_mrf+2.  Predication is applied to the SEND
 * itself, and pre-gen6 a write commit is requested for ordering.
 */
574 vec4_visitor::generate_scratch_write(vec4_instruction
*inst
,
577 struct brw_reg index
)
579 struct brw_reg header
= brw_vec8_grf(0, 0);
582 /* If the instruction is predicated, we'll predicate the send, not
/* Disable predication while assembling the message payload. */
585 brw_set_predicate_control(p
, false);
587 gen6_resolve_implied_move(p
, &header
, inst
->base_mrf
);
589 generate_oword_dual_block_offsets(brw_message_reg(inst
->base_mrf
+ 1),
/* Copy the data to write into M2 of the message. */
593 retype(brw_message_reg(inst
->base_mrf
+ 2), BRW_REGISTER_TYPE_D
),
594 retype(src
, BRW_REGISTER_TYPE_D
));
/* Generation-specific write message encodings (the first guarding
 * condition line is missing from this chunk — presumably gen == 7). */
599 msg_type
= GEN7_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE
;
600 else if (intel
->gen
== 6)
601 msg_type
= GEN6_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE
;
603 msg_type
= BRW_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE
;
/* Restore the instruction's predication for the SEND itself. */
605 brw_set_predicate_control(p
, inst
->predicate
);
607 /* Pre-gen6, we have to specify write commits to ensure ordering
608 * between reads and writes within a thread. Afterwards, that's
609 * guaranteed and write commits only matter for inter-thread
612 if (intel
->gen
>= 6) {
613 write_commit
= false;
615 /* The visitor set up our destination register to be g0. This
616 * means that when the next read comes along, we will end up
617 * reading from g0 and causing a block on the write commit. For
618 * write-after-read, we are relying on the value of the previous
619 * read being used (and thus blocking on completion) before our
620 * write is executed. This means we have to be careful in
621 * instruction scheduling to not violate this assumption.
626 /* Each of the 8 channel enables is considered for whether each
629 struct brw_instruction
*send
= brw_next_insn(p
, BRW_OPCODE_SEND
);
630 brw_set_dest(p
, send
, dst
);
631 brw_set_src0(p
, send
, header
);
633 send
->header
.destreg__conditionalmod
= inst
->base_mrf
;
634 brw_set_dp_write_message(p
, send
,
635 255, /* binding table index: stateless access */
636 BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD
,
639 true, /* header present */
640 false, /* not a render target write */
641 write_commit
, /* rlen */
/* Emits a pull-constant load from the constant buffer surface identified
 * by the immediate `index`.  On gen7 this is a headerless sampler LD
 * message taking the offset directly; on earlier generations it is an
 * oword dual-block data-port read with a g0-based header.
 */
647 vec4_visitor::generate_pull_constant_load(vec4_instruction
*inst
,
649 struct brw_reg index
,
650 struct brw_reg offset
)
/* The surface index must be a compile-time UD immediate. */
652 assert(index
.file
== BRW_IMMEDIATE_VALUE
&&
653 index
.type
== BRW_REGISTER_TYPE_UD
);
654 uint32_t surf_index
= index
.dw1
.ud
;
656 if (intel
->gen
== 7) {
/* gen7 path: sampler LD, no header, offset as the message payload. */
657 gen6_resolve_implied_move(p
, &offset
, inst
->base_mrf
);
658 brw_instruction
*insn
= brw_next_insn(p
, BRW_OPCODE_SEND
);
659 brw_set_dest(p
, insn
, dst
);
660 brw_set_src0(p
, insn
, offset
);
661 brw_set_sampler_message(p
, insn
,
663 0, /* LD message ignores sampler unit */
664 GEN5_SAMPLER_MESSAGE_SAMPLE_LD
,
667 false, /* no header */
668 BRW_SAMPLER_SIMD_MODE_SIMD4X2
,
/* Pre-gen7 path: data-port oword dual-block read with g0 header. */
673 struct brw_reg header
= brw_vec8_grf(0, 0);
675 gen6_resolve_implied_move(p
, &header
, inst
->base_mrf
);
/* Put the read offset into M1 of the message. */
677 brw_MOV(p
, retype(brw_message_reg(inst
->base_mrf
+ 1), BRW_REGISTER_TYPE_D
),
/* Generation-specific read message encodings (the first guarding
 * condition line is missing from this chunk — presumably gen >= 6). */
683 msg_type
= GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ
;
684 else if (intel
->gen
== 5 || intel
->is_g4x
)
685 msg_type
= G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ
;
687 msg_type
= BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ
;
689 /* Each of the 8 channel enables is considered for whether each
692 struct brw_instruction
*send
= brw_next_insn(p
, BRW_OPCODE_SEND
);
693 brw_set_dest(p
, send
, dst
);
694 brw_set_src0(p
, send
, header
);
696 send
->header
.destreg__conditionalmod
= inst
->base_mrf
;
697 brw_set_dp_read_message(p
, send
,
699 BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD
,
701 BRW_DATAPORT_READ_TARGET_DATA_CACHE
,
/* Dispatches a non-arithmetic VS IR opcode to the matching specialized
 * generator: math (per-generation variants), texturing, URB write,
 * scratch read/write, and pull-constant loads.  Unknown opcodes fail the
 * compile with a diagnostic.  `break`s after each case are among the
 * lines missing from this chunk.
 */
707 vec4_visitor::generate_vs_instruction(vec4_instruction
*instruction
,
711 vec4_instruction
*inst
= (vec4_instruction
*)instruction
;
713 switch (inst
->opcode
) {
/* One-source math ops: gen6 needs its align1 variant, everything else
 * (including gen7, per the comment below) uses the gen4 path. */
714 case SHADER_OPCODE_RCP
:
715 case SHADER_OPCODE_RSQ
:
716 case SHADER_OPCODE_SQRT
:
717 case SHADER_OPCODE_EXP2
:
718 case SHADER_OPCODE_LOG2
:
719 case SHADER_OPCODE_SIN
:
720 case SHADER_OPCODE_COS
:
721 if (intel
->gen
== 6) {
722 generate_math1_gen6(inst
, dst
, src
[0]);
724 /* Also works for Gen7. */
725 generate_math1_gen4(inst
, dst
, src
[0]);
/* Two-source math ops: one variant per generation range. */
729 case SHADER_OPCODE_POW
:
730 case SHADER_OPCODE_INT_QUOTIENT
:
731 case SHADER_OPCODE_INT_REMAINDER
:
732 if (intel
->gen
>= 7) {
733 generate_math2_gen7(inst
, dst
, src
[0], src
[1]);
734 } else if (intel
->gen
== 6) {
735 generate_math2_gen6(inst
, dst
, src
[0], src
[1]);
737 generate_math2_gen4(inst
, dst
, src
[0], src
[1]);
741 case SHADER_OPCODE_TEX
:
742 case SHADER_OPCODE_TXD
:
743 case SHADER_OPCODE_TXF
:
744 case SHADER_OPCODE_TXL
:
745 case SHADER_OPCODE_TXS
:
746 generate_tex(inst
, dst
, src
[0]);
749 case VS_OPCODE_URB_WRITE
:
750 generate_urb_write(inst
);
753 case VS_OPCODE_SCRATCH_READ
:
754 generate_scratch_read(inst
, dst
, src
[0]);
757 case VS_OPCODE_SCRATCH_WRITE
:
758 generate_scratch_write(inst
, dst
, src
[0], src
[1]);
761 case VS_OPCODE_PULL_CONSTANT_LOAD
:
762 generate_pull_constant_load(inst
, dst
, src
[0], src
[1]);
/* Anything else: report a named opcode when it's in the table, else the
 * raw opcode number. */
766 if (inst
->opcode
< (int)ARRAY_SIZE(brw_opcodes
)) {
767 fail("unsupported opcode in `%s' in VS\n",
768 brw_opcodes
[inst
->opcode
].name
);
770 fail("Unsupported opcode %d in VS", inst
->opcode
);
/* Main compile driver body (its signature line is missing from this
 * chunk — presumably vec4_visitor::run(); confirm against the full file).
 * Builds the IR (GLSL visitor or ARB vertex-program path), lowers array
 * access, splits/packs uniforms and virtual GRFs, iterates the optimizer
 * to a fixed point, and register-allocates with spilling.
 */
778 /* Generate VS IR for main(). (the visitor only descends into
779 * functions called "main").
782 visit_instructions(shader
->ir
);
/* ARB_vertex_program path: emit from the Mesa program directly. */
784 emit_vertex_program_code();
787 if (c
->key
.userclip_active
&& !c
->key
.uses_clip_distance
)
788 setup_uniform_clipplane_values();
792 /* Before any optimization, push array accesses out to scratch
793 * space where we need them to be. This pass may allocate new
794 * virtual GRFs, so we want to do it early. It also makes sure
795 * that we have reladdr computations available for CSE, since we'll
796 * often do repeated subexpressions for those.
799 move_grf_array_access_to_scratch();
800 move_uniform_array_access_to_pull_constants();
802 /* The ARB_vertex_program frontend emits pull constant loads directly
803 * rather than using reladdr, so we don't need to walk through all the
804 * instructions looking for things to move. There isn't anything.
806 * We do still need to split things to vec4 size.
808 split_uniform_registers();
810 pack_uniform_registers();
811 move_push_constants_to_pull_constants();
812 split_virtual_grfs();
/* Optimization loop: each pass reports progress; presumably iterated
 * until a fixed point (the loop header is missing from this chunk). */
817 progress
= dead_code_eliminate() || progress
;
818 progress
= opt_copy_propagation() || progress
;
819 progress
= opt_algebraic() || progress
;
820 progress
= opt_compute_to_mrf() || progress
;
830 /* Debug of register spilling: Go spill everything. */
831 const int grf_count
= virtual_grf_count
;
832 float spill_costs
[virtual_grf_count
];
833 bool no_spill
[virtual_grf_count
];
834 evaluate_spill_costs(spill_costs
, no_spill
);
835 for (int i
= 0; i
< grf_count
; i
++) {
/* Retry allocation, spilling a register each time it fails. */
842 while (!reg_allocate()) {
850 brw_set_access_mode(p
, BRW_ALIGN_16
);
/* Walks the optimized vec4 instruction list and emits native Gen EU code
 * through the brw_eu assembler `p`.  Simple ALU ops map 1:1 to brw_*()
 * emitters; everything else falls through to generate_vs_instruction().
 * Under INTEL_DEBUG=vs, prints IR annotations and disassembles each
 * instruction as it is emitted.  Many case labels and `break`s are among
 * the lines missing from this chunk.
 */
858 vec4_visitor::generate_code()
/* Track where the previous native instruction ended, for disassembly. */
860 int last_native_insn_offset
= 0;
861 const char *last_annotation_string
= NULL
;
862 const void *last_annotation_ir
= NULL
;
864 if (unlikely(INTEL_DEBUG
& DEBUG_VS
)) {
866 printf("Native code for vertex shader %d:\n", prog
->Name
);
868 printf("Native code for vertex program %d:\n", c
->vp
->program
.Base
.Id
);
872 foreach_list(node
, &this->instructions
) {
873 vec4_instruction
*inst
= (vec4_instruction
*)node
;
874 struct brw_reg src
[3], dst
;
/* Debug: print the originating IR/program instruction and annotation
 * whenever it changes. */
876 if (unlikely(INTEL_DEBUG
& DEBUG_VS
)) {
877 if (last_annotation_ir
!= inst
->ir
) {
878 last_annotation_ir
= inst
->ir
;
879 if (last_annotation_ir
) {
882 ((ir_instruction
*) last_annotation_ir
)->print();
884 const prog_instruction
*vpi
;
885 vpi
= (const prog_instruction
*) inst
->ir
;
886 printf("%d: ", (int)(vpi
- vp
->Base
.Instructions
));
887 _mesa_fprint_instruction_opt(stdout
, vpi
, 0,
888 PROG_PRINT_DEBUG
, NULL
);
893 if (last_annotation_string
!= inst
->annotation
) {
894 last_annotation_string
= inst
->annotation
;
895 if (last_annotation_string
)
896 printf(" %s\n", last_annotation_string
);
/* Lower the IR operands to hardware registers. */
900 for (unsigned int i
= 0; i
< 3; i
++) {
901 src
[i
] = inst
->get_src(i
);
903 dst
= inst
->get_dst();
/* Apply the instruction's modifiers to the assembler state. */
905 brw_set_conditionalmod(p
, inst
->conditional_mod
);
906 brw_set_predicate_control(p
, inst
->predicate
);
907 brw_set_predicate_inverse(p
, inst
->predicate_inverse
);
908 brw_set_saturate(p
, inst
->saturate
);
910 switch (inst
->opcode
) {
912 brw_MOV(p
, dst
, src
[0]);
915 brw_ADD(p
, dst
, src
[0], src
[1]);
918 brw_MUL(p
, dst
, src
[0], src
[1]);
/* MACH needs accumulator write control toggled around it. */
920 case BRW_OPCODE_MACH
:
921 brw_set_acc_write_control(p
, 1);
922 brw_MACH(p
, dst
, src
[0], src
[1]);
923 brw_set_acc_write_control(p
, 0);
927 brw_FRC(p
, dst
, src
[0]);
929 case BRW_OPCODE_RNDD
:
930 brw_RNDD(p
, dst
, src
[0]);
932 case BRW_OPCODE_RNDE
:
933 brw_RNDE(p
, dst
, src
[0]);
935 case BRW_OPCODE_RNDZ
:
936 brw_RNDZ(p
, dst
, src
[0]);
940 brw_AND(p
, dst
, src
[0], src
[1]);
943 brw_OR(p
, dst
, src
[0], src
[1]);
946 brw_XOR(p
, dst
, src
[0], src
[1]);
949 brw_NOT(p
, dst
, src
[0]);
952 brw_ASR(p
, dst
, src
[0], src
[1]);
955 brw_SHR(p
, dst
, src
[0], src
[1]);
958 brw_SHL(p
, dst
, src
[0], src
[1]);
962 brw_CMP(p
, dst
, inst
->conditional_mod
, src
[0], src
[1]);
965 brw_SEL(p
, dst
, src
[0], src
[1]);
969 brw_DPH(p
, dst
, src
[0], src
[1]);
973 brw_DP4(p
, dst
, src
[0], src
[1]);
977 brw_DP3(p
, dst
, src
[0], src
[1]);
981 brw_DP2(p
, dst
, src
[0], src
[1]);
/* IF: gen6 supports an embedded compare; otherwise a plain IF that
 * inherits the instruction's predication. */
985 if (inst
->src
[0].file
!= BAD_FILE
) {
986 /* The instruction has an embedded compare (only allowed on gen6) */
987 assert(intel
->gen
== 6);
988 gen6_IF(p
, inst
->conditional_mod
, src
[0], src
[1]);
990 struct brw_instruction
*brw_inst
= brw_IF(p
, BRW_EXECUTE_8
);
991 brw_inst
->header
.predicate_control
= inst
->predicate
;
995 case BRW_OPCODE_ELSE
:
998 case BRW_OPCODE_ENDIF
:
1003 brw_DO(p
, BRW_EXECUTE_8
);
1006 case BRW_OPCODE_BREAK
:
1008 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
1010 case BRW_OPCODE_CONTINUE
:
1011 /* FINISHME: We need to write the loop instruction support still. */
1012 if (intel
->gen
>= 6)
1016 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
1019 case BRW_OPCODE_WHILE
:
/* Non-ALU opcodes go to the specialized generators. */
1024 generate_vs_instruction(inst
, dst
, src
);
/* Debug: disassemble whatever native code this IR instruction produced. */
1028 if (unlikely(INTEL_DEBUG
& DEBUG_VS
)) {
1029 brw_dump_compile(p
, stdout
,
1030 last_native_insn_offset
, p
->next_insn_offset
);
1033 last_native_insn_offset
= p
->next_insn_offset
;
1036 if (unlikely(INTEL_DEBUG
& DEBUG_VS
)) {
1042 /* OK, while the INTEL_DEBUG=vs above is very nice for debugging VS
1043 * emit issues, it doesn't get the jump distances into the output,
1044 * which is often something we want to debug. So this is here in
1045 * case you're doing that.
1047 if (0 && unlikely(INTEL_DEBUG
& DEBUG_VS
)) {
1048 brw_dump_compile(p
, stdout
, 0, p
->next_insn_offset
);
/* Top-level entry point for compiling a vertex shader / vertex program
 * with the vec4 backend.  Handles INTEL_DEBUG=vs IR dumping and
 * INTEL_DEBUG=perf compile-time/stall reporting, runs the vec4_visitor,
 * and on failure propagates the message into prog->InfoLog.  Several
 * lines (including the success path and return) are missing from this
 * chunk.
 */
1055 brw_vs_emit(struct gl_shader_program
*prog
, struct brw_vs_compile
*c
)
1057 struct brw_context
*brw
= c
->func
.brw
;
1058 struct intel_context
*intel
= &c
->func
.brw
->intel
;
1059 bool start_busy
= false;
1060 float start_time
= 0;
/* Perf debugging: note whether the GPU was busy before we started, so a
 * later idle check can attribute a stall to this compile. */
1062 if (unlikely(INTEL_DEBUG
& DEBUG_PERF
)) {
1063 start_busy
= (intel
->batch
.last_bo
&&
1064 drm_intel_bo_busy(intel
->batch
.last_bo
));
1065 start_time
= get_time();
/* `shader` stays NULL for the ARB_vertex_program (non-GLSL) path. */
1068 struct brw_shader
*shader
= NULL
;
1070 shader
= (brw_shader
*) prog
->_LinkedShaders
[MESA_SHADER_VERTEX
];
1072 if (unlikely(INTEL_DEBUG
& DEBUG_VS
)) {
1074 printf("GLSL IR for native vertex shader %d:\n", prog
->Name
);
1075 _mesa_print_ir(shader
->ir
, NULL
);
1078 printf("ARB_vertex_program %d for native vertex shader\n",
1079 c
->vp
->program
.Base
.Id
);
1080 _mesa_print_program(&c
->vp
->program
.Base
);
/* Report recompiles and compiles that stalled an otherwise-busy GPU. */
1084 if (unlikely(INTEL_DEBUG
& DEBUG_PERF
) && shader
) {
1085 if (shader
->compiled_once
) {
1086 brw_vs_debug_recompile(brw
, prog
, &c
->key
);
1088 if (start_busy
&& !drm_intel_bo_busy(intel
->batch
.last_bo
)) {
1089 perf_debug("VS compile took %.03f ms and stalled the GPU\n",
1090 (get_time() - start_time
) * 1000);
1092 shader
->compiled_once
= true;
/* Run the vec4 backend; on failure, surface the message to the linker
 * log and mark the program as failed to link. */
1095 vec4_visitor
v(c
, prog
, shader
);
1097 prog
->LinkStatus
= false;
1098 ralloc_strcat(&prog
->InfoLog
, v
.fail_msg
);
1107 } /* namespace brw */