/*
 * Copyright © 2011 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
25 #include "../glsl/ir_print_visitor.h"
/* Assigns payload registers to the vertex attributes that the program reads
 * and rewrites instruction sources that refer to attributes.
 *
 * payload_reg is the first register available for attributes.  For every bit
 * set in prog_data->inputs_read, attribute_map[i] is set to
 * payload_reg + nr_attributes.  Each of the three sources of every
 * instruction is tested against the ATTR file; the rewrite turns the source
 * into a HW_REG whose fixed_hw_reg is brw_vec8_grf(attribute_map[reg], 0)
 * with the source swizzle copied in.  prog_data->urb_read_length is set to
 * (nr_attributes + 1) / 2 and payload_reg + nr_attributes is returned.
 *
 * NOTE(review): this copy of the file has lost lines.  Not visible here: the
 * return type, the declaration and updates of nr_attributes, the statement
 * guarded by the ATTR test (presumably it skips non-attribute sources), the
 * statement guarded by nr_attributes == 0, closing braces and comment
 * terminators.  Restore them from the upstream file before building.
 */
36 vec4_visitor::setup_attributes(int payload_reg
)
39 int attribute_map
[VERT_ATTRIB_MAX
];
/* Hand out payload registers to the attributes that are read. */
42 for (int i
= 0; i
< VERT_ATTRIB_MAX
; i
++) {
43 if (prog_data
->inputs_read
& BITFIELD64_BIT(i
)) {
44 attribute_map
[i
] = payload_reg
+ nr_attributes
;
/* Point attribute sources at the registers chosen above. */
49 foreach_iter(exec_list_iterator
, iter
, this->instructions
) {
50 vec4_instruction
*inst
= (vec4_instruction
*)iter
.get();
52 for (int i
= 0; i
< 3; i
++) {
53 if (inst
->src
[i
].file
!= ATTR
)
56 inst
->src
[i
].file
= HW_REG
;
57 inst
->src
[i
].fixed_hw_reg
= brw_vec8_grf(attribute_map
[inst
->src
[i
].reg
], 0);
58 inst
->src
[i
].fixed_hw_reg
.dw1
.bits
.swizzle
= inst
->src
[i
].swizzle
;
62 /* The BSpec says we always have to read at least one thing from
63 * the VF, and it appears that the hardware wedges otherwise.
65 if (nr_attributes
== 0)
68 prog_data
->urb_read_length
= (nr_attributes
+ 1) / 2;
70 return payload_reg
+ nr_attributes
;
/* Lays out the push-constant part of the payload starting at register reg.
 *
 * When c->key.nr_userclip is non-zero, c->userplane[i] is pointed at a
 * 4-wide region for each user clip plane.  Two layouts are visible: one
 * tested with intel->gen >= 6 (plane i at reg + i / 2) and a second one
 * (plane i at reg + (6 + i) / 2), presumably the branch for older hardware.
 * If intel->gen < 6 and there are no uniforms, a single placeholder uniform
 * is described: uniform_size is 1 and its four param slots are NULL with
 * PARAM_CONVERT_ZERO.  The visible accounting then advances reg by
 * ALIGN(uniforms, 2) / 2, sets nr_params to uniforms * 4, sets
 * curb_read_length to reg - 1 and sets uses_new_param_layout.
 *
 * NOTE(review): this copy of the file has lost lines (return type, else
 * lines, some statements, closing braces, comment terminators and the return
 * statement), so the exact branch structure cannot be confirmed from here.
 */
74 vec4_visitor::setup_uniforms(int reg
)
76 /* User clip planes from curbe:
78 if (c
->key
.nr_userclip
) {
79 if (intel
->gen
>= 6) {
80 for (int i
= 0; i
< c
->key
.nr_userclip
; i
++) {
81 c
->userplane
[i
] = stride(brw_vec4_grf(reg
+ i
/ 2,
82 (i
% 2) * 4), 0, 4, 1);
84 reg
+= ALIGN(c
->key
.nr_userclip
, 2) / 2;
86 for (int i
= 0; i
< c
->key
.nr_userclip
; i
++) {
87 c
->userplane
[i
] = stride(brw_vec4_grf(reg
+ (6 + i
) / 2,
88 (i
% 2) * 4), 0, 4, 1);
90 reg
+= (ALIGN(6 + c
->key
.nr_userclip
, 4) / 4) * 2;
94 /* The pre-gen6 VS requires that some push constants get loaded no
95 * matter what, or the GPU would hang.
97 if (intel
->gen
< 6 && this->uniforms
== 0) {
98 this->uniform_size
[this->uniforms
] = 1;
100 for (unsigned int i
= 0; i
< 4; i
++) {
101 unsigned int slot
= this->uniforms
* 4 + i
;
103 c
->prog_data
.param
[slot
] = NULL
;
104 c
->prog_data
.param_convert
[slot
] = PARAM_CONVERT_ZERO
;
110 reg
+= ALIGN(uniforms
, 2) / 2;
113 /* for now, we are not doing any elimination of unused slots, nor
114 * are we packing our uniforms.
116 c
->prog_data
.nr_params
= this->uniforms
* 4;
118 c
->prog_data
.curb_read_length
= reg
- 1;
119 c
->prog_data
.uses_new_param_layout
= true;
/* Lays out the thread payload: uniforms first, then vertex attributes.
 *
 * reg is threaded through setup_uniforms() and setup_attributes(); the value
 * that comes back is stored in first_non_payload_grf.
 *
 * NOTE(review): this copy of the file has lost lines (return type, the
 * declaration and initial adjustment of reg, braces and the comment
 * terminator).  The existing comment says push constants start at g1.
 */
125 vec4_visitor::setup_payload(void)
129 /* The payload always contains important data in g0, which contains
130 * the URB handles that are passed on to the URB write at the end
131 * of the thread. So, we always start push constants at g1.
135 reg
= setup_uniforms(reg
);
137 reg
= setup_attributes(reg
);
139 this->first_non_payload_grf
= reg
;
/* Builds the hardware register description for this instruction's
 * destination.
 *
 * Four outcomes are visible: a GRF built from dst.reg + dst.reg_offset,
 * retyped to dst.type and given dst.writemask; the stored dst.fixed_hw_reg;
 * the null register; and an assert followed by the null register.
 *
 * NOTE(review): this copy of the file has lost lines (return type, the
 * switch header, every case label, break statements, braces and the return
 * statement), so which register file selects which outcome is not visible.
 */
143 vec4_instruction::get_dst(void)
145 struct brw_reg brw_reg
;
/* Register built from number plus offset, then typed and write-masked. */
149 brw_reg
= brw_vec8_grf(dst
.reg
+ dst
.reg_offset
, 0);
150 brw_reg
= retype(brw_reg
, dst
.type
);
151 brw_reg
.dw1
.bits
.writemask
= dst
.writemask
;
/* Destination that already carries a fixed hardware register. */
155 brw_reg
= dst
.fixed_hw_reg
;
/* Null destination. */
159 brw_reg
= brw_null_reg();
/* Unexpected case: assert, then fall back to the null register. */
163 assert(!"not reached");
164 brw_reg
= brw_null_reg();
/* Builds the hardware register description for source operand i.
 *
 * The outer switch is on src[i].file.  Visible outcomes: a GRF built from
 * src[i].reg + src[i].reg_offset, retyped, with the swizzle copied and
 * brw_abs() / negate() applied; an immediate chosen by src[i].type (float,
 * signed or unsigned integer, otherwise assert and null register); a 4-wide
 * region at register 1 + (reg + reg_offset) / 2 with the same retype,
 * swizzle, abs and negate treatment; the stored fixed_hw_reg; and the null
 * register, once for a case marked as probably unused and once after an
 * assert.
 *
 * NOTE(review): this copy of the file has lost lines (return type, the outer
 * case labels, the conditions guarding brw_abs() and negate(), the end of
 * the stride() call, break statements, braces and the return statement).
 */
171 vec4_instruction::get_src(int i
)
173 struct brw_reg brw_reg
;
175 switch (src
[i
].file
) {
/* Register built from number plus offset, typed and swizzled. */
177 brw_reg
= brw_vec8_grf(src
[i
].reg
+ src
[i
].reg_offset
, 0);
178 brw_reg
= retype(brw_reg
, src
[i
].type
);
179 brw_reg
.dw1
.bits
.swizzle
= src
[i
].swizzle
;
181 brw_reg
= brw_abs(brw_reg
);
183 brw_reg
= negate(brw_reg
);
/* Immediate operand: pick the constructor that matches the type. */
187 switch (src
[i
].type
) {
case BRW_REGISTER_TYPE_F
:
189 brw_reg
= brw_imm_f(src
[i
].imm
.f
);
case BRW_REGISTER_TYPE_D
:
192 brw_reg
= brw_imm_d(src
[i
].imm
.i
);
case BRW_REGISTER_TYPE_UD
:
195 brw_reg
= brw_imm_ud(src
[i
].imm
.u
);
198 assert(!"not reached");
199 brw_reg
= brw_null_reg();
/* Two 4-wide slots per register, counted from register 1. */
205 brw_reg
= stride(brw_vec4_grf(1 + (src
[i
].reg
+ src
[i
].reg_offset
) / 2,
206 ((src
[i
].reg
+ src
[i
].reg_offset
) % 2) * 4),
208 brw_reg
= retype(brw_reg
, src
[i
].type
);
209 brw_reg
.dw1
.bits
.swizzle
= src
[i
].swizzle
;
211 brw_reg
= brw_abs(brw_reg
);
213 brw_reg
= negate(brw_reg
);
/* Source that already carries a fixed hardware register. */
217 brw_reg
= src
[i
].fixed_hw_reg
;
221 /* Probably unused. */
222 brw_reg
= brw_null_reg();
/* Unexpected case: assert, then fall back to the null register. */
226 assert(!"not reached");
227 brw_reg
= brw_null_reg();
/* Emits a one-operand math operation; generate_vs_instruction() calls this
 * when intel->gen >= 6 does not hold (presumably from its else branch).
 *
 * The visible arguments are the math function derived from inst->opcode via
 * brw_math_function(), BRW_MATH_SATURATE_NONE, BRW_MATH_DATA_SCALAR and
 * BRW_MATH_PRECISION_FULL.
 *
 * NOTE(review): this copy of the file has lost lines (return type, the
 * remaining parameters, the name of the emit helper being called and several
 * of its arguments), so the full call cannot be confirmed from here.
 */
235 vec4_visitor::generate_math1_gen4(vec4_instruction
*inst
,
241 brw_math_function(inst
->opcode
),
242 BRW_MATH_SATURATE_NONE
,
245 BRW_MATH_DATA_SCALAR
,
246 BRW_MATH_PRECISION_FULL
);
/* Emits a one-operand math operation; generate_vs_instruction() calls this
 * when intel->gen >= 6.
 *
 * The visible arguments are the math function derived from inst->opcode via
 * brw_math_function(), BRW_MATH_SATURATE_NONE, BRW_MATH_DATA_SCALAR and
 * BRW_MATH_PRECISION_FULL.
 *
 * NOTE(review): this copy of the file has lost lines (return type, the
 * remaining parameters, the name of the emit helper being called and several
 * of its arguments), so any difference from generate_math1_gen4() is not
 * visible here.
 */
250 vec4_visitor::generate_math1_gen6(vec4_instruction
*inst
,
256 brw_math_function(inst
->opcode
),
257 BRW_MATH_SATURATE_NONE
,
260 BRW_MATH_DATA_SCALAR
,
261 BRW_MATH_PRECISION_FULL
);
/* Emits the URB write described by inst.
 *
 * Visible arguments of the emit call: a null destination, inst->base_mrf as
 * the first message register, g0 (brw_vec8_grf(0, 0)) as the source, no
 * allocation, a response length of 0, inst->eot for the "writes complete"
 * argument, inst->offset as the URB destination offset and
 * BRW_URB_SWIZZLE_INTERLEAVE.
 *
 * NOTE(review): this copy of the file has lost lines (return type, the name
 * of the emit helper being called and three of its arguments).
 */
265 vec4_visitor::generate_urb_write(vec4_instruction
*inst
)
268 brw_null_reg(), /* dest */
269 inst
->base_mrf
, /* starting mrf reg nr */
270 brw_vec8_grf(0, 0), /* src */
271 false, /* allocate */
274 0, /* response len */
276 inst
->eot
, /* writes complete */
277 inst
->offset
, /* urb destination offset */
278 BRW_URB_SWIZZLE_INTERLEAVE
);
/* Emits native code for the opcodes that generate_code() does not handle
 * inline.
 *
 * RCP, RSQ, SQRT, EXP2, LOG2, SIN and COS go to generate_math1_gen6() when
 * intel->gen >= 6; generate_math1_gen4() is the other visible call.
 * VS_OPCODE_URB_WRITE goes to generate_urb_write().  Anything else is
 * reported through fail(): by name when the opcode indexes into brw_opcodes,
 * with a second message that prints the number.
 *
 * NOTE(review): this copy of the file has lost lines (return type, the dst
 * and src parameters, else lines, break statements, the default label and
 * braces).  Nothing is visible under the SHADER_OPCODE_POW label.
 */
282 vec4_visitor::generate_vs_instruction(vec4_instruction
*instruction
,
286 vec4_instruction
*inst
= (vec4_instruction
*)instruction
;
288 switch (inst
->opcode
) {
/* One-operand math: the emit path depends on the hardware generation. */
289 case SHADER_OPCODE_RCP
:
290 case SHADER_OPCODE_RSQ
:
291 case SHADER_OPCODE_SQRT
:
292 case SHADER_OPCODE_EXP2
:
293 case SHADER_OPCODE_LOG2
:
294 case SHADER_OPCODE_SIN
:
295 case SHADER_OPCODE_COS
:
296 if (intel
->gen
>= 6) {
297 generate_math1_gen6(inst
, dst
, src
[0]);
299 generate_math1_gen4(inst
, dst
, src
[0]);
303 case SHADER_OPCODE_POW
:
307 case VS_OPCODE_URB_WRITE
:
308 generate_urb_write(inst
);
/* Unsupported opcode: report it by name when it is inside brw_opcodes. */
312 if (inst
->opcode
< (int)ARRAY_SIZE(brw_opcodes
)) {
313 fail("unsupported opcode in `%s' in VS\n",
314 brw_opcodes
[inst
->opcode
].name
);
316 fail("Unsupported opcode %d in VS", inst
->opcode
);
/* Fragment of a method whose signature line is not part of this copy of the
 * file.  What is visible: a walk over every instruction in shader->ir, and a
 * later switch of the emitter p to BRW_ALIGN_16 access mode.
 *
 * NOTE(review): lines are missing around and between these statements (the
 * signature, the loop body, the calls between the loop and the access mode
 * change, and whatever follows), so the overall sequence cannot be confirmed
 * from here.
 */
324 /* Generate VS IR for main(). (the visitor only descends into
325 * functions called "main").
327 foreach_iter(exec_list_iterator
, iter
, *shader
->ir
) {
328 ir_instruction
*ir
= (ir_instruction
*)iter
.get();
341 brw_set_access_mode(p
, BRW_ALIGN_16
);
/* Translates the instruction list into native code through the emitter p.
 *
 * For each instruction in this->instructions the three sources and the
 * destination are converted with get_src() / get_dst(), the conditional
 * modifier, predicate, predicate inverse and saturate state are programmed
 * on p, and a switch on inst->opcode emits the matching brw_* instruction.
 * generate_vs_instruction(inst, dst, src) is also called from this switch,
 * presumably from the default case.
 *
 * Control flow bookkeeping: loop_stack holds the instruction returned by
 * brw_DO() for every open loop and if_depth_in_loop counts the IFs open at
 * each loop level; both arrays start with 16 entries and are doubled with
 * reralloc() when the depth reaches the capacity.  On intel->gen < 6 the
 * WHILE case walks back to the loop start and fills in the jump_count of
 * BREAK and CONTINUE instructions that still have a zero count.
 *
 * When INTEL_DEBUG has DEBUG_VS set, IR annotations, the raw instruction
 * words and a disassembly are printed as code is emitted, and the whole
 * program is dumped again at the end.
 *
 * NOTE(review): this copy of the file has lost lines (return type, most
 * short case labels, break and else lines, the declaration of br, the loop
 * depth updates, braces and comment terminators).  Restore them from the
 * upstream file before building.
 */
349 vec4_visitor::generate_code()
351 int last_native_inst
= p
->nr_insn
;
352 const char *last_annotation_string
= NULL
;
353 ir_instruction
*last_annotation_ir
= NULL
;
/* Loop bookkeeping; both arrays are sized by loop_stack_array_size. */
355 int loop_stack_array_size
= 16;
356 int loop_stack_depth
= 0;
357 brw_instruction
**loop_stack
=
358 rzalloc_array(this->mem_ctx
, brw_instruction
*, loop_stack_array_size
);
359 int *if_depth_in_loop
=
360 rzalloc_array(this->mem_ctx
, int, loop_stack_array_size
);
363 if (unlikely(INTEL_DEBUG
& DEBUG_VS
)) {
364 printf("Native code for vertex shader %d:\n", prog
->Name
);
367 foreach_list(node
, &this->instructions
) {
368 vec4_instruction
*inst
= (vec4_instruction
*)node
;
369 struct brw_reg src
[3], dst
;
/* Debug output: print the IR and the annotation whenever they change. */
371 if (unlikely(INTEL_DEBUG
& DEBUG_VS
)) {
372 if (last_annotation_ir
!= inst
->ir
) {
373 last_annotation_ir
= inst
->ir
;
374 if (last_annotation_ir
) {
376 last_annotation_ir
->print();
380 if (last_annotation_string
!= inst
->annotation
) {
381 last_annotation_string
= inst
->annotation
;
382 if (last_annotation_string
)
383 printf(" %s\n", last_annotation_string
);
/* Convert all three sources and the destination to hardware registers. */
387 for (unsigned int i
= 0; i
< 3; i
++) {
388 src
[i
] = inst
->get_src(i
);
390 dst
= inst
->get_dst();
/* Program the per-instruction emitter state from this instruction. */
392 brw_set_conditionalmod(p
, inst
->conditional_mod
);
393 brw_set_predicate_control(p
, inst
->predicate
);
394 brw_set_predicate_inverse(p
, inst
->predicate_inverse
);
395 brw_set_saturate(p
, inst
->saturate
);
397 switch (inst
->opcode
) {
399 brw_MOV(p
, dst
, src
[0]);
402 brw_ADD(p
, dst
, src
[0], src
[1]);
405 brw_MUL(p
, dst
, src
[0], src
[1]);
409 brw_FRC(p
, dst
, src
[0]);
411 case BRW_OPCODE_RNDD
:
412 brw_RNDD(p
, dst
, src
[0]);
414 case BRW_OPCODE_RNDE
:
415 brw_RNDE(p
, dst
, src
[0]);
417 case BRW_OPCODE_RNDZ
:
418 brw_RNDZ(p
, dst
, src
[0]);
422 brw_AND(p
, dst
, src
[0], src
[1]);
425 brw_OR(p
, dst
, src
[0], src
[1]);
428 brw_XOR(p
, dst
, src
[0], src
[1]);
431 brw_NOT(p
, dst
, src
[0]);
434 brw_ASR(p
, dst
, src
[0], src
[1]);
437 brw_SHR(p
, dst
, src
[0], src
[1]);
440 brw_SHL(p
, dst
, src
[0], src
[1]);
444 brw_CMP(p
, dst
, inst
->conditional_mod
, src
[0], src
[1]);
447 brw_SEL(p
, dst
, src
[0], src
[1]);
451 brw_DP4(p
, dst
, src
[0], src
[1]);
455 brw_DP3(p
, dst
, src
[0], src
[1]);
459 brw_DP2(p
, dst
, src
[0], src
[1]);
463 if (inst
->src
[0].file
!= BAD_FILE
) {
464 /* The instruction has an embedded compare (only allowed on gen6) */
465 assert(intel
->gen
== 6);
466 gen6_IF(p
, inst
->conditional_mod
, src
[0], src
[1]);
468 struct brw_instruction
*brw_inst
= brw_IF(p
, BRW_EXECUTE_8
);
469 brw_inst
->header
.predicate_control
= inst
->predicate
;
471 if_depth_in_loop
[loop_stack_depth
]++;
474 case BRW_OPCODE_ELSE
:
477 case BRW_OPCODE_ENDIF
:
479 if_depth_in_loop
[loop_stack_depth
]--;
/* Record the loop start, then grow both arrays if they are now full. */
483 loop_stack
[loop_stack_depth
++] = brw_DO(p
, BRW_EXECUTE_8
);
484 if (loop_stack_array_size
<= loop_stack_depth
) {
485 loop_stack_array_size
*= 2;
486 loop_stack
= reralloc(this->mem_ctx
, loop_stack
, brw_instruction
*,
487 loop_stack_array_size
);
488 if_depth_in_loop
= reralloc(this->mem_ctx
, if_depth_in_loop
, int,
489 loop_stack_array_size
);
491 if_depth_in_loop
[loop_stack_depth
] = 0;
494 case BRW_OPCODE_BREAK
:
495 brw_BREAK(p
, if_depth_in_loop
[loop_stack_depth
]);
496 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
498 case BRW_OPCODE_CONTINUE
:
499 /* FINISHME: We need to write the loop instruction support still. */
501 gen6_CONT(p
, loop_stack
[loop_stack_depth
- 1]);
503 brw_CONT(p
, if_depth_in_loop
[loop_stack_depth
]);
504 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
507 case BRW_OPCODE_WHILE
: {
508 struct brw_instruction
*inst0
, *inst1
;
514 assert(loop_stack_depth
> 0);
516 inst0
= inst1
= brw_WHILE(p
, loop_stack
[loop_stack_depth
]);
517 if (intel
->gen
< 6) {
518 /* patch all the BREAK/CONT instructions from last BGNLOOP */
519 while (inst0
> loop_stack
[loop_stack_depth
]) {
521 if (inst0
->header
.opcode
== BRW_OPCODE_BREAK
&&
522 inst0
->bits3
.if_else
.jump_count
== 0) {
523 inst0
->bits3
.if_else
.jump_count
= br
* (inst1
- inst0
+ 1);
525 else if (inst0
->header
.opcode
== BRW_OPCODE_CONTINUE
&&
526 inst0
->bits3
.if_else
.jump_count
== 0) {
527 inst0
->bits3
.if_else
.jump_count
= br
* (inst1
- inst0
);
535 generate_vs_instruction(inst
, dst
, src
);
/* Debug output: dump and disassemble the instructions just emitted. */
539 if (unlikely(INTEL_DEBUG
& DEBUG_VS
)) {
540 for (unsigned int i
= last_native_inst
; i
< p
->nr_insn
; i
++) {
542 printf("0x%08x 0x%08x 0x%08x 0x%08x ",
543 ((uint32_t *)&p
->store
[i
])[3],
544 ((uint32_t *)&p
->store
[i
])[2],
545 ((uint32_t *)&p
->store
[i
])[1],
546 ((uint32_t *)&p
->store
[i
])[0]);
548 brw_disasm(stdout
, &p
->store
[i
], intel
->gen
);
552 last_native_inst
= p
->nr_insn
;
555 if (unlikely(INTEL_DEBUG
& DEBUG_VS
)) {
/* The loop bookkeeping is no longer needed once every instruction is out. */
559 ralloc_free(loop_stack
);
560 ralloc_free(if_depth_in_loop
);
564 /* OK, while the INTEL_DEBUG=vs above is very nice for debugging VS
565 * emit issues, it doesn't get the jump distances into the output,
566 * which is often something we want to debug. So this is here in
567 * case you're doing that.
570 if (unlikely(INTEL_DEBUG
& DEBUG_VS
)) {
571 for (unsigned int i
= 0; i
< p
->nr_insn
; i
++) {
572 printf("0x%08x 0x%08x 0x%08x 0x%08x ",
573 ((uint32_t *)&p
->store
[i
])[3],
574 ((uint32_t *)&p
->store
[i
])[2],
575 ((uint32_t *)&p
->store
[i
])[1],
576 ((uint32_t *)&p
->store
[i
])[0]);
577 brw_disasm(stdout
, &p
->store
[i
], intel
->gen
);
/* Entry point that sets up the vec4 visitor for the current vertex program.
 *
 * The contexts are derived from c: p is &c->func, brw is p->brw, intel is
 * &brw->intel and ctx is &intel->ctx.  prog is
 * ctx->Shader.CurrentVertexProgram and shader is its linked
 * MESA_SHADER_VERTEX stage.  When INTEL_DEBUG has DEBUG_VS set the GLSL IR
 * is printed first.  A vec4_visitor is then constructed from c, prog and
 * shader.
 *
 * NOTE(review): this copy of the file has lost lines (return type, checks
 * between the declarations, the call that drives the visitor, the return
 * statements and braces), so the return value and the failure handling
 * around the assert cannot be confirmed from here.
 */
586 brw_vs_emit(struct brw_vs_compile
*c
)
588 struct brw_compile
*p
= &c
->func
;
589 struct brw_context
*brw
= p
->brw
;
590 struct intel_context
*intel
= &brw
->intel
;
591 struct gl_context
*ctx
= &intel
->ctx
;
592 struct gl_shader_program
*prog
= ctx
->Shader
.CurrentVertexProgram
;
597 struct brw_shader
*shader
=
598 (brw_shader
*) prog
->_LinkedShaders
[MESA_SHADER_VERTEX
];
602 if (unlikely(INTEL_DEBUG
& DEBUG_VS
)) {
603 printf("GLSL IR for native vertex shader %d:\n", prog
->Name
);
604 _mesa_print_ir(shader
->ir
, NULL
);
608 vec4_visitor
v(c
, prog
, shader
);
610 /* FINISHME: Cleanly fail, test at link time, etc. */
611 assert(!"not reached");
620 } /* namespace brw */