2 * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE. */
23 #include "radeon_compiler.h"
29 #include "radeon_dataflow.h"
30 #include "radeon_program.h"
31 #include "radeon_program_pair.h"
32 #include "radeon_compiler_util.h"
35 void rc_init(struct radeon_compiler
* c
)
37 memset(c
, 0, sizeof(*c
));
39 memory_pool_init(&c
->Pool
);
40 c
->Program
.Instructions
.Prev
= &c
->Program
.Instructions
;
41 c
->Program
.Instructions
.Next
= &c
->Program
.Instructions
;
42 c
->Program
.Instructions
.U
.I
.Opcode
= RC_OPCODE_ILLEGAL_OPCODE
;
45 void rc_destroy(struct radeon_compiler
* c
)
47 rc_constants_destroy(&c
->Program
.Constants
);
48 memory_pool_destroy(&c
->Pool
);
52 void rc_debug(struct radeon_compiler
* c
, const char * fmt
, ...)
56 if (!(c
->Debug
& RC_DBG_LOG
))
60 vfprintf(stderr
, fmt
, ap
);
64 void rc_error(struct radeon_compiler
* c
, const char * fmt
, ...)
71 /* Only remember the first error */
76 written
= vsnprintf(buf
, sizeof(buf
), fmt
, ap
);
79 if (written
< sizeof(buf
)) {
80 c
->ErrorMsg
= strdup(buf
);
82 c
->ErrorMsg
= malloc(written
+ 1);
85 vsnprintf(c
->ErrorMsg
, written
+ 1, fmt
, ap
);
90 if (c
->Debug
& RC_DBG_LOG
) {
91 fprintf(stderr
, "r300compiler error: ");
94 vfprintf(stderr
, fmt
, ap
);
/**
 * Report a failed internal assertion through rc_error().
 *
 * @param c          compiler context that receives the error.
 * @param file       source file of the failed assertion.
 * @param line       source line of the failed assertion.
 * @param assertion  text of the asserted expression.
 * @return always 1.
 */
int rc_if_fail_helper(struct radeon_compiler * c, const char * file, int line, const char * assertion)
{
	rc_error(c, "ICE at %s:%i: assertion failed: %s\n", file, line, assertion);
	return 1;
}
106 * Recompute c->Program.InputsRead and c->Program.OutputsWritten
107 * based on which inputs and outputs are actually referenced
108 * in program instructions.
110 void rc_calculate_inputs_outputs(struct radeon_compiler
* c
)
112 struct rc_instruction
*inst
;
114 c
->Program
.InputsRead
= 0;
115 c
->Program
.OutputsWritten
= 0;
117 for(inst
= c
->Program
.Instructions
.Next
; inst
!= &c
->Program
.Instructions
; inst
= inst
->Next
)
119 const struct rc_opcode_info
* opcode
= rc_get_opcode_info(inst
->U
.I
.Opcode
);
122 for (i
= 0; i
< opcode
->NumSrcRegs
; ++i
) {
123 if (inst
->U
.I
.SrcReg
[i
].File
== RC_FILE_INPUT
)
124 c
->Program
.InputsRead
|= 1 << inst
->U
.I
.SrcReg
[i
].Index
;
127 if (opcode
->HasDstReg
) {
128 if (inst
->U
.I
.DstReg
.File
== RC_FILE_OUTPUT
)
129 c
->Program
.OutputsWritten
|= 1 << inst
->U
.I
.DstReg
.Index
;
135 * Rewrite the program such that everything that source the given input
136 * register will source new_input instead.
138 void rc_move_input(struct radeon_compiler
* c
, unsigned input
, struct rc_src_register new_input
)
140 struct rc_instruction
* inst
;
142 c
->Program
.InputsRead
&= ~(1 << input
);
144 for(inst
= c
->Program
.Instructions
.Next
; inst
!= &c
->Program
.Instructions
; inst
= inst
->Next
) {
145 const struct rc_opcode_info
* opcode
= rc_get_opcode_info(inst
->U
.I
.Opcode
);
148 for(i
= 0; i
< opcode
->NumSrcRegs
; ++i
) {
149 if (inst
->U
.I
.SrcReg
[i
].File
== RC_FILE_INPUT
&& inst
->U
.I
.SrcReg
[i
].Index
== input
) {
150 inst
->U
.I
.SrcReg
[i
].File
= new_input
.File
;
151 inst
->U
.I
.SrcReg
[i
].Index
= new_input
.Index
;
152 inst
->U
.I
.SrcReg
[i
].Swizzle
= combine_swizzles(new_input
.Swizzle
, inst
->U
.I
.SrcReg
[i
].Swizzle
);
153 if (!inst
->U
.I
.SrcReg
[i
].Abs
) {
154 inst
->U
.I
.SrcReg
[i
].Negate
^= new_input
.Negate
;
155 inst
->U
.I
.SrcReg
[i
].Abs
= new_input
.Abs
;
158 c
->Program
.InputsRead
|= 1 << new_input
.Index
;
166 * Rewrite the program such that everything that writes into the given
167 * output register will instead write to new_output. The new_output
168 * writemask is honoured.
170 void rc_move_output(struct radeon_compiler
* c
, unsigned output
, unsigned new_output
, unsigned writemask
)
172 struct rc_instruction
* inst
;
174 c
->Program
.OutputsWritten
&= ~(1 << output
);
176 for(inst
= c
->Program
.Instructions
.Next
; inst
!= &c
->Program
.Instructions
; inst
= inst
->Next
) {
177 const struct rc_opcode_info
* opcode
= rc_get_opcode_info(inst
->U
.I
.Opcode
);
179 if (opcode
->HasDstReg
) {
180 if (inst
->U
.I
.DstReg
.File
== RC_FILE_OUTPUT
&& inst
->U
.I
.DstReg
.Index
== output
) {
181 inst
->U
.I
.DstReg
.Index
= new_output
;
182 inst
->U
.I
.DstReg
.WriteMask
&= writemask
;
184 c
->Program
.OutputsWritten
|= 1 << new_output
;
192 * Rewrite the program such that a given output is duplicated.
194 void rc_copy_output(struct radeon_compiler
* c
, unsigned output
, unsigned dup_output
)
196 unsigned tempreg
= rc_find_free_temporary(c
);
197 struct rc_instruction
* inst
;
199 for(inst
= c
->Program
.Instructions
.Next
; inst
!= &c
->Program
.Instructions
; inst
= inst
->Next
) {
200 const struct rc_opcode_info
* opcode
= rc_get_opcode_info(inst
->U
.I
.Opcode
);
202 if (opcode
->HasDstReg
) {
203 if (inst
->U
.I
.DstReg
.File
== RC_FILE_OUTPUT
&& inst
->U
.I
.DstReg
.Index
== output
) {
204 inst
->U
.I
.DstReg
.File
= RC_FILE_TEMPORARY
;
205 inst
->U
.I
.DstReg
.Index
= tempreg
;
210 inst
= rc_insert_new_instruction(c
, c
->Program
.Instructions
.Prev
);
211 inst
->U
.I
.Opcode
= RC_OPCODE_MOV
;
212 inst
->U
.I
.DstReg
.File
= RC_FILE_OUTPUT
;
213 inst
->U
.I
.DstReg
.Index
= output
;
215 inst
->U
.I
.SrcReg
[0].File
= RC_FILE_TEMPORARY
;
216 inst
->U
.I
.SrcReg
[0].Index
= tempreg
;
217 inst
->U
.I
.SrcReg
[0].Swizzle
= RC_SWIZZLE_XYZW
;
219 inst
= rc_insert_new_instruction(c
, c
->Program
.Instructions
.Prev
);
220 inst
->U
.I
.Opcode
= RC_OPCODE_MOV
;
221 inst
->U
.I
.DstReg
.File
= RC_FILE_OUTPUT
;
222 inst
->U
.I
.DstReg
.Index
= dup_output
;
224 inst
->U
.I
.SrcReg
[0].File
= RC_FILE_TEMPORARY
;
225 inst
->U
.I
.SrcReg
[0].Index
= tempreg
;
226 inst
->U
.I
.SrcReg
[0].Swizzle
= RC_SWIZZLE_XYZW
;
228 c
->Program
.OutputsWritten
|= 1 << dup_output
;
233 * Introduce standard code fragment to deal with fragment.position.
235 void rc_transform_fragment_wpos(struct radeon_compiler
* c
, unsigned wpos
, unsigned new_input
,
238 unsigned tempregi
= rc_find_free_temporary(c
);
239 struct rc_instruction
* inst_rcp
;
240 struct rc_instruction
* inst_mul
;
241 struct rc_instruction
* inst_mad
;
242 struct rc_instruction
* inst
;
244 c
->Program
.InputsRead
&= ~(1 << wpos
);
245 c
->Program
.InputsRead
|= 1 << new_input
;
247 /* perspective divide */
248 inst_rcp
= rc_insert_new_instruction(c
, &c
->Program
.Instructions
);
249 inst_rcp
->U
.I
.Opcode
= RC_OPCODE_RCP
;
251 inst_rcp
->U
.I
.DstReg
.File
= RC_FILE_TEMPORARY
;
252 inst_rcp
->U
.I
.DstReg
.Index
= tempregi
;
253 inst_rcp
->U
.I
.DstReg
.WriteMask
= RC_MASK_W
;
255 inst_rcp
->U
.I
.SrcReg
[0].File
= RC_FILE_INPUT
;
256 inst_rcp
->U
.I
.SrcReg
[0].Index
= new_input
;
257 inst_rcp
->U
.I
.SrcReg
[0].Swizzle
= RC_SWIZZLE_WWWW
;
259 inst_mul
= rc_insert_new_instruction(c
, inst_rcp
);
260 inst_mul
->U
.I
.Opcode
= RC_OPCODE_MUL
;
262 inst_mul
->U
.I
.DstReg
.File
= RC_FILE_TEMPORARY
;
263 inst_mul
->U
.I
.DstReg
.Index
= tempregi
;
264 inst_mul
->U
.I
.DstReg
.WriteMask
= RC_MASK_XYZ
;
266 inst_mul
->U
.I
.SrcReg
[0].File
= RC_FILE_INPUT
;
267 inst_mul
->U
.I
.SrcReg
[0].Index
= new_input
;
269 inst_mul
->U
.I
.SrcReg
[1].File
= RC_FILE_TEMPORARY
;
270 inst_mul
->U
.I
.SrcReg
[1].Index
= tempregi
;
271 inst_mul
->U
.I
.SrcReg
[1].Swizzle
= RC_SWIZZLE_WWWW
;
273 /* viewport transformation */
274 inst_mad
= rc_insert_new_instruction(c
, inst_mul
);
275 inst_mad
->U
.I
.Opcode
= RC_OPCODE_MAD
;
277 inst_mad
->U
.I
.DstReg
.File
= RC_FILE_TEMPORARY
;
278 inst_mad
->U
.I
.DstReg
.Index
= tempregi
;
279 inst_mad
->U
.I
.DstReg
.WriteMask
= RC_MASK_XYZ
;
281 inst_mad
->U
.I
.SrcReg
[0].File
= RC_FILE_TEMPORARY
;
282 inst_mad
->U
.I
.SrcReg
[0].Index
= tempregi
;
283 inst_mad
->U
.I
.SrcReg
[0].Swizzle
= RC_SWIZZLE_XYZ0
;
285 inst_mad
->U
.I
.SrcReg
[1].File
= RC_FILE_CONSTANT
;
286 inst_mad
->U
.I
.SrcReg
[1].Swizzle
= RC_SWIZZLE_XYZ0
;
288 inst_mad
->U
.I
.SrcReg
[2].File
= RC_FILE_CONSTANT
;
289 inst_mad
->U
.I
.SrcReg
[2].Swizzle
= RC_SWIZZLE_XYZ0
;
291 if (full_vtransform
) {
292 inst_mad
->U
.I
.SrcReg
[1].Index
= rc_constants_add_state(&c
->Program
.Constants
, RC_STATE_R300_VIEWPORT_SCALE
, 0);
293 inst_mad
->U
.I
.SrcReg
[2].Index
= rc_constants_add_state(&c
->Program
.Constants
, RC_STATE_R300_VIEWPORT_OFFSET
, 0);
295 inst_mad
->U
.I
.SrcReg
[1].Index
=
296 inst_mad
->U
.I
.SrcReg
[2].Index
= rc_constants_add_state(&c
->Program
.Constants
, RC_STATE_R300_WINDOW_DIMENSION
, 0);
299 for (inst
= inst_mad
->Next
; inst
!= &c
->Program
.Instructions
; inst
= inst
->Next
) {
300 const struct rc_opcode_info
* opcode
= rc_get_opcode_info(inst
->U
.I
.Opcode
);
303 for(i
= 0; i
< opcode
->NumSrcRegs
; i
++) {
304 if (inst
->U
.I
.SrcReg
[i
].File
== RC_FILE_INPUT
&&
305 inst
->U
.I
.SrcReg
[i
].Index
== wpos
) {
306 inst
->U
.I
.SrcReg
[i
].File
= RC_FILE_TEMPORARY
;
307 inst
->U
.I
.SrcReg
[i
].Index
= tempregi
;
315 * The FACE input in hardware contains 1 if it's a back face, 0 otherwise.
316 * Gallium and OpenGL define it the other way around.
318 * So let's just negate FACE at the beginning of the shader and rewrite the rest
319 * of the shader to read from the newly allocated temporary.
321 void rc_transform_fragment_face(struct radeon_compiler
*c
, unsigned face
)
323 unsigned tempregi
= rc_find_free_temporary(c
);
324 struct rc_instruction
*inst_add
;
325 struct rc_instruction
*inst
;
327 /* perspective divide */
328 inst_add
= rc_insert_new_instruction(c
, &c
->Program
.Instructions
);
329 inst_add
->U
.I
.Opcode
= RC_OPCODE_ADD
;
331 inst_add
->U
.I
.DstReg
.File
= RC_FILE_TEMPORARY
;
332 inst_add
->U
.I
.DstReg
.Index
= tempregi
;
333 inst_add
->U
.I
.DstReg
.WriteMask
= RC_MASK_X
;
335 inst_add
->U
.I
.SrcReg
[0].File
= RC_FILE_NONE
;
336 inst_add
->U
.I
.SrcReg
[0].Swizzle
= RC_SWIZZLE_1111
;
338 inst_add
->U
.I
.SrcReg
[1].File
= RC_FILE_INPUT
;
339 inst_add
->U
.I
.SrcReg
[1].Index
= face
;
340 inst_add
->U
.I
.SrcReg
[1].Swizzle
= RC_SWIZZLE_XXXX
;
341 inst_add
->U
.I
.SrcReg
[1].Negate
= RC_MASK_XYZW
;
343 for (inst
= inst_add
->Next
; inst
!= &c
->Program
.Instructions
; inst
= inst
->Next
) {
344 const struct rc_opcode_info
* opcode
= rc_get_opcode_info(inst
->U
.I
.Opcode
);
347 for(i
= 0; i
< opcode
->NumSrcRegs
; i
++) {
348 if (inst
->U
.I
.SrcReg
[i
].File
== RC_FILE_INPUT
&&
349 inst
->U
.I
.SrcReg
[i
].Index
== face
) {
350 inst
->U
.I
.SrcReg
[i
].File
= RC_FILE_TEMPORARY
;
351 inst
->U
.I
.SrcReg
[i
].Index
= tempregi
;
357 static void reg_count_callback(void * userdata
, struct rc_instruction
* inst
,
358 rc_register_file file
, unsigned int index
, unsigned int mask
)
360 struct rc_program_stats
*s
= userdata
;
361 if (file
== RC_FILE_TEMPORARY
)
362 (int)index
> s
->num_temp_regs
? s
->num_temp_regs
= index
: 0;
363 if (file
== RC_FILE_INLINE
)
364 s
->num_inline_literals
++;
367 void rc_get_stats(struct radeon_compiler
*c
, struct rc_program_stats
*s
)
369 struct rc_instruction
* tmp
;
370 memset(s
, 0, sizeof(*s
));
372 for(tmp
= c
->Program
.Instructions
.Next
; tmp
!= &c
->Program
.Instructions
;
374 const struct rc_opcode_info
* info
;
375 rc_for_all_reads_mask(tmp
, reg_count_callback
, s
);
376 if (tmp
->Type
== RC_INSTRUCTION_NORMAL
) {
377 info
= rc_get_opcode_info(tmp
->U
.I
.Opcode
);
378 if (info
->Opcode
== RC_OPCODE_BEGIN_TEX
)
380 if (tmp
->U
.I
.PreSub
.Opcode
!= RC_PRESUB_NONE
)
383 if (tmp
->U
.P
.RGB
.Src
[RC_PAIR_PRESUB_SRC
].Used
)
385 if (tmp
->U
.P
.Alpha
.Src
[RC_PAIR_PRESUB_SRC
].Used
)
387 /* Assuming alpha will never be a flow control or
388 * a tex instruction. */
389 if (tmp
->U
.P
.Alpha
.Opcode
!= RC_OPCODE_NOP
)
390 s
->num_alpha_insts
++;
391 if (tmp
->U
.P
.RGB
.Opcode
!= RC_OPCODE_NOP
)
393 if (tmp
->U
.P
.RGB
.Omod
!= RC_OMOD_MUL_1
&&
394 tmp
->U
.P
.RGB
.Omod
!= RC_OMOD_DISABLE
) {
397 if (tmp
->U
.P
.Alpha
.Omod
!= RC_OMOD_MUL_1
&&
398 tmp
->U
.P
.Alpha
.Omod
!= RC_OMOD_DISABLE
) {
401 info
= rc_get_opcode_info(tmp
->U
.P
.RGB
.Opcode
);
403 if (info
->IsFlowControl
)
405 if (info
->HasTexture
)
409 /* Increment here because the reg_count_callback store the max
410 * temporary reg index in s->nun_temp_regs. */
414 static void print_stats(struct radeon_compiler
* c
)
416 struct rc_program_stats s
;
418 if (c
->initial_num_insts
<= 5)
424 case RC_VERTEX_PROGRAM
:
425 fprintf(stderr
,"~~~~~~~~~ VERTEX PROGRAM ~~~~~~~~\n"
426 "~%4u Instructions\n"
427 "~%4u Flow Control Instructions\n"
428 "~%4u Temporary Registers\n"
429 "~~~~~~~~~~~~~~ END ~~~~~~~~~~~~~~\n",
430 s
.num_insts
, s
.num_fc_insts
, s
.num_temp_regs
);
433 case RC_FRAGMENT_PROGRAM
:
434 fprintf(stderr
,"~~~~~~~~ FRAGMENT PROGRAM ~~~~~~~\n"
435 "~%4u Instructions\n"
436 "~%4u Vector Instructions (RGB)\n"
437 "~%4u Scalar Instructions (Alpha)\n"
438 "~%4u Flow Control Instructions\n"
439 "~%4u Texture Instructions\n"
440 "~%4u Presub Operations\n"
441 "~%4u OMOD Operations\n"
442 "~%4u Temporary Registers\n"
443 "~%4u Inline Literals\n"
444 "~~~~~~~~~~~~~~ END ~~~~~~~~~~~~~~\n",
445 s
.num_insts
, s
.num_rgb_insts
, s
.num_alpha_insts
,
446 s
.num_fc_insts
, s
.num_tex_insts
, s
.num_presub_ops
,
447 s
.num_omod_ops
, s
.num_temp_regs
, s
.num_inline_literals
);
/* Program-type names printed in the debug log lines below; the table is
 * sized by RC_NUM_PROGRAM_TYPES and indexed with c->type. */
454 static const char *shader_name
[RC_NUM_PROGRAM_TYPES
] = {
459 void rc_run_compiler_passes(struct radeon_compiler
*c
, struct radeon_compiler_pass
*list
)
461 for (unsigned i
= 0; list
[i
].name
; i
++) {
462 if (list
[i
].predicate
) {
463 list
[i
].run(c
, list
[i
].user
);
468 if ((c
->Debug
& RC_DBG_LOG
) && list
[i
].dump
) {
469 fprintf(stderr
, "%s: after '%s'\n", shader_name
[c
->type
], list
[i
].name
);
470 rc_print_program(&c
->Program
);
476 /* Executes a list of compiler passes given in the parameter 'list'. */
477 void rc_run_compiler(struct radeon_compiler
*c
, struct radeon_compiler_pass
*list
)
479 struct rc_program_stats s
;
482 c
->initial_num_insts
= s
.num_insts
;
484 if (c
->Debug
& RC_DBG_LOG
) {
485 fprintf(stderr
, "%s: before compilation\n", shader_name
[c
->type
]);
486 rc_print_program(&c
->Program
);
489 rc_run_compiler_passes(c
, list
);
491 if (c
->Debug
& RC_DBG_STATS
)
495 void rc_validate_final_shader(struct radeon_compiler
*c
, void *user
)
497 /* Check the number of constants. */
498 if (c
->Program
.Constants
.Count
> c
->max_constants
) {
499 rc_error(c
, "Too many constants. Max: %i, Got: %i\n",
500 c
->max_constants
, c
->Program
.Constants
.Count
);