2 * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE. */
23 #include "radeon_compiler.h"
29 #include "radeon_dataflow.h"
30 #include "radeon_program.h"
31 #include "radeon_program_pair.h"
32 #include "radeon_regalloc.h"
33 #include "radeon_compiler_util.h"
36 void rc_init(struct radeon_compiler
* c
, const struct rc_regalloc_state
*rs
)
38 memset(c
, 0, sizeof(*c
));
40 memory_pool_init(&c
->Pool
);
41 c
->Program
.Instructions
.Prev
= &c
->Program
.Instructions
;
42 c
->Program
.Instructions
.Next
= &c
->Program
.Instructions
;
43 c
->Program
.Instructions
.U
.I
.Opcode
= RC_OPCODE_ILLEGAL_OPCODE
;
44 c
->regalloc_state
= rs
;
47 void rc_destroy(struct radeon_compiler
* c
)
49 rc_constants_destroy(&c
->Program
.Constants
);
50 memory_pool_destroy(&c
->Pool
);
54 void rc_debug(struct radeon_compiler
* c
, const char * fmt
, ...)
58 if (!(c
->Debug
& RC_DBG_LOG
))
62 vfprintf(stderr
, fmt
, ap
);
/*
 * rc_error: record and optionally log a printf-style compiler error.
 *
 * The message is first formatted into the local buffer `buf` with
 * vsnprintf(). When it fits, c->ErrorMsg receives a strdup() copy of the
 * buffer; otherwise a buffer of written + 1 bytes is malloc()ed and the
 * message is formatted a second time directly into it. When RC_DBG_LOG is
 * set in c->Debug, the message is also written to stderr after the prefix
 * "r300compiler error: ".
 *
 * NOTE(review): several original lines of this function are not visible in
 * this excerpt (the declarations of `ap`, `buf` and `written`, the
 * va_start/va_end calls, the guard implied by "Only remember the first
 * error", the else keyword and the braces) - consult the full source
 * before editing.
 * NOTE(review): `written < sizeof(buf)` compares against a size_t; if
 * `written` is a signed int, a negative vsnprintf() result would take the
 * malloc branch - confirm the declaration.
 * NOTE(review): the malloc() result is handed to vsnprintf() without a
 * visible NULL check.
 */
66 void rc_error(struct radeon_compiler
* c
, const char * fmt
, ...)
73 /* Only remember the first error */
78 written
= vsnprintf(buf
, sizeof(buf
), fmt
, ap
);
81 if (written
< sizeof(buf
)) {
82 c
->ErrorMsg
= strdup(buf
);
84 c
->ErrorMsg
= malloc(written
+ 1);
87 vsnprintf(c
->ErrorMsg
, written
+ 1, fmt
, ap
);
92 if (c
->Debug
& RC_DBG_LOG
) {
93 fprintf(stderr
, "r300compiler error: ");
96 vfprintf(stderr
, fmt
, ap
);
/*
 * rc_if_fail_helper: report a failed internal assertion.
 *
 * Forwards the source location (`file`, `line`) and the text of the failed
 * `assertion` to rc_error() using the format
 * "ICE at %s:%i: assertion failed: %s\n".
 *
 * NOTE(review): the function is declared to return int, but its return
 * statement is not visible in this excerpt - check the full source for the
 * returned value.
 */
101 int rc_if_fail_helper(struct radeon_compiler
* c
, const char * file
, int line
, const char * assertion
)
103 rc_error(c
, "ICE at %s:%i: assertion failed: %s\n", file
, line
, assertion
);
108 * Recompute c->Program.InputsRead and c->Program.OutputsWritten
109 * based on which inputs and outputs are actually referenced
110 * in program instructions.
112 void rc_calculate_inputs_outputs(struct radeon_compiler
* c
)
114 struct rc_instruction
*inst
;
116 c
->Program
.InputsRead
= 0;
117 c
->Program
.OutputsWritten
= 0;
119 for(inst
= c
->Program
.Instructions
.Next
; inst
!= &c
->Program
.Instructions
; inst
= inst
->Next
)
121 const struct rc_opcode_info
* opcode
= rc_get_opcode_info(inst
->U
.I
.Opcode
);
124 for (i
= 0; i
< opcode
->NumSrcRegs
; ++i
) {
125 if (inst
->U
.I
.SrcReg
[i
].File
== RC_FILE_INPUT
)
126 c
->Program
.InputsRead
|= 1 << inst
->U
.I
.SrcReg
[i
].Index
;
129 if (opcode
->HasDstReg
) {
130 if (inst
->U
.I
.DstReg
.File
== RC_FILE_OUTPUT
)
131 c
->Program
.OutputsWritten
|= 1 << inst
->U
.I
.DstReg
.Index
;
137 * Rewrite the program such that everything that source the given input
138 * register will source new_input instead.
140 void rc_move_input(struct radeon_compiler
* c
, unsigned input
, struct rc_src_register new_input
)
142 struct rc_instruction
* inst
;
144 c
->Program
.InputsRead
&= ~(1 << input
);
146 for(inst
= c
->Program
.Instructions
.Next
; inst
!= &c
->Program
.Instructions
; inst
= inst
->Next
) {
147 const struct rc_opcode_info
* opcode
= rc_get_opcode_info(inst
->U
.I
.Opcode
);
150 for(i
= 0; i
< opcode
->NumSrcRegs
; ++i
) {
151 if (inst
->U
.I
.SrcReg
[i
].File
== RC_FILE_INPUT
&& inst
->U
.I
.SrcReg
[i
].Index
== input
) {
152 inst
->U
.I
.SrcReg
[i
].File
= new_input
.File
;
153 inst
->U
.I
.SrcReg
[i
].Index
= new_input
.Index
;
154 inst
->U
.I
.SrcReg
[i
].Swizzle
= combine_swizzles(new_input
.Swizzle
, inst
->U
.I
.SrcReg
[i
].Swizzle
);
155 if (!inst
->U
.I
.SrcReg
[i
].Abs
) {
156 inst
->U
.I
.SrcReg
[i
].Negate
^= new_input
.Negate
;
157 inst
->U
.I
.SrcReg
[i
].Abs
= new_input
.Abs
;
160 c
->Program
.InputsRead
|= 1 << new_input
.Index
;
168 * Rewrite the program such that everything that writes into the given
169 * output register will instead write to new_output. The new_output
170 * writemask is honoured.
172 void rc_move_output(struct radeon_compiler
* c
, unsigned output
, unsigned new_output
, unsigned writemask
)
174 struct rc_instruction
* inst
;
176 c
->Program
.OutputsWritten
&= ~(1 << output
);
178 for(inst
= c
->Program
.Instructions
.Next
; inst
!= &c
->Program
.Instructions
; inst
= inst
->Next
) {
179 const struct rc_opcode_info
* opcode
= rc_get_opcode_info(inst
->U
.I
.Opcode
);
181 if (opcode
->HasDstReg
) {
182 if (inst
->U
.I
.DstReg
.File
== RC_FILE_OUTPUT
&& inst
->U
.I
.DstReg
.Index
== output
) {
183 inst
->U
.I
.DstReg
.Index
= new_output
;
184 inst
->U
.I
.DstReg
.WriteMask
&= writemask
;
186 c
->Program
.OutputsWritten
|= 1 << new_output
;
194 * Rewrite the program such that a given output is duplicated.
196 void rc_copy_output(struct radeon_compiler
* c
, unsigned output
, unsigned dup_output
)
198 unsigned tempreg
= rc_find_free_temporary(c
);
199 struct rc_instruction
* inst
;
201 for(inst
= c
->Program
.Instructions
.Next
; inst
!= &c
->Program
.Instructions
; inst
= inst
->Next
) {
202 const struct rc_opcode_info
* opcode
= rc_get_opcode_info(inst
->U
.I
.Opcode
);
204 if (opcode
->HasDstReg
) {
205 if (inst
->U
.I
.DstReg
.File
== RC_FILE_OUTPUT
&& inst
->U
.I
.DstReg
.Index
== output
) {
206 inst
->U
.I
.DstReg
.File
= RC_FILE_TEMPORARY
;
207 inst
->U
.I
.DstReg
.Index
= tempreg
;
212 inst
= rc_insert_new_instruction(c
, c
->Program
.Instructions
.Prev
);
213 inst
->U
.I
.Opcode
= RC_OPCODE_MOV
;
214 inst
->U
.I
.DstReg
.File
= RC_FILE_OUTPUT
;
215 inst
->U
.I
.DstReg
.Index
= output
;
217 inst
->U
.I
.SrcReg
[0].File
= RC_FILE_TEMPORARY
;
218 inst
->U
.I
.SrcReg
[0].Index
= tempreg
;
219 inst
->U
.I
.SrcReg
[0].Swizzle
= RC_SWIZZLE_XYZW
;
221 inst
= rc_insert_new_instruction(c
, c
->Program
.Instructions
.Prev
);
222 inst
->U
.I
.Opcode
= RC_OPCODE_MOV
;
223 inst
->U
.I
.DstReg
.File
= RC_FILE_OUTPUT
;
224 inst
->U
.I
.DstReg
.Index
= dup_output
;
226 inst
->U
.I
.SrcReg
[0].File
= RC_FILE_TEMPORARY
;
227 inst
->U
.I
.SrcReg
[0].Index
= tempreg
;
228 inst
->U
.I
.SrcReg
[0].Swizzle
= RC_SWIZZLE_XYZW
;
230 c
->Program
.OutputsWritten
|= 1 << dup_output
;
235 * Introduce standard code fragment to deal with fragment.position.
237 void rc_transform_fragment_wpos(struct radeon_compiler
* c
, unsigned wpos
, unsigned new_input
,
240 unsigned tempregi
= rc_find_free_temporary(c
);
241 struct rc_instruction
* inst_rcp
;
242 struct rc_instruction
* inst_mul
;
243 struct rc_instruction
* inst_mad
;
244 struct rc_instruction
* inst
;
246 c
->Program
.InputsRead
&= ~(1 << wpos
);
247 c
->Program
.InputsRead
|= 1 << new_input
;
249 /* perspective divide */
250 inst_rcp
= rc_insert_new_instruction(c
, &c
->Program
.Instructions
);
251 inst_rcp
->U
.I
.Opcode
= RC_OPCODE_RCP
;
253 inst_rcp
->U
.I
.DstReg
.File
= RC_FILE_TEMPORARY
;
254 inst_rcp
->U
.I
.DstReg
.Index
= tempregi
;
255 inst_rcp
->U
.I
.DstReg
.WriteMask
= RC_MASK_W
;
257 inst_rcp
->U
.I
.SrcReg
[0].File
= RC_FILE_INPUT
;
258 inst_rcp
->U
.I
.SrcReg
[0].Index
= new_input
;
259 inst_rcp
->U
.I
.SrcReg
[0].Swizzle
= RC_SWIZZLE_WWWW
;
261 inst_mul
= rc_insert_new_instruction(c
, inst_rcp
);
262 inst_mul
->U
.I
.Opcode
= RC_OPCODE_MUL
;
264 inst_mul
->U
.I
.DstReg
.File
= RC_FILE_TEMPORARY
;
265 inst_mul
->U
.I
.DstReg
.Index
= tempregi
;
266 inst_mul
->U
.I
.DstReg
.WriteMask
= RC_MASK_XYZ
;
268 inst_mul
->U
.I
.SrcReg
[0].File
= RC_FILE_INPUT
;
269 inst_mul
->U
.I
.SrcReg
[0].Index
= new_input
;
271 inst_mul
->U
.I
.SrcReg
[1].File
= RC_FILE_TEMPORARY
;
272 inst_mul
->U
.I
.SrcReg
[1].Index
= tempregi
;
273 inst_mul
->U
.I
.SrcReg
[1].Swizzle
= RC_SWIZZLE_WWWW
;
275 /* viewport transformation */
276 inst_mad
= rc_insert_new_instruction(c
, inst_mul
);
277 inst_mad
->U
.I
.Opcode
= RC_OPCODE_MAD
;
279 inst_mad
->U
.I
.DstReg
.File
= RC_FILE_TEMPORARY
;
280 inst_mad
->U
.I
.DstReg
.Index
= tempregi
;
281 inst_mad
->U
.I
.DstReg
.WriteMask
= RC_MASK_XYZ
;
283 inst_mad
->U
.I
.SrcReg
[0].File
= RC_FILE_TEMPORARY
;
284 inst_mad
->U
.I
.SrcReg
[0].Index
= tempregi
;
285 inst_mad
->U
.I
.SrcReg
[0].Swizzle
= RC_SWIZZLE_XYZ0
;
287 inst_mad
->U
.I
.SrcReg
[1].File
= RC_FILE_CONSTANT
;
288 inst_mad
->U
.I
.SrcReg
[1].Swizzle
= RC_SWIZZLE_XYZ0
;
290 inst_mad
->U
.I
.SrcReg
[2].File
= RC_FILE_CONSTANT
;
291 inst_mad
->U
.I
.SrcReg
[2].Swizzle
= RC_SWIZZLE_XYZ0
;
293 if (full_vtransform
) {
294 inst_mad
->U
.I
.SrcReg
[1].Index
= rc_constants_add_state(&c
->Program
.Constants
, RC_STATE_R300_VIEWPORT_SCALE
, 0);
295 inst_mad
->U
.I
.SrcReg
[2].Index
= rc_constants_add_state(&c
->Program
.Constants
, RC_STATE_R300_VIEWPORT_OFFSET
, 0);
297 inst_mad
->U
.I
.SrcReg
[1].Index
=
298 inst_mad
->U
.I
.SrcReg
[2].Index
= rc_constants_add_state(&c
->Program
.Constants
, RC_STATE_R300_WINDOW_DIMENSION
, 0);
301 for (inst
= inst_mad
->Next
; inst
!= &c
->Program
.Instructions
; inst
= inst
->Next
) {
302 const struct rc_opcode_info
* opcode
= rc_get_opcode_info(inst
->U
.I
.Opcode
);
305 for(i
= 0; i
< opcode
->NumSrcRegs
; i
++) {
306 if (inst
->U
.I
.SrcReg
[i
].File
== RC_FILE_INPUT
&&
307 inst
->U
.I
.SrcReg
[i
].Index
== wpos
) {
308 inst
->U
.I
.SrcReg
[i
].File
= RC_FILE_TEMPORARY
;
309 inst
->U
.I
.SrcReg
[i
].Index
= tempregi
;
317 * The FACE input in hardware contains 1 if it's a back face, 0 otherwise.
318 * Gallium and OpenGL define it the other way around.
320 * So let's just negate FACE at the beginning of the shader and rewrite the rest
321 * of the shader to read from the newly allocated temporary.
323 void rc_transform_fragment_face(struct radeon_compiler
*c
, unsigned face
)
325 unsigned tempregi
= rc_find_free_temporary(c
);
326 struct rc_instruction
*inst_add
;
327 struct rc_instruction
*inst
;
329 /* perspective divide */
330 inst_add
= rc_insert_new_instruction(c
, &c
->Program
.Instructions
);
331 inst_add
->U
.I
.Opcode
= RC_OPCODE_ADD
;
333 inst_add
->U
.I
.DstReg
.File
= RC_FILE_TEMPORARY
;
334 inst_add
->U
.I
.DstReg
.Index
= tempregi
;
335 inst_add
->U
.I
.DstReg
.WriteMask
= RC_MASK_X
;
337 inst_add
->U
.I
.SrcReg
[0].File
= RC_FILE_NONE
;
338 inst_add
->U
.I
.SrcReg
[0].Swizzle
= RC_SWIZZLE_1111
;
340 inst_add
->U
.I
.SrcReg
[1].File
= RC_FILE_INPUT
;
341 inst_add
->U
.I
.SrcReg
[1].Index
= face
;
342 inst_add
->U
.I
.SrcReg
[1].Swizzle
= RC_SWIZZLE_XXXX
;
343 inst_add
->U
.I
.SrcReg
[1].Negate
= RC_MASK_XYZW
;
345 for (inst
= inst_add
->Next
; inst
!= &c
->Program
.Instructions
; inst
= inst
->Next
) {
346 const struct rc_opcode_info
* opcode
= rc_get_opcode_info(inst
->U
.I
.Opcode
);
349 for(i
= 0; i
< opcode
->NumSrcRegs
; i
++) {
350 if (inst
->U
.I
.SrcReg
[i
].File
== RC_FILE_INPUT
&&
351 inst
->U
.I
.SrcReg
[i
].Index
== face
) {
352 inst
->U
.I
.SrcReg
[i
].File
= RC_FILE_TEMPORARY
;
353 inst
->U
.I
.SrcReg
[i
].Index
= tempregi
;
359 static void reg_count_callback(void * userdata
, struct rc_instruction
* inst
,
360 rc_register_file file
, unsigned int index
, unsigned int mask
)
362 struct rc_program_stats
*s
= userdata
;
363 if (file
== RC_FILE_TEMPORARY
)
364 (int)index
> s
->num_temp_regs
? s
->num_temp_regs
= index
: 0;
365 if (file
== RC_FILE_INLINE
)
366 s
->num_inline_literals
++;
/*
 * rc_get_stats: gather instruction statistics for the program in `c` into
 * `s`.
 *
 * `s` is zeroed first. The instruction list is then walked starting at
 * c->Program.Instructions.Next; for each instruction reg_count_callback is
 * invoked through rc_for_all_reads_mask() with `s` as user data. When the
 * instruction's Type is RC_INSTRUCTION_NORMAL its U.I fields are examined;
 * the U.P (pair) fields are examined further down, presumably in an else
 * branch that is not visible here.
 *
 * NOTE(review): the loop increment, the bodies of most `if` statements, the
 * else keyword and all closing braces are missing from this excerpt; only
 * the num_alpha_insts increment is shown. Consult the full source before
 * changing the counting logic.
 */
369 void rc_get_stats(struct radeon_compiler
*c
, struct rc_program_stats
*s
)
371 struct rc_instruction
* tmp
;
372 memset(s
, 0, sizeof(*s
));
374 for(tmp
= c
->Program
.Instructions
.Next
; tmp
!= &c
->Program
.Instructions
;
376 const struct rc_opcode_info
* info
;
377 rc_for_all_reads_mask(tmp
, reg_count_callback
, s
);
378 if (tmp
->Type
== RC_INSTRUCTION_NORMAL
) {
379 info
= rc_get_opcode_info(tmp
->U
.I
.Opcode
);
380 if (info
->Opcode
== RC_OPCODE_BEGIN_TEX
)
382 if (tmp
->U
.I
.PreSub
.Opcode
!= RC_PRESUB_NONE
)
385 if (tmp
->U
.P
.RGB
.Src
[RC_PAIR_PRESUB_SRC
].Used
)
387 if (tmp
->U
.P
.Alpha
.Src
[RC_PAIR_PRESUB_SRC
].Used
)
389 /* Assuming alpha will never be a flow control or
390 * a tex instruction. */
391 if (tmp
->U
.P
.Alpha
.Opcode
!= RC_OPCODE_NOP
)
392 s
->num_alpha_insts
++;
393 if (tmp
->U
.P
.RGB
.Opcode
!= RC_OPCODE_NOP
)
395 if (tmp
->U
.P
.RGB
.Omod
!= RC_OMOD_MUL_1
&&
396 tmp
->U
.P
.RGB
.Omod
!= RC_OMOD_DISABLE
) {
399 if (tmp
->U
.P
.Alpha
.Omod
!= RC_OMOD_MUL_1
&&
400 tmp
->U
.P
.Alpha
.Omod
!= RC_OMOD_DISABLE
) {
403 info
= rc_get_opcode_info(tmp
->U
.P
.RGB
.Opcode
);
405 if (info
->IsFlowControl
)
407 if (info
->HasTexture
)
411 /* Increment here because reg_count_callback stores the max
412 * temporary reg index in s->num_temp_regs. */
/*
 * print_stats: write a summary of program statistics to stderr.
 *
 * Two report layouts are visible. Under the RC_VERTEX_PROGRAM case label
 * the instruction, flow-control and temporary-register counts are printed.
 * Under RC_FRAGMENT_PROGRAM the RGB, alpha, texture, presub, OMOD and
 * inline-literal counts are printed as well. All values come from the
 * local `s`.
 *
 * NOTE(review): the statement guarded by `c->initial_num_insts <= 5`, the
 * code that fills `s`, the switch header and the case terminators are not
 * visible in this excerpt.
 * NOTE(review): every field is printed with %4u; reg_count_callback
 * compares num_temp_regs against an int, so the field types may not all be
 * unsigned - confirm against struct rc_program_stats.
 */
416 static void print_stats(struct radeon_compiler
* c
)
418 struct rc_program_stats s
;
420 if (c
->initial_num_insts
<= 5)
426 case RC_VERTEX_PROGRAM
:
427 fprintf(stderr
,"~~~~~~~~~ VERTEX PROGRAM ~~~~~~~~\n"
428 "~%4u Instructions\n"
429 "~%4u Flow Control Instructions\n"
430 "~%4u Temporary Registers\n"
431 "~~~~~~~~~~~~~~ END ~~~~~~~~~~~~~~\n",
432 s
.num_insts
, s
.num_fc_insts
, s
.num_temp_regs
);
435 case RC_FRAGMENT_PROGRAM
:
436 fprintf(stderr
,"~~~~~~~~ FRAGMENT PROGRAM ~~~~~~~\n"
437 "~%4u Instructions\n"
438 "~%4u Vector Instructions (RGB)\n"
439 "~%4u Scalar Instructions (Alpha)\n"
440 "~%4u Flow Control Instructions\n"
441 "~%4u Texture Instructions\n"
442 "~%4u Presub Operations\n"
443 "~%4u OMOD Operations\n"
444 "~%4u Temporary Registers\n"
445 "~%4u Inline Literals\n"
446 "~~~~~~~~~~~~~~ END ~~~~~~~~~~~~~~\n",
447 s
.num_insts
, s
.num_rgb_insts
, s
.num_alpha_insts
,
448 s
.num_fc_insts
, s
.num_tex_insts
, s
.num_presub_ops
,
449 s
.num_omod_ops
, s
.num_temp_regs
, s
.num_inline_literals
);
/* Program-type names used in debug output; indexed by c->type.
 * NOTE(review): the initializer entries are not visible in this excerpt. */
456 static const char *shader_name
[RC_NUM_PROGRAM_TYPES
] = {
/*
 * rc_run_compiler_passes: run the passes in `list` on `c`, in order.
 *
 * Iteration stops at the first entry whose `name` is null. A pass is run
 * as list[i].run(c, list[i].user), and only when its `predicate` is
 * non-zero. When RC_DBG_LOG is set in c->Debug and the pass has `dump` set,
 * a heading naming the shader type and the pass is written to stderr and
 * the program is printed with rc_print_program().
 *
 * NOTE(review): original lines 466-469, between the run() call and the
 * dump check, are not visible in this excerpt - check the full source for
 * what happens there before relying on this description.
 */
461 void rc_run_compiler_passes(struct radeon_compiler
*c
, struct radeon_compiler_pass
*list
)
463 for (unsigned i
= 0; list
[i
].name
; i
++) {
464 if (list
[i
].predicate
) {
465 list
[i
].run(c
, list
[i
].user
);
470 if ((c
->Debug
& RC_DBG_LOG
) && list
[i
].dump
) {
471 fprintf(stderr
, "%s: after '%s'\n", shader_name
[c
->type
], list
[i
].name
);
472 rc_print_program(&c
->Program
);
478 /* Executes a list of compiler passes given in the parameter 'list'. */
/*
 * rc_run_compiler: run the pass list `list` on `c`.
 *
 * Stores s.num_insts in c->initial_num_insts. When RC_DBG_LOG is set in
 * c->Debug, a "before compilation" heading is written to stderr and the
 * program is printed with rc_print_program(). The passes themselves are
 * run through rc_run_compiler_passes().
 *
 * NOTE(review): the code that fills `s` before s.num_insts is read, and the
 * statement guarded by RC_DBG_STATS, are not visible in this excerpt.
 */
479 void rc_run_compiler(struct radeon_compiler
*c
, struct radeon_compiler_pass
*list
)
481 struct rc_program_stats s
;
484 c
->initial_num_insts
= s
.num_insts
;
486 if (c
->Debug
& RC_DBG_LOG
) {
487 fprintf(stderr
, "%s: before compilation\n", shader_name
[c
->type
]);
488 rc_print_program(&c
->Program
);
491 rc_run_compiler_passes(c
, list
);
493 if (c
->Debug
& RC_DBG_STATS
)
497 void rc_validate_final_shader(struct radeon_compiler
*c
, void *user
)
499 /* Check the number of constants. */
500 if (c
->Program
.Constants
.Count
> c
->max_constants
) {
501 rc_error(c
, "Too many constants. Max: %i, Got: %i\n",
502 c
->max_constants
, c
->Program
.Constants
.Count
);