/*
 * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
 *                Joakim Sindholt <opensource@zhasha.com>
 * Copyright 2009 Marek Olšák <maraeo@gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE. */
25 #include "util/u_math.h"
26 #include "util/u_memory.h"
28 #include "tgsi/tgsi_dump.h"
29 #include "tgsi/tgsi_ureg.h"
32 #include "r300_context.h"
33 #include "r300_emit.h"
34 #include "r300_screen.h"
37 #include "r300_tgsi_to_rc.h"
39 #include "radeon_code.h"
40 #include "radeon_compiler.h"
42 /* Convert info about FS input semantics to r300_shader_semantics. */
/*
 * Records which fragment-shader input carries each semantic.
 *
 * info:      scanned TGSI shader info; num_inputs, input_semantic_name[]
 *            and input_semantic_index[] are read.
 * fs_inputs: table that is reset and then filled in.
 *
 * For every input i the visible code stores i into fs_inputs->color[index]
 * or fs_inputs->generic[index], where index is input_semantic_index[i] and
 * is asserted to be below ATTR_COLOR_COUNT / ATTR_GENERIC_COUNT. A semantic
 * that reaches the fprintf is reported on stderr as unknown.
 *
 * NOTE(review): this listing has lost lines. The fused original line numbers
 * skip (44 -> 49, 57 -> 60, 62 -> 65 -> 70 -> 75 -> 81), and no braces, no
 * declarations of i and index, no break statements, no bodies for the
 * FOG / POSITION / FACE cases and no default label are visible. Restore them
 * from a pristine copy before building; as shown the cases would fall
 * through into each other.
 */
43 void r300_shader_read_fs_inputs(struct tgsi_shader_info
* info
,
44 struct r300_shader_semantics
* fs_inputs
)
/* Clear the table first. Presumably every slot becomes ATTR_UNUSED (the
 * value other code in this file tests against); the helper is not visible
 * here, so confirm. */
49 r300_shader_semantics_reset(fs_inputs
);
51 for (i
= 0; i
< info
->num_inputs
; i
++) {
/* index is the slot number within its semantic class, not the input number. */
52 index
= info
->input_semantic_index
[i
];
54 switch (info
->input_semantic_name
[i
]) {
55 case TGSI_SEMANTIC_COLOR
:
56 assert(index
< ATTR_COLOR_COUNT
);
57 fs_inputs
->color
[index
] = i
;
60 case TGSI_SEMANTIC_GENERIC
:
61 assert(index
< ATTR_GENERIC_COUNT
);
62 fs_inputs
->generic
[index
] = i
;
/* NOTE(review): the statements for the next three cases are missing from
 * this listing. */
65 case TGSI_SEMANTIC_FOG
:
70 case TGSI_SEMANTIC_POSITION
:
75 case TGSI_SEMANTIC_FACE
:
/* Diagnostic for semantics that are not handled; presumably guarded by a
 * default label that is not visible here. */
81 fprintf(stderr
, "r300: FP: Unknown input semantic: %i\n",
82 info
->input_semantic_name
[i
]);
/*
 * Tells the compiler which shader outputs hold colour and depth.
 *
 * compiler: OutputColor[0..3] and OutputDepth are written.
 * shader:   shader->info.num_outputs and output_semantic_name[] are read.
 *
 * All five fields start as num_outputs. The loop below only produces
 * indices smaller than num_outputs, so that value acts as the
 * "not present" marker. The loop then overwrites the entries for outputs
 * whose semantic is COLOR or POSITION with the output index i.
 *
 * NOTE(review): lines are missing from this listing (the fused line numbers
 * skip 103 -> 106 and 107 -> 113). Nothing visible advances colorbuf_count
 * and no break statements or closing braces are shown; restore them from a
 * pristine copy before building.
 */
87 static void find_output_registers(struct r300_fragment_program_compiler
* compiler
,
88 struct r300_fragment_shader_code
*shader
)
90 unsigned i
, colorbuf_count
= 0;
92 /* Mark the outputs as not present initially */
93 compiler
->OutputColor
[0] = shader
->info
.num_outputs
;
94 compiler
->OutputColor
[1] = shader
->info
.num_outputs
;
95 compiler
->OutputColor
[2] = shader
->info
.num_outputs
;
96 compiler
->OutputColor
[3] = shader
->info
.num_outputs
;
97 compiler
->OutputDepth
= shader
->info
.num_outputs
;
99 /* Now see where they really are. */
100 for(i
= 0; i
< shader
->info
.num_outputs
; ++i
) {
101 switch(shader
->info
.output_semantic_name
[i
]) {
102 case TGSI_SEMANTIC_COLOR
:
/* colorbuf_count selects which colour buffer slot this output feeds. */
103 compiler
->OutputColor
[colorbuf_count
] = i
;
/* The POSITION output of a fragment shader is recorded as the depth output. */
106 case TGSI_SEMANTIC_POSITION
:
107 compiler
->OutputDepth
= i
;
/*
 * Hands out hardware input registers to the inputs the shader uses.
 *
 * c:        compiler; c->UserData is read as a struct r300_shader_semantics.
 * allocate: callback invoked as allocate(mydata, input, hwreg) once per
 *           used input.
 *
 * Registers are assigned with reg++ in a fixed order: colours, face,
 * generics, fog, then wpos. An input is skipped when its entry equals
 * ATTR_UNUSED.
 *
 * NOTE(review): lines are missing from this listing (the fused line numbers
 * skip 115 -> 118 and 119 -> 122). The parameter that declares mydata and
 * the declarations of i and reg are not visible, nor are the closing
 * braces. reg presumably starts at 0; confirm against a pristine copy.
 */
113 static void allocate_hardware_inputs(
114 struct r300_fragment_program_compiler
* c
,
115 void (*allocate
)(void * data
, unsigned input
, unsigned hwreg
),
118 struct r300_shader_semantics
* inputs
=
119 (struct r300_shader_semantics
*)c
->UserData
;
122 /* Allocate input registers. */
123 for (i
= 0; i
< ATTR_COLOR_COUNT
; i
++) {
124 if (inputs
->color
[i
] != ATTR_UNUSED
) {
125 allocate(mydata
, inputs
->color
[i
], reg
++);
/* Face comes after the colours and before the generics. */
128 if (inputs
->face
!= ATTR_UNUSED
) {
129 allocate(mydata
, inputs
->face
, reg
++);
131 for (i
= 0; i
< ATTR_GENERIC_COUNT
; i
++) {
132 if (inputs
->generic
[i
] != ATTR_UNUSED
) {
133 allocate(mydata
, inputs
->generic
[i
], reg
++);
/* Fog and window position take the last registers. */
136 if (inputs
->fog
!= ATTR_UNUSED
) {
137 allocate(mydata
, inputs
->fog
, reg
++);
139 if (inputs
->wpos
!= ATTR_UNUSED
) {
140 allocate(mydata
, inputs
->wpos
, reg
++);
/*
 * Gathers the sampler-dependent state that the fragment program compiler
 * needs, one entry per sampler slot.
 *
 * r300:  context; r300->textures_state.state supplies sampler states and
 *        sampler views.
 * state: output; state->unit[i] is filled for each i below
 *        texstate->sampler_state_count.
 *
 * Fields written by the visible code, in order:
 *   - compare_mode_enabled = 1 when the sampler compare mode is
 *     PIPE_TEX_COMPARE_R_TO_TEXTURE.
 *   - depth_texture_swizzle, built from the sampler view swizzle when a
 *     view is bound, otherwise RC_SWIZZLE_XYZW.
 *   - texture_compare_func, copied from the sampler compare_func.
 *   - non_normalized_coords, the negation of normalized_coords.
 *   - wrap_mode and fake_npot for textures whose desc.is_npot is set,
 *     chosen from the sampler wrap_s mode.
 *
 * NOTE(review): lines are missing from this listing (for example the fused
 * line numbers skip 153 -> 159, 168 -> 170, 188 -> 191 and 200 -> 204). The
 * declaration of i, whatever stood on original lines 154-158, the else
 * keywords, break statements, any default label and all closing braces are
 * not visible, so the exact nesting of the statements below cannot be read
 * from here. Restore from a pristine copy before building.
 */
144 static void get_external_state(
145 struct r300_context
* r300
,
146 struct r300_fragment_program_external_state
* state
)
148 struct r300_textures_state
*texstate
= r300
->textures_state
.state
;
150 unsigned char *swizzle
;
152 for (i
= 0; i
< texstate
->sampler_state_count
; i
++) {
153 struct r300_sampler_state
* s
= texstate
->sampler_states
[i
];
/* Shadow (depth-compare) sampling is flagged for the compiler. */
159 if (s
->state
.compare_mode
== PIPE_TEX_COMPARE_R_TO_TEXTURE
) {
160 state
->unit
[i
].compare_mode_enabled
= 1;
162 /* Pass depth texture swizzling to the compiler. */
163 if (texstate
->sampler_views
[i
]) {
164 swizzle
= texstate
->sampler_views
[i
]->swizzle
;
166 state
->unit
[i
].depth_texture_swizzle
=
167 RC_MAKE_SWIZZLE(swizzle
[0], swizzle
[1],
168 swizzle
[2], swizzle
[3]);
/* Identity swizzle; presumably the else branch for a slot with no view. */
170 state
->unit
[i
].depth_texture_swizzle
= RC_SWIZZLE_XYZW
;
173 /* Fortunately, no need to translate this. */
174 state
->unit
[i
].texture_compare_func
= s
->state
.compare_func
;
177 state
->unit
[i
].non_normalized_coords
= !s
->state
.normalized_coords
;
179 if (texstate
->sampler_views
[i
]) {
180 struct r300_texture
*t
;
181 t
= (struct r300_texture
*)texstate
->sampler_views
[i
]->base
.texture
;
183 /* XXX this should probably take into account STR, not just S. */
/* For non-power-of-two textures the wrap mode is passed to the compiler
 * and fake_npot is set for the repeating and mirroring modes below. */
184 if (t
->desc
.is_npot
) {
185 switch (s
->state
.wrap_s
) {
186 case PIPE_TEX_WRAP_REPEAT
:
187 state
->unit
[i
].wrap_mode
= RC_WRAP_REPEAT
;
188 state
->unit
[i
].fake_npot
= TRUE
;
191 case PIPE_TEX_WRAP_MIRROR_REPEAT
:
192 state
->unit
[i
].wrap_mode
= RC_WRAP_MIRRORED_REPEAT
;
193 state
->unit
[i
].fake_npot
= TRUE
;
/* All three mirrored-clamp variants share one compiler wrap mode. */
196 case PIPE_TEX_WRAP_MIRROR_CLAMP
:
197 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE
:
198 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER
:
199 state
->unit
[i
].wrap_mode
= RC_WRAP_MIRRORED_CLAMP
;
200 state
->unit
[i
].fake_npot
= TRUE
;
/* Presumably the default case: no wrap emulation is requested. */
204 state
->unit
[i
].wrap_mode
= RC_WRAP_NONE
;
/*
 * Forward declaration. r300_dummy_fragment_shader() below calls
 * r300_translate_fragment_shader(), and the definition of
 * r300_translate_fragment_shader() further down calls
 * r300_dummy_fragment_shader(), so one of the two must be declared before
 * its definition.
 */
212 static void r300_translate_fragment_shader(
213 struct r300_context
* r300
,
214 struct r300_fragment_shader_code
* shader
,
215 const struct tgsi_token
*tokens
);
/*
 * Builds a minimal replacement fragment shader and compiles it into shader.
 *
 * r300:   context passed through to r300_translate_fragment_shader().
 * shader: shader code object that receives the result; shader->dummy is
 *         set to TRUE before translation.
 *
 * The program is assembled with the ureg helpers: one COLOR output with
 * semantic index 0, one immediate (0, 0, 0, 1) and a MOV of the immediate
 * into the output.
 *
 * NOTE(review): lines are missing from this listing (the fused line numbers
 * skip 222 -> 226, 231 -> 234 and 237 -> 242). The declarations of out and
 * imm, whatever stood on original line 232, and anything after the
 * translate call (original lines 238-240) are not visible. In particular,
 * no release of ureg or of state.tokens can be seen here; verify ownership
 * against a pristine copy.
 */
217 static void r300_dummy_fragment_shader(
218 struct r300_context
* r300
,
219 struct r300_fragment_shader_code
* shader
)
221 struct pipe_shader_state state
;
222 struct ureg_program
*ureg
;
226 /* Make a simple fragment shader which outputs (0, 0, 0, 1) */
227 ureg
= ureg_create(TGSI_PROCESSOR_FRAGMENT
);
228 out
= ureg_DECL_output(ureg
, TGSI_SEMANTIC_COLOR
, 0);
229 imm
= ureg_imm4f(ureg
, 0, 0, 0, 1);
231 ureg_MOV(ureg
, out
, imm
);
/* Turn the builder contents into a TGSI token stream. */
234 state
.tokens
= ureg_finalize(ureg
);
/* Mark the shader as the fallback before compiling it. */
236 shader
->dummy
= TRUE
;
237 r300_translate_fragment_shader(r300
, shader
, state
.tokens
);
/*
 * Builds shader->cb_code, the command buffer that uploads the compiled
 * fragment program.
 *
 * r300:   context; r300->screen->caps.is_r500 and caps.is_r400 select the
 *         register layout.
 * shader: compiled shader; shader->code is read and shader->cb_code /
 *         shader->cb_code_size are written.
 *
 * Two layouts are visible:
 *   - R500: US_CONFIG, US_PIXSIZE, US_FC_CTRL, the integer constants, the
 *     code range/offset/address registers, then inst_end + 1 instructions
 *     of six words each through the GA_US_VECTOR index/data pair, then the
 *     immediates through the same pair (raw floats via OUT_CB_TABLE).
 *   - R300/R400: an optional R400_US_CODE_BANK write, US_CONFIG,
 *     US_PIXSIZE, US_CODE_OFFSET, four US_CODE_ADDR values, four ALU tables
 *     of alu.length words each, the TEX table when tex.length is non-zero,
 *     then the immediates, each component passed through pack_float24().
 * R300_FG_DEPTH_SRC and R300_US_W_FMT are written last.
 *
 * Immediates are looked up in constants[imm_first .. imm_end), where
 * imm_first is shader->externals_count and imm_end is the total constant
 * count; only entries of type RC_CONSTANT_IMMEDIATE are emitted.
 *
 * NOTE(review): lines are missing from this listing (for example the fused
 * line numbers skip 250 -> 254, 258 -> 260, 284 -> 287, 297 -> 302,
 * 307 -> 310 and 353 -> 359). Both cb_code_size formulas have lost a term
 * (the second one ends in a dangling "+"), imm_count is never used in the
 * visible code, and the declaration of i, the else keyword between the two
 * layouts, any begin/end bracketing of the command buffer and all closing
 * braces are not shown. Restore from a pristine copy before building.
 */
242 static void r300_emit_fs_code_to_buffer(
243 struct r300_context
*r300
,
244 struct r300_fragment_shader_code
*shader
)
246 struct rX00_fragment_program_code
*generic_code
= &shader
->code
;
247 unsigned imm_count
= shader
->immediates_count
;
248 unsigned imm_first
= shader
->externals_count
;
249 unsigned imm_end
= generic_code
->constants
.Count
;
250 struct rc_constant
*constants
= generic_code
->constants
.Constants
;
254 if (r300
->screen
->caps
.is_r500
) {
255 struct r500_fragment_program_code
*code
= &generic_code
->code
.r500
;
/* Size of the command buffer (presumably in dwords). The term from
 * original line 259 is missing from this listing. */
257 shader
->cb_code_size
= 19 +
258 ((code
->inst_end
+ 1) * 6) +
260 code
->int_constant_count
* 2;
262 NEW_CB(shader
->cb_code
, shader
->cb_code_size
);
263 OUT_CB_REG(R500_US_CONFIG
, R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO
);
264 OUT_CB_REG(R500_US_PIXSIZE
, code
->max_temp_idx
);
265 OUT_CB_REG(R500_US_FC_CTRL
, code
->us_fc_ctrl
);
/* Integer constants: consecutive registers are 4 bytes apart. */
266 for(i
= 0; i
< code
->int_constant_count
; i
++){
267 OUT_CB_REG(R500_US_FC_INT_CONST_0
+ (i
* 4),
268 code
->int_constants
[i
]);
/* The program occupies instruction slots 0 .. inst_end. */
270 OUT_CB_REG(R500_US_CODE_RANGE
,
271 R500_US_CODE_RANGE_ADDR(0) | R500_US_CODE_RANGE_SIZE(code
->inst_end
));
272 OUT_CB_REG(R500_US_CODE_OFFSET
, 0);
273 OUT_CB_REG(R500_US_CODE_ADDR
,
274 R500_US_CODE_START_ADDR(0) | R500_US_CODE_END_ADDR(code
->inst_end
));
/* Select the instruction vector, then stream six words per instruction. */
276 OUT_CB_REG(R500_GA_US_VECTOR_INDEX
, R500_GA_US_VECTOR_INDEX_TYPE_INSTR
);
277 OUT_CB_ONE_REG(R500_GA_US_VECTOR_DATA
, (code
->inst_end
+ 1) * 6);
278 for (i
= 0; i
<= code
->inst_end
; i
++) {
279 OUT_CB(code
->inst
[i
].inst0
);
280 OUT_CB(code
->inst
[i
].inst1
);
281 OUT_CB(code
->inst
[i
].inst2
);
282 OUT_CB(code
->inst
[i
].inst3
);
283 OUT_CB(code
->inst
[i
].inst4
);
284 OUT_CB(code
->inst
[i
].inst5
);
287 /* Emit immediates. */
289 for(i
= imm_first
; i
< imm_end
; ++i
) {
290 if (constants
[i
].Type
== RC_CONSTANT_IMMEDIATE
) {
291 const float *data
= constants
[i
].u
.Immediate
;
/* Point the vector index at constant slot i, then write its four floats. */
293 OUT_CB_REG(R500_GA_US_VECTOR_INDEX
,
294 R500_GA_US_VECTOR_INDEX_TYPE_CONST
|
295 (i
& R500_GA_US_VECTOR_INDEX_MASK
));
296 OUT_CB_ONE_REG(R500_GA_US_VECTOR_DATA
, 4);
297 OUT_CB_TABLE(data
, 4);
/* R300/R400 layout. The else keyword that introduces it is not visible in
 * this listing. */
302 struct r300_fragment_program_code
*code
= &generic_code
->code
.r300
;
/* The final term of this sum (original line 308) is missing from this
 * listing. */
304 shader
->cb_code_size
= 19 +
305 (r300
->screen
->caps
.is_r400
? 2 : 0) +
306 code
->alu
.length
* 4 +
307 (code
->tex
.length
? (1 + code
->tex
.length
) : 0) +
310 NEW_CB(shader
->cb_code
, shader
->cb_code_size
);
/* R400 only: one extra register write, matching the "? 2 : 0" term above. */
312 if (r300
->screen
->caps
.is_r400
)
313 OUT_CB_REG(R400_US_CODE_BANK
, 0);
315 OUT_CB_REG(R300_US_CONFIG
, code
->config
);
316 OUT_CB_REG(R300_US_PIXSIZE
, code
->pixsize
);
317 OUT_CB_REG(R300_US_CODE_OFFSET
, code
->code_offset
);
319 OUT_CB_REG_SEQ(R300_US_CODE_ADDR_0
, 4);
320 OUT_CB_TABLE(code
->code_addr
, 4);
/* Four parallel tables, each alu.length entries long: RGB instruction,
 * RGB address, alpha instruction, alpha address. */
322 OUT_CB_REG_SEQ(R300_US_ALU_RGB_INST_0
, code
->alu
.length
);
323 for (i
= 0; i
< code
->alu
.length
; i
++)
324 OUT_CB(code
->alu
.inst
[i
].rgb_inst
);
326 OUT_CB_REG_SEQ(R300_US_ALU_RGB_ADDR_0
, code
->alu
.length
);
327 for (i
= 0; i
< code
->alu
.length
; i
++)
328 OUT_CB(code
->alu
.inst
[i
].rgb_addr
);
330 OUT_CB_REG_SEQ(R300_US_ALU_ALPHA_INST_0
, code
->alu
.length
);
331 for (i
= 0; i
< code
->alu
.length
; i
++)
332 OUT_CB(code
->alu
.inst
[i
].alpha_inst
);
334 OUT_CB_REG_SEQ(R300_US_ALU_ALPHA_ADDR_0
, code
->alu
.length
);
335 for (i
= 0; i
< code
->alu
.length
; i
++)
336 OUT_CB(code
->alu
.inst
[i
].alpha_addr
);
/* The TEX table is emitted only when the program has texture instructions. */
338 if (code
->tex
.length
) {
339 OUT_CB_REG_SEQ(R300_US_TEX_INST_0
, code
->tex
.length
);
340 OUT_CB_TABLE(code
->tex
.inst
, code
->tex
.length
);
343 /* Emit immediates. */
345 for(i
= imm_first
; i
< imm_end
; ++i
) {
346 if (constants
[i
].Type
== RC_CONSTANT_IMMEDIATE
) {
347 const float *data
= constants
[i
].u
.Immediate
;
/* Constant i starts 16 bytes after constant i - 1; each of its four
 * components is converted by pack_float24() before being written. */
349 OUT_CB_REG_SEQ(R300_PFS_PARAM_0_X
+ i
* 16, 4);
350 OUT_CB(pack_float24(data
[0]));
351 OUT_CB(pack_float24(data
[1]));
352 OUT_CB(pack_float24(data
[2]));
353 OUT_CB(pack_float24(data
[3]));
/* Depth source and W format; presumably emitted for both layouts, but the
 * braces that would show this are not visible here. */
359 OUT_CB_REG(R300_FG_DEPTH_SRC
, shader
->fg_depth_src
);
360 OUT_CB_REG(R300_US_W_FMT
, shader
->us_out_w
);
/*
 * Compiles a TGSI fragment shader into hardware code and builds its
 * command buffer.
 *
 * r300:   context; supplies the chip capabilities and the debug flags.
 * shader: receives the scanned info, the input table, the compiled code,
 *         the constant counts, the depth-output registers and the command
 *         buffer. shader->compare_state is read as the compiler external
 *         state.
 * tokens: TGSI token stream to compile.
 *
 * Visible steps, in order:
 *   1. Scan the tokens into shader->info and read the input semantics.
 *   2. Zero and initialise the compiler and set its limits: 128 / 256 / 512
 *      temporaries / constants / ALU instructions on R500, otherwise
 *      32 / 32 / 64.
 *   3. Locate the colour and depth outputs.
 *   4. Translate the TGSI into the compiler representation and apply the
 *      WPOS and FACE transforms when those inputs are used.
 *   5. Run r3xx_compile_fragment_program().
 *   6. On a compiler error, or when the program has no instructions
 *      (inst_end == -1), destroy the compiler and build the dummy shader
 *      instead.
 *   7. Count external, immediate and state constants.
 *   8. Choose the depth source and W format from writes_depth.
 *   9. Destroy the compiler and emit the command buffer.
 *
 * NOTE(review): lines are missing from this listing (for example the fused
 * line numbers skip 370 -> 374, 402 -> 405, 410 -> 413, 433 -> 436 -> 441,
 * 442 -> 446, 450 -> 454, 455 -> 457 and 470 -> 477). The declarations of
 * i, wpos and face, the condition around the "Cannot compile the dummy
 * shader" message, the return statements that presumably follow each dummy
 * fallback, the for header of the externals loop, the else keyword in the
 * depth setup and all closing braces are not visible. Restore from a
 * pristine copy before building.
 */
364 static void r300_translate_fragment_shader(
365 struct r300_context
* r300
,
366 struct r300_fragment_shader_code
* shader
,
367 const struct tgsi_token
*tokens
)
369 struct r300_fragment_program_compiler compiler
;
370 struct tgsi_to_rc ttr
;
374 tgsi_scan_shader(tokens
, &shader
->info
);
375 r300_shader_read_fs_inputs(&shader
->info
, &shader
->inputs
);
/* Remember the WPOS and FACE input numbers for the transforms below. */
377 wpos
= shader
->inputs
.wpos
;
378 face
= shader
->inputs
.face
;
380 /* Setup the compiler. */
381 memset(&compiler
, 0, sizeof(compiler
));
382 rc_init(&compiler
.Base
);
383 compiler
.Base
.Debug
= DBG_ON(r300
, DBG_FP
);
385 compiler
.code
= &shader
->code
;
386 compiler
.state
= shader
->compare_state
;
387 compiler
.Base
.is_r500
= r300
->screen
->caps
.is_r500
;
388 compiler
.Base
.disable_optimizations
= DBG_ON(r300
, DBG_NO_OPT
);
389 compiler
.Base
.has_half_swizzles
= TRUE
;
390 compiler
.Base
.has_presub
= TRUE
;
/* Resource limits differ between R500 and the older chips. */
391 compiler
.Base
.max_temp_regs
= compiler
.Base
.is_r500
? 128 : 32;
392 compiler
.Base
.max_constants
= compiler
.Base
.is_r500
? 256 : 32;
393 compiler
.Base
.max_alu_insts
= compiler
.Base
.is_r500
? 512 : 64;
394 compiler
.Base
.remove_unused_constants
= TRUE
;
/* allocate_hardware_inputs() reads UserData back as the input table. */
395 compiler
.AllocateHwInputs
= &allocate_hardware_inputs
;
396 compiler
.UserData
= &shader
->inputs
;
398 find_output_registers(&compiler
, shader
);
400 if (compiler
.Base
.Debug
) {
401 DBG(r300
, DBG_FP
, "r300: Initial fragment program\n");
402 tgsi_dump(tokens
, 0);
405 /* Translate TGSI to our internal representation */
406 ttr
.compiler
= &compiler
.Base
;
407 ttr
.info
= &shader
->info
;
408 ttr
.use_half_swizzles
= TRUE
;
410 r300_tgsi_to_rc(&ttr
, tokens
);
/* NOTE(review): the opening delimiter of the following comment (original
 * line 412) was lost in this listing, so the next five lines are comment
 * text, not code. */
413 * Transform the program to support WPOS.
415 * Introduce a small fragment at the start of the program that will be
416 * the only code that directly reads the WPOS input.
417 * All other code pieces that reference that input will be rewritten
418 * to read from a newly allocated temporary. */
419 if (wpos
!= ATTR_UNUSED
) {
420 /* Moving the input to some other reg is not really necessary. */
421 rc_transform_fragment_wpos(&compiler
.Base
, wpos
, wpos
, TRUE
);
424 if (face
!= ATTR_UNUSED
) {
425 rc_transform_fragment_face(&compiler
.Base
, face
);
428 /* Invoke the compiler */
429 r3xx_compile_fragment_program(&compiler
);
/* Compile failure: report it and fall back to the dummy shader. */
431 if (compiler
.Base
.Error
) {
432 fprintf(stderr
, "r300 FP: Compiler Error:\n%sUsing a dummy shader"
433 " instead.\n", compiler
.Base
.ErrorMsg
);
/* Presumably reached only when the shader being compiled is already the
 * dummy one; the condition and the rest of this call are not visible. */
436 fprintf(stderr
, "r300 FP: Cannot compile the dummy shader! "
441 rc_destroy(&compiler
.Base
);
442 r300_dummy_fragment_shader(r300
, shader
);
446 /* Shaders with zero instructions are invalid,
447 * use the dummy shader instead. */
448 if (shader
->code
.code
.r500
.inst_end
== -1) {
449 rc_destroy(&compiler
.Base
);
450 r300_dummy_fragment_shader(r300
, shader
);
454 /* Initialize numbers of constants for each type. */
455 shader
->externals_count
= 0;
/* The next lines are the condition and body of a loop whose header was
 * lost: externals_count ends up as the length of the leading run of
 * RC_CONSTANT_EXTERNAL constants. */
457 i
< shader
->code
.constants
.Count
&&
458 shader
->code
.constants
.Constants
[i
].Type
== RC_CONSTANT_EXTERNAL
; i
++) {
459 shader
->externals_count
= i
+1;
461 shader
->immediates_count
= 0;
462 shader
->rc_state_count
= 0;
/* Classify every constant that follows the externals. */
464 for (i
= shader
->externals_count
; i
< shader
->code
.constants
.Count
; i
++) {
465 switch (shader
->code
.constants
.Constants
[i
].Type
) {
466 case RC_CONSTANT_IMMEDIATE
:
467 ++shader
->immediates_count
;
469 case RC_CONSTANT_STATE
:
470 ++shader
->rc_state_count
;
477 /* Setup shader depth output. */
478 if (shader
->code
.writes_depth
) {
479 shader
->fg_depth_src
= R300_FG_DEPTH_SRC_SHADER
;
480 shader
->us_out_w
= R300_W_FMT_W24
| R300_W_SRC_US
;
/* Presumably the else branch: depth comes from the scan converter. */
482 shader
->fg_depth_src
= R300_FG_DEPTH_SRC_SCAN
;
483 shader
->us_out_w
= R300_W_FMT_W0
| R300_W_SRC_US
;
486 /* And, finally... */
487 rc_destroy(&compiler
.Base
);
489 /* Build the command buffer. */
490 r300_emit_fs_code_to_buffer(r300
, shader
);
/*
 * Selects, compiling if necessary, the variant of the bound fragment shader
 * that matches the current sampler-dependent state.
 *
 * r300: context; r300_fs(r300) yields the bound fragment shader, whose
 *       variants hang off fs->first through the next pointers and whose
 *       active variant is fs->shader.
 *
 * The current external state is gathered with get_external_state() and
 * compared bytewise (memcmp) with the compare_state stored in each variant.
 * Three situations are visible:
 *   - first build: a variant is allocated, becomes both fs->first and
 *     fs->shader, receives a copy of the state and is compiled;
 *   - the active variant does not match: the variants are searched for a
 *     match;
 *   - nothing matches: a new variant is allocated, pushed on the front of
 *     the list, made active and compiled.
 *
 * NOTE(review): lines are missing from this listing (for example the fused
 * line numbers skip 499 -> 502, 507 -> 511, 514 -> 517, 518 -> 522 -> 528),
 * and the function may continue past the end of this view. The conditions
 * that select each branch, the loop that walks the variants, what happens
 * when a matching variant is found, every return statement (the function
 * returns boolean) and all braces are not visible. No NULL check on either
 * CALLOC_STRUCT result is visible. Restore from a pristine copy before
 * building.
 */
493 boolean
r300_pick_fragment_shader(struct r300_context
* r300
)
495 struct r300_fragment_shader
* fs
= r300_fs(r300
);
/* Zero-initialised so that untouched units and fields compare equal in the
 * memcmp calls below. */
496 struct r300_fragment_program_external_state state
= {{{ 0 }}};
497 struct r300_fragment_shader_code
* ptr
;
499 get_external_state(r300
, &state
);
502 /* Build the fragment shader for the first time. */
503 fs
->first
= fs
->shader
= CALLOC_STRUCT(r300_fragment_shader_code
);
505 memcpy(&fs
->shader
->compare_state
, &state
,
506 sizeof(struct r300_fragment_program_external_state
));
507 r300_translate_fragment_shader(r300
, fs
->shader
, fs
->state
.tokens
);
511 /* Check if the currently-bound shader has been compiled
512 * with the texture-compare state we need. */
513 if (memcmp(&fs
->shader
->compare_state
, &state
, sizeof(state
)) != 0) {
514 /* Search for the right shader. */
517 if (memcmp(&ptr
->compare_state
, &state
, sizeof(state
)) == 0) {
518 if (fs
->shader
!= ptr
) {
522 /* The currently-bound one is OK. */
528 /* Not found, gotta compile a new one. */
529 ptr
= CALLOC_STRUCT(r300_fragment_shader_code
);
/* Push the new variant on the front of the list and make it active. */
530 ptr
->next
= fs
->first
;
531 fs
->first
= fs
->shader
= ptr
;
533 ptr
->compare_state
= state
;
534 r300_translate_fragment_shader(r300
, ptr
, fs
->state
.tokens
);