/*
 * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
 *                Joakim Sindholt <opensource@zhasha.com>
 * Copyright 2009 Marek Olšák <maraeo@gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE. */

#include <stdlib.h>

#include "util/u_math.h"
#include "util/u_memory.h"

#include "tgsi/tgsi_dump.h"
#include "tgsi/tgsi_ureg.h"

#include "r300_context.h"
#include "r300_emit.h"
#include "r300_screen.h"
#include "r300_tgsi_to_rc.h"

#include "radeon_code.h"
#include "radeon_compiler.h"

42 /* Convert info about FS input semantics to r300_shader_semantics. */
43 void r300_shader_read_fs_inputs(struct tgsi_shader_info
* info
,
44 struct r300_shader_semantics
* fs_inputs
)
49 r300_shader_semantics_reset(fs_inputs
);
51 for (i
= 0; i
< info
->num_inputs
; i
++) {
52 index
= info
->input_semantic_index
[i
];
54 switch (info
->input_semantic_name
[i
]) {
55 case TGSI_SEMANTIC_COLOR
:
56 assert(index
< ATTR_COLOR_COUNT
);
57 fs_inputs
->color
[index
] = i
;
60 case TGSI_SEMANTIC_GENERIC
:
61 assert(index
< ATTR_GENERIC_COUNT
);
62 fs_inputs
->generic
[index
] = i
;
65 case TGSI_SEMANTIC_FOG
:
70 case TGSI_SEMANTIC_POSITION
:
76 fprintf(stderr
, "r300: FP: Unknown input semantic: %i\n",
77 info
->input_semantic_name
[i
]);
82 static void find_output_registers(struct r300_fragment_program_compiler
* compiler
,
83 struct r300_fragment_shader_code
*shader
)
85 unsigned i
, colorbuf_count
= 0;
87 /* Mark the outputs as not present initially */
88 compiler
->OutputColor
[0] = shader
->info
.num_outputs
;
89 compiler
->OutputColor
[1] = shader
->info
.num_outputs
;
90 compiler
->OutputColor
[2] = shader
->info
.num_outputs
;
91 compiler
->OutputColor
[3] = shader
->info
.num_outputs
;
92 compiler
->OutputDepth
= shader
->info
.num_outputs
;
94 /* Now see where they really are. */
95 for(i
= 0; i
< shader
->info
.num_outputs
; ++i
) {
96 switch(shader
->info
.output_semantic_name
[i
]) {
97 case TGSI_SEMANTIC_COLOR
:
98 compiler
->OutputColor
[colorbuf_count
] = i
;
101 case TGSI_SEMANTIC_POSITION
:
102 compiler
->OutputDepth
= i
;
108 static void allocate_hardware_inputs(
109 struct r300_fragment_program_compiler
* c
,
110 void (*allocate
)(void * data
, unsigned input
, unsigned hwreg
),
113 struct r300_shader_semantics
* inputs
=
114 (struct r300_shader_semantics
*)c
->UserData
;
117 /* Allocate input registers. */
118 for (i
= 0; i
< ATTR_COLOR_COUNT
; i
++) {
119 if (inputs
->color
[i
] != ATTR_UNUSED
) {
120 allocate(mydata
, inputs
->color
[i
], reg
++);
123 for (i
= 0; i
< ATTR_GENERIC_COUNT
; i
++) {
124 if (inputs
->generic
[i
] != ATTR_UNUSED
) {
125 allocate(mydata
, inputs
->generic
[i
], reg
++);
128 if (inputs
->fog
!= ATTR_UNUSED
) {
129 allocate(mydata
, inputs
->fog
, reg
++);
131 if (inputs
->wpos
!= ATTR_UNUSED
) {
132 allocate(mydata
, inputs
->wpos
, reg
++);
136 static void get_external_state(
137 struct r300_context
* r300
,
138 struct r300_fragment_program_external_state
* state
)
140 struct r300_textures_state
*texstate
= r300
->textures_state
.state
;
142 unsigned char *swizzle
;
144 for (i
= 0; i
< texstate
->sampler_state_count
; i
++) {
145 struct r300_sampler_state
* s
= texstate
->sampler_states
[i
];
151 if (s
->state
.compare_mode
== PIPE_TEX_COMPARE_R_TO_TEXTURE
) {
152 state
->unit
[i
].compare_mode_enabled
= 1;
154 /* Pass depth texture swizzling to the compiler. */
155 if (texstate
->sampler_views
[i
]) {
156 swizzle
= texstate
->sampler_views
[i
]->swizzle
;
158 state
->unit
[i
].depth_texture_swizzle
=
159 RC_MAKE_SWIZZLE(swizzle
[0], swizzle
[1],
160 swizzle
[2], swizzle
[3]);
162 state
->unit
[i
].depth_texture_swizzle
= RC_SWIZZLE_XYZW
;
165 /* Fortunately, no need to translate this. */
166 state
->unit
[i
].texture_compare_func
= s
->state
.compare_func
;
169 state
->unit
[i
].non_normalized_coords
= !s
->state
.normalized_coords
;
171 if (texstate
->sampler_views
[i
]) {
172 struct r300_texture
*t
;
173 t
= (struct r300_texture
*)texstate
->sampler_views
[i
]->base
.texture
;
175 /* XXX this should probably take into account STR, not just S. */
177 switch (s
->state
.wrap_s
) {
178 case PIPE_TEX_WRAP_REPEAT
:
179 state
->unit
[i
].wrap_mode
= RC_WRAP_REPEAT
;
180 state
->unit
[i
].fake_npot
= TRUE
;
183 case PIPE_TEX_WRAP_MIRROR_REPEAT
:
184 state
->unit
[i
].wrap_mode
= RC_WRAP_MIRRORED_REPEAT
;
185 state
->unit
[i
].fake_npot
= TRUE
;
188 case PIPE_TEX_WRAP_MIRROR_CLAMP
:
189 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE
:
190 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER
:
191 state
->unit
[i
].wrap_mode
= RC_WRAP_MIRRORED_CLAMP
;
192 state
->unit
[i
].fake_npot
= TRUE
;
196 state
->unit
[i
].wrap_mode
= RC_WRAP_NONE
;
/* Forward declaration: r300_dummy_fragment_shader() below calls the
 * translator, and the translator in turn falls back to the dummy shader. */
static void r300_translate_fragment_shader(
    struct r300_context* r300,
    struct r300_fragment_shader_code* shader,
    const struct tgsi_token *tokens);

209 static void r300_dummy_fragment_shader(
210 struct r300_context
* r300
,
211 struct r300_fragment_shader_code
* shader
)
213 struct pipe_shader_state state
;
214 struct ureg_program
*ureg
;
218 /* Make a simple fragment shader which outputs (0, 0, 0, 1) */
219 ureg
= ureg_create(TGSI_PROCESSOR_FRAGMENT
);
220 out
= ureg_DECL_output(ureg
, TGSI_SEMANTIC_COLOR
, 0);
221 imm
= ureg_imm4f(ureg
, 0, 0, 0, 1);
223 ureg_MOV(ureg
, out
, imm
);
226 state
.tokens
= ureg_finalize(ureg
);
228 shader
->dummy
= TRUE
;
229 r300_translate_fragment_shader(r300
, shader
, state
.tokens
);
234 static void r300_emit_fs_code_to_buffer(
235 struct r300_context
*r300
,
236 struct r300_fragment_shader_code
*shader
)
238 struct rX00_fragment_program_code
*generic_code
= &shader
->code
;
239 unsigned imm_count
= shader
->immediates_count
;
240 unsigned imm_first
= shader
->externals_count
;
241 unsigned imm_end
= generic_code
->constants
.Count
;
242 struct rc_constant
*constants
= generic_code
->constants
.Constants
;
246 if (r300
->screen
->caps
.is_r500
) {
247 struct r500_fragment_program_code
*code
= &generic_code
->code
.r500
;
249 shader
->cb_code_size
= 17 +
250 ((code
->inst_end
+ 1) * 6) +
253 NEW_CB(shader
->cb_code
, shader
->cb_code_size
);
254 OUT_CB_REG(R500_US_CONFIG
, R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO
);
255 OUT_CB_REG(R500_US_PIXSIZE
, code
->max_temp_idx
);
256 OUT_CB_REG(R500_US_CODE_RANGE
,
257 R500_US_CODE_RANGE_ADDR(0) | R500_US_CODE_RANGE_SIZE(code
->inst_end
));
258 OUT_CB_REG(R500_US_CODE_OFFSET
, 0);
259 OUT_CB_REG(R500_US_CODE_ADDR
,
260 R500_US_CODE_START_ADDR(0) | R500_US_CODE_END_ADDR(code
->inst_end
));
262 OUT_CB_REG(R500_GA_US_VECTOR_INDEX
, R500_GA_US_VECTOR_INDEX_TYPE_INSTR
);
263 OUT_CB_ONE_REG(R500_GA_US_VECTOR_DATA
, (code
->inst_end
+ 1) * 6);
264 for (i
= 0; i
<= code
->inst_end
; i
++) {
265 OUT_CB(code
->inst
[i
].inst0
);
266 OUT_CB(code
->inst
[i
].inst1
);
267 OUT_CB(code
->inst
[i
].inst2
);
268 OUT_CB(code
->inst
[i
].inst3
);
269 OUT_CB(code
->inst
[i
].inst4
);
270 OUT_CB(code
->inst
[i
].inst5
);
273 /* Emit immediates. */
275 for(i
= imm_first
; i
< imm_end
; ++i
) {
276 if (constants
[i
].Type
== RC_CONSTANT_IMMEDIATE
) {
277 const float *data
= constants
[i
].u
.Immediate
;
279 OUT_CB_REG(R500_GA_US_VECTOR_INDEX
,
280 R500_GA_US_VECTOR_INDEX_TYPE_CONST
|
281 (i
& R500_GA_US_VECTOR_INDEX_MASK
));
282 OUT_CB_ONE_REG(R500_GA_US_VECTOR_DATA
, 4);
283 OUT_CB_TABLE(data
, 4);
288 struct r300_fragment_program_code
*code
= &generic_code
->code
.r300
;
290 shader
->cb_code_size
= 19 +
291 code
->alu
.length
* 4 +
292 (code
->tex
.length
? (1 + code
->tex
.length
) : 0) +
295 NEW_CB(shader
->cb_code
, shader
->cb_code_size
);
296 OUT_CB_REG(R300_US_CONFIG
, code
->config
);
297 OUT_CB_REG(R300_US_PIXSIZE
, code
->pixsize
);
298 OUT_CB_REG(R300_US_CODE_OFFSET
, code
->code_offset
);
300 OUT_CB_REG_SEQ(R300_US_CODE_ADDR_0
, 4);
301 OUT_CB_TABLE(code
->code_addr
, 4);
303 OUT_CB_REG_SEQ(R300_US_ALU_RGB_INST_0
, code
->alu
.length
);
304 for (i
= 0; i
< code
->alu
.length
; i
++)
305 OUT_CB(code
->alu
.inst
[i
].rgb_inst
);
307 OUT_CB_REG_SEQ(R300_US_ALU_RGB_ADDR_0
, code
->alu
.length
);
308 for (i
= 0; i
< code
->alu
.length
; i
++)
309 OUT_CB(code
->alu
.inst
[i
].rgb_addr
);
311 OUT_CB_REG_SEQ(R300_US_ALU_ALPHA_INST_0
, code
->alu
.length
);
312 for (i
= 0; i
< code
->alu
.length
; i
++)
313 OUT_CB(code
->alu
.inst
[i
].alpha_inst
);
315 OUT_CB_REG_SEQ(R300_US_ALU_ALPHA_ADDR_0
, code
->alu
.length
);
316 for (i
= 0; i
< code
->alu
.length
; i
++)
317 OUT_CB(code
->alu
.inst
[i
].alpha_addr
);
319 if (code
->tex
.length
) {
320 OUT_CB_REG_SEQ(R300_US_TEX_INST_0
, code
->tex
.length
);
321 OUT_CB_TABLE(code
->tex
.inst
, code
->tex
.length
);
324 /* Emit immediates. */
326 for(i
= imm_first
; i
< imm_end
; ++i
) {
327 if (constants
[i
].Type
== RC_CONSTANT_IMMEDIATE
) {
328 const float *data
= constants
[i
].u
.Immediate
;
330 OUT_CB_REG_SEQ(R300_PFS_PARAM_0_X
+ i
* 16, 4);
331 OUT_CB(pack_float24(data
[0]));
332 OUT_CB(pack_float24(data
[1]));
333 OUT_CB(pack_float24(data
[2]));
334 OUT_CB(pack_float24(data
[3]));
340 OUT_CB_REG(R300_FG_DEPTH_SRC
, shader
->fg_depth_src
);
341 OUT_CB_REG(R300_US_W_FMT
, shader
->us_out_w
);
345 static void r300_translate_fragment_shader(
346 struct r300_context
* r300
,
347 struct r300_fragment_shader_code
* shader
,
348 const struct tgsi_token
*tokens
)
350 struct r300_fragment_program_compiler compiler
;
351 struct tgsi_to_rc ttr
;
355 tgsi_scan_shader(tokens
, &shader
->info
);
356 r300_shader_read_fs_inputs(&shader
->info
, &shader
->inputs
);
358 wpos
= shader
->inputs
.wpos
;
360 /* Setup the compiler. */
361 memset(&compiler
, 0, sizeof(compiler
));
362 rc_init(&compiler
.Base
);
363 compiler
.Base
.Debug
= DBG_ON(r300
, DBG_FP
);
365 compiler
.code
= &shader
->code
;
366 compiler
.state
= shader
->compare_state
;
367 compiler
.Base
.is_r500
= r300
->screen
->caps
.is_r500
;
368 compiler
.Base
.max_temp_regs
= compiler
.Base
.is_r500
? 128 : 32;
369 compiler
.AllocateHwInputs
= &allocate_hardware_inputs
;
370 compiler
.UserData
= &shader
->inputs
;
372 find_output_registers(&compiler
, shader
);
374 if (compiler
.Base
.Debug
) {
375 debug_printf("r300: Initial fragment program\n");
376 tgsi_dump(tokens
, 0);
379 /* Translate TGSI to our internal representation */
380 ttr
.compiler
= &compiler
.Base
;
381 ttr
.info
= &shader
->info
;
382 ttr
.use_half_swizzles
= TRUE
;
384 r300_tgsi_to_rc(&ttr
, tokens
);
387 * Transform the program to support WPOS.
389 * Introduce a small fragment at the start of the program that will be
390 * the only code that directly reads the WPOS input.
391 * All other code pieces that reference that input will be rewritten
392 * to read from a newly allocated temporary. */
393 if (wpos
!= ATTR_UNUSED
) {
394 /* Moving the input to some other reg is not really necessary. */
395 rc_transform_fragment_wpos(&compiler
.Base
, wpos
, wpos
, TRUE
);
398 /* Invoke the compiler */
399 r3xx_compile_fragment_program(&compiler
);
401 /* Shaders with zero instructions are invalid,
402 * use the dummy shader instead. */
403 if (shader
->code
.code
.r500
.inst_end
== -1) {
404 rc_destroy(&compiler
.Base
);
405 r300_dummy_fragment_shader(r300
, shader
);
409 if (compiler
.Base
.Error
) {
410 fprintf(stderr
, "r300 FP: Compiler Error:\n%sUsing a dummy shader"
411 " instead.\nIf there's an 'unknown opcode' message, please"
412 " file a bug report and attach this log.\n", compiler
.Base
.ErrorMsg
);
415 fprintf(stderr
, "r300 FP: Cannot compile the dummy shader! "
420 rc_destroy(&compiler
.Base
);
421 r300_dummy_fragment_shader(r300
, shader
);
425 /* Initialize numbers of constants for each type. */
426 shader
->externals_count
= ttr
.immediate_offset
;
427 shader
->immediates_count
= 0;
428 shader
->rc_state_count
= 0;
430 for (i
= shader
->externals_count
; i
< shader
->code
.constants
.Count
; i
++) {
431 switch (shader
->code
.constants
.Constants
[i
].Type
) {
432 case RC_CONSTANT_IMMEDIATE
:
433 ++shader
->immediates_count
;
435 case RC_CONSTANT_STATE
:
436 ++shader
->rc_state_count
;
443 /* Setup shader depth output. */
444 if (shader
->code
.writes_depth
) {
445 shader
->fg_depth_src
= R300_FG_DEPTH_SRC_SHADER
;
446 shader
->us_out_w
= R300_W_FMT_W24
| R300_W_SRC_US
;
448 shader
->fg_depth_src
= R300_FG_DEPTH_SRC_SCAN
;
449 shader
->us_out_w
= R300_W_FMT_W0
| R300_W_SRC_US
;
452 /* And, finally... */
453 rc_destroy(&compiler
.Base
);
455 /* Build the command buffer. */
456 r300_emit_fs_code_to_buffer(r300
, shader
);
459 boolean
r300_pick_fragment_shader(struct r300_context
* r300
)
461 struct r300_fragment_shader
* fs
= r300_fs(r300
);
462 struct r300_fragment_program_external_state state
= {{{ 0 }}};
463 struct r300_fragment_shader_code
* ptr
;
465 get_external_state(r300
, &state
);
468 /* Build the fragment shader for the first time. */
469 fs
->first
= fs
->shader
= CALLOC_STRUCT(r300_fragment_shader_code
);
471 memcpy(&fs
->shader
->compare_state
, &state
,
472 sizeof(struct r300_fragment_program_external_state
));
473 r300_translate_fragment_shader(r300
, fs
->shader
, fs
->state
.tokens
);
477 /* Check if the currently-bound shader has been compiled
478 * with the texture-compare state we need. */
479 if (memcmp(&fs
->shader
->compare_state
, &state
, sizeof(state
)) != 0) {
480 /* Search for the right shader. */
483 if (memcmp(&ptr
->compare_state
, &state
, sizeof(state
)) == 0) {
484 if (fs
->shader
!= ptr
) {
488 /* The currently-bound one is OK. */
494 /* Not found, gotta compile a new one. */
495 ptr
= CALLOC_STRUCT(r300_fragment_shader_code
);
496 ptr
->next
= fs
->first
;
497 fs
->first
= fs
->shader
= ptr
;
499 ptr
->compare_state
= state
;
500 r300_translate_fragment_shader(r300
, ptr
, fs
->state
.tokens
);