r300-gallium: r500-fs: Add writemasks and some flexibility for MOV/SWZ.
[mesa.git] / src / gallium / drivers / r300 / r300_state_shader.c
1 /*
2 * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE. */
22
23 #include "r300_state_shader.h"
24
25 static void r300_copy_passthrough_shader(struct r300_fragment_shader* fs)
26 {
27 struct r300_fragment_shader* pt = &r300_passthrough_fragment_shader;
28 fs->shader.stack_size = pt->shader.stack_size;
29 fs->alu_instruction_count = pt->alu_instruction_count;
30 fs->tex_instruction_count = pt->tex_instruction_count;
31 fs->indirections = pt->indirections;
32 fs->instructions[0] = pt->instructions[0];
33 }
34
35 static void r500_copy_passthrough_shader(struct r500_fragment_shader* fs)
36 {
37 struct r500_fragment_shader* pt = &r500_passthrough_fragment_shader;
38 fs->shader.stack_size = pt->shader.stack_size;
39 fs->instruction_count = pt->instruction_count;
40 fs->instructions[0] = pt->instructions[0];
41 }
42
43 static void r300_fs_declare(struct r300_fs_asm* assembler,
44 struct tgsi_full_declaration* decl)
45 {
46 switch (decl->Declaration.File) {
47 case TGSI_FILE_INPUT:
48 switch (decl->Semantic.SemanticName) {
49 case TGSI_SEMANTIC_COLOR:
50 assembler->color_count++;
51 break;
52 case TGSI_SEMANTIC_GENERIC:
53 assembler->tex_count++;
54 break;
55 default:
56 debug_printf("r300: fs: Bad semantic declaration %d\n",
57 decl->Semantic.SemanticName);
58 break;
59 }
60 break;
61 case TGSI_FILE_OUTPUT:
62 break;
63 case TGSI_FILE_TEMPORARY:
64 assembler->temp_count++;
65 break;
66 default:
67 debug_printf("r300: fs: Bad file %d\n", decl->Declaration.File);
68 break;
69 }
70
71 assembler->temp_offset = assembler->color_count + assembler->tex_count;
72 }
73
74 static INLINE unsigned r300_fs_src(struct r300_fs_asm* assembler,
75 struct tgsi_src_register* src)
76 {
77 switch (src->File) {
78 case TGSI_FILE_INPUT:
79 /* XXX may be wrong */
80 return src->Index;
81 break;
82 case TGSI_FILE_TEMPORARY:
83 return src->Index + assembler->temp_offset;
84 break;
85 default:
86 debug_printf("r300: fs: Unimplemented src %d\n", src->File);
87 break;
88 }
89 return 0;
90 }
91
92 static INLINE unsigned r300_fs_dst(struct r300_fs_asm* assembler,
93 struct tgsi_dst_register* dst)
94 {
95 switch (dst->File) {
96 case TGSI_FILE_OUTPUT:
97 return 0;
98 break;
99 case TGSI_FILE_TEMPORARY:
100 return dst->Index + assembler->temp_offset;
101 break;
102 default:
103 debug_printf("r300: fs: Unimplemented dst %d\n", dst->File);
104 break;
105 }
106 return 0;
107 }
108
109 static INLINE unsigned r500_fix_swiz(unsigned s)
110 {
111 /* For historical reasons, the swizzle values x, y, z, w, and 0 are
112 * equivalent to the actual machine code, but 1 is not. Thus, we just
113 * adjust it a bit... */
114 if (s == TGSI_EXTSWIZZLE_ONE) {
115 return R500_SWIZZLE_ONE;
116 } else {
117 return s;
118 }
119 }
120
121 static uint32_t r500_rgba_swiz(struct tgsi_full_src_register* reg)
122 {
123 if (reg->SrcRegister.Extended) {
124 return r500_fix_swiz(reg->SrcRegisterExtSwz.ExtSwizzleX) |
125 (r500_fix_swiz(reg->SrcRegisterExtSwz.ExtSwizzleY) << 3) |
126 (r500_fix_swiz(reg->SrcRegisterExtSwz.ExtSwizzleZ) << 6) |
127 (r500_fix_swiz(reg->SrcRegisterExtSwz.ExtSwizzleW) << 9);
128 } else {
129 return reg->SrcRegister.SwizzleX |
130 (reg->SrcRegister.SwizzleY << 3) |
131 (reg->SrcRegister.SwizzleZ << 6) |
132 (reg->SrcRegister.SwizzleW << 9);
133 }
134 }
135
136 static uint32_t r500_strq_swiz(struct tgsi_full_src_register* reg)
137 {
138 return reg->SrcRegister.SwizzleX |
139 (reg->SrcRegister.SwizzleY << 2) |
140 (reg->SrcRegister.SwizzleZ << 4) |
141 (reg->SrcRegister.SwizzleW << 6);
142 }
143
144 static INLINE uint32_t r500_rgb_swiz(struct tgsi_full_src_register* reg)
145 {
146 /* Only the first 9 bits... */
147 return r500_rgba_swiz(reg) & 0x1ff;
148 }
149
150 static INLINE uint32_t r500_alpha_swiz(struct tgsi_full_src_register* reg)
151 {
152 /* Only the last 3 bits... */
153 return r500_rgba_swiz(reg) >> 9;
154 }
155
156 static INLINE void r500_emit_mov(struct r500_fragment_shader* fs,
157 struct r300_fs_asm* assembler,
158 struct tgsi_full_src_register* src,
159 struct tgsi_full_dst_register* dst)
160 {
161 int i = fs->instruction_count;
162
163 if (dst->DstRegister.File == TGSI_FILE_OUTPUT) {
164 fs->instructions[i].inst0 = R500_INST_TYPE_OUT |
165 R500_ALU_OMASK(dst->DstRegister.WriteMask);
166 } else {
167 fs->instructions[i].inst0 = R500_INST_TYPE_ALU |
168 R500_ALU_WMASK(dst->DstRegister.WriteMask);
169 }
170
171 fs->instructions[i].inst0 |=
172 R500_INST_TEX_SEM_WAIT |
173 R500_INST_RGB_CLAMP | R500_INST_ALPHA_CLAMP;
174
175 fs->instructions[i].inst1 =
176 R500_RGB_ADDR0(r300_fs_src(assembler, &src->SrcRegister));
177 fs->instructions[i].inst2 =
178 R500_ALPHA_ADDR0(r300_fs_src(assembler, &src->SrcRegister));
179 fs->instructions[i].inst3 = R500_ALU_RGB_SEL_A_SRC0 |
180 R500_SWIZ_RGB_A(r500_rgb_swiz(src)) |
181 R500_ALU_RGB_SEL_B_SRC0 |
182 R500_SWIZ_RGB_B(r500_rgb_swiz(src));
183 fs->instructions[i].inst4 = R500_ALPHA_OP_CMP |
184 R500_SWIZ_ALPHA_A(r500_alpha_swiz(src)) |
185 R500_SWIZ_ALPHA_B(r500_alpha_swiz(src));
186 fs->instructions[i].inst5 =
187 R500_ALU_RGBA_OP_CMP | R500_ALU_RGBA_R_SWIZ_0 |
188 R500_ALU_RGBA_G_SWIZ_0 | R500_ALU_RGBA_B_SWIZ_0 |
189 R500_ALU_RGBA_A_SWIZ_0;
190
191 fs->instruction_count++;
192 }
193
194 static INLINE void r500_emit_tex(struct r500_fragment_shader* fs,
195 struct r300_fs_asm* assembler,
196 uint32_t op,
197 struct tgsi_full_src_register* src,
198 struct tgsi_full_dst_register* dst)
199 {
200 int i = fs->instruction_count;
201
202 fs->instructions[i].inst0 = R500_INST_TYPE_TEX |
203 R500_TEX_WMASK(dst->DstRegister.WriteMask) |
204 R500_INST_TEX_SEM_WAIT;
205 fs->instructions[i].inst1 = R500_TEX_ID(0) |
206 R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED |
207 R500_TEX_INST_PROJ;
208 fs->instructions[i].inst2 =
209 R500_TEX_SRC_ADDR(r300_fs_src(assembler, &src->SrcRegister)) |
210 R500_SWIZ_TEX_STRQ(r500_strq_swiz(src)) |
211 R500_TEX_DST_ADDR(r300_fs_dst(assembler, &dst->DstRegister)) |
212 R500_TEX_DST_R_SWIZ_R | R500_TEX_DST_G_SWIZ_G |
213 R500_TEX_DST_B_SWIZ_B | R500_TEX_DST_A_SWIZ_A;
214
215 fs->instruction_count++;
216 }
217
218 static void r500_fs_instruction(struct r500_fragment_shader* fs,
219 struct r300_fs_asm* assembler,
220 struct tgsi_full_instruction* inst)
221 {
222 /* Switch between opcodes. When possible, prefer using the official
223 * AMD/ATI names for opcodes, please, as it facilitates using the
224 * documentation. */
225 switch (inst->Instruction.Opcode) {
226 case TGSI_OPCODE_MOV:
227 case TGSI_OPCODE_SWZ:
228 r500_emit_mov(fs, assembler, &inst->FullSrcRegisters[0],
229 &inst->FullDstRegisters[0]);
230 break;
231 case TGSI_OPCODE_TXP:
232 r500_emit_tex(fs, assembler, 0, &inst->FullSrcRegisters[0],
233 &inst->FullDstRegisters[0]);
234 break;
235 case TGSI_OPCODE_END:
236 break;
237 default:
238 debug_printf("r300: fs: Bad opcode %d\n",
239 inst->Instruction.Opcode);
240 break;
241 }
242 }
243
244 static void r500_fs_finalize(struct r500_fragment_shader* fs,
245 struct r300_fs_asm* assembler)
246 {
247 /* XXX subtly wrong */
248 fs->shader.stack_size = assembler->temp_offset;
249
250 /* XXX should this just go with OPCODE_END? */
251 fs->instructions[fs->instruction_count - 1].inst0 |=
252 R500_INST_LAST;
253 }
254
255 void r300_translate_fragment_shader(struct r300_context* r300,
256 struct r300_fragment_shader* fs)
257 {
258 struct tgsi_parse_context parser;
259
260 tgsi_parse_init(&parser, fs->shader.state.tokens);
261
262 while (!tgsi_parse_end_of_tokens(&parser)) {
263 tgsi_parse_token(&parser);
264 }
265
266 r300_copy_passthrough_shader(fs);
267 }
268
269 void r500_translate_fragment_shader(struct r300_context* r300,
270 struct r500_fragment_shader* fs)
271 {
272 struct r300_fs_asm* assembler = CALLOC_STRUCT(r300_fs_asm);
273 if (assembler == NULL) {
274 return;
275 }
276 struct tgsi_parse_context parser;
277
278 tgsi_parse_init(&parser, fs->shader.state.tokens);
279
280 while (!tgsi_parse_end_of_tokens(&parser)) {
281 tgsi_parse_token(&parser);
282
283 /* This is seriously the lamest way to create fragment programs ever.
284 * I blame TGSI. */
285 switch (parser.FullToken.Token.Type) {
286 case TGSI_TOKEN_TYPE_DECLARATION:
287 /* Allocated registers sitting at the beginning
288 * of the program. */
289 r300_fs_declare(assembler, &parser.FullToken.FullDeclaration);
290 break;
291 case TGSI_TOKEN_TYPE_INSTRUCTION:
292 r500_fs_instruction(fs, assembler,
293 &parser.FullToken.FullInstruction);
294 }
295
296 }
297
298 debug_printf("%d texs and %d colors, first free reg is %d\n",
299 assembler->tex_count, assembler->color_count,
300 assembler->tex_count + assembler->color_count);
301
302 r500_fs_finalize(fs, assembler);
303
304 tgsi_dump(fs->shader.state.tokens);
305 r500_fs_dump(fs);
306
307 //r500_copy_passthrough_shader(fs);
308
309 tgsi_parse_free(&parser);
310 FREE(assembler);
311 }