Initial r5xx fragment program compiler support.
[mesa.git] / src / mesa / drivers / dri / r300 / r500_fragprog.c
1 /*
2 * Copyright (C) 2005 Ben Skeggs.
3 *
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial
16 * portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 */
27
28 /**
29 * \file
30 *
31 * \author Ben Skeggs <darktama@iinet.net.au>
32 *
33 * \author Jerome Glisse <j.glisse@gmail.com>
34 *
35 * \author Corbin Simpson <MostAwesomeDude@gmail.com>
36 *
37 * \todo Depth write, WPOS/FOGC inputs
38 *
39 * \todo FogOption
40 *
41 * \todo Verify results of opcodes for accuracy, I've only checked them in
42 * specific cases.
43 */
44
45 #include "glheader.h"
46 #include "macros.h"
47 #include "enums.h"
48 #include "shader/prog_instruction.h"
49 #include "shader/prog_parameter.h"
50 #include "shader/prog_print.h"
51
52 #include "r300_context.h"
53 #include "r500_fragprog.h"
54 #include "r300_reg.h"
55 #include "r300_state.h"
56
/*
 * Diagnostic and compile-state helper macros
 */

/*
 * Report a compile problem and flag the program as failed.
 *
 * Writes "<file>::<function>(): <message>" followed by a newline to stderr,
 * then sets fp->error.  A variable named `fp` whose pointee has an `error`
 * member must be in scope wherever this macro is used.
 */
#define ERROR(fmt, ...) \
	do { \
		fprintf(stderr, "%s::%s(): " fmt "\n", \
			__FILE__, __FUNCTION__, ##__VA_ARGS__); \
		fp->error = GL_TRUE; \
	} while (0)

/* Declares a local `cs` initialised from fp->cs; requires `fp` in scope. */
#define COMPILE_STATE struct r300_pfs_compile_state *cs = fp->cs
67
/* "Register" flags */
/* ORed into a register index that originates from PROGRAM_CONSTANT. */
#define REG_CONSTANT (1 << 8)

/* Swizzle tools */
#define R500_SWIZZLE_ZERO 4
#define R500_SWIZZLE_HALF 5
#define R500_SWIZZLE_ONE 6
/* Selector value 6 (R500_SWIZZLE_ONE) in three consecutive 3-bit fields. */
#define R500_SWIZ_RGB_ONE ((6 << 0) | (6 << 3) | (6 << 6))

/*
 * The MAKE_SWIZ_* macros shift an already-assembled swizzle value into the
 * bit position of one operand within an instruction word.  The argument is
 * parenthesized so that a compound expression such as `a | b` is shifted as
 * a whole instead of being split by operator precedence.
 */
/* Swizzles for inst3 */
#define MAKE_SWIZ_RGB_A(x) ((x) << 2)
#define MAKE_SWIZ_RGB_B(x) ((x) << 15)
/* Swizzles for inst4 */
#define MAKE_SWIZ_ALPHA_A(x) ((x) << 14)
#define MAKE_SWIZ_ALPHA_B(x) ((x) << 21)
/* Swizzle for inst5 */
#define MAKE_SWIZ_RGBA_C(x) ((x) << 14)
#define MAKE_SWIZ_ALPHA_C(x) ((x) << 27)
85
86 static inline GLuint make_rgb_swizzle(struct prog_src_register src) {
87 GLuint swiz = 0x0;
88 GLuint temp;
89 /* This could be optimized, but it should be plenty fast already. */
90 for (int i = 0; i < 3; i++) {
91 temp = (src.Swizzle >> i*3) & 0x7;
92 /* Fix SWIZZLE_ONE */
93 if (temp == 5) temp++;
94 swiz += temp << i*3;
95 }
96 return swiz;
97 }
98
99 static inline GLuint make_alpha_swizzle(struct prog_src_register src) {
100 GLuint swiz = (src.Swizzle >> 12) & 0x7;
101 if (swiz == 5) swiz++;
102 return swiz;
103 }
104
105 static GLuint make_src(struct prog_src_register src) {
106 GLuint reg = src.Index;
107 switch (src.File) {
108 case PROGRAM_INPUT:
109 /* Ugly hack needed to work around Mesa;
110 * fragments don't get loaded right otherwise! */
111 reg = 0x0;
112 break;
113 case PROGRAM_CONSTANT:
114 reg |= REG_CONSTANT;
115 break;
116 default:
117 // ERROR("Can't handle src.File %x\n", src.File);
118 break;
119 }
120 return reg;
121 }
122
123 static GLuint make_dest(struct prog_dst_register dest) {
124 GLuint reg = dest.Index;
125 switch (dest.File) {
126 case PROGRAM_OUTPUT:
127 /* Eventually we may need to handle multiple
128 * rendering targets... */
129 break;
130 case PROGRAM_CONSTANT:
131 reg |= REG_CONSTANT;
132 break;
133 default:
134 // ERROR("Can't handle dest.File %x\n", dest.File);
135 break;
136 }
137 return reg;
138 }
139
140 static void dumb_shader(struct r500_fragment_program *fp)
141 {
142 /* R500_INST_TYPE_TEX */
143 fp->inst[0].inst0 = 0x7808;
144 fp->inst[0].inst1 = R500_TEX_ID(0) | R500_TEX_INST_LD | R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED;
145 fp->inst[0].inst2 = R500_TEX_SRC_ADDR(0) | R500_TEX_SRC_S_SWIZ_R |
146 R500_TEX_SRC_T_SWIZ_G |
147 R500_TEX_DST_ADDR(0) |
148 R500_TEX_DST_R_SWIZ_R |
149 R500_TEX_DST_G_SWIZ_G |
150 R500_TEX_DST_B_SWIZ_B |
151 R500_TEX_DST_A_SWIZ_A;
152 fp->inst[0].inst3 = R500_DX_ADDR(0) |
153 R500_DX_S_SWIZ_R |
154 R500_DX_T_SWIZ_R |
155 R500_DX_R_SWIZ_R |
156 R500_DX_Q_SWIZ_R |
157 R500_DY_ADDR(0) |
158 R500_DY_S_SWIZ_R |
159 R500_DY_T_SWIZ_R |
160 R500_DY_R_SWIZ_R |
161 R500_DY_Q_SWIZ_R;
162 fp->inst[0].inst4 = 0x0;
163 fp->inst[0].inst5 = 0x0;
164
165 fp->inst[1].inst0 = R500_INST_TYPE_OUT |
166 R500_INST_TEX_SEM_WAIT |
167 R500_INST_LAST |
168 R500_INST_RGB_OMASK_R |
169 R500_INST_RGB_OMASK_G |
170 R500_INST_RGB_OMASK_B |
171 R500_INST_ALPHA_OMASK;
172 fp->inst[1].inst1 = R500_RGB_ADDR0(0) |
173 R500_RGB_ADDR1(0) |
174 R500_RGB_ADDR1_CONST |
175 R500_RGB_ADDR2(0) |
176 R500_RGB_ADDR2_CONST |
177 R500_RGB_SRCP_OP_1_MINUS_2RGB0;
178 fp->inst[1].inst2 = R500_ALPHA_ADDR0(0) |
179 R500_ALPHA_ADDR1(0) |
180 R500_ALPHA_ADDR1_CONST |
181 R500_ALPHA_ADDR2(0) |
182 R500_ALPHA_ADDR2_CONST |
183 R500_ALPHA_SRCP_OP_1_MINUS_2A0;
184 fp->inst[1].inst3 = R500_ALU_RGB_SEL_A_SRC0 |
185 R500_ALU_RGB_R_SWIZ_A_R |
186 R500_ALU_RGB_G_SWIZ_A_G |
187 R500_ALU_RGB_B_SWIZ_A_B |
188 R500_ALU_RGB_SEL_B_SRC0 |
189 R500_ALU_RGB_R_SWIZ_B_1 |
190 R500_ALU_RGB_B_SWIZ_B_1 |
191 R500_ALU_RGB_G_SWIZ_B_1;
192 fp->inst[1].inst4 = R500_ALPHA_OP_MAD |
193 R500_ALPHA_SWIZ_A_A |
194 R500_ALPHA_SWIZ_B_1;
195 fp->inst[1].inst5 = R500_ALU_RGBA_OP_MAD |
196 R500_ALU_RGBA_R_SWIZ_0 |
197 R500_ALU_RGBA_G_SWIZ_0 |
198 R500_ALU_RGBA_B_SWIZ_0 |
199 R500_ALU_RGBA_A_SWIZ_0;
200
201 fp->cs->nrslots = 2;
202 fp->translated = GL_TRUE;
203 }
204
205 static GLboolean parse_program(struct r500_fragment_program *fp)
206 {
207 struct gl_fragment_program *mp = &fp->mesa_program;
208 const struct prog_instruction *inst = mp->Base.Instructions;
209 struct prog_instruction *fpi;
210 GLuint src[3], dest, temp[2];
211 int flags, mask, counter = 0;
212
213 if (!inst || inst[0].Opcode == OPCODE_END) {
214 ERROR("The program is empty!\n");
215 return GL_FALSE;
216 }
217
218 for (fpi = mp->Base.Instructions; fpi->Opcode != OPCODE_END; fpi++) {
219
220 if (fpi->Opcode != OPCODE_KIL) {
221 dest = make_dest(fpi->DstReg);
222 mask = fpi->DstReg.WriteMask;
223 }
224
225 switch (fpi->Opcode) {
226 case OPCODE_ADD:
227 src[0] = make_src(fpi->SrcReg[0]);
228 src[1] = make_src(fpi->SrcReg[1]);
229 /* Variation on MAD: 1*src0+src1 */
230 fp->inst[counter].inst0 = R500_INST_TYPE_ALU
231 | R500_INST_RGB_WMASK_R | R500_INST_RGB_WMASK_G
232 | R500_INST_RGB_WMASK_B | R500_INST_ALPHA_WMASK;
233 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
234 | R500_RGB_ADDR1(src[1]);
235 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
236 | R500_ALPHA_ADDR1(src[1]);
237 fp->inst[counter].inst3 = /* 1 */
238 MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE)
239 | R500_ALU_RGB_SEL_B_SRC0 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[0]));
240 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
241 | R500_ALPHA_ADDRD(dest)
242 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_B(R500_SWIZZLE_ONE)
243 | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[0]));
244 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
245 | R500_ALU_RGBA_ADDRD(dest)
246 | R500_ALU_RGBA_SEL_C_SRC1
247 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[1]))
248 | R500_ALU_RGBA_ALPHA_SEL_C_SRC1
249 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[1]));
250 break;
251 case OPCODE_MAD:
252 src[0] = make_src(fpi->SrcReg[0]);
253 src[1] = make_src(fpi->SrcReg[1]);
254 src[2] = make_src(fpi->SrcReg[2]);
255 fp->inst[counter].inst0 = R500_INST_TYPE_ALU
256 | R500_INST_RGB_WMASK_R | R500_INST_RGB_WMASK_G
257 | R500_INST_RGB_WMASK_B | R500_INST_ALPHA_WMASK;
258 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
259 | R500_RGB_ADDR1(src[1]);
260 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
261 | R500_ALPHA_ADDR1(src[1]);
262 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
263 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
264 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
265 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
266 | R500_ALPHA_ADDRD(dest)
267 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
268 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
269 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
270 | R500_ALU_RGBA_ADDRD(dest)
271 | R500_ALU_RGBA_SEL_C_SRC2
272 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[2]))
273 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
274 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[2]));
275 break;
276 case OPCODE_MOV:
277 src[0] = make_src(fpi->SrcReg[0]);
278 /* We use MAX, but MIN, CND, and CMP also work.
279 * Just remember to disable the OMOD! */
280 fp->inst[counter].inst0 = R500_INST_TYPE_ALU
281 | R500_INST_RGB_WMASK_R | R500_INST_RGB_WMASK_G
282 | R500_INST_RGB_WMASK_B | R500_INST_ALPHA_WMASK;
283 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
284 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
285 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
286 | R500_ALU_RGB_R_SWIZ_A_R | R500_ALU_RGB_G_SWIZ_A_G | R500_ALU_RGB_B_SWIZ_A_B
287 | R500_ALU_RGB_SEL_B_SRC0
288 | R500_ALU_RGB_R_SWIZ_B_R | R500_ALU_RGB_G_SWIZ_B_G | R500_ALU_RGB_B_SWIZ_B_B
289 | R500_ALU_RGB_OMOD_DISABLE;
290 fp->inst[counter].inst4 = R500_ALPHA_OP_MAX
291 | R500_ALPHA_ADDRD(dest)
292 | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SEL_B_SRC0
293 | R500_ALPHA_OMOD_DISABLE;
294 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAX
295 | R500_ALU_RGBA_ADDRD(dest);
296 break;
297 default:
298 ERROR("unknown fpi->Opcode %d\n", fpi->Opcode);
299 break;
300 }
301
302 /* Finishing touches */
303 if (fpi->SaturateMode == SATURATE_ZERO_ONE) {
304 fp->inst[counter].inst0 |= R500_INST_RGB_CLAMP | R500_INST_ALPHA_CLAMP;
305 }
306 if (fpi->DstReg.File == PROGRAM_OUTPUT) {
307 fp->inst[counter].inst0 |= R500_INST_TYPE_OUT
308 | R500_INST_RGB_OMASK_R | R500_INST_RGB_OMASK_G
309 | R500_INST_RGB_OMASK_B | R500_INST_ALPHA_OMASK;
310 }
311
312 counter++;
313
314 if (fp->error)
315 return GL_FALSE;
316
317 }
318
319 fp->cs->nrslots = counter;
320
321 /* Finish him! (If it's an output instruction...)
322 * Yes, I know it's ugly... */
323 if ((fp->inst[counter].inst0 & 0x3) ^ 0x2) {
324 fp->inst[counter].inst0 |= R500_INST_TYPE_OUT
325 | R500_INST_TEX_SEM_WAIT | R500_INST_LAST;
326 }
327
328 return GL_TRUE;
329 }
330
331 static void init_program(r300ContextPtr r300, struct r500_fragment_program *fp)
332 {
333 struct r300_pfs_compile_state *cs = NULL;
334 struct gl_fragment_program *mp = &fp->mesa_program;
335 struct prog_instruction *fpi;
336 GLuint InputsRead = mp->Base.InputsRead;
337 GLuint temps_used = 0; /* for fp->temps[] */
338 int i, j;
339
340 /* New compile, reset tracking data */
341 fp->optimization =
342 driQueryOptioni(&r300->radeon.optionCache, "fp_optimization");
343 fp->translated = GL_FALSE;
344 fp->error = GL_FALSE;
345 fp->cs = cs = &(R300_CONTEXT(fp->ctx)->state.pfs_compile);
346 fp->cur_node = 0;
347 fp->first_node_has_tex = 0;
348 fp->const_nr = 0;
349 fp->max_temp_idx = 0;
350 fp->node[0].alu_end = -1;
351 fp->node[0].tex_end = -1;
352
353 _mesa_memset(cs, 0, sizeof(*fp->cs));
354 for (i = 0; i < PFS_MAX_ALU_INST; i++) {
355 for (j = 0; j < 3; j++) {
356 cs->slot[i].vsrc[j] = SRC_CONST;
357 cs->slot[i].ssrc[j] = SRC_CONST;
358 }
359 }
360
361 /* Work out what temps the Mesa inputs correspond to, this must match
362 * what setup_rs_unit does, which shouldn't be a problem as rs_unit
363 * configures itself based on the fragprog's InputsRead
364 *
365 * NOTE: this depends on get_hw_temp() allocating registers in order,
366 * starting from register 0.
367 */
368
369 #if 0
370 /* Texcoords come first */
371 for (i = 0; i < fp->ctx->Const.MaxTextureUnits; i++) {
372 if (InputsRead & (FRAG_BIT_TEX0 << i)) {
373 cs->inputs[FRAG_ATTRIB_TEX0 + i].refcount = 0;
374 cs->inputs[FRAG_ATTRIB_TEX0 + i].reg =
375 get_hw_temp(fp, 0);
376 }
377 }
378 InputsRead &= ~FRAG_BITS_TEX_ANY;
379
380 /* fragment position treated as a texcoord */
381 if (InputsRead & FRAG_BIT_WPOS) {
382 cs->inputs[FRAG_ATTRIB_WPOS].refcount = 0;
383 cs->inputs[FRAG_ATTRIB_WPOS].reg = get_hw_temp(fp, 0);
384 insert_wpos(&mp->Base);
385 }
386 InputsRead &= ~FRAG_BIT_WPOS;
387
388 /* Then primary colour */
389 if (InputsRead & FRAG_BIT_COL0) {
390 cs->inputs[FRAG_ATTRIB_COL0].refcount = 0;
391 cs->inputs[FRAG_ATTRIB_COL0].reg = get_hw_temp(fp, 0);
392 }
393 InputsRead &= ~FRAG_BIT_COL0;
394
395 /* Secondary color */
396 if (InputsRead & FRAG_BIT_COL1) {
397 cs->inputs[FRAG_ATTRIB_COL1].refcount = 0;
398 cs->inputs[FRAG_ATTRIB_COL1].reg = get_hw_temp(fp, 0);
399 }
400 InputsRead &= ~FRAG_BIT_COL1;
401
402 /* Anything else */
403 if (InputsRead) {
404 WARN_ONCE("Don't know how to handle inputs 0x%x\n", InputsRead);
405 /* force read from hwreg 0 for now */
406 for (i = 0; i < 32; i++)
407 if (InputsRead & (1 << i))
408 cs->inputs[i].reg = 0;
409 }
410 #endif
411
412 /* Pre-parse the mesa program, grabbing refcounts on input/temp regs.
413 * That way, we can free up the reg when it's no longer needed
414 */
415 if (!mp->Base.Instructions) {
416 ERROR("No instructions found in program\n");
417 return;
418 }
419
420 for (fpi = mp->Base.Instructions; fpi->Opcode != OPCODE_END; fpi++) {
421 int idx;
422
423 for (i = 0; i < 3; i++) {
424 idx = fpi->SrcReg[i].Index;
425 switch (fpi->SrcReg[i].File) {
426 case PROGRAM_TEMPORARY:
427 if (!(temps_used & (1 << idx))) {
428 cs->temps[idx].reg = -1;
429 cs->temps[idx].refcount = 1;
430 temps_used |= (1 << idx);
431 } else
432 cs->temps[idx].refcount++;
433 break;
434 case PROGRAM_INPUT:
435 cs->inputs[idx].refcount++;
436 break;
437 default:
438 break;
439 }
440 }
441
442 idx = fpi->DstReg.Index;
443 if (fpi->DstReg.File == PROGRAM_TEMPORARY) {
444 if (!(temps_used & (1 << idx))) {
445 cs->temps[idx].reg = -1;
446 cs->temps[idx].refcount = 1;
447 temps_used |= (1 << idx);
448 } else
449 cs->temps[idx].refcount++;
450 }
451 }
452 cs->temp_in_use = temps_used;
453 }
454
455 static void update_params(struct r500_fragment_program *fp)
456 {
457 struct gl_fragment_program *mp = &fp->mesa_program;
458
459 /* Ask Mesa nicely to fill in ParameterValues for us */
460 if (mp->Base.Parameters)
461 _mesa_load_state_parameters(fp->ctx, mp->Base.Parameters);
462 }
463
464 void r500TranslateFragmentShader(r300ContextPtr r300,
465 struct r500_fragment_program *fp)
466 {
467
468 struct r300_pfs_compile_state *cs = NULL;
469
470 if (!fp->translated) {
471
472 /* I need to see what I'm working with! */
473 fprintf(stderr, "Mesa program:\n");
474 fprintf(stderr, "-------------\n");
475 _mesa_print_program(&fp->mesa_program.Base);
476 fflush(stdout);
477
478 init_program(r300, fp);
479 cs = fp->cs;
480
481 if (parse_program(fp) == GL_FALSE) {
482 ERROR("Huh. Couldn't parse program. There should be additional errors explaining why.\nUsing dumb shader...\n");
483 dumb_shader(fp);
484 return;
485 }
486
487 /* Finish off */
488 fp->node[fp->cur_node].alu_end =
489 cs->nrslots - fp->node[fp->cur_node].alu_offset - 1;
490 if (fp->node[fp->cur_node].tex_end < 0)
491 fp->node[fp->cur_node].tex_end = 0;
492 fp->alu_offset = 0;
493 fp->alu_end = cs->nrslots - 1;
494 //assert(fp->node[fp->cur_node].alu_end >= 0);
495 //assert(fp->alu_end >= 0);
496
497 fp->translated = GL_TRUE;
498 r300UpdateStateParameters(fp->ctx, _NEW_PROGRAM);
499 }
500
501 update_params(fp);
502 }