Merge branch 'r345-cleanup' of git://people.freedesktop.org/~agd5f/mesa into r500test
[mesa.git] / src / mesa / drivers / dri / r300 / r500_fragprog.c
1 /*
2 * Copyright (C) 2005 Ben Skeggs.
3 *
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial
16 * portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 */
27
28 /**
29 * \file
30 *
31 * \author Ben Skeggs <darktama@iinet.net.au>
32 *
33 * \author Jerome Glisse <j.glisse@gmail.com>
34 *
35 * \author Corbin Simpson <MostAwesomeDude@gmail.com>
36 *
37 * \todo Depth write, WPOS/FOGC inputs
38 *
39 * \todo FogOption
40 *
41 * \todo Verify results of opcodes for accuracy, I've only checked them in
42 * specific cases.
43 */
44
45 #include "glheader.h"
46 #include "macros.h"
47 #include "enums.h"
48 #include "shader/prog_instruction.h"
49 #include "shader/prog_parameter.h"
50 #include "shader/prog_print.h"
51
52 #include "r300_context.h"
53 #include "r500_fragprog.h"
54 #include "r300_reg.h"
55 #include "r300_state.h"
56
/*
 * Useful macros and values
 */

/* Print a formatted error message to stderr, prefixed with the file and
 * function name, and flag the fragment program as failed.
 * Requires a variable named `fp` (with an `error` member) in scope. */
#define ERROR(fmt, args...) do { \
		fprintf(stderr, "%s::%s(): " fmt "\n", \
			__FILE__, __FUNCTION__, ##args); \
		fp->error = GL_TRUE; \
	} while(0)

/* Declares a local `cs` pointing at the compile state of `fp`. */
#define COMPILE_STATE struct r300_pfs_compile_state *cs = fp->cs

/* "Register" flags */
#define REG_CONSTANT (1 << 8)

/* Swizzle tools */
#define R500_SWIZZLE_ZERO 4
#define R500_SWIZZLE_HALF 5
#define R500_SWIZZLE_ONE 6
#define R500_SWIZ_RGB_ZERO ((4 << 0) | (4 << 3) | (4 << 6))
#define R500_SWIZ_RGB_ONE ((6 << 0) | (6 << 3) | (6 << 6))

/* The MAKE_SWIZ_* helpers shift an already-packed swizzle value into its
 * field of the named instruction word.  Argument and expansion are fully
 * parenthesized so that compound expressions can be passed safely. */

/* Swizzles for inst2 */
#define MAKE_SWIZ_TEX_STRQ(x) ((x) << 8)
#define MAKE_SWIZ_TEX_RGBA(x) ((x) << 24)
/* Swizzles for inst3 */
#define MAKE_SWIZ_RGB_A(x) ((x) << 2)
#define MAKE_SWIZ_RGB_B(x) ((x) << 15)
/* Swizzles for inst4 */
#define MAKE_SWIZ_ALPHA_A(x) ((x) << 14)
#define MAKE_SWIZ_ALPHA_B(x) ((x) << 21)
/* Swizzle for inst5 */
#define MAKE_SWIZ_RGBA_C(x) ((x) << 14)
#define MAKE_SWIZ_ALPHA_C(x) ((x) << 27)
89
90 static inline GLuint make_rgb_swizzle(struct prog_src_register src) {
91 GLuint swiz = 0x0;
92 GLuint temp;
93 /* This could be optimized, but it should be plenty fast already. */
94 for (int i = 0; i < 3; i++) {
95 temp = (src.Swizzle >> i*3) & 0x7;
96 /* Fix SWIZZLE_ONE */
97 if (temp == 5) temp++;
98 swiz += temp << i*3;
99 }
100 return swiz;
101 }
102
103 static inline GLuint make_alpha_swizzle(struct prog_src_register src) {
104 GLuint swiz = (src.Swizzle >> 12) & 0x7;
105 if (swiz == 5) swiz++;
106 return swiz;
107 }
108
109 static inline GLuint make_strq_swizzle(struct prog_src_register src) {
110 GLuint swiz = 0x0;
111 GLuint temp = src.Swizzle;
112 for (int i = 0; i < 4; i++) {
113 swiz += (temp & 0x3) << i*2;
114 temp >>= 3;
115 }
116 return swiz;
117 }
118
119 /* Borrowed verbatim from r300_fragprog since it hasn't changed. */
120 static GLuint emit_const4fv(struct r500_fragment_program *fp,
121 const GLfloat * cp)
122 {
123 GLuint reg = 0x0;
124 int index;
125
126 for (index = 0; index < fp->const_nr; ++index) {
127 if (fp->constant[index] == cp)
128 break;
129 }
130
131 if (index >= fp->const_nr) {
132 /* TODO: This should be r5xx nums, not r300 */
133 if (index >= PFS_NUM_CONST_REGS) {
134 ERROR("Out of hw constants!\n");
135 return reg;
136 }
137
138 fp->const_nr++;
139 fp->constant[index] = cp;
140 }
141
142 reg = index | REG_CONSTANT;
143 return reg;
144 }
145
146 static GLuint make_src(struct r500_fragment_program *fp, struct prog_src_register src) {
147 GLuint reg;
148 switch (src.File) {
149 case PROGRAM_TEMPORARY:
150 reg = (src.Index << 0x1) | 0x1;
151 break;
152 case PROGRAM_INPUT:
153 /* Ugly hack needed to work around Mesa;
154 * fragments don't get loaded right otherwise! */
155 reg = 0x0;
156 break;
157 case PROGRAM_CONSTANT:
158 reg = emit_const4fv(fp, fp->mesa_program.Base.Parameters->
159 ParameterValues[src.Index]);
160 break;
161 default:
162 ERROR("Can't handle src.File %x\n", src.File);
163 reg = 0x0;
164 break;
165 }
166 return reg;
167 }
168
169 static GLuint make_dest(struct r500_fragment_program *fp, struct prog_dst_register dest) {
170 GLuint reg;
171 switch (dest.File) {
172 case PROGRAM_TEMPORARY:
173 reg = (dest.Index << 0x1) | 0x1;
174 break;
175 case PROGRAM_OUTPUT:
176 /* Eventually we may need to handle multiple
177 * rendering targets... */
178 reg = dest.Index;
179 break;
180 default:
181 ERROR("Can't handle dest.File %x\n", dest.File);
182 reg = 0x0;
183 break;
184 }
185 return reg;
186 }
187
188 static void dumb_shader(struct r500_fragment_program *fp)
189 {
190 /* R500_INST_TYPE_TEX? */
191 fp->inst[0].inst0 = 0x7808;
192 fp->inst[0].inst1 = R500_TEX_ID(0) | R500_TEX_INST_LD | R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED;
193 fp->inst[0].inst2 = R500_TEX_SRC_ADDR(0) | R500_TEX_SRC_S_SWIZ_R |
194 R500_TEX_SRC_T_SWIZ_G |
195 R500_TEX_DST_ADDR(0) |
196 R500_TEX_DST_R_SWIZ_R |
197 R500_TEX_DST_G_SWIZ_G |
198 R500_TEX_DST_B_SWIZ_B |
199 R500_TEX_DST_A_SWIZ_A;
200 fp->inst[0].inst3 = R500_DX_ADDR(0) |
201 R500_DX_S_SWIZ_R |
202 R500_DX_T_SWIZ_R |
203 R500_DX_R_SWIZ_R |
204 R500_DX_Q_SWIZ_R |
205 R500_DY_ADDR(0) |
206 R500_DY_S_SWIZ_R |
207 R500_DY_T_SWIZ_R |
208 R500_DY_R_SWIZ_R |
209 R500_DY_Q_SWIZ_R;
210 fp->inst[0].inst4 = 0x0;
211 fp->inst[0].inst5 = 0x0;
212
213 fp->inst[1].inst0 = R500_INST_TYPE_OUT |
214 R500_INST_TEX_SEM_WAIT |
215 R500_INST_LAST |
216 R500_INST_RGB_OMASK_R |
217 R500_INST_RGB_OMASK_G |
218 R500_INST_RGB_OMASK_B |
219 R500_INST_ALPHA_OMASK;
220 fp->inst[1].inst1 = R500_RGB_ADDR0(0) |
221 R500_RGB_ADDR1(0) |
222 R500_RGB_ADDR1_CONST |
223 R500_RGB_ADDR2(0) |
224 R500_RGB_ADDR2_CONST |
225 R500_RGB_SRCP_OP_1_MINUS_2RGB0;
226 fp->inst[1].inst2 = R500_ALPHA_ADDR0(0) |
227 R500_ALPHA_ADDR1(0) |
228 R500_ALPHA_ADDR1_CONST |
229 R500_ALPHA_ADDR2(0) |
230 R500_ALPHA_ADDR2_CONST |
231 R500_ALPHA_SRCP_OP_1_MINUS_2A0;
232 fp->inst[1].inst3 = R500_ALU_RGB_SEL_A_SRC0 |
233 R500_ALU_RGB_R_SWIZ_A_R |
234 R500_ALU_RGB_G_SWIZ_A_G |
235 R500_ALU_RGB_B_SWIZ_A_B |
236 R500_ALU_RGB_SEL_B_SRC0 |
237 R500_ALU_RGB_R_SWIZ_B_1 |
238 R500_ALU_RGB_B_SWIZ_B_1 |
239 R500_ALU_RGB_G_SWIZ_B_1;
240 fp->inst[1].inst4 = R500_ALPHA_OP_MAD |
241 R500_ALPHA_SWIZ_A_A |
242 R500_ALPHA_SWIZ_B_1;
243 fp->inst[1].inst5 = R500_ALU_RGBA_OP_MAD |
244 R500_ALU_RGBA_R_SWIZ_0 |
245 R500_ALU_RGBA_G_SWIZ_0 |
246 R500_ALU_RGBA_B_SWIZ_0 |
247 R500_ALU_RGBA_A_SWIZ_0;
248
249 fp->cs->nrslots = 2;
250 fp->translated = GL_TRUE;
251 }
252
/* Placeholder: currently emits nothing. */
static void emit_alu(struct r500_fragment_program *fp) {
	(void) fp;
}
255
256 static GLboolean parse_program(struct r500_fragment_program *fp)
257 {
258 struct gl_fragment_program *mp = &fp->mesa_program;
259 const struct prog_instruction *inst = mp->Base.Instructions;
260 struct prog_instruction *fpi;
261 GLuint src[3], dest, temp[2];
262 int flags, mask, counter = 0;
263
264 if (!inst || inst[0].Opcode == OPCODE_END) {
265 ERROR("The program is empty!\n");
266 return GL_FALSE;
267 }
268
269 for (fpi = mp->Base.Instructions; fpi->Opcode != OPCODE_END; fpi++) {
270
271 if (fpi->Opcode != OPCODE_KIL) {
272 dest = make_dest(fp, fpi->DstReg);
273 mask = fpi->DstReg.WriteMask << 11;
274 }
275
276 switch (fpi->Opcode) {
277 case OPCODE_ABS:
278 src[0] = make_src(fp, fpi->SrcReg[0]);
279 /* Variation on MOV */
280 fp->inst[counter].inst0 = R500_INST_TYPE_ALU
281 | mask;
282 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
283 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
284 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
285 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
286 | R500_ALU_RGB_MOD_A_ABS | R500_ALU_RGB_SEL_B_SRC0
287 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[0]));
288 fp->inst[counter].inst4 = R500_ALPHA_OP_MAX
289 | R500_ALPHA_ADDRD(dest)
290 | R500_ALPHA_SEL_A_SRC0
291 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])) | R500_ALPHA_MOD_A_ABS
292 | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[0]));
293 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAX
294 | R500_ALU_RGBA_ADDRD(dest);
295 break;
296 case OPCODE_ADD:
297 src[0] = make_src(fp, fpi->SrcReg[0]);
298 src[1] = make_src(fp, fpi->SrcReg[1]);
299 /* Variation on MAD: 1*src0+src1 */
300 fp->inst[counter].inst0 = R500_INST_TYPE_ALU
301 | mask;
302 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
303 | R500_RGB_ADDR1(src[1]);
304 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
305 | R500_ALPHA_ADDR1(src[1]);
306 fp->inst[counter].inst3 = /* 1 */
307 MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE)
308 | R500_ALU_RGB_SEL_B_SRC0 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[0]));
309 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
310 | R500_ALPHA_ADDRD(dest)
311 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE)
312 | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[0]));
313 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
314 | R500_ALU_RGBA_ADDRD(dest)
315 | R500_ALU_RGBA_SEL_C_SRC1
316 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[1]))
317 | R500_ALU_RGBA_ALPHA_SEL_C_SRC1
318 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[1]));
319 break;
320 case OPCODE_DP3:
321 src[0] = make_src(fp, fpi->SrcReg[0]);
322 src[1] = make_src(fp, fpi->SrcReg[1]);
323 src[2] = make_src(fp, fpi->SrcReg[2]);
324 fp->inst[counter].inst0 = R500_INST_TYPE_ALU
325 | mask;
326 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
327 | R500_RGB_ADDR1(src[1]) | R500_RGB_ADDR2(src[2]);
328 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
329 | R500_ALPHA_ADDR1(src[1]) | R500_ALPHA_ADDR2(src[2]);
330 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
331 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
332 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
333 fp->inst[counter].inst4 = R500_ALPHA_OP_DP
334 | R500_ALPHA_ADDRD(dest)
335 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
336 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
337 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_DP3
338 | R500_ALU_RGBA_ADDRD(dest)
339 | R500_ALU_RGBA_SEL_C_SRC2
340 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[2]))
341 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
342 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[2]));
343 break;
344 case OPCODE_DP4:
345 src[0] = make_src(fp, fpi->SrcReg[0]);
346 src[1] = make_src(fp, fpi->SrcReg[1]);
347 src[2] = make_src(fp, fpi->SrcReg[2]);
348 /* Based on DP3 */
349 fp->inst[counter].inst0 = R500_INST_TYPE_ALU
350 | mask;
351 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
352 | R500_RGB_ADDR1(src[1]) | R500_RGB_ADDR2(src[2]);
353 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
354 | R500_ALPHA_ADDR1(src[1]) | R500_ALPHA_ADDR2(src[2]);
355 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
356 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
357 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
358 fp->inst[counter].inst4 = R500_ALPHA_OP_DP
359 | R500_ALPHA_ADDRD(dest)
360 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
361 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
362 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_DP4
363 | R500_ALU_RGBA_ADDRD(dest)
364 | R500_ALU_RGBA_SEL_C_SRC2
365 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[2]))
366 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
367 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[2]));
368 break;
369 case OPCODE_MAD:
370 src[0] = make_src(fp, fpi->SrcReg[0]);
371 src[1] = make_src(fp, fpi->SrcReg[1]);
372 src[2] = make_src(fp, fpi->SrcReg[2]);
373 fp->inst[counter].inst0 = R500_INST_TYPE_ALU
374 | mask;
375 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
376 | R500_RGB_ADDR1(src[1]) | R500_RGB_ADDR2(src[2]);
377 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
378 | R500_ALPHA_ADDR1(src[1]) | R500_ALPHA_ADDR2(src[2]);
379 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
380 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
381 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
382 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
383 | R500_ALPHA_ADDRD(dest)
384 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
385 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
386 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
387 | R500_ALU_RGBA_ADDRD(dest)
388 | R500_ALU_RGBA_SEL_C_SRC2
389 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[2]))
390 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
391 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[2]));
392 break;
393 case OPCODE_MAX:
394 src[0] = make_src(fp, fpi->SrcReg[0]);
395 src[1] = make_src(fp, fpi->SrcReg[0]);
396 fp->inst[counter].inst0 = R500_INST_TYPE_ALU | mask;
397 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) | R500_RGB_ADDR1(src[1]);
398 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) | R500_ALPHA_ADDR1(src[1]);
399 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
400 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
401 | R500_ALU_RGB_SEL_B_SRC1
402 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
403 fp->inst[counter].inst4 = R500_ALPHA_OP_MAX
404 | R500_ALPHA_ADDRD(dest)
405 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
406 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
407 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAX
408 | R500_ALU_RGBA_ADDRD(dest);
409 break;
410 case OPCODE_MIN:
411 src[0] = make_src(fp, fpi->SrcReg[0]);
412 src[1] = make_src(fp, fpi->SrcReg[0]);
413 fp->inst[counter].inst0 = R500_INST_TYPE_ALU | mask;
414 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) | R500_RGB_ADDR1(src[1]);
415 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) | R500_ALPHA_ADDR1(src[1]);
416 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
417 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
418 | R500_ALU_RGB_SEL_B_SRC1
419 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
420 fp->inst[counter].inst4 = R500_ALPHA_OP_MIN
421 | R500_ALPHA_ADDRD(dest)
422 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
423 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
424 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MIN
425 | R500_ALU_RGBA_ADDRD(dest);
426 break;
427 case OPCODE_MOV:
428 src[0] = make_src(fp, fpi->SrcReg[0]);
429 /* We use MAX, but MIN, CND, and CMP also work.
430 * Just remember to disable the OMOD! */
431 fp->inst[counter].inst0 = R500_INST_TYPE_ALU
432 | mask;
433 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
434 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
435 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
436 | R500_ALU_RGB_R_SWIZ_A_R | R500_ALU_RGB_G_SWIZ_A_G | R500_ALU_RGB_B_SWIZ_A_B
437 | R500_ALU_RGB_SEL_B_SRC0
438 | R500_ALU_RGB_R_SWIZ_B_R | R500_ALU_RGB_G_SWIZ_B_G | R500_ALU_RGB_B_SWIZ_B_B
439 | R500_ALU_RGB_OMOD_DISABLE;
440 fp->inst[counter].inst4 = R500_ALPHA_OP_MAX
441 | R500_ALPHA_ADDRD(dest)
442 | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SEL_B_SRC0
443 | R500_ALPHA_OMOD_DISABLE;
444 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAX
445 | R500_ALU_RGBA_ADDRD(dest);
446 break;
447 case OPCODE_MUL:
448 src[0] = make_src(fp, fpi->SrcReg[0]);
449 src[1] = make_src(fp, fpi->SrcReg[1]);
450 /* Variation on MAD: src0*src1+0 */
451 fp->inst[counter].inst0 = R500_INST_TYPE_ALU
452 | mask;
453 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
454 | R500_RGB_ADDR1(src[1]);
455 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
456 | R500_ALPHA_ADDR1(src[1]);
457 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
458 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
459 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
460 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
461 | R500_ALPHA_ADDRD(dest)
462 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
463 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
464 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
465 | R500_ALU_RGBA_ADDRD(dest)
466 // | R500_ALU_RGBA_SEL_C_SRC2
467 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
468 // | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
469 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
470 break;
471 case OPCODE_SUB:
472 src[0] = make_src(fp, fpi->SrcReg[0]);
473 src[1] = make_src(fp, fpi->SrcReg[1]);
474 /* Variation on MAD: 1*src0-src1 */
475 fp->inst[counter].inst0 = R500_INST_TYPE_ALU
476 | mask;
477 fp->inst[counter].inst1 = R500_RGB_ADDR1(src[0])
478 | R500_RGB_ADDR2(src[1]);
479 fp->inst[counter].inst2 = R500_ALPHA_ADDR1(src[0])
480 | R500_ALPHA_ADDR2(src[1]);
481 fp->inst[counter].inst3 = /* 1 */
482 MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE)
483 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[0]));
484 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
485 | R500_ALPHA_ADDRD(dest)
486 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE)
487 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[0]));
488 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
489 | R500_ALU_RGBA_ADDRD(dest)
490 | R500_ALU_RGBA_SEL_C_SRC2
491 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[1]))
492 | R500_ALU_RGBA_MOD_C_NEG
493 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
494 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[1]))
495 | R500_ALU_RGBA_ALPHA_MOD_C_NEG;
496 break;
497 case OPCODE_TEX:
498 src[0] = make_src(fp, fpi->SrcReg[0]);
499 fp->inst[counter].inst0 = R500_INST_TYPE_TEX | mask
500 | R500_INST_TEX_SEM_WAIT;
501 fp->inst[counter].inst1 = fpi->TexSrcUnit
502 | R500_TEX_INST_LD | R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED;
503 fp->inst[counter].inst2 = R500_TEX_SRC_ADDR(src[0])
504 /* | MAKE_SWIZ_TEX_STRQ(make_strq_swizzle(fpi->SrcReg[0])) */
505 | R500_TEX_SRC_S_SWIZ_R | R500_TEX_SRC_T_SWIZ_G
506 | R500_TEX_SRC_R_SWIZ_B | R500_TEX_SRC_Q_SWIZ_A
507 | R500_TEX_DST_ADDR(dest)
508 | R500_TEX_DST_R_SWIZ_R | R500_TEX_DST_G_SWIZ_G
509 | R500_TEX_DST_B_SWIZ_B | R500_TEX_DST_A_SWIZ_A;
510 fp->inst[counter].inst3 = 0x0;
511 fp->inst[counter].inst4 = 0x0;
512 fp->inst[counter].inst5 = 0x0;
513 break;
514 case OPCODE_TXP:
515 src[0] = make_src(fp, fpi->SrcReg[0]);
516 fp->inst[counter].inst0 = R500_INST_TYPE_TEX | mask;
517 fp->inst[counter].inst1 = fpi->TexSrcUnit
518 | R500_TEX_INST_PROJ | R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED;
519 fp->inst[counter].inst2 = R500_TEX_SRC_ADDR(src[0])
520 /* | MAKE_SWIZ_TEX_STRQ(make_strq_swizzle(fpi->SrcReg[0])) */
521 | R500_TEX_SRC_S_SWIZ_R | R500_TEX_SRC_T_SWIZ_G
522 | R500_TEX_SRC_R_SWIZ_B | R500_TEX_SRC_Q_SWIZ_A
523 | R500_TEX_DST_ADDR(dest)
524 | R500_TEX_DST_R_SWIZ_R | R500_TEX_DST_G_SWIZ_G
525 | R500_TEX_DST_B_SWIZ_B | R500_TEX_DST_A_SWIZ_A;
526 fp->inst[counter].inst3 = 0x0;
527 fp->inst[counter].inst4 = 0x0;
528 fp->inst[counter].inst5 = 0x0;
529 break;
530 default:
531 ERROR("unknown fpi->Opcode %d\n", fpi->Opcode);
532 break;
533 }
534
535 /* Finishing touches */
536 if (fpi->SaturateMode == SATURATE_ZERO_ONE) {
537 fp->inst[counter].inst0 |= R500_INST_RGB_CLAMP | R500_INST_ALPHA_CLAMP;
538 }
539 if (fpi->DstReg.File == PROGRAM_OUTPUT) {
540 fp->inst[counter].inst0 |= R500_INST_TYPE_OUT
541 | R500_INST_RGB_OMASK_R | R500_INST_RGB_OMASK_G
542 | R500_INST_RGB_OMASK_B | R500_INST_ALPHA_OMASK;
543 }
544
545 counter++;
546
547 if (fp->error)
548 return GL_FALSE;
549
550 }
551
552 fp->cs->nrslots = counter;
553
554 /* Finish him! (If it's an output instruction...)
555 * Yes, I know it's ugly... */
556 if ((fp->inst[counter].inst0 & 0x3) ^ 0x2) {
557 fp->inst[counter].inst0 |= R500_INST_TYPE_OUT
558 | R500_INST_TEX_SEM_WAIT | R500_INST_LAST;
559 }
560
561 return GL_TRUE;
562 }
563
564 static void init_program(r300ContextPtr r300, struct r500_fragment_program *fp)
565 {
566 struct r300_pfs_compile_state *cs = NULL;
567 struct gl_fragment_program *mp = &fp->mesa_program;
568 struct prog_instruction *fpi;
569 GLuint InputsRead = mp->Base.InputsRead;
570 GLuint temps_used = 0; /* for fp->temps[] */
571 int i, j;
572
573 /* New compile, reset tracking data */
574 fp->optimization =
575 driQueryOptioni(&r300->radeon.optionCache, "fp_optimization");
576 fp->translated = GL_FALSE;
577 fp->error = GL_FALSE;
578 fp->cs = cs = &(R300_CONTEXT(fp->ctx)->state.pfs_compile);
579 fp->cur_node = 0;
580 fp->first_node_has_tex = 0;
581 fp->const_nr = 0;
582 fp->max_temp_idx = 0;
583 fp->node[0].alu_end = -1;
584 fp->node[0].tex_end = -1;
585
586 _mesa_memset(cs, 0, sizeof(*fp->cs));
587 for (i = 0; i < PFS_MAX_ALU_INST; i++) {
588 for (j = 0; j < 3; j++) {
589 cs->slot[i].vsrc[j] = SRC_CONST;
590 cs->slot[i].ssrc[j] = SRC_CONST;
591 }
592 }
593
594 /* Work out what temps the Mesa inputs correspond to, this must match
595 * what setup_rs_unit does, which shouldn't be a problem as rs_unit
596 * configures itself based on the fragprog's InputsRead
597 *
598 * NOTE: this depends on get_hw_temp() allocating registers in order,
599 * starting from register 0.
600 */
601
602 #if 0
603 /* Texcoords come first */
604 for (i = 0; i < fp->ctx->Const.MaxTextureUnits; i++) {
605 if (InputsRead & (FRAG_BIT_TEX0 << i)) {
606 cs->inputs[FRAG_ATTRIB_TEX0 + i].refcount = 0;
607 cs->inputs[FRAG_ATTRIB_TEX0 + i].reg =
608 get_hw_temp(fp, 0);
609 }
610 }
611 InputsRead &= ~FRAG_BITS_TEX_ANY;
612
613 /* fragment position treated as a texcoord */
614 if (InputsRead & FRAG_BIT_WPOS) {
615 cs->inputs[FRAG_ATTRIB_WPOS].refcount = 0;
616 cs->inputs[FRAG_ATTRIB_WPOS].reg = get_hw_temp(fp, 0);
617 insert_wpos(&mp->Base);
618 }
619 InputsRead &= ~FRAG_BIT_WPOS;
620
621 /* Then primary colour */
622 if (InputsRead & FRAG_BIT_COL0) {
623 cs->inputs[FRAG_ATTRIB_COL0].refcount = 0;
624 cs->inputs[FRAG_ATTRIB_COL0].reg = get_hw_temp(fp, 0);
625 }
626 InputsRead &= ~FRAG_BIT_COL0;
627
628 /* Secondary color */
629 if (InputsRead & FRAG_BIT_COL1) {
630 cs->inputs[FRAG_ATTRIB_COL1].refcount = 0;
631 cs->inputs[FRAG_ATTRIB_COL1].reg = get_hw_temp(fp, 0);
632 }
633 InputsRead &= ~FRAG_BIT_COL1;
634
635 /* Anything else */
636 if (InputsRead) {
637 WARN_ONCE("Don't know how to handle inputs 0x%x\n", InputsRead);
638 /* force read from hwreg 0 for now */
639 for (i = 0; i < 32; i++)
640 if (InputsRead & (1 << i))
641 cs->inputs[i].reg = 0;
642 }
643 #endif
644
645 /* Pre-parse the mesa program, grabbing refcounts on input/temp regs.
646 * That way, we can free up the reg when it's no longer needed
647 */
648 if (!mp->Base.Instructions) {
649 ERROR("No instructions found in program\n");
650 return;
651 }
652
653 for (fpi = mp->Base.Instructions; fpi->Opcode != OPCODE_END; fpi++) {
654 int idx;
655
656 for (i = 0; i < 3; i++) {
657 idx = fpi->SrcReg[i].Index;
658 switch (fpi->SrcReg[i].File) {
659 case PROGRAM_TEMPORARY:
660 if (!(temps_used & (1 << idx))) {
661 cs->temps[idx].reg = -1;
662 cs->temps[idx].refcount = 1;
663 temps_used |= (1 << idx);
664 } else
665 cs->temps[idx].refcount++;
666 break;
667 case PROGRAM_INPUT:
668 cs->inputs[idx].refcount++;
669 break;
670 default:
671 break;
672 }
673 }
674
675 idx = fpi->DstReg.Index;
676 if (fpi->DstReg.File == PROGRAM_TEMPORARY) {
677 if (!(temps_used & (1 << idx))) {
678 cs->temps[idx].reg = -1;
679 cs->temps[idx].refcount = 1;
680 temps_used |= (1 << idx);
681 } else
682 cs->temps[idx].refcount++;
683 }
684 }
685 cs->temp_in_use = temps_used;
686 }
687
688 static void update_params(struct r500_fragment_program *fp)
689 {
690 struct gl_fragment_program *mp = &fp->mesa_program;
691
692 /* Ask Mesa nicely to fill in ParameterValues for us */
693 if (mp->Base.Parameters)
694 _mesa_load_state_parameters(fp->ctx, mp->Base.Parameters);
695 }
696
697 void r500TranslateFragmentShader(r300ContextPtr r300,
698 struct r500_fragment_program *fp)
699 {
700
701 struct r300_pfs_compile_state *cs = NULL;
702
703 if (!fp->translated) {
704
705 /* I need to see what I'm working with! */
706 fprintf(stderr, "Mesa program:\n");
707 fprintf(stderr, "-------------\n");
708 _mesa_print_program(&fp->mesa_program.Base);
709 fflush(stdout);
710
711 init_program(r300, fp);
712 cs = fp->cs;
713
714 if (parse_program(fp) == GL_FALSE) {
715 ERROR("Huh. Couldn't parse program. There should be additional errors explaining why.\nUsing dumb shader...\n");
716 dumb_shader(fp);
717 return;
718 }
719
720 /* Finish off */
721 fp->node[fp->cur_node].alu_end =
722 cs->nrslots - fp->node[fp->cur_node].alu_offset - 1;
723 if (fp->node[fp->cur_node].tex_end < 0)
724 fp->node[fp->cur_node].tex_end = 0;
725 fp->alu_offset = 0;
726 fp->alu_end = cs->nrslots - 1;
727 //assert(fp->node[fp->cur_node].alu_end >= 0);
728 //assert(fp->alu_end >= 0);
729
730 fp->translated = GL_TRUE;
731 r300UpdateStateParameters(fp->ctx, _NEW_PROGRAM);
732 }
733
734 update_params(fp);
735 }