r300: set proper texture row alignment for IGP chips
[mesa.git] / src / mesa / drivers / dri / r300 / r300_fragprog.c
1 /*
2 * Copyright (C) 2005 Ben Skeggs.
3 *
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial
16 * portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 */
27
28 #include "r300_fragprog.h"
29
30 #include "shader/prog_parameter.h"
31
32 #include "r300_context.h"
33 #include "r300_fragprog_swizzle.h"
34
35 static void reset_srcreg(struct prog_src_register* reg)
36 {
37 _mesa_bzero(reg, sizeof(*reg));
38 reg->Swizzle = SWIZZLE_NOOP;
39 }
40
41 static struct prog_src_register shadow_ambient(struct gl_program *program, int tmu)
42 {
43 gl_state_index fail_value_tokens[STATE_LENGTH] = {
44 STATE_INTERNAL, STATE_SHADOW_AMBIENT, 0, 0, 0
45 };
46 struct prog_src_register reg = { 0, };
47
48 fail_value_tokens[2] = tmu;
49 reg.File = PROGRAM_STATE_VAR;
50 reg.Index = _mesa_add_state_reference(program->Parameters, fail_value_tokens);
51 reg.Swizzle = SWIZZLE_WWWW;
52 return reg;
53 }
54
55 /**
56 * Transform TEX, TXP, TXB, and KIL instructions in the following way:
57 * - premultiply texture coordinates for RECT
58 * - extract operand swizzles
59 * - introduce a temporary register when write masks are needed
60 *
61 * \todo If/when r5xx uses the radeon_program architecture, this can probably
62 * be reused.
63 */
64 GLboolean r300_transform_TEX(
65 struct radeon_transform_context *t,
66 struct prog_instruction* orig_inst, void* data)
67 {
68 struct r300_fragment_program_compiler *compiler =
69 (struct r300_fragment_program_compiler*)data;
70 struct prog_instruction inst = *orig_inst;
71 struct prog_instruction* tgt;
72 GLboolean destredirect = GL_FALSE;
73
74 if (inst.Opcode != OPCODE_TEX &&
75 inst.Opcode != OPCODE_TXB &&
76 inst.Opcode != OPCODE_TXP &&
77 inst.Opcode != OPCODE_KIL)
78 return GL_FALSE;
79
80 if (inst.Opcode != OPCODE_KIL &&
81 t->Program->ShadowSamplers & (1 << inst.TexSrcUnit)) {
82 GLuint comparefunc = GL_NEVER + compiler->fp->state.unit[inst.TexSrcUnit].texture_compare_func;
83
84 if (comparefunc == GL_NEVER || comparefunc == GL_ALWAYS) {
85 tgt = radeonAppendInstructions(t->Program, 1);
86
87 tgt->Opcode = OPCODE_MOV;
88 tgt->DstReg = inst.DstReg;
89 if (comparefunc == GL_ALWAYS) {
90 tgt->SrcReg[0].File = PROGRAM_BUILTIN;
91 tgt->SrcReg[0].Swizzle = SWIZZLE_1111;
92 } else {
93 tgt->SrcReg[0] = shadow_ambient(t->Program, inst.TexSrcUnit);
94 }
95 return GL_TRUE;
96 }
97
98 inst.DstReg.File = PROGRAM_TEMPORARY;
99 inst.DstReg.Index = radeonFindFreeTemporary(t);
100 inst.DstReg.WriteMask = WRITEMASK_XYZW;
101 }
102
103
104 /* Hardware uses [0..1]x[0..1] range for rectangle textures
105 * instead of [0..Width]x[0..Height].
106 * Add a scaling instruction.
107 */
108 if (inst.Opcode != OPCODE_KIL && inst.TexSrcTarget == TEXTURE_RECT_INDEX) {
109 gl_state_index tokens[STATE_LENGTH] = {
110 STATE_INTERNAL, STATE_R300_TEXRECT_FACTOR, 0, 0,
111 0
112 };
113
114 int tempreg = radeonFindFreeTemporary(t);
115 int factor_index;
116
117 tokens[2] = inst.TexSrcUnit;
118 factor_index = _mesa_add_state_reference(t->Program->Parameters, tokens);
119
120 tgt = radeonAppendInstructions(t->Program, 1);
121
122 tgt->Opcode = OPCODE_MUL;
123 tgt->DstReg.File = PROGRAM_TEMPORARY;
124 tgt->DstReg.Index = tempreg;
125 tgt->SrcReg[0] = inst.SrcReg[0];
126 tgt->SrcReg[1].File = PROGRAM_STATE_VAR;
127 tgt->SrcReg[1].Index = factor_index;
128
129 reset_srcreg(&inst.SrcReg[0]);
130 inst.SrcReg[0].File = PROGRAM_TEMPORARY;
131 inst.SrcReg[0].Index = tempreg;
132 }
133
134 if (inst.Opcode != OPCODE_KIL) {
135 if (inst.DstReg.File != PROGRAM_TEMPORARY ||
136 inst.DstReg.WriteMask != WRITEMASK_XYZW) {
137 int tempreg = radeonFindFreeTemporary(t);
138
139 inst.DstReg.File = PROGRAM_TEMPORARY;
140 inst.DstReg.Index = tempreg;
141 inst.DstReg.WriteMask = WRITEMASK_XYZW;
142 destredirect = GL_TRUE;
143 }
144 }
145
146 if (inst.SrcReg[0].File != PROGRAM_TEMPORARY && inst.SrcReg[0].File != PROGRAM_INPUT) {
147 int tmpreg = radeonFindFreeTemporary(t);
148 tgt = radeonAppendInstructions(t->Program, 1);
149 tgt->Opcode = OPCODE_MOV;
150 tgt->DstReg.File = PROGRAM_TEMPORARY;
151 tgt->DstReg.Index = tmpreg;
152 tgt->SrcReg[0] = inst.SrcReg[0];
153
154 reset_srcreg(&inst.SrcReg[0]);
155 inst.SrcReg[0].File = PROGRAM_TEMPORARY;
156 inst.SrcReg[0].Index = tmpreg;
157 }
158
159 tgt = radeonAppendInstructions(t->Program, 1);
160 _mesa_copy_instructions(tgt, &inst, 1);
161
162 if (inst.Opcode != OPCODE_KIL &&
163 t->Program->ShadowSamplers & (1 << inst.TexSrcUnit)) {
164 GLuint comparefunc = GL_NEVER + compiler->fp->state.unit[inst.TexSrcUnit].texture_compare_func;
165 GLuint depthmode = compiler->fp->state.unit[inst.TexSrcUnit].depth_texture_mode;
166 int rcptemp = radeonFindFreeTemporary(t);
167 int pass, fail;
168
169 tgt = radeonAppendInstructions(t->Program, 3);
170
171 tgt[0].Opcode = OPCODE_RCP;
172 tgt[0].DstReg.File = PROGRAM_TEMPORARY;
173 tgt[0].DstReg.Index = rcptemp;
174 tgt[0].DstReg.WriteMask = WRITEMASK_W;
175 tgt[0].SrcReg[0] = inst.SrcReg[0];
176 tgt[0].SrcReg[0].Swizzle = SWIZZLE_WWWW;
177
178 tgt[1].Opcode = OPCODE_MAD;
179 tgt[1].DstReg = inst.DstReg;
180 tgt[1].DstReg.WriteMask = orig_inst->DstReg.WriteMask;
181 tgt[1].SrcReg[0] = inst.SrcReg[0];
182 tgt[1].SrcReg[0].Swizzle = SWIZZLE_ZZZZ;
183 tgt[1].SrcReg[1].File = PROGRAM_TEMPORARY;
184 tgt[1].SrcReg[1].Index = rcptemp;
185 tgt[1].SrcReg[1].Swizzle = SWIZZLE_WWWW;
186 tgt[1].SrcReg[2].File = PROGRAM_TEMPORARY;
187 tgt[1].SrcReg[2].Index = inst.DstReg.Index;
188 if (depthmode == 0) /* GL_LUMINANCE */
189 tgt[1].SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z);
190 else if (depthmode == 2) /* GL_ALPHA */
191 tgt[1].SrcReg[2].Swizzle = SWIZZLE_WWWW;
192
193 /* Recall that SrcReg[0] is tex, SrcReg[2] is r and:
194 * r < tex <=> -tex+r < 0
195 * r >= tex <=> not (-tex+r < 0 */
196 if (comparefunc == GL_LESS || comparefunc == GL_GEQUAL)
197 tgt[1].SrcReg[2].Negate = tgt[0].SrcReg[2].Negate ^ NEGATE_XYZW;
198 else
199 tgt[1].SrcReg[0].Negate = tgt[0].SrcReg[0].Negate ^ NEGATE_XYZW;
200
201 tgt[2].Opcode = OPCODE_CMP;
202 tgt[2].DstReg = orig_inst->DstReg;
203 tgt[2].SrcReg[0].File = PROGRAM_TEMPORARY;
204 tgt[2].SrcReg[0].Index = tgt[1].DstReg.Index;
205
206 if (comparefunc == GL_LESS || comparefunc == GL_GREATER) {
207 pass = 1;
208 fail = 2;
209 } else {
210 pass = 2;
211 fail = 1;
212 }
213
214 tgt[2].SrcReg[pass].File = PROGRAM_BUILTIN;
215 tgt[2].SrcReg[pass].Swizzle = SWIZZLE_1111;
216 tgt[2].SrcReg[fail] = shadow_ambient(t->Program, inst.TexSrcUnit);
217 } else if (destredirect) {
218 tgt = radeonAppendInstructions(t->Program, 1);
219
220 tgt->Opcode = OPCODE_MOV;
221 tgt->DstReg = orig_inst->DstReg;
222 tgt->SrcReg[0].File = PROGRAM_TEMPORARY;
223 tgt->SrcReg[0].Index = inst.DstReg.Index;
224 }
225
226 return GL_TRUE;
227 }
228
229 /* just some random things... */
230 void r300FragmentProgramDump(union rX00_fragment_program_code *c)
231 {
232 struct r300_fragment_program_code *code = &c->r300;
233 int n, i, j;
234 static int pc = 0;
235
236 fprintf(stderr, "pc=%d*************************************\n", pc++);
237
238 fprintf(stderr, "Hardware program\n");
239 fprintf(stderr, "----------------\n");
240
241 for (n = 0; n < (code->cur_node + 1); n++) {
242 fprintf(stderr, "NODE %d: alu_offset: %d, tex_offset: %d, "
243 "alu_end: %d, tex_end: %d, flags: %08x\n", n,
244 code->node[n].alu_offset,
245 code->node[n].tex_offset,
246 code->node[n].alu_end, code->node[n].tex_end,
247 code->node[n].flags);
248
249 if (n > 0 || code->first_node_has_tex) {
250 fprintf(stderr, " TEX:\n");
251 for (i = code->node[n].tex_offset;
252 i <= code->node[n].tex_offset + code->node[n].tex_end;
253 ++i) {
254 const char *instr;
255
256 switch ((code->tex.
257 inst[i] >> R300_TEX_INST_SHIFT) &
258 15) {
259 case R300_TEX_OP_LD:
260 instr = "TEX";
261 break;
262 case R300_TEX_OP_KIL:
263 instr = "KIL";
264 break;
265 case R300_TEX_OP_TXP:
266 instr = "TXP";
267 break;
268 case R300_TEX_OP_TXB:
269 instr = "TXB";
270 break;
271 default:
272 instr = "UNKNOWN";
273 }
274
275 fprintf(stderr,
276 " %s t%i, %c%i, texture[%i] (%08x)\n",
277 instr,
278 (code->tex.
279 inst[i] >> R300_DST_ADDR_SHIFT) & 31,
280 't',
281 (code->tex.
282 inst[i] >> R300_SRC_ADDR_SHIFT) & 31,
283 (code->tex.
284 inst[i] & R300_TEX_ID_MASK) >>
285 R300_TEX_ID_SHIFT,
286 code->tex.inst[i]);
287 }
288 }
289
290 for (i = code->node[n].alu_offset;
291 i <= code->node[n].alu_offset + code->node[n].alu_end; ++i) {
292 char srcc[3][10], dstc[20];
293 char srca[3][10], dsta[20];
294 char argc[3][20];
295 char arga[3][20];
296 char flags[5], tmp[10];
297
298 for (j = 0; j < 3; ++j) {
299 int regc = code->alu.inst[i].inst1 >> (j * 6);
300 int rega = code->alu.inst[i].inst3 >> (j * 6);
301
302 sprintf(srcc[j], "%c%i",
303 (regc & 32) ? 'c' : 't', regc & 31);
304 sprintf(srca[j], "%c%i",
305 (rega & 32) ? 'c' : 't', rega & 31);
306 }
307
308 dstc[0] = 0;
309 sprintf(flags, "%s%s%s",
310 (code->alu.inst[i].
311 inst1 & R300_ALU_DSTC_REG_X) ? "x" : "",
312 (code->alu.inst[i].
313 inst1 & R300_ALU_DSTC_REG_Y) ? "y" : "",
314 (code->alu.inst[i].
315 inst1 & R300_ALU_DSTC_REG_Z) ? "z" : "");
316 if (flags[0] != 0) {
317 sprintf(dstc, "t%i.%s ",
318 (code->alu.inst[i].
319 inst1 >> R300_ALU_DSTC_SHIFT) & 31,
320 flags);
321 }
322 sprintf(flags, "%s%s%s",
323 (code->alu.inst[i].
324 inst1 & R300_ALU_DSTC_OUTPUT_X) ? "x" : "",
325 (code->alu.inst[i].
326 inst1 & R300_ALU_DSTC_OUTPUT_Y) ? "y" : "",
327 (code->alu.inst[i].
328 inst1 & R300_ALU_DSTC_OUTPUT_Z) ? "z" : "");
329 if (flags[0] != 0) {
330 sprintf(tmp, "o%i.%s",
331 (code->alu.inst[i].
332 inst1 >> R300_ALU_DSTC_SHIFT) & 31,
333 flags);
334 strcat(dstc, tmp);
335 }
336
337 dsta[0] = 0;
338 if (code->alu.inst[i].inst3 & R300_ALU_DSTA_REG) {
339 sprintf(dsta, "t%i.w ",
340 (code->alu.inst[i].
341 inst3 >> R300_ALU_DSTA_SHIFT) & 31);
342 }
343 if (code->alu.inst[i].inst3 & R300_ALU_DSTA_OUTPUT) {
344 sprintf(tmp, "o%i.w ",
345 (code->alu.inst[i].
346 inst3 >> R300_ALU_DSTA_SHIFT) & 31);
347 strcat(dsta, tmp);
348 }
349 if (code->alu.inst[i].inst3 & R300_ALU_DSTA_DEPTH) {
350 strcat(dsta, "Z");
351 }
352
353 fprintf(stderr,
354 "%3i: xyz: %3s %3s %3s -> %-20s (%08x)\n"
355 " w: %3s %3s %3s -> %-20s (%08x)\n", i,
356 srcc[0], srcc[1], srcc[2], dstc,
357 code->alu.inst[i].inst1, srca[0], srca[1],
358 srca[2], dsta, code->alu.inst[i].inst3);
359
360 for (j = 0; j < 3; ++j) {
361 int regc = code->alu.inst[i].inst0 >> (j * 7);
362 int rega = code->alu.inst[i].inst2 >> (j * 7);
363 int d;
364 char buf[20];
365
366 d = regc & 31;
367 if (d < 12) {
368 switch (d % 4) {
369 case R300_ALU_ARGC_SRC0C_XYZ:
370 sprintf(buf, "%s.xyz",
371 srcc[d / 4]);
372 break;
373 case R300_ALU_ARGC_SRC0C_XXX:
374 sprintf(buf, "%s.xxx",
375 srcc[d / 4]);
376 break;
377 case R300_ALU_ARGC_SRC0C_YYY:
378 sprintf(buf, "%s.yyy",
379 srcc[d / 4]);
380 break;
381 case R300_ALU_ARGC_SRC0C_ZZZ:
382 sprintf(buf, "%s.zzz",
383 srcc[d / 4]);
384 break;
385 }
386 } else if (d < 15) {
387 sprintf(buf, "%s.www", srca[d - 12]);
388 } else if (d == 20) {
389 sprintf(buf, "0.0");
390 } else if (d == 21) {
391 sprintf(buf, "1.0");
392 } else if (d == 22) {
393 sprintf(buf, "0.5");
394 } else if (d >= 23 && d < 32) {
395 d -= 23;
396 switch (d / 3) {
397 case 0:
398 sprintf(buf, "%s.yzx",
399 srcc[d % 3]);
400 break;
401 case 1:
402 sprintf(buf, "%s.zxy",
403 srcc[d % 3]);
404 break;
405 case 2:
406 sprintf(buf, "%s.Wzy",
407 srcc[d % 3]);
408 break;
409 }
410 } else {
411 sprintf(buf, "%i", d);
412 }
413
414 sprintf(argc[j], "%s%s%s%s",
415 (regc & 32) ? "-" : "",
416 (regc & 64) ? "|" : "",
417 buf, (regc & 64) ? "|" : "");
418
419 d = rega & 31;
420 if (d < 9) {
421 sprintf(buf, "%s.%c", srcc[d / 3],
422 'x' + (char)(d % 3));
423 } else if (d < 12) {
424 sprintf(buf, "%s.w", srca[d - 9]);
425 } else if (d == 16) {
426 sprintf(buf, "0.0");
427 } else if (d == 17) {
428 sprintf(buf, "1.0");
429 } else if (d == 18) {
430 sprintf(buf, "0.5");
431 } else {
432 sprintf(buf, "%i", d);
433 }
434
435 sprintf(arga[j], "%s%s%s%s",
436 (rega & 32) ? "-" : "",
437 (rega & 64) ? "|" : "",
438 buf, (rega & 64) ? "|" : "");
439 }
440
441 fprintf(stderr, " xyz: %8s %8s %8s op: %08x\n"
442 " w: %8s %8s %8s op: %08x\n",
443 argc[0], argc[1], argc[2],
444 code->alu.inst[i].inst0, arga[0], arga[1],
445 arga[2], code->alu.inst[i].inst2);
446 }
447 }
448 }