7ce5fb8639fe811e3d236d1e3faa43094deb6381
[mesa.git] / src / mesa / drivers / dri / r300 / compiler / radeon_program_tex.c
1 /*
2 * Copyright (C) 2010 Corbin Simpson
3 * Copyright (C) 2010 Marek Olšák <maraeo@gmail.com>
4 *
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining
8 * a copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sublicense, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial
17 * portions of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
22 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
23 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
24 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
25 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 */
28
29 #include "radeon_program_tex.h"
30
31 /* Series of transformations to be done on textures. */
32
33 static struct rc_src_register shadow_ambient(struct r300_fragment_program_compiler *compiler,
34 int tmu)
35 {
36 struct rc_src_register reg = { 0, };
37
38 if (compiler->enable_shadow_ambient) {
39 reg.File = RC_FILE_CONSTANT;
40 reg.Index = rc_constants_add_state(&compiler->Base.Program.Constants,
41 RC_STATE_SHADOW_AMBIENT, tmu);
42 reg.Swizzle = RC_SWIZZLE_WWWW;
43 } else {
44 reg.File = RC_FILE_NONE;
45 reg.Swizzle = RC_SWIZZLE_0000;
46 }
47 return reg;
48 }
49
50 static void scale_texcoords(struct r300_fragment_program_compiler *compiler,
51 struct rc_instruction *inst,
52 unsigned state_constant)
53 {
54 struct rc_instruction *inst_mov;
55
56 unsigned temp = rc_find_free_temporary(&compiler->Base);
57
58 inst_mov = rc_insert_new_instruction(&compiler->Base, inst->Prev);
59
60 inst_mov->U.I.Opcode = RC_OPCODE_MUL;
61 inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
62 inst_mov->U.I.DstReg.Index = temp;
63 inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
64 inst_mov->U.I.SrcReg[1].File = RC_FILE_CONSTANT;
65 inst_mov->U.I.SrcReg[1].Index =
66 rc_constants_add_state(&compiler->Base.Program.Constants,
67 state_constant, inst->U.I.TexSrcUnit);
68
69 reset_srcreg(&inst->U.I.SrcReg[0]);
70 inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
71 inst->U.I.SrcReg[0].Index = temp;
72 }
73
74 /**
75 * Transform TEX, TXP, TXB, and KIL instructions in the following ways:
76 * - implement texture compare (shadow extensions)
77 * - extract non-native source / destination operands
78 * - premultiply texture coordinates for RECT
79 * - extract operand swizzles
80 * - introduce a temporary register when write masks are needed
81 */
82 int radeonTransformTEX(
83 struct radeon_compiler * c,
84 struct rc_instruction * inst,
85 void* data)
86 {
87 struct r300_fragment_program_compiler *compiler =
88 (struct r300_fragment_program_compiler*)data;
89 rc_wrap_mode wrapmode = compiler->state.unit[inst->U.I.TexSrcUnit].wrap_mode;
90 int is_rect = inst->U.I.TexSrcTarget == RC_TEXTURE_RECT ||
91 compiler->state.unit[inst->U.I.TexSrcUnit].non_normalized_coords;
92
93 if (inst->U.I.Opcode != RC_OPCODE_TEX &&
94 inst->U.I.Opcode != RC_OPCODE_TXB &&
95 inst->U.I.Opcode != RC_OPCODE_TXP &&
96 inst->U.I.Opcode != RC_OPCODE_KIL)
97 return 0;
98
99 /* ARB_shadow & EXT_shadow_funcs */
100 if (inst->U.I.Opcode != RC_OPCODE_KIL &&
101 ((c->Program.ShadowSamplers & (1 << inst->U.I.TexSrcUnit)) ||
102 (compiler->state.unit[inst->U.I.TexSrcUnit].compare_mode_enabled))) {
103 rc_compare_func comparefunc = compiler->state.unit[inst->U.I.TexSrcUnit].texture_compare_func;
104
105 if (comparefunc == RC_COMPARE_FUNC_NEVER || comparefunc == RC_COMPARE_FUNC_ALWAYS) {
106 inst->U.I.Opcode = RC_OPCODE_MOV;
107
108 if (comparefunc == RC_COMPARE_FUNC_ALWAYS) {
109 inst->U.I.SrcReg[0].File = RC_FILE_NONE;
110 inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111;
111 } else {
112 inst->U.I.SrcReg[0] = shadow_ambient(compiler, inst->U.I.TexSrcUnit);
113 }
114
115 return 1;
116 } else {
117 rc_compare_func comparefunc = compiler->state.unit[inst->U.I.TexSrcUnit].texture_compare_func;
118 struct rc_instruction * inst_rcp = NULL;
119 struct rc_instruction * inst_mad;
120 struct rc_instruction * inst_cmp;
121 unsigned tmp_texsample;
122 unsigned tmp_sum;
123 unsigned tmp_recip_w = 0;
124 int pass, fail, tex;
125
126 /* Save the output register. */
127 struct rc_dst_register output_reg = inst->U.I.DstReg;
128
129 /* Redirect TEX to a new temp. */
130 tmp_texsample = rc_find_free_temporary(c);
131 inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
132 inst->U.I.DstReg.Index = tmp_texsample;
133 inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
134
135 if (inst->U.I.Opcode == RC_OPCODE_TXP) {
136 tmp_recip_w = rc_find_free_temporary(c);
137
138 /* Compute 1/W. */
139 inst_rcp = rc_insert_new_instruction(c, inst);
140 inst_rcp->U.I.Opcode = RC_OPCODE_RCP;
141 inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY;
142 inst_rcp->U.I.DstReg.Index = tmp_recip_w;
143 inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W;
144 inst_rcp->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
145 /* XXX do not take W, instead, see which channel is mapped to W. */
146 inst_rcp->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_WWWW;
147 }
148
149 /* Perspective-divide r by W (if it's TXP) and add the texture sample (see below). */
150 tmp_sum = rc_find_free_temporary(c);
151 inst_mad = rc_insert_new_instruction(c, inst_rcp ? inst_rcp : inst);
152 inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY;
153 inst_mad->U.I.DstReg.Index = tmp_sum;
154 inst_mad->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
155 inst_mad->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_ZZZZ;
156 if (inst->U.I.Opcode == RC_OPCODE_TXP) {
157 inst_mad->U.I.Opcode = RC_OPCODE_MAD;
158 inst_mad->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
159 inst_mad->U.I.SrcReg[1].Index = tmp_recip_w;
160 inst_mad->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW;
161 tex = 2;
162 } else {
163 inst_mad->U.I.Opcode = RC_OPCODE_ADD;
164 tex = 1;
165 }
166 inst_mad->U.I.SrcReg[tex].File = RC_FILE_TEMPORARY;
167 inst_mad->U.I.SrcReg[tex].Index = tmp_texsample;
168 inst_mad->U.I.SrcReg[tex].Swizzle = compiler->state.unit[inst->U.I.TexSrcUnit].depth_texture_swizzle;
169
170 /* Fake EQUAL/NOTEQUAL, it seems to pass some tests suprisingly. */
171 if (comparefunc == RC_COMPARE_FUNC_EQUAL) {
172 comparefunc = RC_COMPARE_FUNC_GEQUAL;
173 } else if (comparefunc == RC_COMPARE_FUNC_NOTEQUAL) {
174 comparefunc = RC_COMPARE_FUNC_LESS;
175 }
176
177 /* Recall that SrcReg[0] is r, SrcReg[tex] is tex and:
178 * LESS: r < tex <=> -tex+r < 0
179 * GEQUAL: r >= tex <=> not (-tex+r < 0)
180 * GREATER: r > tex <=> tex-r < 0
181 * LEQUAL: r <= tex <=> not ( tex-r < 0)
182 *
183 * This negates either r or tex: */
184 if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GEQUAL)
185 inst_mad->U.I.SrcReg[tex].Negate = inst_mad->U.I.SrcReg[tex].Negate ^ RC_MASK_XYZW;
186 else
187 inst_mad->U.I.SrcReg[0].Negate = inst_mad->U.I.SrcReg[0].Negate ^ RC_MASK_XYZW;
188
189 /* This negates the whole expresion: */
190 if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GREATER) {
191 pass = 1;
192 fail = 2;
193 } else {
194 pass = 2;
195 fail = 1;
196 }
197
198 inst_cmp = rc_insert_new_instruction(c, inst_mad);
199 inst_cmp->U.I.Opcode = RC_OPCODE_CMP;
200 inst_cmp->U.I.DstReg = output_reg;
201 inst_cmp->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
202 inst_cmp->U.I.SrcReg[0].Index = tmp_sum;
203 inst_cmp->U.I.SrcReg[pass].File = RC_FILE_NONE;
204 inst_cmp->U.I.SrcReg[pass].Swizzle = RC_SWIZZLE_1111;
205 inst_cmp->U.I.SrcReg[fail] = shadow_ambient(compiler, inst->U.I.TexSrcUnit);
206
207 assert(tmp_texsample != tmp_sum && tmp_sum != tmp_recip_w);
208 }
209 }
210
211 /* R300 cannot sample from rectangles and the wrap mode fallback needs
212 * normalized coordinates anyway. */
213 if (inst->U.I.Opcode != RC_OPCODE_KIL &&
214 is_rect && (!c->is_r500 || wrapmode != RC_WRAP_NONE)) {
215 scale_texcoords(compiler, inst, RC_STATE_R300_TEXRECT_FACTOR);
216 inst->U.I.TexSrcTarget = RC_TEXTURE_2D;
217 }
218
219 /* Texture wrap modes don't work on NPOT textures.
220 *
221 * Non-wrapped/clamped texcoords with NPOT are free in HW. Repeat and
222 * mirroring are not. If we need to repeat, we do:
223 *
224 * MUL temp, texcoord, <scaling factor constant>
225 * FRC temp, temp ; Discard integer portion of coords
226 *
227 * This gives us coords in [0, 1].
228 *
229 * Mirroring is trickier. We're going to start out like repeat:
230 *
231 * MUL temp, texcoord, <scaling factor constant> ; De-mirror across axes
232 * MUL temp, temp, 0.5 ; Pattern repeats in [0, 2]
233 * ; so scale to [0, 1]
234 * FRC temp, temp ; Make the pattern repeat
235 * MAD temp, temp, 2, -1 ; Move the pattern to [-1, 1]
236 * ADD temp, 1, -abs(temp) ; Now comes a neat trick: use abs to mirror the pattern.
237 * ; The pattern is backwards, so reverse it (1-x).
238 *
239 * This gives us coords in [0, 1].
240 *
241 * ~ C & M. ;)
242 */
243 if (inst->U.I.Opcode != RC_OPCODE_KIL &&
244 wrapmode != RC_WRAP_NONE) {
245 struct rc_instruction *inst_mov;
246 unsigned temp = rc_find_free_temporary(c);
247
248 if (wrapmode == RC_WRAP_REPEAT) {
249 /* Both instructions will be paired up. */
250 struct rc_instruction *inst_frc = rc_insert_new_instruction(c, inst->Prev);
251
252 inst_frc->U.I.Opcode = RC_OPCODE_FRC;
253 inst_frc->U.I.DstReg.File = RC_FILE_TEMPORARY;
254 inst_frc->U.I.DstReg.Index = temp;
255 inst_frc->U.I.DstReg.WriteMask = RC_MASK_XYZ;
256 inst_frc->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
257 } else if (wrapmode == RC_WRAP_MIRRORED_REPEAT) {
258 /*
259 * Function:
260 * f(v) = 1 - abs(frac(v * 0.5) * 2 - 1)
261 *
262 * Code:
263 * MUL temp, src0, 0.5
264 * FRC temp, temp
265 * MAD temp, temp, 2, -1
266 * ADD temp, 1, -abs(temp)
267 */
268
269 struct rc_instruction *inst_mul, *inst_frc, *inst_mad, *inst_add;
270 unsigned two, two_swizzle;
271
272 inst_mul = rc_insert_new_instruction(c, inst->Prev);
273
274 inst_mul->U.I.Opcode = RC_OPCODE_MUL;
275 inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY;
276 inst_mul->U.I.DstReg.Index = temp;
277 inst_mul->U.I.DstReg.WriteMask = RC_MASK_XYZ;
278 inst_mul->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
279 inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_HHHH;
280
281 inst_frc = rc_insert_new_instruction(c, inst->Prev);
282
283 inst_frc->U.I.Opcode = RC_OPCODE_FRC;
284 inst_frc->U.I.DstReg.File = RC_FILE_TEMPORARY;
285 inst_frc->U.I.DstReg.Index = temp;
286 inst_frc->U.I.DstReg.WriteMask = RC_MASK_XYZ;
287 inst_frc->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
288 inst_frc->U.I.SrcReg[0].Index = temp;
289 inst_frc->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0;
290
291 two = rc_constants_add_immediate_scalar(&c->Program.Constants, 2, &two_swizzle);
292 inst_mad = rc_insert_new_instruction(c, inst->Prev);
293
294 inst_mad->U.I.Opcode = RC_OPCODE_MAD;
295 inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY;
296 inst_mad->U.I.DstReg.Index = temp;
297 inst_mad->U.I.DstReg.WriteMask = RC_MASK_XYZ;
298 inst_mad->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
299 inst_mad->U.I.SrcReg[0].Index = temp;
300 inst_mad->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0;
301 inst_mad->U.I.SrcReg[1].File = RC_FILE_CONSTANT;
302 inst_mad->U.I.SrcReg[1].Index = two;
303 inst_mad->U.I.SrcReg[1].Swizzle = two_swizzle;
304 inst_mad->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_1111;
305 inst_mad->U.I.SrcReg[2].Negate = RC_MASK_XYZ;
306
307 inst_add = rc_insert_new_instruction(c, inst->Prev);
308
309 inst_add->U.I.Opcode = RC_OPCODE_ADD;
310 inst_add->U.I.DstReg.File = RC_FILE_TEMPORARY;
311 inst_add->U.I.DstReg.Index = temp;
312 inst_add->U.I.DstReg.WriteMask = RC_MASK_XYZ;
313 inst_add->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111;
314 inst_add->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
315 inst_add->U.I.SrcReg[1].Index = temp;
316 inst_add->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XYZ0;
317 inst_add->U.I.SrcReg[1].Abs = 1;
318 inst_add->U.I.SrcReg[1].Negate = RC_MASK_XYZ;
319 } else if (wrapmode == RC_WRAP_MIRRORED_CLAMP) {
320 /*
321 * Mirrored clamp modes are bloody simple, we just use abs
322 * to mirror [0, 1] into [-1, 0]. This works for
323 * all modes i.e. CLAMP, CLAMP_TO_EDGE, and CLAMP_TO_BORDER.
324 */
325 struct rc_instruction *inst_mov;
326
327 inst_mov = rc_insert_new_instruction(c, inst->Prev);
328
329 inst_mov->U.I.Opcode = RC_OPCODE_MOV;
330 inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
331 inst_mov->U.I.DstReg.Index = temp;
332 inst_mov->U.I.DstReg.WriteMask = RC_MASK_XYZ;
333 inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
334 inst_mov->U.I.SrcReg[0].Abs = 1;
335 }
336
337 /* Preserve W for TXP/TXB. */
338 inst_mov = rc_insert_new_instruction(c, inst->Prev);
339
340 inst_mov->U.I.Opcode = RC_OPCODE_MOV;
341 inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
342 inst_mov->U.I.DstReg.Index = temp;
343 inst_mov->U.I.DstReg.WriteMask = RC_MASK_W;
344 inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
345
346 reset_srcreg(&inst->U.I.SrcReg[0]);
347 inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
348 inst->U.I.SrcReg[0].Index = temp;
349 }
350
351 /* Cannot write texture to output registers (all chips) or with masks (non-r500) */
352 if (inst->U.I.Opcode != RC_OPCODE_KIL &&
353 (inst->U.I.DstReg.File != RC_FILE_TEMPORARY ||
354 (!c->is_r500 && inst->U.I.DstReg.WriteMask != RC_MASK_XYZW))) {
355 struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst);
356
357 inst_mov->U.I.Opcode = RC_OPCODE_MOV;
358 inst_mov->U.I.DstReg = inst->U.I.DstReg;
359 inst_mov->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
360 inst_mov->U.I.SrcReg[0].Index = rc_find_free_temporary(c);
361
362 inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
363 inst->U.I.DstReg.Index = inst_mov->U.I.SrcReg[0].Index;
364 inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
365 }
366
367 /* Cannot read texture coordinate from constants file */
368 if (inst->U.I.SrcReg[0].File != RC_FILE_TEMPORARY && inst->U.I.SrcReg[0].File != RC_FILE_INPUT) {
369 struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev);
370
371 inst_mov->U.I.Opcode = RC_OPCODE_MOV;
372 inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
373 inst_mov->U.I.DstReg.Index = rc_find_free_temporary(c);
374 inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
375
376 reset_srcreg(&inst->U.I.SrcReg[0]);
377 inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
378 inst->U.I.SrcReg[0].Index = inst_mov->U.I.DstReg.Index;
379 }
380
381 return 1;
382 }