r300/compiler: Handle loops in rc_get_readers()
[mesa.git] / src / mesa / drivers / dri / r300 / compiler / radeon_program_tex.c
1 /*
2 * Copyright (C) 2010 Corbin Simpson
3 * Copyright (C) 2010 Marek Olšák <maraeo@gmail.com>
4 *
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining
8 * a copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sublicense, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial
17 * portions of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
22 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
23 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
24 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
25 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 */
28
29 #include "radeon_program_tex.h"
30
31 #include "radeon_compiler_util.h"
32
33 /* Series of transformations to be done on textures. */
34
35 static struct rc_src_register shadow_fail_value(struct r300_fragment_program_compiler *compiler,
36 int tmu)
37 {
38 struct rc_src_register reg = { 0, };
39
40 if (compiler->enable_shadow_ambient) {
41 reg.File = RC_FILE_CONSTANT;
42 reg.Index = rc_constants_add_state(&compiler->Base.Program.Constants,
43 RC_STATE_SHADOW_AMBIENT, tmu);
44 reg.Swizzle = RC_SWIZZLE_WWWW;
45 } else {
46 reg.File = RC_FILE_NONE;
47 reg.Swizzle = RC_SWIZZLE_0000;
48 }
49
50 reg.Swizzle = combine_swizzles(reg.Swizzle,
51 compiler->state.unit[tmu].texture_swizzle);
52 return reg;
53 }
54
55 static struct rc_src_register shadow_pass_value(struct r300_fragment_program_compiler *compiler,
56 int tmu)
57 {
58 struct rc_src_register reg = { 0, };
59
60 reg.File = RC_FILE_NONE;
61 reg.Swizzle = combine_swizzles(RC_SWIZZLE_1111,
62 compiler->state.unit[tmu].texture_swizzle);
63 return reg;
64 }
65
66 static void scale_texcoords(struct r300_fragment_program_compiler *compiler,
67 struct rc_instruction *inst,
68 unsigned state_constant)
69 {
70 struct rc_instruction *inst_mov;
71
72 unsigned temp = rc_find_free_temporary(&compiler->Base);
73
74 inst_mov = rc_insert_new_instruction(&compiler->Base, inst->Prev);
75
76 inst_mov->U.I.Opcode = RC_OPCODE_MUL;
77 inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
78 inst_mov->U.I.DstReg.Index = temp;
79 inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
80 inst_mov->U.I.SrcReg[1].File = RC_FILE_CONSTANT;
81 inst_mov->U.I.SrcReg[1].Index =
82 rc_constants_add_state(&compiler->Base.Program.Constants,
83 state_constant, inst->U.I.TexSrcUnit);
84
85 reset_srcreg(&inst->U.I.SrcReg[0]);
86 inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
87 inst->U.I.SrcReg[0].Index = temp;
88 }
89
90 static void projective_divide(struct r300_fragment_program_compiler *compiler,
91 struct rc_instruction *inst)
92 {
93 struct rc_instruction *inst_mul, *inst_rcp;
94
95 unsigned temp = rc_find_free_temporary(&compiler->Base);
96
97 inst_rcp = rc_insert_new_instruction(&compiler->Base, inst->Prev);
98 inst_rcp->U.I.Opcode = RC_OPCODE_RCP;
99 inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY;
100 inst_rcp->U.I.DstReg.Index = temp;
101 inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W;
102 inst_rcp->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
103 /* Because the input can be arbitrarily swizzled,
104 * read the component mapped to W. */
105 inst_rcp->U.I.SrcReg[0].Swizzle =
106 RC_MAKE_SWIZZLE_SMEAR(GET_SWZ(inst->U.I.SrcReg[0].Swizzle, 3));
107
108 inst_mul = rc_insert_new_instruction(&compiler->Base, inst->Prev);
109 inst_mul->U.I.Opcode = RC_OPCODE_MUL;
110 inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY;
111 inst_mul->U.I.DstReg.Index = temp;
112 inst_mul->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
113 inst_mul->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
114 inst_mul->U.I.SrcReg[1].Index = temp;
115 inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW;
116
117 reset_srcreg(&inst->U.I.SrcReg[0]);
118 inst->U.I.Opcode = RC_OPCODE_TEX;
119 inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
120 inst->U.I.SrcReg[0].Index = temp;
121 }
122
123 /**
124 * Transform TEX, TXP, TXB, and KIL instructions in the following ways:
125 * - implement texture compare (shadow extensions)
126 * - extract non-native source / destination operands
127 * - premultiply texture coordinates for RECT
128 * - extract operand swizzles
129 * - introduce a temporary register when write masks are needed
130 */
131 int radeonTransformTEX(
132 struct radeon_compiler * c,
133 struct rc_instruction * inst,
134 void* data)
135 {
136 struct r300_fragment_program_compiler *compiler =
137 (struct r300_fragment_program_compiler*)data;
138 rc_wrap_mode wrapmode = compiler->state.unit[inst->U.I.TexSrcUnit].wrap_mode;
139 int is_rect = inst->U.I.TexSrcTarget == RC_TEXTURE_RECT ||
140 compiler->state.unit[inst->U.I.TexSrcUnit].non_normalized_coords;
141
142 if (inst->U.I.Opcode != RC_OPCODE_TEX &&
143 inst->U.I.Opcode != RC_OPCODE_TXB &&
144 inst->U.I.Opcode != RC_OPCODE_TXP &&
145 inst->U.I.Opcode != RC_OPCODE_KIL)
146 return 0;
147
148 /* ARB_shadow & EXT_shadow_funcs */
149 if (inst->U.I.Opcode != RC_OPCODE_KIL &&
150 ((c->Program.ShadowSamplers & (1 << inst->U.I.TexSrcUnit)) ||
151 (compiler->state.unit[inst->U.I.TexSrcUnit].compare_mode_enabled))) {
152 rc_compare_func comparefunc = compiler->state.unit[inst->U.I.TexSrcUnit].texture_compare_func;
153
154 if (comparefunc == RC_COMPARE_FUNC_NEVER || comparefunc == RC_COMPARE_FUNC_ALWAYS) {
155 inst->U.I.Opcode = RC_OPCODE_MOV;
156
157 if (comparefunc == RC_COMPARE_FUNC_ALWAYS) {
158 inst->U.I.SrcReg[0] = shadow_pass_value(compiler, inst->U.I.TexSrcUnit);
159 } else {
160 inst->U.I.SrcReg[0] = shadow_fail_value(compiler, inst->U.I.TexSrcUnit);
161 }
162
163 return 1;
164 } else {
165 struct rc_instruction * inst_rcp = NULL;
166 struct rc_instruction *inst_mul, *inst_add, *inst_cmp;
167 unsigned tmp_texsample;
168 unsigned tmp_sum;
169 int pass, fail;
170
171 /* Save the output register. */
172 struct rc_dst_register output_reg = inst->U.I.DstReg;
173 unsigned saturate_mode = inst->U.I.SaturateMode;
174
175 /* Redirect TEX to a new temp. */
176 tmp_texsample = rc_find_free_temporary(c);
177 inst->U.I.SaturateMode = 0;
178 inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
179 inst->U.I.DstReg.Index = tmp_texsample;
180 inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
181
182 tmp_sum = rc_find_free_temporary(c);
183
184 if (inst->U.I.Opcode == RC_OPCODE_TXP) {
185 /* Compute 1/W. */
186 inst_rcp = rc_insert_new_instruction(c, inst);
187 inst_rcp->U.I.Opcode = RC_OPCODE_RCP;
188 inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY;
189 inst_rcp->U.I.DstReg.Index = tmp_sum;
190 inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W;
191 inst_rcp->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
192 inst_rcp->U.I.SrcReg[0].Swizzle =
193 RC_MAKE_SWIZZLE_SMEAR(GET_SWZ(inst->U.I.SrcReg[0].Swizzle, 3));
194 }
195
196 /* Divide Z by W (if it's TXP) and saturate. */
197 inst_mul = rc_insert_new_instruction(c, inst_rcp ? inst_rcp : inst);
198 inst_mul->U.I.Opcode = inst->U.I.Opcode == RC_OPCODE_TXP ? RC_OPCODE_MUL : RC_OPCODE_MOV;
199 inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY;
200 inst_mul->U.I.DstReg.Index = tmp_sum;
201 inst_mul->U.I.DstReg.WriteMask = RC_MASK_W;
202 inst_mul->U.I.SaturateMode = RC_SATURATE_ZERO_ONE;
203 inst_mul->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
204 inst_mul->U.I.SrcReg[0].Swizzle =
205 RC_MAKE_SWIZZLE_SMEAR(GET_SWZ(inst->U.I.SrcReg[0].Swizzle, 2));
206 if (inst->U.I.Opcode == RC_OPCODE_TXP) {
207 inst_mul->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
208 inst_mul->U.I.SrcReg[1].Index = tmp_sum;
209 inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW;
210 }
211
212 /* Add the depth texture value. */
213 inst_add = rc_insert_new_instruction(c, inst_mul);
214 inst_add->U.I.Opcode = RC_OPCODE_ADD;
215 inst_add->U.I.DstReg.File = RC_FILE_TEMPORARY;
216 inst_add->U.I.DstReg.Index = tmp_sum;
217 inst_add->U.I.DstReg.WriteMask = RC_MASK_W;
218 inst_add->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
219 inst_add->U.I.SrcReg[0].Index = tmp_sum;
220 inst_add->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_WWWW;
221 inst_add->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
222 inst_add->U.I.SrcReg[1].Index = tmp_texsample;
223 inst_add->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XXXX;
224
225 /* Note that SrcReg[0] is r, SrcReg[1] is tex and:
226 * LESS: r < tex <=> -tex+r < 0
227 * GEQUAL: r >= tex <=> not (-tex+r < 0)
228 * GREATER: r > tex <=> tex-r < 0
229 * LEQUAL: r <= tex <=> not ( tex-r < 0)
230 * EQUAL: GEQUAL
231 * NOTEQUAL:LESS
232 */
233
234 /* This negates either r or tex: */
235 if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GEQUAL ||
236 comparefunc == RC_COMPARE_FUNC_EQUAL || comparefunc == RC_COMPARE_FUNC_NOTEQUAL)
237 inst_add->U.I.SrcReg[1].Negate = inst_add->U.I.SrcReg[1].Negate ^ RC_MASK_XYZW;
238 else
239 inst_add->U.I.SrcReg[0].Negate = inst_add->U.I.SrcReg[0].Negate ^ RC_MASK_XYZW;
240
241 /* This negates the whole expresion: */
242 if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GREATER ||
243 comparefunc == RC_COMPARE_FUNC_NOTEQUAL) {
244 pass = 1;
245 fail = 2;
246 } else {
247 pass = 2;
248 fail = 1;
249 }
250
251 inst_cmp = rc_insert_new_instruction(c, inst_add);
252 inst_cmp->U.I.Opcode = RC_OPCODE_CMP;
253 inst_cmp->U.I.SaturateMode = saturate_mode;
254 inst_cmp->U.I.DstReg = output_reg;
255 inst_cmp->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
256 inst_cmp->U.I.SrcReg[0].Index = tmp_sum;
257 inst_cmp->U.I.SrcReg[0].Swizzle =
258 combine_swizzles(RC_SWIZZLE_WWWW,
259 compiler->state.unit[inst->U.I.TexSrcUnit].texture_swizzle);
260 inst_cmp->U.I.SrcReg[pass] = shadow_pass_value(compiler, inst->U.I.TexSrcUnit);
261 inst_cmp->U.I.SrcReg[fail] = shadow_fail_value(compiler, inst->U.I.TexSrcUnit);
262
263 assert(tmp_texsample != tmp_sum);
264 }
265 }
266
267 /* R300 cannot sample from rectangles and the wrap mode fallback needs
268 * normalized coordinates anyway. */
269 if (inst->U.I.Opcode != RC_OPCODE_KIL &&
270 is_rect && (!c->is_r500 || wrapmode != RC_WRAP_NONE)) {
271 scale_texcoords(compiler, inst, RC_STATE_R300_TEXRECT_FACTOR);
272 inst->U.I.TexSrcTarget = RC_TEXTURE_2D;
273 }
274
275 /* Divide by W if needed. */
276 if (inst->U.I.Opcode == RC_OPCODE_TXP &&
277 (wrapmode == RC_WRAP_REPEAT || wrapmode == RC_WRAP_MIRRORED_REPEAT ||
278 compiler->state.unit[inst->U.I.TexSrcUnit].clamp_and_scale_before_fetch)) {
279 projective_divide(compiler, inst);
280 }
281
282 /* Texture wrap modes don't work on NPOT textures.
283 *
284 * Non-wrapped/clamped texcoords with NPOT are free in HW. Repeat and
285 * mirroring are not. If we need to repeat, we do:
286 *
287 * MUL temp, texcoord, <scaling factor constant>
288 * FRC temp, temp ; Discard integer portion of coords
289 *
290 * This gives us coords in [0, 1].
291 *
292 * Mirroring is trickier. We're going to start out like repeat:
293 *
294 * MUL temp, texcoord, <scaling factor constant> ; De-mirror across axes
295 * MUL temp, temp, 0.5 ; Pattern repeats in [0, 2]
296 * ; so scale to [0, 1]
297 * FRC temp, temp ; Make the pattern repeat
298 * MAD temp, temp, 2, -1 ; Move the pattern to [-1, 1]
299 * ADD temp, 1, -abs(temp) ; Now comes a neat trick: use abs to mirror the pattern.
300 * ; The pattern is backwards, so reverse it (1-x).
301 *
302 * This gives us coords in [0, 1].
303 *
304 * ~ C & M. ;)
305 */
306 if (inst->U.I.Opcode != RC_OPCODE_KIL &&
307 wrapmode != RC_WRAP_NONE) {
308 struct rc_instruction *inst_mov;
309 unsigned temp = rc_find_free_temporary(c);
310
311 if (wrapmode == RC_WRAP_REPEAT) {
312 /* Both instructions will be paired up. */
313 struct rc_instruction *inst_frc = rc_insert_new_instruction(c, inst->Prev);
314
315 inst_frc->U.I.Opcode = RC_OPCODE_FRC;
316 inst_frc->U.I.DstReg.File = RC_FILE_TEMPORARY;
317 inst_frc->U.I.DstReg.Index = temp;
318 inst_frc->U.I.DstReg.WriteMask = RC_MASK_XYZ;
319 inst_frc->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
320 } else if (wrapmode == RC_WRAP_MIRRORED_REPEAT) {
321 /*
322 * Function:
323 * f(v) = 1 - abs(frac(v * 0.5) * 2 - 1)
324 *
325 * Code:
326 * MUL temp, src0, 0.5
327 * FRC temp, temp
328 * MAD temp, temp, 2, -1
329 * ADD temp, 1, -abs(temp)
330 */
331
332 struct rc_instruction *inst_mul, *inst_frc, *inst_mad, *inst_add;
333 unsigned two, two_swizzle;
334
335 inst_mul = rc_insert_new_instruction(c, inst->Prev);
336
337 inst_mul->U.I.Opcode = RC_OPCODE_MUL;
338 inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY;
339 inst_mul->U.I.DstReg.Index = temp;
340 inst_mul->U.I.DstReg.WriteMask = RC_MASK_XYZ;
341 inst_mul->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
342 inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_HHHH;
343
344 inst_frc = rc_insert_new_instruction(c, inst->Prev);
345
346 inst_frc->U.I.Opcode = RC_OPCODE_FRC;
347 inst_frc->U.I.DstReg.File = RC_FILE_TEMPORARY;
348 inst_frc->U.I.DstReg.Index = temp;
349 inst_frc->U.I.DstReg.WriteMask = RC_MASK_XYZ;
350 inst_frc->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
351 inst_frc->U.I.SrcReg[0].Index = temp;
352 inst_frc->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0;
353
354 two = rc_constants_add_immediate_scalar(&c->Program.Constants, 2, &two_swizzle);
355 inst_mad = rc_insert_new_instruction(c, inst->Prev);
356
357 inst_mad->U.I.Opcode = RC_OPCODE_MAD;
358 inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY;
359 inst_mad->U.I.DstReg.Index = temp;
360 inst_mad->U.I.DstReg.WriteMask = RC_MASK_XYZ;
361 inst_mad->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
362 inst_mad->U.I.SrcReg[0].Index = temp;
363 inst_mad->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0;
364 inst_mad->U.I.SrcReg[1].File = RC_FILE_CONSTANT;
365 inst_mad->U.I.SrcReg[1].Index = two;
366 inst_mad->U.I.SrcReg[1].Swizzle = two_swizzle;
367 inst_mad->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_1111;
368 inst_mad->U.I.SrcReg[2].Negate = RC_MASK_XYZ;
369
370 inst_add = rc_insert_new_instruction(c, inst->Prev);
371
372 inst_add->U.I.Opcode = RC_OPCODE_ADD;
373 inst_add->U.I.DstReg.File = RC_FILE_TEMPORARY;
374 inst_add->U.I.DstReg.Index = temp;
375 inst_add->U.I.DstReg.WriteMask = RC_MASK_XYZ;
376 inst_add->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111;
377 inst_add->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
378 inst_add->U.I.SrcReg[1].Index = temp;
379 inst_add->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XYZ0;
380 inst_add->U.I.SrcReg[1].Abs = 1;
381 inst_add->U.I.SrcReg[1].Negate = RC_MASK_XYZ;
382 } else if (wrapmode == RC_WRAP_MIRRORED_CLAMP) {
383 /*
384 * Mirrored clamp modes are bloody simple, we just use abs
385 * to mirror [0, 1] into [-1, 0]. This works for
386 * all modes i.e. CLAMP, CLAMP_TO_EDGE, and CLAMP_TO_BORDER.
387 */
388 struct rc_instruction *inst_mov;
389
390 inst_mov = rc_insert_new_instruction(c, inst->Prev);
391
392 inst_mov->U.I.Opcode = RC_OPCODE_MOV;
393 inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
394 inst_mov->U.I.DstReg.Index = temp;
395 inst_mov->U.I.DstReg.WriteMask = RC_MASK_XYZ;
396 inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
397 inst_mov->U.I.SrcReg[0].Abs = 1;
398 }
399
400 /* Preserve W for TXP/TXB. */
401 inst_mov = rc_insert_new_instruction(c, inst->Prev);
402
403 inst_mov->U.I.Opcode = RC_OPCODE_MOV;
404 inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
405 inst_mov->U.I.DstReg.Index = temp;
406 inst_mov->U.I.DstReg.WriteMask = RC_MASK_W;
407 inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
408
409 reset_srcreg(&inst->U.I.SrcReg[0]);
410 inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
411 inst->U.I.SrcReg[0].Index = temp;
412 }
413
414 /* NPOT -> POT conversion for 3D textures. */
415 if (inst->U.I.Opcode != RC_OPCODE_KIL &&
416 compiler->state.unit[inst->U.I.TexSrcUnit].clamp_and_scale_before_fetch) {
417 struct rc_instruction *inst_mov;
418 unsigned temp = rc_find_free_temporary(c);
419
420 /* Saturate XYZ. */
421 inst_mov = rc_insert_new_instruction(c, inst->Prev);
422 inst_mov->U.I.Opcode = RC_OPCODE_MOV;
423 inst_mov->U.I.SaturateMode = RC_SATURATE_ZERO_ONE;
424 inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
425 inst_mov->U.I.DstReg.Index = temp;
426 inst_mov->U.I.DstReg.WriteMask = RC_MASK_XYZ;
427 inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
428
429 /* Copy W. */
430 inst_mov = rc_insert_new_instruction(c, inst->Prev);
431 inst_mov->U.I.Opcode = RC_OPCODE_MOV;
432 inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
433 inst_mov->U.I.DstReg.Index = temp;
434 inst_mov->U.I.DstReg.WriteMask = RC_MASK_W;
435 inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
436
437 reset_srcreg(&inst->U.I.SrcReg[0]);
438 inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
439 inst->U.I.SrcReg[0].Index = temp;
440
441 scale_texcoords(compiler, inst, RC_STATE_R300_TEXSCALE_FACTOR);
442 }
443
444 /* Convert SNORM-encoded ATI1N sampled as UNORM to SNORM.
445 * Formula: dst = tex > 0.5 ? tex*2-2 : tex*2
446 */
447 if (inst->U.I.Opcode != RC_OPCODE_KIL &&
448 compiler->state.unit[inst->U.I.TexSrcUnit].convert_unorm_to_snorm) {
449 unsigned two, two_swizzle;
450 struct rc_instruction *inst_mul, *inst_mad, *inst_cnd;
451
452 two = rc_constants_add_immediate_scalar(&c->Program.Constants, 2.35, &two_swizzle);
453
454 inst_mul = rc_insert_new_instruction(c, inst);
455 inst_mul->U.I.Opcode = RC_OPCODE_MUL;
456 inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY;
457 inst_mul->U.I.DstReg.Index = rc_find_free_temporary(c);
458 inst_mul->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
459 inst_mul->U.I.SrcReg[0].Index = rc_find_free_temporary(c); /* redirected TEX output */
460 inst_mul->U.I.SrcReg[1].File = RC_FILE_CONSTANT; /* 2 */
461 inst_mul->U.I.SrcReg[1].Index = two;
462 inst_mul->U.I.SrcReg[1].Swizzle = two_swizzle;
463
464 inst_mad = rc_insert_new_instruction(c, inst_mul);
465 inst_mad->U.I.Opcode = RC_OPCODE_MAD;
466 inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY;
467 inst_mad->U.I.DstReg.Index = rc_find_free_temporary(c);
468 inst_mad->U.I.SrcReg[0] = inst_mul->U.I.SrcReg[0]; /* redirected TEX output */
469 inst_mad->U.I.SrcReg[1] = inst_mul->U.I.SrcReg[1]; /* 2 */
470 inst_mad->U.I.SrcReg[2] = inst_mul->U.I.SrcReg[1]; /* 2 */
471 inst_mad->U.I.SrcReg[2].Negate = RC_MASK_XYZW;
472
473 inst_cnd = rc_insert_new_instruction(c, inst_mad);
474 inst_cnd->U.I.Opcode = RC_OPCODE_CND;
475 inst_cnd->U.I.SaturateMode = inst->U.I.SaturateMode;
476 inst_cnd->U.I.DstReg = inst->U.I.DstReg;
477 inst_cnd->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
478 inst_cnd->U.I.SrcReg[0].Index = inst_mad->U.I.DstReg.Index;
479 inst_cnd->U.I.SrcReg[0].Swizzle = compiler->state.unit[inst->U.I.TexSrcUnit].texture_swizzle;
480 inst_cnd->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
481 inst_cnd->U.I.SrcReg[1].Index = inst_mul->U.I.DstReg.Index;
482 inst_cnd->U.I.SrcReg[1].Swizzle = compiler->state.unit[inst->U.I.TexSrcUnit].texture_swizzle;
483 inst_cnd->U.I.SrcReg[2] = inst_mul->U.I.SrcReg[0]; /* redirected TEX output */
484
485 inst->U.I.SaturateMode = 0;
486 inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
487 inst->U.I.DstReg.Index = inst_mul->U.I.SrcReg[0].Index;
488 inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
489 }
490
491 /* Cannot write texture to output registers or with saturate (all chips),
492 * or with masks (non-r500). */
493 if (inst->U.I.Opcode != RC_OPCODE_KIL &&
494 (inst->U.I.DstReg.File != RC_FILE_TEMPORARY ||
495 inst->U.I.SaturateMode ||
496 (!c->is_r500 && inst->U.I.DstReg.WriteMask != RC_MASK_XYZW))) {
497 struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst);
498
499 inst_mov->U.I.Opcode = RC_OPCODE_MOV;
500 inst_mov->U.I.SaturateMode = inst->U.I.SaturateMode;
501 inst_mov->U.I.DstReg = inst->U.I.DstReg;
502 inst_mov->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
503 inst_mov->U.I.SrcReg[0].Index = rc_find_free_temporary(c);
504
505 inst->U.I.SaturateMode = 0;
506 inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
507 inst->U.I.DstReg.Index = inst_mov->U.I.SrcReg[0].Index;
508 inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
509 }
510
511 /* Cannot read texture coordinate from constants file */
512 if (inst->U.I.SrcReg[0].File != RC_FILE_TEMPORARY && inst->U.I.SrcReg[0].File != RC_FILE_INPUT) {
513 struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev);
514
515 inst_mov->U.I.Opcode = RC_OPCODE_MOV;
516 inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
517 inst_mov->U.I.DstReg.Index = rc_find_free_temporary(c);
518 inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
519
520 reset_srcreg(&inst->U.I.SrcReg[0]);
521 inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
522 inst->U.I.SrcReg[0].Index = inst_mov->U.I.DstReg.Index;
523 }
524
525 return 1;
526 }