r300/compiler: Use memory_pool_array_reserve in r500-fragprog_emit
[mesa.git] / src / mesa / drivers / dri / r300 / compiler / radeon_program_tex.c
1 /*
2 * Copyright (C) 2010 Corbin Simpson
3 *
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial
16 * portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 */
27
28 #include "radeon_program_tex.h"
29
30 /* Series of transformations to be done on textures. */
31
32 static struct rc_src_register shadow_ambient(struct r300_fragment_program_compiler *compiler,
33 int tmu)
34 {
35 struct rc_src_register reg = { 0, };
36
37 if (compiler->enable_shadow_ambient) {
38 reg.File = RC_FILE_CONSTANT;
39 reg.Index = rc_constants_add_state(&compiler->Base.Program.Constants,
40 RC_STATE_SHADOW_AMBIENT, tmu);
41 reg.Swizzle = RC_SWIZZLE_WWWW;
42 } else {
43 reg.File = RC_FILE_NONE;
44 reg.Swizzle = RC_SWIZZLE_0000;
45 }
46 return reg;
47 }
48
49 static void lower_texture_rect(struct r300_fragment_program_compiler *compiler,
50 struct rc_instruction *inst)
51 {
52 struct rc_instruction *inst_rect;
53 unsigned temp = rc_find_free_temporary(&compiler->Base);
54
55 if (inst->U.I.TexSrcTarget == RC_TEXTURE_RECT ||
56 compiler->state.unit[inst->U.I.TexSrcUnit].non_normalized_coords) {
57 inst_rect = rc_insert_new_instruction(&compiler->Base, inst->Prev);
58
59 inst_rect->U.I.Opcode = RC_OPCODE_MUL;
60 inst_rect->U.I.DstReg.File = RC_FILE_TEMPORARY;
61 inst_rect->U.I.DstReg.Index = temp;
62 inst_rect->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
63 inst_rect->U.I.SrcReg[1].File = RC_FILE_CONSTANT;
64 inst_rect->U.I.SrcReg[1].Index =
65 rc_constants_add_state(&compiler->Base.Program.Constants,
66 RC_STATE_R300_TEXRECT_FACTOR, inst->U.I.TexSrcUnit);
67
68 reset_srcreg(&inst->U.I.SrcReg[0]);
69 inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
70 inst->U.I.SrcReg[0].Index = temp;
71
72 inst->U.I.TexSrcTarget = RC_TEXTURE_2D;
73 }
74 }
75
76 /**
77 * Transform TEX, TXP, TXB, and KIL instructions in the following ways:
78 * - implement texture compare (shadow extensions)
79 * - extract non-native source / destination operands
80 * - premultiply texture coordinates for RECT
81 * - extract operand swizzles
82 * - introduce a temporary register when write masks are needed
83 */
84 int radeonTransformTEX(
85 struct radeon_compiler * c,
86 struct rc_instruction * inst,
87 void* data)
88 {
89 struct r300_fragment_program_compiler *compiler =
90 (struct r300_fragment_program_compiler*)data;
91
92 if (inst->U.I.Opcode != RC_OPCODE_TEX &&
93 inst->U.I.Opcode != RC_OPCODE_TXB &&
94 inst->U.I.Opcode != RC_OPCODE_TXP &&
95 inst->U.I.Opcode != RC_OPCODE_KIL)
96 return 0;
97
98 /* ARB_shadow & EXT_shadow_funcs */
99 if (inst->U.I.Opcode != RC_OPCODE_KIL &&
100 c->Program.ShadowSamplers & (1 << inst->U.I.TexSrcUnit)) {
101 rc_compare_func comparefunc = compiler->state.unit[inst->U.I.TexSrcUnit].texture_compare_func;
102
103 if (comparefunc == RC_COMPARE_FUNC_NEVER || comparefunc == RC_COMPARE_FUNC_ALWAYS) {
104 inst->U.I.Opcode = RC_OPCODE_MOV;
105
106 if (comparefunc == RC_COMPARE_FUNC_ALWAYS) {
107 inst->U.I.SrcReg[0].File = RC_FILE_NONE;
108 inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111;
109 } else {
110 inst->U.I.SrcReg[0] = shadow_ambient(compiler, inst->U.I.TexSrcUnit);
111 }
112
113 return 1;
114 } else {
115 rc_compare_func comparefunc = compiler->state.unit[inst->U.I.TexSrcUnit].texture_compare_func;
116 unsigned int depthmode = compiler->state.unit[inst->U.I.TexSrcUnit].depth_texture_mode;
117 struct rc_instruction * inst_rcp = rc_insert_new_instruction(c, inst);
118 struct rc_instruction * inst_mad = rc_insert_new_instruction(c, inst_rcp);
119 struct rc_instruction * inst_cmp = rc_insert_new_instruction(c, inst_mad);
120 int pass, fail;
121
122 inst_rcp->U.I.Opcode = RC_OPCODE_RCP;
123 inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY;
124 inst_rcp->U.I.DstReg.Index = rc_find_free_temporary(c);
125 inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W;
126 inst_rcp->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
127 inst_rcp->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_WWWW;
128
129 inst_cmp->U.I.DstReg = inst->U.I.DstReg;
130 inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
131 inst->U.I.DstReg.Index = rc_find_free_temporary(c);
132 inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
133
134 inst_mad->U.I.Opcode = RC_OPCODE_MAD;
135 inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY;
136 inst_mad->U.I.DstReg.Index = rc_find_free_temporary(c);
137 inst_mad->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
138 inst_mad->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_ZZZZ;
139 inst_mad->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
140 inst_mad->U.I.SrcReg[1].Index = inst_rcp->U.I.DstReg.Index;
141 inst_mad->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW;
142 inst_mad->U.I.SrcReg[2].File = RC_FILE_TEMPORARY;
143 inst_mad->U.I.SrcReg[2].Index = inst->U.I.DstReg.Index;
144 if (depthmode == 0) /* GL_LUMINANCE */
145 inst_mad->U.I.SrcReg[2].Swizzle = RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_Z);
146 else if (depthmode == 2) /* GL_ALPHA */
147 inst_mad->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_WWWW;
148
149 /* Recall that SrcReg[0] is tex, SrcReg[2] is r and:
150 * r < tex <=> -tex+r < 0
151 * r >= tex <=> not (-tex+r < 0 */
152 if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GEQUAL)
153 inst_mad->U.I.SrcReg[2].Negate = inst_mad->U.I.SrcReg[2].Negate ^ RC_MASK_XYZW;
154 else
155 inst_mad->U.I.SrcReg[0].Negate = inst_mad->U.I.SrcReg[0].Negate ^ RC_MASK_XYZW;
156
157 inst_cmp->U.I.Opcode = RC_OPCODE_CMP;
158 /* DstReg has been filled out above */
159 inst_cmp->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
160 inst_cmp->U.I.SrcReg[0].Index = inst_mad->U.I.DstReg.Index;
161
162 if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GREATER) {
163 pass = 1;
164 fail = 2;
165 } else {
166 pass = 2;
167 fail = 1;
168 }
169
170 inst_cmp->U.I.SrcReg[pass].File = RC_FILE_NONE;
171 inst_cmp->U.I.SrcReg[pass].Swizzle = RC_SWIZZLE_1111;
172 inst_cmp->U.I.SrcReg[fail] = shadow_ambient(compiler, inst->U.I.TexSrcUnit);
173 }
174 }
175
176 /* Texture wrap modes don't work on NPOT textures or texrects.
177 *
178 * The game plan is simple. We have two flags, fake_npot and
179 * non_normalized_coords, as well as a tex target. The RECT tex target
180 * will make the emitted code use non-scaled texcoords.
181 *
182 * Non-wrapped/clamped texcoords with NPOT are free in HW. Repeat and
183 * mirroring are not. If we need to repeat, we do:
184 *
185 * MUL temp, texcoord, <scaling factor constant>
186 * FRC temp, temp ; Discard integer portion of coords
187 *
188 * This gives us coords in [0, 1].
189 *
190 * Mirroring is trickier. We're going to start out like repeat:
191 *
192 * MUL temp, texcoord, <scaling factor constant> ; De-mirror across axes
193 * MUL temp, temp, 0.5 ; Pattern repeats in [0, 2]
194 * ; so scale to [0, 1]
195 * FRC temp, temp ; Make the pattern repeat
196 * MAD temp, temp, 2, -1 ; Move the pattern to [-1, 1]
197 * ADD temp, 1, -abs(temp) ; Now comes a neat trick: use abs to mirror the pattern.
198 * ; The pattern is backwards, so reverse it (1-x).
199 *
200 * This gives us coords in [0, 1].
201 *
202 * ~ C & M. ;)
203 */
204 if (inst->U.I.Opcode != RC_OPCODE_KIL &&
205 (inst->U.I.TexSrcTarget == RC_TEXTURE_RECT ||
206 compiler->state.unit[inst->U.I.TexSrcUnit].fake_npot ||
207 compiler->state.unit[inst->U.I.TexSrcUnit].non_normalized_coords)) {
208 rc_wrap_mode wrapmode = compiler->state.unit[inst->U.I.TexSrcUnit].wrap_mode;
209
210 /* R300 cannot sample from rectangles. */
211 if (!compiler->is_r500) {
212 lower_texture_rect(compiler, inst);
213 }
214
215 if (compiler->state.unit[inst->U.I.TexSrcUnit].fake_npot &&
216 wrapmode != RC_WRAP_NONE) {
217 struct rc_instruction *inst_mov;
218 unsigned temp = rc_find_free_temporary(c);
219
220 /* For NPOT fallback, we need normalized coordinates anyway. */
221 if (compiler->is_r500) {
222 lower_texture_rect(compiler, inst);
223 }
224
225 if (wrapmode == RC_WRAP_REPEAT) {
226 /* Both instructions will be paired up. */
227 struct rc_instruction *inst_frc = rc_insert_new_instruction(c, inst->Prev);
228
229 inst_frc->U.I.Opcode = RC_OPCODE_FRC;
230 inst_frc->U.I.DstReg.File = RC_FILE_TEMPORARY;
231 inst_frc->U.I.DstReg.Index = temp;
232 inst_frc->U.I.DstReg.WriteMask = RC_MASK_XYZ;
233 inst_frc->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
234 } else if (wrapmode == RC_WRAP_MIRRORED_REPEAT) {
235 /*
236 * Function:
237 * f(v) = 1 - abs(frac(v * 0.5) * 2 - 1)
238 *
239 * Code:
240 * MUL temp, src0, 0.5
241 * FRC temp, temp
242 * MAD temp, temp, 2, -1
243 * ADD temp, 1, -abs(temp)
244 */
245
246 struct rc_instruction *inst_mul, *inst_frc, *inst_mad, *inst_add;
247 unsigned two, two_swizzle;
248
249 inst_mul = rc_insert_new_instruction(c, inst->Prev);
250
251 inst_mul->U.I.Opcode = RC_OPCODE_MUL;
252 inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY;
253 inst_mul->U.I.DstReg.Index = temp;
254 inst_mul->U.I.DstReg.WriteMask = RC_MASK_XYZ;
255 inst_mul->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
256 inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_HHHH;
257
258 inst_frc = rc_insert_new_instruction(c, inst->Prev);
259
260 inst_frc->U.I.Opcode = RC_OPCODE_FRC;
261 inst_frc->U.I.DstReg.File = RC_FILE_TEMPORARY;
262 inst_frc->U.I.DstReg.Index = temp;
263 inst_frc->U.I.DstReg.WriteMask = RC_MASK_XYZ;
264 inst_frc->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
265 inst_frc->U.I.SrcReg[0].Index = temp;
266 inst_frc->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0;
267
268 two = rc_constants_add_immediate_scalar(&c->Program.Constants, 2, &two_swizzle);
269 inst_mad = rc_insert_new_instruction(c, inst->Prev);
270
271 inst_mad->U.I.Opcode = RC_OPCODE_MAD;
272 inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY;
273 inst_mad->U.I.DstReg.Index = temp;
274 inst_mad->U.I.DstReg.WriteMask = RC_MASK_XYZ;
275 inst_mad->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
276 inst_mad->U.I.SrcReg[0].Index = temp;
277 inst_mad->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0;
278 inst_mad->U.I.SrcReg[1].File = RC_FILE_CONSTANT;
279 inst_mad->U.I.SrcReg[1].Index = two;
280 inst_mad->U.I.SrcReg[1].Swizzle = two_swizzle;
281 inst_mad->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_1111;
282 inst_mad->U.I.SrcReg[2].Negate = RC_MASK_XYZ;
283
284 inst_add = rc_insert_new_instruction(c, inst->Prev);
285
286 inst_add->U.I.Opcode = RC_OPCODE_ADD;
287 inst_add->U.I.DstReg.File = RC_FILE_TEMPORARY;
288 inst_add->U.I.DstReg.Index = temp;
289 inst_add->U.I.DstReg.WriteMask = RC_MASK_XYZ;
290 inst_add->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111;
291 inst_add->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
292 inst_add->U.I.SrcReg[1].Index = temp;
293 inst_add->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XYZ0;
294 inst_add->U.I.SrcReg[1].Abs = 1;
295 inst_add->U.I.SrcReg[1].Negate = RC_MASK_XYZ;
296 } else if (wrapmode == RC_WRAP_MIRRORED_CLAMP) {
297 /*
298 * Mirrored clamp modes are bloody simple, we just use abs
299 * to mirror [0, 1] into [-1, 0]. This works for
300 * all modes i.e. CLAMP, CLAMP_TO_EDGE, and CLAMP_TO_BORDER.
301 */
302 struct rc_instruction *inst_mov;
303
304 inst_mov = rc_insert_new_instruction(c, inst->Prev);
305
306 inst_mov->U.I.Opcode = RC_OPCODE_MOV;
307 inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
308 inst_mov->U.I.DstReg.Index = temp;
309 inst_mov->U.I.DstReg.WriteMask = RC_MASK_XYZ;
310 inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
311 inst_mov->U.I.SrcReg[0].Abs = 1;
312 }
313
314 /* Preserve W for TXP/TXB. */
315 inst_mov = rc_insert_new_instruction(c, inst->Prev);
316
317 inst_mov->U.I.Opcode = RC_OPCODE_MOV;
318 inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
319 inst_mov->U.I.DstReg.Index = temp;
320 inst_mov->U.I.DstReg.WriteMask = RC_MASK_W;
321 inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
322
323 reset_srcreg(&inst->U.I.SrcReg[0]);
324 inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
325 inst->U.I.SrcReg[0].Index = temp;
326 }
327 }
328
329 /* Cannot write texture to output registers (all chips) or with masks (non-r500) */
330 if (inst->U.I.Opcode != RC_OPCODE_KIL &&
331 (inst->U.I.DstReg.File != RC_FILE_TEMPORARY ||
332 (!compiler->is_r500 && inst->U.I.DstReg.WriteMask != RC_MASK_XYZW))) {
333 struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst);
334
335 inst_mov->U.I.Opcode = RC_OPCODE_MOV;
336 inst_mov->U.I.DstReg = inst->U.I.DstReg;
337 inst_mov->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
338 inst_mov->U.I.SrcReg[0].Index = rc_find_free_temporary(c);
339
340 inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
341 inst->U.I.DstReg.Index = inst_mov->U.I.SrcReg[0].Index;
342 inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
343 }
344
345 /* Cannot read texture coordinate from constants file */
346 if (inst->U.I.SrcReg[0].File != RC_FILE_TEMPORARY && inst->U.I.SrcReg[0].File != RC_FILE_INPUT) {
347 struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev);
348
349 inst_mov->U.I.Opcode = RC_OPCODE_MOV;
350 inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
351 inst_mov->U.I.DstReg.Index = rc_find_free_temporary(c);
352 inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
353
354 reset_srcreg(&inst->U.I.SrcReg[0]);
355 inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
356 inst->U.I.SrcReg[0].Index = inst_mov->U.I.DstReg.Index;
357 }
358
359 return 1;
360 }