r300/compiler: Implement KILP opcode.
[mesa.git] / src / mesa / drivers / dri / r300 / compiler / radeon_program_alu.c
1 /*
2 * Copyright (C) 2008 Nicolai Haehnle.
3 *
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial
16 * portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 */
27
28 /**
29 * @file
30 *
31 * Shareable transformations that transform "special" ALU instructions
32 * into ALU instructions that are supported by hardware.
33 *
34 */
35
36 #include "radeon_program_alu.h"
37
38 #include "radeon_compiler.h"
39
40
41 static struct rc_instruction *emit1(
42 struct radeon_compiler * c, struct rc_instruction * after,
43 rc_opcode Opcode, rc_saturate_mode Saturate, struct rc_dst_register DstReg,
44 struct rc_src_register SrcReg)
45 {
46 struct rc_instruction *fpi = rc_insert_new_instruction(c, after);
47
48 fpi->U.I.Opcode = Opcode;
49 fpi->U.I.SaturateMode = Saturate;
50 fpi->U.I.DstReg = DstReg;
51 fpi->U.I.SrcReg[0] = SrcReg;
52 return fpi;
53 }
54
55 static struct rc_instruction *emit2(
56 struct radeon_compiler * c, struct rc_instruction * after,
57 rc_opcode Opcode, rc_saturate_mode Saturate, struct rc_dst_register DstReg,
58 struct rc_src_register SrcReg0, struct rc_src_register SrcReg1)
59 {
60 struct rc_instruction *fpi = rc_insert_new_instruction(c, after);
61
62 fpi->U.I.Opcode = Opcode;
63 fpi->U.I.SaturateMode = Saturate;
64 fpi->U.I.DstReg = DstReg;
65 fpi->U.I.SrcReg[0] = SrcReg0;
66 fpi->U.I.SrcReg[1] = SrcReg1;
67 return fpi;
68 }
69
70 static struct rc_instruction *emit3(
71 struct radeon_compiler * c, struct rc_instruction * after,
72 rc_opcode Opcode, rc_saturate_mode Saturate, struct rc_dst_register DstReg,
73 struct rc_src_register SrcReg0, struct rc_src_register SrcReg1,
74 struct rc_src_register SrcReg2)
75 {
76 struct rc_instruction *fpi = rc_insert_new_instruction(c, after);
77
78 fpi->U.I.Opcode = Opcode;
79 fpi->U.I.SaturateMode = Saturate;
80 fpi->U.I.DstReg = DstReg;
81 fpi->U.I.SrcReg[0] = SrcReg0;
82 fpi->U.I.SrcReg[1] = SrcReg1;
83 fpi->U.I.SrcReg[2] = SrcReg2;
84 return fpi;
85 }
86
87 static struct rc_dst_register dstreg(int file, int index)
88 {
89 struct rc_dst_register dst;
90 dst.File = file;
91 dst.Index = index;
92 dst.WriteMask = RC_MASK_XYZW;
93 dst.RelAddr = 0;
94 return dst;
95 }
96
97 static struct rc_dst_register dstregtmpmask(int index, int mask)
98 {
99 struct rc_dst_register dst = {0};
100 dst.File = RC_FILE_TEMPORARY;
101 dst.Index = index;
102 dst.WriteMask = mask;
103 dst.RelAddr = 0;
104 return dst;
105 }
106
107 static const struct rc_src_register builtin_zero = {
108 .File = RC_FILE_NONE,
109 .Index = 0,
110 .Swizzle = RC_SWIZZLE_0000
111 };
112 static const struct rc_src_register builtin_one = {
113 .File = RC_FILE_NONE,
114 .Index = 0,
115 .Swizzle = RC_SWIZZLE_1111
116 };
117 static const struct rc_src_register srcreg_undefined = {
118 .File = RC_FILE_NONE,
119 .Index = 0,
120 .Swizzle = RC_SWIZZLE_XYZW
121 };
122
123 static struct rc_src_register srcreg(int file, int index)
124 {
125 struct rc_src_register src = srcreg_undefined;
126 src.File = file;
127 src.Index = index;
128 return src;
129 }
130
131 static struct rc_src_register srcregswz(int file, int index, int swz)
132 {
133 struct rc_src_register src = srcreg_undefined;
134 src.File = file;
135 src.Index = index;
136 src.Swizzle = swz;
137 return src;
138 }
139
140 static struct rc_src_register absolute(struct rc_src_register reg)
141 {
142 struct rc_src_register newreg = reg;
143 newreg.Abs = 1;
144 newreg.Negate = RC_MASK_NONE;
145 return newreg;
146 }
147
148 static struct rc_src_register negate(struct rc_src_register reg)
149 {
150 struct rc_src_register newreg = reg;
151 newreg.Negate = newreg.Negate ^ RC_MASK_XYZW;
152 return newreg;
153 }
154
155 static struct rc_src_register swizzle(struct rc_src_register reg,
156 rc_swizzle x, rc_swizzle y, rc_swizzle z, rc_swizzle w)
157 {
158 struct rc_src_register swizzled = reg;
159 swizzled.Swizzle = combine_swizzles4(reg.Swizzle, x, y, z, w);
160 return swizzled;
161 }
162
163 static struct rc_src_register swizzle_smear(struct rc_src_register reg,
164 rc_swizzle x)
165 {
166 return swizzle(reg, x, x, x, x);
167 }
168
169 static struct rc_src_register swizzle_xxxx(struct rc_src_register reg)
170 {
171 return swizzle_smear(reg, RC_SWIZZLE_X);
172 }
173
174 static struct rc_src_register swizzle_yyyy(struct rc_src_register reg)
175 {
176 return swizzle_smear(reg, RC_SWIZZLE_Y);
177 }
178
179 static struct rc_src_register swizzle_zzzz(struct rc_src_register reg)
180 {
181 return swizzle_smear(reg, RC_SWIZZLE_Z);
182 }
183
184 static struct rc_src_register swizzle_wwww(struct rc_src_register reg)
185 {
186 return swizzle_smear(reg, RC_SWIZZLE_W);
187 }
188
189 static void transform_ABS(struct radeon_compiler* c,
190 struct rc_instruction* inst)
191 {
192 struct rc_src_register src = inst->U.I.SrcReg[0];
193 src.Abs = 1;
194 src.Negate = RC_MASK_NONE;
195 emit1(c, inst->Prev, RC_OPCODE_MOV, inst->U.I.SaturateMode, inst->U.I.DstReg, src);
196 rc_remove_instruction(inst);
197 }
198
199 static void transform_CEIL(struct radeon_compiler* c,
200 struct rc_instruction* inst)
201 {
202 /* Assuming:
203 * ceil(x) = -floor(-x)
204 *
205 * After inlining floor:
206 * ceil(x) = -(-x-frac(-x))
207 *
208 * After simplification:
209 * ceil(x) = x+frac(-x)
210 */
211
212 int tempreg = rc_find_free_temporary(c);
213 emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstreg(RC_FILE_TEMPORARY, tempreg), negate(inst->U.I.SrcReg[0]));
214 emit2(c, inst->Prev, RC_OPCODE_ADD, inst->U.I.SaturateMode, inst->U.I.DstReg,
215 inst->U.I.SrcReg[0], srcreg(RC_FILE_TEMPORARY, tempreg));
216 rc_remove_instruction(inst);
217 }
218
219 static void transform_DP3(struct radeon_compiler* c,
220 struct rc_instruction* inst)
221 {
222 struct rc_src_register src0 = inst->U.I.SrcReg[0];
223 struct rc_src_register src1 = inst->U.I.SrcReg[1];
224 src0.Negate &= ~RC_MASK_W;
225 src0.Swizzle &= ~(7 << (3 * 3));
226 src0.Swizzle |= RC_SWIZZLE_ZERO << (3 * 3);
227 src1.Negate &= ~RC_MASK_W;
228 src1.Swizzle &= ~(7 << (3 * 3));
229 src1.Swizzle |= RC_SWIZZLE_ZERO << (3 * 3);
230 emit2(c, inst->Prev, RC_OPCODE_DP4, inst->U.I.SaturateMode, inst->U.I.DstReg, src0, src1);
231 rc_remove_instruction(inst);
232 }
233
234 static void transform_DPH(struct radeon_compiler* c,
235 struct rc_instruction* inst)
236 {
237 struct rc_src_register src0 = inst->U.I.SrcReg[0];
238 src0.Negate &= ~RC_MASK_W;
239 src0.Swizzle &= ~(7 << (3 * 3));
240 src0.Swizzle |= RC_SWIZZLE_ONE << (3 * 3);
241 emit2(c, inst->Prev, RC_OPCODE_DP4, inst->U.I.SaturateMode, inst->U.I.DstReg, src0, inst->U.I.SrcReg[1]);
242 rc_remove_instruction(inst);
243 }
244
245 /**
246 * [1, src0.y*src1.y, src0.z, src1.w]
247 * So basically MUL with lotsa swizzling.
248 */
249 static void transform_DST(struct radeon_compiler* c,
250 struct rc_instruction* inst)
251 {
252 emit2(c, inst->Prev, RC_OPCODE_MUL, inst->U.I.SaturateMode, inst->U.I.DstReg,
253 swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_ONE, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_ONE),
254 swizzle(inst->U.I.SrcReg[1], RC_SWIZZLE_ONE, RC_SWIZZLE_Y, RC_SWIZZLE_ONE, RC_SWIZZLE_W));
255 rc_remove_instruction(inst);
256 }
257
258 static void transform_FLR(struct radeon_compiler* c,
259 struct rc_instruction* inst)
260 {
261 int tempreg = rc_find_free_temporary(c);
262 emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[0]);
263 emit2(c, inst->Prev, RC_OPCODE_ADD, inst->U.I.SaturateMode, inst->U.I.DstReg,
264 inst->U.I.SrcReg[0], negate(srcreg(RC_FILE_TEMPORARY, tempreg)));
265 rc_remove_instruction(inst);
266 }
267
268 /**
269 * Definition of LIT (from ARB_fragment_program):
270 *
271 * tmp = VectorLoad(op0);
272 * if (tmp.x < 0) tmp.x = 0;
273 * if (tmp.y < 0) tmp.y = 0;
274 * if (tmp.w < -(128.0-epsilon)) tmp.w = -(128.0-epsilon);
275 * else if (tmp.w > 128-epsilon) tmp.w = 128-epsilon;
276 * result.x = 1.0;
277 * result.y = tmp.x;
278 * result.z = (tmp.x > 0) ? RoughApproxPower(tmp.y, tmp.w) : 0.0;
279 * result.w = 1.0;
280 *
281 * The longest path of computation is the one leading to result.z,
282 * consisting of 5 operations. This implementation of LIT takes
283 * 5 slots, if the subsequent optimization passes are clever enough
284 * to pair instructions correctly.
285 */
286 static void transform_LIT(struct radeon_compiler* c,
287 struct rc_instruction* inst)
288 {
289 unsigned int constant;
290 unsigned int constant_swizzle;
291 unsigned int temp;
292 struct rc_src_register srctemp;
293
294 constant = rc_constants_add_immediate_scalar(&c->Program.Constants, -127.999999, &constant_swizzle);
295
296 if (inst->U.I.DstReg.WriteMask != RC_MASK_XYZW || inst->U.I.DstReg.File != RC_FILE_TEMPORARY) {
297 struct rc_instruction * inst_mov;
298
299 inst_mov = emit1(c, inst,
300 RC_OPCODE_MOV, 0, inst->U.I.DstReg,
301 srcreg(RC_FILE_TEMPORARY, rc_find_free_temporary(c)));
302
303 inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
304 inst->U.I.DstReg.Index = inst_mov->U.I.SrcReg[0].Index;
305 inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
306 }
307
308 temp = inst->U.I.DstReg.Index;
309 srctemp = srcreg(RC_FILE_TEMPORARY, temp);
310
311 /* tmp.x = max(0.0, Src.x); */
312 /* tmp.y = max(0.0, Src.y); */
313 /* tmp.w = clamp(Src.z, -128+eps, 128-eps); */
314 emit2(c, inst->Prev, RC_OPCODE_MAX, 0,
315 dstregtmpmask(temp, RC_MASK_XYW),
316 inst->U.I.SrcReg[0],
317 swizzle(srcreg(RC_FILE_CONSTANT, constant),
318 RC_SWIZZLE_ZERO, RC_SWIZZLE_ZERO, RC_SWIZZLE_ZERO, constant_swizzle&3));
319 emit2(c, inst->Prev, RC_OPCODE_MIN, 0,
320 dstregtmpmask(temp, RC_MASK_Z),
321 swizzle_wwww(srctemp),
322 negate(srcregswz(RC_FILE_CONSTANT, constant, constant_swizzle)));
323
324 /* tmp.w = Pow(tmp.y, tmp.w) */
325 emit1(c, inst->Prev, RC_OPCODE_LG2, 0,
326 dstregtmpmask(temp, RC_MASK_W),
327 swizzle_yyyy(srctemp));
328 emit2(c, inst->Prev, RC_OPCODE_MUL, 0,
329 dstregtmpmask(temp, RC_MASK_W),
330 swizzle_wwww(srctemp),
331 swizzle_zzzz(srctemp));
332 emit1(c, inst->Prev, RC_OPCODE_EX2, 0,
333 dstregtmpmask(temp, RC_MASK_W),
334 swizzle_wwww(srctemp));
335
336 /* tmp.z = (tmp.x > 0) ? tmp.w : 0.0 */
337 emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode,
338 dstregtmpmask(temp, RC_MASK_Z),
339 negate(swizzle_xxxx(srctemp)),
340 swizzle_wwww(srctemp),
341 builtin_zero);
342
343 /* tmp.x, tmp.y, tmp.w = 1.0, tmp.x, 1.0 */
344 emit1(c, inst->Prev, RC_OPCODE_MOV, inst->U.I.SaturateMode,
345 dstregtmpmask(temp, RC_MASK_XYW),
346 swizzle(srctemp, RC_SWIZZLE_ONE, RC_SWIZZLE_X, RC_SWIZZLE_ONE, RC_SWIZZLE_ONE));
347
348 rc_remove_instruction(inst);
349 }
350
351 static void transform_LRP(struct radeon_compiler* c,
352 struct rc_instruction* inst)
353 {
354 int tempreg = rc_find_free_temporary(c);
355
356 emit2(c, inst->Prev, RC_OPCODE_ADD, 0,
357 dstreg(RC_FILE_TEMPORARY, tempreg),
358 inst->U.I.SrcReg[1], negate(inst->U.I.SrcReg[2]));
359 emit3(c, inst->Prev, RC_OPCODE_MAD, inst->U.I.SaturateMode,
360 inst->U.I.DstReg,
361 inst->U.I.SrcReg[0], srcreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[2]);
362
363 rc_remove_instruction(inst);
364 }
365
366 static void transform_POW(struct radeon_compiler* c,
367 struct rc_instruction* inst)
368 {
369 int tempreg = rc_find_free_temporary(c);
370 struct rc_dst_register tempdst = dstreg(RC_FILE_TEMPORARY, tempreg);
371 struct rc_src_register tempsrc = srcreg(RC_FILE_TEMPORARY, tempreg);
372 tempdst.WriteMask = RC_MASK_W;
373 tempsrc.Swizzle = RC_SWIZZLE_WWWW;
374
375 emit1(c, inst->Prev, RC_OPCODE_LG2, 0, tempdst, swizzle_xxxx(inst->U.I.SrcReg[0]));
376 emit2(c, inst->Prev, RC_OPCODE_MUL, 0, tempdst, tempsrc, swizzle_xxxx(inst->U.I.SrcReg[1]));
377 emit1(c, inst->Prev, RC_OPCODE_EX2, inst->U.I.SaturateMode, inst->U.I.DstReg, tempsrc);
378
379 rc_remove_instruction(inst);
380 }
381
382 static void transform_RSQ(struct radeon_compiler* c,
383 struct rc_instruction* inst)
384 {
385 inst->U.I.SrcReg[0] = absolute(inst->U.I.SrcReg[0]);
386 }
387
388 static void transform_SEQ(struct radeon_compiler* c,
389 struct rc_instruction* inst)
390 {
391 int tempreg = rc_find_free_temporary(c);
392
393 emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1]));
394 emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg,
395 negate(absolute(srcreg(RC_FILE_TEMPORARY, tempreg))), builtin_zero, builtin_one);
396
397 rc_remove_instruction(inst);
398 }
399
400 static void transform_SFL(struct radeon_compiler* c,
401 struct rc_instruction* inst)
402 {
403 emit1(c, inst->Prev, RC_OPCODE_MOV, inst->U.I.SaturateMode, inst->U.I.DstReg, builtin_zero);
404 rc_remove_instruction(inst);
405 }
406
407 static void transform_SGE(struct radeon_compiler* c,
408 struct rc_instruction* inst)
409 {
410 int tempreg = rc_find_free_temporary(c);
411
412 emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1]));
413 emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg,
414 srcreg(RC_FILE_TEMPORARY, tempreg), builtin_zero, builtin_one);
415
416 rc_remove_instruction(inst);
417 }
418
419 static void transform_SGT(struct radeon_compiler* c,
420 struct rc_instruction* inst)
421 {
422 int tempreg = rc_find_free_temporary(c);
423
424 emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), negate(inst->U.I.SrcReg[0]), inst->U.I.SrcReg[1]);
425 emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg,
426 srcreg(RC_FILE_TEMPORARY, tempreg), builtin_one, builtin_zero);
427
428 rc_remove_instruction(inst);
429 }
430
431 static void transform_SLE(struct radeon_compiler* c,
432 struct rc_instruction* inst)
433 {
434 int tempreg = rc_find_free_temporary(c);
435
436 emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), negate(inst->U.I.SrcReg[0]), inst->U.I.SrcReg[1]);
437 emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg,
438 srcreg(RC_FILE_TEMPORARY, tempreg), builtin_zero, builtin_one);
439
440 rc_remove_instruction(inst);
441 }
442
443 static void transform_SLT(struct radeon_compiler* c,
444 struct rc_instruction* inst)
445 {
446 int tempreg = rc_find_free_temporary(c);
447
448 emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1]));
449 emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg,
450 srcreg(RC_FILE_TEMPORARY, tempreg), builtin_one, builtin_zero);
451
452 rc_remove_instruction(inst);
453 }
454
455 static void transform_SNE(struct radeon_compiler* c,
456 struct rc_instruction* inst)
457 {
458 int tempreg = rc_find_free_temporary(c);
459
460 emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1]));
461 emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg,
462 negate(absolute(srcreg(RC_FILE_TEMPORARY, tempreg))), builtin_one, builtin_zero);
463
464 rc_remove_instruction(inst);
465 }
466
467 static void transform_SUB(struct radeon_compiler* c,
468 struct rc_instruction* inst)
469 {
470 inst->U.I.Opcode = RC_OPCODE_ADD;
471 inst->U.I.SrcReg[1] = negate(inst->U.I.SrcReg[1]);
472 }
473
474 static void transform_SWZ(struct radeon_compiler* c,
475 struct rc_instruction* inst)
476 {
477 inst->U.I.Opcode = RC_OPCODE_MOV;
478 }
479
480 static void transform_XPD(struct radeon_compiler* c,
481 struct rc_instruction* inst)
482 {
483 int tempreg = rc_find_free_temporary(c);
484
485 emit2(c, inst->Prev, RC_OPCODE_MUL, 0, dstreg(RC_FILE_TEMPORARY, tempreg),
486 swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_W),
487 swizzle(inst->U.I.SrcReg[1], RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_W));
488 emit3(c, inst->Prev, RC_OPCODE_MAD, inst->U.I.SaturateMode, inst->U.I.DstReg,
489 swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_W),
490 swizzle(inst->U.I.SrcReg[1], RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_W),
491 negate(srcreg(RC_FILE_TEMPORARY, tempreg)));
492
493 rc_remove_instruction(inst);
494 }
495
496
497 /**
498 * Can be used as a transformation for @ref radeonClauseLocalTransform,
499 * no userData necessary.
500 *
501 * Eliminates the following ALU instructions:
502 * ABS, CEIL, DPH, DST, FLR, LIT, LRP, POW, SEQ, SFL, SGE, SGT, SLE, SLT, SNE, SUB, SWZ, XPD
503 * using:
504 * MOV, ADD, MUL, MAD, FRC, DP3, LG2, EX2, CMP
505 *
506 * Transforms RSQ to Radeon's native RSQ by explicitly setting
507 * absolute value.
508 *
509 * @note should be applicable to R300 and R500 fragment programs.
510 */
511 int radeonTransformALU(
512 struct radeon_compiler * c,
513 struct rc_instruction* inst,
514 void* unused)
515 {
516 switch(inst->U.I.Opcode) {
517 case RC_OPCODE_ABS: transform_ABS(c, inst); return 1;
518 case RC_OPCODE_CEIL: transform_CEIL(c, inst); return 1;
519 case RC_OPCODE_DPH: transform_DPH(c, inst); return 1;
520 case RC_OPCODE_DST: transform_DST(c, inst); return 1;
521 case RC_OPCODE_FLR: transform_FLR(c, inst); return 1;
522 case RC_OPCODE_LIT: transform_LIT(c, inst); return 1;
523 case RC_OPCODE_LRP: transform_LRP(c, inst); return 1;
524 case RC_OPCODE_POW: transform_POW(c, inst); return 1;
525 case RC_OPCODE_RSQ: transform_RSQ(c, inst); return 1;
526 case RC_OPCODE_SEQ: transform_SEQ(c, inst); return 1;
527 case RC_OPCODE_SFL: transform_SFL(c, inst); return 1;
528 case RC_OPCODE_SGE: transform_SGE(c, inst); return 1;
529 case RC_OPCODE_SGT: transform_SGT(c, inst); return 1;
530 case RC_OPCODE_SLE: transform_SLE(c, inst); return 1;
531 case RC_OPCODE_SLT: transform_SLT(c, inst); return 1;
532 case RC_OPCODE_SNE: transform_SNE(c, inst); return 1;
533 case RC_OPCODE_SUB: transform_SUB(c, inst); return 1;
534 case RC_OPCODE_SWZ: transform_SWZ(c, inst); return 1;
535 case RC_OPCODE_XPD: transform_XPD(c, inst); return 1;
536 default:
537 return 0;
538 }
539 }
540
541
542 static void transform_r300_vertex_ABS(struct radeon_compiler* c,
543 struct rc_instruction* inst)
544 {
545 /* Note: r500 can take absolute values, but r300 cannot. */
546 inst->U.I.Opcode = RC_OPCODE_MAX;
547 inst->U.I.SrcReg[1] = inst->U.I.SrcReg[0];
548 inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW;
549 }
550
551 static void transform_r300_vertex_CMP(struct radeon_compiler* c,
552 struct rc_instruction* inst)
553 {
554 /* There is no decent CMP available, so let's rig one up.
555 * CMP is defined as dst = src0 < 0.0 ? src1 : src2
556 * The following sequence consumes two temps and two extra slots
557 * (the second temp and the second slot is consumed by transform_LRP),
558 * but should be equivalent:
559 *
560 * SLT tmp0, src0, 0.0
561 * LRP dst, tmp0, src1, src2
562 *
563 * Yes, I know, I'm a mad scientist. ~ C. & M. */
564 int tempreg0 = rc_find_free_temporary(c);
565
566 /* SLT tmp0, src0, 0.0 */
567 emit2(c, inst->Prev, RC_OPCODE_SLT, 0,
568 dstreg(RC_FILE_TEMPORARY, tempreg0),
569 inst->U.I.SrcReg[0], builtin_zero);
570
571 /* LRP dst, tmp0, src1, src2 */
572 transform_LRP(c,
573 emit3(c, inst->Prev, RC_OPCODE_LRP, 0,
574 inst->U.I.DstReg,
575 srcreg(RC_FILE_TEMPORARY, tempreg0), inst->U.I.SrcReg[1], inst->U.I.SrcReg[2]));
576
577 rc_remove_instruction(inst);
578 }
579
580 static void transform_r300_vertex_fix_LIT(struct radeon_compiler* c,
581 struct rc_instruction* inst)
582 {
583 int tempreg = rc_find_free_temporary(c);
584 unsigned constant_swizzle;
585 int constant = rc_constants_add_immediate_scalar(&c->Program.Constants,
586 0.0000000000000000001,
587 &constant_swizzle);
588
589 /* MOV dst, src */
590 emit1(c, inst->Prev, RC_OPCODE_MOV, 0,
591 dstreg(RC_FILE_TEMPORARY, tempreg),
592 inst->U.I.SrcReg[0]);
593
594 /* MAX dst.z, src, 0.00...001 */
595 emit2(c, inst->Prev, RC_OPCODE_MAX, 0,
596 dstregtmpmask(tempreg, RC_MASK_Y),
597 srcreg(RC_FILE_TEMPORARY, tempreg),
598 srcregswz(RC_FILE_CONSTANT, constant, constant_swizzle));
599
600 inst->U.I.SrcReg[0] = srcreg(RC_FILE_TEMPORARY, tempreg);
601 }
602
603 static void transform_r300_vertex_SEQ(struct radeon_compiler *c,
604 struct rc_instruction *inst)
605 {
606 /* x = y <==> x >= y && y >= x */
607 int tmp = rc_find_free_temporary(c);
608
609 /* x <= y */
610 emit2(c, inst->Prev, RC_OPCODE_SGE, 0,
611 dstregtmpmask(tmp, inst->U.I.DstReg.WriteMask),
612 inst->U.I.SrcReg[0],
613 inst->U.I.SrcReg[1]);
614
615 /* y <= x */
616 emit2(c, inst->Prev, RC_OPCODE_SGE, 0,
617 inst->U.I.DstReg,
618 inst->U.I.SrcReg[1],
619 inst->U.I.SrcReg[0]);
620
621 /* x && y = x * y */
622 emit2(c, inst->Prev, RC_OPCODE_MUL, 0,
623 inst->U.I.DstReg,
624 srcreg(RC_FILE_TEMPORARY, tmp),
625 srcreg(inst->U.I.DstReg.File, inst->U.I.DstReg.Index));
626
627 rc_remove_instruction(inst);
628 }
629
630 static void transform_r300_vertex_SNE(struct radeon_compiler *c,
631 struct rc_instruction *inst)
632 {
633 /* x != y <==> x < y || y < x */
634 int tmp = rc_find_free_temporary(c);
635
636 /* x < y */
637 emit2(c, inst->Prev, RC_OPCODE_SLT, 0,
638 dstregtmpmask(tmp, inst->U.I.DstReg.WriteMask),
639 inst->U.I.SrcReg[0],
640 inst->U.I.SrcReg[1]);
641
642 /* y < x */
643 emit2(c, inst->Prev, RC_OPCODE_SLT, 0,
644 inst->U.I.DstReg,
645 inst->U.I.SrcReg[1],
646 inst->U.I.SrcReg[0]);
647
648 /* x || y = max(x, y) */
649 emit2(c, inst->Prev, RC_OPCODE_MAX, 0,
650 inst->U.I.DstReg,
651 srcreg(RC_FILE_TEMPORARY, tmp),
652 srcreg(inst->U.I.DstReg.File, inst->U.I.DstReg.Index));
653
654 rc_remove_instruction(inst);
655 }
656
657 static void transform_r300_vertex_SGT(struct radeon_compiler* c,
658 struct rc_instruction* inst)
659 {
660 /* x > y <==> -x < -y */
661 inst->U.I.Opcode = RC_OPCODE_SLT;
662 inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
663 inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW;
664 }
665
666 static void transform_r300_vertex_SLE(struct radeon_compiler* c,
667 struct rc_instruction* inst)
668 {
669 /* x <= y <==> -x >= -y */
670 inst->U.I.Opcode = RC_OPCODE_SGE;
671 inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
672 inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW;
673 }
674
675 /**
676 * For use with radeonLocalTransform, this transforms non-native ALU
677 * instructions of the r300 up to r500 vertex engine.
678 */
679 int r300_transform_vertex_alu(
680 struct radeon_compiler * c,
681 struct rc_instruction* inst,
682 void* unused)
683 {
684 switch(inst->U.I.Opcode) {
685 case RC_OPCODE_ABS: transform_r300_vertex_ABS(c, inst); return 1;
686 case RC_OPCODE_CEIL: transform_CEIL(c, inst); return 1;
687 case RC_OPCODE_CMP: transform_r300_vertex_CMP(c, inst); return 1;
688 case RC_OPCODE_DP3: transform_DP3(c, inst); return 1;
689 case RC_OPCODE_DPH: transform_DPH(c, inst); return 1;
690 case RC_OPCODE_FLR: transform_FLR(c, inst); return 1;
691 case RC_OPCODE_LIT: transform_r300_vertex_fix_LIT(c, inst); return 1;
692 case RC_OPCODE_LRP: transform_LRP(c, inst); return 1;
693 case RC_OPCODE_SEQ:
694 if (!c->is_r500) {
695 transform_r300_vertex_SEQ(c, inst);
696 return 1;
697 }
698 return 0;
699 case RC_OPCODE_SFL: transform_SFL(c, inst); return 1;
700 case RC_OPCODE_SGT: transform_r300_vertex_SGT(c, inst); return 1;
701 case RC_OPCODE_SLE: transform_r300_vertex_SLE(c, inst); return 1;
702 case RC_OPCODE_SNE:
703 if (!c->is_r500) {
704 transform_r300_vertex_SNE(c, inst);
705 return 1;
706 }
707 return 0;
708 case RC_OPCODE_SUB: transform_SUB(c, inst); return 1;
709 case RC_OPCODE_SWZ: transform_SWZ(c, inst); return 1;
710 case RC_OPCODE_XPD: transform_XPD(c, inst); return 1;
711 default:
712 return 0;
713 }
714 }
715
716 static void sincos_constants(struct radeon_compiler* c, unsigned int *constants)
717 {
718 static const float SinCosConsts[2][4] = {
719 {
720 1.273239545, /* 4/PI */
721 -0.405284735, /* -4/(PI*PI) */
722 3.141592654, /* PI */
723 0.2225 /* weight */
724 },
725 {
726 0.75,
727 0.5,
728 0.159154943, /* 1/(2*PI) */
729 6.283185307 /* 2*PI */
730 }
731 };
732 int i;
733
734 for(i = 0; i < 2; ++i)
735 constants[i] = rc_constants_add_immediate_vec4(&c->Program.Constants, SinCosConsts[i]);
736 }
737
738 /**
739 * Approximate sin(x), where x is clamped to (-pi/2, pi/2).
740 *
741 * MUL tmp.xy, src, { 4/PI, -4/(PI^2) }
742 * MAD tmp.x, tmp.y, |src|, tmp.x
743 * MAD tmp.y, tmp.x, |tmp.x|, -tmp.x
744 * MAD dest, tmp.y, weight, tmp.x
745 */
746 static void sin_approx(
747 struct radeon_compiler* c, struct rc_instruction * inst,
748 struct rc_dst_register dst, struct rc_src_register src, const unsigned int* constants)
749 {
750 unsigned int tempreg = rc_find_free_temporary(c);
751
752 emit2(c, inst->Prev, RC_OPCODE_MUL, 0, dstregtmpmask(tempreg, RC_MASK_XY),
753 swizzle_xxxx(src),
754 srcreg(RC_FILE_CONSTANT, constants[0]));
755 emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_X),
756 swizzle_yyyy(srcreg(RC_FILE_TEMPORARY, tempreg)),
757 absolute(swizzle_xxxx(src)),
758 swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg)));
759 emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_Y),
760 swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg)),
761 absolute(swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg))),
762 negate(swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg))));
763 emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dst,
764 swizzle_yyyy(srcreg(RC_FILE_TEMPORARY, tempreg)),
765 swizzle_wwww(srcreg(RC_FILE_CONSTANT, constants[0])),
766 swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg)));
767 }
768
769 /**
770 * Translate the trigonometric functions COS, SIN, and SCS
771 * using only the basic instructions
772 * MOV, ADD, MUL, MAD, FRC
773 */
774 int radeonTransformTrigSimple(struct radeon_compiler* c,
775 struct rc_instruction* inst,
776 void* unused)
777 {
778 if (inst->U.I.Opcode != RC_OPCODE_COS &&
779 inst->U.I.Opcode != RC_OPCODE_SIN &&
780 inst->U.I.Opcode != RC_OPCODE_SCS)
781 return 0;
782
783 unsigned int constants[2];
784 unsigned int tempreg = rc_find_free_temporary(c);
785
786 sincos_constants(c, constants);
787
788 if (inst->U.I.Opcode == RC_OPCODE_COS) {
789 /* MAD tmp.x, src, 1/(2*PI), 0.75 */
790 /* FRC tmp.x, tmp.x */
791 /* MAD tmp.z, tmp.x, 2*PI, -PI */
792 emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_W),
793 swizzle_xxxx(inst->U.I.SrcReg[0]),
794 swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[1])),
795 swizzle_xxxx(srcreg(RC_FILE_CONSTANT, constants[1])));
796 emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(tempreg, RC_MASK_W),
797 swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg)));
798 emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_W),
799 swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg)),
800 swizzle_wwww(srcreg(RC_FILE_CONSTANT, constants[1])),
801 negate(swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[0]))));
802
803 sin_approx(c, inst, inst->U.I.DstReg,
804 swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg)),
805 constants);
806 } else if (inst->U.I.Opcode == RC_OPCODE_SIN) {
807 emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_W),
808 swizzle_xxxx(inst->U.I.SrcReg[0]),
809 swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[1])),
810 swizzle_yyyy(srcreg(RC_FILE_CONSTANT, constants[1])));
811 emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(tempreg, RC_MASK_W),
812 swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg)));
813 emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_W),
814 swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg)),
815 swizzle_wwww(srcreg(RC_FILE_CONSTANT, constants[1])),
816 negate(swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[0]))));
817
818 sin_approx(c, inst, inst->U.I.DstReg,
819 swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg)),
820 constants);
821 } else {
822 emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_XY),
823 swizzle_xxxx(inst->U.I.SrcReg[0]),
824 swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[1])),
825 swizzle(srcreg(RC_FILE_CONSTANT, constants[1]), RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_W));
826 emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(tempreg, RC_MASK_XY),
827 srcreg(RC_FILE_TEMPORARY, tempreg));
828 emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_XY),
829 srcreg(RC_FILE_TEMPORARY, tempreg),
830 swizzle_wwww(srcreg(RC_FILE_CONSTANT, constants[1])),
831 negate(swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[0]))));
832
833 struct rc_dst_register dst = inst->U.I.DstReg;
834
835 dst.WriteMask = inst->U.I.DstReg.WriteMask & RC_MASK_X;
836 sin_approx(c, inst, dst,
837 swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg)),
838 constants);
839
840 dst.WriteMask = inst->U.I.DstReg.WriteMask & RC_MASK_Y;
841 sin_approx(c, inst, dst,
842 swizzle_yyyy(srcreg(RC_FILE_TEMPORARY, tempreg)),
843 constants);
844 }
845
846 rc_remove_instruction(inst);
847
848 return 1;
849 }
850
851 static void r300_transform_SIN_COS_SCS(struct radeon_compiler *c,
852 struct rc_instruction *inst,
853 unsigned srctmp)
854 {
855 if (inst->U.I.Opcode == RC_OPCODE_COS) {
856 emit1(c, inst->Prev, RC_OPCODE_COS, inst->U.I.SaturateMode, inst->U.I.DstReg,
857 srcregswz(RC_FILE_TEMPORARY, srctmp, RC_SWIZZLE_WWWW));
858 } else if (inst->U.I.Opcode == RC_OPCODE_SIN) {
859 emit1(c, inst->Prev, RC_OPCODE_SIN, inst->U.I.SaturateMode,
860 inst->U.I.DstReg, srcregswz(RC_FILE_TEMPORARY, srctmp, RC_SWIZZLE_WWWW));
861 } else if (inst->U.I.Opcode == RC_OPCODE_SCS) {
862 struct rc_dst_register moddst = inst->U.I.DstReg;
863
864 if (inst->U.I.DstReg.WriteMask & RC_MASK_X) {
865 moddst.WriteMask = RC_MASK_X;
866 emit1(c, inst->Prev, RC_OPCODE_COS, inst->U.I.SaturateMode, moddst,
867 srcregswz(RC_FILE_TEMPORARY, srctmp, RC_SWIZZLE_WWWW));
868 }
869 if (inst->U.I.DstReg.WriteMask & RC_MASK_Y) {
870 moddst.WriteMask = RC_MASK_Y;
871 emit1(c, inst->Prev, RC_OPCODE_SIN, inst->U.I.SaturateMode, moddst,
872 srcregswz(RC_FILE_TEMPORARY, srctmp, RC_SWIZZLE_WWWW));
873 }
874 }
875
876 rc_remove_instruction(inst);
877 }
878
879
880 /**
881 * Transform the trigonometric functions COS, SIN, and SCS
882 * to include pre-scaling by 1/(2*PI) and taking the fractional
883 * part, so that the input to COS and SIN is always in the range [0,1).
884 * SCS is replaced by one COS and one SIN instruction.
885 *
886 * @warning This transformation implicitly changes the semantics of SIN and COS!
887 */
888 int radeonTransformTrigScale(struct radeon_compiler* c,
889 struct rc_instruction* inst,
890 void* unused)
891 {
892 if (inst->U.I.Opcode != RC_OPCODE_COS &&
893 inst->U.I.Opcode != RC_OPCODE_SIN &&
894 inst->U.I.Opcode != RC_OPCODE_SCS)
895 return 0;
896
897 static const float RCP_2PI = 0.15915494309189535;
898 unsigned int temp;
899 unsigned int constant;
900 unsigned int constant_swizzle;
901
902 temp = rc_find_free_temporary(c);
903 constant = rc_constants_add_immediate_scalar(&c->Program.Constants, RCP_2PI, &constant_swizzle);
904
905 emit2(c, inst->Prev, RC_OPCODE_MUL, 0, dstregtmpmask(temp, RC_MASK_W),
906 swizzle_xxxx(inst->U.I.SrcReg[0]),
907 srcregswz(RC_FILE_CONSTANT, constant, constant_swizzle));
908 emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(temp, RC_MASK_W),
909 srcreg(RC_FILE_TEMPORARY, temp));
910
911 r300_transform_SIN_COS_SCS(c, inst, temp);
912 return 1;
913 }
914
915 /**
916 * Transform the trigonometric functions COS, SIN, and SCS
917 * so that the input to COS and SIN is always in the range [-PI, PI].
918 * SCS is replaced by one COS and one SIN instruction.
919 */
920 int r300_transform_trig_scale_vertex(struct radeon_compiler *c,
921 struct rc_instruction *inst,
922 void *unused)
923 {
924 if (inst->U.I.Opcode != RC_OPCODE_COS &&
925 inst->U.I.Opcode != RC_OPCODE_SIN &&
926 inst->U.I.Opcode != RC_OPCODE_SCS)
927 return 0;
928
929 /* Repeat x in the range [-PI, PI]:
930 *
931 * repeat(x) = frac(x / 2PI + 0.5) * 2PI - PI
932 */
933
934 static const float cons[4] = {0.15915494309189535, 0.5, 6.28318530717959, -3.14159265358979};
935 unsigned int temp;
936 unsigned int constant;
937
938 temp = rc_find_free_temporary(c);
939 constant = rc_constants_add_immediate_vec4(&c->Program.Constants, cons);
940
941 emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(temp, RC_MASK_W),
942 swizzle_xxxx(inst->U.I.SrcReg[0]),
943 srcregswz(RC_FILE_CONSTANT, constant, RC_SWIZZLE_XXXX),
944 srcregswz(RC_FILE_CONSTANT, constant, RC_SWIZZLE_YYYY));
945 emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(temp, RC_MASK_W),
946 srcreg(RC_FILE_TEMPORARY, temp));
947 emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(temp, RC_MASK_W),
948 srcreg(RC_FILE_TEMPORARY, temp),
949 srcregswz(RC_FILE_CONSTANT, constant, RC_SWIZZLE_ZZZZ),
950 srcregswz(RC_FILE_CONSTANT, constant, RC_SWIZZLE_WWWW));
951
952 r300_transform_SIN_COS_SCS(c, inst, temp);
953 return 1;
954 }
955
956 /**
957 * Rewrite DDX/DDY instructions to properly work with r5xx shaders.
958 * The r5xx MDH/MDV instruction provides per-quad partial derivatives.
959 * It takes the form A*B+C. A and C are set by setting src0. B should be -1.
960 *
961 * @warning This explicitly changes the form of DDX and DDY!
962 */
963
964 int radeonTransformDeriv(struct radeon_compiler* c,
965 struct rc_instruction* inst,
966 void* unused)
967 {
968 if (inst->U.I.Opcode != RC_OPCODE_DDX && inst->U.I.Opcode != RC_OPCODE_DDY)
969 return 0;
970
971 inst->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_1111;
972 inst->U.I.SrcReg[1].Negate = RC_MASK_XYZW;
973
974 return 1;
975 }
976
977 /**
978 * IF Temp[0].x -\
979 * KILP - > KIL -abs(Temp[0].x)
980 * ENDIF -/
981 *
982 * This needs to be done in its own pass, because it modifies the instructions
983 * before and after KILP.
984 */
985 void radeonTransformKILP(struct radeon_compiler * c)
986 {
987 struct rc_instruction * inst;
988 for (inst = c->Program.Instructions.Next;
989 inst != &c->Program.Instructions; inst = inst->Next) {
990
991 if (inst->U.I.Opcode != RC_OPCODE_KILP
992 || inst->Prev->U.I.Opcode != RC_OPCODE_IF
993 || inst->Next->U.I.Opcode != RC_OPCODE_ENDIF) {
994 continue;
995 }
996 inst->U.I.Opcode = RC_OPCODE_KIL;
997 inst->U.I.SrcReg[0] = negate(absolute(inst->Prev->U.I.SrcReg[0]));
998
999 /* Remove IF */
1000 rc_remove_instruction(inst->Prev);
1001 /* Remove ENDIF */
1002 rc_remove_instruction(inst->Next);
1003 }
1004 }