r600: remove (now) dead code
[mesa.git] / src / mesa / drivers / dri / r600 / r700_assembler.c
1 /*
2 * Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
18 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
19 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
20 */
21
22 /*
23 * Authors:
24 * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
25 */
26
27 #include <stdio.h>
28 #include <stdarg.h>
29 #include <stdlib.h>
30 #include <string.h>
31 #include <math.h>
32
33 #include "main/mtypes.h"
34 #include "main/imports.h"
35
36 #include "radeon_debug.h"
37 #include "r600_context.h"
38
39 #include "r700_assembler.h"
40
/*
 * Compile-time switches for the assembler.  Their consumers are not visible
 * in this part of the file.
 * NOTE(review): presumably these select dedicated CF instructions for
 * CONTINUE/BREAK and for POP_AFTER handling -- confirm against the users.
 */
#define USE_CF_FOR_CONTINUE_BREAK 1
#define USE_CF_FOR_POP_AFTER 1
43
44 BITS addrmode_PVSDST(PVSDST * pPVSDST)
45 {
46 return pPVSDST->addrmode0 | ((BITS)pPVSDST->addrmode1 << 1);
47 }
48
49 void setaddrmode_PVSDST(PVSDST * pPVSDST, BITS addrmode)
50 {
51 pPVSDST->addrmode0 = addrmode & 1;
52 pPVSDST->addrmode1 = (addrmode >> 1) & 1;
53 }
54
55 void nomask_PVSDST(PVSDST * pPVSDST)
56 {
57 pPVSDST->writex = pPVSDST->writey = pPVSDST->writez = pPVSDST->writew = 1;
58 }
59
60 BITS addrmode_PVSSRC(PVSSRC* pPVSSRC)
61 {
62 return pPVSSRC->addrmode0 | ((BITS)pPVSSRC->addrmode1 << 1);
63 }
64
65 void setaddrmode_PVSSRC(PVSSRC* pPVSSRC, BITS addrmode)
66 {
67 pPVSSRC->addrmode0 = addrmode & 1;
68 pPVSSRC->addrmode1 = (addrmode >> 1) & 1;
69 }
70
71
72 void setswizzle_PVSSRC(PVSSRC* pPVSSRC, BITS swz)
73 {
74 pPVSSRC->swizzlex =
75 pPVSSRC->swizzley =
76 pPVSSRC->swizzlez =
77 pPVSSRC->swizzlew = swz;
78 }
79
80 void noswizzle_PVSSRC(PVSSRC* pPVSSRC)
81 {
82 pPVSSRC->swizzlex = SQ_SEL_X;
83 pPVSSRC->swizzley = SQ_SEL_Y;
84 pPVSSRC->swizzlez = SQ_SEL_Z;
85 pPVSSRC->swizzlew = SQ_SEL_W;
86 }
87
88 void
89 swizzleagain_PVSSRC(PVSSRC * pPVSSRC, BITS x, BITS y, BITS z, BITS w)
90 {
91 switch (x)
92 {
93 case SQ_SEL_X: x = pPVSSRC->swizzlex;
94 break;
95 case SQ_SEL_Y: x = pPVSSRC->swizzley;
96 break;
97 case SQ_SEL_Z: x = pPVSSRC->swizzlez;
98 break;
99 case SQ_SEL_W: x = pPVSSRC->swizzlew;
100 break;
101 default:;
102 }
103
104 switch (y)
105 {
106 case SQ_SEL_X: y = pPVSSRC->swizzlex;
107 break;
108 case SQ_SEL_Y: y = pPVSSRC->swizzley;
109 break;
110 case SQ_SEL_Z: y = pPVSSRC->swizzlez;
111 break;
112 case SQ_SEL_W: y = pPVSSRC->swizzlew;
113 break;
114 default:;
115 }
116
117 switch (z)
118 {
119 case SQ_SEL_X: z = pPVSSRC->swizzlex;
120 break;
121 case SQ_SEL_Y: z = pPVSSRC->swizzley;
122 break;
123 case SQ_SEL_Z: z = pPVSSRC->swizzlez;
124 break;
125 case SQ_SEL_W: z = pPVSSRC->swizzlew;
126 break;
127 default:;
128 }
129
130 switch (w)
131 {
132 case SQ_SEL_X: w = pPVSSRC->swizzlex;
133 break;
134 case SQ_SEL_Y: w = pPVSSRC->swizzley;
135 break;
136 case SQ_SEL_Z: w = pPVSSRC->swizzlez;
137 break;
138 case SQ_SEL_W: w = pPVSSRC->swizzlew;
139 break;
140 default:;
141 }
142
143 pPVSSRC->swizzlex = x;
144 pPVSSRC->swizzley = y;
145 pPVSSRC->swizzlez = z;
146 pPVSSRC->swizzlew = w;
147 }
148
149 void neg_PVSSRC(PVSSRC* pPVSSRC)
150 {
151 pPVSSRC->negx = 1;
152 pPVSSRC->negy = 1;
153 pPVSSRC->negz = 1;
154 pPVSSRC->negw = 1;
155 }
156
157 void noneg_PVSSRC(PVSSRC* pPVSSRC)
158 {
159 pPVSSRC->negx = 0;
160 pPVSSRC->negy = 0;
161 pPVSSRC->negz = 0;
162 pPVSSRC->negw = 0;
163 }
164
165 // negate argument (for SUB instead of ADD and alike)
166 void flipneg_PVSSRC(PVSSRC* pPVSSRC)
167 {
168 pPVSSRC->negx = !pPVSSRC->negx;
169 pPVSSRC->negy = !pPVSSRC->negy;
170 pPVSSRC->negz = !pPVSSRC->negz;
171 pPVSSRC->negw = !pPVSSRC->negw;
172 }
173
174 void zerocomp_PVSSRC(PVSSRC* pPVSSRC, int c)
175 {
176 switch (c)
177 {
178 case 0: pPVSSRC->swizzlex = SQ_SEL_0; pPVSSRC->negx = 0; break;
179 case 1: pPVSSRC->swizzley = SQ_SEL_0; pPVSSRC->negy = 0; break;
180 case 2: pPVSSRC->swizzlez = SQ_SEL_0; pPVSSRC->negz = 0; break;
181 case 3: pPVSSRC->swizzlew = SQ_SEL_0; pPVSSRC->negw = 0; break;
182 default:;
183 }
184 }
185
186 void onecomp_PVSSRC(PVSSRC* pPVSSRC, int c)
187 {
188 switch (c)
189 {
190 case 0: pPVSSRC->swizzlex = SQ_SEL_1; pPVSSRC->negx = 0; break;
191 case 1: pPVSSRC->swizzley = SQ_SEL_1; pPVSSRC->negy = 0; break;
192 case 2: pPVSSRC->swizzlez = SQ_SEL_1; pPVSSRC->negz = 0; break;
193 case 3: pPVSSRC->swizzlew = SQ_SEL_1; pPVSSRC->negw = 0; break;
194 default:;
195 }
196 }
197
198 BITS is_misc_component_exported(VAP_OUT_VTX_FMT_0* pOutVTXFmt0)
199 {
200 return (pOutVTXFmt0->point_size |
201 pOutVTXFmt0->edge_flag |
202 pOutVTXFmt0->rta_index |
203 pOutVTXFmt0->kill_flag |
204 pOutVTXFmt0->viewport_index);
205 }
206
207 BITS is_depth_component_exported(OUT_FRAGMENT_FMT_0* pFPOutFmt)
208 {
209 return (pFPOutFmt->depth |
210 pFPOutFmt->stencil_ref |
211 pFPOutFmt->mask |
212 pFPOutFmt->coverage_to_mask);
213 }
214
215 GLboolean is_reduction_opcode(PVSDWORD* dest)
216 {
217 if (dest->dst.op3 == 0)
218 {
219 if ( (dest->dst.opcode == SQ_OP2_INST_DOT4 || dest->dst.opcode == SQ_OP2_INST_DOT4_IEEE || dest->dst.opcode == SQ_OP2_INST_CUBE) )
220 {
221 return GL_TRUE;
222 }
223 }
224 return GL_FALSE;
225 }
226
227 GLuint GetSurfaceFormat(GLenum eType, GLuint nChannels, GLuint * pClient_size)
228 {
229 GLuint format = FMT_INVALID;
230 GLuint uiElemSize = 0;
231
232 switch (eType)
233 {
234 case GL_BYTE:
235 case GL_UNSIGNED_BYTE:
236 uiElemSize = 1;
237 switch(nChannels)
238 {
239 case 1:
240 format = FMT_8; break;
241 case 2:
242 format = FMT_8_8; break;
243 case 3:
244 format = FMT_8_8_8; break;
245 case 4:
246 format = FMT_8_8_8_8; break;
247 default:
248 break;
249 }
250 break;
251
252 case GL_UNSIGNED_SHORT:
253 case GL_SHORT:
254 uiElemSize = 2;
255 switch(nChannels)
256 {
257 case 1:
258 format = FMT_16; break;
259 case 2:
260 format = FMT_16_16; break;
261 case 3:
262 format = FMT_16_16_16; break;
263 case 4:
264 format = FMT_16_16_16_16; break;
265 default:
266 break;
267 }
268 break;
269
270 case GL_UNSIGNED_INT:
271 case GL_INT:
272 uiElemSize = 4;
273 switch(nChannels)
274 {
275 case 1:
276 format = FMT_32; break;
277 case 2:
278 format = FMT_32_32; break;
279 case 3:
280 format = FMT_32_32_32; break;
281 case 4:
282 format = FMT_32_32_32_32; break;
283 default:
284 break;
285 }
286 break;
287
288 case GL_FLOAT:
289 uiElemSize = 4;
290 switch(nChannels)
291 {
292 case 1:
293 format = FMT_32_FLOAT; break;
294 case 2:
295 format = FMT_32_32_FLOAT; break;
296 case 3:
297 format = FMT_32_32_32_FLOAT; break;
298 case 4:
299 format = FMT_32_32_32_32_FLOAT; break;
300 default:
301 break;
302 }
303 break;
304 case GL_DOUBLE:
305 uiElemSize = 8;
306 switch(nChannels)
307 {
308 case 1:
309 format = FMT_32_FLOAT; break;
310 case 2:
311 format = FMT_32_32_FLOAT; break;
312 case 3:
313 format = FMT_32_32_32_FLOAT; break;
314 case 4:
315 format = FMT_32_32_32_32_FLOAT; break;
316 default:
317 break;
318 }
319 break;
320 default:
321 ;
322 //GL_ASSERT_NO_CASE();
323 }
324
325 if(NULL != pClient_size)
326 {
327 *pClient_size = uiElemSize * nChannels;
328 }
329
330 return(format);
331 }
332
333 unsigned int r700GetNumOperands(r700_AssemblerBase* pAsm)
334 {
335 if(pAsm->D.dst.op3)
336 {
337 return 3;
338 }
339
340 switch (pAsm->D.dst.opcode)
341 {
342 case SQ_OP2_INST_ADD:
343 case SQ_OP2_INST_KILLE:
344 case SQ_OP2_INST_KILLGT:
345 case SQ_OP2_INST_KILLGE:
346 case SQ_OP2_INST_KILLNE:
347 case SQ_OP2_INST_MUL:
348 case SQ_OP2_INST_MAX:
349 case SQ_OP2_INST_MIN:
350 //case SQ_OP2_INST_MAX_DX10:
351 //case SQ_OP2_INST_MIN_DX10:
352 case SQ_OP2_INST_SETE:
353 case SQ_OP2_INST_SETNE:
354 case SQ_OP2_INST_SETGT:
355 case SQ_OP2_INST_SETGE:
356 case SQ_OP2_INST_PRED_SETE:
357 case SQ_OP2_INST_PRED_SETGT:
358 case SQ_OP2_INST_PRED_SETGE:
359 case SQ_OP2_INST_PRED_SETNE:
360 case SQ_OP2_INST_DOT4:
361 case SQ_OP2_INST_DOT4_IEEE:
362 case SQ_OP2_INST_CUBE:
363 return 2;
364
365 case SQ_OP2_INST_MOV:
366 case SQ_OP2_INST_MOVA_FLOOR:
367 case SQ_OP2_INST_FRACT:
368 case SQ_OP2_INST_FLOOR:
369 case SQ_OP2_INST_TRUNC:
370 case SQ_OP2_INST_EXP_IEEE:
371 case SQ_OP2_INST_LOG_CLAMPED:
372 case SQ_OP2_INST_LOG_IEEE:
373 case SQ_OP2_INST_RECIP_IEEE:
374 case SQ_OP2_INST_RECIPSQRT_IEEE:
375 case SQ_OP2_INST_FLT_TO_INT:
376 case SQ_OP2_INST_SIN:
377 case SQ_OP2_INST_COS:
378 return 1;
379
380 default: radeon_error(
381 "Need instruction operand number for %x.\n", pAsm->D.dst.opcode);
382 };
383
384 return 3;
385 }
386
387 int Init_r700_AssemblerBase(SHADER_PIPE_TYPE spt, r700_AssemblerBase* pAsm, R700_Shader* pShader)
388 {
389 GLuint i;
390
391 Init_R700_Shader(pShader);
392 pAsm->pR700Shader = pShader;
393 pAsm->currentShaderType = spt;
394
395 pAsm->cf_last_export_ptr = NULL;
396
397 pAsm->cf_current_export_clause_ptr = NULL;
398 pAsm->cf_current_alu_clause_ptr = NULL;
399 pAsm->cf_current_tex_clause_ptr = NULL;
400 pAsm->cf_current_vtx_clause_ptr = NULL;
401 pAsm->cf_current_cf_clause_ptr = NULL;
402
403 // No clause has been created yet
404 pAsm->cf_current_clause_type = CF_EMPTY_CLAUSE;
405
406 pAsm->number_of_colorandz_exports = 0;
407 pAsm->number_of_exports = 0;
408 pAsm->number_of_export_opcodes = 0;
409
410 pAsm->alu_x_opcode = 0;
411
412 pAsm->D2.bits = 0;
413
414 pAsm->D.bits = 0;
415 pAsm->S[0].bits = 0;
416 pAsm->S[1].bits = 0;
417 pAsm->S[2].bits = 0;
418
419 pAsm->uLastPosUpdate = 0;
420
421 *(BITS *) &pAsm->fp_stOutFmt0 = 0;
422
423 pAsm->uIIns = 0;
424 pAsm->uOIns = 0;
425 pAsm->number_used_registers = 0;
426 pAsm->uUsedConsts = 256;
427
428
429 // Fragment programs
430 pAsm->uBoolConsts = 0;
431 pAsm->uIntConsts = 0;
432 pAsm->uInsts = 0;
433 pAsm->uConsts = 0;
434
435 pAsm->FCSP = 0;
436 pAsm->fc_stack[0].type = FC_NONE;
437
438 pAsm->aArgSubst[0] =
439 pAsm->aArgSubst[1] =
440 pAsm->aArgSubst[2] =
441 pAsm->aArgSubst[3] = (-1);
442
443 pAsm->uOutputs = 0;
444
445 for (i=0; i<NUMBER_OF_OUTPUT_COLORS; i++)
446 {
447 pAsm->color_export_register_number[i] = (-1);
448 }
449
450
451 pAsm->depth_export_register_number = (-1);
452 pAsm->stencil_export_register_number = (-1);
453 pAsm->coverage_to_mask_export_register_number = (-1);
454 pAsm->mask_export_register_number = (-1);
455
456 pAsm->starting_export_register_number = 0;
457 pAsm->starting_vfetch_register_number = 0;
458 pAsm->starting_temp_register_number = 0;
459 pAsm->uFirstHelpReg = 0;
460
461 pAsm->input_position_is_used = GL_FALSE;
462 pAsm->input_normal_is_used = GL_FALSE;
463
464 for (i=0; i<NUMBER_OF_INPUT_COLORS; i++)
465 {
466 pAsm->input_color_is_used[ i ] = GL_FALSE;
467 }
468
469 for (i=0; i<NUMBER_OF_TEXTURE_UNITS; i++)
470 {
471 pAsm->input_texture_unit_is_used[ i ] = GL_FALSE;
472 }
473
474 for (i=0; i<VERT_ATTRIB_MAX; i++)
475 {
476 pAsm->vfetch_instruction_ptr_array[ i ] = NULL;
477 }
478
479 pAsm->number_of_inputs = 0;
480
481 pAsm->is_tex = GL_FALSE;
482 pAsm->need_tex_barrier = GL_FALSE;
483
484 pAsm->subs = NULL;
485 pAsm->unSubArraySize = 0;
486 pAsm->unSubArrayPointer = 0;
487 pAsm->callers = NULL;
488 pAsm->unCallerArraySize = 0;
489 pAsm->unCallerArrayPointer = 0;
490
491 pAsm->CALLSP = 0;
492 pAsm->CALLSTACK[0].FCSP_BeforeEntry = 0;
493 pAsm->CALLSTACK[0].plstCFInstructions_local
494 = &(pAsm->pR700Shader->lstCFInstructions);
495
496 pAsm->CALLSTACK[0].max = 0;
497 pAsm->CALLSTACK[0].current = 0;
498
499 SetActiveCFlist(pAsm->pR700Shader, pAsm->CALLSTACK[0].plstCFInstructions_local);
500
501 pAsm->unCFflags = 0;
502
503 return 0;
504 }
505
506 GLboolean IsTex(gl_inst_opcode Opcode)
507 {
508 if( (OPCODE_TEX==Opcode) || (OPCODE_TXP==Opcode) || (OPCODE_TXB==Opcode) )
509 {
510 return GL_TRUE;
511 }
512 return GL_FALSE;
513 }
514
515 GLboolean IsAlu(gl_inst_opcode Opcode)
516 {
517 //TODO : more for fc and ex for higher spec.
518 if( IsTex(Opcode) )
519 {
520 return GL_FALSE;
521 }
522 return GL_TRUE;
523 }
524
525 int check_current_clause(r700_AssemblerBase* pAsm,
526 CF_CLAUSE_TYPE new_clause_type)
527 {
528 if (pAsm->cf_current_clause_type != new_clause_type)
529 { //Close last open clause
530 switch (pAsm->cf_current_clause_type)
531 {
532 case CF_ALU_CLAUSE:
533 if ( pAsm->cf_current_alu_clause_ptr != NULL)
534 {
535 pAsm->cf_current_alu_clause_ptr = NULL;
536 }
537 break;
538 case CF_VTX_CLAUSE:
539 if ( pAsm->cf_current_vtx_clause_ptr != NULL)
540 {
541 pAsm->cf_current_vtx_clause_ptr = NULL;
542 }
543 break;
544 case CF_TEX_CLAUSE:
545 if ( pAsm->cf_current_tex_clause_ptr != NULL)
546 {
547 pAsm->cf_current_tex_clause_ptr = NULL;
548 }
549 break;
550 case CF_EXPORT_CLAUSE:
551 if ( pAsm->cf_current_export_clause_ptr != NULL)
552 {
553 pAsm->cf_current_export_clause_ptr = NULL;
554 }
555 break;
556 case CF_OTHER_CLAUSE:
557 if ( pAsm->cf_current_cf_clause_ptr != NULL)
558 {
559 pAsm->cf_current_cf_clause_ptr = NULL;
560 }
561 break;
562 case CF_EMPTY_CLAUSE:
563 break;
564 default:
565 radeon_error(
566 "Unknown CF_CLAUSE_TYPE (%d) in check_current_clause. \n", (int) new_clause_type);
567 return GL_FALSE;
568 }
569
570 pAsm->cf_current_clause_type = CF_EMPTY_CLAUSE;
571
572 // Create new clause
573 switch (new_clause_type)
574 {
575 case CF_ALU_CLAUSE:
576 pAsm->cf_current_clause_type = CF_ALU_CLAUSE;
577 break;
578 case CF_VTX_CLAUSE:
579 pAsm->cf_current_clause_type = CF_VTX_CLAUSE;
580 break;
581 case CF_TEX_CLAUSE:
582 pAsm->cf_current_clause_type = CF_TEX_CLAUSE;
583 break;
584 case CF_EXPORT_CLAUSE:
585 {
586 R700ControlFlowSXClause* pR700ControlFlowSXClause
587 = (R700ControlFlowSXClause*) CALLOC_STRUCT(R700ControlFlowSXClause);
588
589 // Add new export instruction to control flow program
590 if (pR700ControlFlowSXClause != 0)
591 {
592 pAsm->cf_current_export_clause_ptr = pR700ControlFlowSXClause;
593 Init_R700ControlFlowSXClause(pR700ControlFlowSXClause);
594 AddCFInstruction( pAsm->pR700Shader,
595 (R700ControlFlowInstruction *)pR700ControlFlowSXClause );
596 }
597 else
598 {
599 radeon_error(
600 "Error allocating new EXPORT CF instruction in check_current_clause. \n");
601 return GL_FALSE;
602 }
603 pAsm->cf_current_clause_type = CF_EXPORT_CLAUSE;
604 }
605 break;
606 case CF_EMPTY_CLAUSE:
607 break;
608 case CF_OTHER_CLAUSE:
609 pAsm->cf_current_clause_type = CF_OTHER_CLAUSE;
610 break;
611 default:
612 radeon_error(
613 "Unknown CF_CLAUSE_TYPE (%d) in check_current_clause. \n", (int) new_clause_type);
614 return GL_FALSE;
615 }
616 }
617
618 return GL_TRUE;
619 }
620
621 GLboolean add_cf_instruction(r700_AssemblerBase* pAsm)
622 {
623 if(GL_FALSE == check_current_clause(pAsm, CF_OTHER_CLAUSE))
624 {
625 return GL_FALSE;
626 }
627
628 pAsm->cf_current_cf_clause_ptr =
629 (R700ControlFlowGenericClause*) CALLOC_STRUCT(R700ControlFlowGenericClause);
630
631 if (pAsm->cf_current_cf_clause_ptr != NULL)
632 {
633 Init_R700ControlFlowGenericClause(pAsm->cf_current_cf_clause_ptr);
634 AddCFInstruction( pAsm->pR700Shader,
635 (R700ControlFlowInstruction *)pAsm->cf_current_cf_clause_ptr );
636 }
637 else
638 {
639 radeon_error("Could not allocate a new VFetch CF instruction.\n");
640 return GL_FALSE;
641 }
642
643 return GL_TRUE;
644 }
645
646 GLboolean add_vfetch_instruction(r700_AssemblerBase* pAsm,
647 R700VertexInstruction* vertex_instruction_ptr)
648 {
649 if( GL_FALSE == check_current_clause(pAsm, CF_VTX_CLAUSE) )
650 {
651 return GL_FALSE;
652 }
653
654 if( pAsm->cf_current_vtx_clause_ptr == NULL ||
655 ( (pAsm->cf_current_vtx_clause_ptr != NULL) &&
656 (pAsm->cf_current_vtx_clause_ptr->m_Word1.f.count >= GetCFMaxInstructions(pAsm->cf_current_vtx_clause_ptr->m_ShaderInstType)-1)
657 ) )
658 {
659 // Create new Vfetch control flow instruction for this new clause
660 pAsm->cf_current_vtx_clause_ptr = (R700ControlFlowGenericClause*) CALLOC_STRUCT(R700ControlFlowGenericClause);
661
662 if (pAsm->cf_current_vtx_clause_ptr != NULL)
663 {
664 Init_R700ControlFlowGenericClause(pAsm->cf_current_vtx_clause_ptr);
665 AddCFInstruction( pAsm->pR700Shader,
666 (R700ControlFlowInstruction *)pAsm->cf_current_vtx_clause_ptr );
667 }
668 else
669 {
670 radeon_error("Could not allocate a new VFetch CF instruction.\n");
671 return GL_FALSE;
672 }
673
674 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.pop_count = 0x0;
675 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.cf_const = 0x0;
676 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
677 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.count = 0x0;
678 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.end_of_program = 0x0;
679 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
680 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_VTX;
681 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
682 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.barrier = 0x1;
683
684 LinkVertexInstruction(pAsm->cf_current_vtx_clause_ptr, vertex_instruction_ptr );
685 }
686 else
687 {
688 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.count++;
689 }
690
691 AddVTXInstruction(pAsm->pR700Shader, vertex_instruction_ptr);
692
693 return GL_TRUE;
694 }
695
696 GLboolean add_tex_instruction(r700_AssemblerBase* pAsm,
697 R700TextureInstruction* tex_instruction_ptr)
698 {
699 if ( GL_FALSE == check_current_clause(pAsm, CF_TEX_CLAUSE) )
700 {
701 return GL_FALSE;
702 }
703
704 if ( pAsm->cf_current_tex_clause_ptr == NULL ||
705 ( (pAsm->cf_current_tex_clause_ptr != NULL) &&
706 (pAsm->cf_current_tex_clause_ptr->m_Word1.f.count >= GetCFMaxInstructions(pAsm->cf_current_tex_clause_ptr->m_ShaderInstType)-1)
707 ) )
708 {
709 // new tex cf instruction for this new clause
710 pAsm->cf_current_tex_clause_ptr = (R700ControlFlowGenericClause*) CALLOC_STRUCT(R700ControlFlowGenericClause);
711
712 if (pAsm->cf_current_tex_clause_ptr != NULL)
713 {
714 Init_R700ControlFlowGenericClause(pAsm->cf_current_tex_clause_ptr);
715 AddCFInstruction( pAsm->pR700Shader,
716 (R700ControlFlowInstruction *)pAsm->cf_current_tex_clause_ptr );
717 }
718 else
719 {
720 radeon_error("Could not allocate a new TEX CF instruction.\n");
721 return GL_FALSE;
722 }
723
724 pAsm->cf_current_tex_clause_ptr->m_Word1.f.pop_count = 0x0;
725 pAsm->cf_current_tex_clause_ptr->m_Word1.f.cf_const = 0x0;
726 pAsm->cf_current_tex_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
727
728 pAsm->cf_current_tex_clause_ptr->m_Word1.f.end_of_program = 0x0;
729 pAsm->cf_current_tex_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
730 pAsm->cf_current_tex_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_TEX;
731 pAsm->cf_current_tex_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
732 pAsm->cf_current_tex_clause_ptr->m_Word1.f.barrier = 0x0; //0x1;
733 }
734 else
735 {
736 pAsm->cf_current_tex_clause_ptr->m_Word1.f.count++;
737 }
738
739 // If this clause constains any TEX instruction that is dependent on a previous instruction,
740 // set the barrier bit
741 if( pAsm->pInstDeps[pAsm->uiCurInst].nDstDep > (-1) || pAsm->need_tex_barrier == GL_TRUE )
742 {
743 pAsm->cf_current_tex_clause_ptr->m_Word1.f.barrier = 0x1;
744 }
745
746 if(NULL == pAsm->cf_current_tex_clause_ptr->m_pLinkedTEXInstruction)
747 {
748 pAsm->cf_current_tex_clause_ptr->m_pLinkedTEXInstruction = tex_instruction_ptr;
749 tex_instruction_ptr->m_pLinkedGenericClause = pAsm->cf_current_tex_clause_ptr;
750 }
751
752 AddTEXInstruction(pAsm->pR700Shader, tex_instruction_ptr);
753
754 return GL_TRUE;
755 }
756
757 GLboolean assemble_vfetch_instruction(r700_AssemblerBase* pAsm,
758 GLuint gl_client_id,
759 GLuint destination_register,
760 GLuint number_of_elements,
761 GLenum dataElementType,
762 VTX_FETCH_METHOD* pFetchMethod)
763 {
764 GLuint client_size_inbyte;
765 GLuint data_format;
766 GLuint mega_fetch_count;
767 GLuint is_mega_fetch_flag;
768
769 R700VertexGenericFetch* vfetch_instruction_ptr;
770 R700VertexGenericFetch* assembled_vfetch_instruction_ptr = pAsm->vfetch_instruction_ptr_array[ gl_client_id ];
771
772 if (assembled_vfetch_instruction_ptr == NULL)
773 {
774 vfetch_instruction_ptr = (R700VertexGenericFetch*) CALLOC_STRUCT(R700VertexGenericFetch);
775 if (vfetch_instruction_ptr == NULL)
776 {
777 return GL_FALSE;
778 }
779 Init_R700VertexGenericFetch(vfetch_instruction_ptr);
780 }
781 else
782 {
783 vfetch_instruction_ptr = assembled_vfetch_instruction_ptr;
784 }
785
786 data_format = GetSurfaceFormat(dataElementType, number_of_elements, &client_size_inbyte);
787
788 if(GL_TRUE == pFetchMethod->bEnableMini) //More conditions here
789 {
790 //TODO : mini fetch
791 }
792 else
793 {
794 mega_fetch_count = MEGA_FETCH_BYTES - 1;
795 is_mega_fetch_flag = 0x1;
796 pFetchMethod->mega_fetch_remainder = MEGA_FETCH_BYTES - client_size_inbyte;
797 }
798
799 vfetch_instruction_ptr->m_Word0.f.vtx_inst = SQ_VTX_INST_FETCH;
800 vfetch_instruction_ptr->m_Word0.f.fetch_type = SQ_VTX_FETCH_VERTEX_DATA;
801 vfetch_instruction_ptr->m_Word0.f.fetch_whole_quad = 0x0;
802
803 vfetch_instruction_ptr->m_Word0.f.buffer_id = gl_client_id;
804 vfetch_instruction_ptr->m_Word0.f.src_gpr = 0x0;
805 vfetch_instruction_ptr->m_Word0.f.src_rel = SQ_ABSOLUTE;
806 vfetch_instruction_ptr->m_Word0.f.src_sel_x = SQ_SEL_X;
807 vfetch_instruction_ptr->m_Word0.f.mega_fetch_count = mega_fetch_count;
808
809 vfetch_instruction_ptr->m_Word1.f.dst_sel_x = (number_of_elements < 1) ? SQ_SEL_0 : SQ_SEL_X;
810 vfetch_instruction_ptr->m_Word1.f.dst_sel_y = (number_of_elements < 2) ? SQ_SEL_0 : SQ_SEL_Y;
811 vfetch_instruction_ptr->m_Word1.f.dst_sel_z = (number_of_elements < 3) ? SQ_SEL_0 : SQ_SEL_Z;
812 vfetch_instruction_ptr->m_Word1.f.dst_sel_w = (number_of_elements < 4) ? SQ_SEL_1 : SQ_SEL_W;
813
814 vfetch_instruction_ptr->m_Word1.f.use_const_fields = 1;
815
816 // Destination register
817 vfetch_instruction_ptr->m_Word1_GPR.f.dst_gpr = destination_register;
818 vfetch_instruction_ptr->m_Word1_GPR.f.dst_rel = SQ_ABSOLUTE;
819
820 vfetch_instruction_ptr->m_Word2.f.offset = 0;
821 vfetch_instruction_ptr->m_Word2.f.const_buf_no_stride = 0x0;
822
823 vfetch_instruction_ptr->m_Word2.f.mega_fetch = is_mega_fetch_flag;
824
825 if (assembled_vfetch_instruction_ptr == NULL)
826 {
827 if ( GL_FALSE == add_vfetch_instruction(pAsm, (R700VertexInstruction *)vfetch_instruction_ptr) )
828 {
829 return GL_FALSE;
830 }
831
832 if (pAsm->vfetch_instruction_ptr_array[ gl_client_id ] != NULL)
833 {
834 return GL_FALSE;
835 }
836 else
837 {
838 pAsm->vfetch_instruction_ptr_array[ gl_client_id ] = vfetch_instruction_ptr;
839 }
840 }
841
842 return GL_TRUE;
843 }
844
845 GLboolean assemble_vfetch_instruction2(r700_AssemblerBase* pAsm,
846 GLuint destination_register,
847 GLenum type,
848 GLint size,
849 GLubyte element,
850 GLuint _signed,
851 GLboolean normalize,
852 VTX_FETCH_METHOD * pFetchMethod)
853 {
854 GLuint client_size_inbyte;
855 GLuint data_format;
856 GLuint mega_fetch_count;
857 GLuint is_mega_fetch_flag;
858
859 R700VertexGenericFetch* vfetch_instruction_ptr;
860 R700VertexGenericFetch* assembled_vfetch_instruction_ptr
861 = pAsm->vfetch_instruction_ptr_array[element];
862
863 if (assembled_vfetch_instruction_ptr == NULL)
864 {
865 vfetch_instruction_ptr = (R700VertexGenericFetch*) CALLOC_STRUCT(R700VertexGenericFetch);
866 if (vfetch_instruction_ptr == NULL)
867 {
868 return GL_FALSE;
869 }
870 Init_R700VertexGenericFetch(vfetch_instruction_ptr);
871 }
872 else
873 {
874 vfetch_instruction_ptr = assembled_vfetch_instruction_ptr;
875 }
876
877 data_format = GetSurfaceFormat(type, size, &client_size_inbyte);
878
879 if(GL_TRUE == pFetchMethod->bEnableMini) //More conditions here
880 {
881 //TODO : mini fetch
882 }
883 else
884 {
885 mega_fetch_count = MEGA_FETCH_BYTES - 1;
886 is_mega_fetch_flag = 0x1;
887 pFetchMethod->mega_fetch_remainder = MEGA_FETCH_BYTES - client_size_inbyte;
888 }
889
890 vfetch_instruction_ptr->m_Word0.f.vtx_inst = SQ_VTX_INST_FETCH;
891 vfetch_instruction_ptr->m_Word0.f.fetch_type = SQ_VTX_FETCH_VERTEX_DATA;
892 vfetch_instruction_ptr->m_Word0.f.fetch_whole_quad = 0x0;
893
894 vfetch_instruction_ptr->m_Word0.f.buffer_id = element;
895 vfetch_instruction_ptr->m_Word0.f.src_gpr = 0x0;
896 vfetch_instruction_ptr->m_Word0.f.src_rel = SQ_ABSOLUTE;
897 vfetch_instruction_ptr->m_Word0.f.src_sel_x = SQ_SEL_X;
898 vfetch_instruction_ptr->m_Word0.f.mega_fetch_count = mega_fetch_count;
899
900 vfetch_instruction_ptr->m_Word1.f.dst_sel_x = (size < 1) ? SQ_SEL_0 : SQ_SEL_X;
901 vfetch_instruction_ptr->m_Word1.f.dst_sel_y = (size < 2) ? SQ_SEL_0 : SQ_SEL_Y;
902 vfetch_instruction_ptr->m_Word1.f.dst_sel_z = (size < 3) ? SQ_SEL_0 : SQ_SEL_Z;
903 vfetch_instruction_ptr->m_Word1.f.dst_sel_w = (size < 4) ? SQ_SEL_1 : SQ_SEL_W;
904
905 vfetch_instruction_ptr->m_Word1.f.use_const_fields = 1;
906 vfetch_instruction_ptr->m_Word1.f.data_format = data_format;
907 vfetch_instruction_ptr->m_Word2.f.endian_swap = SQ_ENDIAN_NONE;
908
909 if(1 == _signed)
910 {
911 vfetch_instruction_ptr->m_Word1.f.format_comp_all = SQ_FORMAT_COMP_SIGNED;
912 }
913 else
914 {
915 vfetch_instruction_ptr->m_Word1.f.format_comp_all = SQ_FORMAT_COMP_UNSIGNED;
916 }
917
918 if(GL_TRUE == normalize)
919 {
920 vfetch_instruction_ptr->m_Word1.f.num_format_all = SQ_NUM_FORMAT_NORM;
921 }
922 else
923 {
924 vfetch_instruction_ptr->m_Word1.f.num_format_all = SQ_NUM_FORMAT_INT;
925 }
926
927 // Destination register
928 vfetch_instruction_ptr->m_Word1_GPR.f.dst_gpr = destination_register;
929 vfetch_instruction_ptr->m_Word1_GPR.f.dst_rel = SQ_ABSOLUTE;
930
931 vfetch_instruction_ptr->m_Word2.f.offset = 0;
932 vfetch_instruction_ptr->m_Word2.f.const_buf_no_stride = 0x0;
933
934 vfetch_instruction_ptr->m_Word2.f.mega_fetch = is_mega_fetch_flag;
935
936 if (assembled_vfetch_instruction_ptr == NULL)
937 {
938 if ( GL_FALSE == add_vfetch_instruction(pAsm, (R700VertexInstruction *)vfetch_instruction_ptr) )
939 {
940 return GL_FALSE;
941 }
942
943 if (pAsm->vfetch_instruction_ptr_array[element] != NULL)
944 {
945 return GL_FALSE;
946 }
947 else
948 {
949 pAsm->vfetch_instruction_ptr_array[element] = vfetch_instruction_ptr;
950 }
951 }
952
953 return GL_TRUE;
954 }
955
956 GLboolean cleanup_vfetch_instructions(r700_AssemblerBase* pAsm)
957 {
958 GLint i;
959 pAsm->cf_current_clause_type = CF_EMPTY_CLAUSE;
960 pAsm->cf_current_vtx_clause_ptr = NULL;
961
962 for (i=0; i<VERT_ATTRIB_MAX; i++)
963 {
964 pAsm->vfetch_instruction_ptr_array[ i ] = NULL;
965 }
966
967 cleanup_vfetch_shaderinst(pAsm->pR700Shader);
968
969 return GL_TRUE;
970 }
971
972 GLuint gethelpr(r700_AssemblerBase* pAsm)
973 {
974 GLuint r = pAsm->uHelpReg;
975 pAsm->uHelpReg++;
976 if (pAsm->uHelpReg > pAsm->number_used_registers)
977 {
978 pAsm->number_used_registers = pAsm->uHelpReg;
979 }
980 return r;
981 }
982 void resethelpr(r700_AssemblerBase* pAsm)
983 {
984 pAsm->uHelpReg = pAsm->uFirstHelpReg;
985 }
986
987 void checkop_init(r700_AssemblerBase* pAsm)
988 {
989 resethelpr(pAsm);
990 pAsm->aArgSubst[0] =
991 pAsm->aArgSubst[1] =
992 pAsm->aArgSubst[2] =
993 pAsm->aArgSubst[3] = -1;
994 }
995
996 GLboolean mov_temp(r700_AssemblerBase* pAsm, int src)
997 {
998 GLuint tmp = gethelpr(pAsm);
999
1000 //mov src to temp helper gpr.
1001 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
1002
1003 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
1004
1005 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
1006 pAsm->D.dst.reg = tmp;
1007
1008 nomask_PVSDST(&(pAsm->D.dst));
1009
1010 if( GL_FALSE == assemble_src(pAsm, src, 0) )
1011 {
1012 return GL_FALSE;
1013 }
1014
1015 noswizzle_PVSSRC(&(pAsm->S[0].src));
1016 noneg_PVSSRC(&(pAsm->S[0].src));
1017
1018 if( GL_FALSE == next_ins(pAsm) )
1019 {
1020 return GL_FALSE;
1021 }
1022
1023 pAsm->aArgSubst[1 + src] = tmp;
1024
1025 return GL_TRUE;
1026 }
1027
1028 GLboolean checkop1(r700_AssemblerBase* pAsm)
1029 {
1030 checkop_init(pAsm);
1031 return GL_TRUE;
1032 }
1033
1034 GLboolean checkop2(r700_AssemblerBase* pAsm)
1035 {
1036 GLboolean bSrcConst[2];
1037 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
1038
1039 checkop_init(pAsm);
1040
1041 if( (pILInst->SrcReg[0].File == PROGRAM_UNIFORM) ||
1042 (pILInst->SrcReg[0].File == PROGRAM_CONSTANT) ||
1043 (pILInst->SrcReg[0].File == PROGRAM_LOCAL_PARAM) ||
1044 (pILInst->SrcReg[0].File == PROGRAM_ENV_PARAM) ||
1045 (pILInst->SrcReg[0].File == PROGRAM_STATE_VAR) )
1046 {
1047 bSrcConst[0] = GL_TRUE;
1048 }
1049 else
1050 {
1051 bSrcConst[0] = GL_FALSE;
1052 }
1053 if( (pILInst->SrcReg[1].File == PROGRAM_UNIFORM) ||
1054 (pILInst->SrcReg[1].File == PROGRAM_CONSTANT) ||
1055 (pILInst->SrcReg[1].File == PROGRAM_LOCAL_PARAM) ||
1056 (pILInst->SrcReg[1].File == PROGRAM_ENV_PARAM) ||
1057 (pILInst->SrcReg[1].File == PROGRAM_STATE_VAR) )
1058 {
1059 bSrcConst[1] = GL_TRUE;
1060 }
1061 else
1062 {
1063 bSrcConst[1] = GL_FALSE;
1064 }
1065
1066 if( (bSrcConst[0] == GL_TRUE) && (bSrcConst[1] == GL_TRUE) )
1067 {
1068 if(pILInst->SrcReg[0].Index != pILInst->SrcReg[1].Index)
1069 {
1070 if( GL_FALSE == mov_temp(pAsm, 1) )
1071 {
1072 return GL_FALSE;
1073 }
1074 }
1075 }
1076
1077 return GL_TRUE;
1078 }
1079
1080 GLboolean checkop3(r700_AssemblerBase* pAsm)
1081 {
1082 GLboolean bSrcConst[3];
1083 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
1084
1085 checkop_init(pAsm);
1086
1087 if( (pILInst->SrcReg[0].File == PROGRAM_UNIFORM) ||
1088 (pILInst->SrcReg[0].File == PROGRAM_CONSTANT) ||
1089 (pILInst->SrcReg[0].File == PROGRAM_LOCAL_PARAM) ||
1090 (pILInst->SrcReg[0].File == PROGRAM_ENV_PARAM) ||
1091 (pILInst->SrcReg[0].File == PROGRAM_STATE_VAR) )
1092 {
1093 bSrcConst[0] = GL_TRUE;
1094 }
1095 else
1096 {
1097 bSrcConst[0] = GL_FALSE;
1098 }
1099 if( (pILInst->SrcReg[1].File == PROGRAM_UNIFORM) ||
1100 (pILInst->SrcReg[1].File == PROGRAM_CONSTANT) ||
1101 (pILInst->SrcReg[1].File == PROGRAM_LOCAL_PARAM) ||
1102 (pILInst->SrcReg[1].File == PROGRAM_ENV_PARAM) ||
1103 (pILInst->SrcReg[1].File == PROGRAM_STATE_VAR) )
1104 {
1105 bSrcConst[1] = GL_TRUE;
1106 }
1107 else
1108 {
1109 bSrcConst[1] = GL_FALSE;
1110 }
1111 if( (pILInst->SrcReg[2].File == PROGRAM_UNIFORM) ||
1112 (pILInst->SrcReg[2].File == PROGRAM_CONSTANT) ||
1113 (pILInst->SrcReg[2].File == PROGRAM_LOCAL_PARAM) ||
1114 (pILInst->SrcReg[2].File == PROGRAM_ENV_PARAM) ||
1115 (pILInst->SrcReg[2].File == PROGRAM_STATE_VAR) )
1116 {
1117 bSrcConst[2] = GL_TRUE;
1118 }
1119 else
1120 {
1121 bSrcConst[2] = GL_FALSE;
1122 }
1123
1124 if( (GL_TRUE == bSrcConst[0]) &&
1125 (GL_TRUE == bSrcConst[1]) &&
1126 (GL_TRUE == bSrcConst[2]) )
1127 {
1128 if( GL_FALSE == mov_temp(pAsm, 1) )
1129 {
1130 return GL_FALSE;
1131 }
1132 if( GL_FALSE == mov_temp(pAsm, 2) )
1133 {
1134 return GL_FALSE;
1135 }
1136
1137 return GL_TRUE;
1138 }
1139 else if( (GL_TRUE == bSrcConst[0]) &&
1140 (GL_TRUE == bSrcConst[1]) )
1141 {
1142 if(pILInst->SrcReg[0].Index != pILInst->SrcReg[1].Index)
1143 {
1144 if( GL_FALSE == mov_temp(pAsm, 1) )
1145 {
1146 return 1;
1147 }
1148 }
1149
1150 return GL_TRUE;
1151 }
1152 else if ( (GL_TRUE == bSrcConst[0]) &&
1153 (GL_TRUE == bSrcConst[2]) )
1154 {
1155 if(pILInst->SrcReg[0].Index != pILInst->SrcReg[2].Index)
1156 {
1157 if( GL_FALSE == mov_temp(pAsm, 2) )
1158 {
1159 return GL_FALSE;
1160 }
1161 }
1162
1163 return GL_TRUE;
1164 }
1165 else if( (GL_TRUE == bSrcConst[1]) &&
1166 (GL_TRUE == bSrcConst[2]) )
1167 {
1168 if(pILInst->SrcReg[1].Index != pILInst->SrcReg[2].Index)
1169 {
1170 if( GL_FALSE == mov_temp(pAsm, 2) )
1171 {
1172 return GL_FALSE;
1173 }
1174 }
1175
1176 return GL_TRUE;
1177 }
1178
1179 return GL_TRUE;
1180 }
1181
1182 GLboolean assemble_src(r700_AssemblerBase *pAsm,
1183 int src,
1184 int fld)
1185 {
1186 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
1187
1188 if (fld == -1)
1189 {
1190 fld = src;
1191 }
1192
1193 if(pAsm->aArgSubst[1+src] >= 0)
1194 {
1195 setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_ABSOLUTE);
1196 pAsm->S[fld].src.rtype = SRC_REG_TEMPORARY;
1197 pAsm->S[fld].src.reg = pAsm->aArgSubst[1+src];
1198 }
1199 else
1200 {
1201 switch (pILInst->SrcReg[src].File)
1202 {
1203 case PROGRAM_TEMPORARY:
1204 setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_ABSOLUTE);
1205 pAsm->S[fld].src.rtype = SRC_REG_TEMPORARY;
1206 pAsm->S[fld].src.reg = pILInst->SrcReg[src].Index + pAsm->starting_temp_register_number;
1207 break;
1208 case PROGRAM_CONSTANT:
1209 case PROGRAM_LOCAL_PARAM:
1210 case PROGRAM_ENV_PARAM:
1211 case PROGRAM_STATE_VAR:
1212 case PROGRAM_UNIFORM:
1213 if (1 == pILInst->SrcReg[src].RelAddr)
1214 {
1215 setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_RELATIVE_A0);
1216 }
1217 else
1218 {
1219 setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_ABSOLUTE);
1220 }
1221
1222 pAsm->S[fld].src.rtype = SRC_REG_CONSTANT;
1223 pAsm->S[fld].src.reg = pILInst->SrcReg[src].Index;
1224 break;
1225 case PROGRAM_INPUT:
1226 setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_ABSOLUTE);
1227 pAsm->S[fld].src.rtype = SRC_REG_INPUT;
1228 switch (pAsm->currentShaderType)
1229 {
1230 case SPT_FP:
1231 pAsm->S[fld].src.reg = pAsm->uiFP_AttributeMap[pILInst->SrcReg[src].Index];
1232 break;
1233 case SPT_VP:
1234 pAsm->S[fld].src.reg = pAsm->ucVP_AttributeMap[pILInst->SrcReg[src].Index];
1235 break;
1236 }
1237 break;
1238 default:
1239 radeon_error("Invalid source argument type : %d \n", pILInst->SrcReg[src].File);
1240 return GL_FALSE;
1241 }
1242 }
1243
1244 pAsm->S[fld].src.swizzlex = pILInst->SrcReg[src].Swizzle & 0x7;
1245 pAsm->S[fld].src.swizzley = (pILInst->SrcReg[src].Swizzle >> 3) & 0x7;
1246 pAsm->S[fld].src.swizzlez = (pILInst->SrcReg[src].Swizzle >> 6) & 0x7;
1247 pAsm->S[fld].src.swizzlew = (pILInst->SrcReg[src].Swizzle >> 9) & 0x7;
1248
1249 pAsm->S[fld].src.negx = pILInst->SrcReg[src].Negate & 0x1;
1250 pAsm->S[fld].src.negy = (pILInst->SrcReg[src].Negate >> 1) & 0x1;
1251 pAsm->S[fld].src.negz = (pILInst->SrcReg[src].Negate >> 2) & 0x1;
1252 pAsm->S[fld].src.negw = (pILInst->SrcReg[src].Negate >> 3) & 0x1;
1253
1254 return GL_TRUE;
1255 }
1256
1257 GLboolean assemble_dst(r700_AssemblerBase *pAsm)
1258 {
1259 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
1260 switch (pILInst->DstReg.File)
1261 {
1262 case PROGRAM_TEMPORARY:
1263 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
1264 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
1265 pAsm->D.dst.reg = pILInst->DstReg.Index + pAsm->starting_temp_register_number;
1266 break;
1267 case PROGRAM_ADDRESS:
1268 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
1269 pAsm->D.dst.rtype = DST_REG_A0;
1270 pAsm->D.dst.reg = 0;
1271 break;
1272 case PROGRAM_OUTPUT:
1273 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
1274 pAsm->D.dst.rtype = DST_REG_OUT;
1275 switch (pAsm->currentShaderType)
1276 {
1277 case SPT_FP:
1278 pAsm->D.dst.reg = pAsm->uiFP_OutputMap[pILInst->DstReg.Index];
1279 break;
1280 case SPT_VP:
1281 pAsm->D.dst.reg = pAsm->ucVP_OutputMap[pILInst->DstReg.Index];
1282 break;
1283 }
1284 break;
1285 default:
1286 radeon_error("Invalid destination output argument type\n");
1287 return GL_FALSE;
1288 }
1289
1290 pAsm->D.dst.writex = pILInst->DstReg.WriteMask & 0x1;
1291 pAsm->D.dst.writey = (pILInst->DstReg.WriteMask >> 1) & 0x1;
1292 pAsm->D.dst.writez = (pILInst->DstReg.WriteMask >> 2) & 0x1;
1293 pAsm->D.dst.writew = (pILInst->DstReg.WriteMask >> 3) & 0x1;
1294
1295 if(pILInst->SaturateMode == SATURATE_ZERO_ONE)
1296 {
1297 pAsm->D2.dst2.SaturateMode = 1;
1298 }
1299 else
1300 {
1301 pAsm->D2.dst2.SaturateMode = 0;
1302 }
1303
1304 return GL_TRUE;
1305 }
1306
1307 GLboolean tex_dst(r700_AssemblerBase *pAsm)
1308 {
1309 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
1310
1311 if(PROGRAM_TEMPORARY == pILInst->DstReg.File)
1312 {
1313 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
1314 pAsm->D.dst.reg = pAsm->pILInst[pAsm->uiCurInst].DstReg.Index + pAsm->starting_temp_register_number;
1315
1316 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
1317 }
1318 else if(PROGRAM_OUTPUT == pILInst->DstReg.File)
1319 {
1320 pAsm->D.dst.rtype = DST_REG_OUT;
1321 switch (pAsm->currentShaderType)
1322 {
1323 case SPT_FP:
1324 pAsm->D.dst.reg = pAsm->uiFP_OutputMap[pILInst->DstReg.Index];
1325 break;
1326 case SPT_VP:
1327 pAsm->D.dst.reg = pAsm->ucVP_OutputMap[pILInst->DstReg.Index];
1328 break;
1329 }
1330
1331 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
1332 }
1333 else
1334 {
1335 radeon_error("Invalid destination output argument type\n");
1336 return GL_FALSE;
1337 }
1338
1339 pAsm->D.dst.writex = pILInst->DstReg.WriteMask & 0x1;
1340 pAsm->D.dst.writey = (pILInst->DstReg.WriteMask >> 1) & 0x1;
1341 pAsm->D.dst.writez = (pILInst->DstReg.WriteMask >> 2) & 0x1;
1342 pAsm->D.dst.writew = (pILInst->DstReg.WriteMask >> 3) & 0x1;
1343
1344 return GL_TRUE;
1345 }
1346
1347 GLboolean tex_src(r700_AssemblerBase *pAsm)
1348 {
1349 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
1350
1351 GLboolean bValidTexCoord = GL_FALSE;
1352
1353 if(pAsm->aArgSubst[1] >= 0)
1354 {
1355 bValidTexCoord = GL_TRUE;
1356 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
1357 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
1358 pAsm->S[0].src.reg = pAsm->aArgSubst[1];
1359 }
1360 else
1361 {
1362 switch (pILInst->SrcReg[0].File) {
1363 case PROGRAM_UNIFORM:
1364 case PROGRAM_CONSTANT:
1365 case PROGRAM_LOCAL_PARAM:
1366 case PROGRAM_ENV_PARAM:
1367 case PROGRAM_STATE_VAR:
1368 break;
1369 case PROGRAM_TEMPORARY:
1370 bValidTexCoord = GL_TRUE;
1371 pAsm->S[0].src.reg = pILInst->SrcReg[0].Index +
1372 pAsm->starting_temp_register_number;
1373 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
1374 break;
1375 case PROGRAM_INPUT:
1376 switch (pILInst->SrcReg[0].Index)
1377 {
1378 case FRAG_ATTRIB_WPOS:
1379 case FRAG_ATTRIB_COL0:
1380 case FRAG_ATTRIB_COL1:
1381 case FRAG_ATTRIB_FOGC:
1382 case FRAG_ATTRIB_TEX0:
1383 case FRAG_ATTRIB_TEX1:
1384 case FRAG_ATTRIB_TEX2:
1385 case FRAG_ATTRIB_TEX3:
1386 case FRAG_ATTRIB_TEX4:
1387 case FRAG_ATTRIB_TEX5:
1388 case FRAG_ATTRIB_TEX6:
1389 case FRAG_ATTRIB_TEX7:
1390 bValidTexCoord = GL_TRUE;
1391 pAsm->S[0].src.reg =
1392 pAsm->uiFP_AttributeMap[pILInst->SrcReg[0].Index];
1393 pAsm->S[0].src.rtype = SRC_REG_INPUT;
1394 break;
1395 case FRAG_ATTRIB_FACE:
1396 fprintf(stderr, "FRAG_ATTRIB_FACE unsupported\n");
1397 break;
1398 case FRAG_ATTRIB_PNTC:
1399 fprintf(stderr, "FRAG_ATTRIB_PNTC unsupported\n");
1400 break;
1401 }
1402
1403 if( (pILInst->SrcReg[0].Index >= FRAG_ATTRIB_VAR0) ||
1404 (pILInst->SrcReg[0].Index < FRAG_ATTRIB_MAX) )
1405 {
1406 bValidTexCoord = GL_TRUE;
1407 pAsm->S[0].src.reg =
1408 pAsm->uiFP_AttributeMap[pILInst->SrcReg[0].Index];
1409 pAsm->S[0].src.rtype = SRC_REG_INPUT;
1410 }
1411
1412 break;
1413 }
1414 }
1415
1416 if(GL_TRUE == bValidTexCoord)
1417 {
1418 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
1419 }
1420 else
1421 {
1422 radeon_error("Invalid source texcoord for TEX instruction\n");
1423 return GL_FALSE;
1424 }
1425
1426 pAsm->S[0].src.swizzlex = pILInst->SrcReg[0].Swizzle & 0x7;
1427 pAsm->S[0].src.swizzley = (pILInst->SrcReg[0].Swizzle >> 3) & 0x7;
1428 pAsm->S[0].src.swizzlez = (pILInst->SrcReg[0].Swizzle >> 6) & 0x7;
1429 pAsm->S[0].src.swizzlew = (pILInst->SrcReg[0].Swizzle >> 9) & 0x7;
1430
1431 pAsm->S[0].src.negx = pILInst->SrcReg[0].Negate & 0x1;
1432 pAsm->S[0].src.negy = (pILInst->SrcReg[0].Negate >> 1) & 0x1;
1433 pAsm->S[0].src.negz = (pILInst->SrcReg[0].Negate >> 2) & 0x1;
1434 pAsm->S[0].src.negw = (pILInst->SrcReg[0].Negate >> 3) & 0x1;
1435
1436 return GL_TRUE;
1437 }
1438
1439 GLboolean assemble_tex_instruction(r700_AssemblerBase *pAsm, GLboolean normalized)
1440 {
1441 PVSSRC * texture_coordinate_source;
1442 PVSSRC * texture_unit_source;
1443
1444 R700TextureInstruction* tex_instruction_ptr = (R700TextureInstruction*) CALLOC_STRUCT(R700TextureInstruction);
1445 if (tex_instruction_ptr == NULL)
1446 {
1447 return GL_FALSE;
1448 }
1449 Init_R700TextureInstruction(tex_instruction_ptr);
1450
1451 texture_coordinate_source = &(pAsm->S[0].src);
1452 texture_unit_source = &(pAsm->S[1].src);
1453
1454 tex_instruction_ptr->m_Word0.f.tex_inst = pAsm->D.dst.opcode;
1455 tex_instruction_ptr->m_Word0.f.bc_frac_mode = 0x0;
1456 tex_instruction_ptr->m_Word0.f.fetch_whole_quad = 0x0;
1457
1458 tex_instruction_ptr->m_Word0.f.resource_id = texture_unit_source->reg;
1459
1460 tex_instruction_ptr->m_Word1.f.lod_bias = 0x0;
1461 if (normalized) {
1462 tex_instruction_ptr->m_Word1.f.coord_type_x = SQ_TEX_NORMALIZED;
1463 tex_instruction_ptr->m_Word1.f.coord_type_y = SQ_TEX_NORMALIZED;
1464 tex_instruction_ptr->m_Word1.f.coord_type_z = SQ_TEX_NORMALIZED;
1465 tex_instruction_ptr->m_Word1.f.coord_type_w = SQ_TEX_NORMALIZED;
1466 } else {
1467 /* XXX: UNNORMALIZED tex coords have limited wrap modes */
1468 tex_instruction_ptr->m_Word1.f.coord_type_x = SQ_TEX_UNNORMALIZED;
1469 tex_instruction_ptr->m_Word1.f.coord_type_y = SQ_TEX_UNNORMALIZED;
1470 tex_instruction_ptr->m_Word1.f.coord_type_z = SQ_TEX_UNNORMALIZED;
1471 tex_instruction_ptr->m_Word1.f.coord_type_w = SQ_TEX_UNNORMALIZED;
1472 }
1473
1474 tex_instruction_ptr->m_Word2.f.offset_x = 0x0;
1475 tex_instruction_ptr->m_Word2.f.offset_y = 0x0;
1476 tex_instruction_ptr->m_Word2.f.offset_z = 0x0;
1477
1478 tex_instruction_ptr->m_Word2.f.sampler_id = texture_unit_source->reg;
1479
1480 // dst
1481 if ( (pAsm->D.dst.rtype == DST_REG_TEMPORARY) ||
1482 (pAsm->D.dst.rtype == DST_REG_OUT) )
1483 {
1484 tex_instruction_ptr->m_Word0.f.src_gpr = texture_coordinate_source->reg;
1485 tex_instruction_ptr->m_Word0.f.src_rel = SQ_ABSOLUTE;
1486
1487 tex_instruction_ptr->m_Word1.f.dst_gpr = pAsm->D.dst.reg;
1488 tex_instruction_ptr->m_Word1.f.dst_rel = SQ_ABSOLUTE;
1489
1490 tex_instruction_ptr->m_Word1.f.dst_sel_x = (pAsm->D.dst.writex ? texture_unit_source->swizzlex : SQ_SEL_MASK);
1491 tex_instruction_ptr->m_Word1.f.dst_sel_y = (pAsm->D.dst.writey ? texture_unit_source->swizzley : SQ_SEL_MASK);
1492 tex_instruction_ptr->m_Word1.f.dst_sel_z = (pAsm->D.dst.writez ? texture_unit_source->swizzlez : SQ_SEL_MASK);
1493 tex_instruction_ptr->m_Word1.f.dst_sel_w = (pAsm->D.dst.writew ? texture_unit_source->swizzlew : SQ_SEL_MASK);
1494
1495
1496 tex_instruction_ptr->m_Word2.f.src_sel_x = texture_coordinate_source->swizzlex;
1497 tex_instruction_ptr->m_Word2.f.src_sel_y = texture_coordinate_source->swizzley;
1498 tex_instruction_ptr->m_Word2.f.src_sel_z = texture_coordinate_source->swizzlez;
1499 tex_instruction_ptr->m_Word2.f.src_sel_w = texture_coordinate_source->swizzlew;
1500 }
1501 else
1502 {
1503 radeon_error("Only temp destination registers supported for TEX dest regs.\n");
1504 return GL_FALSE;
1505 }
1506
1507 if( GL_FALSE == add_tex_instruction(pAsm, tex_instruction_ptr) )
1508 {
1509 return GL_FALSE;
1510 }
1511
1512 return GL_TRUE;
1513 }
1514
1515 void initialize(r700_AssemblerBase *pAsm)
1516 {
1517 GLuint cycle, component;
1518
1519 for (cycle=0; cycle<NUMBER_OF_CYCLES; cycle++)
1520 {
1521 for (component=0; component<NUMBER_OF_COMPONENTS; component++)
1522 {
1523 pAsm->hw_gpr[cycle][component] = (-1);
1524 }
1525 }
1526 for (component=0; component<NUMBER_OF_COMPONENTS; component++)
1527 {
1528 pAsm->hw_cfile_addr[component] = (-1);
1529 pAsm->hw_cfile_chan[component] = (-1);
1530 }
1531 }
1532
1533 GLboolean assemble_alu_src(R700ALUInstruction* alu_instruction_ptr,
1534 int source_index,
1535 PVSSRC* pSource,
1536 BITS scalar_channel_index)
1537 {
1538 BITS src_sel;
1539 BITS src_rel;
1540 BITS src_chan;
1541 BITS src_neg;
1542
1543 //--------------------------------------------------------------------------
1544 // Source for operands src0, src1.
1545 // Values [0,127] correspond to GPR[0..127].
1546 // Values [256,511] correspond to cfile constants c[0..255].
1547
1548 //--------------------------------------------------------------------------
1549 // Other special values are shown in the list below.
1550
1551 // 248 SQ_ALU_SRC_0: special constant 0.0.
1552 // 249 SQ_ALU_SRC_1: special constant 1.0 float.
1553
1554 // 250 SQ_ALU_SRC_1_INT: special constant 1 integer.
1555 // 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer.
1556
1557 // 252 SQ_ALU_SRC_0_5: special constant 0.5 float.
1558 // 253 SQ_ALU_SRC_LITERAL: literal constant.
1559
1560 // 254 SQ_ALU_SRC_PV: previous vector result.
1561 // 255 SQ_ALU_SRC_PS: previous scalar result.
1562 //--------------------------------------------------------------------------
1563
1564 BITS channel_swizzle;
1565 switch (scalar_channel_index)
1566 {
1567 case 0: channel_swizzle = pSource->swizzlex; break;
1568 case 1: channel_swizzle = pSource->swizzley; break;
1569 case 2: channel_swizzle = pSource->swizzlez; break;
1570 case 3: channel_swizzle = pSource->swizzlew; break;
1571 default: channel_swizzle = SQ_SEL_MASK; break;
1572 }
1573
1574 if(channel_swizzle == SQ_SEL_0)
1575 {
1576 src_sel = SQ_ALU_SRC_0;
1577 }
1578 else if (channel_swizzle == SQ_SEL_1)
1579 {
1580 src_sel = SQ_ALU_SRC_1;
1581 }
1582 else
1583 {
1584 if ( (pSource->rtype == SRC_REG_TEMPORARY) ||
1585 (pSource->rtype == SRC_REG_INPUT)
1586 )
1587 {
1588 src_sel = pSource->reg;
1589 }
1590 else if (pSource->rtype == SRC_REG_CONSTANT)
1591 {
1592 src_sel = pSource->reg + CFILE_REGISTER_OFFSET;
1593 }
1594 else if (pSource->rtype == SRC_REC_LITERAL)
1595 {
1596 src_sel = SQ_ALU_SRC_LITERAL;
1597 }
1598 else
1599 {
1600 radeon_error("Source (%d) register type (%d) not one of TEMP, INPUT, or CONSTANT.\n",
1601 source_index, pSource->rtype);
1602 return GL_FALSE;
1603 }
1604 }
1605
1606 if( ADDR_ABSOLUTE == addrmode_PVSSRC(pSource) )
1607 {
1608 src_rel = SQ_ABSOLUTE;
1609 }
1610 else
1611 {
1612 src_rel = SQ_RELATIVE;
1613 }
1614
1615 switch (channel_swizzle)
1616 {
1617 case SQ_SEL_X:
1618 src_chan = SQ_CHAN_X;
1619 break;
1620 case SQ_SEL_Y:
1621 src_chan = SQ_CHAN_Y;
1622 break;
1623 case SQ_SEL_Z:
1624 src_chan = SQ_CHAN_Z;
1625 break;
1626 case SQ_SEL_W:
1627 src_chan = SQ_CHAN_W;
1628 break;
1629 case SQ_SEL_0:
1630 case SQ_SEL_1:
1631 // Does not matter since src_sel controls
1632 src_chan = SQ_CHAN_X;
1633 break;
1634 default:
1635 radeon_error("Unknown source select value (%d) in assemble_alu_src().\n", channel_swizzle);
1636 return GL_FALSE;
1637 break;
1638 }
1639
1640 switch (scalar_channel_index)
1641 {
1642 case 0: src_neg = pSource->negx; break;
1643 case 1: src_neg = pSource->negy; break;
1644 case 2: src_neg = pSource->negz; break;
1645 case 3: src_neg = pSource->negw; break;
1646 default: src_neg = 0; break;
1647 }
1648
1649 switch (source_index)
1650 {
1651 case 0:
1652 alu_instruction_ptr->m_Word0.f.src0_sel = src_sel;
1653 alu_instruction_ptr->m_Word0.f.src0_rel = src_rel;
1654 alu_instruction_ptr->m_Word0.f.src0_chan = src_chan;
1655 alu_instruction_ptr->m_Word0.f.src0_neg = src_neg;
1656 break;
1657 case 1:
1658 alu_instruction_ptr->m_Word0.f.src1_sel = src_sel;
1659 alu_instruction_ptr->m_Word0.f.src1_rel = src_rel;
1660 alu_instruction_ptr->m_Word0.f.src1_chan = src_chan;
1661 alu_instruction_ptr->m_Word0.f.src1_neg = src_neg;
1662 break;
1663 case 2:
1664 alu_instruction_ptr->m_Word1_OP3.f.src2_sel = src_sel;
1665 alu_instruction_ptr->m_Word1_OP3.f.src2_rel = src_rel;
1666 alu_instruction_ptr->m_Word1_OP3.f.src2_chan = src_chan;
1667 alu_instruction_ptr->m_Word1_OP3.f.src2_neg = src_neg;
1668 break;
1669 default:
1670 radeon_error("Only three sources allowed in ALU opcodes.\n");
1671 return GL_FALSE;
1672 break;
1673 }
1674
1675 return GL_TRUE;
1676 }
1677
1678 GLboolean add_alu_instruction(r700_AssemblerBase* pAsm,
1679 R700ALUInstruction* alu_instruction_ptr,
1680 GLuint contiguous_slots_needed)
1681 {
1682 if( GL_FALSE == check_current_clause(pAsm, CF_ALU_CLAUSE) )
1683 {
1684 return GL_FALSE;
1685 }
1686
1687 if ( pAsm->alu_x_opcode != 0 ||
1688 pAsm->cf_current_alu_clause_ptr == NULL ||
1689 ( (pAsm->cf_current_alu_clause_ptr != NULL) &&
1690 (pAsm->cf_current_alu_clause_ptr->m_Word1.f.count >= (GetCFMaxInstructions(pAsm->cf_current_alu_clause_ptr->m_ShaderInstType)-contiguous_slots_needed-1) )
1691 ) )
1692 {
1693
1694 //new cf inst for this clause
1695 pAsm->cf_current_alu_clause_ptr = (R700ControlFlowALUClause*) CALLOC_STRUCT(R700ControlFlowALUClause);
1696
1697 // link the new cf to cf segment
1698 if(NULL != pAsm->cf_current_alu_clause_ptr)
1699 {
1700 Init_R700ControlFlowALUClause(pAsm->cf_current_alu_clause_ptr);
1701 AddCFInstruction( pAsm->pR700Shader,
1702 (R700ControlFlowInstruction *)pAsm->cf_current_alu_clause_ptr );
1703 }
1704 else
1705 {
1706 radeon_error("Could not allocate a new ALU CF instruction.\n");
1707 return GL_FALSE;
1708 }
1709
1710 pAsm->cf_current_alu_clause_ptr->m_Word0.f.kcache_bank0 = 0x0;
1711 pAsm->cf_current_alu_clause_ptr->m_Word0.f.kcache_bank1 = 0x0;
1712 pAsm->cf_current_alu_clause_ptr->m_Word0.f.kcache_mode0 = SQ_CF_KCACHE_NOP;
1713
1714 pAsm->cf_current_alu_clause_ptr->m_Word1.f.kcache_mode1 = SQ_CF_KCACHE_NOP;
1715 pAsm->cf_current_alu_clause_ptr->m_Word1.f.kcache_addr0 = 0x0;
1716 pAsm->cf_current_alu_clause_ptr->m_Word1.f.kcache_addr1 = 0x0;
1717
1718 pAsm->cf_current_alu_clause_ptr->m_Word1.f.count = 0x0;
1719
1720 if(pAsm->alu_x_opcode != 0)
1721 {
1722 pAsm->cf_current_alu_clause_ptr->m_Word1.f.cf_inst = pAsm->alu_x_opcode;
1723 pAsm->alu_x_opcode = 0;
1724 }
1725 else
1726 {
1727 pAsm->cf_current_alu_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_ALU;
1728 }
1729
1730 pAsm->cf_current_alu_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
1731
1732 pAsm->cf_current_alu_clause_ptr->m_Word1.f.barrier = 0x1;
1733 }
1734 else
1735 {
1736 pAsm->cf_current_alu_clause_ptr->m_Word1.f.count += (GetInstructionSize(alu_instruction_ptr->m_ShaderInstType) / 2);
1737 }
1738
1739 // If this clause constains any instruction that is forward dependent on a TEX instruction,
1740 // set the whole_quad_mode for this clause
1741 if ( pAsm->pInstDeps[pAsm->uiCurInst].nDstDep > (-1) )
1742 {
1743 pAsm->cf_current_alu_clause_ptr->m_Word1.f.whole_quad_mode = 0x1;
1744 }
1745
1746 if (pAsm->cf_current_alu_clause_ptr->m_Word1.f.count >= (GetCFMaxInstructions(pAsm->cf_current_alu_clause_ptr->m_ShaderInstType)-1) )
1747 {
1748 alu_instruction_ptr->m_Word0.f.last = 1;
1749 }
1750
1751 if(NULL == pAsm->cf_current_alu_clause_ptr->m_pLinkedALUInstruction)
1752 {
1753 pAsm->cf_current_alu_clause_ptr->m_pLinkedALUInstruction = alu_instruction_ptr;
1754 alu_instruction_ptr->m_pLinkedALUClause = pAsm->cf_current_alu_clause_ptr;
1755 }
1756
1757 AddALUInstruction(pAsm->pR700Shader, alu_instruction_ptr);
1758
1759 return GL_TRUE;
1760 }
1761
1762 void get_src_properties(R700ALUInstruction* alu_instruction_ptr,
1763 int source_index,
1764 BITS* psrc_sel,
1765 BITS* psrc_rel,
1766 BITS* psrc_chan,
1767 BITS* psrc_neg)
1768 {
1769 switch (source_index)
1770 {
1771 case 0:
1772 *psrc_sel = alu_instruction_ptr->m_Word0.f.src0_sel ;
1773 *psrc_rel = alu_instruction_ptr->m_Word0.f.src0_rel ;
1774 *psrc_chan = alu_instruction_ptr->m_Word0.f.src0_chan;
1775 *psrc_neg = alu_instruction_ptr->m_Word0.f.src0_neg ;
1776 break;
1777
1778 case 1:
1779 *psrc_sel = alu_instruction_ptr->m_Word0.f.src1_sel ;
1780 *psrc_rel = alu_instruction_ptr->m_Word0.f.src1_rel ;
1781 *psrc_chan = alu_instruction_ptr->m_Word0.f.src1_chan;
1782 *psrc_neg = alu_instruction_ptr->m_Word0.f.src1_neg ;
1783 break;
1784
1785 case 2:
1786 *psrc_sel = alu_instruction_ptr->m_Word1_OP3.f.src2_sel;
1787 *psrc_rel = alu_instruction_ptr->m_Word1_OP3.f.src2_rel;
1788 *psrc_chan = alu_instruction_ptr->m_Word1_OP3.f.src2_chan;
1789 *psrc_neg = alu_instruction_ptr->m_Word1_OP3.f.src2_neg;
1790 break;
1791 }
1792 }
1793
1794 int is_cfile(BITS sel)
1795 {
1796 if (sel > 255 && sel < 512)
1797 {
1798 return 1;
1799 }
1800 return 0;
1801 }
1802
1803 int is_const(BITS sel)
1804 {
1805 if (is_cfile(sel))
1806 {
1807 return 1;
1808 }
1809 else if(sel >= SQ_ALU_SRC_0 && sel <= SQ_ALU_SRC_LITERAL)
1810 {
1811 return 1;
1812 }
1813 return 0;
1814 }
1815
1816 int is_gpr(BITS sel)
1817 {
1818 if (sel >= 0 && sel < 128)
1819 {
1820 return 1;
1821 }
1822 return 0;
1823 }
1824
1825 const GLuint BANK_SWIZZLE_VEC[8] = {SQ_ALU_VEC_210, //000
1826 SQ_ALU_VEC_120, //001
1827 SQ_ALU_VEC_102, //010
1828
1829 SQ_ALU_VEC_201, //011
1830 SQ_ALU_VEC_012, //100
1831 SQ_ALU_VEC_021, //101
1832
1833 SQ_ALU_VEC_012, //110
1834 SQ_ALU_VEC_012}; //111
1835
1836 const GLuint BANK_SWIZZLE_SCL[8] = {SQ_ALU_SCL_210, //000
1837 SQ_ALU_SCL_122, //001
1838 SQ_ALU_SCL_122, //010
1839
1840 SQ_ALU_SCL_221, //011
1841 SQ_ALU_SCL_212, //100
1842 SQ_ALU_SCL_122, //101
1843
1844 SQ_ALU_SCL_122, //110
1845 SQ_ALU_SCL_122}; //111
1846
1847 GLboolean reserve_cfile(r700_AssemblerBase* pAsm,
1848 GLuint sel,
1849 GLuint chan)
1850 {
1851 int res_match = (-1);
1852 int res_empty = (-1);
1853
1854 GLint res;
1855
1856 for (res=3; res>=0; res--)
1857 {
1858 if(pAsm->hw_cfile_addr[ res] < 0)
1859 {
1860 res_empty = res;
1861 }
1862 else if( (pAsm->hw_cfile_addr[res] == (int)sel)
1863 &&
1864 (pAsm->hw_cfile_chan[ res ] == (int) chan) )
1865 {
1866 res_match = res;
1867 }
1868 }
1869
1870 if(res_match >= 0)
1871 {
1872 // Read for this scalar component already reserved, nothing to do here.
1873 ;
1874 }
1875 else if(res_empty >= 0)
1876 {
1877 pAsm->hw_cfile_addr[ res_empty ] = sel;
1878 pAsm->hw_cfile_chan[ res_empty ] = chan;
1879 }
1880 else
1881 {
1882 radeon_error("All cfile read ports are used, cannot reference C$sel, channel $chan.\n");
1883 return GL_FALSE;
1884 }
1885 return GL_TRUE;
1886 }
1887
1888 GLboolean reserve_gpr(r700_AssemblerBase* pAsm, GLuint sel, GLuint chan, GLuint cycle)
1889 {
1890 if(pAsm->hw_gpr[cycle][chan] < 0)
1891 {
1892 pAsm->hw_gpr[cycle][chan] = sel;
1893 }
1894 else if(pAsm->hw_gpr[cycle][chan] != (int)sel)
1895 {
1896 radeon_error("Another scalar operation has already used GPR read port for given channel\n");
1897 return GL_FALSE;
1898 }
1899
1900 return GL_TRUE;
1901 }
1902
1903 GLboolean cycle_for_scalar_bank_swizzle(const int swiz, const int sel, GLuint* pCycle)
1904 {
1905 switch (swiz)
1906 {
1907 case SQ_ALU_SCL_210:
1908 {
1909 int table[3] = {2, 1, 0};
1910 *pCycle = table[sel];
1911 return GL_TRUE;
1912 }
1913 break;
1914 case SQ_ALU_SCL_122:
1915 {
1916 int table[3] = {1, 2, 2};
1917 *pCycle = table[sel];
1918 return GL_TRUE;
1919 }
1920 break;
1921 case SQ_ALU_SCL_212:
1922 {
1923 int table[3] = {2, 1, 2};
1924 *pCycle = table[sel];
1925 return GL_TRUE;
1926 }
1927 break;
1928 case SQ_ALU_SCL_221:
1929 {
1930 int table[3] = {2, 2, 1};
1931 *pCycle = table[sel];
1932 return GL_TRUE;
1933 }
1934 break;
1935 default:
1936 radeon_error("Bad Scalar bank swizzle value\n");
1937 break;
1938 }
1939
1940 return GL_FALSE;
1941 }
1942
1943 GLboolean cycle_for_vector_bank_swizzle(const int swiz, const int sel, GLuint* pCycle)
1944 {
1945 switch (swiz)
1946 {
1947 case SQ_ALU_VEC_012:
1948 {
1949 int table[3] = {0, 1, 2};
1950 *pCycle = table[sel];
1951 }
1952 break;
1953 case SQ_ALU_VEC_021:
1954 {
1955 int table[3] = {0, 2, 1};
1956 *pCycle = table[sel];
1957 }
1958 break;
1959 case SQ_ALU_VEC_120:
1960 {
1961 int table[3] = {1, 2, 0};
1962 *pCycle = table[sel];
1963 }
1964 break;
1965 case SQ_ALU_VEC_102:
1966 {
1967 int table[3] = {1, 0, 2};
1968 *pCycle = table[sel];
1969 }
1970 break;
1971 case SQ_ALU_VEC_201:
1972 {
1973 int table[3] = {2, 0, 1};
1974 *pCycle = table[sel];
1975 }
1976 break;
1977 case SQ_ALU_VEC_210:
1978 {
1979 int table[3] = {2, 1, 0};
1980 *pCycle = table[sel];
1981 }
1982 break;
1983 default:
1984 radeon_error("Bad Vec bank swizzle value\n");
1985 return GL_FALSE;
1986 break;
1987 }
1988
1989 return GL_TRUE;
1990 }
1991
1992 GLboolean check_scalar(r700_AssemblerBase* pAsm,
1993 R700ALUInstruction* alu_instruction_ptr)
1994 {
1995 GLuint cycle;
1996 GLuint bank_swizzle;
1997 GLuint const_count = 0;
1998
1999 BITS sel;
2000 BITS chan;
2001 BITS rel;
2002 BITS neg;
2003
2004 GLuint src;
2005
2006 BITS src_sel [3] = {0,0,0};
2007 BITS src_chan[3] = {0,0,0};
2008 BITS src_rel [3] = {0,0,0};
2009 BITS src_neg [3] = {0,0,0};
2010
2011 GLuint swizzle_key;
2012
2013 GLuint number_of_operands = r700GetNumOperands(pAsm);
2014
2015 for (src=0; src<number_of_operands; src++)
2016 {
2017 get_src_properties(alu_instruction_ptr,
2018 src,
2019 &(src_sel[src]),
2020 &(src_rel[src]),
2021 &(src_chan[src]),
2022 &(src_neg[src]) );
2023 }
2024
2025
2026 swizzle_key = ( (is_const( src_sel[0] ) ? 4 : 0) +
2027 (is_const( src_sel[1] ) ? 2 : 0) +
2028 (is_const( src_sel[2] ) ? 1 : 0) );
2029
2030 alu_instruction_ptr->m_Word1.f.bank_swizzle = BANK_SWIZZLE_SCL[ swizzle_key ];
2031
2032 for (src=0; src<number_of_operands; src++)
2033 {
2034 sel = src_sel [src];
2035 chan = src_chan[src];
2036 rel = src_rel [src];
2037 neg = src_neg [src];
2038
2039 if (is_const( sel ))
2040 {
2041 // Any constant, including literal and inline constants
2042 const_count++;
2043
2044 if (is_cfile( sel ))
2045 {
2046 reserve_cfile(pAsm, sel, chan);
2047 }
2048
2049 }
2050 }
2051
2052 for (src=0; src<number_of_operands; src++)
2053 {
2054 sel = src_sel [src];
2055 chan = src_chan[src];
2056 rel = src_rel [src];
2057 neg = src_neg [src];
2058
2059 if( is_gpr(sel) )
2060 {
2061 bank_swizzle = alu_instruction_ptr->m_Word1.f.bank_swizzle;
2062
2063 if( GL_FALSE == cycle_for_scalar_bank_swizzle(bank_swizzle, src, &cycle) )
2064 {
2065 return GL_FALSE;
2066 }
2067
2068 if(cycle < const_count)
2069 {
2070 if( GL_FALSE == reserve_gpr(pAsm, sel, chan, cycle) )
2071 {
2072 return GL_FALSE;
2073 }
2074 }
2075 }
2076 }
2077
2078 return GL_TRUE;
2079 }
2080
2081 GLboolean check_vector(r700_AssemblerBase* pAsm,
2082 R700ALUInstruction* alu_instruction_ptr)
2083 {
2084 GLuint cycle;
2085 GLuint bank_swizzle;
2086 GLuint const_count = 0;
2087
2088 GLuint src;
2089
2090 BITS sel;
2091 BITS chan;
2092 BITS rel;
2093 BITS neg;
2094
2095 BITS src_sel [3] = {0,0,0};
2096 BITS src_chan[3] = {0,0,0};
2097 BITS src_rel [3] = {0,0,0};
2098 BITS src_neg [3] = {0,0,0};
2099
2100 GLuint swizzle_key;
2101
2102 GLuint number_of_operands = r700GetNumOperands(pAsm);
2103
2104 for (src=0; src<number_of_operands; src++)
2105 {
2106 get_src_properties(alu_instruction_ptr,
2107 src,
2108 &(src_sel[src]),
2109 &(src_rel[src]),
2110 &(src_chan[src]),
2111 &(src_neg[src]) );
2112 }
2113
2114
2115 swizzle_key = ( (is_const( src_sel[0] ) ? 4 : 0) +
2116 (is_const( src_sel[1] ) ? 2 : 0) +
2117 (is_const( src_sel[2] ) ? 1 : 0)
2118 );
2119
2120 alu_instruction_ptr->m_Word1.f.bank_swizzle = BANK_SWIZZLE_VEC[swizzle_key];
2121
2122 for (src=0; src<number_of_operands; src++)
2123 {
2124 sel = src_sel [src];
2125 chan = src_chan[src];
2126 rel = src_rel [src];
2127 neg = src_neg [src];
2128
2129
2130 bank_swizzle = alu_instruction_ptr->m_Word1.f.bank_swizzle;
2131
2132 if( is_gpr(sel) )
2133 {
2134 if( GL_FALSE == cycle_for_vector_bank_swizzle(bank_swizzle, src, &cycle) )
2135 {
2136 return GL_FALSE;
2137 }
2138
2139 if ( (src == 1) &&
2140 (sel == src_sel[0]) &&
2141 (chan == src_chan[0]) )
2142 {
2143 }
2144 else
2145 {
2146 if( GL_FALSE == reserve_gpr(pAsm, sel, chan, cycle) )
2147 {
2148 return GL_FALSE;
2149 }
2150 }
2151 }
2152 else if( is_const(sel) )
2153 {
2154 const_count++;
2155
2156 if( is_cfile(sel) )
2157 {
2158 if( GL_FALSE == reserve_cfile(pAsm, sel, chan) )
2159 {
2160 return GL_FALSE;
2161 }
2162 }
2163 }
2164 }
2165
2166 return GL_TRUE;
2167 }
2168
2169 GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm)
2170 {
2171 R700ALUInstruction * alu_instruction_ptr;
2172 R700ALUInstructionHalfLiteral * alu_instruction_ptr_hl;
2173 R700ALUInstructionFullLiteral * alu_instruction_ptr_fl;
2174
2175 GLuint number_of_scalar_operations;
2176 GLboolean is_single_scalar_operation;
2177 GLuint scalar_channel_index;
2178
2179 PVSSRC * pcurrent_source;
2180 int current_source_index;
2181 GLuint contiguous_slots_needed;
2182
2183 GLuint uNumSrc = r700GetNumOperands(pAsm);
2184 //GLuint channel_swizzle, j;
2185 //GLuint chan_counter[4] = {0, 0, 0, 0};
2186 //PVSSRC * pSource[3];
2187 GLboolean bSplitInst = GL_FALSE;
2188
2189 if (1 == pAsm->D.dst.math)
2190 {
2191 is_single_scalar_operation = GL_TRUE;
2192 number_of_scalar_operations = 1;
2193 }
2194 else
2195 {
2196 is_single_scalar_operation = GL_FALSE;
2197 number_of_scalar_operations = 4;
2198
2199 /* current assembler doesn't do more than 1 register per source */
2200 #if 0
2201 /* check read port, only very preliminary algorithm, not count in
2202 src0/1 same comp case and prev slot repeat case; also not count relative
2203 addressing. TODO: improve performance. */
2204 for(j=0; j<uNumSrc; j++)
2205 {
2206 pSource[j] = &(pAsm->S[j].src);
2207 }
2208 for(scalar_channel_index=0; scalar_channel_index<4; scalar_channel_index++)
2209 {
2210 for(j=0; j<uNumSrc; j++)
2211 {
2212 switch (scalar_channel_index)
2213 {
2214 case 0: channel_swizzle = pSource[j]->swizzlex; break;
2215 case 1: channel_swizzle = pSource[j]->swizzley; break;
2216 case 2: channel_swizzle = pSource[j]->swizzlez; break;
2217 case 3: channel_swizzle = pSource[j]->swizzlew; break;
2218 default: channel_swizzle = SQ_SEL_MASK; break;
2219 }
2220 if ( ((pSource[j]->rtype == SRC_REG_TEMPORARY) ||
2221 (pSource[j]->rtype == SRC_REG_INPUT))
2222 && (channel_swizzle <= SQ_SEL_W) )
2223 {
2224 chan_counter[channel_swizzle]++;
2225 }
2226 }
2227 }
2228 if( (chan_counter[SQ_SEL_X] > 3)
2229 || (chan_counter[SQ_SEL_Y] > 3)
2230 || (chan_counter[SQ_SEL_Z] > 3)
2231 || (chan_counter[SQ_SEL_W] > 3) ) /* each chan bank has only 3 ports. */
2232 {
2233 bSplitInst = GL_TRUE;
2234 }
2235 #endif
2236 }
2237
2238 contiguous_slots_needed = 0;
2239
2240 if(GL_TRUE == is_reduction_opcode(&(pAsm->D)) )
2241 {
2242 contiguous_slots_needed = 4;
2243 }
2244
2245 contiguous_slots_needed += pAsm->D2.dst2.literal_slots;
2246
2247 initialize(pAsm);
2248
2249 for (scalar_channel_index=0;
2250 scalar_channel_index < number_of_scalar_operations;
2251 scalar_channel_index++)
2252 {
2253 if(scalar_channel_index == (number_of_scalar_operations-1))
2254 {
2255 switch(pAsm->D2.dst2.literal_slots)
2256 {
2257 case 0:
2258 alu_instruction_ptr = (R700ALUInstruction*) CALLOC_STRUCT(R700ALUInstruction);
2259 Init_R700ALUInstruction(alu_instruction_ptr);
2260 break;
2261 case 1:
2262 alu_instruction_ptr_hl = (R700ALUInstructionHalfLiteral*) CALLOC_STRUCT(R700ALUInstructionHalfLiteral);
2263 Init_R700ALUInstructionHalfLiteral(alu_instruction_ptr_hl, pAsm->C[0].f, pAsm->C[1].f);
2264 alu_instruction_ptr = (R700ALUInstruction*)alu_instruction_ptr_hl;
2265 break;
2266 case 2:
2267 alu_instruction_ptr_fl = (R700ALUInstructionFullLiteral*) CALLOC_STRUCT(R700ALUInstructionFullLiteral);
2268 Init_R700ALUInstructionFullLiteral(alu_instruction_ptr_fl,pAsm->C[0].f, pAsm->C[1].f, pAsm->C[2].f, pAsm->C[3].f);
2269 alu_instruction_ptr = (R700ALUInstruction*)alu_instruction_ptr_fl;
2270 break;
2271 };
2272 }
2273 else
2274 {
2275 alu_instruction_ptr = (R700ALUInstruction*) CALLOC_STRUCT(R700ALUInstruction);
2276 Init_R700ALUInstruction(alu_instruction_ptr);
2277 }
2278
2279 //src 0
2280 current_source_index = 0;
2281 pcurrent_source = &(pAsm->S[0].src);
2282
2283 if (GL_FALSE == assemble_alu_src(alu_instruction_ptr,
2284 current_source_index,
2285 pcurrent_source,
2286 scalar_channel_index) )
2287 {
2288 return GL_FALSE;
2289 }
2290
2291 if (uNumSrc > 1)
2292 {
2293 // Process source 1
2294 current_source_index = 1;
2295 pcurrent_source = &(pAsm->S[current_source_index].src);
2296
2297 if (GL_FALSE == assemble_alu_src(alu_instruction_ptr,
2298 current_source_index,
2299 pcurrent_source,
2300 scalar_channel_index) )
2301 {
2302 return GL_FALSE;
2303 }
2304 }
2305
2306 //other bits
2307 alu_instruction_ptr->m_Word0.f.index_mode = pAsm->D2.dst2.index_mode;
2308
2309 if( (is_single_scalar_operation == GL_TRUE)
2310 || (GL_TRUE == bSplitInst) )
2311 {
2312 alu_instruction_ptr->m_Word0.f.last = 1;
2313 }
2314 else
2315 {
2316 alu_instruction_ptr->m_Word0.f.last = (scalar_channel_index == 3) ? 1 : 0;
2317 }
2318
2319 alu_instruction_ptr->m_Word0.f.pred_sel = (pAsm->D.dst.pred_inv > 0) ? 1 : 0;
2320 if(1 == pAsm->D.dst.predicated)
2321 {
2322 alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x1;
2323 alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x1;
2324 }
2325 else
2326 {
2327 alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x0;
2328 alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x0;
2329 }
2330
2331 // dst
2332 if( (pAsm->D.dst.rtype == DST_REG_TEMPORARY) ||
2333 (pAsm->D.dst.rtype == DST_REG_OUT) )
2334 {
2335 alu_instruction_ptr->m_Word1.f.dst_gpr = pAsm->D.dst.reg;
2336 }
2337 else
2338 {
2339 radeon_error("Only temp destination registers supported for ALU dest regs.\n");
2340 return GL_FALSE;
2341 }
2342
2343 alu_instruction_ptr->m_Word1.f.dst_rel = SQ_ABSOLUTE; //D.rtype
2344
2345 if ( is_single_scalar_operation == GL_TRUE )
2346 {
2347 // Override scalar_channel_index since only one scalar value will be written
2348 if(pAsm->D.dst.writex)
2349 {
2350 scalar_channel_index = 0;
2351 }
2352 else if(pAsm->D.dst.writey)
2353 {
2354 scalar_channel_index = 1;
2355 }
2356 else if(pAsm->D.dst.writez)
2357 {
2358 scalar_channel_index = 2;
2359 }
2360 else if(pAsm->D.dst.writew)
2361 {
2362 scalar_channel_index = 3;
2363 }
2364 }
2365
2366 alu_instruction_ptr->m_Word1.f.dst_chan = scalar_channel_index;
2367
2368 alu_instruction_ptr->m_Word1.f.clamp = pAsm->D2.dst2.SaturateMode;
2369
2370 if (pAsm->D.dst.op3)
2371 {
2372 //op3
2373
2374 alu_instruction_ptr->m_Word1_OP3.f.alu_inst = pAsm->D.dst.opcode;
2375
2376 //There's 3rd src for op3
2377 current_source_index = 2;
2378 pcurrent_source = &(pAsm->S[current_source_index].src);
2379
2380 if ( GL_FALSE == assemble_alu_src(alu_instruction_ptr,
2381 current_source_index,
2382 pcurrent_source,
2383 scalar_channel_index) )
2384 {
2385 return GL_FALSE;
2386 }
2387 }
2388 else
2389 {
2390 //op2
2391 if (pAsm->bR6xx)
2392 {
2393 alu_instruction_ptr->m_Word1_OP2.f6.alu_inst = pAsm->D.dst.opcode;
2394
2395 alu_instruction_ptr->m_Word1_OP2.f6.src0_abs = pAsm->S[0].src.abs;
2396 alu_instruction_ptr->m_Word1_OP2.f6.src1_abs = pAsm->S[1].src.abs;
2397
2398 //alu_instruction_ptr->m_Word1_OP2.f6.update_execute_mask = 0x0;
2399 //alu_instruction_ptr->m_Word1_OP2.f6.update_pred = 0x0;
2400 switch (scalar_channel_index)
2401 {
2402 case 0:
2403 alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writex;
2404 break;
2405 case 1:
2406 alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writey;
2407 break;
2408 case 2:
2409 alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writez;
2410 break;
2411 case 3:
2412 alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writew;
2413 break;
2414 default:
2415 alu_instruction_ptr->m_Word1_OP2.f6.write_mask = 1; //SQ_SEL_MASK;
2416 break;
2417 }
2418 alu_instruction_ptr->m_Word1_OP2.f6.omod = SQ_ALU_OMOD_OFF;
2419 }
2420 else
2421 {
2422 alu_instruction_ptr->m_Word1_OP2.f.alu_inst = pAsm->D.dst.opcode;
2423
2424 alu_instruction_ptr->m_Word1_OP2.f.src0_abs = pAsm->S[0].src.abs;
2425 alu_instruction_ptr->m_Word1_OP2.f.src1_abs = pAsm->S[1].src.abs;
2426
2427 //alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x0;
2428 //alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x0;
2429 switch (scalar_channel_index)
2430 {
2431 case 0:
2432 alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writex;
2433 break;
2434 case 1:
2435 alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writey;
2436 break;
2437 case 2:
2438 alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writez;
2439 break;
2440 case 3:
2441 alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writew;
2442 break;
2443 default:
2444 alu_instruction_ptr->m_Word1_OP2.f.write_mask = 1; //SQ_SEL_MASK;
2445 break;
2446 }
2447 alu_instruction_ptr->m_Word1_OP2.f.omod = SQ_ALU_OMOD_OFF;
2448 }
2449 }
2450
2451 if(GL_FALSE == add_alu_instruction(pAsm, alu_instruction_ptr, contiguous_slots_needed) )
2452 {
2453 return GL_FALSE;
2454 }
2455
2456 /*
2457 * Judge the type of current instruction, is it vector or scalar
2458 * instruction.
2459 */
2460 if (is_single_scalar_operation)
2461 {
2462 if(GL_FALSE == check_scalar(pAsm, alu_instruction_ptr) )
2463 {
2464 return GL_FALSE;
2465 }
2466 }
2467 else
2468 {
2469 if(GL_FALSE == check_vector(pAsm, alu_instruction_ptr) )
2470 {
2471 return GL_FALSE;
2472 }
2473 }
2474
2475 contiguous_slots_needed -= 1;
2476 }
2477
2478 return GL_TRUE;
2479 }
2480
2481 GLboolean next_ins(r700_AssemblerBase *pAsm)
2482 {
2483 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
2484
2485 if( GL_TRUE == pAsm->is_tex )
2486 {
2487 if (pILInst->TexSrcTarget == TEXTURE_RECT_INDEX) {
2488 if( GL_FALSE == assemble_tex_instruction(pAsm, GL_FALSE) )
2489 {
2490 radeon_error("Error assembling TEX instruction\n");
2491 return GL_FALSE;
2492 }
2493 } else {
2494 if( GL_FALSE == assemble_tex_instruction(pAsm, GL_TRUE) )
2495 {
2496 radeon_error("Error assembling TEX instruction\n");
2497 return GL_FALSE;
2498 }
2499 }
2500 }
2501 else
2502 { //ALU
2503 if( GL_FALSE == assemble_alu_instruction(pAsm) )
2504 {
2505 radeon_error("Error assembling ALU instruction\n");
2506 return GL_FALSE;
2507 }
2508 }
2509
2510 if(pAsm->D.dst.rtype == DST_REG_OUT)
2511 {
2512 if(pAsm->D.dst.op3)
2513 {
2514 // There is no mask for OP3 instructions, so all channels are written
2515 pAsm->pucOutMask[pAsm->D.dst.reg - pAsm->starting_export_register_number] = 0xF;
2516 }
2517 else
2518 {
2519 pAsm->pucOutMask[pAsm->D.dst.reg - pAsm->starting_export_register_number]
2520 |= (unsigned char)pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask;
2521 }
2522 }
2523
2524 //reset for next inst.
2525 pAsm->D.bits = 0;
2526 pAsm->D2.bits = 0;
2527 pAsm->S[0].bits = 0;
2528 pAsm->S[1].bits = 0;
2529 pAsm->S[2].bits = 0;
2530 pAsm->is_tex = GL_FALSE;
2531 pAsm->need_tex_barrier = GL_FALSE;
2532 pAsm->D2.bits = 0;
2533 pAsm->C[0].bits = pAsm->C[1].bits = pAsm->C[2].bits = pAsm->C[3].bits = 0;
2534 return GL_TRUE;
2535 }
2536
2537 GLboolean assemble_math_function(r700_AssemblerBase* pAsm, BITS opcode)
2538 {
2539 BITS tmp;
2540
2541 checkop1(pAsm);
2542
2543 tmp = gethelpr(pAsm);
2544
2545 // opcode tmp.x, a.x
2546 // MOV dst, tmp.x
2547
2548 pAsm->D.dst.opcode = opcode;
2549 pAsm->D.dst.math = 1;
2550
2551 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
2552 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
2553 pAsm->D.dst.reg = tmp;
2554 pAsm->D.dst.writex = 1;
2555
2556 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
2557 {
2558 return GL_FALSE;
2559 }
2560
2561 if ( GL_FALSE == next_ins(pAsm) )
2562 {
2563 return GL_FALSE;
2564 }
2565
2566 // Now replicate result to all necessary channels in destination
2567 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
2568
2569 if( GL_FALSE == assemble_dst(pAsm) )
2570 {
2571 return GL_FALSE;
2572 }
2573
2574 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
2575 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
2576 pAsm->S[0].src.reg = tmp;
2577
2578 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
2579 noneg_PVSSRC(&(pAsm->S[0].src));
2580
2581 if( GL_FALSE == next_ins(pAsm) )
2582 {
2583 return GL_FALSE;
2584 }
2585
2586 return GL_TRUE;
2587 }
2588
2589 GLboolean assemble_ABS(r700_AssemblerBase *pAsm)
2590 {
2591 checkop1(pAsm);
2592
2593 pAsm->D.dst.opcode = SQ_OP2_INST_MAX;
2594
2595 if( GL_FALSE == assemble_dst(pAsm) )
2596 {
2597 return GL_FALSE;
2598 }
2599 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
2600 {
2601 return GL_FALSE;
2602 }
2603
2604 pAsm->S[1].bits = pAsm->S[0].bits;
2605 flipneg_PVSSRC(&(pAsm->S[1].src));
2606
2607 if ( GL_FALSE == next_ins(pAsm) )
2608 {
2609 return GL_FALSE;
2610 }
2611
2612 return GL_TRUE;
2613 }
2614
2615 GLboolean assemble_ADD(r700_AssemblerBase *pAsm)
2616 {
2617 if( GL_FALSE == checkop2(pAsm) )
2618 {
2619 return GL_FALSE;
2620 }
2621
2622 pAsm->D.dst.opcode = SQ_OP2_INST_ADD;
2623
2624 if( GL_FALSE == assemble_dst(pAsm) )
2625 {
2626 return GL_FALSE;
2627 }
2628
2629 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
2630 {
2631 return GL_FALSE;
2632 }
2633
2634 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
2635 {
2636 return GL_FALSE;
2637 }
2638
2639 if(pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_SUB)
2640 {
2641 flipneg_PVSSRC(&(pAsm->S[1].src));
2642 }
2643
2644 if( GL_FALSE == next_ins(pAsm) )
2645 {
2646 return GL_FALSE;
2647 }
2648
2649 return GL_TRUE;
2650 }
2651
2652 GLboolean assemble_ARL(r700_AssemblerBase *pAsm)
2653 { /* TODO: ar values dont' persist between clauses */
2654 if( GL_FALSE == checkop1(pAsm) )
2655 {
2656 return GL_FALSE;
2657 }
2658
2659 pAsm->D.dst.opcode = SQ_OP2_INST_MOVA_FLOOR;
2660 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
2661 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
2662 pAsm->D.dst.reg = 0;
2663 pAsm->D.dst.writex = 0;
2664 pAsm->D.dst.writey = 0;
2665 pAsm->D.dst.writez = 0;
2666 pAsm->D.dst.writew = 0;
2667
2668 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
2669 {
2670 return GL_FALSE;
2671 }
2672
2673 if( GL_FALSE == next_ins(pAsm) )
2674 {
2675 return GL_FALSE;
2676 }
2677
2678 return GL_TRUE;
2679 }
2680
2681 GLboolean assemble_BAD(char *opcode_str)
2682 {
2683 radeon_error("Not yet implemented instruction (%s)\n", opcode_str);
2684 return GL_FALSE;
2685 }
2686
2687 GLboolean assemble_CMP(r700_AssemblerBase *pAsm)
2688 {
2689 int tmp;
2690
2691 if( GL_FALSE == checkop3(pAsm) )
2692 {
2693 return GL_FALSE;
2694 }
2695
2696 pAsm->D.dst.opcode = SQ_OP3_INST_CNDGE;
2697 pAsm->D.dst.op3 = 1;
2698
2699 tmp = (-1);
2700
2701 if(0xF != pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask)
2702 {
2703 //OP3 has no support for write mask
2704 tmp = gethelpr(pAsm);
2705
2706 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
2707 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
2708 pAsm->D.dst.reg = tmp;
2709
2710 nomask_PVSDST(&(pAsm->D.dst));
2711 }
2712 else
2713 {
2714 if( GL_FALSE == assemble_dst(pAsm) )
2715 {
2716 return GL_FALSE;
2717 }
2718 }
2719
2720 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
2721 {
2722 return GL_FALSE;
2723 }
2724
2725 if( GL_FALSE == assemble_src(pAsm, 2, 1) )
2726 {
2727 return GL_FALSE;
2728 }
2729
2730 if( GL_FALSE == assemble_src(pAsm, 1, 2) )
2731 {
2732 return GL_FALSE;
2733 }
2734
2735 if ( GL_FALSE == next_ins(pAsm) )
2736 {
2737 return GL_FALSE;
2738 }
2739
2740 if (0xF != pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask)
2741 {
2742 if( GL_FALSE == assemble_dst(pAsm) )
2743 {
2744 return GL_FALSE;
2745 }
2746
2747 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
2748
2749 //tmp for source
2750 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
2751 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
2752 pAsm->S[0].src.reg = tmp;
2753
2754 noneg_PVSSRC(&(pAsm->S[0].src));
2755 noswizzle_PVSSRC(&(pAsm->S[0].src));
2756
2757 if( GL_FALSE == next_ins(pAsm) )
2758 {
2759 return GL_FALSE;
2760 }
2761 }
2762
2763 return GL_TRUE;
2764 }
2765
2766 GLboolean assemble_TRIG(r700_AssemblerBase *pAsm, BITS opcode)
2767 {
2768 int tmp;
2769 checkop1(pAsm);
2770
2771 tmp = gethelpr(pAsm);
2772
2773 pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
2774 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
2775 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
2776 pAsm->D.dst.reg = tmp;
2777 pAsm->D.dst.writex = 1;
2778
2779 assemble_src(pAsm, 0, -1);
2780
2781 pAsm->S[1].src.rtype = SRC_REC_LITERAL;
2782 setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X);
2783 pAsm->D2.dst2.literal_slots = 1;
2784 pAsm->C[0].f = 1/(3.1415926535 * 2);
2785 pAsm->C[1].f = 0.0F;
2786 next_ins(pAsm);
2787
2788 pAsm->D.dst.opcode = opcode;
2789 pAsm->D.dst.math = 1;
2790
2791 assemble_dst(pAsm);
2792
2793 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
2794 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
2795 pAsm->S[0].src.reg = tmp;
2796 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
2797 noneg_PVSSRC(&(pAsm->S[0].src));
2798
2799 next_ins(pAsm);
2800
2801 //TODO - replicate if more channels set in WriteMask
2802 return GL_TRUE;
2803
2804 }
2805
2806 GLboolean assemble_DOT(r700_AssemblerBase *pAsm)
2807 {
2808 if( GL_FALSE == checkop2(pAsm) )
2809 {
2810 return GL_FALSE;
2811 }
2812
2813 pAsm->D.dst.opcode = SQ_OP2_INST_DOT4;
2814
2815 if( GL_FALSE == assemble_dst(pAsm) )
2816 {
2817 return GL_FALSE;
2818 }
2819
2820 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
2821 {
2822 return GL_FALSE;
2823 }
2824
2825 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
2826 {
2827 return GL_FALSE;
2828 }
2829
2830 if(OPCODE_DP3 == pAsm->pILInst[pAsm->uiCurInst].Opcode)
2831 {
2832 zerocomp_PVSSRC(&(pAsm->S[0].src), 3);
2833 zerocomp_PVSSRC(&(pAsm->S[1].src), 3);
2834 }
2835 else if(pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_DPH)
2836 {
2837 onecomp_PVSSRC(&(pAsm->S[0].src), 3);
2838 }
2839
2840 if ( GL_FALSE == next_ins(pAsm) )
2841 {
2842 return GL_FALSE;
2843 }
2844
2845 return GL_TRUE;
2846 }
2847
2848 GLboolean assemble_DST(r700_AssemblerBase *pAsm)
2849 {
2850 if( GL_FALSE == checkop2(pAsm) )
2851 {
2852 return GL_FALSE;
2853 }
2854
2855 pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
2856
2857 if( GL_FALSE == assemble_dst(pAsm) )
2858 {
2859 return GL_FALSE;
2860 }
2861
2862 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
2863 {
2864 return GL_FALSE;
2865 }
2866
2867 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
2868 {
2869 return GL_FALSE;
2870 }
2871
2872 onecomp_PVSSRC(&(pAsm->S[0].src), 0);
2873 onecomp_PVSSRC(&(pAsm->S[0].src), 3);
2874
2875 onecomp_PVSSRC(&(pAsm->S[1].src), 0);
2876 onecomp_PVSSRC(&(pAsm->S[1].src), 2);
2877
2878 if ( GL_FALSE == next_ins(pAsm) )
2879 {
2880 return GL_FALSE;
2881 }
2882
2883 return GL_TRUE;
2884 }
2885
2886 GLboolean assemble_EX2(r700_AssemblerBase *pAsm)
2887 {
2888 return assemble_math_function(pAsm, SQ_OP2_INST_EXP_IEEE);
2889 }
2890
2891 GLboolean assemble_EXP(r700_AssemblerBase *pAsm)
2892 {
2893 BITS tmp;
2894
2895 checkop1(pAsm);
2896
2897 tmp = gethelpr(pAsm);
2898
2899 // FLOOR tmp.x, a.x
2900 // EX2 dst.x tmp.x
2901
2902 if (pAsm->pILInst->DstReg.WriteMask & 0x1) {
2903 pAsm->D.dst.opcode = SQ_OP2_INST_FLOOR;
2904
2905 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
2906 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
2907 pAsm->D.dst.reg = tmp;
2908 pAsm->D.dst.writex = 1;
2909
2910 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
2911 {
2912 return GL_FALSE;
2913 }
2914
2915 if( GL_FALSE == next_ins(pAsm) )
2916 {
2917 return GL_FALSE;
2918 }
2919
2920 pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE;
2921 pAsm->D.dst.math = 1;
2922
2923 if( GL_FALSE == assemble_dst(pAsm) )
2924 {
2925 return GL_FALSE;
2926 }
2927
2928 pAsm->D.dst.writey = pAsm->D.dst.writez = pAsm->D.dst.writew = 0;
2929
2930 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
2931 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
2932 pAsm->S[0].src.reg = tmp;
2933
2934 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
2935 noneg_PVSSRC(&(pAsm->S[0].src));
2936
2937 if( GL_FALSE == next_ins(pAsm) )
2938 {
2939 return GL_FALSE;
2940 }
2941 }
2942
2943 // FRACT dst.y a.x
2944
2945 if ((pAsm->pILInst->DstReg.WriteMask >> 1) & 0x1) {
2946 pAsm->D.dst.opcode = SQ_OP2_INST_FRACT;
2947
2948 if( GL_FALSE == assemble_dst(pAsm) )
2949 {
2950 return GL_FALSE;
2951 }
2952
2953 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
2954 {
2955 return GL_FALSE;
2956 }
2957
2958 pAsm->D.dst.writex = pAsm->D.dst.writez = pAsm->D.dst.writew = 0;
2959
2960 if( GL_FALSE == next_ins(pAsm) )
2961 {
2962 return GL_FALSE;
2963 }
2964 }
2965
2966 // EX2 dst.z, a.x
2967
2968 if ((pAsm->pILInst->DstReg.WriteMask >> 2) & 0x1) {
2969 pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE;
2970 pAsm->D.dst.math = 1;
2971
2972 if( GL_FALSE == assemble_dst(pAsm) )
2973 {
2974 return GL_FALSE;
2975 }
2976
2977 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
2978 {
2979 return GL_FALSE;
2980 }
2981
2982 pAsm->D.dst.writex = pAsm->D.dst.writey = pAsm->D.dst.writew = 0;
2983
2984 if( GL_FALSE == next_ins(pAsm) )
2985 {
2986 return GL_FALSE;
2987 }
2988 }
2989
2990 // MOV dst.w 1.0
2991
2992 if ((pAsm->pILInst->DstReg.WriteMask >> 3) & 0x1) {
2993 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
2994
2995 if( GL_FALSE == assemble_dst(pAsm) )
2996 {
2997 return GL_FALSE;
2998 }
2999
3000 pAsm->D.dst.writex = pAsm->D.dst.writey = pAsm->D.dst.writez = 0;
3001
3002 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3003 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3004 pAsm->S[0].src.reg = tmp;
3005
3006 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_1);
3007 noneg_PVSSRC(&(pAsm->S[0].src));
3008
3009 if( GL_FALSE == next_ins(pAsm) )
3010 {
3011 return GL_FALSE;
3012 }
3013 }
3014
3015 return GL_TRUE;
3016 }
3017
3018 GLboolean assemble_FLR(r700_AssemblerBase *pAsm)
3019 {
3020 checkop1(pAsm);
3021
3022 pAsm->D.dst.opcode = SQ_OP2_INST_FLOOR;
3023
3024 if ( GL_FALSE == assemble_dst(pAsm) )
3025 {
3026 return GL_FALSE;
3027 }
3028
3029 if ( GL_FALSE == assemble_src(pAsm, 0, -1) )
3030 {
3031 return GL_FALSE;
3032 }
3033
3034 if ( GL_FALSE == next_ins(pAsm) )
3035 {
3036 return GL_FALSE;
3037 }
3038
3039 return GL_TRUE;
3040 }
3041
3042 GLboolean assemble_FLR_INT(r700_AssemblerBase *pAsm)
3043 {
3044 return assemble_math_function(pAsm, SQ_OP2_INST_FLT_TO_INT);
3045 }
3046
3047 GLboolean assemble_FRC(r700_AssemblerBase *pAsm)
3048 {
3049 checkop1(pAsm);
3050
3051 pAsm->D.dst.opcode = SQ_OP2_INST_FRACT;
3052
3053 if ( GL_FALSE == assemble_dst(pAsm) )
3054 {
3055 return GL_FALSE;
3056 }
3057
3058 if ( GL_FALSE == assemble_src(pAsm, 0, -1) )
3059 {
3060 return GL_FALSE;
3061 }
3062
3063 if ( GL_FALSE == next_ins(pAsm) )
3064 {
3065 return GL_FALSE;
3066 }
3067
3068 return GL_TRUE;
3069 }
3070
3071 GLboolean assemble_KIL(r700_AssemblerBase *pAsm, GLuint opcode)
3072 {
3073 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
3074
3075 if(pILInst->Opcode == OPCODE_KIL)
3076 checkop1(pAsm);
3077
3078 pAsm->D.dst.opcode = opcode;
3079 //pAsm->D.dst.math = 1;
3080
3081 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3082 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3083 pAsm->D.dst.reg = 0;
3084 pAsm->D.dst.writex = 0;
3085 pAsm->D.dst.writey = 0;
3086 pAsm->D.dst.writez = 0;
3087 pAsm->D.dst.writew = 0;
3088
3089 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3090 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3091 pAsm->S[0].src.reg = 0;
3092 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_0);
3093 noneg_PVSSRC(&(pAsm->S[0].src));
3094
3095 if(pILInst->Opcode == OPCODE_KIL_NV)
3096 {
3097 setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
3098 pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
3099 pAsm->S[1].src.reg = 0;
3100 setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_1);
3101 neg_PVSSRC(&(pAsm->S[1].src));
3102 }
3103 else
3104 {
3105 if( GL_FALSE == assemble_src(pAsm, 0, 1) )
3106 {
3107 return GL_FALSE;
3108 }
3109
3110 }
3111
3112 if ( GL_FALSE == next_ins(pAsm) )
3113 {
3114 return GL_FALSE;
3115 }
3116
3117 /* Doc says KILL has to be last(end) ALU clause */
3118 pAsm->pR700Shader->killIsUsed = GL_TRUE;
3119 pAsm->alu_x_opcode = SQ_CF_INST_ALU;
3120
3121 return GL_TRUE;
3122 }
3123
3124 GLboolean assemble_LG2(r700_AssemblerBase *pAsm)
3125 {
3126 return assemble_math_function(pAsm, SQ_OP2_INST_LOG_IEEE);
3127 }
3128
3129 GLboolean assemble_LRP(r700_AssemblerBase *pAsm)
3130 {
3131 BITS tmp;
3132
3133 if( GL_FALSE == checkop3(pAsm) )
3134 {
3135 return GL_FALSE;
3136 }
3137
3138 tmp = gethelpr(pAsm);
3139
3140 pAsm->D.dst.opcode = SQ_OP2_INST_ADD;
3141
3142 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3143 pAsm->D.dst.reg = tmp;
3144 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3145 nomask_PVSDST(&(pAsm->D.dst));
3146
3147
3148 if( GL_FALSE == assemble_src(pAsm, 1, 0) )
3149 {
3150 return GL_FALSE;
3151 }
3152
3153 if ( GL_FALSE == assemble_src(pAsm, 2, 1) )
3154 {
3155 return GL_FALSE;
3156 }
3157
3158 neg_PVSSRC(&(pAsm->S[1].src));
3159
3160 if( GL_FALSE == next_ins(pAsm) )
3161 {
3162 return GL_FALSE;
3163 }
3164
3165 pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
3166 pAsm->D.dst.op3 = 1;
3167
3168 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3169 pAsm->D.dst.reg = tmp;
3170 nomask_PVSDST(&(pAsm->D.dst));
3171 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3172
3173 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3174 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3175 pAsm->S[0].src.reg = tmp;
3176 noswizzle_PVSSRC(&(pAsm->S[0].src));
3177
3178
3179 if( GL_FALSE == assemble_src(pAsm, 0, 1) )
3180 {
3181 return GL_FALSE;
3182 }
3183
3184 if( GL_FALSE == assemble_src(pAsm, 2, -1) )
3185 {
3186 return GL_FALSE;
3187 }
3188
3189 if( GL_FALSE == next_ins(pAsm) )
3190 {
3191 return GL_FALSE;
3192 }
3193
3194 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
3195
3196 if( GL_FALSE == assemble_dst(pAsm) )
3197 {
3198 return GL_FALSE;
3199 }
3200
3201 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3202 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3203 pAsm->S[0].src.reg = tmp;
3204 noswizzle_PVSSRC(&(pAsm->S[0].src));
3205
3206 if( GL_FALSE == next_ins(pAsm) )
3207 {
3208 return GL_FALSE;
3209 }
3210
3211 return GL_TRUE;
3212 }
3213
3214 GLboolean assemble_LOG(r700_AssemblerBase *pAsm)
3215 {
3216 BITS tmp1, tmp2, tmp3;
3217
3218 checkop1(pAsm);
3219
3220 tmp1 = gethelpr(pAsm);
3221 tmp2 = gethelpr(pAsm);
3222 tmp3 = gethelpr(pAsm);
3223
3224 // FIXME: The hardware can do fabs() directly on input
3225 // elements, but the compiler doesn't have the
3226 // capability to use that.
3227
3228 // MAX tmp1.x, a.x, -a.x (fabs(a.x))
3229
3230 pAsm->D.dst.opcode = SQ_OP2_INST_MAX;
3231
3232 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3233 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3234 pAsm->D.dst.reg = tmp1;
3235 pAsm->D.dst.writex = 1;
3236
3237 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3238 {
3239 return GL_FALSE;
3240 }
3241
3242 pAsm->S[1].bits = pAsm->S[0].bits;
3243 flipneg_PVSSRC(&(pAsm->S[1].src));
3244
3245 if ( GL_FALSE == next_ins(pAsm) )
3246 {
3247 return GL_FALSE;
3248 }
3249
3250 // Entire algo:
3251 //
3252 // LG2 tmp2.x, tmp1.x
3253 // FLOOR tmp3.x, tmp2.x
3254 // MOV dst.x, tmp3.x
3255 // ADD tmp3.x, tmp2.x, -tmp3.x
3256 // EX2 dst.y, tmp3.x
3257 // MOV dst.z, tmp2.x
3258 // MOV dst.w, 1.0
3259
3260 // LG2 tmp2.x, tmp1.x
3261 // FLOOR tmp3.x, tmp2.x
3262
3263 pAsm->D.dst.opcode = SQ_OP2_INST_LOG_IEEE;
3264 pAsm->D.dst.math = 1;
3265
3266 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3267 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3268 pAsm->D.dst.reg = tmp2;
3269 pAsm->D.dst.writex = 1;
3270
3271 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3272 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
3273 pAsm->S[0].src.reg = tmp1;
3274
3275 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
3276 noneg_PVSSRC(&(pAsm->S[0].src));
3277
3278 if( GL_FALSE == next_ins(pAsm) )
3279 {
3280 return GL_FALSE;
3281 }
3282
3283 pAsm->D.dst.opcode = SQ_OP2_INST_FLOOR;
3284
3285 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3286 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3287 pAsm->D.dst.reg = tmp3;
3288 pAsm->D.dst.writex = 1;
3289
3290 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3291 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
3292 pAsm->S[0].src.reg = tmp2;
3293
3294 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
3295 noneg_PVSSRC(&(pAsm->S[0].src));
3296
3297 if( GL_FALSE == next_ins(pAsm) )
3298 {
3299 return GL_FALSE;
3300 }
3301
3302 // MOV dst.x, tmp3.x
3303
3304 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
3305
3306 if( GL_FALSE == assemble_dst(pAsm) )
3307 {
3308 return GL_FALSE;
3309 }
3310
3311 pAsm->D.dst.writey = pAsm->D.dst.writez = pAsm->D.dst.writew = 0;
3312
3313 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3314 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
3315 pAsm->S[0].src.reg = tmp3;
3316
3317 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
3318 noneg_PVSSRC(&(pAsm->S[0].src));
3319
3320 if( GL_FALSE == next_ins(pAsm) )
3321 {
3322 return GL_FALSE;
3323 }
3324
3325 // ADD tmp3.x, tmp2.x, -tmp3.x
3326 // EX2 dst.y, tmp3.x
3327
3328 pAsm->D.dst.opcode = SQ_OP2_INST_ADD;
3329
3330 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3331 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3332 pAsm->D.dst.reg = tmp3;
3333 pAsm->D.dst.writex = 1;
3334
3335 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3336 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
3337 pAsm->S[0].src.reg = tmp2;
3338
3339 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
3340 noneg_PVSSRC(&(pAsm->S[0].src));
3341
3342 setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
3343 pAsm->S[1].src.rtype = DST_REG_TEMPORARY;
3344 pAsm->S[1].src.reg = tmp3;
3345
3346 setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X);
3347 neg_PVSSRC(&(pAsm->S[1].src));
3348
3349 if( GL_FALSE == next_ins(pAsm) )
3350 {
3351 return GL_FALSE;
3352 }
3353
3354 pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE;
3355 pAsm->D.dst.math = 1;
3356
3357 if( GL_FALSE == assemble_dst(pAsm) )
3358 {
3359 return GL_FALSE;
3360 }
3361
3362 pAsm->D.dst.writex = pAsm->D.dst.writez = pAsm->D.dst.writew = 0;
3363
3364 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3365 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
3366 pAsm->S[0].src.reg = tmp3;
3367
3368 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
3369 noneg_PVSSRC(&(pAsm->S[0].src));
3370
3371 if( GL_FALSE == next_ins(pAsm) )
3372 {
3373 return GL_FALSE;
3374 }
3375
3376 // MOV dst.z, tmp2.x
3377
3378 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
3379
3380 if( GL_FALSE == assemble_dst(pAsm) )
3381 {
3382 return GL_FALSE;
3383 }
3384
3385 pAsm->D.dst.writex = pAsm->D.dst.writey = pAsm->D.dst.writew = 0;
3386
3387 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3388 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
3389 pAsm->S[0].src.reg = tmp2;
3390
3391 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
3392 noneg_PVSSRC(&(pAsm->S[0].src));
3393
3394 if( GL_FALSE == next_ins(pAsm) )
3395 {
3396 return GL_FALSE;
3397 }
3398
3399 // MOV dst.w 1.0
3400
3401 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
3402
3403 if( GL_FALSE == assemble_dst(pAsm) )
3404 {
3405 return GL_FALSE;
3406 }
3407
3408 pAsm->D.dst.writex = pAsm->D.dst.writey = pAsm->D.dst.writez = 0;
3409
3410 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3411 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3412 pAsm->S[0].src.reg = tmp1;
3413
3414 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_1);
3415 noneg_PVSSRC(&(pAsm->S[0].src));
3416
3417 if( GL_FALSE == next_ins(pAsm) )
3418 {
3419 return GL_FALSE;
3420 }
3421
3422 return GL_TRUE;
3423 }
3424
3425 GLboolean assemble_MAD(struct r700_AssemblerBase *pAsm)
3426 {
3427 int tmp, ii;
3428 GLboolean bReplaceDst = GL_FALSE;
3429 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
3430
3431 if( GL_FALSE == checkop3(pAsm) )
3432 {
3433 return GL_FALSE;
3434 }
3435
3436 pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
3437 pAsm->D.dst.op3 = 1;
3438
3439 tmp = (-1);
3440
3441 if(PROGRAM_TEMPORARY == pILInst->DstReg.File)
3442 { /* TODO : more investigation on MAD src and dst using same register */
3443 for(ii=0; ii<3; ii++)
3444 {
3445 if( (PROGRAM_TEMPORARY == pILInst->SrcReg[ii].File)
3446 && (pILInst->DstReg.Index == pILInst->SrcReg[ii].Index) )
3447 {
3448 bReplaceDst = GL_TRUE;
3449 break;
3450 }
3451 }
3452 }
3453 if(0xF != pILInst->DstReg.WriteMask)
3454 { /* OP3 has no support for write mask */
3455 bReplaceDst = GL_TRUE;
3456 }
3457
3458 if(GL_TRUE == bReplaceDst)
3459 {
3460 tmp = gethelpr(pAsm);
3461
3462 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3463 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3464 pAsm->D.dst.reg = tmp;
3465
3466 nomask_PVSDST(&(pAsm->D.dst));
3467 }
3468 else
3469 {
3470 if( GL_FALSE == assemble_dst(pAsm) )
3471 {
3472 return GL_FALSE;
3473 }
3474 }
3475
3476 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3477 {
3478 return GL_FALSE;
3479 }
3480
3481 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
3482 {
3483 return GL_FALSE;
3484 }
3485
3486 if( GL_FALSE == assemble_src(pAsm, 2, -1) )
3487 {
3488 return GL_FALSE;
3489 }
3490
3491 if ( GL_FALSE == next_ins(pAsm) )
3492 {
3493 return GL_FALSE;
3494 }
3495
3496 if (GL_TRUE == bReplaceDst)
3497 {
3498 if( GL_FALSE == assemble_dst(pAsm) )
3499 {
3500 return GL_FALSE;
3501 }
3502
3503 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
3504
3505 //tmp for source
3506 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3507 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3508 pAsm->S[0].src.reg = tmp;
3509
3510 noneg_PVSSRC(&(pAsm->S[0].src));
3511 noswizzle_PVSSRC(&(pAsm->S[0].src));
3512
3513 if( GL_FALSE == next_ins(pAsm) )
3514 {
3515 return GL_FALSE;
3516 }
3517 }
3518
3519 return GL_TRUE;
3520 }
3521
3522 /* LIT dst, src */
3523 GLboolean assemble_LIT(r700_AssemblerBase *pAsm)
3524 {
3525 unsigned int dstReg;
3526 unsigned int dstType;
3527 unsigned int srcReg;
3528 unsigned int srcType;
3529 checkop1(pAsm);
3530 int tmp = gethelpr(pAsm);
3531
3532 if( GL_FALSE == assemble_dst(pAsm) )
3533 {
3534 return GL_FALSE;
3535 }
3536 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3537 {
3538 return GL_FALSE;
3539 }
3540 dstReg = pAsm->D.dst.reg;
3541 dstType = pAsm->D.dst.rtype;
3542 srcReg = pAsm->S[0].src.reg;
3543 srcType = pAsm->S[0].src.rtype;
3544
3545 /* dst.xw, <- 1.0 */
3546 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
3547 pAsm->D.dst.rtype = dstType;
3548 pAsm->D.dst.reg = dstReg;
3549 pAsm->D.dst.writex = 1;
3550 pAsm->D.dst.writey = 0;
3551 pAsm->D.dst.writez = 0;
3552 pAsm->D.dst.writew = 1;
3553 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3554 pAsm->S[0].src.reg = tmp;
3555 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3556 noneg_PVSSRC(&(pAsm->S[0].src));
3557 pAsm->S[0].src.swizzlex = SQ_SEL_1;
3558 pAsm->S[0].src.swizzley = SQ_SEL_1;
3559 pAsm->S[0].src.swizzlez = SQ_SEL_1;
3560 pAsm->S[0].src.swizzlew = SQ_SEL_1;
3561 if( GL_FALSE == next_ins(pAsm) )
3562 {
3563 return GL_FALSE;
3564 }
3565
3566 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3567 {
3568 return GL_FALSE;
3569 }
3570
3571 /* dst.y = max(src.x, 0.0) */
3572 pAsm->D.dst.opcode = SQ_OP2_INST_MAX;
3573 pAsm->D.dst.rtype = dstType;
3574 pAsm->D.dst.reg = dstReg;
3575 pAsm->D.dst.writex = 0;
3576 pAsm->D.dst.writey = 1;
3577 pAsm->D.dst.writez = 0;
3578 pAsm->D.dst.writew = 0;
3579 pAsm->S[0].src.rtype = srcType;
3580 pAsm->S[0].src.reg = srcReg;
3581 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3582 swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X, SQ_SEL_X, SQ_SEL_X, SQ_SEL_X);
3583 pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
3584 pAsm->S[1].src.reg = tmp;
3585 setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
3586 noneg_PVSSRC(&(pAsm->S[1].src));
3587 pAsm->S[1].src.swizzlex = SQ_SEL_0;
3588 pAsm->S[1].src.swizzley = SQ_SEL_0;
3589 pAsm->S[1].src.swizzlez = SQ_SEL_0;
3590 pAsm->S[1].src.swizzlew = SQ_SEL_0;
3591 if( GL_FALSE == next_ins(pAsm) )
3592 {
3593 return GL_FALSE;
3594 }
3595
3596 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3597 {
3598 return GL_FALSE;
3599 }
3600
3601 swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_Y, SQ_SEL_Y, SQ_SEL_Y, SQ_SEL_Y);
3602
3603 /* dst.z = log(src.y) */
3604 pAsm->D.dst.opcode = SQ_OP2_INST_LOG_CLAMPED;
3605 pAsm->D.dst.math = 1;
3606 pAsm->D.dst.rtype = dstType;
3607 pAsm->D.dst.reg = dstReg;
3608 pAsm->D.dst.writex = 0;
3609 pAsm->D.dst.writey = 0;
3610 pAsm->D.dst.writez = 1;
3611 pAsm->D.dst.writew = 0;
3612 pAsm->S[0].src.rtype = srcType;
3613 pAsm->S[0].src.reg = srcReg;
3614 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3615 if( GL_FALSE == next_ins(pAsm) )
3616 {
3617 return GL_FALSE;
3618 }
3619
3620 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3621 {
3622 return GL_FALSE;
3623 }
3624
3625 if( GL_FALSE == assemble_src(pAsm, 0, 2) )
3626 {
3627 return GL_FALSE;
3628 }
3629
3630 swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_W, SQ_SEL_W, SQ_SEL_W, SQ_SEL_W);
3631
3632 swizzleagain_PVSSRC(&(pAsm->S[2].src), SQ_SEL_X, SQ_SEL_X, SQ_SEL_X, SQ_SEL_X);
3633
3634 /* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */
3635 pAsm->D.dst.opcode = SQ_OP3_INST_MUL_LIT;
3636 pAsm->D.dst.math = 1;
3637 pAsm->D.dst.op3 = 1;
3638 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3639 pAsm->D.dst.reg = tmp;
3640 pAsm->D.dst.writex = 1;
3641 pAsm->D.dst.writey = 0;
3642 pAsm->D.dst.writez = 0;
3643 pAsm->D.dst.writew = 0;
3644
3645 pAsm->S[0].src.rtype = srcType;
3646 pAsm->S[0].src.reg = srcReg;
3647 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3648
3649 pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
3650 pAsm->S[1].src.reg = dstReg;
3651 setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
3652 noneg_PVSSRC(&(pAsm->S[1].src));
3653 pAsm->S[1].src.swizzlex = SQ_SEL_Z;
3654 pAsm->S[1].src.swizzley = SQ_SEL_Z;
3655 pAsm->S[1].src.swizzlez = SQ_SEL_Z;
3656 pAsm->S[1].src.swizzlew = SQ_SEL_Z;
3657
3658 pAsm->S[2].src.rtype = srcType;
3659 pAsm->S[2].src.reg = srcReg;
3660 setaddrmode_PVSSRC(&(pAsm->S[2].src), ADDR_ABSOLUTE);
3661
3662 if( GL_FALSE == next_ins(pAsm) )
3663 {
3664 return GL_FALSE;
3665 }
3666
3667 /* dst.z = exp(tmp.x) */
3668 pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE;
3669 pAsm->D.dst.math = 1;
3670 pAsm->D.dst.rtype = dstType;
3671 pAsm->D.dst.reg = dstReg;
3672 pAsm->D.dst.writex = 0;
3673 pAsm->D.dst.writey = 0;
3674 pAsm->D.dst.writez = 1;
3675 pAsm->D.dst.writew = 0;
3676
3677 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3678 pAsm->S[0].src.reg = tmp;
3679 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3680 noneg_PVSSRC(&(pAsm->S[0].src));
3681 pAsm->S[0].src.swizzlex = SQ_SEL_X;
3682 pAsm->S[0].src.swizzley = SQ_SEL_X;
3683 pAsm->S[0].src.swizzlez = SQ_SEL_X;
3684 pAsm->S[0].src.swizzlew = SQ_SEL_X;
3685
3686 if( GL_FALSE == next_ins(pAsm) )
3687 {
3688 return GL_FALSE;
3689 }
3690
3691 return GL_TRUE;
3692 }
3693
3694 GLboolean assemble_MAX(r700_AssemblerBase *pAsm)
3695 {
3696 if( GL_FALSE == checkop2(pAsm) )
3697 {
3698 return GL_FALSE;
3699 }
3700
3701 pAsm->D.dst.opcode = SQ_OP2_INST_MAX;
3702
3703 if( GL_FALSE == assemble_dst(pAsm) )
3704 {
3705 return GL_FALSE;
3706 }
3707
3708 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3709 {
3710 return GL_FALSE;
3711 }
3712
3713 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
3714 {
3715 return GL_FALSE;
3716 }
3717
3718 if( GL_FALSE == next_ins(pAsm) )
3719 {
3720 return GL_FALSE;
3721 }
3722
3723 return GL_TRUE;
3724 }
3725
3726 GLboolean assemble_MIN(r700_AssemblerBase *pAsm)
3727 {
3728 if( GL_FALSE == checkop2(pAsm) )
3729 {
3730 return GL_FALSE;
3731 }
3732
3733 pAsm->D.dst.opcode = SQ_OP2_INST_MIN;
3734
3735 if( GL_FALSE == assemble_dst(pAsm) )
3736 {
3737 return GL_FALSE;
3738 }
3739
3740 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3741 {
3742 return GL_FALSE;
3743 }
3744
3745 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
3746 {
3747 return GL_FALSE;
3748 }
3749
3750 if( GL_FALSE == next_ins(pAsm) )
3751 {
3752 return GL_FALSE;
3753 }
3754
3755 return GL_TRUE;
3756 }
3757
3758 GLboolean assemble_MOV(r700_AssemblerBase *pAsm)
3759 {
3760 checkop1(pAsm);
3761
3762 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
3763
3764 if (GL_FALSE == assemble_dst(pAsm))
3765 {
3766 return GL_FALSE;
3767 }
3768
3769 if (GL_FALSE == assemble_src(pAsm, 0, -1))
3770 {
3771 return GL_FALSE;
3772 }
3773
3774 if ( GL_FALSE == next_ins(pAsm) )
3775 {
3776 return GL_FALSE;
3777 }
3778
3779 return GL_TRUE;
3780 }
3781
3782 GLboolean assemble_MUL(r700_AssemblerBase *pAsm)
3783 {
3784 if( GL_FALSE == checkop2(pAsm) )
3785 {
3786 return GL_FALSE;
3787 }
3788
3789 pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
3790
3791 if( GL_FALSE == assemble_dst(pAsm) )
3792 {
3793 return GL_FALSE;
3794 }
3795
3796 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3797 {
3798 return GL_FALSE;
3799 }
3800
3801 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
3802 {
3803 return GL_FALSE;
3804 }
3805
3806 if( GL_FALSE == next_ins(pAsm) )
3807 {
3808 return GL_FALSE;
3809 }
3810
3811 return GL_TRUE;
3812 }
3813
3814 GLboolean assemble_POW(r700_AssemblerBase *pAsm)
3815 {
3816 BITS tmp;
3817
3818 checkop1(pAsm);
3819
3820 tmp = gethelpr(pAsm);
3821
3822 // LG2 tmp.x, a.swizzle
3823 pAsm->D.dst.opcode = SQ_OP2_INST_LOG_IEEE;
3824 pAsm->D.dst.math = 1;
3825
3826 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3827 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3828 pAsm->D.dst.reg = tmp;
3829 nomask_PVSDST(&(pAsm->D.dst));
3830
3831 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3832 {
3833 return GL_FALSE;
3834 }
3835
3836 if( GL_FALSE == next_ins(pAsm) )
3837 {
3838 return GL_FALSE;
3839 }
3840
3841 // MUL tmp.x, tmp.x, b.swizzle
3842 pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
3843
3844 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3845 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3846 pAsm->D.dst.reg = tmp;
3847 nomask_PVSDST(&(pAsm->D.dst));
3848
3849 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3850 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3851 pAsm->S[0].src.reg = tmp;
3852 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
3853 noneg_PVSSRC(&(pAsm->S[0].src));
3854
3855 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
3856 {
3857 return GL_FALSE;
3858 }
3859
3860 if( GL_FALSE == next_ins(pAsm) )
3861 {
3862 return GL_FALSE;
3863 }
3864
3865 // EX2 dst.mask, tmp.x
3866 // EX2 tmp.x, tmp.x
3867 pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE;
3868 pAsm->D.dst.math = 1;
3869
3870 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3871 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3872 pAsm->D.dst.reg = tmp;
3873 nomask_PVSDST(&(pAsm->D.dst));
3874
3875 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3876 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3877 pAsm->S[0].src.reg = tmp;
3878 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
3879 noneg_PVSSRC(&(pAsm->S[0].src));
3880
3881 if( GL_FALSE == next_ins(pAsm) )
3882 {
3883 return GL_FALSE;
3884 }
3885
3886 // Now replicate result to all necessary channels in destination
3887 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
3888
3889 if( GL_FALSE == assemble_dst(pAsm) )
3890 {
3891 return GL_FALSE;
3892 }
3893
3894 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3895 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
3896 pAsm->S[0].src.reg = tmp;
3897
3898 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
3899 noneg_PVSSRC(&(pAsm->S[0].src));
3900
3901 if( GL_FALSE == next_ins(pAsm) )
3902 {
3903 return GL_FALSE;
3904 }
3905
3906 return GL_TRUE;
3907 }
3908
3909 GLboolean assemble_RCP(r700_AssemblerBase *pAsm)
3910 {
3911 return assemble_math_function(pAsm, SQ_OP2_INST_RECIP_IEEE);
3912 }
3913
3914 GLboolean assemble_RSQ(r700_AssemblerBase *pAsm)
3915 {
3916 return assemble_math_function(pAsm, SQ_OP2_INST_RECIPSQRT_IEEE);
3917 }
3918
3919 GLboolean assemble_SCS(r700_AssemblerBase *pAsm)
3920 {
3921 BITS tmp;
3922
3923 checkop1(pAsm);
3924
3925 tmp = gethelpr(pAsm);
3926
3927 // COS tmp.x, a.x
3928 pAsm->D.dst.opcode = SQ_OP2_INST_COS;
3929 pAsm->D.dst.math = 1;
3930
3931 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3932 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3933 pAsm->D.dst.reg = tmp;
3934 pAsm->D.dst.writex = 1;
3935
3936 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3937 {
3938 return GL_FALSE;
3939 }
3940
3941 if ( GL_FALSE == next_ins(pAsm) )
3942 {
3943 return GL_FALSE;
3944 }
3945
3946 // SIN tmp.y, a.x
3947 pAsm->D.dst.opcode = SQ_OP2_INST_SIN;
3948 pAsm->D.dst.math = 1;
3949
3950 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3951 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3952 pAsm->D.dst.reg = tmp;
3953 pAsm->D.dst.writey = 1;
3954
3955 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3956 {
3957 return GL_FALSE;
3958 }
3959
3960 if( GL_FALSE == next_ins(pAsm) )
3961 {
3962 return GL_FALSE;
3963 }
3964
3965 // MOV dst.mask, tmp
3966 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
3967
3968 if( GL_FALSE == assemble_dst(pAsm) )
3969 {
3970 return GL_FALSE;
3971 }
3972
3973 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3974 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
3975 pAsm->S[0].src.reg = tmp;
3976
3977 noswizzle_PVSSRC(&(pAsm->S[0].src));
3978 pAsm->S[0].src.swizzlez = SQ_SEL_0;
3979 pAsm->S[0].src.swizzlew = SQ_SEL_0;
3980
3981 if ( GL_FALSE == next_ins(pAsm) )
3982 {
3983 return GL_FALSE;
3984 }
3985
3986 return GL_TRUE;
3987 }
3988
3989 GLboolean assemble_LOGIC(r700_AssemblerBase *pAsm, BITS opcode)
3990 {
3991 if( GL_FALSE == checkop2(pAsm) )
3992 {
3993 return GL_FALSE;
3994 }
3995
3996 pAsm->D.dst.opcode = opcode;
3997 //pAsm->D.dst.math = 1;
3998
3999 if( GL_FALSE == assemble_dst(pAsm) )
4000 {
4001 return GL_FALSE;
4002 }
4003
4004 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4005 {
4006 return GL_FALSE;
4007 }
4008
4009 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
4010 {
4011 return GL_FALSE;
4012 }
4013
4014 if( GL_FALSE == next_ins(pAsm) )
4015 {
4016 return GL_FALSE;
4017 }
4018
4019 return GL_TRUE;
4020 }
4021
4022 GLboolean assemble_LOGIC_PRED(r700_AssemblerBase *pAsm, BITS opcode)
4023 {
4024 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
4025
4026 pAsm->D.dst.opcode = opcode;
4027 pAsm->D.dst.math = 1;
4028 pAsm->D.dst.predicated = 1;
4029
4030 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4031 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4032 pAsm->D.dst.reg = pAsm->uHelpReg;
4033 pAsm->D.dst.writex = 1;
4034 pAsm->D.dst.writey = pAsm->D.dst.writez = pAsm->D.dst.writew = 0;
4035
4036 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4037 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4038 pAsm->S[0].src.reg = pAsm->last_cond_register + pAsm->starting_temp_register_number;
4039 pAsm->S[0].src.swizzlex = pILInst->DstReg.CondSwizzle & 0x7;
4040 noneg_PVSSRC(&(pAsm->S[0].src));
4041
4042 pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
4043 pAsm->S[1].src.reg = pAsm->uHelpReg;
4044 setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
4045 noneg_PVSSRC(&(pAsm->S[1].src));
4046 pAsm->S[1].src.swizzlex = SQ_SEL_0;
4047 pAsm->S[1].src.swizzley = SQ_SEL_0;
4048 pAsm->S[1].src.swizzlez = SQ_SEL_0;
4049 pAsm->S[1].src.swizzlew = SQ_SEL_0;
4050
4051 if( GL_FALSE == next_ins(pAsm) )
4052 {
4053 return GL_FALSE;
4054 }
4055
4056 return GL_TRUE;
4057 }
4058
4059 GLboolean assemble_SGE(r700_AssemblerBase *pAsm)
4060 {
4061 if( GL_FALSE == checkop2(pAsm) )
4062 {
4063 return GL_FALSE;
4064 }
4065
4066 pAsm->D.dst.opcode = SQ_OP2_INST_SETGE;
4067
4068 if( GL_FALSE == assemble_dst(pAsm) )
4069 {
4070 return GL_FALSE;
4071 }
4072
4073 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4074 {
4075 return GL_FALSE;
4076 }
4077
4078 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
4079 {
4080 return GL_FALSE;
4081 }
4082
4083 if( GL_FALSE == next_ins(pAsm) )
4084 {
4085 return GL_FALSE;
4086 }
4087
4088 return GL_TRUE;
4089 }
4090
4091 GLboolean assemble_SLT(r700_AssemblerBase *pAsm)
4092 {
4093 if( GL_FALSE == checkop2(pAsm) )
4094 {
4095 return GL_FALSE;
4096 }
4097
4098 pAsm->D.dst.opcode = SQ_OP2_INST_SETGT;
4099
4100 if( GL_FALSE == assemble_dst(pAsm) )
4101 {
4102 return GL_FALSE;
4103 }
4104
4105 if( GL_FALSE == assemble_src(pAsm, 0, 1) )
4106 {
4107 return GL_FALSE;
4108 }
4109
4110 if( GL_FALSE == assemble_src(pAsm, 1, 0) )
4111 {
4112 return GL_FALSE;
4113 }
4114
4115 if( GL_FALSE == next_ins(pAsm) )
4116 {
4117 return GL_FALSE;
4118 }
4119
4120 return GL_TRUE;
4121 }
4122
4123 GLboolean assemble_STP(r700_AssemblerBase *pAsm)
4124 {
4125 return GL_TRUE;
4126 }
4127
4128 GLboolean assemble_TEX(r700_AssemblerBase *pAsm)
4129 {
4130 GLboolean src_const;
4131 GLboolean need_barrier = GL_FALSE;
4132
4133 checkop1(pAsm);
4134
4135 switch (pAsm->pILInst[pAsm->uiCurInst].SrcReg[0].File)
4136 {
4137 case PROGRAM_UNIFORM:
4138 case PROGRAM_CONSTANT:
4139 case PROGRAM_LOCAL_PARAM:
4140 case PROGRAM_ENV_PARAM:
4141 case PROGRAM_STATE_VAR:
4142 src_const = GL_TRUE;
4143 break;
4144 case PROGRAM_TEMPORARY:
4145 case PROGRAM_INPUT:
4146 default:
4147 src_const = GL_FALSE;
4148 break;
4149 }
4150
4151 if (GL_TRUE == src_const)
4152 {
4153 if ( GL_FALSE == mov_temp(pAsm, 0) )
4154 return GL_FALSE;
4155 need_barrier = GL_TRUE;
4156 }
4157
4158 if (pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_TXP)
4159 {
4160 GLuint tmp = gethelpr(pAsm);
4161 pAsm->D.dst.opcode = SQ_OP2_INST_RECIP_IEEE;
4162 pAsm->D.dst.math = 1;
4163 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4164 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4165 pAsm->D.dst.reg = tmp;
4166 pAsm->D.dst.writew = 1;
4167
4168 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4169 {
4170 return GL_FALSE;
4171 }
4172 swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_W, SQ_SEL_W, SQ_SEL_W, SQ_SEL_W);
4173 if( GL_FALSE == next_ins(pAsm) )
4174 {
4175 return GL_FALSE;
4176 }
4177
4178 pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
4179 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4180 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4181 pAsm->D.dst.reg = tmp;
4182 pAsm->D.dst.writex = 1;
4183 pAsm->D.dst.writey = 1;
4184 pAsm->D.dst.writez = 1;
4185 pAsm->D.dst.writew = 0;
4186
4187 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4188 {
4189 return GL_FALSE;
4190 }
4191 setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
4192 pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
4193 pAsm->S[1].src.reg = tmp;
4194 setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_W);
4195
4196 if( GL_FALSE == next_ins(pAsm) )
4197 {
4198 return GL_FALSE;
4199 }
4200
4201 pAsm->aArgSubst[1] = tmp;
4202 need_barrier = GL_TRUE;
4203 }
4204
4205 if (pAsm->pILInst[pAsm->uiCurInst].TexSrcTarget == TEXTURE_CUBE_INDEX )
4206 {
4207 GLuint tmp1 = gethelpr(pAsm);
4208 GLuint tmp2 = gethelpr(pAsm);
4209
4210 /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */
4211 pAsm->D.dst.opcode = SQ_OP2_INST_CUBE;
4212 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4213 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4214 pAsm->D.dst.reg = tmp1;
4215 nomask_PVSDST(&(pAsm->D.dst));
4216
4217 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4218 {
4219 return GL_FALSE;
4220 }
4221
4222 if( GL_FALSE == assemble_src(pAsm, 0, 1) )
4223 {
4224 return GL_FALSE;
4225 }
4226
4227 swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_Z, SQ_SEL_Z, SQ_SEL_X, SQ_SEL_Y);
4228 swizzleagain_PVSSRC(&(pAsm->S[1].src), SQ_SEL_Y, SQ_SEL_X, SQ_SEL_Z, SQ_SEL_Z);
4229
4230 if( GL_FALSE == next_ins(pAsm) )
4231 {
4232 return GL_FALSE;
4233 }
4234
4235 /* tmp1.z = RCP_e(|tmp1.z|) */
4236 pAsm->D.dst.opcode = SQ_OP2_INST_RECIP_IEEE;
4237 pAsm->D.dst.math = 1;
4238 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4239 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4240 pAsm->D.dst.reg = tmp1;
4241 pAsm->D.dst.writez = 1;
4242
4243 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4244 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4245 pAsm->S[0].src.reg = tmp1;
4246 pAsm->S[0].src.swizzlex = SQ_SEL_Z;
4247 pAsm->S[0].src.abs = 1;
4248
4249 next_ins(pAsm);
4250
4251 /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x
4252 * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x
4253 * muladd has no writemask, have to use another temp
4254 */
4255 pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
4256 pAsm->D.dst.op3 = 1;
4257 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4258 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4259 pAsm->D.dst.reg = tmp2;
4260
4261 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4262 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4263 pAsm->S[0].src.reg = tmp1;
4264 noswizzle_PVSSRC(&(pAsm->S[0].src));
4265 setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
4266 pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
4267 pAsm->S[1].src.reg = tmp1;
4268 setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_Z);
4269 setaddrmode_PVSSRC(&(pAsm->S[2].src), ADDR_ABSOLUTE);
4270 /* immediate c 1.5 */
4271 pAsm->D2.dst2.literal_slots = 1;
4272 pAsm->C[0].f = 1.5F;
4273 pAsm->S[2].src.rtype = SRC_REC_LITERAL;
4274 pAsm->S[2].src.reg = tmp1;
4275 setswizzle_PVSSRC(&(pAsm->S[2].src), SQ_SEL_X);
4276
4277 next_ins(pAsm);
4278
4279 /* tmp1.xy = temp2.xy */
4280 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
4281 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4282 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4283 pAsm->D.dst.reg = tmp1;
4284 pAsm->D.dst.writex = 1;
4285 pAsm->D.dst.writey = 1;
4286 pAsm->D.dst.writez = 0;
4287 pAsm->D.dst.writew = 0;
4288
4289 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4290 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4291 pAsm->S[0].src.reg = tmp2;
4292 noswizzle_PVSSRC(&(pAsm->S[0].src));
4293
4294 next_ins(pAsm);
4295 pAsm->aArgSubst[1] = tmp1;
4296 need_barrier = GL_TRUE;
4297
4298 }
4299
4300 if(pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_TXB)
4301 {
4302 pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE_L;
4303 }
4304 else
4305 {
4306 pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE;
4307 }
4308
4309 pAsm->is_tex = GL_TRUE;
4310 if ( GL_TRUE == need_barrier )
4311
4312 pAsm->is_tex = GL_TRUE;
4313 if ( GL_TRUE == need_barrier )
4314 {
4315 pAsm->need_tex_barrier = GL_TRUE;
4316 }
4317 // Set src1 to tex unit id
4318 pAsm->S[1].src.reg = pAsm->SamplerUnits[pAsm->pILInst[pAsm->uiCurInst].TexSrcUnit];
4319 pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
4320
4321 //No sw info from mesa compiler, so hard code here.
4322 pAsm->S[1].src.swizzlex = SQ_SEL_X;
4323 pAsm->S[1].src.swizzley = SQ_SEL_Y;
4324 pAsm->S[1].src.swizzlez = SQ_SEL_Z;
4325 pAsm->S[1].src.swizzlew = SQ_SEL_W;
4326
4327 if( GL_FALSE == tex_dst(pAsm) )
4328 {
4329 return GL_FALSE;
4330 }
4331
4332 if( GL_FALSE == tex_src(pAsm) )
4333 {
4334 return GL_FALSE;
4335 }
4336
4337 if(pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_TXP)
4338 {
4339 /* hopefully did swizzles before */
4340 noswizzle_PVSSRC(&(pAsm->S[0].src));
4341 }
4342
4343 if(pAsm->pILInst[pAsm->uiCurInst].TexSrcTarget == TEXTURE_CUBE_INDEX)
4344 {
4345 /* SAMPLE dst, tmp.yxwy, CUBE */
4346 pAsm->S[0].src.swizzlex = SQ_SEL_Y;
4347 pAsm->S[0].src.swizzley = SQ_SEL_X;
4348 pAsm->S[0].src.swizzlez = SQ_SEL_W;
4349 pAsm->S[0].src.swizzlew = SQ_SEL_Y;
4350 }
4351
4352 if ( GL_FALSE == next_ins(pAsm) )
4353 {
4354 return GL_FALSE;
4355 }
4356
4357 return GL_TRUE;
4358 }
4359
4360 GLboolean assemble_XPD(r700_AssemblerBase *pAsm)
4361 {
4362 BITS tmp;
4363
4364 if( GL_FALSE == checkop2(pAsm) )
4365 {
4366 return GL_FALSE;
4367 }
4368
4369 tmp = gethelpr(pAsm);
4370
4371 pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
4372
4373 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4374 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4375 pAsm->D.dst.reg = tmp;
4376 nomask_PVSDST(&(pAsm->D.dst));
4377
4378 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4379 {
4380 return GL_FALSE;
4381 }
4382
4383 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
4384 {
4385 return GL_FALSE;
4386 }
4387
4388 swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_Z, SQ_SEL_X, SQ_SEL_Y, SQ_SEL_0);
4389 swizzleagain_PVSSRC(&(pAsm->S[1].src), SQ_SEL_Y, SQ_SEL_Z, SQ_SEL_X, SQ_SEL_0);
4390
4391 if( GL_FALSE == next_ins(pAsm) )
4392 {
4393 return GL_FALSE;
4394 }
4395
4396 pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
4397 pAsm->D.dst.op3 = 1;
4398
4399 if(0xF != pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask)
4400 {
4401 tmp = gethelpr(pAsm);
4402
4403 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4404 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4405 pAsm->D.dst.reg = tmp;
4406
4407 nomask_PVSDST(&(pAsm->D.dst));
4408 }
4409 else
4410 {
4411 if( GL_FALSE == assemble_dst(pAsm) )
4412 {
4413 return GL_FALSE;
4414 }
4415 }
4416
4417 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4418 {
4419 return GL_FALSE;
4420 }
4421
4422 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
4423 {
4424 return GL_FALSE;
4425 }
4426
4427 swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_Y, SQ_SEL_Z, SQ_SEL_X, SQ_SEL_0);
4428 swizzleagain_PVSSRC(&(pAsm->S[1].src), SQ_SEL_Z, SQ_SEL_X, SQ_SEL_Y, SQ_SEL_0);
4429
4430 // result1 + (neg) result0
4431 setaddrmode_PVSSRC(&(pAsm->S[2].src),ADDR_ABSOLUTE);
4432 pAsm->S[2].src.rtype = SRC_REG_TEMPORARY;
4433 pAsm->S[2].src.reg = tmp;
4434
4435 neg_PVSSRC(&(pAsm->S[2].src));
4436 noswizzle_PVSSRC(&(pAsm->S[2].src));
4437
4438 if( GL_FALSE == next_ins(pAsm) )
4439 {
4440 return GL_FALSE;
4441 }
4442
4443
4444 if(0xF != pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask)
4445 {
4446 if( GL_FALSE == assemble_dst(pAsm) )
4447 {
4448 return GL_FALSE;
4449 }
4450
4451 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
4452
4453 // Use tmp as source
4454 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4455 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4456 pAsm->S[0].src.reg = tmp;
4457
4458 noneg_PVSSRC(&(pAsm->S[0].src));
4459 noswizzle_PVSSRC(&(pAsm->S[0].src));
4460
4461 if( GL_FALSE == next_ins(pAsm) )
4462 {
4463 return GL_FALSE;
4464 }
4465 }
4466
4467 return GL_TRUE;
4468 }
4469
4470 GLboolean assemble_EXPORT(r700_AssemblerBase *pAsm)
4471 {
4472 return GL_TRUE;
4473 }
4474
4475 static inline void decreaseCurrent(r700_AssemblerBase *pAsm, GLuint uReason)
4476 {
4477 switch (uReason)
4478 {
4479 case FC_PUSH_VPM:
4480 pAsm->CALLSTACK[pAsm->CALLSP].current--;
4481 break;
4482 case FC_PUSH_WQM:
4483 pAsm->CALLSTACK[pAsm->CALLSP].current -= 4;
4484 break;
4485 case FC_LOOP:
4486 pAsm->CALLSTACK[pAsm->CALLSP].current -= 4;
4487 break;
4488 case FC_REP:
4489 /* TODO : for 16 vp asic, should -= 2; */
4490 pAsm->CALLSTACK[pAsm->CALLSP].current -= 1;
4491 break;
4492 };
4493 }
4494
4495 static inline void checkStackDepth(r700_AssemblerBase *pAsm, GLuint uReason, GLboolean bCheckMaxOnly)
4496 {
4497 if(GL_TRUE == bCheckMaxOnly)
4498 {
4499 switch (uReason)
4500 {
4501 case FC_PUSH_VPM:
4502 if((pAsm->CALLSTACK[pAsm->CALLSP].current + 1)
4503 > pAsm->CALLSTACK[pAsm->CALLSP].max)
4504 {
4505 pAsm->CALLSTACK[pAsm->CALLSP].max =
4506 pAsm->CALLSTACK[pAsm->CALLSP].current + 1;
4507 }
4508 break;
4509 case FC_PUSH_WQM:
4510 if((pAsm->CALLSTACK[pAsm->CALLSP].current + 4)
4511 > pAsm->CALLSTACK[pAsm->CALLSP].max)
4512 {
4513 pAsm->CALLSTACK[pAsm->CALLSP].max =
4514 pAsm->CALLSTACK[pAsm->CALLSP].current + 4;
4515 }
4516 break;
4517 }
4518 return;
4519 }
4520
4521 switch (uReason)
4522 {
4523 case FC_PUSH_VPM:
4524 pAsm->CALLSTACK[pAsm->CALLSP].current++;
4525 break;
4526 case FC_PUSH_WQM:
4527 pAsm->CALLSTACK[pAsm->CALLSP].current += 4;
4528 break;
4529 case FC_LOOP:
4530 pAsm->CALLSTACK[pAsm->CALLSP].current += 4;
4531 break;
4532 case FC_REP:
4533 /* TODO : for 16 vp asic, should += 2; */
4534 pAsm->CALLSTACK[pAsm->CALLSP].current += 1;
4535 break;
4536 };
4537
4538 if(pAsm->CALLSTACK[pAsm->CALLSP].current
4539 > pAsm->CALLSTACK[pAsm->CALLSP].max)
4540 {
4541 pAsm->CALLSTACK[pAsm->CALLSP].max =
4542 pAsm->CALLSTACK[pAsm->CALLSP].current;
4543 }
4544 }
4545
4546 GLboolean jumpToOffest(r700_AssemblerBase *pAsm, GLuint pops, GLint offset)
4547 {
4548 if(GL_FALSE == add_cf_instruction(pAsm) )
4549 {
4550 return GL_FALSE;
4551 }
4552
4553 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = pops;
4554 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
4555 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
4556
4557 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
4558 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
4559 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_JUMP;
4560 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
4561
4562 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
4563
4564 pAsm->cf_current_cf_clause_ptr->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex + offset;
4565
4566 return GL_TRUE;
4567 }
4568
4569 GLboolean pops(r700_AssemblerBase *pAsm, GLuint pops)
4570 {
4571 if(GL_FALSE == add_cf_instruction(pAsm) )
4572 {
4573 return GL_FALSE;
4574 }
4575
4576 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = pops;
4577 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
4578 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
4579
4580 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
4581 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
4582 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_POP;
4583
4584 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
4585
4586 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
4587 pAsm->cf_current_cf_clause_ptr->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex + 1;
4588
4589 return GL_TRUE;
4590 }
4591
4592 GLboolean assemble_IF(r700_AssemblerBase *pAsm, GLboolean bHasElse)
4593 {
4594 pAsm->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
4595
4596 assemble_LOGIC_PRED(pAsm, SQ_OP2_INST_PRED_SETNE);
4597
4598
4599 if(GL_FALSE == add_cf_instruction(pAsm) )
4600 {
4601 return GL_FALSE;
4602 }
4603
4604 if(GL_TRUE != bHasElse)
4605 {
4606 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1;
4607 }
4608 else
4609 {
4610 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 0;
4611 }
4612 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
4613 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
4614
4615 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
4616 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
4617 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_JUMP;
4618 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
4619
4620 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
4621
4622 pAsm->FCSP++;
4623 pAsm->fc_stack[pAsm->FCSP].type = FC_IF;
4624 pAsm->fc_stack[pAsm->FCSP].mid = NULL;
4625 pAsm->fc_stack[pAsm->FCSP].midLen= 0;
4626 pAsm->fc_stack[pAsm->FCSP].first = pAsm->cf_current_cf_clause_ptr;
4627
4628 #ifndef USE_CF_FOR_POP_AFTER
4629 if(GL_TRUE != bHasElse)
4630 {
4631 pAsm->alu_x_opcode = SQ_CF_INST_ALU_POP_AFTER;
4632 }
4633 #endif /* USE_CF_FOR_POP_AFTER */
4634
4635 checkStackDepth(pAsm, FC_PUSH_VPM, GL_FALSE);
4636
4637 return GL_TRUE;
4638 }
4639
4640 GLboolean assemble_ELSE(r700_AssemblerBase *pAsm)
4641 {
4642 if(GL_FALSE == add_cf_instruction(pAsm) )
4643 {
4644 return GL_FALSE;
4645 }
4646
4647 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1; ///
4648 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
4649 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
4650
4651 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
4652 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
4653 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_ELSE;
4654 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
4655
4656 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
4657
4658 pAsm->fc_stack[pAsm->FCSP].mid = (R700ControlFlowGenericClause **)_mesa_realloc( (void *)pAsm->fc_stack[pAsm->FCSP].mid,
4659 0,
4660 sizeof(R700ControlFlowGenericClause *) );
4661 pAsm->fc_stack[pAsm->FCSP].mid[0] = pAsm->cf_current_cf_clause_ptr;
4662 //pAsm->fc_stack[pAsm->FCSP].unNumMid = 1;
4663
4664 #ifndef USE_CF_FOR_POP_AFTER
4665 pAsm->alu_x_opcode = SQ_CF_INST_ALU_POP_AFTER;
4666 #endif /* USE_CF_FOR_POP_AFTER */
4667
4668 pAsm->fc_stack[pAsm->FCSP].first->m_Word0.f.addr = pAsm->pR700Shader->plstCFInstructions_active->uNumOfNode - 1;
4669
4670 return GL_TRUE;
4671 }
4672
4673 GLboolean assemble_ENDIF(r700_AssemblerBase *pAsm)
4674 {
4675 #ifdef USE_CF_FOR_POP_AFTER
4676 pops(pAsm, 1);
4677 #endif /* USE_CF_FOR_POP_AFTER */
4678
4679 pAsm->alu_x_opcode = SQ_CF_INST_ALU;
4680
4681 if(NULL == pAsm->fc_stack[pAsm->FCSP].mid)
4682 {
4683 /* no else in between */
4684 pAsm->fc_stack[pAsm->FCSP].first->m_Word0.f.addr = pAsm->pR700Shader->plstCFInstructions_active->uNumOfNode;
4685 }
4686 else
4687 {
4688 pAsm->fc_stack[pAsm->FCSP].mid[0]->m_Word0.f.addr = pAsm->pR700Shader->plstCFInstructions_active->uNumOfNode;
4689 }
4690
4691 if(NULL != pAsm->fc_stack[pAsm->FCSP].mid)
4692 {
4693 FREE(pAsm->fc_stack[pAsm->FCSP].mid);
4694 }
4695
4696 if(pAsm->fc_stack[pAsm->FCSP].type != FC_IF)
4697 {
4698 radeon_error("if/endif in shader code are not paired. \n");
4699 return GL_FALSE;
4700 }
4701
4702 pAsm->FCSP--;
4703
4704 decreaseCurrent(pAsm, FC_PUSH_VPM);
4705
4706 return GL_TRUE;
4707 }
4708
4709 GLboolean assemble_BGNLOOP(r700_AssemblerBase *pAsm)
4710 {
4711 if(GL_FALSE == add_cf_instruction(pAsm) )
4712 {
4713 return GL_FALSE;
4714 }
4715
4716
4717 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 0;
4718 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
4719 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
4720
4721 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
4722 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
4723 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_LOOP_START_NO_AL;
4724 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
4725
4726 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
4727
4728 pAsm->FCSP++;
4729 pAsm->fc_stack[pAsm->FCSP].type = FC_LOOP;
4730 pAsm->fc_stack[pAsm->FCSP].mid = NULL;
4731 pAsm->fc_stack[pAsm->FCSP].unNumMid = 0;
4732 pAsm->fc_stack[pAsm->FCSP].midLen = 0;
4733 pAsm->fc_stack[pAsm->FCSP].first = pAsm->cf_current_cf_clause_ptr;
4734
4735 checkStackDepth(pAsm, FC_LOOP, GL_FALSE);
4736
4737 return GL_TRUE;
4738 }
4739
4740 GLboolean assemble_BRK(r700_AssemblerBase *pAsm)
4741 {
4742 #ifdef USE_CF_FOR_CONTINUE_BREAK
4743
4744 pAsm->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
4745
4746 assemble_LOGIC_PRED(pAsm, SQ_OP2_INST_PRED_SETNE);
4747
4748 unsigned int unFCSP;
4749 for(unFCSP=pAsm->FCSP; unFCSP>0; unFCSP--)
4750 {
4751 if(FC_LOOP == pAsm->fc_stack[unFCSP].type)
4752 {
4753 break;
4754 }
4755 }
4756 if(0 == FC_LOOP)
4757 {
4758 radeon_error("Break is not inside loop/endloop pair.\n");
4759 return GL_FALSE;
4760 }
4761
4762 if(GL_FALSE == add_cf_instruction(pAsm) )
4763 {
4764 return GL_FALSE;
4765 }
4766
4767
4768 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1;
4769 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
4770 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
4771
4772 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
4773 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
4774 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_LOOP_BREAK;
4775
4776 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
4777
4778 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
4779
4780 pAsm->fc_stack[unFCSP].mid = (R700ControlFlowGenericClause **)_mesa_realloc(
4781 (void *)pAsm->fc_stack[unFCSP].mid,
4782 sizeof(R700ControlFlowGenericClause *) * pAsm->fc_stack[unFCSP].unNumMid,
4783 sizeof(R700ControlFlowGenericClause *) * (pAsm->fc_stack[unFCSP].unNumMid + 1) );
4784 pAsm->fc_stack[unFCSP].mid[pAsm->fc_stack[unFCSP].unNumMid] = pAsm->cf_current_cf_clause_ptr;
4785 pAsm->fc_stack[unFCSP].unNumMid++;
4786
4787 if(GL_FALSE == add_cf_instruction(pAsm) )
4788 {
4789 return GL_FALSE;
4790 }
4791
4792 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1;
4793 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
4794 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
4795
4796 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
4797 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
4798 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_POP;
4799
4800 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
4801
4802 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
4803 pAsm->cf_current_cf_clause_ptr->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex + 1;
4804
4805 checkStackDepth(pAsm, FC_PUSH_VPM, GL_TRUE);
4806
4807 #endif //USE_CF_FOR_CONTINUE_BREAK
4808 return GL_TRUE;
4809 }
4810
4811 GLboolean assemble_CONT(r700_AssemblerBase *pAsm)
4812 {
4813 #ifdef USE_CF_FOR_CONTINUE_BREAK
4814 pAsm->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
4815
4816 assemble_LOGIC_PRED(pAsm, SQ_OP2_INST_PRED_SETNE);
4817
4818 unsigned int unFCSP;
4819 for(unFCSP=pAsm->FCSP; unFCSP>0; unFCSP--)
4820 {
4821 if(FC_LOOP == pAsm->fc_stack[unFCSP].type)
4822 {
4823 break;
4824 }
4825 }
4826 if(0 == FC_LOOP)
4827 {
4828 radeon_error("Continue is not inside loop/endloop pair.\n");
4829 return GL_FALSE;
4830 }
4831
4832 if(GL_FALSE == add_cf_instruction(pAsm) )
4833 {
4834 return GL_FALSE;
4835 }
4836
4837
4838 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1;
4839 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
4840 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
4841
4842 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
4843 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
4844 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_LOOP_CONTINUE;
4845
4846 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
4847
4848 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
4849
4850 pAsm->fc_stack[unFCSP].mid = (R700ControlFlowGenericClause **)_mesa_realloc(
4851 (void *)pAsm->fc_stack[unFCSP].mid,
4852 sizeof(R700ControlFlowGenericClause *) * pAsm->fc_stack[unFCSP].unNumMid,
4853 sizeof(R700ControlFlowGenericClause *) * (pAsm->fc_stack[unFCSP].unNumMid + 1) );
4854 pAsm->fc_stack[unFCSP].mid[pAsm->fc_stack[unFCSP].unNumMid] = pAsm->cf_current_cf_clause_ptr;
4855 pAsm->fc_stack[unFCSP].unNumMid++;
4856
4857 if(GL_FALSE == add_cf_instruction(pAsm) )
4858 {
4859 return GL_FALSE;
4860 }
4861
4862 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1;
4863 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
4864 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
4865
4866 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
4867 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
4868 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_POP;
4869
4870 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
4871
4872 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
4873 pAsm->cf_current_cf_clause_ptr->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex + 1;
4874
4875 checkStackDepth(pAsm, FC_PUSH_VPM, GL_TRUE);
4876
4877 #endif /* USE_CF_FOR_CONTINUE_BREAK */
4878
4879 return GL_TRUE;
4880 }
4881
4882 GLboolean assemble_ENDLOOP(r700_AssemblerBase *pAsm)
4883 {
4884 GLuint i;
4885
4886 if(GL_FALSE == add_cf_instruction(pAsm) )
4887 {
4888 return GL_FALSE;
4889 }
4890
4891
4892 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 0;
4893 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
4894 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
4895
4896 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
4897 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
4898 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_LOOP_END;
4899 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
4900
4901 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
4902
4903 pAsm->cf_current_cf_clause_ptr->m_Word0.f.addr = pAsm->fc_stack[pAsm->FCSP].first->m_uIndex + 1;
4904 pAsm->fc_stack[pAsm->FCSP].first->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex + 1;
4905
4906 #ifdef USE_CF_FOR_CONTINUE_BREAK
4907 for(i=0; i<pAsm->fc_stack[pAsm->FCSP].unNumMid; i++)
4908 {
4909 pAsm->fc_stack[pAsm->FCSP].mid[i]->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex;
4910 }
4911 if(NULL != pAsm->fc_stack[pAsm->FCSP].mid)
4912 {
4913 FREE(pAsm->fc_stack[pAsm->FCSP].mid);
4914 }
4915 #endif
4916
4917 if(pAsm->fc_stack[pAsm->FCSP].type != FC_LOOP)
4918 {
4919 radeon_error("loop/endloop in shader code are not paired. \n");
4920 return GL_FALSE;
4921 }
4922
4923 GLuint unFCSP;
4924 GLuint unIF = 0;
4925 if((pAsm->unCFflags & HAS_CURRENT_LOOPRET) > 0)
4926 {
4927 for(unFCSP=(pAsm->FCSP-1); unFCSP>pAsm->CALLSTACK[pAsm->CALLSP].FCSP_BeforeEntry; unFCSP--)
4928 {
4929 if(FC_LOOP == pAsm->fc_stack[unFCSP].type)
4930 {
4931 breakLoopOnFlag(pAsm, unFCSP);
4932 break;
4933 }
4934 else if(FC_IF == pAsm->fc_stack[unFCSP].type)
4935 {
4936 unIF++;
4937 }
4938 }
4939 if(unFCSP <= pAsm->CALLSTACK[pAsm->CALLSP].FCSP_BeforeEntry)
4940 {
4941 #ifdef USE_CF_FOR_POP_AFTER
4942 returnOnFlag(pAsm, unIF);
4943 #else
4944 returnOnFlag(pAsm, 0);
4945 #endif /* USE_CF_FOR_POP_AFTER */
4946 pAsm->unCFflags &= ~HAS_CURRENT_LOOPRET;
4947 }
4948 }
4949
4950 pAsm->FCSP--;
4951
4952 decreaseCurrent(pAsm, FC_LOOP);
4953
4954 return GL_TRUE;
4955 }
4956
4957 void add_return_inst(r700_AssemblerBase *pAsm)
4958 {
4959 if(GL_FALSE == add_cf_instruction(pAsm) )
4960 {
4961 return GL_FALSE;
4962 }
4963 //pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1;
4964 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 0;
4965 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
4966 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
4967
4968 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
4969 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
4970 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_RETURN;
4971 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
4972
4973 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
4974 }
4975
4976 GLboolean assemble_BGNSUB(r700_AssemblerBase *pAsm, GLint nILindex)
4977 {
4978 /* Put in sub */
4979 if( (pAsm->unSubArrayPointer + 1) > pAsm->unSubArraySize )
4980 {
4981 pAsm->subs = (SUB_OFFSET*)_mesa_realloc( (void *)pAsm->subs,
4982 sizeof(SUB_OFFSET) * pAsm->unSubArraySize,
4983 sizeof(SUB_OFFSET) * (pAsm->unSubArraySize + 10) );
4984 if(NULL == pAsm->subs)
4985 {
4986 return GL_FALSE;
4987 }
4988 pAsm->unSubArraySize += 10;
4989 }
4990
4991 pAsm->subs[pAsm->unSubArrayPointer].subIL_Offset = nILindex;
4992 pAsm->subs[pAsm->unSubArrayPointer].lstCFInstructions_local.pHead=NULL;
4993 pAsm->subs[pAsm->unSubArrayPointer].lstCFInstructions_local.pTail=NULL;
4994 pAsm->subs[pAsm->unSubArrayPointer].lstCFInstructions_local.uNumOfNode=0;
4995
4996 pAsm->CALLSP++;
4997 pAsm->CALLSTACK[pAsm->CALLSP].subDescIndex = pAsm->unSubArrayPointer;
4998 pAsm->CALLSTACK[pAsm->CALLSP].FCSP_BeforeEntry = pAsm->FCSP;
4999 pAsm->CALLSTACK[pAsm->CALLSP].plstCFInstructions_local
5000 = &(pAsm->subs[pAsm->unSubArrayPointer].lstCFInstructions_local);
5001 pAsm->CALLSTACK[pAsm->CALLSP].max = 0;
5002 pAsm->CALLSTACK[pAsm->CALLSP].current = 0;
5003 SetActiveCFlist(pAsm->pR700Shader,
5004 pAsm->CALLSTACK[pAsm->CALLSP].plstCFInstructions_local);
5005
5006 pAsm->unSubArrayPointer++;
5007
5008 /* start sub */
5009 pAsm->alu_x_opcode = SQ_CF_INST_ALU;
5010
5011 pAsm->FCSP++;
5012 pAsm->fc_stack[pAsm->FCSP].type = FC_REP;
5013
5014 checkStackDepth(pAsm, FC_REP, GL_FALSE);
5015
5016 return GL_TRUE;
5017 }
5018
5019 GLboolean assemble_ENDSUB(r700_AssemblerBase *pAsm)
5020 {
5021 if(pAsm->fc_stack[pAsm->FCSP].type != FC_REP)
5022 {
5023 radeon_error("BGNSUB/ENDSUB in shader code are not paired. \n");
5024 return GL_FALSE;
5025 }
5026
5027 /* copy max to sub structure */
5028 pAsm->subs[pAsm->CALLSTACK[pAsm->CALLSP].subDescIndex].unStackDepthMax
5029 = pAsm->CALLSTACK[pAsm->CALLSP].max;
5030
5031 decreaseCurrent(pAsm, FC_REP);
5032
5033 pAsm->CALLSP--;
5034 SetActiveCFlist(pAsm->pR700Shader,
5035 pAsm->CALLSTACK[pAsm->CALLSP].plstCFInstructions_local);
5036
5037 pAsm->alu_x_opcode = SQ_CF_INST_ALU;
5038
5039 pAsm->FCSP--;
5040
5041 return GL_TRUE;
5042 }
5043
5044 GLboolean assemble_RET(r700_AssemblerBase *pAsm)
5045 {
5046 GLuint unIF = 0;
5047
5048 if(pAsm->CALLSP > 0)
5049 { /* in sub */
5050 GLuint unFCSP;
5051 for(unFCSP=pAsm->FCSP; unFCSP>pAsm->CALLSTACK[pAsm->CALLSP].FCSP_BeforeEntry; unFCSP--)
5052 {
5053 if(FC_LOOP == pAsm->fc_stack[unFCSP].type)
5054 {
5055 setRetInLoopFlag(pAsm, SQ_SEL_1);
5056 breakLoopOnFlag(pAsm, unFCSP);
5057 pAsm->unCFflags |= LOOPRET_FLAGS;
5058
5059 return GL_TRUE;
5060 }
5061 else if(FC_IF == pAsm->fc_stack[unFCSP].type)
5062 {
5063 unIF++;
5064 }
5065 }
5066 }
5067
5068 #ifdef USE_CF_FOR_POP_AFTER
5069 if(unIF > 0)
5070 {
5071 pops(pAsm, unIF);
5072 }
5073 #endif /* USE_CF_FOR_POP_AFTER */
5074
5075 add_return_inst(pAsm);
5076
5077 return GL_TRUE;
5078 }
5079
5080 GLboolean assemble_CAL(r700_AssemblerBase *pAsm,
5081 GLint nILindex,
5082 GLuint uiNumberInsts,
5083 struct prog_instruction *pILInst)
5084 {
5085 pAsm->alu_x_opcode = SQ_CF_INST_ALU;
5086
5087 if(GL_FALSE == add_cf_instruction(pAsm) )
5088 {
5089 return GL_FALSE;
5090 }
5091
5092 pAsm->cf_current_cf_clause_ptr->m_Word1.f.call_count = 1;
5093 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 0;
5094 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
5095 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
5096
5097 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
5098 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
5099 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_CALL;
5100 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
5101
5102 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
5103
5104 /* Put in caller */
5105 if( (pAsm->unCallerArrayPointer + 1) > pAsm->unCallerArraySize )
5106 {
5107 pAsm->callers = (CALLER_POINTER*)_mesa_realloc( (void *)pAsm->callers,
5108 sizeof(CALLER_POINTER) * pAsm->unCallerArraySize,
5109 sizeof(CALLER_POINTER) * (pAsm->unCallerArraySize + 10) );
5110 if(NULL == pAsm->callers)
5111 {
5112 return GL_FALSE;
5113 }
5114 pAsm->unCallerArraySize += 10;
5115 }
5116
5117 pAsm->callers[pAsm->unCallerArrayPointer].subIL_Offset = nILindex;
5118 pAsm->callers[pAsm->unCallerArrayPointer].cf_ptr = pAsm->cf_current_cf_clause_ptr;
5119
5120 pAsm->unCallerArrayPointer++;
5121
5122 int j;
5123 GLuint max;
5124 GLuint unSubID;
5125 GLboolean bRet;
5126 for(j=0; j<pAsm->unSubArrayPointer; j++)
5127 {
5128 if(nILindex == pAsm->subs[j].subIL_Offset)
5129 { /* compiled before */
5130
5131 max = pAsm->subs[j].unStackDepthMax
5132 + pAsm->CALLSTACK[pAsm->CALLSP].current;
5133 if(max > pAsm->CALLSTACK[pAsm->CALLSP].max)
5134 {
5135 pAsm->CALLSTACK[pAsm->CALLSP].max = max;
5136 }
5137
5138 pAsm->callers[pAsm->unCallerArrayPointer - 1].subDescIndex = j;
5139 return GL_TRUE;
5140 }
5141 }
5142
5143 pAsm->callers[pAsm->unCallerArrayPointer - 1].subDescIndex = pAsm->unSubArrayPointer;
5144 unSubID = pAsm->unSubArrayPointer;
5145
5146 bRet = AssembleInstr(nILindex, uiNumberInsts, pILInst, pAsm);
5147
5148 if(GL_TRUE == bRet)
5149 {
5150 max = pAsm->subs[unSubID].unStackDepthMax
5151 + pAsm->CALLSTACK[pAsm->CALLSP].current;
5152 if(max > pAsm->CALLSTACK[pAsm->CALLSP].max)
5153 {
5154 pAsm->CALLSTACK[pAsm->CALLSP].max = max;
5155 }
5156 }
5157
5158 return bRet;
5159 }
5160
5161 GLboolean setRetInLoopFlag(r700_AssemblerBase *pAsm, GLuint flagValue)
5162 {
5163 GLfloat fLiteral[2] = {0.1, 0.0};
5164
5165 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
5166 pAsm->D.dst.op3 = 0;
5167 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
5168 pAsm->D.dst.reg = pAsm->flag_reg_index;
5169 pAsm->D.dst.writex = 1;
5170 pAsm->D.dst.writey = 0;
5171 pAsm->D.dst.writez = 0;
5172 pAsm->D.dst.writew = 0;
5173 pAsm->D2.dst2.literal_slots = 1;
5174 pAsm->D2.dst2.SaturateMode = SATURATE_OFF;
5175 pAsm->D.dst.predicated = 0;
5176 /* in reloc where dislink flag init inst, only one slot alu inst is handled. */
5177 pAsm->D.dst.math = 1; /* TODO : not math really, but one channel op, more generic alu assembler needed */
5178 pAsm->D2.dst2.index_mode = SQ_INDEX_LOOP; /* Check this ! */
5179 #if 0
5180 pAsm->S[0].src.rtype = SRC_REC_LITERAL;
5181 //pAsm->S[0].src.reg = 0;
5182 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
5183 noneg_PVSSRC(&(pAsm->S[0].src));
5184 pAsm->S[0].src.swizzlex = SQ_SEL_X;
5185 pAsm->S[0].src.swizzley = SQ_SEL_Y;
5186 pAsm->S[0].src.swizzlez = SQ_SEL_Z;
5187 pAsm->S[0].src.swizzlew = SQ_SEL_W;
5188
5189 if( GL_FALSE == next_ins_literal(pAsm, &(fLiteral[0])) )
5190 {
5191 return GL_FALSE;
5192 }
5193 #else
5194 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
5195 pAsm->S[0].src.reg = 0;
5196 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
5197 noneg_PVSSRC(&(pAsm->S[0].src));
5198 pAsm->S[0].src.swizzlex = flagValue;
5199 pAsm->S[0].src.swizzley = flagValue;
5200 pAsm->S[0].src.swizzlez = flagValue;
5201 pAsm->S[0].src.swizzlew = flagValue;
5202
5203 if( GL_FALSE == next_ins(pAsm) )
5204 {
5205 return GL_FALSE;
5206 }
5207 #endif
5208
5209 return GL_TRUE;
5210 }
5211
5212 GLboolean testFlag(r700_AssemblerBase *pAsm)
5213 {
5214 GLfloat fLiteral[2] = {0.1, 0.0};
5215
5216 //Test flag
5217 GLuint tmp = gethelpr(pAsm);
5218 pAsm->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
5219
5220 pAsm->D.dst.opcode = SQ_OP2_INST_PRED_SETE;
5221 pAsm->D.dst.math = 1;
5222 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
5223 pAsm->D.dst.reg = tmp;
5224 pAsm->D.dst.writex = 1;
5225 pAsm->D.dst.writey = 0;
5226 pAsm->D.dst.writez = 0;
5227 pAsm->D.dst.writew = 0;
5228 pAsm->D2.dst2.literal_slots = 1;
5229 pAsm->D2.dst2.SaturateMode = SATURATE_OFF;
5230 pAsm->D.dst.predicated = 1;
5231 pAsm->D2.dst2.index_mode = SQ_INDEX_LOOP; /* Check this ! */
5232
5233 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
5234 pAsm->S[0].src.reg = pAsm->flag_reg_index;
5235 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
5236 noneg_PVSSRC(&(pAsm->S[0].src));
5237 pAsm->S[0].src.swizzlex = SQ_SEL_X;
5238 pAsm->S[0].src.swizzley = SQ_SEL_Y;
5239 pAsm->S[0].src.swizzlez = SQ_SEL_Z;
5240 pAsm->S[0].src.swizzlew = SQ_SEL_W;
5241 #if 0
5242 pAsm->S[1].src.rtype = SRC_REC_LITERAL;
5243 //pAsm->S[1].src.reg = 0;
5244 setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
5245 noneg_PVSSRC(&(pAsm->S[1].src));
5246 pAsm->S[1].src.swizzlex = SQ_SEL_X;
5247 pAsm->S[1].src.swizzley = SQ_SEL_Y;
5248 pAsm->S[1].src.swizzlez = SQ_SEL_Z;
5249 pAsm->S[1].src.swizzlew = SQ_SEL_W;
5250
5251 if( GL_FALSE == next_ins_literal(pAsm, &(fLiteral[0])) )
5252 {
5253 return GL_FALSE;
5254 }
5255 #else
5256 pAsm->S[1].src.rtype = DST_REG_TEMPORARY;
5257 pAsm->S[1].src.reg = 0;
5258 setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
5259 noneg_PVSSRC(&(pAsm->S[1].src));
5260 pAsm->S[1].src.swizzlex = SQ_SEL_1;
5261 pAsm->S[1].src.swizzley = SQ_SEL_1;
5262 pAsm->S[1].src.swizzlez = SQ_SEL_1;
5263 pAsm->S[1].src.swizzlew = SQ_SEL_1;
5264
5265 if( GL_FALSE == next_ins(pAsm) )
5266 {
5267 return GL_FALSE;
5268 }
5269 #endif
5270
5271 checkStackDepth(pAsm, FC_PUSH_VPM, GL_TRUE);
5272
5273 return GL_TRUE;
5274 }
5275
5276 GLboolean returnOnFlag(r700_AssemblerBase *pAsm, GLuint unIF)
5277 {
5278 testFlag(pAsm);
5279 jumpToOffest(pAsm, 1, 4);
5280 setRetInLoopFlag(pAsm, SQ_SEL_0);
5281 pops(pAsm, unIF + 1);
5282 add_return_inst(pAsm);
5283
5284 return GL_TRUE;
5285 }
5286
5287 GLboolean breakLoopOnFlag(r700_AssemblerBase *pAsm, GLuint unFCSP)
5288 {
5289 testFlag(pAsm);
5290
5291 //break
5292 if(GL_FALSE == add_cf_instruction(pAsm) )
5293 {
5294 return GL_FALSE;
5295 }
5296
5297 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1;
5298 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
5299 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
5300
5301 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
5302 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
5303 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_LOOP_BREAK;
5304 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
5305
5306 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
5307
5308 pAsm->fc_stack[unFCSP].mid = (R700ControlFlowGenericClause **)_mesa_realloc(
5309 (void *)pAsm->fc_stack[unFCSP].mid,
5310 sizeof(R700ControlFlowGenericClause *) * pAsm->fc_stack[unFCSP].unNumMid,
5311 sizeof(R700ControlFlowGenericClause *) * (pAsm->fc_stack[unFCSP].unNumMid + 1) );
5312 pAsm->fc_stack[unFCSP].mid[pAsm->fc_stack[unFCSP].unNumMid] = pAsm->cf_current_cf_clause_ptr;
5313 pAsm->fc_stack[unFCSP].unNumMid++;
5314
5315 pops(pAsm, 1);
5316
5317 return GL_TRUE;
5318 }
5319
5320 GLboolean AssembleInstr(GLuint uiFirstInst,
5321 GLuint uiNumberInsts,
5322 struct prog_instruction *pILInst,
5323 r700_AssemblerBase *pR700AsmCode)
5324 {
5325 GLuint i;
5326
5327 pR700AsmCode->pILInst = pILInst;
5328 for(i=uiFirstInst; i<uiNumberInsts; i++)
5329 {
5330 pR700AsmCode->uiCurInst = i;
5331
5332 #ifndef USE_CF_FOR_CONTINUE_BREAK
5333 if(OPCODE_BRK == pILInst[i+1].Opcode)
5334 {
5335 switch(pILInst[i].Opcode)
5336 {
5337 case OPCODE_SLE:
5338 pILInst[i].Opcode = OPCODE_SGT;
5339 break;
5340 case OPCODE_SLT:
5341 pILInst[i].Opcode = OPCODE_SGE;
5342 break;
5343 case OPCODE_SGE:
5344 pILInst[i].Opcode = OPCODE_SLT;
5345 break;
5346 case OPCODE_SGT:
5347 pILInst[i].Opcode = OPCODE_SLE;
5348 break;
5349 case OPCODE_SEQ:
5350 pILInst[i].Opcode = OPCODE_SNE;
5351 break;
5352 case OPCODE_SNE:
5353 pILInst[i].Opcode = OPCODE_SEQ;
5354 break;
5355 default:
5356 break;
5357 }
5358 }
5359 #endif
5360 if(pILInst[i].CondUpdate == 1)
5361 {
5362 /* remember dest register used for cond evaluation */
5363 /* XXX also handle PROGRAM_OUTPUT registers here? */
5364 pR700AsmCode->last_cond_register = pILInst[i].DstReg.Index;
5365 }
5366
5367 switch (pILInst[i].Opcode)
5368 {
5369 case OPCODE_ABS:
5370 if ( GL_FALSE == assemble_ABS(pR700AsmCode) )
5371 return GL_FALSE;
5372 break;
5373 case OPCODE_ADD:
5374 case OPCODE_SUB:
5375 if ( GL_FALSE == assemble_ADD(pR700AsmCode) )
5376 return GL_FALSE;
5377 break;
5378
5379 case OPCODE_ARL:
5380 if ( GL_FALSE == assemble_ARL(pR700AsmCode) )
5381 return GL_FALSE;
5382 break;
5383 case OPCODE_ARR:
5384 radeon_error("Not yet implemented instruction OPCODE_ARR \n");
5385 //if ( GL_FALSE == assemble_BAD("ARR") )
5386 return GL_FALSE;
5387 break;
5388
5389 case OPCODE_CMP:
5390 if ( GL_FALSE == assemble_CMP(pR700AsmCode) )
5391 return GL_FALSE;
5392 break;
5393 case OPCODE_COS:
5394 if ( GL_FALSE == assemble_TRIG(pR700AsmCode, SQ_OP2_INST_COS) )
5395 return GL_FALSE;
5396 break;
5397
5398 case OPCODE_DP3:
5399 case OPCODE_DP4:
5400 case OPCODE_DPH:
5401 if ( GL_FALSE == assemble_DOT(pR700AsmCode) )
5402 return GL_FALSE;
5403 break;
5404
5405 case OPCODE_DST:
5406 if ( GL_FALSE == assemble_DST(pR700AsmCode) )
5407 return GL_FALSE;
5408 break;
5409
5410 case OPCODE_EX2:
5411 if ( GL_FALSE == assemble_EX2(pR700AsmCode) )
5412 return GL_FALSE;
5413 break;
5414 case OPCODE_EXP:
5415 if ( GL_FALSE == assemble_EXP(pR700AsmCode) )
5416 return GL_FALSE;
5417 break;
5418
5419 case OPCODE_FLR:
5420 if ( GL_FALSE == assemble_FLR(pR700AsmCode) )
5421 return GL_FALSE;
5422 break;
5423 //case OP_FLR_INT: ;
5424
5425 // if ( GL_FALSE == assemble_FLR_INT() )
5426 // return GL_FALSE;
5427 // break;
5428
5429 case OPCODE_FRC:
5430 if ( GL_FALSE == assemble_FRC(pR700AsmCode) )
5431 return GL_FALSE;
5432 break;
5433
5434 case OPCODE_KIL:
5435 case OPCODE_KIL_NV:
5436 if ( GL_FALSE == assemble_KIL(pR700AsmCode, SQ_OP2_INST_KILLGT) )
5437 return GL_FALSE;
5438 break;
5439 case OPCODE_LG2:
5440 if ( GL_FALSE == assemble_LG2(pR700AsmCode) )
5441 return GL_FALSE;
5442 break;
5443 case OPCODE_LIT:
5444 if ( GL_FALSE == assemble_LIT(pR700AsmCode) )
5445 return GL_FALSE;
5446 break;
5447 case OPCODE_LRP:
5448 if ( GL_FALSE == assemble_LRP(pR700AsmCode) )
5449 return GL_FALSE;
5450 break;
5451 case OPCODE_LOG:
5452 if ( GL_FALSE == assemble_LOG(pR700AsmCode) )
5453 return GL_FALSE;
5454 break;
5455
5456 case OPCODE_MAD:
5457 if ( GL_FALSE == assemble_MAD(pR700AsmCode) )
5458 return GL_FALSE;
5459 break;
5460 case OPCODE_MAX:
5461 if ( GL_FALSE == assemble_MAX(pR700AsmCode) )
5462 return GL_FALSE;
5463 break;
5464 case OPCODE_MIN:
5465 if ( GL_FALSE == assemble_MIN(pR700AsmCode) )
5466 return GL_FALSE;
5467 break;
5468
5469 case OPCODE_MOV:
5470 if ( GL_FALSE == assemble_MOV(pR700AsmCode) )
5471 return GL_FALSE;
5472 break;
5473 case OPCODE_MUL:
5474 if ( GL_FALSE == assemble_MUL(pR700AsmCode) )
5475 return GL_FALSE;
5476 break;
5477
5478 case OPCODE_POW:
5479 if ( GL_FALSE == assemble_POW(pR700AsmCode) )
5480 return GL_FALSE;
5481 break;
5482 case OPCODE_RCP:
5483 if ( GL_FALSE == assemble_RCP(pR700AsmCode) )
5484 return GL_FALSE;
5485 break;
5486 case OPCODE_RSQ:
5487 if ( GL_FALSE == assemble_RSQ(pR700AsmCode) )
5488 return GL_FALSE;
5489 break;
5490 case OPCODE_SIN:
5491 if ( GL_FALSE == assemble_TRIG(pR700AsmCode, SQ_OP2_INST_SIN) )
5492 return GL_FALSE;
5493 break;
5494 case OPCODE_SCS:
5495 if ( GL_FALSE == assemble_SCS(pR700AsmCode) )
5496 return GL_FALSE;
5497 break;
5498
5499 case OPCODE_SEQ:
5500 if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETE) )
5501 {
5502 return GL_FALSE;
5503 }
5504 break;
5505
5506 case OPCODE_SGT:
5507 if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETGT) )
5508 {
5509 return GL_FALSE;
5510 }
5511 break;
5512
5513 case OPCODE_SGE:
5514 if ( GL_FALSE == assemble_SGE(pR700AsmCode) )
5515 {
5516 return GL_FALSE;
5517 }
5518 break;
5519
5520 /* NO LT, LE, TODO : use GE => LE, GT => LT : reverse 2 src order would be simpliest. Or use SQ_CF_COND_FALSE for SQ_CF_COND_ACTIVE.*/
5521 case OPCODE_SLT:
5522 {
5523 struct prog_src_register SrcRegSave[2];
5524 SrcRegSave[0] = pILInst[i].SrcReg[0];
5525 SrcRegSave[1] = pILInst[i].SrcReg[1];
5526 pILInst[i].SrcReg[0] = SrcRegSave[1];
5527 pILInst[i].SrcReg[1] = SrcRegSave[0];
5528 if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETGT) )
5529 {
5530 pILInst[i].SrcReg[0] = SrcRegSave[0];
5531 pILInst[i].SrcReg[1] = SrcRegSave[1];
5532 return GL_FALSE;
5533 }
5534 pILInst[i].SrcReg[0] = SrcRegSave[0];
5535 pILInst[i].SrcReg[1] = SrcRegSave[1];
5536 }
5537 break;
5538
5539 case OPCODE_SLE:
5540 {
5541 struct prog_src_register SrcRegSave[2];
5542 SrcRegSave[0] = pILInst[i].SrcReg[0];
5543 SrcRegSave[1] = pILInst[i].SrcReg[1];
5544 pILInst[i].SrcReg[0] = SrcRegSave[1];
5545 pILInst[i].SrcReg[1] = SrcRegSave[0];
5546 if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETGE) )
5547 {
5548 pILInst[i].SrcReg[0] = SrcRegSave[0];
5549 pILInst[i].SrcReg[1] = SrcRegSave[1];
5550 return GL_FALSE;
5551 }
5552 pILInst[i].SrcReg[0] = SrcRegSave[0];
5553 pILInst[i].SrcReg[1] = SrcRegSave[1];
5554 }
5555 break;
5556
5557 case OPCODE_SNE:
5558 if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETNE) )
5559 {
5560 return GL_FALSE;
5561 }
5562 break;
5563
5564 //case OP_STP:
5565 // if ( GL_FALSE == assemble_STP(pR700AsmCode) )
5566 // return GL_FALSE;
5567 // break;
5568
5569 case OPCODE_SWZ:
5570 if ( GL_FALSE == assemble_MOV(pR700AsmCode) )
5571 {
5572 return GL_FALSE;
5573 }
5574 else
5575 {
5576 if( (i+1)<uiNumberInsts )
5577 {
5578 if(OPCODE_END != pILInst[i+1].Opcode)
5579 {
5580 if( GL_TRUE == IsTex(pILInst[i+1].Opcode) )
5581 {
5582 pR700AsmCode->pInstDeps[i+1].nDstDep = i+1; //=1?
5583 }
5584 }
5585 }
5586 }
5587 break;
5588
5589 case OPCODE_TEX:
5590 case OPCODE_TXB:
5591 case OPCODE_TXP:
5592 if ( GL_FALSE == assemble_TEX(pR700AsmCode) )
5593 return GL_FALSE;
5594 break;
5595
5596 case OPCODE_TRUNC:
5597 if ( GL_FALSE == assemble_math_function(pR700AsmCode, SQ_OP2_INST_TRUNC) )
5598 return GL_FALSE;
5599 break;
5600
5601 case OPCODE_XPD:
5602 if ( GL_FALSE == assemble_XPD(pR700AsmCode) )
5603 return GL_FALSE;
5604 break;
5605
5606 case OPCODE_IF :
5607 {
5608 GLboolean bHasElse = GL_FALSE;
5609
5610 if(pILInst[pILInst[i].BranchTarget - 1].Opcode == OPCODE_ELSE)
5611 {
5612 bHasElse = GL_TRUE;
5613 }
5614
5615 if ( GL_FALSE == assemble_IF(pR700AsmCode, bHasElse) )
5616 {
5617 return GL_FALSE;
5618 }
5619 }
5620 break;
5621
5622 case OPCODE_ELSE :
5623 if ( GL_FALSE == assemble_ELSE(pR700AsmCode) )
5624 return GL_FALSE;
5625 break;
5626
5627 case OPCODE_ENDIF:
5628 if ( GL_FALSE == assemble_ENDIF(pR700AsmCode) )
5629 return GL_FALSE;
5630 break;
5631
5632 case OPCODE_BGNLOOP:
5633 if( GL_FALSE == assemble_BGNLOOP(pR700AsmCode) )
5634 {
5635 return GL_FALSE;
5636 }
5637 break;
5638
5639 case OPCODE_BRK:
5640 if( GL_FALSE == assemble_BRK(pR700AsmCode) )
5641 {
5642 return GL_FALSE;
5643 }
5644 break;
5645
5646 case OPCODE_CONT:
5647 if( GL_FALSE == assemble_CONT(pR700AsmCode) )
5648 {
5649 return GL_FALSE;
5650 }
5651 break;
5652
5653 case OPCODE_ENDLOOP:
5654 if( GL_FALSE == assemble_ENDLOOP(pR700AsmCode) )
5655 {
5656 return GL_FALSE;
5657 }
5658 break;
5659
5660 case OPCODE_BGNSUB:
5661 if( GL_FALSE == assemble_BGNSUB(pR700AsmCode, i) )
5662 {
5663 return GL_FALSE;
5664 }
5665 break;
5666
5667 case OPCODE_RET:
5668 if( GL_FALSE == assemble_RET(pR700AsmCode) )
5669 {
5670 return GL_FALSE;
5671 }
5672 break;
5673
5674 case OPCODE_CAL:
5675 if( GL_FALSE == assemble_CAL(pR700AsmCode,
5676 pILInst[i].BranchTarget,
5677 uiNumberInsts,
5678 pILInst) )
5679 {
5680 return GL_FALSE;
5681 }
5682 break;
5683
5684 //case OPCODE_EXPORT:
5685 // if ( GL_FALSE == assemble_EXPORT() )
5686 // return GL_FALSE;
5687 // break;
5688
5689 case OPCODE_ENDSUB:
5690 return assemble_ENDSUB(pR700AsmCode);
5691
5692 case OPCODE_END:
5693 //pR700AsmCode->uiCurInst = i;
5694 //This is to remaind that if in later exoort there is depth/stencil
5695 //export, we need a mov to re-arrange DST channel, where using a
5696 //psuedo inst, we will use this end inst to do it.
5697 return GL_TRUE;
5698
5699 default:
5700 radeon_error("internal: unknown instruction\n");
5701 return GL_FALSE;
5702 }
5703 }
5704
5705 return GL_TRUE;
5706 }
5707
5708 GLboolean InitShaderProgram(r700_AssemblerBase * pAsm)
5709 {
5710 setRetInLoopFlag(pAsm, SQ_SEL_0);
5711 pAsm->alu_x_opcode = SQ_CF_INST_ALU;
5712 return GL_TRUE;
5713 }
5714
5715 GLboolean RelocProgram(r700_AssemblerBase * pAsm)
5716 {
5717 GLuint i;
5718 GLuint unCFoffset;
5719 TypedShaderList * plstCFmain;
5720 TypedShaderList * plstCFsub;
5721
5722 R700ShaderInstruction * pInst;
5723 R700ControlFlowGenericClause * pCFInst;
5724
5725 plstCFmain = pAsm->CALLSTACK[0].plstCFInstructions_local;
5726
5727 /* remove flags init if they are not used */
5728 if((pAsm->unCFflags & HAS_LOOPRET) == 0)
5729 {
5730 R700ControlFlowALUClause * pCF_ALU;
5731 pInst = plstCFmain->pHead;
5732 while(pInst)
5733 {
5734 if(SIT_CF_ALU == pInst->m_ShaderInstType)
5735 {
5736 pCF_ALU = (R700ControlFlowALUClause *)pInst;
5737 if(0 == pCF_ALU->m_Word1.f.count)
5738 {
5739 pCF_ALU->m_Word1.f.cf_inst = SQ_CF_INST_NOP;
5740 }
5741 else
5742 {
5743 R700ALUInstruction * pALU = pCF_ALU->m_pLinkedALUInstruction;
5744
5745 pALU->m_pLinkedALUClause = NULL;
5746 pALU = (R700ALUInstruction *)(pALU->pNextInst);
5747 pALU->m_pLinkedALUClause = pCF_ALU;
5748 pCF_ALU->m_pLinkedALUInstruction = pALU;
5749
5750 pCF_ALU->m_Word1.f.count--;
5751 }
5752 break;
5753 }
5754 pInst = pInst->pNextInst;
5755 };
5756 }
5757
5758 if(pAsm->CALLSTACK[0].max > 0)
5759 {
5760 pAsm->pR700Shader->uStackSize = ((pAsm->CALLSTACK[0].max + 3)>>2) + 2;
5761 }
5762
5763 if(0 == pAsm->unSubArrayPointer)
5764 {
5765 return GL_TRUE;
5766 }
5767
5768 unCFoffset = plstCFmain->uNumOfNode;
5769
5770 /* Reloc subs */
5771 for(i=0; i<pAsm->unSubArrayPointer; i++)
5772 {
5773 pAsm->subs[i].unCFoffset = unCFoffset;
5774 plstCFsub = &(pAsm->subs[i].lstCFInstructions_local);
5775
5776 pInst = plstCFsub->pHead;
5777
5778 /* reloc instructions */
5779 while(pInst)
5780 {
5781 if(SIT_CF_GENERIC == pInst->m_ShaderInstType)
5782 {
5783 pCFInst = (R700ControlFlowGenericClause *)pInst;
5784
5785 switch (pCFInst->m_Word1.f.cf_inst)
5786 {
5787 case SQ_CF_INST_POP:
5788 case SQ_CF_INST_JUMP:
5789 case SQ_CF_INST_ELSE:
5790 case SQ_CF_INST_LOOP_END:
5791 case SQ_CF_INST_LOOP_START:
5792 case SQ_CF_INST_LOOP_START_NO_AL:
5793 case SQ_CF_INST_LOOP_CONTINUE:
5794 case SQ_CF_INST_LOOP_BREAK:
5795 pCFInst->m_Word0.f.addr += unCFoffset;
5796 break;
5797 default:
5798 break;
5799 }
5800 }
5801
5802 pInst->m_uIndex += unCFoffset;
5803
5804 pInst = pInst->pNextInst;
5805 };
5806
5807 /* Put sub into main */
5808 plstCFmain->pTail->pNextInst = plstCFsub->pHead;
5809 plstCFmain->pTail = plstCFsub->pTail;
5810 plstCFmain->uNumOfNode += plstCFsub->uNumOfNode;
5811
5812 unCFoffset += plstCFsub->uNumOfNode;
5813 }
5814
5815 /* reloc callers */
5816 for(i=0; i<pAsm->unCallerArrayPointer; i++)
5817 {
5818 pAsm->callers[i].cf_ptr->m_Word0.f.addr
5819 = pAsm->subs[pAsm->callers[i].subDescIndex].unCFoffset;
5820 }
5821
5822 return GL_TRUE;
5823 }
5824
5825 GLboolean Process_Export(r700_AssemblerBase* pAsm,
5826 GLuint type,
5827 GLuint export_starting_index,
5828 GLuint export_count,
5829 GLuint starting_register_number,
5830 GLboolean is_depth_export)
5831 {
5832 unsigned char ucWriteMask;
5833
5834 check_current_clause(pAsm, CF_EMPTY_CLAUSE);
5835 check_current_clause(pAsm, CF_EXPORT_CLAUSE); //alloc the cf_current_export_clause_ptr
5836
5837 pAsm->cf_current_export_clause_ptr->m_Word0.f.type = type;
5838
5839 switch (type)
5840 {
5841 case SQ_EXPORT_PIXEL:
5842 if(GL_TRUE == is_depth_export)
5843 {
5844 pAsm->cf_current_export_clause_ptr->m_Word0.f.array_base = SQ_CF_PIXEL_Z;
5845 }
5846 else
5847 {
5848 pAsm->cf_current_export_clause_ptr->m_Word0.f.array_base = SQ_CF_PIXEL_MRT0 + export_starting_index;
5849 }
5850 break;
5851
5852 case SQ_EXPORT_POS:
5853 pAsm->cf_current_export_clause_ptr->m_Word0.f.array_base = SQ_CF_POS_0 + export_starting_index;
5854 break;
5855
5856 case SQ_EXPORT_PARAM:
5857 pAsm->cf_current_export_clause_ptr->m_Word0.f.array_base = 0x0 + export_starting_index;
5858 break;
5859
5860 default:
5861 radeon_error("Unknown export type: %d\n", type);
5862 return GL_FALSE;
5863 break;
5864 }
5865
5866 pAsm->cf_current_export_clause_ptr->m_Word0.f.rw_gpr = starting_register_number;
5867
5868 pAsm->cf_current_export_clause_ptr->m_Word0.f.rw_rel = SQ_ABSOLUTE;
5869 pAsm->cf_current_export_clause_ptr->m_Word0.f.index_gpr = 0x0;
5870 pAsm->cf_current_export_clause_ptr->m_Word0.f.elem_size = 0x3;
5871
5872 pAsm->cf_current_export_clause_ptr->m_Word1.f.burst_count = (export_count - 1);
5873 pAsm->cf_current_export_clause_ptr->m_Word1.f.end_of_program = 0x0;
5874 pAsm->cf_current_export_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
5875 pAsm->cf_current_export_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT; // _DONE
5876 pAsm->cf_current_export_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
5877 pAsm->cf_current_export_clause_ptr->m_Word1.f.barrier = 0x1;
5878
5879 if (export_count == 1)
5880 {
5881 ucWriteMask = pAsm->pucOutMask[starting_register_number - pAsm->starting_export_register_number];
5882 /* exports Z as a float into Red channel */
5883 if (GL_TRUE == is_depth_export)
5884 ucWriteMask = 0x1;
5885
5886 if( (ucWriteMask & 0x1) != 0)
5887 {
5888 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_x = SQ_SEL_X;
5889 }
5890 else
5891 {
5892 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_x = SQ_SEL_MASK;
5893 }
5894 if( ((ucWriteMask>>1) & 0x1) != 0)
5895 {
5896 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_y = SQ_SEL_Y;
5897 }
5898 else
5899 {
5900 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_y = SQ_SEL_MASK;
5901 }
5902 if( ((ucWriteMask>>2) & 0x1) != 0)
5903 {
5904 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_z = SQ_SEL_Z;
5905 }
5906 else
5907 {
5908 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_z = SQ_SEL_MASK;
5909 }
5910 if( ((ucWriteMask>>3) & 0x1) != 0)
5911 {
5912 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_w = SQ_SEL_W;
5913 }
5914 else
5915 {
5916 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_w = SQ_SEL_MASK;
5917 }
5918 }
5919 else
5920 {
5921 // This should only be used if all components for all registers have been written
5922 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_x = SQ_SEL_X;
5923 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_y = SQ_SEL_Y;
5924 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_z = SQ_SEL_Z;
5925 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_w = SQ_SEL_W;
5926 }
5927
5928 pAsm->cf_last_export_ptr = pAsm->cf_current_export_clause_ptr;
5929
5930 return GL_TRUE;
5931 }
5932
5933 GLboolean Move_Depth_Exports_To_Correct_Channels(r700_AssemblerBase *pAsm, BITS depth_channel_select)
5934 {
5935 gl_inst_opcode Opcode_save = pAsm->pILInst[pAsm->uiCurInst].Opcode; //Should be OPCODE_END
5936 pAsm->pILInst[pAsm->uiCurInst].Opcode = OPCODE_MOV;
5937
5938 // MOV depth_export_register.hw_depth_channel, depth_export_register.depth_channel_select
5939
5940 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
5941
5942 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
5943 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
5944 pAsm->D.dst.reg = pAsm->depth_export_register_number;
5945
5946 pAsm->D.dst.writex = 1; // depth goes in R channel for HW
5947
5948 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
5949 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
5950 pAsm->S[0].src.reg = pAsm->depth_export_register_number;
5951
5952 setswizzle_PVSSRC(&(pAsm->S[0].src), depth_channel_select);
5953
5954 noneg_PVSSRC(&(pAsm->S[0].src));
5955
5956 if( GL_FALSE == next_ins(pAsm) )
5957 {
5958 return GL_FALSE;
5959 }
5960
5961 pAsm->pILInst[pAsm->uiCurInst].Opcode = Opcode_save;
5962
5963 return GL_TRUE;
5964 }
5965
5966 GLboolean Process_Fragment_Exports(r700_AssemblerBase *pR700AsmCode,
5967 GLbitfield OutputsWritten)
5968 {
5969 unsigned int unBit;
5970 GLuint export_count = 0;
5971
5972 if(pR700AsmCode->depth_export_register_number >= 0)
5973 {
5974 if( GL_FALSE == Move_Depth_Exports_To_Correct_Channels(pR700AsmCode, SQ_SEL_Z) ) // depth
5975 {
5976 return GL_FALSE;
5977 }
5978 }
5979
5980 unBit = 1 << FRAG_RESULT_COLOR;
5981 if(OutputsWritten & unBit)
5982 {
5983 if( GL_FALSE == Process_Export(pR700AsmCode,
5984 SQ_EXPORT_PIXEL,
5985 0,
5986 1,
5987 pR700AsmCode->uiFP_OutputMap[FRAG_RESULT_COLOR],
5988 GL_FALSE) )
5989 {
5990 return GL_FALSE;
5991 }
5992 export_count++;
5993 }
5994 unBit = 1 << FRAG_RESULT_DEPTH;
5995 if(OutputsWritten & unBit)
5996 {
5997 if( GL_FALSE == Process_Export(pR700AsmCode,
5998 SQ_EXPORT_PIXEL,
5999 0,
6000 1,
6001 pR700AsmCode->uiFP_OutputMap[FRAG_RESULT_DEPTH],
6002 GL_TRUE))
6003 {
6004 return GL_FALSE;
6005 }
6006 export_count++;
6007 }
6008 /* Need to export something, otherwise we'll hang
6009 * results are undefined anyway */
6010 if(export_count == 0)
6011 {
6012 Process_Export(pR700AsmCode, SQ_EXPORT_PIXEL, 0, 1, 0, GL_FALSE);
6013 }
6014
6015 if(pR700AsmCode->cf_last_export_ptr != NULL)
6016 {
6017 pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT_DONE;
6018 pR700AsmCode->cf_last_export_ptr->m_Word1.f.end_of_program = 0x1;
6019 }
6020
6021 return GL_TRUE;
6022 }
6023
6024 GLboolean Process_Vertex_Exports(r700_AssemblerBase *pR700AsmCode,
6025 GLbitfield OutputsWritten)
6026 {
6027 unsigned int unBit;
6028 unsigned int i;
6029
6030 GLuint export_starting_index = 0;
6031 GLuint export_count = pR700AsmCode->number_of_exports;
6032
6033 unBit = 1 << VERT_RESULT_HPOS;
6034 if(OutputsWritten & unBit)
6035 {
6036 if( GL_FALSE == Process_Export(pR700AsmCode,
6037 SQ_EXPORT_POS,
6038 export_starting_index,
6039 1,
6040 pR700AsmCode->ucVP_OutputMap[VERT_RESULT_HPOS],
6041 GL_FALSE) )
6042 {
6043 return GL_FALSE;
6044 }
6045
6046 export_count--;
6047
6048 pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT_DONE;
6049 }
6050
6051 pR700AsmCode->number_of_exports = export_count;
6052
6053 unBit = 1 << VERT_RESULT_COL0;
6054 if(OutputsWritten & unBit)
6055 {
6056 if( GL_FALSE == Process_Export(pR700AsmCode,
6057 SQ_EXPORT_PARAM,
6058 export_starting_index,
6059 1,
6060 pR700AsmCode->ucVP_OutputMap[VERT_RESULT_COL0],
6061 GL_FALSE) )
6062 {
6063 return GL_FALSE;
6064 }
6065
6066 export_starting_index++;
6067 }
6068
6069 unBit = 1 << VERT_RESULT_COL1;
6070 if(OutputsWritten & unBit)
6071 {
6072 if( GL_FALSE == Process_Export(pR700AsmCode,
6073 SQ_EXPORT_PARAM,
6074 export_starting_index,
6075 1,
6076 pR700AsmCode->ucVP_OutputMap[VERT_RESULT_COL1],
6077 GL_FALSE) )
6078 {
6079 return GL_FALSE;
6080 }
6081
6082 export_starting_index++;
6083 }
6084
6085 unBit = 1 << VERT_RESULT_FOGC;
6086 if(OutputsWritten & unBit)
6087 {
6088 if( GL_FALSE == Process_Export(pR700AsmCode,
6089 SQ_EXPORT_PARAM,
6090 export_starting_index,
6091 1,
6092 pR700AsmCode->ucVP_OutputMap[VERT_RESULT_FOGC],
6093 GL_FALSE) )
6094 {
6095 return GL_FALSE;
6096 }
6097
6098 export_starting_index++;
6099 }
6100
6101 for(i=0; i<8; i++)
6102 {
6103 unBit = 1 << (VERT_RESULT_TEX0 + i);
6104 if(OutputsWritten & unBit)
6105 {
6106 if( GL_FALSE == Process_Export(pR700AsmCode,
6107 SQ_EXPORT_PARAM,
6108 export_starting_index,
6109 1,
6110 pR700AsmCode->ucVP_OutputMap[VERT_RESULT_TEX0 + i],
6111 GL_FALSE) )
6112 {
6113 return GL_FALSE;
6114 }
6115
6116 export_starting_index++;
6117 }
6118 }
6119
6120 for(i=VERT_RESULT_VAR0; i<VERT_RESULT_MAX; i++)
6121 {
6122 unBit = 1 << i;
6123 if(OutputsWritten & unBit)
6124 {
6125 if( GL_FALSE == Process_Export(pR700AsmCode,
6126 SQ_EXPORT_PARAM,
6127 export_starting_index,
6128 1,
6129 pR700AsmCode->ucVP_OutputMap[i],
6130 GL_FALSE) )
6131 {
6132 return GL_FALSE;
6133 }
6134
6135 export_starting_index++;
6136 }
6137 }
6138
6139 // At least one param should be exported
6140 if (export_count)
6141 {
6142 pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT_DONE;
6143 }
6144 else
6145 {
6146 if( GL_FALSE == Process_Export(pR700AsmCode,
6147 SQ_EXPORT_PARAM,
6148 0,
6149 1,
6150 pR700AsmCode->starting_export_register_number,
6151 GL_FALSE) )
6152 {
6153 return GL_FALSE;
6154 }
6155
6156 pR700AsmCode->cf_last_export_ptr->m_Word1_SWIZ.f.sel_x = SQ_SEL_0;
6157 pR700AsmCode->cf_last_export_ptr->m_Word1_SWIZ.f.sel_y = SQ_SEL_0;
6158 pR700AsmCode->cf_last_export_ptr->m_Word1_SWIZ.f.sel_z = SQ_SEL_0;
6159 pR700AsmCode->cf_last_export_ptr->m_Word1_SWIZ.f.sel_w = SQ_SEL_1;
6160 pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT_DONE;
6161 }
6162
6163 pR700AsmCode->cf_last_export_ptr->m_Word1.f.end_of_program = 0x1;
6164
6165 return GL_TRUE;
6166 }
6167
6168 GLboolean Clean_Up_Assembler(r700_AssemblerBase *pR700AsmCode)
6169 {
6170 FREE(pR700AsmCode->pucOutMask);
6171 FREE(pR700AsmCode->pInstDeps);
6172
6173 if(NULL != pR700AsmCode->subs)
6174 {
6175 FREE(pR700AsmCode->subs);
6176 }
6177 if(NULL != pR700AsmCode->callers)
6178 {
6179 FREE(pR700AsmCode->callers);
6180 }
6181
6182 return GL_TRUE;
6183 }
6184