67e0ee774631c681df7701bec1950c8ae6a840c6
[mesa.git] / src / mesa / drivers / dri / r600 / r700_assembler.c
1 /*
2 * Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
18 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
19 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
20 */
21
22 /*
23 * Authors:
24 * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
25 */
26
27 #include <stdio.h>
28 #include <stdarg.h>
29 #include <stdlib.h>
30 #include <string.h>
31 #include <math.h>
32
33 #include "main/mtypes.h"
34 #include "main/imports.h"
35
36 #include "radeon_debug.h"
37 #include "r600_context.h"
38
39 #include "r700_assembler.h"
40
41 BITS addrmode_PVSDST(PVSDST * pPVSDST)
42 {
43 return pPVSDST->addrmode0 | ((BITS)pPVSDST->addrmode1 << 1);
44 }
45
46 void setaddrmode_PVSDST(PVSDST * pPVSDST, BITS addrmode)
47 {
48 pPVSDST->addrmode0 = addrmode & 1;
49 pPVSDST->addrmode1 = (addrmode >> 1) & 1;
50 }
51
52 void nomask_PVSDST(PVSDST * pPVSDST)
53 {
54 pPVSDST->writex = pPVSDST->writey = pPVSDST->writez = pPVSDST->writew = 1;
55 }
56
57 BITS addrmode_PVSSRC(PVSSRC* pPVSSRC)
58 {
59 return pPVSSRC->addrmode0 | ((BITS)pPVSSRC->addrmode1 << 1);
60 }
61
62 void setaddrmode_PVSSRC(PVSSRC* pPVSSRC, BITS addrmode)
63 {
64 pPVSSRC->addrmode0 = addrmode & 1;
65 pPVSSRC->addrmode1 = (addrmode >> 1) & 1;
66 }
67
68
69 void setswizzle_PVSSRC(PVSSRC* pPVSSRC, BITS swz)
70 {
71 pPVSSRC->swizzlex =
72 pPVSSRC->swizzley =
73 pPVSSRC->swizzlez =
74 pPVSSRC->swizzlew = swz;
75 }
76
77 void noswizzle_PVSSRC(PVSSRC* pPVSSRC)
78 {
79 pPVSSRC->swizzlex = SQ_SEL_X;
80 pPVSSRC->swizzley = SQ_SEL_Y;
81 pPVSSRC->swizzlez = SQ_SEL_Z;
82 pPVSSRC->swizzlew = SQ_SEL_W;
83 }
84
85 void
86 swizzleagain_PVSSRC(PVSSRC * pPVSSRC, BITS x, BITS y, BITS z, BITS w)
87 {
88 switch (x)
89 {
90 case SQ_SEL_X: x = pPVSSRC->swizzlex;
91 break;
92 case SQ_SEL_Y: x = pPVSSRC->swizzley;
93 break;
94 case SQ_SEL_Z: x = pPVSSRC->swizzlez;
95 break;
96 case SQ_SEL_W: x = pPVSSRC->swizzlew;
97 break;
98 default:;
99 }
100
101 switch (y)
102 {
103 case SQ_SEL_X: y = pPVSSRC->swizzlex;
104 break;
105 case SQ_SEL_Y: y = pPVSSRC->swizzley;
106 break;
107 case SQ_SEL_Z: y = pPVSSRC->swizzlez;
108 break;
109 case SQ_SEL_W: y = pPVSSRC->swizzlew;
110 break;
111 default:;
112 }
113
114 switch (z)
115 {
116 case SQ_SEL_X: z = pPVSSRC->swizzlex;
117 break;
118 case SQ_SEL_Y: z = pPVSSRC->swizzley;
119 break;
120 case SQ_SEL_Z: z = pPVSSRC->swizzlez;
121 break;
122 case SQ_SEL_W: z = pPVSSRC->swizzlew;
123 break;
124 default:;
125 }
126
127 switch (w)
128 {
129 case SQ_SEL_X: w = pPVSSRC->swizzlex;
130 break;
131 case SQ_SEL_Y: w = pPVSSRC->swizzley;
132 break;
133 case SQ_SEL_Z: w = pPVSSRC->swizzlez;
134 break;
135 case SQ_SEL_W: w = pPVSSRC->swizzlew;
136 break;
137 default:;
138 }
139
140 pPVSSRC->swizzlex = x;
141 pPVSSRC->swizzley = y;
142 pPVSSRC->swizzlez = z;
143 pPVSSRC->swizzlew = w;
144 }
145
146 void neg_PVSSRC(PVSSRC* pPVSSRC)
147 {
148 pPVSSRC->negx = 1;
149 pPVSSRC->negy = 1;
150 pPVSSRC->negz = 1;
151 pPVSSRC->negw = 1;
152 }
153
154 void noneg_PVSSRC(PVSSRC* pPVSSRC)
155 {
156 pPVSSRC->negx = 0;
157 pPVSSRC->negy = 0;
158 pPVSSRC->negz = 0;
159 pPVSSRC->negw = 0;
160 }
161
162 // negate argument (for SUB instead of ADD and alike)
163 void flipneg_PVSSRC(PVSSRC* pPVSSRC)
164 {
165 pPVSSRC->negx = !pPVSSRC->negx;
166 pPVSSRC->negy = !pPVSSRC->negy;
167 pPVSSRC->negz = !pPVSSRC->negz;
168 pPVSSRC->negw = !pPVSSRC->negw;
169 }
170
171 void zerocomp_PVSSRC(PVSSRC* pPVSSRC, int c)
172 {
173 switch (c)
174 {
175 case 0: pPVSSRC->swizzlex = SQ_SEL_0; pPVSSRC->negx = 0; break;
176 case 1: pPVSSRC->swizzley = SQ_SEL_0; pPVSSRC->negy = 0; break;
177 case 2: pPVSSRC->swizzlez = SQ_SEL_0; pPVSSRC->negz = 0; break;
178 case 3: pPVSSRC->swizzlew = SQ_SEL_0; pPVSSRC->negw = 0; break;
179 default:;
180 }
181 }
182
183 void onecomp_PVSSRC(PVSSRC* pPVSSRC, int c)
184 {
185 switch (c)
186 {
187 case 0: pPVSSRC->swizzlex = SQ_SEL_1; pPVSSRC->negx = 0; break;
188 case 1: pPVSSRC->swizzley = SQ_SEL_1; pPVSSRC->negy = 0; break;
189 case 2: pPVSSRC->swizzlez = SQ_SEL_1; pPVSSRC->negz = 0; break;
190 case 3: pPVSSRC->swizzlew = SQ_SEL_1; pPVSSRC->negw = 0; break;
191 default:;
192 }
193 }
194
195 BITS is_misc_component_exported(VAP_OUT_VTX_FMT_0* pOutVTXFmt0)
196 {
197 return (pOutVTXFmt0->point_size |
198 pOutVTXFmt0->edge_flag |
199 pOutVTXFmt0->rta_index |
200 pOutVTXFmt0->kill_flag |
201 pOutVTXFmt0->viewport_index);
202 }
203
204 BITS is_depth_component_exported(OUT_FRAGMENT_FMT_0* pFPOutFmt)
205 {
206 return (pFPOutFmt->depth |
207 pFPOutFmt->stencil_ref |
208 pFPOutFmt->mask |
209 pFPOutFmt->coverage_to_mask);
210 }
211
212 GLboolean is_reduction_opcode(PVSDWORD* dest)
213 {
214 if (dest->dst.op3 == 0)
215 {
216 if ( (dest->dst.opcode == SQ_OP2_INST_DOT4 || dest->dst.opcode == SQ_OP2_INST_DOT4_IEEE || dest->dst.opcode == SQ_OP2_INST_CUBE) )
217 {
218 return GL_TRUE;
219 }
220 }
221 return GL_FALSE;
222 }
223
224 GLuint GetSurfaceFormat(GLenum eType, GLuint nChannels, GLuint * pClient_size)
225 {
226 GLuint format = FMT_INVALID;
227 GLuint uiElemSize = 0;
228
229 switch (eType)
230 {
231 case GL_BYTE:
232 case GL_UNSIGNED_BYTE:
233 uiElemSize = 1;
234 switch(nChannels)
235 {
236 case 1:
237 format = FMT_8; break;
238 case 2:
239 format = FMT_8_8; break;
240 case 3:
241 format = FMT_8_8_8; break;
242 case 4:
243 format = FMT_8_8_8_8; break;
244 default:
245 break;
246 }
247 break;
248
249 case GL_UNSIGNED_SHORT:
250 case GL_SHORT:
251 uiElemSize = 2;
252 switch(nChannels)
253 {
254 case 1:
255 format = FMT_16; break;
256 case 2:
257 format = FMT_16_16; break;
258 case 3:
259 format = FMT_16_16_16; break;
260 case 4:
261 format = FMT_16_16_16_16; break;
262 default:
263 break;
264 }
265 break;
266
267 case GL_UNSIGNED_INT:
268 case GL_INT:
269 uiElemSize = 4;
270 switch(nChannels)
271 {
272 case 1:
273 format = FMT_32; break;
274 case 2:
275 format = FMT_32_32; break;
276 case 3:
277 format = FMT_32_32_32; break;
278 case 4:
279 format = FMT_32_32_32_32; break;
280 default:
281 break;
282 }
283 break;
284
285 case GL_FLOAT:
286 uiElemSize = 4;
287 switch(nChannels)
288 {
289 case 1:
290 format = FMT_32_FLOAT; break;
291 case 2:
292 format = FMT_32_32_FLOAT; break;
293 case 3:
294 format = FMT_32_32_32_FLOAT; break;
295 case 4:
296 format = FMT_32_32_32_32_FLOAT; break;
297 default:
298 break;
299 }
300 break;
301 case GL_DOUBLE:
302 uiElemSize = 8;
303 switch(nChannels)
304 {
305 case 1:
306 format = FMT_32_FLOAT; break;
307 case 2:
308 format = FMT_32_32_FLOAT; break;
309 case 3:
310 format = FMT_32_32_32_FLOAT; break;
311 case 4:
312 format = FMT_32_32_32_32_FLOAT; break;
313 default:
314 break;
315 }
316 break;
317 default:
318 ;
319 //GL_ASSERT_NO_CASE();
320 }
321
322 if(NULL != pClient_size)
323 {
324 *pClient_size = uiElemSize * nChannels;
325 }
326
327 return(format);
328 }
329
330 unsigned int r700GetNumOperands(r700_AssemblerBase* pAsm)
331 {
332 if(pAsm->D.dst.op3)
333 {
334 return 3;
335 }
336
337 switch (pAsm->D.dst.opcode)
338 {
339 case SQ_OP2_INST_ADD:
340 case SQ_OP2_INST_KILLGT:
341 case SQ_OP2_INST_MUL:
342 case SQ_OP2_INST_MAX:
343 case SQ_OP2_INST_MIN:
344 //case SQ_OP2_INST_MAX_DX10:
345 //case SQ_OP2_INST_MIN_DX10:
346 case SQ_OP2_INST_SETGT:
347 case SQ_OP2_INST_SETGE:
348 case SQ_OP2_INST_PRED_SETE:
349 case SQ_OP2_INST_PRED_SETGT:
350 case SQ_OP2_INST_PRED_SETGE:
351 case SQ_OP2_INST_PRED_SETNE:
352 case SQ_OP2_INST_DOT4:
353 case SQ_OP2_INST_DOT4_IEEE:
354 case SQ_OP2_INST_CUBE:
355 return 2;
356
357 case SQ_OP2_INST_MOV:
358 case SQ_OP2_INST_MOVA_FLOOR:
359 case SQ_OP2_INST_FRACT:
360 case SQ_OP2_INST_FLOOR:
361 case SQ_OP2_INST_EXP_IEEE:
362 case SQ_OP2_INST_LOG_CLAMPED:
363 case SQ_OP2_INST_LOG_IEEE:
364 case SQ_OP2_INST_RECIP_IEEE:
365 case SQ_OP2_INST_RECIPSQRT_IEEE:
366 case SQ_OP2_INST_FLT_TO_INT:
367 case SQ_OP2_INST_SIN:
368 case SQ_OP2_INST_COS:
369 return 1;
370
371 default: radeon_error(
372 "Need instruction operand number for %x.\n", pAsm->D.dst.opcode);
373 };
374
375 return 3;
376 }
377
378 int Init_r700_AssemblerBase(SHADER_PIPE_TYPE spt, r700_AssemblerBase* pAsm, R700_Shader* pShader)
379 {
380 GLuint i;
381
382 Init_R700_Shader(pShader);
383 pAsm->pR700Shader = pShader;
384 pAsm->currentShaderType = spt;
385
386 pAsm->cf_last_export_ptr = NULL;
387
388 pAsm->cf_current_export_clause_ptr = NULL;
389 pAsm->cf_current_alu_clause_ptr = NULL;
390 pAsm->cf_current_tex_clause_ptr = NULL;
391 pAsm->cf_current_vtx_clause_ptr = NULL;
392 pAsm->cf_current_cf_clause_ptr = NULL;
393
394 // No clause has been created yet
395 pAsm->cf_current_clause_type = CF_EMPTY_CLAUSE;
396
397 pAsm->number_of_colorandz_exports = 0;
398 pAsm->number_of_exports = 0;
399 pAsm->number_of_export_opcodes = 0;
400
401
402 pAsm->D.bits = 0;
403 pAsm->S[0].bits = 0;
404 pAsm->S[1].bits = 0;
405 pAsm->S[2].bits = 0;
406
407 pAsm->uLastPosUpdate = 0;
408
409 *(BITS *) &pAsm->fp_stOutFmt0 = 0;
410
411 pAsm->uIIns = 0;
412 pAsm->uOIns = 0;
413 pAsm->number_used_registers = 0;
414 pAsm->uUsedConsts = 256;
415
416
417 // Fragment programs
418 pAsm->uBoolConsts = 0;
419 pAsm->uIntConsts = 0;
420 pAsm->uInsts = 0;
421 pAsm->uConsts = 0;
422
423 pAsm->FCSP = 0;
424 pAsm->fc_stack[0].type = FC_NONE;
425
426 pAsm->branch_depth = 0;
427 pAsm->max_branch_depth = 0;
428
429 pAsm->aArgSubst[0] =
430 pAsm->aArgSubst[1] =
431 pAsm->aArgSubst[2] =
432 pAsm->aArgSubst[3] = (-1);
433
434 pAsm->uOutputs = 0;
435
436 for (i=0; i<NUMBER_OF_OUTPUT_COLORS; i++)
437 {
438 pAsm->color_export_register_number[i] = (-1);
439 }
440
441
442 pAsm->depth_export_register_number = (-1);
443 pAsm->stencil_export_register_number = (-1);
444 pAsm->coverage_to_mask_export_register_number = (-1);
445 pAsm->mask_export_register_number = (-1);
446
447 pAsm->starting_export_register_number = 0;
448 pAsm->starting_vfetch_register_number = 0;
449 pAsm->starting_temp_register_number = 0;
450 pAsm->uFirstHelpReg = 0;
451
452
453 pAsm->input_position_is_used = GL_FALSE;
454 pAsm->input_normal_is_used = GL_FALSE;
455
456
457 for (i=0; i<NUMBER_OF_INPUT_COLORS; i++)
458 {
459 pAsm->input_color_is_used[ i ] = GL_FALSE;
460 }
461
462 for (i=0; i<NUMBER_OF_TEXTURE_UNITS; i++)
463 {
464 pAsm->input_texture_unit_is_used[ i ] = GL_FALSE;
465 }
466
467 for (i=0; i<VERT_ATTRIB_MAX; i++)
468 {
469 pAsm->vfetch_instruction_ptr_array[ i ] = NULL;
470 }
471
472 pAsm->number_of_inputs = 0;
473
474 pAsm->is_tex = GL_FALSE;
475 pAsm->need_tex_barrier = GL_FALSE;
476
477 return 0;
478 }
479
480 GLboolean IsTex(gl_inst_opcode Opcode)
481 {
482 if( (OPCODE_TEX==Opcode) || (OPCODE_TXP==Opcode) || (OPCODE_TXB==Opcode) )
483 {
484 return GL_TRUE;
485 }
486 return GL_FALSE;
487 }
488
489 GLboolean IsAlu(gl_inst_opcode Opcode)
490 {
491 //TODO : more for fc and ex for higher spec.
492 if( IsTex(Opcode) )
493 {
494 return GL_FALSE;
495 }
496 return GL_TRUE;
497 }
498
499 int check_current_clause(r700_AssemblerBase* pAsm,
500 CF_CLAUSE_TYPE new_clause_type)
501 {
502 if (pAsm->cf_current_clause_type != new_clause_type)
503 { //Close last open clause
504 switch (pAsm->cf_current_clause_type)
505 {
506 case CF_ALU_CLAUSE:
507 if ( pAsm->cf_current_alu_clause_ptr != NULL)
508 {
509 pAsm->cf_current_alu_clause_ptr = NULL;
510 }
511 break;
512 case CF_VTX_CLAUSE:
513 if ( pAsm->cf_current_vtx_clause_ptr != NULL)
514 {
515 pAsm->cf_current_vtx_clause_ptr = NULL;
516 }
517 break;
518 case CF_TEX_CLAUSE:
519 if ( pAsm->cf_current_tex_clause_ptr != NULL)
520 {
521 pAsm->cf_current_tex_clause_ptr = NULL;
522 }
523 break;
524 case CF_EXPORT_CLAUSE:
525 if ( pAsm->cf_current_export_clause_ptr != NULL)
526 {
527 pAsm->cf_current_export_clause_ptr = NULL;
528 }
529 break;
530 case CF_OTHER_CLAUSE:
531 if ( pAsm->cf_current_cf_clause_ptr != NULL)
532 {
533 pAsm->cf_current_cf_clause_ptr = NULL;
534 }
535 break;
536 case CF_EMPTY_CLAUSE:
537 break;
538 default:
539 radeon_error(
540 "Unknown CF_CLAUSE_TYPE (%d) in check_current_clause. \n", (int) new_clause_type);
541 return GL_FALSE;
542 }
543
544 pAsm->cf_current_clause_type = CF_EMPTY_CLAUSE;
545
546 // Create new clause
547 switch (new_clause_type)
548 {
549 case CF_ALU_CLAUSE:
550 pAsm->cf_current_clause_type = CF_ALU_CLAUSE;
551 break;
552 case CF_VTX_CLAUSE:
553 pAsm->cf_current_clause_type = CF_VTX_CLAUSE;
554 break;
555 case CF_TEX_CLAUSE:
556 pAsm->cf_current_clause_type = CF_TEX_CLAUSE;
557 break;
558 case CF_EXPORT_CLAUSE:
559 {
560 R700ControlFlowSXClause* pR700ControlFlowSXClause
561 = (R700ControlFlowSXClause*) CALLOC_STRUCT(R700ControlFlowSXClause);
562
563 // Add new export instruction to control flow program
564 if (pR700ControlFlowSXClause != 0)
565 {
566 pAsm->cf_current_export_clause_ptr = pR700ControlFlowSXClause;
567 Init_R700ControlFlowSXClause(pR700ControlFlowSXClause);
568 AddCFInstruction( pAsm->pR700Shader,
569 (R700ControlFlowInstruction *)pR700ControlFlowSXClause );
570 }
571 else
572 {
573 radeon_error(
574 "Error allocating new EXPORT CF instruction in check_current_clause. \n");
575 return GL_FALSE;
576 }
577 pAsm->cf_current_clause_type = CF_EXPORT_CLAUSE;
578 }
579 break;
580 case CF_EMPTY_CLAUSE:
581 break;
582 case CF_OTHER_CLAUSE:
583 pAsm->cf_current_clause_type = CF_OTHER_CLAUSE;
584 break;
585 default:
586 radeon_error(
587 "Unknown CF_CLAUSE_TYPE (%d) in check_current_clause. \n", (int) new_clause_type);
588 return GL_FALSE;
589 }
590 }
591
592 return GL_TRUE;
593 }
594
595 GLboolean add_vfetch_instruction(r700_AssemblerBase* pAsm,
596 R700VertexInstruction* vertex_instruction_ptr)
597 {
598 if( GL_FALSE == check_current_clause(pAsm, CF_VTX_CLAUSE) )
599 {
600 return GL_FALSE;
601 }
602
603 if( pAsm->cf_current_vtx_clause_ptr == NULL ||
604 ( (pAsm->cf_current_vtx_clause_ptr != NULL) &&
605 (pAsm->cf_current_vtx_clause_ptr->m_Word1.f.count >= GetCFMaxInstructions(pAsm->cf_current_vtx_clause_ptr->m_ShaderInstType)-1)
606 ) )
607 {
608 // Create new Vfetch control flow instruction for this new clause
609 pAsm->cf_current_vtx_clause_ptr = (R700ControlFlowGenericClause*) CALLOC_STRUCT(R700ControlFlowGenericClause);
610
611 if (pAsm->cf_current_vtx_clause_ptr != NULL)
612 {
613 Init_R700ControlFlowGenericClause(pAsm->cf_current_vtx_clause_ptr);
614 AddCFInstruction( pAsm->pR700Shader,
615 (R700ControlFlowInstruction *)pAsm->cf_current_vtx_clause_ptr );
616 }
617 else
618 {
619 radeon_error("Could not allocate a new VFetch CF instruction.\n");
620 return GL_FALSE;
621 }
622
623 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.pop_count = 0x0;
624 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.cf_const = 0x0;
625 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
626 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.count = 0x0;
627 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.end_of_program = 0x0;
628 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
629 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_VTX;
630 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
631 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.barrier = 0x1;
632
633 LinkVertexInstruction(pAsm->cf_current_vtx_clause_ptr, vertex_instruction_ptr );
634 }
635 else
636 {
637 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.count++;
638 }
639
640 AddVTXInstruction(pAsm->pR700Shader, vertex_instruction_ptr);
641
642 return GL_TRUE;
643 }
644
645 GLboolean add_tex_instruction(r700_AssemblerBase* pAsm,
646 R700TextureInstruction* tex_instruction_ptr)
647 {
648 if ( GL_FALSE == check_current_clause(pAsm, CF_TEX_CLAUSE) )
649 {
650 return GL_FALSE;
651 }
652
653 if ( pAsm->cf_current_tex_clause_ptr == NULL ||
654 ( (pAsm->cf_current_tex_clause_ptr != NULL) &&
655 (pAsm->cf_current_tex_clause_ptr->m_Word1.f.count >= GetCFMaxInstructions(pAsm->cf_current_tex_clause_ptr->m_ShaderInstType)-1)
656 ) )
657 {
658 // new tex cf instruction for this new clause
659 pAsm->cf_current_tex_clause_ptr = (R700ControlFlowGenericClause*) CALLOC_STRUCT(R700ControlFlowGenericClause);
660
661 if (pAsm->cf_current_tex_clause_ptr != NULL)
662 {
663 Init_R700ControlFlowGenericClause(pAsm->cf_current_tex_clause_ptr);
664 AddCFInstruction( pAsm->pR700Shader,
665 (R700ControlFlowInstruction *)pAsm->cf_current_tex_clause_ptr );
666 }
667 else
668 {
669 radeon_error("Could not allocate a new TEX CF instruction.\n");
670 return GL_FALSE;
671 }
672
673 pAsm->cf_current_tex_clause_ptr->m_Word1.f.pop_count = 0x0;
674 pAsm->cf_current_tex_clause_ptr->m_Word1.f.cf_const = 0x0;
675 pAsm->cf_current_tex_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
676
677 pAsm->cf_current_tex_clause_ptr->m_Word1.f.end_of_program = 0x0;
678 pAsm->cf_current_tex_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
679 pAsm->cf_current_tex_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_TEX;
680 pAsm->cf_current_tex_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
681 pAsm->cf_current_tex_clause_ptr->m_Word1.f.barrier = 0x0; //0x1;
682 }
683 else
684 {
685 pAsm->cf_current_tex_clause_ptr->m_Word1.f.count++;
686 }
687
688 // If this clause constains any TEX instruction that is dependent on a previous instruction,
689 // set the barrier bit
690 if( pAsm->pInstDeps[pAsm->uiCurInst].nDstDep > (-1) || pAsm->need_tex_barrier == GL_TRUE )
691 {
692 pAsm->cf_current_tex_clause_ptr->m_Word1.f.barrier = 0x1;
693 }
694
695 if(NULL == pAsm->cf_current_tex_clause_ptr->m_pLinkedTEXInstruction)
696 {
697 pAsm->cf_current_tex_clause_ptr->m_pLinkedTEXInstruction = tex_instruction_ptr;
698 tex_instruction_ptr->m_pLinkedGenericClause = pAsm->cf_current_tex_clause_ptr;
699 }
700
701 AddTEXInstruction(pAsm->pR700Shader, tex_instruction_ptr);
702
703 return GL_TRUE;
704 }
705
706 GLboolean assemble_vfetch_instruction(r700_AssemblerBase* pAsm,
707 GLuint gl_client_id,
708 GLuint destination_register,
709 GLuint number_of_elements,
710 GLenum dataElementType,
711 VTX_FETCH_METHOD* pFetchMethod)
712 {
713 GLuint client_size_inbyte;
714 GLuint data_format;
715 GLuint mega_fetch_count;
716 GLuint is_mega_fetch_flag;
717
718 R700VertexGenericFetch* vfetch_instruction_ptr;
719 R700VertexGenericFetch* assembled_vfetch_instruction_ptr = pAsm->vfetch_instruction_ptr_array[ gl_client_id ];
720
721 if (assembled_vfetch_instruction_ptr == NULL)
722 {
723 vfetch_instruction_ptr = (R700VertexGenericFetch*) CALLOC_STRUCT(R700VertexGenericFetch);
724 if (vfetch_instruction_ptr == NULL)
725 {
726 return GL_FALSE;
727 }
728 Init_R700VertexGenericFetch(vfetch_instruction_ptr);
729 }
730 else
731 {
732 vfetch_instruction_ptr = assembled_vfetch_instruction_ptr;
733 }
734
735 data_format = GetSurfaceFormat(dataElementType, number_of_elements, &client_size_inbyte);
736
737 if(GL_TRUE == pFetchMethod->bEnableMini) //More conditions here
738 {
739 //TODO : mini fetch
740 }
741 else
742 {
743 mega_fetch_count = MEGA_FETCH_BYTES - 1;
744 is_mega_fetch_flag = 0x1;
745 pFetchMethod->mega_fetch_remainder = MEGA_FETCH_BYTES - client_size_inbyte;
746 }
747
748 vfetch_instruction_ptr->m_Word0.f.vtx_inst = SQ_VTX_INST_FETCH;
749 vfetch_instruction_ptr->m_Word0.f.fetch_type = SQ_VTX_FETCH_VERTEX_DATA;
750 vfetch_instruction_ptr->m_Word0.f.fetch_whole_quad = 0x0;
751
752 vfetch_instruction_ptr->m_Word0.f.buffer_id = gl_client_id;
753 vfetch_instruction_ptr->m_Word0.f.src_gpr = 0x0;
754 vfetch_instruction_ptr->m_Word0.f.src_rel = SQ_ABSOLUTE;
755 vfetch_instruction_ptr->m_Word0.f.src_sel_x = SQ_SEL_X;
756 vfetch_instruction_ptr->m_Word0.f.mega_fetch_count = mega_fetch_count;
757
758 vfetch_instruction_ptr->m_Word1.f.dst_sel_x = (number_of_elements < 1) ? SQ_SEL_0 : SQ_SEL_X;
759 vfetch_instruction_ptr->m_Word1.f.dst_sel_y = (number_of_elements < 2) ? SQ_SEL_0 : SQ_SEL_Y;
760 vfetch_instruction_ptr->m_Word1.f.dst_sel_z = (number_of_elements < 3) ? SQ_SEL_0 : SQ_SEL_Z;
761 vfetch_instruction_ptr->m_Word1.f.dst_sel_w = (number_of_elements < 4) ? SQ_SEL_1 : SQ_SEL_W;
762
763 vfetch_instruction_ptr->m_Word1.f.use_const_fields = 1;
764
765 // Destination register
766 vfetch_instruction_ptr->m_Word1_GPR.f.dst_gpr = destination_register;
767 vfetch_instruction_ptr->m_Word1_GPR.f.dst_rel = SQ_ABSOLUTE;
768
769 vfetch_instruction_ptr->m_Word2.f.offset = 0;
770 vfetch_instruction_ptr->m_Word2.f.const_buf_no_stride = 0x0;
771
772 vfetch_instruction_ptr->m_Word2.f.mega_fetch = is_mega_fetch_flag;
773
774 if (assembled_vfetch_instruction_ptr == NULL)
775 {
776 if ( GL_FALSE == add_vfetch_instruction(pAsm, (R700VertexInstruction *)vfetch_instruction_ptr) )
777 {
778 return GL_FALSE;
779 }
780
781 if (pAsm->vfetch_instruction_ptr_array[ gl_client_id ] != NULL)
782 {
783 return GL_FALSE;
784 }
785 else
786 {
787 pAsm->vfetch_instruction_ptr_array[ gl_client_id ] = vfetch_instruction_ptr;
788 }
789 }
790
791 return GL_TRUE;
792 }
793
794 GLboolean assemble_vfetch_instruction2(r700_AssemblerBase* pAsm,
795 GLuint destination_register,
796 GLenum type,
797 GLint size,
798 GLubyte element,
799 GLuint _signed,
800 GLboolean normalize,
801 VTX_FETCH_METHOD * pFetchMethod)
802 {
803 GLuint client_size_inbyte;
804 GLuint data_format;
805 GLuint mega_fetch_count;
806 GLuint is_mega_fetch_flag;
807
808 R700VertexGenericFetch* vfetch_instruction_ptr;
809 R700VertexGenericFetch* assembled_vfetch_instruction_ptr
810 = pAsm->vfetch_instruction_ptr_array[element];
811
812 if (assembled_vfetch_instruction_ptr == NULL)
813 {
814 vfetch_instruction_ptr = (R700VertexGenericFetch*) CALLOC_STRUCT(R700VertexGenericFetch);
815 if (vfetch_instruction_ptr == NULL)
816 {
817 return GL_FALSE;
818 }
819 Init_R700VertexGenericFetch(vfetch_instruction_ptr);
820 }
821 else
822 {
823 vfetch_instruction_ptr = assembled_vfetch_instruction_ptr;
824 }
825
826 data_format = GetSurfaceFormat(type, size, &client_size_inbyte);
827
828 if(GL_TRUE == pFetchMethod->bEnableMini) //More conditions here
829 {
830 //TODO : mini fetch
831 }
832 else
833 {
834 mega_fetch_count = MEGA_FETCH_BYTES - 1;
835 is_mega_fetch_flag = 0x1;
836 pFetchMethod->mega_fetch_remainder = MEGA_FETCH_BYTES - client_size_inbyte;
837 }
838
839 vfetch_instruction_ptr->m_Word0.f.vtx_inst = SQ_VTX_INST_FETCH;
840 vfetch_instruction_ptr->m_Word0.f.fetch_type = SQ_VTX_FETCH_VERTEX_DATA;
841 vfetch_instruction_ptr->m_Word0.f.fetch_whole_quad = 0x0;
842
843 vfetch_instruction_ptr->m_Word0.f.buffer_id = element;
844 vfetch_instruction_ptr->m_Word0.f.src_gpr = 0x0;
845 vfetch_instruction_ptr->m_Word0.f.src_rel = SQ_ABSOLUTE;
846 vfetch_instruction_ptr->m_Word0.f.src_sel_x = SQ_SEL_X;
847 vfetch_instruction_ptr->m_Word0.f.mega_fetch_count = mega_fetch_count;
848
849 vfetch_instruction_ptr->m_Word1.f.dst_sel_x = (size < 1) ? SQ_SEL_0 : SQ_SEL_X;
850 vfetch_instruction_ptr->m_Word1.f.dst_sel_y = (size < 2) ? SQ_SEL_0 : SQ_SEL_Y;
851 vfetch_instruction_ptr->m_Word1.f.dst_sel_z = (size < 3) ? SQ_SEL_0 : SQ_SEL_Z;
852 vfetch_instruction_ptr->m_Word1.f.dst_sel_w = (size < 4) ? SQ_SEL_1 : SQ_SEL_W;
853
854 vfetch_instruction_ptr->m_Word1.f.use_const_fields = 1;
855 vfetch_instruction_ptr->m_Word1.f.data_format = data_format;
856 vfetch_instruction_ptr->m_Word2.f.endian_swap = SQ_ENDIAN_NONE;
857
858 if(1 == _signed)
859 {
860 vfetch_instruction_ptr->m_Word1.f.format_comp_all = SQ_FORMAT_COMP_SIGNED;
861 }
862 else
863 {
864 vfetch_instruction_ptr->m_Word1.f.format_comp_all = SQ_FORMAT_COMP_UNSIGNED;
865 }
866
867 if(GL_TRUE == normalize)
868 {
869 vfetch_instruction_ptr->m_Word1.f.num_format_all = SQ_NUM_FORMAT_NORM;
870 }
871 else
872 {
873 vfetch_instruction_ptr->m_Word1.f.num_format_all = SQ_NUM_FORMAT_INT;
874 }
875
876 // Destination register
877 vfetch_instruction_ptr->m_Word1_GPR.f.dst_gpr = destination_register;
878 vfetch_instruction_ptr->m_Word1_GPR.f.dst_rel = SQ_ABSOLUTE;
879
880 vfetch_instruction_ptr->m_Word2.f.offset = 0;
881 vfetch_instruction_ptr->m_Word2.f.const_buf_no_stride = 0x0;
882
883 vfetch_instruction_ptr->m_Word2.f.mega_fetch = is_mega_fetch_flag;
884
885 if (assembled_vfetch_instruction_ptr == NULL)
886 {
887 if ( GL_FALSE == add_vfetch_instruction(pAsm, (R700VertexInstruction *)vfetch_instruction_ptr) )
888 {
889 return GL_FALSE;
890 }
891
892 if (pAsm->vfetch_instruction_ptr_array[element] != NULL)
893 {
894 return GL_FALSE;
895 }
896 else
897 {
898 pAsm->vfetch_instruction_ptr_array[element] = vfetch_instruction_ptr;
899 }
900 }
901
902 return GL_TRUE;
903 }
904
905 GLboolean cleanup_vfetch_instructions(r700_AssemblerBase* pAsm)
906 {
907 GLint i;
908 pAsm->cf_current_clause_type = CF_EMPTY_CLAUSE;
909 pAsm->cf_current_vtx_clause_ptr = NULL;
910
911 for (i=0; i<VERT_ATTRIB_MAX; i++)
912 {
913 pAsm->vfetch_instruction_ptr_array[ i ] = NULL;
914 }
915
916 cleanup_vfetch_shaderinst(pAsm->pR700Shader);
917
918 return GL_TRUE;
919 }
920
921 GLuint gethelpr(r700_AssemblerBase* pAsm)
922 {
923 GLuint r = pAsm->uHelpReg;
924 pAsm->uHelpReg++;
925 if (pAsm->uHelpReg > pAsm->number_used_registers)
926 {
927 pAsm->number_used_registers = pAsm->uHelpReg;
928 }
929 return r;
930 }
931 void resethelpr(r700_AssemblerBase* pAsm)
932 {
933 pAsm->uHelpReg = pAsm->uFirstHelpReg;
934 }
935
936 void checkop_init(r700_AssemblerBase* pAsm)
937 {
938 resethelpr(pAsm);
939 pAsm->aArgSubst[0] =
940 pAsm->aArgSubst[1] =
941 pAsm->aArgSubst[2] =
942 pAsm->aArgSubst[3] = -1;
943 }
944
945 GLboolean mov_temp(r700_AssemblerBase* pAsm, int src)
946 {
947 GLuint tmp = gethelpr(pAsm);
948
949 //mov src to temp helper gpr.
950 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
951
952 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
953
954 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
955 pAsm->D.dst.reg = tmp;
956
957 nomask_PVSDST(&(pAsm->D.dst));
958
959 if( GL_FALSE == assemble_src(pAsm, src, 0) )
960 {
961 return GL_FALSE;
962 }
963
964 noswizzle_PVSSRC(&(pAsm->S[0].src));
965 noneg_PVSSRC(&(pAsm->S[0].src));
966
967 if( GL_FALSE == next_ins(pAsm) )
968 {
969 return GL_FALSE;
970 }
971
972 pAsm->aArgSubst[1 + src] = tmp;
973
974 return GL_TRUE;
975 }
976
977 GLboolean checkop1(r700_AssemblerBase* pAsm)
978 {
979 checkop_init(pAsm);
980 return GL_TRUE;
981 }
982
983 GLboolean checkop2(r700_AssemblerBase* pAsm)
984 {
985 GLboolean bSrcConst[2];
986 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
987
988 checkop_init(pAsm);
989
990 if( (pILInst->SrcReg[0].File == PROGRAM_CONSTANT) ||
991 (pILInst->SrcReg[0].File == PROGRAM_LOCAL_PARAM) ||
992 (pILInst->SrcReg[0].File == PROGRAM_ENV_PARAM) ||
993 (pILInst->SrcReg[0].File == PROGRAM_STATE_VAR) )
994 {
995 bSrcConst[0] = GL_TRUE;
996 }
997 else
998 {
999 bSrcConst[0] = GL_FALSE;
1000 }
1001 if( (pILInst->SrcReg[1].File == PROGRAM_CONSTANT) ||
1002 (pILInst->SrcReg[1].File == PROGRAM_LOCAL_PARAM) ||
1003 (pILInst->SrcReg[1].File == PROGRAM_ENV_PARAM) ||
1004 (pILInst->SrcReg[1].File == PROGRAM_STATE_VAR) )
1005 {
1006 bSrcConst[1] = GL_TRUE;
1007 }
1008 else
1009 {
1010 bSrcConst[1] = GL_FALSE;
1011 }
1012
1013 if( (bSrcConst[0] == GL_TRUE) && (bSrcConst[1] == GL_TRUE) )
1014 {
1015 if(pILInst->SrcReg[0].Index != pILInst->SrcReg[1].Index)
1016 {
1017 if( GL_FALSE == mov_temp(pAsm, 1) )
1018 {
1019 return GL_FALSE;
1020 }
1021 }
1022 }
1023
1024 return GL_TRUE;
1025 }
1026
1027 GLboolean checkop3(r700_AssemblerBase* pAsm)
1028 {
1029 GLboolean bSrcConst[3];
1030 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
1031
1032 checkop_init(pAsm);
1033
1034 if( (pILInst->SrcReg[0].File == PROGRAM_CONSTANT) ||
1035 (pILInst->SrcReg[0].File == PROGRAM_LOCAL_PARAM) ||
1036 (pILInst->SrcReg[0].File == PROGRAM_ENV_PARAM) ||
1037 (pILInst->SrcReg[0].File == PROGRAM_STATE_VAR) )
1038 {
1039 bSrcConst[0] = GL_TRUE;
1040 }
1041 else
1042 {
1043 bSrcConst[0] = GL_FALSE;
1044 }
1045 if( (pILInst->SrcReg[1].File == PROGRAM_CONSTANT) ||
1046 (pILInst->SrcReg[1].File == PROGRAM_LOCAL_PARAM) ||
1047 (pILInst->SrcReg[1].File == PROGRAM_ENV_PARAM) ||
1048 (pILInst->SrcReg[1].File == PROGRAM_STATE_VAR) )
1049 {
1050 bSrcConst[1] = GL_TRUE;
1051 }
1052 else
1053 {
1054 bSrcConst[1] = GL_FALSE;
1055 }
1056 if( (pILInst->SrcReg[2].File == PROGRAM_CONSTANT) ||
1057 (pILInst->SrcReg[2].File == PROGRAM_LOCAL_PARAM) ||
1058 (pILInst->SrcReg[2].File == PROGRAM_ENV_PARAM) ||
1059 (pILInst->SrcReg[2].File == PROGRAM_STATE_VAR) )
1060 {
1061 bSrcConst[2] = GL_TRUE;
1062 }
1063 else
1064 {
1065 bSrcConst[2] = GL_FALSE;
1066 }
1067
1068 if( (GL_TRUE == bSrcConst[0]) &&
1069 (GL_TRUE == bSrcConst[1]) &&
1070 (GL_TRUE == bSrcConst[2]) )
1071 {
1072 if( GL_FALSE == mov_temp(pAsm, 1) )
1073 {
1074 return GL_FALSE;
1075 }
1076 if( GL_FALSE == mov_temp(pAsm, 2) )
1077 {
1078 return GL_FALSE;
1079 }
1080
1081 return GL_TRUE;
1082 }
1083 else if( (GL_TRUE == bSrcConst[0]) &&
1084 (GL_TRUE == bSrcConst[1]) )
1085 {
1086 if(pILInst->SrcReg[0].Index != pILInst->SrcReg[1].Index)
1087 {
1088 if( GL_FALSE == mov_temp(pAsm, 1) )
1089 {
1090 return 1;
1091 }
1092 }
1093
1094 return GL_TRUE;
1095 }
1096 else if ( (GL_TRUE == bSrcConst[0]) &&
1097 (GL_TRUE == bSrcConst[2]) )
1098 {
1099 if(pILInst->SrcReg[0].Index != pILInst->SrcReg[2].Index)
1100 {
1101 if( GL_FALSE == mov_temp(pAsm, 2) )
1102 {
1103 return GL_FALSE;
1104 }
1105 }
1106
1107 return GL_TRUE;
1108 }
1109 else if( (GL_TRUE == bSrcConst[1]) &&
1110 (GL_TRUE == bSrcConst[2]) )
1111 {
1112 if(pILInst->SrcReg[1].Index != pILInst->SrcReg[2].Index)
1113 {
1114 if( GL_FALSE == mov_temp(pAsm, 2) )
1115 {
1116 return GL_FALSE;
1117 }
1118 }
1119
1120 return GL_TRUE;
1121 }
1122
1123 return GL_TRUE;
1124 }
1125
1126 GLboolean assemble_src(r700_AssemblerBase *pAsm,
1127 int src,
1128 int fld)
1129 {
1130 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
1131
1132 if (fld == -1)
1133 {
1134 fld = src;
1135 }
1136
1137 if(pAsm->aArgSubst[1+src] >= 0)
1138 {
1139 setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_ABSOLUTE);
1140 pAsm->S[fld].src.rtype = SRC_REG_TEMPORARY;
1141 pAsm->S[fld].src.reg = pAsm->aArgSubst[1+src];
1142 }
1143 else
1144 {
1145 switch (pILInst->SrcReg[src].File)
1146 {
1147 case PROGRAM_TEMPORARY:
1148 setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_ABSOLUTE);
1149 pAsm->S[fld].src.rtype = SRC_REG_TEMPORARY;
1150 pAsm->S[fld].src.reg = pILInst->SrcReg[src].Index + pAsm->starting_temp_register_number;
1151 break;
1152 case PROGRAM_CONSTANT:
1153 case PROGRAM_LOCAL_PARAM:
1154 case PROGRAM_ENV_PARAM:
1155 case PROGRAM_STATE_VAR:
1156 if (1 == pILInst->SrcReg[src].RelAddr)
1157 {
1158 setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_RELATIVE_A0);
1159 }
1160 else
1161 {
1162 setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_ABSOLUTE);
1163 }
1164
1165 pAsm->S[fld].src.rtype = SRC_REG_CONSTANT;
1166 pAsm->S[fld].src.reg = pILInst->SrcReg[src].Index;
1167 break;
1168 case PROGRAM_INPUT:
1169 setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_ABSOLUTE);
1170 pAsm->S[fld].src.rtype = SRC_REG_INPUT;
1171 switch (pAsm->currentShaderType)
1172 {
1173 case SPT_FP:
1174 pAsm->S[fld].src.reg = pAsm->uiFP_AttributeMap[pILInst->SrcReg[src].Index];
1175 break;
1176 case SPT_VP:
1177 pAsm->S[fld].src.reg = pAsm->ucVP_AttributeMap[pILInst->SrcReg[src].Index];
1178 break;
1179 }
1180 break;
1181 default:
1182 radeon_error("Invalid source argument type\n");
1183 return GL_FALSE;
1184 }
1185 }
1186
1187 pAsm->S[fld].src.swizzlex = pILInst->SrcReg[src].Swizzle & 0x7;
1188 pAsm->S[fld].src.swizzley = (pILInst->SrcReg[src].Swizzle >> 3) & 0x7;
1189 pAsm->S[fld].src.swizzlez = (pILInst->SrcReg[src].Swizzle >> 6) & 0x7;
1190 pAsm->S[fld].src.swizzlew = (pILInst->SrcReg[src].Swizzle >> 9) & 0x7;
1191
1192 pAsm->S[fld].src.negx = pILInst->SrcReg[src].Negate & 0x1;
1193 pAsm->S[fld].src.negy = (pILInst->SrcReg[src].Negate >> 1) & 0x1;
1194 pAsm->S[fld].src.negz = (pILInst->SrcReg[src].Negate >> 2) & 0x1;
1195 pAsm->S[fld].src.negw = (pILInst->SrcReg[src].Negate >> 3) & 0x1;
1196
1197 return GL_TRUE;
1198 }
1199
1200 GLboolean assemble_dst(r700_AssemblerBase *pAsm)
1201 {
1202 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
1203 switch (pILInst->DstReg.File)
1204 {
1205 case PROGRAM_TEMPORARY:
1206 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
1207 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
1208 pAsm->D.dst.reg = pILInst->DstReg.Index + pAsm->starting_temp_register_number;
1209 break;
1210 case PROGRAM_ADDRESS:
1211 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
1212 pAsm->D.dst.rtype = DST_REG_A0;
1213 pAsm->D.dst.reg = 0;
1214 break;
1215 case PROGRAM_OUTPUT:
1216 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
1217 pAsm->D.dst.rtype = DST_REG_OUT;
1218 switch (pAsm->currentShaderType)
1219 {
1220 case SPT_FP:
1221 pAsm->D.dst.reg = pAsm->uiFP_OutputMap[pILInst->DstReg.Index];
1222 break;
1223 case SPT_VP:
1224 pAsm->D.dst.reg = pAsm->ucVP_OutputMap[pILInst->DstReg.Index];
1225 break;
1226 }
1227 break;
1228 default:
1229 radeon_error("Invalid destination output argument type\n");
1230 return GL_FALSE;
1231 }
1232
1233 pAsm->D.dst.writex = pILInst->DstReg.WriteMask & 0x1;
1234 pAsm->D.dst.writey = (pILInst->DstReg.WriteMask >> 1) & 0x1;
1235 pAsm->D.dst.writez = (pILInst->DstReg.WriteMask >> 2) & 0x1;
1236 pAsm->D.dst.writew = (pILInst->DstReg.WriteMask >> 3) & 0x1;
1237
1238 return GL_TRUE;
1239 }
1240
1241 GLboolean tex_dst(r700_AssemblerBase *pAsm)
1242 {
1243 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
1244
1245 if(PROGRAM_TEMPORARY == pILInst->DstReg.File)
1246 {
1247 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
1248 pAsm->D.dst.reg = pAsm->pILInst[pAsm->uiCurInst].DstReg.Index + pAsm->starting_temp_register_number;
1249
1250 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
1251 }
1252 else if(PROGRAM_OUTPUT == pILInst->DstReg.File)
1253 {
1254 pAsm->D.dst.rtype = DST_REG_OUT;
1255 switch (pAsm->currentShaderType)
1256 {
1257 case SPT_FP:
1258 pAsm->D.dst.reg = pAsm->uiFP_OutputMap[pILInst->DstReg.Index];
1259 break;
1260 case SPT_VP:
1261 pAsm->D.dst.reg = pAsm->ucVP_OutputMap[pILInst->DstReg.Index];
1262 break;
1263 }
1264
1265 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
1266 }
1267 else
1268 {
1269 radeon_error("Invalid destination output argument type\n");
1270 return GL_FALSE;
1271 }
1272
1273 pAsm->D.dst.writex = pILInst->DstReg.WriteMask & 0x1;
1274 pAsm->D.dst.writey = (pILInst->DstReg.WriteMask >> 1) & 0x1;
1275 pAsm->D.dst.writez = (pILInst->DstReg.WriteMask >> 2) & 0x1;
1276 pAsm->D.dst.writew = (pILInst->DstReg.WriteMask >> 3) & 0x1;
1277
1278 return GL_TRUE;
1279 }
1280
1281 GLboolean tex_src(r700_AssemblerBase *pAsm)
1282 {
1283 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
1284
1285 GLboolean bValidTexCoord = GL_FALSE;
1286
1287 if(pAsm->aArgSubst[1] >= 0)
1288 {
1289 bValidTexCoord = GL_TRUE;
1290 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
1291 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
1292 pAsm->S[0].src.reg = pAsm->aArgSubst[1];
1293 }
1294 else
1295 {
1296 switch (pILInst->SrcReg[0].File) {
1297 case PROGRAM_CONSTANT:
1298 case PROGRAM_LOCAL_PARAM:
1299 case PROGRAM_ENV_PARAM:
1300 case PROGRAM_STATE_VAR:
1301 break;
1302 case PROGRAM_TEMPORARY:
1303 bValidTexCoord = GL_TRUE;
1304 pAsm->S[0].src.reg = pILInst->SrcReg[0].Index +
1305 pAsm->starting_temp_register_number;
1306 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
1307 break;
1308 case PROGRAM_INPUT:
1309 switch (pILInst->SrcReg[0].Index)
1310 {
1311 case FRAG_ATTRIB_WPOS:
1312 case FRAG_ATTRIB_COL0:
1313 case FRAG_ATTRIB_COL1:
1314 case FRAG_ATTRIB_FOGC:
1315 case FRAG_ATTRIB_TEX0:
1316 case FRAG_ATTRIB_TEX1:
1317 case FRAG_ATTRIB_TEX2:
1318 case FRAG_ATTRIB_TEX3:
1319 case FRAG_ATTRIB_TEX4:
1320 case FRAG_ATTRIB_TEX5:
1321 case FRAG_ATTRIB_TEX6:
1322 case FRAG_ATTRIB_TEX7:
1323 bValidTexCoord = GL_TRUE;
1324 pAsm->S[0].src.reg =
1325 pAsm->uiFP_AttributeMap[pILInst->SrcReg[0].Index];
1326 pAsm->S[0].src.rtype = SRC_REG_INPUT;
1327 break;
1328 case FRAG_ATTRIB_FACE:
1329 fprintf(stderr, "FRAG_ATTRIB_FACE unsupported\n");
1330 break;
1331 case FRAG_ATTRIB_PNTC:
1332 fprintf(stderr, "FRAG_ATTRIB_PNTC unsupported\n");
1333 break;
1334 case FRAG_ATTRIB_VAR0:
1335 fprintf(stderr, "FRAG_ATTRIB_VAR0 unsupported\n");
1336 break;
1337 }
1338 break;
1339 }
1340 }
1341
1342 if(GL_TRUE == bValidTexCoord)
1343 {
1344 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
1345 }
1346 else
1347 {
1348 radeon_error("Invalid source texcoord for TEX instruction\n");
1349 return GL_FALSE;
1350 }
1351
1352 pAsm->S[0].src.swizzlex = pILInst->SrcReg[0].Swizzle & 0x7;
1353 pAsm->S[0].src.swizzley = (pILInst->SrcReg[0].Swizzle >> 3) & 0x7;
1354 pAsm->S[0].src.swizzlez = (pILInst->SrcReg[0].Swizzle >> 6) & 0x7;
1355 pAsm->S[0].src.swizzlew = (pILInst->SrcReg[0].Swizzle >> 9) & 0x7;
1356
1357 pAsm->S[0].src.negx = pILInst->SrcReg[0].Negate & 0x1;
1358 pAsm->S[0].src.negy = (pILInst->SrcReg[0].Negate >> 1) & 0x1;
1359 pAsm->S[0].src.negz = (pILInst->SrcReg[0].Negate >> 2) & 0x1;
1360 pAsm->S[0].src.negw = (pILInst->SrcReg[0].Negate >> 3) & 0x1;
1361
1362 return GL_TRUE;
1363 }
1364
1365 GLboolean assemble_tex_instruction(r700_AssemblerBase *pAsm, GLboolean normalized)
1366 {
1367 PVSSRC * texture_coordinate_source;
1368 PVSSRC * texture_unit_source;
1369
1370 R700TextureInstruction* tex_instruction_ptr = (R700TextureInstruction*) CALLOC_STRUCT(R700TextureInstruction);
1371 if (tex_instruction_ptr == NULL)
1372 {
1373 return GL_FALSE;
1374 }
1375 Init_R700TextureInstruction(tex_instruction_ptr);
1376
1377 texture_coordinate_source = &(pAsm->S[0].src);
1378 texture_unit_source = &(pAsm->S[1].src);
1379
1380 tex_instruction_ptr->m_Word0.f.tex_inst = pAsm->D.dst.opcode;
1381 tex_instruction_ptr->m_Word0.f.bc_frac_mode = 0x0;
1382 tex_instruction_ptr->m_Word0.f.fetch_whole_quad = 0x0;
1383
1384 tex_instruction_ptr->m_Word0.f.resource_id = texture_unit_source->reg;
1385
1386 tex_instruction_ptr->m_Word1.f.lod_bias = 0x0;
1387 if (normalized) {
1388 tex_instruction_ptr->m_Word1.f.coord_type_x = SQ_TEX_NORMALIZED;
1389 tex_instruction_ptr->m_Word1.f.coord_type_y = SQ_TEX_NORMALIZED;
1390 tex_instruction_ptr->m_Word1.f.coord_type_z = SQ_TEX_NORMALIZED;
1391 tex_instruction_ptr->m_Word1.f.coord_type_w = SQ_TEX_NORMALIZED;
1392 } else {
1393 /* XXX: UNNORMALIZED tex coords have limited wrap modes */
1394 tex_instruction_ptr->m_Word1.f.coord_type_x = SQ_TEX_UNNORMALIZED;
1395 tex_instruction_ptr->m_Word1.f.coord_type_y = SQ_TEX_UNNORMALIZED;
1396 tex_instruction_ptr->m_Word1.f.coord_type_z = SQ_TEX_UNNORMALIZED;
1397 tex_instruction_ptr->m_Word1.f.coord_type_w = SQ_TEX_UNNORMALIZED;
1398 }
1399
1400 tex_instruction_ptr->m_Word2.f.offset_x = 0x0;
1401 tex_instruction_ptr->m_Word2.f.offset_y = 0x0;
1402 tex_instruction_ptr->m_Word2.f.offset_z = 0x0;
1403
1404 tex_instruction_ptr->m_Word2.f.sampler_id = texture_unit_source->reg;
1405
1406 // dst
1407 if ( (pAsm->D.dst.rtype == DST_REG_TEMPORARY) ||
1408 (pAsm->D.dst.rtype == DST_REG_OUT) )
1409 {
1410 tex_instruction_ptr->m_Word0.f.src_gpr = texture_coordinate_source->reg;
1411 tex_instruction_ptr->m_Word0.f.src_rel = SQ_ABSOLUTE;
1412
1413 tex_instruction_ptr->m_Word1.f.dst_gpr = pAsm->D.dst.reg;
1414 tex_instruction_ptr->m_Word1.f.dst_rel = SQ_ABSOLUTE;
1415
1416 tex_instruction_ptr->m_Word1.f.dst_sel_x = (pAsm->D.dst.writex ? texture_unit_source->swizzlex : SQ_SEL_MASK);
1417 tex_instruction_ptr->m_Word1.f.dst_sel_y = (pAsm->D.dst.writey ? texture_unit_source->swizzley : SQ_SEL_MASK);
1418 tex_instruction_ptr->m_Word1.f.dst_sel_z = (pAsm->D.dst.writez ? texture_unit_source->swizzlez : SQ_SEL_MASK);
1419 tex_instruction_ptr->m_Word1.f.dst_sel_w = (pAsm->D.dst.writew ? texture_unit_source->swizzlew : SQ_SEL_MASK);
1420
1421
1422 tex_instruction_ptr->m_Word2.f.src_sel_x = texture_coordinate_source->swizzlex;
1423 tex_instruction_ptr->m_Word2.f.src_sel_y = texture_coordinate_source->swizzley;
1424 tex_instruction_ptr->m_Word2.f.src_sel_z = texture_coordinate_source->swizzlez;
1425 tex_instruction_ptr->m_Word2.f.src_sel_w = texture_coordinate_source->swizzlew;
1426 }
1427 else
1428 {
1429 radeon_error("Only temp destination registers supported for TEX dest regs.\n");
1430 return GL_FALSE;
1431 }
1432
1433 if( GL_FALSE == add_tex_instruction(pAsm, tex_instruction_ptr) )
1434 {
1435 return GL_FALSE;
1436 }
1437
1438 return GL_TRUE;
1439 }
1440
1441 void initialize(r700_AssemblerBase *pAsm)
1442 {
1443 GLuint cycle, component;
1444
1445 for (cycle=0; cycle<NUMBER_OF_CYCLES; cycle++)
1446 {
1447 for (component=0; component<NUMBER_OF_COMPONENTS; component++)
1448 {
1449 pAsm->hw_gpr[cycle][component] = (-1);
1450 }
1451 }
1452 for (component=0; component<NUMBER_OF_COMPONENTS; component++)
1453 {
1454 pAsm->hw_cfile_addr[component] = (-1);
1455 pAsm->hw_cfile_chan[component] = (-1);
1456 }
1457 }
1458
1459 GLboolean assemble_alu_src(R700ALUInstruction* alu_instruction_ptr,
1460 int source_index,
1461 PVSSRC* pSource,
1462 BITS scalar_channel_index)
1463 {
1464 BITS src_sel;
1465 BITS src_rel;
1466 BITS src_chan;
1467 BITS src_neg;
1468
1469 //--------------------------------------------------------------------------
1470 // Source for operands src0, src1.
1471 // Values [0,127] correspond to GPR[0..127].
1472 // Values [256,511] correspond to cfile constants c[0..255].
1473
1474 //--------------------------------------------------------------------------
1475 // Other special values are shown in the list below.
1476
1477 // 248 SQ_ALU_SRC_0: special constant 0.0.
1478 // 249 SQ_ALU_SRC_1: special constant 1.0 float.
1479
1480 // 250 SQ_ALU_SRC_1_INT: special constant 1 integer.
1481 // 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer.
1482
1483 // 252 SQ_ALU_SRC_0_5: special constant 0.5 float.
1484 // 253 SQ_ALU_SRC_LITERAL: literal constant.
1485
1486 // 254 SQ_ALU_SRC_PV: previous vector result.
1487 // 255 SQ_ALU_SRC_PS: previous scalar result.
1488 //--------------------------------------------------------------------------
1489
1490 BITS channel_swizzle;
1491 switch (scalar_channel_index)
1492 {
1493 case 0: channel_swizzle = pSource->swizzlex; break;
1494 case 1: channel_swizzle = pSource->swizzley; break;
1495 case 2: channel_swizzle = pSource->swizzlez; break;
1496 case 3: channel_swizzle = pSource->swizzlew; break;
1497 default: channel_swizzle = SQ_SEL_MASK; break;
1498 }
1499
1500 if(channel_swizzle == SQ_SEL_0)
1501 {
1502 src_sel = SQ_ALU_SRC_0;
1503 }
1504 else if (channel_swizzle == SQ_SEL_1)
1505 {
1506 src_sel = SQ_ALU_SRC_1;
1507 }
1508 else
1509 {
1510 if ( (pSource->rtype == SRC_REG_TEMPORARY) ||
1511 (pSource->rtype == SRC_REG_INPUT)
1512 )
1513 {
1514 src_sel = pSource->reg;
1515 }
1516 else if (pSource->rtype == SRC_REG_CONSTANT)
1517 {
1518 src_sel = pSource->reg + CFILE_REGISTER_OFFSET;
1519 }
1520 else
1521 {
1522 radeon_error("Source (%d) register type (%d) not one of TEMP, INPUT, or CONSTANT.\n",
1523 source_index, pSource->rtype);
1524 return GL_FALSE;
1525 }
1526 }
1527
1528 if( ADDR_ABSOLUTE == addrmode_PVSSRC(pSource) )
1529 {
1530 src_rel = SQ_ABSOLUTE;
1531 }
1532 else
1533 {
1534 src_rel = SQ_RELATIVE;
1535 }
1536
1537 switch (channel_swizzle)
1538 {
1539 case SQ_SEL_X:
1540 src_chan = SQ_CHAN_X;
1541 break;
1542 case SQ_SEL_Y:
1543 src_chan = SQ_CHAN_Y;
1544 break;
1545 case SQ_SEL_Z:
1546 src_chan = SQ_CHAN_Z;
1547 break;
1548 case SQ_SEL_W:
1549 src_chan = SQ_CHAN_W;
1550 break;
1551 case SQ_SEL_0:
1552 case SQ_SEL_1:
1553 // Does not matter since src_sel controls
1554 src_chan = SQ_CHAN_X;
1555 break;
1556 default:
1557 radeon_error("Unknown source select value (%d) in assemble_alu_src().\n", channel_swizzle);
1558 return GL_FALSE;
1559 break;
1560 }
1561
1562 switch (scalar_channel_index)
1563 {
1564 case 0: src_neg = pSource->negx; break;
1565 case 1: src_neg = pSource->negy; break;
1566 case 2: src_neg = pSource->negz; break;
1567 case 3: src_neg = pSource->negw; break;
1568 default: src_neg = 0; break;
1569 }
1570
1571 switch (source_index)
1572 {
1573 case 0:
1574 alu_instruction_ptr->m_Word0.f.src0_sel = src_sel;
1575 alu_instruction_ptr->m_Word0.f.src0_rel = src_rel;
1576 alu_instruction_ptr->m_Word0.f.src0_chan = src_chan;
1577 alu_instruction_ptr->m_Word0.f.src0_neg = src_neg;
1578 break;
1579 case 1:
1580 alu_instruction_ptr->m_Word0.f.src1_sel = src_sel;
1581 alu_instruction_ptr->m_Word0.f.src1_rel = src_rel;
1582 alu_instruction_ptr->m_Word0.f.src1_chan = src_chan;
1583 alu_instruction_ptr->m_Word0.f.src1_neg = src_neg;
1584 break;
1585 case 2:
1586 alu_instruction_ptr->m_Word1_OP3.f.src2_sel = src_sel;
1587 alu_instruction_ptr->m_Word1_OP3.f.src2_rel = src_rel;
1588 alu_instruction_ptr->m_Word1_OP3.f.src2_chan = src_chan;
1589 alu_instruction_ptr->m_Word1_OP3.f.src2_neg = src_neg;
1590 break;
1591 default:
1592 radeon_error("Only three sources allowed in ALU opcodes.\n");
1593 return GL_FALSE;
1594 break;
1595 }
1596
1597 return GL_TRUE;
1598 }
1599
1600 GLboolean add_alu_instruction(r700_AssemblerBase* pAsm,
1601 R700ALUInstruction* alu_instruction_ptr,
1602 GLuint contiguous_slots_needed)
1603 {
1604 if( GL_FALSE == check_current_clause(pAsm, CF_ALU_CLAUSE) )
1605 {
1606 return GL_FALSE;
1607 }
1608
1609 if ( pAsm->cf_current_alu_clause_ptr == NULL ||
1610 ( (pAsm->cf_current_alu_clause_ptr != NULL) &&
1611 (pAsm->cf_current_alu_clause_ptr->m_Word1.f.count >= (GetCFMaxInstructions(pAsm->cf_current_alu_clause_ptr->m_ShaderInstType)-contiguous_slots_needed-1) )
1612 ) )
1613 {
1614
1615 //new cf inst for this clause
1616 pAsm->cf_current_alu_clause_ptr = (R700ControlFlowALUClause*) CALLOC_STRUCT(R700ControlFlowALUClause);
1617
1618 // link the new cf to cf segment
1619 if(NULL != pAsm->cf_current_alu_clause_ptr)
1620 {
1621 Init_R700ControlFlowALUClause(pAsm->cf_current_alu_clause_ptr);
1622 AddCFInstruction( pAsm->pR700Shader,
1623 (R700ControlFlowInstruction *)pAsm->cf_current_alu_clause_ptr );
1624 }
1625 else
1626 {
1627 radeon_error("Could not allocate a new ALU CF instruction.\n");
1628 return GL_FALSE;
1629 }
1630
1631 pAsm->cf_current_alu_clause_ptr->m_Word0.f.kcache_bank0 = 0x0;
1632 pAsm->cf_current_alu_clause_ptr->m_Word0.f.kcache_bank1 = 0x0;
1633 pAsm->cf_current_alu_clause_ptr->m_Word0.f.kcache_mode0 = SQ_CF_KCACHE_NOP;
1634
1635 pAsm->cf_current_alu_clause_ptr->m_Word1.f.kcache_mode1 = SQ_CF_KCACHE_NOP;
1636 pAsm->cf_current_alu_clause_ptr->m_Word1.f.kcache_addr0 = 0x0;
1637 pAsm->cf_current_alu_clause_ptr->m_Word1.f.kcache_addr1 = 0x0;
1638
1639 //cf_current_alu_clause_ptr->m_Word1.f.count = number_of_scalar_operations - 1;
1640 pAsm->cf_current_alu_clause_ptr->m_Word1.f.count = 0x0;
1641 pAsm->cf_current_alu_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_ALU;
1642
1643 pAsm->cf_current_alu_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
1644
1645 pAsm->cf_current_alu_clause_ptr->m_Word1.f.barrier = 0x1;
1646 }
1647 else
1648 {
1649 pAsm->cf_current_alu_clause_ptr->m_Word1.f.count++;
1650 }
1651
1652 // If this clause constains any instruction that is forward dependent on a TEX instruction,
1653 // set the whole_quad_mode for this clause
1654 if ( pAsm->pInstDeps[pAsm->uiCurInst].nDstDep > (-1) )
1655 {
1656 pAsm->cf_current_alu_clause_ptr->m_Word1.f.whole_quad_mode = 0x1;
1657 }
1658
1659 if (pAsm->cf_current_alu_clause_ptr->m_Word1.f.count >= (GetCFMaxInstructions(pAsm->cf_current_alu_clause_ptr->m_ShaderInstType)-1) )
1660 {
1661 alu_instruction_ptr->m_Word0.f.last = 1;
1662 }
1663
1664 if(NULL == pAsm->cf_current_alu_clause_ptr->m_pLinkedALUInstruction)
1665 {
1666 pAsm->cf_current_alu_clause_ptr->m_pLinkedALUInstruction = alu_instruction_ptr;
1667 alu_instruction_ptr->m_pLinkedALUClause = pAsm->cf_current_alu_clause_ptr;
1668 }
1669
1670 AddALUInstruction(pAsm->pR700Shader, alu_instruction_ptr);
1671
1672 return GL_TRUE;
1673 }
1674
1675 void get_src_properties(R700ALUInstruction* alu_instruction_ptr,
1676 int source_index,
1677 BITS* psrc_sel,
1678 BITS* psrc_rel,
1679 BITS* psrc_chan,
1680 BITS* psrc_neg)
1681 {
1682 switch (source_index)
1683 {
1684 case 0:
1685 *psrc_sel = alu_instruction_ptr->m_Word0.f.src0_sel ;
1686 *psrc_rel = alu_instruction_ptr->m_Word0.f.src0_rel ;
1687 *psrc_chan = alu_instruction_ptr->m_Word0.f.src0_chan;
1688 *psrc_neg = alu_instruction_ptr->m_Word0.f.src0_neg ;
1689 break;
1690
1691 case 1:
1692 *psrc_sel = alu_instruction_ptr->m_Word0.f.src1_sel ;
1693 *psrc_rel = alu_instruction_ptr->m_Word0.f.src1_rel ;
1694 *psrc_chan = alu_instruction_ptr->m_Word0.f.src1_chan;
1695 *psrc_neg = alu_instruction_ptr->m_Word0.f.src1_neg ;
1696 break;
1697
1698 case 2:
1699 *psrc_sel = alu_instruction_ptr->m_Word1_OP3.f.src2_sel;
1700 *psrc_rel = alu_instruction_ptr->m_Word1_OP3.f.src2_rel;
1701 *psrc_chan = alu_instruction_ptr->m_Word1_OP3.f.src2_chan;
1702 *psrc_neg = alu_instruction_ptr->m_Word1_OP3.f.src2_neg;
1703 break;
1704 }
1705 }
1706
1707 int is_cfile(BITS sel)
1708 {
1709 if (sel > 255 && sel < 512)
1710 {
1711 return 1;
1712 }
1713 return 0;
1714 }
1715
1716 int is_const(BITS sel)
1717 {
1718 if (is_cfile(sel))
1719 {
1720 return 1;
1721 }
1722 else if(sel >= SQ_ALU_SRC_0 && sel <= SQ_ALU_SRC_LITERAL)
1723 {
1724 return 1;
1725 }
1726 return 0;
1727 }
1728
1729 int is_gpr(BITS sel)
1730 {
1731 if (sel >= 0 && sel < 128)
1732 {
1733 return 1;
1734 }
1735 return 0;
1736 }
1737
1738 const GLuint BANK_SWIZZLE_VEC[8] = {SQ_ALU_VEC_210, //000
1739 SQ_ALU_VEC_120, //001
1740 SQ_ALU_VEC_102, //010
1741
1742 SQ_ALU_VEC_201, //011
1743 SQ_ALU_VEC_012, //100
1744 SQ_ALU_VEC_021, //101
1745
1746 SQ_ALU_VEC_012, //110
1747 SQ_ALU_VEC_012}; //111
1748
1749 const GLuint BANK_SWIZZLE_SCL[8] = {SQ_ALU_SCL_210, //000
1750 SQ_ALU_SCL_122, //001
1751 SQ_ALU_SCL_122, //010
1752
1753 SQ_ALU_SCL_221, //011
1754 SQ_ALU_SCL_212, //100
1755 SQ_ALU_SCL_122, //101
1756
1757 SQ_ALU_SCL_122, //110
1758 SQ_ALU_SCL_122}; //111
1759
1760 GLboolean reserve_cfile(r700_AssemblerBase* pAsm,
1761 GLuint sel,
1762 GLuint chan)
1763 {
1764 int res_match = (-1);
1765 int res_empty = (-1);
1766
1767 GLint res;
1768
1769 for (res=3; res>=0; res--)
1770 {
1771 if(pAsm->hw_cfile_addr[ res] < 0)
1772 {
1773 res_empty = res;
1774 }
1775 else if( (pAsm->hw_cfile_addr[res] == (int)sel)
1776 &&
1777 (pAsm->hw_cfile_chan[ res ] == (int) chan) )
1778 {
1779 res_match = res;
1780 }
1781 }
1782
1783 if(res_match >= 0)
1784 {
1785 // Read for this scalar component already reserved, nothing to do here.
1786 ;
1787 }
1788 else if(res_empty >= 0)
1789 {
1790 pAsm->hw_cfile_addr[ res_empty ] = sel;
1791 pAsm->hw_cfile_chan[ res_empty ] = chan;
1792 }
1793 else
1794 {
1795 radeon_error("All cfile read ports are used, cannot reference C$sel, channel $chan.\n");
1796 return GL_FALSE;
1797 }
1798 return GL_TRUE;
1799 }
1800
1801 GLboolean reserve_gpr(r700_AssemblerBase* pAsm, GLuint sel, GLuint chan, GLuint cycle)
1802 {
1803 if(pAsm->hw_gpr[cycle][chan] < 0)
1804 {
1805 pAsm->hw_gpr[cycle][chan] = sel;
1806 }
1807 else if(pAsm->hw_gpr[cycle][chan] != (int)sel)
1808 {
1809 radeon_error("Another scalar operation has already used GPR read port for given channel\n");
1810 return GL_FALSE;
1811 }
1812
1813 return GL_TRUE;
1814 }
1815
1816 GLboolean cycle_for_scalar_bank_swizzle(const int swiz, const int sel, GLuint* pCycle)
1817 {
1818 switch (swiz)
1819 {
1820 case SQ_ALU_SCL_210:
1821 {
1822 int table[3] = {2, 1, 0};
1823 *pCycle = table[sel];
1824 return GL_TRUE;
1825 }
1826 break;
1827 case SQ_ALU_SCL_122:
1828 {
1829 int table[3] = {1, 2, 2};
1830 *pCycle = table[sel];
1831 return GL_TRUE;
1832 }
1833 break;
1834 case SQ_ALU_SCL_212:
1835 {
1836 int table[3] = {2, 1, 2};
1837 *pCycle = table[sel];
1838 return GL_TRUE;
1839 }
1840 break;
1841 case SQ_ALU_SCL_221:
1842 {
1843 int table[3] = {2, 2, 1};
1844 *pCycle = table[sel];
1845 return GL_TRUE;
1846 }
1847 break;
1848 default:
1849 radeon_error("Bad Scalar bank swizzle value\n");
1850 break;
1851 }
1852
1853 return GL_FALSE;
1854 }
1855
1856 GLboolean cycle_for_vector_bank_swizzle(const int swiz, const int sel, GLuint* pCycle)
1857 {
1858 switch (swiz)
1859 {
1860 case SQ_ALU_VEC_012:
1861 {
1862 int table[3] = {0, 1, 2};
1863 *pCycle = table[sel];
1864 }
1865 break;
1866 case SQ_ALU_VEC_021:
1867 {
1868 int table[3] = {0, 2, 1};
1869 *pCycle = table[sel];
1870 }
1871 break;
1872 case SQ_ALU_VEC_120:
1873 {
1874 int table[3] = {1, 2, 0};
1875 *pCycle = table[sel];
1876 }
1877 break;
1878 case SQ_ALU_VEC_102:
1879 {
1880 int table[3] = {1, 0, 2};
1881 *pCycle = table[sel];
1882 }
1883 break;
1884 case SQ_ALU_VEC_201:
1885 {
1886 int table[3] = {2, 0, 1};
1887 *pCycle = table[sel];
1888 }
1889 break;
1890 case SQ_ALU_VEC_210:
1891 {
1892 int table[3] = {2, 1, 0};
1893 *pCycle = table[sel];
1894 }
1895 break;
1896 default:
1897 radeon_error("Bad Vec bank swizzle value\n");
1898 return GL_FALSE;
1899 break;
1900 }
1901
1902 return GL_TRUE;
1903 }
1904
1905 GLboolean check_scalar(r700_AssemblerBase* pAsm,
1906 R700ALUInstruction* alu_instruction_ptr)
1907 {
1908 GLuint cycle;
1909 GLuint bank_swizzle;
1910 GLuint const_count = 0;
1911
1912 BITS sel;
1913 BITS chan;
1914 BITS rel;
1915 BITS neg;
1916
1917 GLuint src;
1918
1919 BITS src_sel [3] = {0,0,0};
1920 BITS src_chan[3] = {0,0,0};
1921 BITS src_rel [3] = {0,0,0};
1922 BITS src_neg [3] = {0,0,0};
1923
1924 GLuint swizzle_key;
1925
1926 GLuint number_of_operands = r700GetNumOperands(pAsm);
1927
1928 for (src=0; src<number_of_operands; src++)
1929 {
1930 get_src_properties(alu_instruction_ptr,
1931 src,
1932 &(src_sel[src]),
1933 &(src_rel[src]),
1934 &(src_chan[src]),
1935 &(src_neg[src]) );
1936 }
1937
1938
1939 swizzle_key = ( (is_const( src_sel[0] ) ? 4 : 0) +
1940 (is_const( src_sel[1] ) ? 2 : 0) +
1941 (is_const( src_sel[2] ) ? 1 : 0) );
1942
1943 alu_instruction_ptr->m_Word1.f.bank_swizzle = BANK_SWIZZLE_SCL[ swizzle_key ];
1944
1945 for (src=0; src<number_of_operands; src++)
1946 {
1947 sel = src_sel [src];
1948 chan = src_chan[src];
1949 rel = src_rel [src];
1950 neg = src_neg [src];
1951
1952 if (is_const( sel ))
1953 {
1954 // Any constant, including literal and inline constants
1955 const_count++;
1956
1957 if (is_cfile( sel ))
1958 {
1959 reserve_cfile(pAsm, sel, chan);
1960 }
1961
1962 }
1963 }
1964
1965 for (src=0; src<number_of_operands; src++)
1966 {
1967 sel = src_sel [src];
1968 chan = src_chan[src];
1969 rel = src_rel [src];
1970 neg = src_neg [src];
1971
1972 if( is_gpr(sel) )
1973 {
1974 bank_swizzle = alu_instruction_ptr->m_Word1.f.bank_swizzle;
1975
1976 if( GL_FALSE == cycle_for_scalar_bank_swizzle(bank_swizzle, src, &cycle) )
1977 {
1978 return GL_FALSE;
1979 }
1980
1981 if(cycle < const_count)
1982 {
1983 if( GL_FALSE == reserve_gpr(pAsm, sel, chan, cycle) )
1984 {
1985 return GL_FALSE;
1986 }
1987 }
1988 }
1989 }
1990
1991 return GL_TRUE;
1992 }
1993
1994 GLboolean check_vector(r700_AssemblerBase* pAsm,
1995 R700ALUInstruction* alu_instruction_ptr)
1996 {
1997 GLuint cycle;
1998 GLuint bank_swizzle;
1999 GLuint const_count = 0;
2000
2001 GLuint src;
2002
2003 BITS sel;
2004 BITS chan;
2005 BITS rel;
2006 BITS neg;
2007
2008 BITS src_sel [3] = {0,0,0};
2009 BITS src_chan[3] = {0,0,0};
2010 BITS src_rel [3] = {0,0,0};
2011 BITS src_neg [3] = {0,0,0};
2012
2013 GLuint swizzle_key;
2014
2015 GLuint number_of_operands = r700GetNumOperands(pAsm);
2016
2017 for (src=0; src<number_of_operands; src++)
2018 {
2019 get_src_properties(alu_instruction_ptr,
2020 src,
2021 &(src_sel[src]),
2022 &(src_rel[src]),
2023 &(src_chan[src]),
2024 &(src_neg[src]) );
2025 }
2026
2027
2028 swizzle_key = ( (is_const( src_sel[0] ) ? 4 : 0) +
2029 (is_const( src_sel[1] ) ? 2 : 0) +
2030 (is_const( src_sel[2] ) ? 1 : 0)
2031 );
2032
2033 alu_instruction_ptr->m_Word1.f.bank_swizzle = BANK_SWIZZLE_VEC[swizzle_key];
2034
2035 for (src=0; src<number_of_operands; src++)
2036 {
2037 sel = src_sel [src];
2038 chan = src_chan[src];
2039 rel = src_rel [src];
2040 neg = src_neg [src];
2041
2042
2043 bank_swizzle = alu_instruction_ptr->m_Word1.f.bank_swizzle;
2044
2045 if( is_gpr(sel) )
2046 {
2047 if( GL_FALSE == cycle_for_vector_bank_swizzle(bank_swizzle, src, &cycle) )
2048 {
2049 return GL_FALSE;
2050 }
2051
2052 if ( (src == 1) &&
2053 (sel == src_sel[0]) &&
2054 (chan == src_chan[0]) )
2055 {
2056 }
2057 else
2058 {
2059 if( GL_FALSE == reserve_gpr(pAsm, sel, chan, cycle) )
2060 {
2061 return GL_FALSE;
2062 }
2063 }
2064 }
2065 else if( is_const(sel) )
2066 {
2067 const_count++;
2068
2069 if( is_cfile(sel) )
2070 {
2071 if( GL_FALSE == reserve_cfile(pAsm, sel, chan) )
2072 {
2073 return GL_FALSE;
2074 }
2075 }
2076 }
2077 }
2078
2079 return GL_TRUE;
2080 }
2081
2082 GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm)
2083 {
2084 GLuint number_of_scalar_operations;
2085 GLboolean is_single_scalar_operation;
2086 GLuint scalar_channel_index;
2087
2088 PVSSRC * pcurrent_source;
2089 int current_source_index;
2090 GLuint contiguous_slots_needed;
2091
2092 GLuint uNumSrc = r700GetNumOperands(pAsm);
2093 //GLuint channel_swizzle, j;
2094 //GLuint chan_counter[4] = {0, 0, 0, 0};
2095 //PVSSRC * pSource[3];
2096 GLboolean bSplitInst = GL_FALSE;
2097
2098 if (1 == pAsm->D.dst.math)
2099 {
2100 is_single_scalar_operation = GL_TRUE;
2101 number_of_scalar_operations = 1;
2102 }
2103 else
2104 {
2105 is_single_scalar_operation = GL_FALSE;
2106 number_of_scalar_operations = 4;
2107
2108 /* current assembler doesn't do more than 1 register per source */
2109 #if 0
2110 /* check read port, only very preliminary algorithm, not count in
2111 src0/1 same comp case and prev slot repeat case; also not count relative
2112 addressing. TODO: improve performance. */
2113 for(j=0; j<uNumSrc; j++)
2114 {
2115 pSource[j] = &(pAsm->S[j].src);
2116 }
2117 for(scalar_channel_index=0; scalar_channel_index<4; scalar_channel_index++)
2118 {
2119 for(j=0; j<uNumSrc; j++)
2120 {
2121 switch (scalar_channel_index)
2122 {
2123 case 0: channel_swizzle = pSource[j]->swizzlex; break;
2124 case 1: channel_swizzle = pSource[j]->swizzley; break;
2125 case 2: channel_swizzle = pSource[j]->swizzlez; break;
2126 case 3: channel_swizzle = pSource[j]->swizzlew; break;
2127 default: channel_swizzle = SQ_SEL_MASK; break;
2128 }
2129 if ( ((pSource[j]->rtype == SRC_REG_TEMPORARY) ||
2130 (pSource[j]->rtype == SRC_REG_INPUT))
2131 && (channel_swizzle <= SQ_SEL_W) )
2132 {
2133 chan_counter[channel_swizzle]++;
2134 }
2135 }
2136 }
2137 if( (chan_counter[SQ_SEL_X] > 3)
2138 || (chan_counter[SQ_SEL_Y] > 3)
2139 || (chan_counter[SQ_SEL_Z] > 3)
2140 || (chan_counter[SQ_SEL_W] > 3) ) /* each chan bank has only 3 ports. */
2141 {
2142 bSplitInst = GL_TRUE;
2143 }
2144 #endif
2145 }
2146
2147 contiguous_slots_needed = 0;
2148
2149 if(GL_TRUE == is_reduction_opcode(&(pAsm->D)) )
2150 {
2151 contiguous_slots_needed = 4;
2152 }
2153
2154 initialize(pAsm);
2155
2156 for (scalar_channel_index=0;
2157 scalar_channel_index < number_of_scalar_operations;
2158 scalar_channel_index++)
2159 {
2160 R700ALUInstruction* alu_instruction_ptr = (R700ALUInstruction*) CALLOC_STRUCT(R700ALUInstruction);
2161 if (alu_instruction_ptr == NULL)
2162 {
2163 return GL_FALSE;
2164 }
2165 Init_R700ALUInstruction(alu_instruction_ptr);
2166
2167 //src 0
2168 current_source_index = 0;
2169 pcurrent_source = &(pAsm->S[0].src);
2170
2171 if (GL_FALSE == assemble_alu_src(alu_instruction_ptr,
2172 current_source_index,
2173 pcurrent_source,
2174 scalar_channel_index) )
2175 {
2176 return GL_FALSE;
2177 }
2178
2179 if (uNumSrc > 1)
2180 {
2181 // Process source 1
2182 current_source_index = 1;
2183 pcurrent_source = &(pAsm->S[current_source_index].src);
2184
2185 if (GL_FALSE == assemble_alu_src(alu_instruction_ptr,
2186 current_source_index,
2187 pcurrent_source,
2188 scalar_channel_index) )
2189 {
2190 return GL_FALSE;
2191 }
2192 }
2193
2194 //other bits
2195 alu_instruction_ptr->m_Word0.f.index_mode = SQ_INDEX_AR_X;
2196
2197 if( (is_single_scalar_operation == GL_TRUE)
2198 || (GL_TRUE == bSplitInst) )
2199 {
2200 alu_instruction_ptr->m_Word0.f.last = 1;
2201 }
2202 else
2203 {
2204 alu_instruction_ptr->m_Word0.f.last = (scalar_channel_index == 3) ? 1 : 0;
2205 }
2206
2207 alu_instruction_ptr->m_Word0.f.pred_sel = 0x0;
2208 alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x0;
2209 alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x0;
2210
2211 // dst
2212 if( (pAsm->D.dst.rtype == DST_REG_TEMPORARY) ||
2213 (pAsm->D.dst.rtype == DST_REG_OUT) )
2214 {
2215 alu_instruction_ptr->m_Word1.f.dst_gpr = pAsm->D.dst.reg;
2216 }
2217 else
2218 {
2219 radeon_error("Only temp destination registers supported for ALU dest regs.\n");
2220 return GL_FALSE;
2221 }
2222
2223 alu_instruction_ptr->m_Word1.f.dst_rel = SQ_ABSOLUTE; //D.rtype
2224
2225 if ( is_single_scalar_operation == GL_TRUE )
2226 {
2227 // Override scalar_channel_index since only one scalar value will be written
2228 if(pAsm->D.dst.writex)
2229 {
2230 scalar_channel_index = 0;
2231 }
2232 else if(pAsm->D.dst.writey)
2233 {
2234 scalar_channel_index = 1;
2235 }
2236 else if(pAsm->D.dst.writez)
2237 {
2238 scalar_channel_index = 2;
2239 }
2240 else if(pAsm->D.dst.writew)
2241 {
2242 scalar_channel_index = 3;
2243 }
2244 }
2245
2246 alu_instruction_ptr->m_Word1.f.dst_chan = scalar_channel_index;
2247
2248 alu_instruction_ptr->m_Word1.f.clamp = pAsm->pILInst[pAsm->uiCurInst].SaturateMode;
2249
2250 if (pAsm->D.dst.op3)
2251 {
2252 //op3
2253
2254 alu_instruction_ptr->m_Word1_OP3.f.alu_inst = pAsm->D.dst.opcode;
2255
2256 //There's 3rd src for op3
2257 current_source_index = 2;
2258 pcurrent_source = &(pAsm->S[current_source_index].src);
2259
2260 if ( GL_FALSE == assemble_alu_src(alu_instruction_ptr,
2261 current_source_index,
2262 pcurrent_source,
2263 scalar_channel_index) )
2264 {
2265 return GL_FALSE;
2266 }
2267 }
2268 else
2269 {
2270 //op2
2271 if (pAsm->bR6xx)
2272 {
2273 alu_instruction_ptr->m_Word1_OP2.f6.alu_inst = pAsm->D.dst.opcode;
2274
2275 alu_instruction_ptr->m_Word1_OP2.f6.src0_abs = 0x0;
2276 alu_instruction_ptr->m_Word1_OP2.f6.src1_abs = 0x0;
2277
2278 //alu_instruction_ptr->m_Word1_OP2.f6.update_execute_mask = 0x0;
2279 //alu_instruction_ptr->m_Word1_OP2.f6.update_pred = 0x0;
2280 switch (scalar_channel_index)
2281 {
2282 case 0:
2283 alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writex;
2284 break;
2285 case 1:
2286 alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writey;
2287 break;
2288 case 2:
2289 alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writez;
2290 break;
2291 case 3:
2292 alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writew;
2293 break;
2294 default:
2295 alu_instruction_ptr->m_Word1_OP2.f6.write_mask = 1; //SQ_SEL_MASK;
2296 break;
2297 }
2298 alu_instruction_ptr->m_Word1_OP2.f6.omod = SQ_ALU_OMOD_OFF;
2299 }
2300 else
2301 {
2302 alu_instruction_ptr->m_Word1_OP2.f.alu_inst = pAsm->D.dst.opcode;
2303
2304 alu_instruction_ptr->m_Word1_OP2.f.src0_abs = 0x0;
2305 alu_instruction_ptr->m_Word1_OP2.f.src1_abs = 0x0;
2306
2307 //alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x0;
2308 //alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x0;
2309 switch (scalar_channel_index)
2310 {
2311 case 0:
2312 alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writex;
2313 break;
2314 case 1:
2315 alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writey;
2316 break;
2317 case 2:
2318 alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writez;
2319 break;
2320 case 3:
2321 alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writew;
2322 break;
2323 default:
2324 alu_instruction_ptr->m_Word1_OP2.f.write_mask = 1; //SQ_SEL_MASK;
2325 break;
2326 }
2327 alu_instruction_ptr->m_Word1_OP2.f.omod = SQ_ALU_OMOD_OFF;
2328 }
2329 }
2330
2331 if(GL_FALSE == add_alu_instruction(pAsm, alu_instruction_ptr, contiguous_slots_needed) )
2332 {
2333 return GL_FALSE;
2334 }
2335
2336 /*
2337 * Judge the type of current instruction, is it vector or scalar
2338 * instruction.
2339 */
2340 if (is_single_scalar_operation)
2341 {
2342 if(GL_FALSE == check_scalar(pAsm, alu_instruction_ptr) )
2343 {
2344 return GL_FALSE;
2345 }
2346 }
2347 else
2348 {
2349 if(GL_FALSE == check_vector(pAsm, alu_instruction_ptr) )
2350 {
2351 return 1;
2352 }
2353 }
2354
2355 contiguous_slots_needed = 0;
2356 }
2357
2358 return GL_TRUE;
2359 }
2360
2361 GLboolean next_ins(r700_AssemblerBase *pAsm)
2362 {
2363 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
2364
2365 if( GL_TRUE == pAsm->is_tex )
2366 {
2367 if (pILInst->TexSrcTarget == TEXTURE_RECT_INDEX) {
2368 if( GL_FALSE == assemble_tex_instruction(pAsm, GL_FALSE) )
2369 {
2370 radeon_error("Error assembling TEX instruction\n");
2371 return GL_FALSE;
2372 }
2373 } else {
2374 if( GL_FALSE == assemble_tex_instruction(pAsm, GL_TRUE) )
2375 {
2376 radeon_error("Error assembling TEX instruction\n");
2377 return GL_FALSE;
2378 }
2379 }
2380 }
2381 else
2382 { //ALU
2383 if( GL_FALSE == assemble_alu_instruction(pAsm) )
2384 {
2385 radeon_error("Error assembling ALU instruction\n");
2386 return GL_FALSE;
2387 }
2388 }
2389
2390 if(pAsm->D.dst.rtype == DST_REG_OUT)
2391 {
2392 if(pAsm->D.dst.op3)
2393 {
2394 // There is no mask for OP3 instructions, so all channels are written
2395 pAsm->pucOutMask[pAsm->D.dst.reg - pAsm->starting_export_register_number] = 0xF;
2396 }
2397 else
2398 {
2399 pAsm->pucOutMask[pAsm->D.dst.reg - pAsm->starting_export_register_number]
2400 |= (unsigned char)pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask;
2401 }
2402 }
2403
2404 //reset for next inst.
2405 pAsm->D.bits = 0;
2406 pAsm->S[0].bits = 0;
2407 pAsm->S[1].bits = 0;
2408 pAsm->S[2].bits = 0;
2409 pAsm->is_tex = GL_FALSE;
2410 pAsm->need_tex_barrier = GL_FALSE;
2411 return GL_TRUE;
2412 }
2413
2414 GLboolean assemble_math_function(r700_AssemblerBase* pAsm, BITS opcode)
2415 {
2416 BITS tmp;
2417
2418 checkop1(pAsm);
2419
2420 tmp = gethelpr(pAsm);
2421
2422 // opcode tmp.x, a.x
2423 // MOV dst, tmp.x
2424
2425 pAsm->D.dst.opcode = opcode;
2426 pAsm->D.dst.math = 1;
2427
2428 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
2429 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
2430 pAsm->D.dst.reg = tmp;
2431 pAsm->D.dst.writex = 1;
2432
2433 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
2434 {
2435 return GL_FALSE;
2436 }
2437
2438 if ( GL_FALSE == next_ins(pAsm) )
2439 {
2440 return GL_FALSE;
2441 }
2442
2443 // Now replicate result to all necessary channels in destination
2444 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
2445
2446 if( GL_FALSE == assemble_dst(pAsm) )
2447 {
2448 return GL_FALSE;
2449 }
2450
2451 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
2452 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
2453 pAsm->S[0].src.reg = tmp;
2454
2455 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
2456 noneg_PVSSRC(&(pAsm->S[0].src));
2457
2458 if( GL_FALSE == next_ins(pAsm) )
2459 {
2460 return GL_FALSE;
2461 }
2462
2463 return GL_TRUE;
2464 }
2465
2466 GLboolean assemble_ABS(r700_AssemblerBase *pAsm)
2467 {
2468 checkop1(pAsm);
2469
2470 pAsm->D.dst.opcode = SQ_OP2_INST_MAX;
2471
2472 if( GL_FALSE == assemble_dst(pAsm) )
2473 {
2474 return GL_FALSE;
2475 }
2476 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
2477 {
2478 return GL_FALSE;
2479 }
2480
2481 pAsm->S[1].bits = pAsm->S[0].bits;
2482 flipneg_PVSSRC(&(pAsm->S[1].src));
2483
2484 if ( GL_FALSE == next_ins(pAsm) )
2485 {
2486 return GL_FALSE;
2487 }
2488
2489 return GL_TRUE;
2490 }
2491
2492 GLboolean assemble_ADD(r700_AssemblerBase *pAsm)
2493 {
2494 if( GL_FALSE == checkop2(pAsm) )
2495 {
2496 return GL_FALSE;
2497 }
2498
2499 pAsm->D.dst.opcode = SQ_OP2_INST_ADD;
2500
2501 if( GL_FALSE == assemble_dst(pAsm) )
2502 {
2503 return GL_FALSE;
2504 }
2505
2506 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
2507 {
2508 return GL_FALSE;
2509 }
2510
2511 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
2512 {
2513 return GL_FALSE;
2514 }
2515
2516 if(pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_SUB)
2517 {
2518 flipneg_PVSSRC(&(pAsm->S[1].src));
2519 }
2520
2521 if( GL_FALSE == next_ins(pAsm) )
2522 {
2523 return GL_FALSE;
2524 }
2525
2526 return GL_TRUE;
2527 }
2528
2529 GLboolean assemble_ARL(r700_AssemblerBase *pAsm)
2530 { /* TODO: ar values dont' persist between clauses */
2531 if( GL_FALSE == checkop1(pAsm) )
2532 {
2533 return GL_FALSE;
2534 }
2535
2536 pAsm->D.dst.opcode = SQ_OP2_INST_MOVA_FLOOR;
2537 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
2538 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
2539 pAsm->D.dst.reg = 0;
2540 pAsm->D.dst.writex = 0;
2541 pAsm->D.dst.writey = 0;
2542 pAsm->D.dst.writez = 0;
2543 pAsm->D.dst.writew = 0;
2544
2545 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
2546 {
2547 return GL_FALSE;
2548 }
2549
2550 if( GL_FALSE == next_ins(pAsm) )
2551 {
2552 return GL_FALSE;
2553 }
2554
2555 return GL_TRUE;
2556 }
2557
2558 GLboolean assemble_BAD(char *opcode_str)
2559 {
2560 radeon_error("Not yet implemented instruction (%s)\n", opcode_str);
2561 return GL_FALSE;
2562 }
2563
2564 GLboolean assemble_CMP(r700_AssemblerBase *pAsm)
2565 {
2566 int tmp;
2567
2568 if( GL_FALSE == checkop3(pAsm) )
2569 {
2570 return GL_FALSE;
2571 }
2572
2573 pAsm->D.dst.opcode = SQ_OP3_INST_CNDGE;
2574 pAsm->D.dst.op3 = 1;
2575
2576 tmp = (-1);
2577
2578 if(0xF != pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask)
2579 {
2580 //OP3 has no support for write mask
2581 tmp = gethelpr(pAsm);
2582
2583 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
2584 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
2585 pAsm->D.dst.reg = tmp;
2586
2587 nomask_PVSDST(&(pAsm->D.dst));
2588 }
2589 else
2590 {
2591 if( GL_FALSE == assemble_dst(pAsm) )
2592 {
2593 return GL_FALSE;
2594 }
2595 }
2596
2597 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
2598 {
2599 return GL_FALSE;
2600 }
2601
2602 if( GL_FALSE == assemble_src(pAsm, 2, 1) )
2603 {
2604 return GL_FALSE;
2605 }
2606
2607 if( GL_FALSE == assemble_src(pAsm, 1, 2) )
2608 {
2609 return GL_FALSE;
2610 }
2611
2612 if ( GL_FALSE == next_ins(pAsm) )
2613 {
2614 return GL_FALSE;
2615 }
2616
2617 if (0xF != pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask)
2618 {
2619 if( GL_FALSE == assemble_dst(pAsm) )
2620 {
2621 return GL_FALSE;
2622 }
2623
2624 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
2625
2626 //tmp for source
2627 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
2628 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
2629 pAsm->S[0].src.reg = tmp;
2630
2631 noneg_PVSSRC(&(pAsm->S[0].src));
2632 noswizzle_PVSSRC(&(pAsm->S[0].src));
2633
2634 if( GL_FALSE == next_ins(pAsm) )
2635 {
2636 return GL_FALSE;
2637 }
2638 }
2639
2640 return GL_TRUE;
2641 }
2642
2643 GLboolean assemble_COS(r700_AssemblerBase *pAsm)
2644 {
2645 return assemble_math_function(pAsm, SQ_OP2_INST_COS);
2646 }
2647
2648 GLboolean assemble_DOT(r700_AssemblerBase *pAsm)
2649 {
2650 if( GL_FALSE == checkop2(pAsm) )
2651 {
2652 return GL_FALSE;
2653 }
2654
2655 pAsm->D.dst.opcode = SQ_OP2_INST_DOT4;
2656
2657 if( GL_FALSE == assemble_dst(pAsm) )
2658 {
2659 return GL_FALSE;
2660 }
2661
2662 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
2663 {
2664 return GL_FALSE;
2665 }
2666
2667 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
2668 {
2669 return GL_FALSE;
2670 }
2671
2672 if(OPCODE_DP3 == pAsm->pILInst[pAsm->uiCurInst].Opcode)
2673 {
2674 zerocomp_PVSSRC(&(pAsm->S[0].src), 3);
2675 zerocomp_PVSSRC(&(pAsm->S[1].src), 3);
2676 }
2677 else if(pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_DPH)
2678 {
2679 onecomp_PVSSRC(&(pAsm->S[0].src), 3);
2680 }
2681
2682 if ( GL_FALSE == next_ins(pAsm) )
2683 {
2684 return GL_FALSE;
2685 }
2686
2687 return GL_TRUE;
2688 }
2689
2690 GLboolean assemble_DST(r700_AssemblerBase *pAsm)
2691 {
2692 if( GL_FALSE == checkop2(pAsm) )
2693 {
2694 return GL_FALSE;
2695 }
2696
2697 pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
2698
2699 if( GL_FALSE == assemble_dst(pAsm) )
2700 {
2701 return GL_FALSE;
2702 }
2703
2704 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
2705 {
2706 return GL_FALSE;
2707 }
2708
2709 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
2710 {
2711 return GL_FALSE;
2712 }
2713
2714 onecomp_PVSSRC(&(pAsm->S[0].src), 0);
2715 onecomp_PVSSRC(&(pAsm->S[0].src), 3);
2716
2717 onecomp_PVSSRC(&(pAsm->S[1].src), 0);
2718 onecomp_PVSSRC(&(pAsm->S[1].src), 2);
2719
2720 if ( GL_FALSE == next_ins(pAsm) )
2721 {
2722 return GL_FALSE;
2723 }
2724
2725 return GL_TRUE;
2726 }
2727
2728 GLboolean assemble_EX2(r700_AssemblerBase *pAsm)
2729 {
2730 return assemble_math_function(pAsm, SQ_OP2_INST_EXP_IEEE);
2731 }
2732
2733 GLboolean assemble_EXP(r700_AssemblerBase *pAsm)
2734 {
2735 BITS tmp;
2736
2737 checkop1(pAsm);
2738
2739 tmp = gethelpr(pAsm);
2740
2741 // FLOOR tmp.x, a.x
2742 // EX2 dst.x tmp.x
2743
2744 if (pAsm->pILInst->DstReg.WriteMask & 0x1) {
2745 pAsm->D.dst.opcode = SQ_OP2_INST_FLOOR;
2746
2747 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
2748 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
2749 pAsm->D.dst.reg = tmp;
2750 pAsm->D.dst.writex = 1;
2751
2752 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
2753 {
2754 return GL_FALSE;
2755 }
2756
2757 if( GL_FALSE == next_ins(pAsm) )
2758 {
2759 return GL_FALSE;
2760 }
2761
2762 pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE;
2763 pAsm->D.dst.math = 1;
2764
2765 if( GL_FALSE == assemble_dst(pAsm) )
2766 {
2767 return GL_FALSE;
2768 }
2769
2770 pAsm->D.dst.writey = pAsm->D.dst.writez = pAsm->D.dst.writew = 0;
2771
2772 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
2773 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
2774 pAsm->S[0].src.reg = tmp;
2775
2776 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
2777 noneg_PVSSRC(&(pAsm->S[0].src));
2778
2779 if( GL_FALSE == next_ins(pAsm) )
2780 {
2781 return GL_FALSE;
2782 }
2783 }
2784
2785 // FRACT dst.y a.x
2786
2787 if ((pAsm->pILInst->DstReg.WriteMask >> 1) & 0x1) {
2788 pAsm->D.dst.opcode = SQ_OP2_INST_FRACT;
2789
2790 if( GL_FALSE == assemble_dst(pAsm) )
2791 {
2792 return GL_FALSE;
2793 }
2794
2795 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
2796 {
2797 return GL_FALSE;
2798 }
2799
2800 pAsm->D.dst.writex = pAsm->D.dst.writez = pAsm->D.dst.writew = 0;
2801
2802 if( GL_FALSE == next_ins(pAsm) )
2803 {
2804 return GL_FALSE;
2805 }
2806 }
2807
2808 // EX2 dst.z, a.x
2809
2810 if ((pAsm->pILInst->DstReg.WriteMask >> 2) & 0x1) {
2811 pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE;
2812 pAsm->D.dst.math = 1;
2813
2814 if( GL_FALSE == assemble_dst(pAsm) )
2815 {
2816 return GL_FALSE;
2817 }
2818
2819 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
2820 {
2821 return GL_FALSE;
2822 }
2823
2824 pAsm->D.dst.writex = pAsm->D.dst.writey = pAsm->D.dst.writew = 0;
2825
2826 if( GL_FALSE == next_ins(pAsm) )
2827 {
2828 return GL_FALSE;
2829 }
2830 }
2831
2832 // MOV dst.w 1.0
2833
2834 if ((pAsm->pILInst->DstReg.WriteMask >> 3) & 0x1) {
2835 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
2836
2837 if( GL_FALSE == assemble_dst(pAsm) )
2838 {
2839 return GL_FALSE;
2840 }
2841
2842 pAsm->D.dst.writex = pAsm->D.dst.writey = pAsm->D.dst.writez = 0;
2843
2844 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
2845 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
2846 pAsm->S[0].src.reg = tmp;
2847
2848 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_1);
2849 noneg_PVSSRC(&(pAsm->S[0].src));
2850
2851 if( GL_FALSE == next_ins(pAsm) )
2852 {
2853 return GL_FALSE;
2854 }
2855 }
2856
2857 return GL_TRUE;
2858 }
2859
2860 GLboolean assemble_FLR(r700_AssemblerBase *pAsm)
2861 {
2862 checkop1(pAsm);
2863
2864 pAsm->D.dst.opcode = SQ_OP2_INST_FLOOR;
2865
2866 if ( GL_FALSE == assemble_dst(pAsm) )
2867 {
2868 return GL_FALSE;
2869 }
2870
2871 if ( GL_FALSE == assemble_src(pAsm, 0, -1) )
2872 {
2873 return GL_FALSE;
2874 }
2875
2876 if ( GL_FALSE == next_ins(pAsm) )
2877 {
2878 return GL_FALSE;
2879 }
2880
2881 return GL_TRUE;
2882 }
2883
2884 GLboolean assemble_FLR_INT(r700_AssemblerBase *pAsm)
2885 {
2886 return assemble_math_function(pAsm, SQ_OP2_INST_FLT_TO_INT);
2887 }
2888
2889 GLboolean assemble_FRC(r700_AssemblerBase *pAsm)
2890 {
2891 checkop1(pAsm);
2892
2893 pAsm->D.dst.opcode = SQ_OP2_INST_FRACT;
2894
2895 if ( GL_FALSE == assemble_dst(pAsm) )
2896 {
2897 return GL_FALSE;
2898 }
2899
2900 if ( GL_FALSE == assemble_src(pAsm, 0, -1) )
2901 {
2902 return GL_FALSE;
2903 }
2904
2905 if ( GL_FALSE == next_ins(pAsm) )
2906 {
2907 return GL_FALSE;
2908 }
2909
2910 return GL_TRUE;
2911 }
2912
2913 GLboolean assemble_KIL(r700_AssemblerBase *pAsm)
2914 {
2915 /* TODO: doc says KILL has to be last(end) ALU clause */
2916
2917 checkop1(pAsm);
2918
2919 pAsm->D.dst.opcode = SQ_OP2_INST_KILLGT;
2920
2921 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
2922 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
2923 pAsm->D.dst.reg = 0;
2924 pAsm->D.dst.writex = 0;
2925 pAsm->D.dst.writey = 0;
2926 pAsm->D.dst.writez = 0;
2927 pAsm->D.dst.writew = 0;
2928
2929 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
2930 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
2931 pAsm->S[0].src.reg = 0;
2932
2933 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_0);
2934 noneg_PVSSRC(&(pAsm->S[0].src));
2935
2936 if ( GL_FALSE == assemble_src(pAsm, 0, 1) )
2937 {
2938 return GL_FALSE;
2939 }
2940
2941 if ( GL_FALSE == next_ins(pAsm) )
2942 {
2943 return GL_FALSE;
2944 }
2945
2946 pAsm->pR700Shader->killIsUsed = GL_TRUE;
2947
2948 return GL_TRUE;
2949 }
2950
2951 GLboolean assemble_LG2(r700_AssemblerBase *pAsm)
2952 {
2953 return assemble_math_function(pAsm, SQ_OP2_INST_LOG_IEEE);
2954 }
2955
2956 GLboolean assemble_LRP(r700_AssemblerBase *pAsm)
2957 {
2958 BITS tmp;
2959
2960 if( GL_FALSE == checkop3(pAsm) )
2961 {
2962 return GL_FALSE;
2963 }
2964
2965 tmp = gethelpr(pAsm);
2966
2967 pAsm->D.dst.opcode = SQ_OP2_INST_ADD;
2968
2969 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
2970 pAsm->D.dst.reg = tmp;
2971 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
2972 nomask_PVSDST(&(pAsm->D.dst));
2973
2974
2975 if( GL_FALSE == assemble_src(pAsm, 1, 0) )
2976 {
2977 return GL_FALSE;
2978 }
2979
2980 if ( GL_FALSE == assemble_src(pAsm, 2, 1) )
2981 {
2982 return GL_FALSE;
2983 }
2984
2985 neg_PVSSRC(&(pAsm->S[1].src));
2986
2987 if( GL_FALSE == next_ins(pAsm) )
2988 {
2989 return GL_FALSE;
2990 }
2991
2992 pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
2993 pAsm->D.dst.op3 = 1;
2994
2995 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
2996 pAsm->D.dst.reg = tmp;
2997 nomask_PVSDST(&(pAsm->D.dst));
2998 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
2999
3000 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3001 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3002 pAsm->S[0].src.reg = tmp;
3003 noswizzle_PVSSRC(&(pAsm->S[0].src));
3004
3005
3006 if( GL_FALSE == assemble_src(pAsm, 0, 1) )
3007 {
3008 return GL_FALSE;
3009 }
3010 if( GL_FALSE == assemble_src(pAsm, 2, -1) )
3011 {
3012 return GL_FALSE;
3013 }
3014
3015 if( GL_FALSE == next_ins(pAsm) )
3016 {
3017 return GL_FALSE;
3018 }
3019
3020 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
3021
3022 if( GL_FALSE == assemble_dst(pAsm) )
3023 {
3024 return GL_FALSE;
3025 }
3026
3027 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3028 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3029 pAsm->S[0].src.reg = tmp;
3030 noswizzle_PVSSRC(&(pAsm->S[0].src));
3031
3032 if( GL_FALSE == next_ins(pAsm) )
3033 {
3034 return GL_FALSE;
3035 }
3036
3037 return GL_TRUE;
3038 }
3039
3040 GLboolean assemble_LOG(r700_AssemblerBase *pAsm)
3041 {
3042 BITS tmp1, tmp2, tmp3;
3043
3044 checkop1(pAsm);
3045
3046 tmp1 = gethelpr(pAsm);
3047 tmp2 = gethelpr(pAsm);
3048 tmp3 = gethelpr(pAsm);
3049
3050 // FIXME: The hardware can do fabs() directly on input
3051 // elements, but the compiler doesn't have the
3052 // capability to use that.
3053
3054 // MAX tmp1.x, a.x, -a.x (fabs(a.x))
3055
3056 pAsm->D.dst.opcode = SQ_OP2_INST_MAX;
3057
3058 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3059 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3060 pAsm->D.dst.reg = tmp1;
3061 pAsm->D.dst.writex = 1;
3062
3063 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3064 {
3065 return GL_FALSE;
3066 }
3067
3068 pAsm->S[1].bits = pAsm->S[0].bits;
3069 flipneg_PVSSRC(&(pAsm->S[1].src));
3070
3071 if ( GL_FALSE == next_ins(pAsm) )
3072 {
3073 return GL_FALSE;
3074 }
3075
3076 // Entire algo:
3077 //
3078 // LG2 tmp2.x, tmp1.x
3079 // FLOOR tmp3.x, tmp2.x
3080 // MOV dst.x, tmp3.x
3081 // ADD tmp3.x, tmp2.x, -tmp3.x
3082 // EX2 dst.y, tmp3.x
3083 // MOV dst.z, tmp2.x
3084 // MOV dst.w, 1.0
3085
3086 // LG2 tmp2.x, tmp1.x
3087 // FLOOR tmp3.x, tmp2.x
3088
3089 pAsm->D.dst.opcode = SQ_OP2_INST_LOG_IEEE;
3090 pAsm->D.dst.math = 1;
3091
3092 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3093 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3094 pAsm->D.dst.reg = tmp2;
3095 pAsm->D.dst.writex = 1;
3096
3097 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3098 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
3099 pAsm->S[0].src.reg = tmp1;
3100
3101 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
3102 noneg_PVSSRC(&(pAsm->S[0].src));
3103
3104 if( GL_FALSE == next_ins(pAsm) )
3105 {
3106 return GL_FALSE;
3107 }
3108
3109 pAsm->D.dst.opcode = SQ_OP2_INST_FLOOR;
3110
3111 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3112 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3113 pAsm->D.dst.reg = tmp3;
3114 pAsm->D.dst.writex = 1;
3115
3116 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3117 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
3118 pAsm->S[0].src.reg = tmp2;
3119
3120 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
3121 noneg_PVSSRC(&(pAsm->S[0].src));
3122
3123 if( GL_FALSE == next_ins(pAsm) )
3124 {
3125 return GL_FALSE;
3126 }
3127
3128 // MOV dst.x, tmp3.x
3129
3130 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
3131
3132 if( GL_FALSE == assemble_dst(pAsm) )
3133 {
3134 return GL_FALSE;
3135 }
3136
3137 pAsm->D.dst.writey = pAsm->D.dst.writez = pAsm->D.dst.writew = 0;
3138
3139 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3140 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
3141 pAsm->S[0].src.reg = tmp3;
3142
3143 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
3144 noneg_PVSSRC(&(pAsm->S[0].src));
3145
3146 if( GL_FALSE == next_ins(pAsm) )
3147 {
3148 return GL_FALSE;
3149 }
3150
3151 // ADD tmp3.x, tmp2.x, -tmp3.x
3152 // EX2 dst.y, tmp3.x
3153
3154 pAsm->D.dst.opcode = SQ_OP2_INST_ADD;
3155
3156 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3157 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3158 pAsm->D.dst.reg = tmp3;
3159 pAsm->D.dst.writex = 1;
3160
3161 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3162 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
3163 pAsm->S[0].src.reg = tmp2;
3164
3165 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
3166 noneg_PVSSRC(&(pAsm->S[0].src));
3167
3168 setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
3169 pAsm->S[1].src.rtype = DST_REG_TEMPORARY;
3170 pAsm->S[1].src.reg = tmp3;
3171
3172 setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X);
3173 neg_PVSSRC(&(pAsm->S[1].src));
3174
3175 if( GL_FALSE == next_ins(pAsm) )
3176 {
3177 return GL_FALSE;
3178 }
3179
3180 pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE;
3181 pAsm->D.dst.math = 1;
3182
3183 if( GL_FALSE == assemble_dst(pAsm) )
3184 {
3185 return GL_FALSE;
3186 }
3187
3188 pAsm->D.dst.writex = pAsm->D.dst.writez = pAsm->D.dst.writew = 0;
3189
3190 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3191 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
3192 pAsm->S[0].src.reg = tmp3;
3193
3194 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
3195 noneg_PVSSRC(&(pAsm->S[0].src));
3196
3197 if( GL_FALSE == next_ins(pAsm) )
3198 {
3199 return GL_FALSE;
3200 }
3201
3202 // MOV dst.z, tmp2.x
3203
3204 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
3205
3206 if( GL_FALSE == assemble_dst(pAsm) )
3207 {
3208 return GL_FALSE;
3209 }
3210
3211 pAsm->D.dst.writex = pAsm->D.dst.writey = pAsm->D.dst.writew = 0;
3212
3213 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3214 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
3215 pAsm->S[0].src.reg = tmp2;
3216
3217 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
3218 noneg_PVSSRC(&(pAsm->S[0].src));
3219
3220 if( GL_FALSE == next_ins(pAsm) )
3221 {
3222 return GL_FALSE;
3223 }
3224
3225 // MOV dst.w 1.0
3226
3227 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
3228
3229 if( GL_FALSE == assemble_dst(pAsm) )
3230 {
3231 return GL_FALSE;
3232 }
3233
3234 pAsm->D.dst.writex = pAsm->D.dst.writey = pAsm->D.dst.writez = 0;
3235
3236 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3237 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3238 pAsm->S[0].src.reg = tmp1;
3239
3240 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_1);
3241 noneg_PVSSRC(&(pAsm->S[0].src));
3242
3243 if( GL_FALSE == next_ins(pAsm) )
3244 {
3245 return GL_FALSE;
3246 }
3247
3248 return GL_TRUE;
3249 }
3250
3251 GLboolean assemble_MAD(struct r700_AssemblerBase *pAsm)
3252 {
3253 int tmp, ii;
3254 GLboolean bReplaceDst = GL_FALSE;
3255 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
3256
3257 if( GL_FALSE == checkop3(pAsm) )
3258 {
3259 return GL_FALSE;
3260 }
3261
3262 pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
3263 pAsm->D.dst.op3 = 1;
3264
3265 tmp = (-1);
3266
3267 if(PROGRAM_TEMPORARY == pILInst->DstReg.File)
3268 { /* TODO : more investigation on MAD src and dst using same register */
3269 for(ii=0; ii<3; ii++)
3270 {
3271 if( (PROGRAM_TEMPORARY == pILInst->SrcReg[ii].File)
3272 && (pILInst->DstReg.Index == pILInst->SrcReg[ii].Index) )
3273 {
3274 bReplaceDst = GL_TRUE;
3275 break;
3276 }
3277 }
3278 }
3279 if(0xF != pILInst->DstReg.WriteMask)
3280 { /* OP3 has no support for write mask */
3281 bReplaceDst = GL_TRUE;
3282 }
3283
3284 if(GL_TRUE == bReplaceDst)
3285 {
3286 tmp = gethelpr(pAsm);
3287
3288 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3289 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3290 pAsm->D.dst.reg = tmp;
3291
3292 nomask_PVSDST(&(pAsm->D.dst));
3293 }
3294 else
3295 {
3296 if( GL_FALSE == assemble_dst(pAsm) )
3297 {
3298 return GL_FALSE;
3299 }
3300 }
3301
3302 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3303 {
3304 return GL_FALSE;
3305 }
3306
3307 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
3308 {
3309 return GL_FALSE;
3310 }
3311
3312 if( GL_FALSE == assemble_src(pAsm, 2, -1) )
3313 {
3314 return GL_FALSE;
3315 }
3316
3317 if ( GL_FALSE == next_ins(pAsm) )
3318 {
3319 return GL_FALSE;
3320 }
3321
3322 if (GL_TRUE == bReplaceDst)
3323 {
3324 if( GL_FALSE == assemble_dst(pAsm) )
3325 {
3326 return GL_FALSE;
3327 }
3328
3329 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
3330
3331 //tmp for source
3332 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3333 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3334 pAsm->S[0].src.reg = tmp;
3335
3336 noneg_PVSSRC(&(pAsm->S[0].src));
3337 noswizzle_PVSSRC(&(pAsm->S[0].src));
3338
3339 if( GL_FALSE == next_ins(pAsm) )
3340 {
3341 return GL_FALSE;
3342 }
3343 }
3344
3345 return GL_TRUE;
3346 }
3347
3348 /* LIT dst, src */
3349 GLboolean assemble_LIT(r700_AssemblerBase *pAsm)
3350 {
3351 unsigned int dstReg;
3352 unsigned int dstType;
3353 unsigned int srcReg;
3354 unsigned int srcType;
3355 checkop1(pAsm);
3356 int tmp = gethelpr(pAsm);
3357
3358 if( GL_FALSE == assemble_dst(pAsm) )
3359 {
3360 return GL_FALSE;
3361 }
3362 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3363 {
3364 return GL_FALSE;
3365 }
3366 dstReg = pAsm->D.dst.reg;
3367 dstType = pAsm->D.dst.rtype;
3368 srcReg = pAsm->S[0].src.reg;
3369 srcType = pAsm->S[0].src.rtype;
3370
3371 /* dst.xw, <- 1.0 */
3372 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
3373 pAsm->D.dst.rtype = dstType;
3374 pAsm->D.dst.reg = dstReg;
3375 pAsm->D.dst.writex = 1;
3376 pAsm->D.dst.writey = 0;
3377 pAsm->D.dst.writez = 0;
3378 pAsm->D.dst.writew = 1;
3379 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3380 pAsm->S[0].src.reg = tmp;
3381 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3382 noneg_PVSSRC(&(pAsm->S[0].src));
3383 pAsm->S[0].src.swizzlex = SQ_SEL_1;
3384 pAsm->S[0].src.swizzley = SQ_SEL_1;
3385 pAsm->S[0].src.swizzlez = SQ_SEL_1;
3386 pAsm->S[0].src.swizzlew = SQ_SEL_1;
3387 if( GL_FALSE == next_ins(pAsm) )
3388 {
3389 return GL_FALSE;
3390 }
3391
3392 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3393 {
3394 return GL_FALSE;
3395 }
3396
3397 /* dst.y = max(src.x, 0.0) */
3398 pAsm->D.dst.opcode = SQ_OP2_INST_MAX;
3399 pAsm->D.dst.rtype = dstType;
3400 pAsm->D.dst.reg = dstReg;
3401 pAsm->D.dst.writex = 0;
3402 pAsm->D.dst.writey = 1;
3403 pAsm->D.dst.writez = 0;
3404 pAsm->D.dst.writew = 0;
3405 pAsm->S[0].src.rtype = srcType;
3406 pAsm->S[0].src.reg = srcReg;
3407 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3408 swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X, SQ_SEL_X, SQ_SEL_X, SQ_SEL_X);
3409 pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
3410 pAsm->S[1].src.reg = tmp;
3411 setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
3412 noneg_PVSSRC(&(pAsm->S[1].src));
3413 pAsm->S[1].src.swizzlex = SQ_SEL_0;
3414 pAsm->S[1].src.swizzley = SQ_SEL_0;
3415 pAsm->S[1].src.swizzlez = SQ_SEL_0;
3416 pAsm->S[1].src.swizzlew = SQ_SEL_0;
3417 if( GL_FALSE == next_ins(pAsm) )
3418 {
3419 return GL_FALSE;
3420 }
3421
3422 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3423 {
3424 return GL_FALSE;
3425 }
3426
3427 swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_Y, SQ_SEL_Y, SQ_SEL_Y, SQ_SEL_Y);
3428
3429 /* dst.z = log(src.y) */
3430 pAsm->D.dst.opcode = SQ_OP2_INST_LOG_CLAMPED;
3431 pAsm->D.dst.math = 1;
3432 pAsm->D.dst.rtype = dstType;
3433 pAsm->D.dst.reg = dstReg;
3434 pAsm->D.dst.writex = 0;
3435 pAsm->D.dst.writey = 0;
3436 pAsm->D.dst.writez = 1;
3437 pAsm->D.dst.writew = 0;
3438 pAsm->S[0].src.rtype = srcType;
3439 pAsm->S[0].src.reg = srcReg;
3440 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3441 if( GL_FALSE == next_ins(pAsm) )
3442 {
3443 return GL_FALSE;
3444 }
3445
3446 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3447 {
3448 return GL_FALSE;
3449 }
3450
3451 if( GL_FALSE == assemble_src(pAsm, 0, 2) )
3452 {
3453 return GL_FALSE;
3454 }
3455
3456 swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_W, SQ_SEL_W, SQ_SEL_W, SQ_SEL_W);
3457
3458 swizzleagain_PVSSRC(&(pAsm->S[2].src), SQ_SEL_X, SQ_SEL_X, SQ_SEL_X, SQ_SEL_X);
3459
3460 /* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */
3461 pAsm->D.dst.opcode = SQ_OP3_INST_MUL_LIT;
3462 pAsm->D.dst.math = 1;
3463 pAsm->D.dst.op3 = 1;
3464 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3465 pAsm->D.dst.reg = tmp;
3466 pAsm->D.dst.writex = 1;
3467 pAsm->D.dst.writey = 0;
3468 pAsm->D.dst.writez = 0;
3469 pAsm->D.dst.writew = 0;
3470
3471 pAsm->S[0].src.rtype = srcType;
3472 pAsm->S[0].src.reg = srcReg;
3473 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3474
3475 pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
3476 pAsm->S[1].src.reg = dstReg;
3477 setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
3478 noneg_PVSSRC(&(pAsm->S[1].src));
3479 pAsm->S[1].src.swizzlex = SQ_SEL_Z;
3480 pAsm->S[1].src.swizzley = SQ_SEL_Z;
3481 pAsm->S[1].src.swizzlez = SQ_SEL_Z;
3482 pAsm->S[1].src.swizzlew = SQ_SEL_Z;
3483
3484 pAsm->S[2].src.rtype = srcType;
3485 pAsm->S[2].src.reg = srcReg;
3486 setaddrmode_PVSSRC(&(pAsm->S[2].src), ADDR_ABSOLUTE);
3487
3488 if( GL_FALSE == next_ins(pAsm) )
3489 {
3490 return GL_FALSE;
3491 }
3492
3493 /* dst.z = exp(tmp.x) */
3494 pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE;
3495 pAsm->D.dst.math = 1;
3496 pAsm->D.dst.rtype = dstType;
3497 pAsm->D.dst.reg = dstReg;
3498 pAsm->D.dst.writex = 0;
3499 pAsm->D.dst.writey = 0;
3500 pAsm->D.dst.writez = 1;
3501 pAsm->D.dst.writew = 0;
3502
3503 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3504 pAsm->S[0].src.reg = tmp;
3505 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3506 noneg_PVSSRC(&(pAsm->S[0].src));
3507 pAsm->S[0].src.swizzlex = SQ_SEL_X;
3508 pAsm->S[0].src.swizzley = SQ_SEL_X;
3509 pAsm->S[0].src.swizzlez = SQ_SEL_X;
3510 pAsm->S[0].src.swizzlew = SQ_SEL_X;
3511
3512 if( GL_FALSE == next_ins(pAsm) )
3513 {
3514 return GL_FALSE;
3515 }
3516
3517 return GL_TRUE;
3518 }
3519
3520 GLboolean assemble_MAX(r700_AssemblerBase *pAsm)
3521 {
3522 if( GL_FALSE == checkop2(pAsm) )
3523 {
3524 return GL_FALSE;
3525 }
3526
3527 pAsm->D.dst.opcode = SQ_OP2_INST_MAX;
3528
3529 if( GL_FALSE == assemble_dst(pAsm) )
3530 {
3531 return GL_FALSE;
3532 }
3533
3534 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3535 {
3536 return GL_FALSE;
3537 }
3538
3539 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
3540 {
3541 return GL_FALSE;
3542 }
3543
3544 if( GL_FALSE == next_ins(pAsm) )
3545 {
3546 return GL_FALSE;
3547 }
3548
3549 return GL_TRUE;
3550 }
3551
3552 GLboolean assemble_MIN(r700_AssemblerBase *pAsm)
3553 {
3554 if( GL_FALSE == checkop2(pAsm) )
3555 {
3556 return GL_FALSE;
3557 }
3558
3559 pAsm->D.dst.opcode = SQ_OP2_INST_MIN;
3560
3561 if( GL_FALSE == assemble_dst(pAsm) )
3562 {
3563 return GL_FALSE;
3564 }
3565
3566 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3567 {
3568 return GL_FALSE;
3569 }
3570
3571 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
3572 {
3573 return GL_FALSE;
3574 }
3575
3576 if( GL_FALSE == next_ins(pAsm) )
3577 {
3578 return GL_FALSE;
3579 }
3580
3581 return GL_TRUE;
3582 }
3583
3584 GLboolean assemble_MOV(r700_AssemblerBase *pAsm)
3585 {
3586 checkop1(pAsm);
3587
3588 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
3589
3590 if (GL_FALSE == assemble_dst(pAsm))
3591 {
3592 return GL_FALSE;
3593 }
3594
3595 if (GL_FALSE == assemble_src(pAsm, 0, -1))
3596 {
3597 return GL_FALSE;
3598 }
3599
3600 if ( GL_FALSE == next_ins(pAsm) )
3601 {
3602 return GL_FALSE;
3603 }
3604
3605 return GL_TRUE;
3606 }
3607
3608 GLboolean assemble_MUL(r700_AssemblerBase *pAsm)
3609 {
3610 if( GL_FALSE == checkop2(pAsm) )
3611 {
3612 return GL_FALSE;
3613 }
3614
3615 pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
3616
3617 if( GL_FALSE == assemble_dst(pAsm) )
3618 {
3619 return GL_FALSE;
3620 }
3621
3622 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3623 {
3624 return GL_FALSE;
3625 }
3626
3627 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
3628 {
3629 return GL_FALSE;
3630 }
3631
3632 if( GL_FALSE == next_ins(pAsm) )
3633 {
3634 return GL_FALSE;
3635 }
3636
3637 return GL_TRUE;
3638 }
3639
3640 GLboolean assemble_POW(r700_AssemblerBase *pAsm)
3641 {
3642 BITS tmp;
3643
3644 checkop1(pAsm);
3645
3646 tmp = gethelpr(pAsm);
3647
3648 // LG2 tmp.x, a.swizzle
3649 pAsm->D.dst.opcode = SQ_OP2_INST_LOG_IEEE;
3650 pAsm->D.dst.math = 1;
3651
3652 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3653 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3654 pAsm->D.dst.reg = tmp;
3655 nomask_PVSDST(&(pAsm->D.dst));
3656
3657 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3658 {
3659 return GL_FALSE;
3660 }
3661
3662 if( GL_FALSE == next_ins(pAsm) )
3663 {
3664 return GL_FALSE;
3665 }
3666
3667 // MUL tmp.x, tmp.x, b.swizzle
3668 pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
3669
3670 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3671 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3672 pAsm->D.dst.reg = tmp;
3673 nomask_PVSDST(&(pAsm->D.dst));
3674
3675 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3676 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3677 pAsm->S[0].src.reg = tmp;
3678 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
3679 noneg_PVSSRC(&(pAsm->S[0].src));
3680
3681 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
3682 {
3683 return GL_FALSE;
3684 }
3685
3686 if( GL_FALSE == next_ins(pAsm) )
3687 {
3688 return GL_FALSE;
3689 }
3690
3691 // EX2 dst.mask, tmp.x
3692 // EX2 tmp.x, tmp.x
3693 pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE;
3694 pAsm->D.dst.math = 1;
3695
3696 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3697 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3698 pAsm->D.dst.reg = tmp;
3699 nomask_PVSDST(&(pAsm->D.dst));
3700
3701 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3702 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3703 pAsm->S[0].src.reg = tmp;
3704 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
3705 noneg_PVSSRC(&(pAsm->S[0].src));
3706
3707 if( GL_FALSE == next_ins(pAsm) )
3708 {
3709 return GL_FALSE;
3710 }
3711
3712 // Now replicate result to all necessary channels in destination
3713 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
3714
3715 if( GL_FALSE == assemble_dst(pAsm) )
3716 {
3717 return GL_FALSE;
3718 }
3719
3720 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3721 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
3722 pAsm->S[0].src.reg = tmp;
3723
3724 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
3725 noneg_PVSSRC(&(pAsm->S[0].src));
3726
3727 if( GL_FALSE == next_ins(pAsm) )
3728 {
3729 return GL_FALSE;
3730 }
3731
3732 return GL_TRUE;
3733 }
3734
3735 GLboolean assemble_RCP(r700_AssemblerBase *pAsm)
3736 {
3737 return assemble_math_function(pAsm, SQ_OP2_INST_RECIP_IEEE);
3738 }
3739
3740 GLboolean assemble_RSQ(r700_AssemblerBase *pAsm)
3741 {
3742 return assemble_math_function(pAsm, SQ_OP2_INST_RECIPSQRT_IEEE);
3743 }
3744
3745 GLboolean assemble_SIN(r700_AssemblerBase *pAsm)
3746 {
3747 return assemble_math_function(pAsm, SQ_OP2_INST_SIN);
3748 }
3749
3750 GLboolean assemble_SCS(r700_AssemblerBase *pAsm)
3751 {
3752 BITS tmp;
3753
3754 checkop1(pAsm);
3755
3756 tmp = gethelpr(pAsm);
3757
3758 // COS tmp.x, a.x
3759 pAsm->D.dst.opcode = SQ_OP2_INST_COS;
3760 pAsm->D.dst.math = 1;
3761
3762 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3763 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3764 pAsm->D.dst.reg = tmp;
3765 pAsm->D.dst.writex = 1;
3766
3767 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3768 {
3769 return GL_FALSE;
3770 }
3771
3772 if ( GL_FALSE == next_ins(pAsm) )
3773 {
3774 return GL_FALSE;
3775 }
3776
3777 // SIN tmp.y, a.x
3778 pAsm->D.dst.opcode = SQ_OP2_INST_SIN;
3779 pAsm->D.dst.math = 1;
3780
3781 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3782 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3783 pAsm->D.dst.reg = tmp;
3784 pAsm->D.dst.writey = 1;
3785
3786 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3787 {
3788 return GL_FALSE;
3789 }
3790
3791 if( GL_FALSE == next_ins(pAsm) )
3792 {
3793 return GL_FALSE;
3794 }
3795
3796 // MOV dst.mask, tmp
3797 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
3798
3799 if( GL_FALSE == assemble_dst(pAsm) )
3800 {
3801 return GL_FALSE;
3802 }
3803
3804 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3805 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
3806 pAsm->S[0].src.reg = tmp;
3807
3808 noswizzle_PVSSRC(&(pAsm->S[0].src));
3809 pAsm->S[0].src.swizzlez = SQ_SEL_0;
3810 pAsm->S[0].src.swizzlew = SQ_SEL_0;
3811
3812 if ( GL_FALSE == next_ins(pAsm) )
3813 {
3814 return GL_FALSE;
3815 }
3816
3817 return GL_TRUE;
3818 }
3819
3820 GLboolean assemble_SGE(r700_AssemblerBase *pAsm)
3821 {
3822 if( GL_FALSE == checkop2(pAsm) )
3823 {
3824 return GL_FALSE;
3825 }
3826
3827 pAsm->D.dst.opcode = SQ_OP2_INST_SETGE;
3828
3829 if( GL_FALSE == assemble_dst(pAsm) )
3830 {
3831 return GL_FALSE;
3832 }
3833
3834 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3835 {
3836 return GL_FALSE;
3837 }
3838
3839 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
3840 {
3841 return GL_FALSE;
3842 }
3843
3844 if( GL_FALSE == next_ins(pAsm) )
3845 {
3846 return GL_FALSE;
3847 }
3848
3849 return GL_TRUE;
3850 }
3851
3852 GLboolean assemble_SLT(r700_AssemblerBase *pAsm)
3853 {
3854 if( GL_FALSE == checkop2(pAsm) )
3855 {
3856 return GL_FALSE;
3857 }
3858
3859 pAsm->D.dst.opcode = SQ_OP2_INST_SETGT;
3860
3861 if( GL_FALSE == assemble_dst(pAsm) )
3862 {
3863 return GL_FALSE;
3864 }
3865
3866 if( GL_FALSE == assemble_src(pAsm, 0, 1) )
3867 {
3868 return GL_FALSE;
3869 }
3870
3871 if( GL_FALSE == assemble_src(pAsm, 1, 0) )
3872 {
3873 return GL_FALSE;
3874 }
3875
3876 if( GL_FALSE == next_ins(pAsm) )
3877 {
3878 return GL_FALSE;
3879 }
3880
3881 return GL_TRUE;
3882 }
3883
3884 GLboolean assemble_STP(r700_AssemblerBase *pAsm)
3885 {
3886 return GL_TRUE;
3887 }
3888
3889 GLboolean assemble_TEX(r700_AssemblerBase *pAsm)
3890 {
3891 GLboolean src_const;
3892 GLboolean need_barrier = GL_FALSE;
3893
3894 checkop1(pAsm);
3895
3896 switch (pAsm->pILInst[pAsm->uiCurInst].SrcReg[0].File)
3897 {
3898 case PROGRAM_CONSTANT:
3899 case PROGRAM_LOCAL_PARAM:
3900 case PROGRAM_ENV_PARAM:
3901 case PROGRAM_STATE_VAR:
3902 src_const = GL_TRUE;
3903 break;
3904 case PROGRAM_TEMPORARY:
3905 case PROGRAM_INPUT:
3906 default:
3907 src_const = GL_FALSE;
3908 break;
3909 }
3910
3911 if (GL_TRUE == src_const)
3912 {
3913 if ( GL_FALSE == mov_temp(pAsm, 0) )
3914 return GL_FALSE;
3915 need_barrier = GL_TRUE;
3916 }
3917
3918 if (pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_TXP)
3919 {
3920 GLuint tmp = gethelpr(pAsm);
3921 pAsm->D.dst.opcode = SQ_OP2_INST_RECIP_IEEE;
3922 pAsm->D.dst.math = 1;
3923 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3924 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3925 pAsm->D.dst.reg = tmp;
3926 pAsm->D.dst.writew = 1;
3927
3928 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3929 {
3930 return GL_FALSE;
3931 }
3932 swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_W, SQ_SEL_W, SQ_SEL_W, SQ_SEL_W);
3933 if( GL_FALSE == next_ins(pAsm) )
3934 {
3935 return GL_FALSE;
3936 }
3937
3938 pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
3939 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3940 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3941 pAsm->D.dst.reg = tmp;
3942 pAsm->D.dst.writex = 1;
3943 pAsm->D.dst.writey = 1;
3944 pAsm->D.dst.writez = 1;
3945 pAsm->D.dst.writew = 0;
3946
3947 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3948 {
3949 return GL_FALSE;
3950 }
3951 setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
3952 pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
3953 pAsm->S[1].src.reg = tmp;
3954 setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_W);
3955
3956 if( GL_FALSE == next_ins(pAsm) )
3957 {
3958 return GL_FALSE;
3959 }
3960
3961 pAsm->aArgSubst[1] = tmp;
3962 need_barrier = GL_TRUE;
3963 }
3964
3965 if (pAsm->pILInst[pAsm->uiCurInst].TexSrcTarget == TEXTURE_CUBE_INDEX )
3966 {
3967 GLuint tmp1 = gethelpr(pAsm);
3968 GLuint tmp2 = gethelpr(pAsm);
3969
3970 /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */
3971 pAsm->D.dst.opcode = SQ_OP2_INST_CUBE;
3972 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3973 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3974 pAsm->D.dst.reg = tmp1;
3975 nomask_PVSDST(&(pAsm->D.dst));
3976
3977 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3978 {
3979 return GL_FALSE;
3980 }
3981
3982 if( GL_FALSE == assemble_src(pAsm, 0, 1) )
3983 {
3984 return GL_FALSE;
3985 }
3986
3987 swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_Z, SQ_SEL_Z, SQ_SEL_X, SQ_SEL_Y);
3988 swizzleagain_PVSSRC(&(pAsm->S[1].src), SQ_SEL_Y, SQ_SEL_X, SQ_SEL_Z, SQ_SEL_Z);
3989
3990 if( GL_FALSE == next_ins(pAsm) )
3991 {
3992 return GL_FALSE;
3993 }
3994
3995 /* tmp1.z = ABS(tmp1.z) dont have abs support in assembler currently
3996 * have to do explicit instruction
3997 */
3998 pAsm->D.dst.opcode = SQ_OP2_INST_MAX;
3999 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4000 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4001 pAsm->D.dst.reg = tmp1;
4002 pAsm->D.dst.writez = 1;
4003
4004 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4005 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4006 pAsm->S[0].src.reg = tmp1;
4007 noswizzle_PVSSRC(&(pAsm->S[0].src));
4008 pAsm->S[1].bits = pAsm->S[0].bits;
4009 flipneg_PVSSRC(&(pAsm->S[1].src));
4010
4011 next_ins(pAsm);
4012
4013 /* tmp1.z = RCP_e(|tmp1.z|) */
4014 pAsm->D.dst.opcode = SQ_OP2_INST_RECIP_IEEE;
4015 pAsm->D.dst.math = 1;
4016 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4017 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4018 pAsm->D.dst.reg = tmp1;
4019 pAsm->D.dst.writez = 1;
4020
4021 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4022 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4023 pAsm->S[0].src.reg = tmp1;
4024 pAsm->S[0].src.swizzlex = SQ_SEL_Z;
4025
4026 next_ins(pAsm);
4027
4028 /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x
4029 * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x
4030 * muladd has no writemask, have to use another temp
4031 * also no support for imm constants, so add 1 here
4032 */
4033 pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
4034 pAsm->D.dst.op3 = 1;
4035 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4036 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4037 pAsm->D.dst.reg = tmp2;
4038
4039 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4040 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4041 pAsm->S[0].src.reg = tmp1;
4042 noswizzle_PVSSRC(&(pAsm->S[0].src));
4043 setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
4044 pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
4045 pAsm->S[1].src.reg = tmp1;
4046 setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_Z);
4047 setaddrmode_PVSSRC(&(pAsm->S[2].src), ADDR_ABSOLUTE);
4048 pAsm->S[2].src.rtype = SRC_REG_TEMPORARY;
4049 pAsm->S[2].src.reg = tmp1;
4050 setswizzle_PVSSRC(&(pAsm->S[2].src), SQ_SEL_1);
4051
4052 next_ins(pAsm);
4053
4054 /* ADD the remaining .5 */
4055 pAsm->D.dst.opcode = SQ_OP2_INST_ADD;
4056 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4057 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4058 pAsm->D.dst.reg = tmp2;
4059 pAsm->D.dst.writex = 1;
4060 pAsm->D.dst.writey = 1;
4061 pAsm->D.dst.writez = 0;
4062 pAsm->D.dst.writew = 0;
4063
4064 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4065 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4066 pAsm->S[0].src.reg = tmp2;
4067 noswizzle_PVSSRC(&(pAsm->S[0].src));
4068 setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
4069 pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
4070 pAsm->S[1].src.reg = 252; // SQ_ALU_SRC_0_5
4071 noswizzle_PVSSRC(&(pAsm->S[1].src));
4072
4073 next_ins(pAsm);
4074
4075 /* tmp1.xy = temp2.xy */
4076 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
4077 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4078 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4079 pAsm->D.dst.reg = tmp1;
4080 pAsm->D.dst.writex = 1;
4081 pAsm->D.dst.writey = 1;
4082 pAsm->D.dst.writez = 0;
4083 pAsm->D.dst.writew = 0;
4084
4085 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4086 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4087 pAsm->S[0].src.reg = tmp2;
4088 noswizzle_PVSSRC(&(pAsm->S[0].src));
4089
4090 next_ins(pAsm);
4091 pAsm->aArgSubst[1] = tmp1;
4092 need_barrier = GL_TRUE;
4093
4094 }
4095
4096 if(pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_TXB)
4097 {
4098 pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE_L;
4099 }
4100 else
4101 {
4102 pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE;
4103 }
4104
4105 pAsm->is_tex = GL_TRUE;
4106 if ( GL_TRUE == need_barrier )
4107 {
4108 pAsm->need_tex_barrier = GL_TRUE;
4109 }
4110 // Set src1 to tex unit id
4111 pAsm->S[1].src.reg = pAsm->pILInst[pAsm->uiCurInst].TexSrcUnit;
4112 pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
4113
4114 //No sw info from mesa compiler, so hard code here.
4115 pAsm->S[1].src.swizzlex = SQ_SEL_X;
4116 pAsm->S[1].src.swizzley = SQ_SEL_Y;
4117 pAsm->S[1].src.swizzlez = SQ_SEL_Z;
4118 pAsm->S[1].src.swizzlew = SQ_SEL_W;
4119
4120 if( GL_FALSE == tex_dst(pAsm) )
4121 {
4122 return GL_FALSE;
4123 }
4124
4125 if( GL_FALSE == tex_src(pAsm) )
4126 {
4127 return GL_FALSE;
4128 }
4129
4130 if(pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_TXP)
4131 {
4132 /* hopefully did swizzles before */
4133 noswizzle_PVSSRC(&(pAsm->S[0].src));
4134 }
4135
4136 if(pAsm->pILInst[pAsm->uiCurInst].TexSrcTarget == TEXTURE_CUBE_INDEX)
4137 {
4138 /* SAMPLE dst, tmp.yxwy, CUBE */
4139 pAsm->S[0].src.swizzlex = SQ_SEL_Y;
4140 pAsm->S[0].src.swizzley = SQ_SEL_X;
4141 pAsm->S[0].src.swizzlez = SQ_SEL_W;
4142 pAsm->S[0].src.swizzlew = SQ_SEL_Y;
4143 }
4144
4145 if ( GL_FALSE == next_ins(pAsm) )
4146 {
4147 return GL_FALSE;
4148 }
4149
4150 return GL_TRUE;
4151 }
4152
4153 GLboolean assemble_XPD(r700_AssemblerBase *pAsm)
4154 {
4155 BITS tmp;
4156
4157 if( GL_FALSE == checkop2(pAsm) )
4158 {
4159 return GL_FALSE;
4160 }
4161
4162 tmp = gethelpr(pAsm);
4163
4164 pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
4165
4166 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4167 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4168 pAsm->D.dst.reg = tmp;
4169 nomask_PVSDST(&(pAsm->D.dst));
4170
4171 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4172 {
4173 return GL_FALSE;
4174 }
4175
4176 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
4177 {
4178 return GL_FALSE;
4179 }
4180
4181 swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_Z, SQ_SEL_X, SQ_SEL_Y, SQ_SEL_0);
4182 swizzleagain_PVSSRC(&(pAsm->S[1].src), SQ_SEL_Y, SQ_SEL_Z, SQ_SEL_X, SQ_SEL_0);
4183
4184 if( GL_FALSE == next_ins(pAsm) )
4185 {
4186 return GL_FALSE;
4187 }
4188
4189 pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
4190 pAsm->D.dst.op3 = 1;
4191
4192 if(0xF != pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask)
4193 {
4194 tmp = gethelpr(pAsm);
4195
4196 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4197 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4198 pAsm->D.dst.reg = tmp;
4199
4200 nomask_PVSDST(&(pAsm->D.dst));
4201 }
4202 else
4203 {
4204 if( GL_FALSE == assemble_dst(pAsm) )
4205 {
4206 return GL_FALSE;
4207 }
4208 }
4209
4210 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4211 {
4212 return GL_FALSE;
4213 }
4214
4215 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
4216 {
4217 return GL_FALSE;
4218 }
4219
4220 swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_Y, SQ_SEL_Z, SQ_SEL_X, SQ_SEL_0);
4221 swizzleagain_PVSSRC(&(pAsm->S[1].src), SQ_SEL_Z, SQ_SEL_X, SQ_SEL_Y, SQ_SEL_0);
4222
4223 // result1 + (neg) result0
4224 setaddrmode_PVSSRC(&(pAsm->S[2].src),ADDR_ABSOLUTE);
4225 pAsm->S[2].src.rtype = SRC_REG_TEMPORARY;
4226 pAsm->S[2].src.reg = tmp;
4227
4228 neg_PVSSRC(&(pAsm->S[2].src));
4229 noswizzle_PVSSRC(&(pAsm->S[2].src));
4230
4231 if( GL_FALSE == next_ins(pAsm) )
4232 {
4233 return GL_FALSE;
4234 }
4235
4236
4237 if(0xF != pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask)
4238 {
4239 if( GL_FALSE == assemble_dst(pAsm) )
4240 {
4241 return GL_FALSE;
4242 }
4243
4244 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
4245
4246 // Use tmp as source
4247 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4248 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4249 pAsm->S[0].src.reg = tmp;
4250
4251 noneg_PVSSRC(&(pAsm->S[0].src));
4252 noswizzle_PVSSRC(&(pAsm->S[0].src));
4253
4254 if( GL_FALSE == next_ins(pAsm) )
4255 {
4256 return GL_FALSE;
4257 }
4258 }
4259
4260 return GL_TRUE;
4261 }
4262
4263 GLboolean assemble_EXPORT(r700_AssemblerBase *pAsm)
4264 {
4265 return GL_TRUE;
4266 }
4267
4268 GLboolean assemble_IF(r700_AssemblerBase *pAsm)
4269 {
4270 return GL_TRUE;
4271 }
4272
4273 GLboolean assemble_ENDIF(r700_AssemblerBase *pAsm)
4274 {
4275 return GL_TRUE;
4276 }
4277
4278 GLboolean AssembleInstr(GLuint uiNumberInsts,
4279 struct prog_instruction *pILInst,
4280 r700_AssemblerBase *pR700AsmCode)
4281 {
4282 GLuint i;
4283
4284 pR700AsmCode->pILInst = pILInst;
4285 for(i=0; i<uiNumberInsts; i++)
4286 {
4287 pR700AsmCode->uiCurInst = i;
4288
4289 switch (pILInst[i].Opcode)
4290 {
4291 case OPCODE_ABS:
4292 if ( GL_FALSE == assemble_ABS(pR700AsmCode) )
4293 return GL_FALSE;
4294 break;
4295 case OPCODE_ADD:
4296 case OPCODE_SUB:
4297 if ( GL_FALSE == assemble_ADD(pR700AsmCode) )
4298 return GL_FALSE;
4299 break;
4300
4301 case OPCODE_ARL:
4302 if ( GL_FALSE == assemble_ARL(pR700AsmCode) )
4303 return GL_FALSE;
4304 break;
4305 case OPCODE_ARR:
4306 radeon_error("Not yet implemented instruction OPCODE_ARR \n");
4307 //if ( GL_FALSE == assemble_BAD("ARR") )
4308 return GL_FALSE;
4309 break;
4310
4311 case OPCODE_CMP:
4312 if ( GL_FALSE == assemble_CMP(pR700AsmCode) )
4313 return GL_FALSE;
4314 break;
4315 case OPCODE_COS:
4316 if ( GL_FALSE == assemble_COS(pR700AsmCode) )
4317 return GL_FALSE;
4318 break;
4319
4320 case OPCODE_DP3:
4321 case OPCODE_DP4:
4322 case OPCODE_DPH:
4323 if ( GL_FALSE == assemble_DOT(pR700AsmCode) )
4324 return GL_FALSE;
4325 break;
4326
4327 case OPCODE_DST:
4328 if ( GL_FALSE == assemble_DST(pR700AsmCode) )
4329 return GL_FALSE;
4330 break;
4331
4332 case OPCODE_EX2:
4333 if ( GL_FALSE == assemble_EX2(pR700AsmCode) )
4334 return GL_FALSE;
4335 break;
4336 case OPCODE_EXP:
4337 if ( GL_FALSE == assemble_EXP(pR700AsmCode) )
4338 return GL_FALSE;
4339 break;
4340
4341 case OPCODE_FLR:
4342 if ( GL_FALSE == assemble_FLR(pR700AsmCode) )
4343 return GL_FALSE;
4344 break;
4345 //case OP_FLR_INT:
4346 // if ( GL_FALSE == assemble_FLR_INT() )
4347 // return GL_FALSE;
4348 // break;
4349
4350 case OPCODE_FRC:
4351 if ( GL_FALSE == assemble_FRC(pR700AsmCode) )
4352 return GL_FALSE;
4353 break;
4354
4355 case OPCODE_KIL:
4356 if ( GL_FALSE == assemble_KIL(pR700AsmCode) )
4357 return GL_FALSE;
4358 break;
4359 case OPCODE_LG2:
4360 if ( GL_FALSE == assemble_LG2(pR700AsmCode) )
4361 return GL_FALSE;
4362 break;
4363 case OPCODE_LIT:
4364 if ( GL_FALSE == assemble_LIT(pR700AsmCode) )
4365 return GL_FALSE;
4366 break;
4367 case OPCODE_LRP:
4368 if ( GL_FALSE == assemble_LRP(pR700AsmCode) )
4369 return GL_FALSE;
4370 break;
4371 case OPCODE_LOG:
4372 if ( GL_FALSE == assemble_LOG(pR700AsmCode) )
4373 return GL_FALSE;
4374 break;
4375
4376 case OPCODE_MAD:
4377 if ( GL_FALSE == assemble_MAD(pR700AsmCode) )
4378 return GL_FALSE;
4379 break;
4380 case OPCODE_MAX:
4381 if ( GL_FALSE == assemble_MAX(pR700AsmCode) )
4382 return GL_FALSE;
4383 break;
4384 case OPCODE_MIN:
4385 if ( GL_FALSE == assemble_MIN(pR700AsmCode) )
4386 return GL_FALSE;
4387 break;
4388
4389 case OPCODE_MOV:
4390 if ( GL_FALSE == assemble_MOV(pR700AsmCode) )
4391 return GL_FALSE;
4392 break;
4393 case OPCODE_MUL:
4394 if ( GL_FALSE == assemble_MUL(pR700AsmCode) )
4395 return GL_FALSE;
4396 break;
4397
4398 case OPCODE_POW:
4399 if ( GL_FALSE == assemble_POW(pR700AsmCode) )
4400 return GL_FALSE;
4401 break;
4402 case OPCODE_RCP:
4403 if ( GL_FALSE == assemble_RCP(pR700AsmCode) )
4404 return GL_FALSE;
4405 break;
4406 case OPCODE_RSQ:
4407 if ( GL_FALSE == assemble_RSQ(pR700AsmCode) )
4408 return GL_FALSE;
4409 break;
4410 case OPCODE_SIN:
4411 if ( GL_FALSE == assemble_SIN(pR700AsmCode) )
4412 return GL_FALSE;
4413 break;
4414 case OPCODE_SCS:
4415 if ( GL_FALSE == assemble_SCS(pR700AsmCode) )
4416 return GL_FALSE;
4417 break;
4418
4419 case OPCODE_SGE:
4420 if ( GL_FALSE == assemble_SGE(pR700AsmCode) )
4421 return GL_FALSE;
4422 break;
4423 case OPCODE_SLT:
4424 if ( GL_FALSE == assemble_SLT(pR700AsmCode) )
4425 return GL_FALSE;
4426 break;
4427
4428 //case OP_STP:
4429 // if ( GL_FALSE == assemble_STP(pR700AsmCode) )
4430 // return GL_FALSE;
4431 // break;
4432
4433 case OPCODE_SWZ:
4434 if ( GL_FALSE == assemble_MOV(pR700AsmCode) )
4435 {
4436 return GL_FALSE;
4437 }
4438 else
4439 {
4440 if( (i+1)<uiNumberInsts )
4441 {
4442 if(OPCODE_END != pILInst[i+1].Opcode)
4443 {
4444 if( GL_TRUE == IsTex(pILInst[i+1].Opcode) )
4445 {
4446 pR700AsmCode->pInstDeps[i+1].nDstDep = i+1; //=1?
4447 }
4448 }
4449 }
4450 }
4451 break;
4452
4453 case OPCODE_TEX:
4454 case OPCODE_TXB:
4455 case OPCODE_TXP:
4456 if ( GL_FALSE == assemble_TEX(pR700AsmCode) )
4457 return GL_FALSE;
4458 break;
4459
4460 case OPCODE_XPD:
4461 if ( GL_FALSE == assemble_XPD(pR700AsmCode) )
4462 return GL_FALSE;
4463 break;
4464
4465 case OPCODE_IF :
4466 if ( GL_FALSE == assemble_IF(pR700AsmCode) )
4467 return GL_FALSE;
4468 break;
4469 case OPCODE_ELSE :
4470 radeon_error("Not yet implemented instruction OPCODE_ELSE \n");
4471 //if ( GL_FALSE == assemble_BAD("ELSE") )
4472 return GL_FALSE;
4473 break;
4474 case OPCODE_ENDIF:
4475 if ( GL_FALSE == assemble_ENDIF(pR700AsmCode) )
4476 return GL_FALSE;
4477 break;
4478
4479 //case OPCODE_EXPORT:
4480 // if ( GL_FALSE == assemble_EXPORT() )
4481 // return GL_FALSE;
4482 // break;
4483
4484 case OPCODE_END:
4485 //pR700AsmCode->uiCurInst = i;
4486 //This is to remaind that if in later exoort there is depth/stencil
4487 //export, we need a mov to re-arrange DST channel, where using a
4488 //psuedo inst, we will use this end inst to do it.
4489 return GL_TRUE;
4490
4491 default:
4492 radeon_error("internal: unknown instruction\n");
4493 return GL_FALSE;
4494 }
4495 }
4496
4497 return GL_TRUE;
4498 }
4499
4500 GLboolean Process_Export(r700_AssemblerBase* pAsm,
4501 GLuint type,
4502 GLuint export_starting_index,
4503 GLuint export_count,
4504 GLuint starting_register_number,
4505 GLboolean is_depth_export)
4506 {
4507 unsigned char ucWriteMask;
4508
4509 check_current_clause(pAsm, CF_EMPTY_CLAUSE);
4510 check_current_clause(pAsm, CF_EXPORT_CLAUSE); //alloc the cf_current_export_clause_ptr
4511
4512 pAsm->cf_current_export_clause_ptr->m_Word0.f.type = type;
4513
4514 switch (type)
4515 {
4516 case SQ_EXPORT_PIXEL:
4517 if(GL_TRUE == is_depth_export)
4518 {
4519 pAsm->cf_current_export_clause_ptr->m_Word0.f.array_base = SQ_CF_PIXEL_Z;
4520 }
4521 else
4522 {
4523 pAsm->cf_current_export_clause_ptr->m_Word0.f.array_base = SQ_CF_PIXEL_MRT0 + export_starting_index;
4524 }
4525 break;
4526
4527 case SQ_EXPORT_POS:
4528 pAsm->cf_current_export_clause_ptr->m_Word0.f.array_base = SQ_CF_POS_0 + export_starting_index;
4529 break;
4530
4531 case SQ_EXPORT_PARAM:
4532 pAsm->cf_current_export_clause_ptr->m_Word0.f.array_base = 0x0 + export_starting_index;
4533 break;
4534
4535 default:
4536 radeon_error("Unknown export type: %d\n", type);
4537 return GL_FALSE;
4538 break;
4539 }
4540
4541 pAsm->cf_current_export_clause_ptr->m_Word0.f.rw_gpr = starting_register_number;
4542
4543 pAsm->cf_current_export_clause_ptr->m_Word0.f.rw_rel = SQ_ABSOLUTE;
4544 pAsm->cf_current_export_clause_ptr->m_Word0.f.index_gpr = 0x0;
4545 pAsm->cf_current_export_clause_ptr->m_Word0.f.elem_size = 0x3;
4546
4547 pAsm->cf_current_export_clause_ptr->m_Word1.f.burst_count = (export_count - 1);
4548 pAsm->cf_current_export_clause_ptr->m_Word1.f.end_of_program = 0x0;
4549 pAsm->cf_current_export_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
4550 pAsm->cf_current_export_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT; // _DONE
4551 pAsm->cf_current_export_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
4552 pAsm->cf_current_export_clause_ptr->m_Word1.f.barrier = 0x1;
4553
4554 if (export_count == 1)
4555 {
4556 ucWriteMask = pAsm->pucOutMask[starting_register_number - pAsm->starting_export_register_number];
4557 /* exports Z as a float into Red channel */
4558 if (GL_TRUE == is_depth_export)
4559 ucWriteMask = 0x1;
4560
4561 if( (ucWriteMask & 0x1) != 0)
4562 {
4563 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_x = SQ_SEL_X;
4564 }
4565 else
4566 {
4567 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_x = SQ_SEL_MASK;
4568 }
4569 if( ((ucWriteMask>>1) & 0x1) != 0)
4570 {
4571 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_y = SQ_SEL_Y;
4572 }
4573 else
4574 {
4575 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_y = SQ_SEL_MASK;
4576 }
4577 if( ((ucWriteMask>>2) & 0x1) != 0)
4578 {
4579 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_z = SQ_SEL_Z;
4580 }
4581 else
4582 {
4583 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_z = SQ_SEL_MASK;
4584 }
4585 if( ((ucWriteMask>>3) & 0x1) != 0)
4586 {
4587 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_w = SQ_SEL_W;
4588 }
4589 else
4590 {
4591 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_w = SQ_SEL_MASK;
4592 }
4593 }
4594 else
4595 {
4596 // This should only be used if all components for all registers have been written
4597 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_x = SQ_SEL_X;
4598 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_y = SQ_SEL_Y;
4599 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_z = SQ_SEL_Z;
4600 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_w = SQ_SEL_W;
4601 }
4602
4603 pAsm->cf_last_export_ptr = pAsm->cf_current_export_clause_ptr;
4604
4605 return GL_TRUE;
4606 }
4607
4608 GLboolean Move_Depth_Exports_To_Correct_Channels(r700_AssemblerBase *pAsm, BITS depth_channel_select)
4609 {
4610 gl_inst_opcode Opcode_save = pAsm->pILInst[pAsm->uiCurInst].Opcode; //Should be OPCODE_END
4611 pAsm->pILInst[pAsm->uiCurInst].Opcode = OPCODE_MOV;
4612
4613 // MOV depth_export_register.hw_depth_channel, depth_export_register.depth_channel_select
4614
4615 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
4616
4617 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4618 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4619 pAsm->D.dst.reg = pAsm->depth_export_register_number;
4620
4621 pAsm->D.dst.writex = 1; // depth goes in R channel for HW
4622
4623 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4624 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
4625 pAsm->S[0].src.reg = pAsm->depth_export_register_number;
4626
4627 setswizzle_PVSSRC(&(pAsm->S[0].src), depth_channel_select);
4628
4629 noneg_PVSSRC(&(pAsm->S[0].src));
4630
4631 if( GL_FALSE == next_ins(pAsm) )
4632 {
4633 return GL_FALSE;
4634 }
4635
4636 pAsm->pILInst[pAsm->uiCurInst].Opcode = Opcode_save;
4637
4638 return GL_TRUE;
4639 }
4640
4641 GLboolean Process_Fragment_Exports(r700_AssemblerBase *pR700AsmCode,
4642 GLbitfield OutputsWritten)
4643 {
4644 unsigned int unBit;
4645 GLuint export_count = 0;
4646
4647 if(pR700AsmCode->depth_export_register_number >= 0)
4648 {
4649 if( GL_FALSE == Move_Depth_Exports_To_Correct_Channels(pR700AsmCode, SQ_SEL_Z) ) // depth
4650 {
4651 return GL_FALSE;
4652 }
4653 }
4654
4655 unBit = 1 << FRAG_RESULT_COLOR;
4656 if(OutputsWritten & unBit)
4657 {
4658 if( GL_FALSE == Process_Export(pR700AsmCode,
4659 SQ_EXPORT_PIXEL,
4660 0,
4661 1,
4662 pR700AsmCode->uiFP_OutputMap[FRAG_RESULT_COLOR],
4663 GL_FALSE) )
4664 {
4665 return GL_FALSE;
4666 }
4667 export_count++;
4668 }
4669 unBit = 1 << FRAG_RESULT_DEPTH;
4670 if(OutputsWritten & unBit)
4671 {
4672 if( GL_FALSE == Process_Export(pR700AsmCode,
4673 SQ_EXPORT_PIXEL,
4674 0,
4675 1,
4676 pR700AsmCode->uiFP_OutputMap[FRAG_RESULT_DEPTH],
4677 GL_TRUE))
4678 {
4679 return GL_FALSE;
4680 }
4681 export_count++;
4682 }
4683 /* Need to export something, otherwise we'll hang
4684 * results are undefined anyway */
4685 if(export_count == 0)
4686 {
4687 Process_Export(pR700AsmCode, SQ_EXPORT_PIXEL, 0, 1, 0, GL_FALSE);
4688 }
4689
4690 if(pR700AsmCode->cf_last_export_ptr != NULL)
4691 {
4692 pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT_DONE;
4693 pR700AsmCode->cf_last_export_ptr->m_Word1.f.end_of_program = 0x1;
4694 }
4695
4696 return GL_TRUE;
4697 }
4698
4699 GLboolean Process_Vertex_Exports(r700_AssemblerBase *pR700AsmCode,
4700 GLbitfield OutputsWritten)
4701 {
4702 unsigned int unBit;
4703 unsigned int i;
4704
4705 GLuint export_starting_index = 0;
4706 GLuint export_count = pR700AsmCode->number_of_exports;
4707
4708 unBit = 1 << VERT_RESULT_HPOS;
4709 if(OutputsWritten & unBit)
4710 {
4711 if( GL_FALSE == Process_Export(pR700AsmCode,
4712 SQ_EXPORT_POS,
4713 export_starting_index,
4714 1,
4715 pR700AsmCode->ucVP_OutputMap[VERT_RESULT_HPOS],
4716 GL_FALSE) )
4717 {
4718 return GL_FALSE;
4719 }
4720
4721 export_count--;
4722
4723 pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT_DONE;
4724 }
4725
4726 pR700AsmCode->number_of_exports = export_count;
4727
4728 unBit = 1 << VERT_RESULT_COL0;
4729 if(OutputsWritten & unBit)
4730 {
4731 if( GL_FALSE == Process_Export(pR700AsmCode,
4732 SQ_EXPORT_PARAM,
4733 export_starting_index,
4734 1,
4735 pR700AsmCode->ucVP_OutputMap[VERT_RESULT_COL0],
4736 GL_FALSE) )
4737 {
4738 return GL_FALSE;
4739 }
4740
4741 export_starting_index++;
4742 }
4743
4744 unBit = 1 << VERT_RESULT_COL1;
4745 if(OutputsWritten & unBit)
4746 {
4747 if( GL_FALSE == Process_Export(pR700AsmCode,
4748 SQ_EXPORT_PARAM,
4749 export_starting_index,
4750 1,
4751 pR700AsmCode->ucVP_OutputMap[VERT_RESULT_COL1],
4752 GL_FALSE) )
4753 {
4754 return GL_FALSE;
4755 }
4756
4757 export_starting_index++;
4758 }
4759
4760 unBit = 1 << VERT_RESULT_FOGC;
4761 if(OutputsWritten & unBit)
4762 {
4763 if( GL_FALSE == Process_Export(pR700AsmCode,
4764 SQ_EXPORT_PARAM,
4765 export_starting_index,
4766 1,
4767 pR700AsmCode->ucVP_OutputMap[VERT_RESULT_FOGC],
4768 GL_FALSE) )
4769 {
4770 return GL_FALSE;
4771 }
4772
4773 export_starting_index++;
4774 }
4775
4776 for(i=0; i<8; i++)
4777 {
4778 unBit = 1 << (VERT_RESULT_TEX0 + i);
4779 if(OutputsWritten & unBit)
4780 {
4781 if( GL_FALSE == Process_Export(pR700AsmCode,
4782 SQ_EXPORT_PARAM,
4783 export_starting_index,
4784 1,
4785 pR700AsmCode->ucVP_OutputMap[VERT_RESULT_TEX0 + i],
4786 GL_FALSE) )
4787 {
4788 return GL_FALSE;
4789 }
4790
4791 export_starting_index++;
4792 }
4793 }
4794
4795 // At least one param should be exported
4796 if (export_count)
4797 {
4798 pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT_DONE;
4799 }
4800 else
4801 {
4802 if( GL_FALSE == Process_Export(pR700AsmCode,
4803 SQ_EXPORT_PARAM,
4804 0,
4805 1,
4806 pR700AsmCode->starting_export_register_number,
4807 GL_FALSE) )
4808 {
4809 return GL_FALSE;
4810 }
4811
4812 pR700AsmCode->cf_last_export_ptr->m_Word1_SWIZ.f.sel_x = SQ_SEL_0;
4813 pR700AsmCode->cf_last_export_ptr->m_Word1_SWIZ.f.sel_y = SQ_SEL_0;
4814 pR700AsmCode->cf_last_export_ptr->m_Word1_SWIZ.f.sel_z = SQ_SEL_0;
4815 pR700AsmCode->cf_last_export_ptr->m_Word1_SWIZ.f.sel_w = SQ_SEL_1;
4816 pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT_DONE;
4817 }
4818
4819 pR700AsmCode->cf_last_export_ptr->m_Word1.f.end_of_program = 0x1;
4820
4821 return GL_TRUE;
4822 }
4823
4824 GLboolean Clean_Up_Assembler(r700_AssemblerBase *pR700AsmCode)
4825 {
4826 FREE(pR700AsmCode->pucOutMask);
4827 FREE(pR700AsmCode->pInstDeps);
4828 return GL_TRUE;
4829 }
4830