be875ae6b800b21e06a5bdcd964c44d8138960c8
[mesa.git] / src / mesa / drivers / dri / r600 / r700_assembler.c
1 /*
2 * Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
18 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
19 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
20 */
21
22 /*
23 * Authors:
24 * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
25 */
26
27 #include <stdio.h>
28 #include <stdarg.h>
29 #include <stdlib.h>
30 #include <string.h>
31 #include <math.h>
32
33 #include "main/mtypes.h"
34 #include "main/imports.h"
35
36 #include "radeon_debug.h"
37 #include "r600_context.h"
38
39 #include "r700_assembler.h"
40
41 BITS addrmode_PVSDST(PVSDST * pPVSDST)
42 {
43 return pPVSDST->addrmode0 | ((BITS)pPVSDST->addrmode1 << 1);
44 }
45
46 void setaddrmode_PVSDST(PVSDST * pPVSDST, BITS addrmode)
47 {
48 pPVSDST->addrmode0 = addrmode & 1;
49 pPVSDST->addrmode1 = (addrmode >> 1) & 1;
50 }
51
52 void nomask_PVSDST(PVSDST * pPVSDST)
53 {
54 pPVSDST->writex = pPVSDST->writey = pPVSDST->writez = pPVSDST->writew = 1;
55 }
56
57 BITS addrmode_PVSSRC(PVSSRC* pPVSSRC)
58 {
59 return pPVSSRC->addrmode0 | ((BITS)pPVSSRC->addrmode1 << 1);
60 }
61
62 void setaddrmode_PVSSRC(PVSSRC* pPVSSRC, BITS addrmode)
63 {
64 pPVSSRC->addrmode0 = addrmode & 1;
65 pPVSSRC->addrmode1 = (addrmode >> 1) & 1;
66 }
67
68
69 void setswizzle_PVSSRC(PVSSRC* pPVSSRC, BITS swz)
70 {
71 pPVSSRC->swizzlex =
72 pPVSSRC->swizzley =
73 pPVSSRC->swizzlez =
74 pPVSSRC->swizzlew = swz;
75 }
76
77 void noswizzle_PVSSRC(PVSSRC* pPVSSRC)
78 {
79 pPVSSRC->swizzlex = SQ_SEL_X;
80 pPVSSRC->swizzley = SQ_SEL_Y;
81 pPVSSRC->swizzlez = SQ_SEL_Z;
82 pPVSSRC->swizzlew = SQ_SEL_W;
83 }
84
85 void
86 swizzleagain_PVSSRC(PVSSRC * pPVSSRC, BITS x, BITS y, BITS z, BITS w)
87 {
88 switch (x)
89 {
90 case SQ_SEL_X: x = pPVSSRC->swizzlex;
91 break;
92 case SQ_SEL_Y: x = pPVSSRC->swizzley;
93 break;
94 case SQ_SEL_Z: x = pPVSSRC->swizzlez;
95 break;
96 case SQ_SEL_W: x = pPVSSRC->swizzlew;
97 break;
98 default:;
99 }
100
101 switch (y)
102 {
103 case SQ_SEL_X: y = pPVSSRC->swizzlex;
104 break;
105 case SQ_SEL_Y: y = pPVSSRC->swizzley;
106 break;
107 case SQ_SEL_Z: y = pPVSSRC->swizzlez;
108 break;
109 case SQ_SEL_W: y = pPVSSRC->swizzlew;
110 break;
111 default:;
112 }
113
114 switch (z)
115 {
116 case SQ_SEL_X: z = pPVSSRC->swizzlex;
117 break;
118 case SQ_SEL_Y: z = pPVSSRC->swizzley;
119 break;
120 case SQ_SEL_Z: z = pPVSSRC->swizzlez;
121 break;
122 case SQ_SEL_W: z = pPVSSRC->swizzlew;
123 break;
124 default:;
125 }
126
127 switch (w)
128 {
129 case SQ_SEL_X: w = pPVSSRC->swizzlex;
130 break;
131 case SQ_SEL_Y: w = pPVSSRC->swizzley;
132 break;
133 case SQ_SEL_Z: w = pPVSSRC->swizzlez;
134 break;
135 case SQ_SEL_W: w = pPVSSRC->swizzlew;
136 break;
137 default:;
138 }
139
140 pPVSSRC->swizzlex = x;
141 pPVSSRC->swizzley = y;
142 pPVSSRC->swizzlez = z;
143 pPVSSRC->swizzlew = w;
144 }
145
146 void neg_PVSSRC(PVSSRC* pPVSSRC)
147 {
148 pPVSSRC->negx = 1;
149 pPVSSRC->negy = 1;
150 pPVSSRC->negz = 1;
151 pPVSSRC->negw = 1;
152 }
153
154 void noneg_PVSSRC(PVSSRC* pPVSSRC)
155 {
156 pPVSSRC->negx = 0;
157 pPVSSRC->negy = 0;
158 pPVSSRC->negz = 0;
159 pPVSSRC->negw = 0;
160 }
161
162 // negate argument (for SUB instead of ADD and alike)
163 void flipneg_PVSSRC(PVSSRC* pPVSSRC)
164 {
165 pPVSSRC->negx = !pPVSSRC->negx;
166 pPVSSRC->negy = !pPVSSRC->negy;
167 pPVSSRC->negz = !pPVSSRC->negz;
168 pPVSSRC->negw = !pPVSSRC->negw;
169 }
170
171 void zerocomp_PVSSRC(PVSSRC* pPVSSRC, int c)
172 {
173 switch (c)
174 {
175 case 0: pPVSSRC->swizzlex = SQ_SEL_0; pPVSSRC->negx = 0; break;
176 case 1: pPVSSRC->swizzley = SQ_SEL_0; pPVSSRC->negy = 0; break;
177 case 2: pPVSSRC->swizzlez = SQ_SEL_0; pPVSSRC->negz = 0; break;
178 case 3: pPVSSRC->swizzlew = SQ_SEL_0; pPVSSRC->negw = 0; break;
179 default:;
180 }
181 }
182
183 void onecomp_PVSSRC(PVSSRC* pPVSSRC, int c)
184 {
185 switch (c)
186 {
187 case 0: pPVSSRC->swizzlex = SQ_SEL_1; pPVSSRC->negx = 0; break;
188 case 1: pPVSSRC->swizzley = SQ_SEL_1; pPVSSRC->negy = 0; break;
189 case 2: pPVSSRC->swizzlez = SQ_SEL_1; pPVSSRC->negz = 0; break;
190 case 3: pPVSSRC->swizzlew = SQ_SEL_1; pPVSSRC->negw = 0; break;
191 default:;
192 }
193 }
194
195 BITS is_misc_component_exported(VAP_OUT_VTX_FMT_0* pOutVTXFmt0)
196 {
197 return (pOutVTXFmt0->point_size |
198 pOutVTXFmt0->edge_flag |
199 pOutVTXFmt0->rta_index |
200 pOutVTXFmt0->kill_flag |
201 pOutVTXFmt0->viewport_index);
202 }
203
204 BITS is_depth_component_exported(OUT_FRAGMENT_FMT_0* pFPOutFmt)
205 {
206 return (pFPOutFmt->depth |
207 pFPOutFmt->stencil_ref |
208 pFPOutFmt->mask |
209 pFPOutFmt->coverage_to_mask);
210 }
211
212 GLboolean is_reduction_opcode(PVSDWORD* dest)
213 {
214 if (dest->dst.op3 == 0)
215 {
216 if ( (dest->dst.opcode == SQ_OP2_INST_DOT4 || dest->dst.opcode == SQ_OP2_INST_DOT4_IEEE || dest->dst.opcode == SQ_OP2_INST_CUBE) )
217 {
218 return GL_TRUE;
219 }
220 }
221 return GL_FALSE;
222 }
223
224 GLuint GetSurfaceFormat(GLenum eType, GLuint nChannels, GLuint * pClient_size)
225 {
226 GLuint format = FMT_INVALID;
227 GLuint uiElemSize = 0;
228
229 switch (eType)
230 {
231 case GL_BYTE:
232 case GL_UNSIGNED_BYTE:
233 uiElemSize = 1;
234 switch(nChannels)
235 {
236 case 1:
237 format = FMT_8; break;
238 case 2:
239 format = FMT_8_8; break;
240 case 3:
241 format = FMT_8_8_8; break;
242 case 4:
243 format = FMT_8_8_8_8; break;
244 default:
245 break;
246 }
247 break;
248
249 case GL_UNSIGNED_SHORT:
250 case GL_SHORT:
251 uiElemSize = 2;
252 switch(nChannels)
253 {
254 case 1:
255 format = FMT_16; break;
256 case 2:
257 format = FMT_16_16; break;
258 case 3:
259 format = FMT_16_16_16; break;
260 case 4:
261 format = FMT_16_16_16_16; break;
262 default:
263 break;
264 }
265 break;
266
267 case GL_UNSIGNED_INT:
268 case GL_INT:
269 uiElemSize = 4;
270 switch(nChannels)
271 {
272 case 1:
273 format = FMT_32; break;
274 case 2:
275 format = FMT_32_32; break;
276 case 3:
277 format = FMT_32_32_32; break;
278 case 4:
279 format = FMT_32_32_32_32; break;
280 default:
281 break;
282 }
283 break;
284
285 case GL_FLOAT:
286 uiElemSize = 4;
287 switch(nChannels)
288 {
289 case 1:
290 format = FMT_32_FLOAT; break;
291 case 2:
292 format = FMT_32_32_FLOAT; break;
293 case 3:
294 format = FMT_32_32_32_FLOAT; break;
295 case 4:
296 format = FMT_32_32_32_32_FLOAT; break;
297 default:
298 break;
299 }
300 break;
301 case GL_DOUBLE:
302 uiElemSize = 8;
303 switch(nChannels)
304 {
305 case 1:
306 format = FMT_32_FLOAT; break;
307 case 2:
308 format = FMT_32_32_FLOAT; break;
309 case 3:
310 format = FMT_32_32_32_FLOAT; break;
311 case 4:
312 format = FMT_32_32_32_32_FLOAT; break;
313 default:
314 break;
315 }
316 break;
317 default:
318 ;
319 //GL_ASSERT_NO_CASE();
320 }
321
322 if(NULL != pClient_size)
323 {
324 *pClient_size = uiElemSize * nChannels;
325 }
326
327 return(format);
328 }
329
330 unsigned int r700GetNumOperands(r700_AssemblerBase* pAsm)
331 {
332 if(pAsm->D.dst.op3)
333 {
334 return 3;
335 }
336
337 switch (pAsm->D.dst.opcode)
338 {
339 case SQ_OP2_INST_ADD:
340 case SQ_OP2_INST_KILLGT:
341 case SQ_OP2_INST_MUL:
342 case SQ_OP2_INST_MAX:
343 case SQ_OP2_INST_MIN:
344 //case SQ_OP2_INST_MAX_DX10:
345 //case SQ_OP2_INST_MIN_DX10:
346 case SQ_OP2_INST_SETGT:
347 case SQ_OP2_INST_SETGE:
348 case SQ_OP2_INST_PRED_SETE:
349 case SQ_OP2_INST_PRED_SETGT:
350 case SQ_OP2_INST_PRED_SETGE:
351 case SQ_OP2_INST_PRED_SETNE:
352 case SQ_OP2_INST_DOT4:
353 case SQ_OP2_INST_DOT4_IEEE:
354 case SQ_OP2_INST_CUBE:
355 return 2;
356
357 case SQ_OP2_INST_MOV:
358 case SQ_OP2_INST_MOVA_FLOOR:
359 case SQ_OP2_INST_FRACT:
360 case SQ_OP2_INST_FLOOR:
361 case SQ_OP2_INST_EXP_IEEE:
362 case SQ_OP2_INST_LOG_CLAMPED:
363 case SQ_OP2_INST_LOG_IEEE:
364 case SQ_OP2_INST_RECIP_IEEE:
365 case SQ_OP2_INST_RECIPSQRT_IEEE:
366 case SQ_OP2_INST_FLT_TO_INT:
367 case SQ_OP2_INST_SIN:
368 case SQ_OP2_INST_COS:
369 return 1;
370
371 default: radeon_error(
372 "Need instruction operand number for %x.\n", pAsm->D.dst.opcode);
373 };
374
375 return 3;
376 }
377
378 int Init_r700_AssemblerBase(SHADER_PIPE_TYPE spt, r700_AssemblerBase* pAsm, R700_Shader* pShader)
379 {
380 GLuint i;
381
382 Init_R700_Shader(pShader);
383 pAsm->pR700Shader = pShader;
384 pAsm->currentShaderType = spt;
385
386 pAsm->cf_last_export_ptr = NULL;
387
388 pAsm->cf_current_export_clause_ptr = NULL;
389 pAsm->cf_current_alu_clause_ptr = NULL;
390 pAsm->cf_current_tex_clause_ptr = NULL;
391 pAsm->cf_current_vtx_clause_ptr = NULL;
392 pAsm->cf_current_cf_clause_ptr = NULL;
393
394 // No clause has been created yet
395 pAsm->cf_current_clause_type = CF_EMPTY_CLAUSE;
396
397 pAsm->number_of_colorandz_exports = 0;
398 pAsm->number_of_exports = 0;
399 pAsm->number_of_export_opcodes = 0;
400
401
402 pAsm->D.bits = 0;
403 pAsm->S[0].bits = 0;
404 pAsm->S[1].bits = 0;
405 pAsm->S[2].bits = 0;
406
407 pAsm->uLastPosUpdate = 0;
408
409 *(BITS *) &pAsm->fp_stOutFmt0 = 0;
410
411 pAsm->uIIns = 0;
412 pAsm->uOIns = 0;
413 pAsm->number_used_registers = 0;
414 pAsm->uUsedConsts = 256;
415
416
417 // Fragment programs
418 pAsm->uBoolConsts = 0;
419 pAsm->uIntConsts = 0;
420 pAsm->uInsts = 0;
421 pAsm->uConsts = 0;
422
423 pAsm->FCSP = 0;
424 pAsm->fc_stack[0].type = FC_NONE;
425
426 pAsm->branch_depth = 0;
427 pAsm->max_branch_depth = 0;
428
429 pAsm->aArgSubst[0] =
430 pAsm->aArgSubst[1] =
431 pAsm->aArgSubst[2] =
432 pAsm->aArgSubst[3] = (-1);
433
434 pAsm->uOutputs = 0;
435
436 for (i=0; i<NUMBER_OF_OUTPUT_COLORS; i++)
437 {
438 pAsm->color_export_register_number[i] = (-1);
439 }
440
441
442 pAsm->depth_export_register_number = (-1);
443 pAsm->stencil_export_register_number = (-1);
444 pAsm->coverage_to_mask_export_register_number = (-1);
445 pAsm->mask_export_register_number = (-1);
446
447 pAsm->starting_export_register_number = 0;
448 pAsm->starting_vfetch_register_number = 0;
449 pAsm->starting_temp_register_number = 0;
450 pAsm->uFirstHelpReg = 0;
451
452
453 pAsm->input_position_is_used = GL_FALSE;
454 pAsm->input_normal_is_used = GL_FALSE;
455
456
457 for (i=0; i<NUMBER_OF_INPUT_COLORS; i++)
458 {
459 pAsm->input_color_is_used[ i ] = GL_FALSE;
460 }
461
462 for (i=0; i<NUMBER_OF_TEXTURE_UNITS; i++)
463 {
464 pAsm->input_texture_unit_is_used[ i ] = GL_FALSE;
465 }
466
467 for (i=0; i<VERT_ATTRIB_MAX; i++)
468 {
469 pAsm->vfetch_instruction_ptr_array[ i ] = NULL;
470 }
471
472 pAsm->number_of_inputs = 0;
473
474 pAsm->is_tex = GL_FALSE;
475 pAsm->need_tex_barrier = GL_FALSE;
476
477 return 0;
478 }
479
480 GLboolean IsTex(gl_inst_opcode Opcode)
481 {
482 if( (OPCODE_TEX==Opcode) || (OPCODE_TXP==Opcode) || (OPCODE_TXB==Opcode) )
483 {
484 return GL_TRUE;
485 }
486 return GL_FALSE;
487 }
488
489 GLboolean IsAlu(gl_inst_opcode Opcode)
490 {
491 //TODO : more for fc and ex for higher spec.
492 if( IsTex(Opcode) )
493 {
494 return GL_FALSE;
495 }
496 return GL_TRUE;
497 }
498
499 int check_current_clause(r700_AssemblerBase* pAsm,
500 CF_CLAUSE_TYPE new_clause_type)
501 {
502 if (pAsm->cf_current_clause_type != new_clause_type)
503 { //Close last open clause
504 switch (pAsm->cf_current_clause_type)
505 {
506 case CF_ALU_CLAUSE:
507 if ( pAsm->cf_current_alu_clause_ptr != NULL)
508 {
509 pAsm->cf_current_alu_clause_ptr = NULL;
510 }
511 break;
512 case CF_VTX_CLAUSE:
513 if ( pAsm->cf_current_vtx_clause_ptr != NULL)
514 {
515 pAsm->cf_current_vtx_clause_ptr = NULL;
516 }
517 break;
518 case CF_TEX_CLAUSE:
519 if ( pAsm->cf_current_tex_clause_ptr != NULL)
520 {
521 pAsm->cf_current_tex_clause_ptr = NULL;
522 }
523 break;
524 case CF_EXPORT_CLAUSE:
525 if ( pAsm->cf_current_export_clause_ptr != NULL)
526 {
527 pAsm->cf_current_export_clause_ptr = NULL;
528 }
529 break;
530 case CF_OTHER_CLAUSE:
531 if ( pAsm->cf_current_cf_clause_ptr != NULL)
532 {
533 pAsm->cf_current_cf_clause_ptr = NULL;
534 }
535 break;
536 case CF_EMPTY_CLAUSE:
537 break;
538 default:
539 radeon_error(
540 "Unknown CF_CLAUSE_TYPE (%d) in check_current_clause. \n", (int) new_clause_type);
541 return GL_FALSE;
542 }
543
544 pAsm->cf_current_clause_type = CF_EMPTY_CLAUSE;
545
546 // Create new clause
547 switch (new_clause_type)
548 {
549 case CF_ALU_CLAUSE:
550 pAsm->cf_current_clause_type = CF_ALU_CLAUSE;
551 break;
552 case CF_VTX_CLAUSE:
553 pAsm->cf_current_clause_type = CF_VTX_CLAUSE;
554 break;
555 case CF_TEX_CLAUSE:
556 pAsm->cf_current_clause_type = CF_TEX_CLAUSE;
557 break;
558 case CF_EXPORT_CLAUSE:
559 {
560 R700ControlFlowSXClause* pR700ControlFlowSXClause
561 = (R700ControlFlowSXClause*) CALLOC_STRUCT(R700ControlFlowSXClause);
562
563 // Add new export instruction to control flow program
564 if (pR700ControlFlowSXClause != 0)
565 {
566 pAsm->cf_current_export_clause_ptr = pR700ControlFlowSXClause;
567 Init_R700ControlFlowSXClause(pR700ControlFlowSXClause);
568 AddCFInstruction( pAsm->pR700Shader,
569 (R700ControlFlowInstruction *)pR700ControlFlowSXClause );
570 }
571 else
572 {
573 radeon_error(
574 "Error allocating new EXPORT CF instruction in check_current_clause. \n");
575 return GL_FALSE;
576 }
577 pAsm->cf_current_clause_type = CF_EXPORT_CLAUSE;
578 }
579 break;
580 case CF_EMPTY_CLAUSE:
581 break;
582 case CF_OTHER_CLAUSE:
583 pAsm->cf_current_clause_type = CF_OTHER_CLAUSE;
584 break;
585 default:
586 radeon_error(
587 "Unknown CF_CLAUSE_TYPE (%d) in check_current_clause. \n", (int) new_clause_type);
588 return GL_FALSE;
589 }
590 }
591
592 return GL_TRUE;
593 }
594
595 GLboolean add_vfetch_instruction(r700_AssemblerBase* pAsm,
596 R700VertexInstruction* vertex_instruction_ptr)
597 {
598 if( GL_FALSE == check_current_clause(pAsm, CF_VTX_CLAUSE) )
599 {
600 return GL_FALSE;
601 }
602
603 if( pAsm->cf_current_vtx_clause_ptr == NULL ||
604 ( (pAsm->cf_current_vtx_clause_ptr != NULL) &&
605 (pAsm->cf_current_vtx_clause_ptr->m_Word1.f.count >= GetCFMaxInstructions(pAsm->cf_current_vtx_clause_ptr->m_ShaderInstType)-1)
606 ) )
607 {
608 // Create new Vfetch control flow instruction for this new clause
609 pAsm->cf_current_vtx_clause_ptr = (R700ControlFlowGenericClause*) CALLOC_STRUCT(R700ControlFlowGenericClause);
610
611 if (pAsm->cf_current_vtx_clause_ptr != NULL)
612 {
613 Init_R700ControlFlowGenericClause(pAsm->cf_current_vtx_clause_ptr);
614 AddCFInstruction( pAsm->pR700Shader,
615 (R700ControlFlowInstruction *)pAsm->cf_current_vtx_clause_ptr );
616 }
617 else
618 {
619 radeon_error("Could not allocate a new VFetch CF instruction.\n");
620 return GL_FALSE;
621 }
622
623 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.pop_count = 0x0;
624 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.cf_const = 0x0;
625 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
626 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.count = 0x0;
627 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.end_of_program = 0x0;
628 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
629 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_VTX;
630 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
631 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.barrier = 0x1;
632
633 LinkVertexInstruction(pAsm->cf_current_vtx_clause_ptr, vertex_instruction_ptr );
634 }
635 else
636 {
637 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.count++;
638 }
639
640 AddVTXInstruction(pAsm->pR700Shader, vertex_instruction_ptr);
641
642 return GL_TRUE;
643 }
644
645 GLboolean add_tex_instruction(r700_AssemblerBase* pAsm,
646 R700TextureInstruction* tex_instruction_ptr)
647 {
648 if ( GL_FALSE == check_current_clause(pAsm, CF_TEX_CLAUSE) )
649 {
650 return GL_FALSE;
651 }
652
653 if ( pAsm->cf_current_tex_clause_ptr == NULL ||
654 ( (pAsm->cf_current_tex_clause_ptr != NULL) &&
655 (pAsm->cf_current_tex_clause_ptr->m_Word1.f.count >= GetCFMaxInstructions(pAsm->cf_current_tex_clause_ptr->m_ShaderInstType)-1)
656 ) )
657 {
658 // new tex cf instruction for this new clause
659 pAsm->cf_current_tex_clause_ptr = (R700ControlFlowGenericClause*) CALLOC_STRUCT(R700ControlFlowGenericClause);
660
661 if (pAsm->cf_current_tex_clause_ptr != NULL)
662 {
663 Init_R700ControlFlowGenericClause(pAsm->cf_current_tex_clause_ptr);
664 AddCFInstruction( pAsm->pR700Shader,
665 (R700ControlFlowInstruction *)pAsm->cf_current_tex_clause_ptr );
666 }
667 else
668 {
669 radeon_error("Could not allocate a new TEX CF instruction.\n");
670 return GL_FALSE;
671 }
672
673 pAsm->cf_current_tex_clause_ptr->m_Word1.f.pop_count = 0x0;
674 pAsm->cf_current_tex_clause_ptr->m_Word1.f.cf_const = 0x0;
675 pAsm->cf_current_tex_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
676
677 pAsm->cf_current_tex_clause_ptr->m_Word1.f.end_of_program = 0x0;
678 pAsm->cf_current_tex_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
679 pAsm->cf_current_tex_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_TEX;
680 pAsm->cf_current_tex_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
681 pAsm->cf_current_tex_clause_ptr->m_Word1.f.barrier = 0x0; //0x1;
682 }
683 else
684 {
685 pAsm->cf_current_tex_clause_ptr->m_Word1.f.count++;
686 }
687
688 // If this clause constains any TEX instruction that is dependent on a previous instruction,
689 // set the barrier bit
690 if( pAsm->pInstDeps[pAsm->uiCurInst].nDstDep > (-1) || pAsm->need_tex_barrier == GL_TRUE )
691 {
692 pAsm->cf_current_tex_clause_ptr->m_Word1.f.barrier = 0x1;
693 }
694
695 if(NULL == pAsm->cf_current_tex_clause_ptr->m_pLinkedTEXInstruction)
696 {
697 pAsm->cf_current_tex_clause_ptr->m_pLinkedTEXInstruction = tex_instruction_ptr;
698 tex_instruction_ptr->m_pLinkedGenericClause = pAsm->cf_current_tex_clause_ptr;
699 }
700
701 AddTEXInstruction(pAsm->pR700Shader, tex_instruction_ptr);
702
703 return GL_TRUE;
704 }
705
706 GLboolean assemble_vfetch_instruction(r700_AssemblerBase* pAsm,
707 GLuint gl_client_id,
708 GLuint destination_register,
709 GLuint number_of_elements,
710 GLenum dataElementType,
711 VTX_FETCH_METHOD* pFetchMethod)
712 {
713 GLuint client_size_inbyte;
714 GLuint data_format;
715 GLuint mega_fetch_count;
716 GLuint is_mega_fetch_flag;
717
718 R700VertexGenericFetch* vfetch_instruction_ptr;
719 R700VertexGenericFetch* assembled_vfetch_instruction_ptr = pAsm->vfetch_instruction_ptr_array[ gl_client_id ];
720
721 if (assembled_vfetch_instruction_ptr == NULL)
722 {
723 vfetch_instruction_ptr = (R700VertexGenericFetch*) CALLOC_STRUCT(R700VertexGenericFetch);
724 if (vfetch_instruction_ptr == NULL)
725 {
726 return GL_FALSE;
727 }
728 Init_R700VertexGenericFetch(vfetch_instruction_ptr);
729 }
730 else
731 {
732 vfetch_instruction_ptr = assembled_vfetch_instruction_ptr;
733 }
734
735 data_format = GetSurfaceFormat(dataElementType, number_of_elements, &client_size_inbyte);
736
737 if(GL_TRUE == pFetchMethod->bEnableMini) //More conditions here
738 {
739 //TODO : mini fetch
740 }
741 else
742 {
743 mega_fetch_count = MEGA_FETCH_BYTES - 1;
744 is_mega_fetch_flag = 0x1;
745 pFetchMethod->mega_fetch_remainder = MEGA_FETCH_BYTES - client_size_inbyte;
746 }
747
748 vfetch_instruction_ptr->m_Word0.f.vtx_inst = SQ_VTX_INST_FETCH;
749 vfetch_instruction_ptr->m_Word0.f.fetch_type = SQ_VTX_FETCH_VERTEX_DATA;
750 vfetch_instruction_ptr->m_Word0.f.fetch_whole_quad = 0x0;
751
752 vfetch_instruction_ptr->m_Word0.f.buffer_id = gl_client_id;
753 vfetch_instruction_ptr->m_Word0.f.src_gpr = 0x0;
754 vfetch_instruction_ptr->m_Word0.f.src_rel = SQ_ABSOLUTE;
755 vfetch_instruction_ptr->m_Word0.f.src_sel_x = SQ_SEL_X;
756 vfetch_instruction_ptr->m_Word0.f.mega_fetch_count = mega_fetch_count;
757
758 vfetch_instruction_ptr->m_Word1.f.dst_sel_x = (number_of_elements < 1) ? SQ_SEL_0 : SQ_SEL_X;
759 vfetch_instruction_ptr->m_Word1.f.dst_sel_y = (number_of_elements < 2) ? SQ_SEL_0 : SQ_SEL_Y;
760 vfetch_instruction_ptr->m_Word1.f.dst_sel_z = (number_of_elements < 3) ? SQ_SEL_0 : SQ_SEL_Z;
761 vfetch_instruction_ptr->m_Word1.f.dst_sel_w = (number_of_elements < 4) ? SQ_SEL_1 : SQ_SEL_W;
762
763 vfetch_instruction_ptr->m_Word1.f.use_const_fields = 1;
764
765 // Destination register
766 vfetch_instruction_ptr->m_Word1_GPR.f.dst_gpr = destination_register;
767 vfetch_instruction_ptr->m_Word1_GPR.f.dst_rel = SQ_ABSOLUTE;
768
769 vfetch_instruction_ptr->m_Word2.f.offset = 0;
770 vfetch_instruction_ptr->m_Word2.f.const_buf_no_stride = 0x0;
771
772 vfetch_instruction_ptr->m_Word2.f.mega_fetch = is_mega_fetch_flag;
773
774 if (assembled_vfetch_instruction_ptr == NULL)
775 {
776 if ( GL_FALSE == add_vfetch_instruction(pAsm, (R700VertexInstruction *)vfetch_instruction_ptr) )
777 {
778 return GL_FALSE;
779 }
780
781 if (pAsm->vfetch_instruction_ptr_array[ gl_client_id ] != NULL)
782 {
783 return GL_FALSE;
784 }
785 else
786 {
787 pAsm->vfetch_instruction_ptr_array[ gl_client_id ] = vfetch_instruction_ptr;
788 }
789 }
790
791 return GL_TRUE;
792 }
793
794 GLuint gethelpr(r700_AssemblerBase* pAsm)
795 {
796 GLuint r = pAsm->uHelpReg;
797 pAsm->uHelpReg++;
798 if (pAsm->uHelpReg > pAsm->number_used_registers)
799 {
800 pAsm->number_used_registers = pAsm->uHelpReg;
801 }
802 return r;
803 }
804 void resethelpr(r700_AssemblerBase* pAsm)
805 {
806 pAsm->uHelpReg = pAsm->uFirstHelpReg;
807 }
808
809 void checkop_init(r700_AssemblerBase* pAsm)
810 {
811 resethelpr(pAsm);
812 pAsm->aArgSubst[0] =
813 pAsm->aArgSubst[1] =
814 pAsm->aArgSubst[2] =
815 pAsm->aArgSubst[3] = -1;
816 }
817
818 GLboolean mov_temp(r700_AssemblerBase* pAsm, int src)
819 {
820 GLuint tmp = gethelpr(pAsm);
821
822 //mov src to temp helper gpr.
823 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
824
825 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
826
827 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
828 pAsm->D.dst.reg = tmp;
829
830 nomask_PVSDST(&(pAsm->D.dst));
831
832 if( GL_FALSE == assemble_src(pAsm, src, 0) )
833 {
834 return GL_FALSE;
835 }
836
837 noswizzle_PVSSRC(&(pAsm->S[0].src));
838 noneg_PVSSRC(&(pAsm->S[0].src));
839
840 if( GL_FALSE == next_ins(pAsm) )
841 {
842 return GL_FALSE;
843 }
844
845 pAsm->aArgSubst[1 + src] = tmp;
846
847 return GL_TRUE;
848 }
849
850 GLboolean checkop1(r700_AssemblerBase* pAsm)
851 {
852 checkop_init(pAsm);
853 return GL_TRUE;
854 }
855
856 GLboolean checkop2(r700_AssemblerBase* pAsm)
857 {
858 GLboolean bSrcConst[2];
859 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
860
861 checkop_init(pAsm);
862
863 if( (pILInst->SrcReg[0].File == PROGRAM_CONSTANT) ||
864 (pILInst->SrcReg[0].File == PROGRAM_LOCAL_PARAM) ||
865 (pILInst->SrcReg[0].File == PROGRAM_ENV_PARAM) ||
866 (pILInst->SrcReg[0].File == PROGRAM_STATE_VAR) )
867 {
868 bSrcConst[0] = GL_TRUE;
869 }
870 else
871 {
872 bSrcConst[0] = GL_FALSE;
873 }
874 if( (pILInst->SrcReg[1].File == PROGRAM_CONSTANT) ||
875 (pILInst->SrcReg[1].File == PROGRAM_LOCAL_PARAM) ||
876 (pILInst->SrcReg[1].File == PROGRAM_ENV_PARAM) ||
877 (pILInst->SrcReg[1].File == PROGRAM_STATE_VAR) )
878 {
879 bSrcConst[1] = GL_TRUE;
880 }
881 else
882 {
883 bSrcConst[1] = GL_FALSE;
884 }
885
886 if( (bSrcConst[0] == GL_TRUE) && (bSrcConst[1] == GL_TRUE) )
887 {
888 if(pILInst->SrcReg[0].Index != pILInst->SrcReg[1].Index)
889 {
890 if( GL_FALSE == mov_temp(pAsm, 1) )
891 {
892 return GL_FALSE;
893 }
894 }
895 }
896
897 return GL_TRUE;
898 }
899
900 GLboolean checkop3(r700_AssemblerBase* pAsm)
901 {
902 GLboolean bSrcConst[3];
903 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
904
905 checkop_init(pAsm);
906
907 if( (pILInst->SrcReg[0].File == PROGRAM_CONSTANT) ||
908 (pILInst->SrcReg[0].File == PROGRAM_LOCAL_PARAM) ||
909 (pILInst->SrcReg[0].File == PROGRAM_ENV_PARAM) ||
910 (pILInst->SrcReg[0].File == PROGRAM_STATE_VAR) )
911 {
912 bSrcConst[0] = GL_TRUE;
913 }
914 else
915 {
916 bSrcConst[0] = GL_FALSE;
917 }
918 if( (pILInst->SrcReg[1].File == PROGRAM_CONSTANT) ||
919 (pILInst->SrcReg[1].File == PROGRAM_LOCAL_PARAM) ||
920 (pILInst->SrcReg[1].File == PROGRAM_ENV_PARAM) ||
921 (pILInst->SrcReg[1].File == PROGRAM_STATE_VAR) )
922 {
923 bSrcConst[1] = GL_TRUE;
924 }
925 else
926 {
927 bSrcConst[1] = GL_FALSE;
928 }
929 if( (pILInst->SrcReg[2].File == PROGRAM_CONSTANT) ||
930 (pILInst->SrcReg[2].File == PROGRAM_LOCAL_PARAM) ||
931 (pILInst->SrcReg[2].File == PROGRAM_ENV_PARAM) ||
932 (pILInst->SrcReg[2].File == PROGRAM_STATE_VAR) )
933 {
934 bSrcConst[2] = GL_TRUE;
935 }
936 else
937 {
938 bSrcConst[2] = GL_FALSE;
939 }
940
941 if( (GL_TRUE == bSrcConst[0]) &&
942 (GL_TRUE == bSrcConst[1]) &&
943 (GL_TRUE == bSrcConst[2]) )
944 {
945 if( GL_FALSE == mov_temp(pAsm, 1) )
946 {
947 return GL_FALSE;
948 }
949 if( GL_FALSE == mov_temp(pAsm, 2) )
950 {
951 return GL_FALSE;
952 }
953
954 return GL_TRUE;
955 }
956 else if( (GL_TRUE == bSrcConst[0]) &&
957 (GL_TRUE == bSrcConst[1]) )
958 {
959 if(pILInst->SrcReg[0].Index != pILInst->SrcReg[1].Index)
960 {
961 if( GL_FALSE == mov_temp(pAsm, 1) )
962 {
963 return 1;
964 }
965 }
966
967 return GL_TRUE;
968 }
969 else if ( (GL_TRUE == bSrcConst[0]) &&
970 (GL_TRUE == bSrcConst[2]) )
971 {
972 if(pILInst->SrcReg[0].Index != pILInst->SrcReg[2].Index)
973 {
974 if( GL_FALSE == mov_temp(pAsm, 2) )
975 {
976 return GL_FALSE;
977 }
978 }
979
980 return GL_TRUE;
981 }
982 else if( (GL_TRUE == bSrcConst[1]) &&
983 (GL_TRUE == bSrcConst[2]) )
984 {
985 if(pILInst->SrcReg[1].Index != pILInst->SrcReg[2].Index)
986 {
987 if( GL_FALSE == mov_temp(pAsm, 2) )
988 {
989 return GL_FALSE;
990 }
991 }
992
993 return GL_TRUE;
994 }
995
996 return GL_TRUE;
997 }
998
999 GLboolean assemble_src(r700_AssemblerBase *pAsm,
1000 int src,
1001 int fld)
1002 {
1003 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
1004
1005 if (fld == -1)
1006 {
1007 fld = src;
1008 }
1009
1010 if(pAsm->aArgSubst[1+src] >= 0)
1011 {
1012 setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_ABSOLUTE);
1013 pAsm->S[fld].src.rtype = SRC_REG_TEMPORARY;
1014 pAsm->S[fld].src.reg = pAsm->aArgSubst[1+src];
1015 }
1016 else
1017 {
1018 switch (pILInst->SrcReg[src].File)
1019 {
1020 case PROGRAM_TEMPORARY:
1021 setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_ABSOLUTE);
1022 pAsm->S[fld].src.rtype = SRC_REG_TEMPORARY;
1023 pAsm->S[fld].src.reg = pILInst->SrcReg[src].Index + pAsm->starting_temp_register_number;
1024 break;
1025 case PROGRAM_CONSTANT:
1026 case PROGRAM_LOCAL_PARAM:
1027 case PROGRAM_ENV_PARAM:
1028 case PROGRAM_STATE_VAR:
1029 if (1 == pILInst->SrcReg[src].RelAddr)
1030 {
1031 setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_RELATIVE_A0);
1032 }
1033 else
1034 {
1035 setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_ABSOLUTE);
1036 }
1037
1038 pAsm->S[fld].src.rtype = SRC_REG_CONSTANT;
1039 pAsm->S[fld].src.reg = pILInst->SrcReg[src].Index;
1040 break;
1041 case PROGRAM_INPUT:
1042 setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_ABSOLUTE);
1043 pAsm->S[fld].src.rtype = SRC_REG_INPUT;
1044 switch (pAsm->currentShaderType)
1045 {
1046 case SPT_FP:
1047 pAsm->S[fld].src.reg = pAsm->uiFP_AttributeMap[pILInst->SrcReg[src].Index];
1048 break;
1049 case SPT_VP:
1050 pAsm->S[fld].src.reg = pAsm->ucVP_AttributeMap[pILInst->SrcReg[src].Index];
1051 break;
1052 }
1053 break;
1054 default:
1055 radeon_error("Invalid source argument type\n");
1056 return GL_FALSE;
1057 }
1058 }
1059
1060 pAsm->S[fld].src.swizzlex = pILInst->SrcReg[src].Swizzle & 0x7;
1061 pAsm->S[fld].src.swizzley = (pILInst->SrcReg[src].Swizzle >> 3) & 0x7;
1062 pAsm->S[fld].src.swizzlez = (pILInst->SrcReg[src].Swizzle >> 6) & 0x7;
1063 pAsm->S[fld].src.swizzlew = (pILInst->SrcReg[src].Swizzle >> 9) & 0x7;
1064
1065 pAsm->S[fld].src.negx = pILInst->SrcReg[src].Negate & 0x1;
1066 pAsm->S[fld].src.negy = (pILInst->SrcReg[src].Negate >> 1) & 0x1;
1067 pAsm->S[fld].src.negz = (pILInst->SrcReg[src].Negate >> 2) & 0x1;
1068 pAsm->S[fld].src.negw = (pILInst->SrcReg[src].Negate >> 3) & 0x1;
1069
1070 return GL_TRUE;
1071 }
1072
1073 GLboolean assemble_dst(r700_AssemblerBase *pAsm)
1074 {
1075 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
1076 switch (pILInst->DstReg.File)
1077 {
1078 case PROGRAM_TEMPORARY:
1079 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
1080 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
1081 pAsm->D.dst.reg = pILInst->DstReg.Index + pAsm->starting_temp_register_number;
1082 break;
1083 case PROGRAM_ADDRESS:
1084 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
1085 pAsm->D.dst.rtype = DST_REG_A0;
1086 pAsm->D.dst.reg = 0;
1087 break;
1088 case PROGRAM_OUTPUT:
1089 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
1090 pAsm->D.dst.rtype = DST_REG_OUT;
1091 switch (pAsm->currentShaderType)
1092 {
1093 case SPT_FP:
1094 pAsm->D.dst.reg = pAsm->uiFP_OutputMap[pILInst->DstReg.Index];
1095 break;
1096 case SPT_VP:
1097 pAsm->D.dst.reg = pAsm->ucVP_OutputMap[pILInst->DstReg.Index];
1098 break;
1099 }
1100 break;
1101 default:
1102 radeon_error("Invalid destination output argument type\n");
1103 return GL_FALSE;
1104 }
1105
1106 pAsm->D.dst.writex = pILInst->DstReg.WriteMask & 0x1;
1107 pAsm->D.dst.writey = (pILInst->DstReg.WriteMask >> 1) & 0x1;
1108 pAsm->D.dst.writez = (pILInst->DstReg.WriteMask >> 2) & 0x1;
1109 pAsm->D.dst.writew = (pILInst->DstReg.WriteMask >> 3) & 0x1;
1110
1111 return GL_TRUE;
1112 }
1113
1114 GLboolean tex_dst(r700_AssemblerBase *pAsm)
1115 {
1116 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
1117
1118 if(PROGRAM_TEMPORARY == pILInst->DstReg.File)
1119 {
1120 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
1121 pAsm->D.dst.reg = pAsm->pILInst[pAsm->uiCurInst].DstReg.Index + pAsm->starting_temp_register_number;
1122
1123 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
1124 }
1125 else if(PROGRAM_OUTPUT == pILInst->DstReg.File)
1126 {
1127 pAsm->D.dst.rtype = DST_REG_OUT;
1128 switch (pAsm->currentShaderType)
1129 {
1130 case SPT_FP:
1131 pAsm->D.dst.reg = pAsm->uiFP_OutputMap[pILInst->DstReg.Index];
1132 break;
1133 case SPT_VP:
1134 pAsm->D.dst.reg = pAsm->ucVP_OutputMap[pILInst->DstReg.Index];
1135 break;
1136 }
1137
1138 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
1139 }
1140 else
1141 {
1142 radeon_error("Invalid destination output argument type\n");
1143 return GL_FALSE;
1144 }
1145
1146 pAsm->D.dst.writex = pILInst->DstReg.WriteMask & 0x1;
1147 pAsm->D.dst.writey = (pILInst->DstReg.WriteMask >> 1) & 0x1;
1148 pAsm->D.dst.writez = (pILInst->DstReg.WriteMask >> 2) & 0x1;
1149 pAsm->D.dst.writew = (pILInst->DstReg.WriteMask >> 3) & 0x1;
1150
1151 return GL_TRUE;
1152 }
1153
1154 GLboolean tex_src(r700_AssemblerBase *pAsm)
1155 {
1156 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
1157
1158 GLboolean bValidTexCoord = GL_FALSE;
1159
1160 if(pAsm->aArgSubst[1] >= 0)
1161 {
1162 bValidTexCoord = GL_TRUE;
1163 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
1164 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
1165 pAsm->S[0].src.reg = pAsm->aArgSubst[1];
1166 }
1167 else
1168 {
1169 switch (pILInst->SrcReg[0].File) {
1170 case PROGRAM_CONSTANT:
1171 case PROGRAM_LOCAL_PARAM:
1172 case PROGRAM_ENV_PARAM:
1173 case PROGRAM_STATE_VAR:
1174 break;
1175 case PROGRAM_TEMPORARY:
1176 bValidTexCoord = GL_TRUE;
1177 pAsm->S[0].src.reg = pILInst->SrcReg[0].Index +
1178 pAsm->starting_temp_register_number;
1179 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
1180 break;
1181 case PROGRAM_INPUT:
1182 switch (pILInst->SrcReg[0].Index)
1183 {
1184 case FRAG_ATTRIB_WPOS:
1185 case FRAG_ATTRIB_COL0:
1186 case FRAG_ATTRIB_COL1:
1187 case FRAG_ATTRIB_FOGC:
1188 case FRAG_ATTRIB_TEX0:
1189 case FRAG_ATTRIB_TEX1:
1190 case FRAG_ATTRIB_TEX2:
1191 case FRAG_ATTRIB_TEX3:
1192 case FRAG_ATTRIB_TEX4:
1193 case FRAG_ATTRIB_TEX5:
1194 case FRAG_ATTRIB_TEX6:
1195 case FRAG_ATTRIB_TEX7:
1196 bValidTexCoord = GL_TRUE;
1197 pAsm->S[0].src.reg =
1198 pAsm->uiFP_AttributeMap[pILInst->SrcReg[0].Index];
1199 pAsm->S[0].src.rtype = SRC_REG_INPUT;
1200 break;
1201 case FRAG_ATTRIB_FACE:
1202 fprintf(stderr, "FRAG_ATTRIB_FACE unsupported\n");
1203 break;
1204 case FRAG_ATTRIB_PNTC:
1205 fprintf(stderr, "FRAG_ATTRIB_PNTC unsupported\n");
1206 break;
1207 case FRAG_ATTRIB_VAR0:
1208 fprintf(stderr, "FRAG_ATTRIB_VAR0 unsupported\n");
1209 break;
1210 }
1211 break;
1212 }
1213 }
1214
1215 if(GL_TRUE == bValidTexCoord)
1216 {
1217 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
1218 }
1219 else
1220 {
1221 radeon_error("Invalid source texcoord for TEX instruction\n");
1222 return GL_FALSE;
1223 }
1224
1225 pAsm->S[0].src.swizzlex = pILInst->SrcReg[0].Swizzle & 0x7;
1226 pAsm->S[0].src.swizzley = (pILInst->SrcReg[0].Swizzle >> 3) & 0x7;
1227 pAsm->S[0].src.swizzlez = (pILInst->SrcReg[0].Swizzle >> 6) & 0x7;
1228 pAsm->S[0].src.swizzlew = (pILInst->SrcReg[0].Swizzle >> 9) & 0x7;
1229
1230 pAsm->S[0].src.negx = pILInst->SrcReg[0].Negate & 0x1;
1231 pAsm->S[0].src.negy = (pILInst->SrcReg[0].Negate >> 1) & 0x1;
1232 pAsm->S[0].src.negz = (pILInst->SrcReg[0].Negate >> 2) & 0x1;
1233 pAsm->S[0].src.negw = (pILInst->SrcReg[0].Negate >> 3) & 0x1;
1234
1235 return GL_TRUE;
1236 }
1237
1238 GLboolean assemble_tex_instruction(r700_AssemblerBase *pAsm, GLboolean normalized)
1239 {
1240 PVSSRC * texture_coordinate_source;
1241 PVSSRC * texture_unit_source;
1242
1243 R700TextureInstruction* tex_instruction_ptr = (R700TextureInstruction*) CALLOC_STRUCT(R700TextureInstruction);
1244 if (tex_instruction_ptr == NULL)
1245 {
1246 return GL_FALSE;
1247 }
1248 Init_R700TextureInstruction(tex_instruction_ptr);
1249
1250 texture_coordinate_source = &(pAsm->S[0].src);
1251 texture_unit_source = &(pAsm->S[1].src);
1252
1253 tex_instruction_ptr->m_Word0.f.tex_inst = pAsm->D.dst.opcode;
1254 tex_instruction_ptr->m_Word0.f.bc_frac_mode = 0x0;
1255 tex_instruction_ptr->m_Word0.f.fetch_whole_quad = 0x0;
1256
1257 tex_instruction_ptr->m_Word0.f.resource_id = texture_unit_source->reg;
1258
1259 tex_instruction_ptr->m_Word1.f.lod_bias = 0x0;
1260 if (normalized) {
1261 tex_instruction_ptr->m_Word1.f.coord_type_x = SQ_TEX_NORMALIZED;
1262 tex_instruction_ptr->m_Word1.f.coord_type_y = SQ_TEX_NORMALIZED;
1263 tex_instruction_ptr->m_Word1.f.coord_type_z = SQ_TEX_NORMALIZED;
1264 tex_instruction_ptr->m_Word1.f.coord_type_w = SQ_TEX_NORMALIZED;
1265 } else {
1266 /* XXX: UNNORMALIZED tex coords have limited wrap modes */
1267 tex_instruction_ptr->m_Word1.f.coord_type_x = SQ_TEX_UNNORMALIZED;
1268 tex_instruction_ptr->m_Word1.f.coord_type_y = SQ_TEX_UNNORMALIZED;
1269 tex_instruction_ptr->m_Word1.f.coord_type_z = SQ_TEX_UNNORMALIZED;
1270 tex_instruction_ptr->m_Word1.f.coord_type_w = SQ_TEX_UNNORMALIZED;
1271 }
1272
1273 tex_instruction_ptr->m_Word2.f.offset_x = 0x0;
1274 tex_instruction_ptr->m_Word2.f.offset_y = 0x0;
1275 tex_instruction_ptr->m_Word2.f.offset_z = 0x0;
1276
1277 tex_instruction_ptr->m_Word2.f.sampler_id = texture_unit_source->reg;
1278
1279 // dst
1280 if ( (pAsm->D.dst.rtype == DST_REG_TEMPORARY) ||
1281 (pAsm->D.dst.rtype == DST_REG_OUT) )
1282 {
1283 tex_instruction_ptr->m_Word0.f.src_gpr = texture_coordinate_source->reg;
1284 tex_instruction_ptr->m_Word0.f.src_rel = SQ_ABSOLUTE;
1285
1286 tex_instruction_ptr->m_Word1.f.dst_gpr = pAsm->D.dst.reg;
1287 tex_instruction_ptr->m_Word1.f.dst_rel = SQ_ABSOLUTE;
1288
1289 tex_instruction_ptr->m_Word1.f.dst_sel_x = (pAsm->D.dst.writex ? texture_unit_source->swizzlex : SQ_SEL_MASK);
1290 tex_instruction_ptr->m_Word1.f.dst_sel_y = (pAsm->D.dst.writey ? texture_unit_source->swizzley : SQ_SEL_MASK);
1291 tex_instruction_ptr->m_Word1.f.dst_sel_z = (pAsm->D.dst.writez ? texture_unit_source->swizzlez : SQ_SEL_MASK);
1292 tex_instruction_ptr->m_Word1.f.dst_sel_w = (pAsm->D.dst.writew ? texture_unit_source->swizzlew : SQ_SEL_MASK);
1293
1294
1295 tex_instruction_ptr->m_Word2.f.src_sel_x = texture_coordinate_source->swizzlex;
1296 tex_instruction_ptr->m_Word2.f.src_sel_y = texture_coordinate_source->swizzley;
1297 tex_instruction_ptr->m_Word2.f.src_sel_z = texture_coordinate_source->swizzlez;
1298 tex_instruction_ptr->m_Word2.f.src_sel_w = texture_coordinate_source->swizzlew;
1299 }
1300 else
1301 {
1302 radeon_error("Only temp destination registers supported for TEX dest regs.\n");
1303 return GL_FALSE;
1304 }
1305
1306 if( GL_FALSE == add_tex_instruction(pAsm, tex_instruction_ptr) )
1307 {
1308 return GL_FALSE;
1309 }
1310
1311 return GL_TRUE;
1312 }
1313
1314 void initialize(r700_AssemblerBase *pAsm)
1315 {
1316 GLuint cycle, component;
1317
1318 for (cycle=0; cycle<NUMBER_OF_CYCLES; cycle++)
1319 {
1320 for (component=0; component<NUMBER_OF_COMPONENTS; component++)
1321 {
1322 pAsm->hw_gpr[cycle][component] = (-1);
1323 }
1324 }
1325 for (component=0; component<NUMBER_OF_COMPONENTS; component++)
1326 {
1327 pAsm->hw_cfile_addr[component] = (-1);
1328 pAsm->hw_cfile_chan[component] = (-1);
1329 }
1330 }
1331
1332 GLboolean assemble_alu_src(R700ALUInstruction* alu_instruction_ptr,
1333 int source_index,
1334 PVSSRC* pSource,
1335 BITS scalar_channel_index)
1336 {
1337 BITS src_sel;
1338 BITS src_rel;
1339 BITS src_chan;
1340 BITS src_neg;
1341
1342 //--------------------------------------------------------------------------
1343 // Source for operands src0, src1.
1344 // Values [0,127] correspond to GPR[0..127].
1345 // Values [256,511] correspond to cfile constants c[0..255].
1346
1347 //--------------------------------------------------------------------------
1348 // Other special values are shown in the list below.
1349
1350 // 248 SQ_ALU_SRC_0: special constant 0.0.
1351 // 249 SQ_ALU_SRC_1: special constant 1.0 float.
1352
1353 // 250 SQ_ALU_SRC_1_INT: special constant 1 integer.
1354 // 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer.
1355
1356 // 252 SQ_ALU_SRC_0_5: special constant 0.5 float.
1357 // 253 SQ_ALU_SRC_LITERAL: literal constant.
1358
1359 // 254 SQ_ALU_SRC_PV: previous vector result.
1360 // 255 SQ_ALU_SRC_PS: previous scalar result.
1361 //--------------------------------------------------------------------------
1362
1363 BITS channel_swizzle;
1364 switch (scalar_channel_index)
1365 {
1366 case 0: channel_swizzle = pSource->swizzlex; break;
1367 case 1: channel_swizzle = pSource->swizzley; break;
1368 case 2: channel_swizzle = pSource->swizzlez; break;
1369 case 3: channel_swizzle = pSource->swizzlew; break;
1370 default: channel_swizzle = SQ_SEL_MASK; break;
1371 }
1372
1373 if(channel_swizzle == SQ_SEL_0)
1374 {
1375 src_sel = SQ_ALU_SRC_0;
1376 }
1377 else if (channel_swizzle == SQ_SEL_1)
1378 {
1379 src_sel = SQ_ALU_SRC_1;
1380 }
1381 else
1382 {
1383 if ( (pSource->rtype == SRC_REG_TEMPORARY) ||
1384 (pSource->rtype == SRC_REG_INPUT)
1385 )
1386 {
1387 src_sel = pSource->reg;
1388 }
1389 else if (pSource->rtype == SRC_REG_CONSTANT)
1390 {
1391 src_sel = pSource->reg + CFILE_REGISTER_OFFSET;
1392 }
1393 else
1394 {
1395 radeon_error("Source (%d) register type (%d) not one of TEMP, INPUT, or CONSTANT.\n",
1396 source_index, pSource->rtype);
1397 return GL_FALSE;
1398 }
1399 }
1400
1401 if( ADDR_ABSOLUTE == addrmode_PVSSRC(pSource) )
1402 {
1403 src_rel = SQ_ABSOLUTE;
1404 }
1405 else
1406 {
1407 src_rel = SQ_RELATIVE;
1408 }
1409
1410 switch (channel_swizzle)
1411 {
1412 case SQ_SEL_X:
1413 src_chan = SQ_CHAN_X;
1414 break;
1415 case SQ_SEL_Y:
1416 src_chan = SQ_CHAN_Y;
1417 break;
1418 case SQ_SEL_Z:
1419 src_chan = SQ_CHAN_Z;
1420 break;
1421 case SQ_SEL_W:
1422 src_chan = SQ_CHAN_W;
1423 break;
1424 case SQ_SEL_0:
1425 case SQ_SEL_1:
1426 // Does not matter since src_sel controls
1427 src_chan = SQ_CHAN_X;
1428 break;
1429 default:
1430 radeon_error("Unknown source select value (%d) in assemble_alu_src().\n", channel_swizzle);
1431 return GL_FALSE;
1432 break;
1433 }
1434
1435 switch (scalar_channel_index)
1436 {
1437 case 0: src_neg = pSource->negx; break;
1438 case 1: src_neg = pSource->negy; break;
1439 case 2: src_neg = pSource->negz; break;
1440 case 3: src_neg = pSource->negw; break;
1441 default: src_neg = 0; break;
1442 }
1443
1444 switch (source_index)
1445 {
1446 case 0:
1447 alu_instruction_ptr->m_Word0.f.src0_sel = src_sel;
1448 alu_instruction_ptr->m_Word0.f.src0_rel = src_rel;
1449 alu_instruction_ptr->m_Word0.f.src0_chan = src_chan;
1450 alu_instruction_ptr->m_Word0.f.src0_neg = src_neg;
1451 break;
1452 case 1:
1453 alu_instruction_ptr->m_Word0.f.src1_sel = src_sel;
1454 alu_instruction_ptr->m_Word0.f.src1_rel = src_rel;
1455 alu_instruction_ptr->m_Word0.f.src1_chan = src_chan;
1456 alu_instruction_ptr->m_Word0.f.src1_neg = src_neg;
1457 break;
1458 case 2:
1459 alu_instruction_ptr->m_Word1_OP3.f.src2_sel = src_sel;
1460 alu_instruction_ptr->m_Word1_OP3.f.src2_rel = src_rel;
1461 alu_instruction_ptr->m_Word1_OP3.f.src2_chan = src_chan;
1462 alu_instruction_ptr->m_Word1_OP3.f.src2_neg = src_neg;
1463 break;
1464 default:
1465 radeon_error("Only three sources allowed in ALU opcodes.\n");
1466 return GL_FALSE;
1467 break;
1468 }
1469
1470 return GL_TRUE;
1471 }
1472
1473 GLboolean add_alu_instruction(r700_AssemblerBase* pAsm,
1474 R700ALUInstruction* alu_instruction_ptr,
1475 GLuint contiguous_slots_needed)
1476 {
1477 if( GL_FALSE == check_current_clause(pAsm, CF_ALU_CLAUSE) )
1478 {
1479 return GL_FALSE;
1480 }
1481
1482 if ( pAsm->cf_current_alu_clause_ptr == NULL ||
1483 ( (pAsm->cf_current_alu_clause_ptr != NULL) &&
1484 (pAsm->cf_current_alu_clause_ptr->m_Word1.f.count >= (GetCFMaxInstructions(pAsm->cf_current_alu_clause_ptr->m_ShaderInstType)-contiguous_slots_needed-1) )
1485 ) )
1486 {
1487
1488 //new cf inst for this clause
1489 pAsm->cf_current_alu_clause_ptr = (R700ControlFlowALUClause*) CALLOC_STRUCT(R700ControlFlowALUClause);
1490
1491 // link the new cf to cf segment
1492 if(NULL != pAsm->cf_current_alu_clause_ptr)
1493 {
1494 Init_R700ControlFlowALUClause(pAsm->cf_current_alu_clause_ptr);
1495 AddCFInstruction( pAsm->pR700Shader,
1496 (R700ControlFlowInstruction *)pAsm->cf_current_alu_clause_ptr );
1497 }
1498 else
1499 {
1500 radeon_error("Could not allocate a new ALU CF instruction.\n");
1501 return GL_FALSE;
1502 }
1503
1504 pAsm->cf_current_alu_clause_ptr->m_Word0.f.kcache_bank0 = 0x0;
1505 pAsm->cf_current_alu_clause_ptr->m_Word0.f.kcache_bank1 = 0x0;
1506 pAsm->cf_current_alu_clause_ptr->m_Word0.f.kcache_mode0 = SQ_CF_KCACHE_NOP;
1507
1508 pAsm->cf_current_alu_clause_ptr->m_Word1.f.kcache_mode1 = SQ_CF_KCACHE_NOP;
1509 pAsm->cf_current_alu_clause_ptr->m_Word1.f.kcache_addr0 = 0x0;
1510 pAsm->cf_current_alu_clause_ptr->m_Word1.f.kcache_addr1 = 0x0;
1511
1512 //cf_current_alu_clause_ptr->m_Word1.f.count = number_of_scalar_operations - 1;
1513 pAsm->cf_current_alu_clause_ptr->m_Word1.f.count = 0x0;
1514 pAsm->cf_current_alu_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_ALU;
1515
1516 pAsm->cf_current_alu_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
1517
1518 pAsm->cf_current_alu_clause_ptr->m_Word1.f.barrier = 0x1;
1519 }
1520 else
1521 {
1522 pAsm->cf_current_alu_clause_ptr->m_Word1.f.count++;
1523 }
1524
1525 // If this clause constains any instruction that is forward dependent on a TEX instruction,
1526 // set the whole_quad_mode for this clause
1527 if ( pAsm->pInstDeps[pAsm->uiCurInst].nDstDep > (-1) )
1528 {
1529 pAsm->cf_current_alu_clause_ptr->m_Word1.f.whole_quad_mode = 0x1;
1530 }
1531
1532 if (pAsm->cf_current_alu_clause_ptr->m_Word1.f.count >= (GetCFMaxInstructions(pAsm->cf_current_alu_clause_ptr->m_ShaderInstType)-1) )
1533 {
1534 alu_instruction_ptr->m_Word0.f.last = 1;
1535 }
1536
1537 if(NULL == pAsm->cf_current_alu_clause_ptr->m_pLinkedALUInstruction)
1538 {
1539 pAsm->cf_current_alu_clause_ptr->m_pLinkedALUInstruction = alu_instruction_ptr;
1540 alu_instruction_ptr->m_pLinkedALUClause = pAsm->cf_current_alu_clause_ptr;
1541 }
1542
1543 AddALUInstruction(pAsm->pR700Shader, alu_instruction_ptr);
1544
1545 return GL_TRUE;
1546 }
1547
1548 void get_src_properties(R700ALUInstruction* alu_instruction_ptr,
1549 int source_index,
1550 BITS* psrc_sel,
1551 BITS* psrc_rel,
1552 BITS* psrc_chan,
1553 BITS* psrc_neg)
1554 {
1555 switch (source_index)
1556 {
1557 case 0:
1558 *psrc_sel = alu_instruction_ptr->m_Word0.f.src0_sel ;
1559 *psrc_rel = alu_instruction_ptr->m_Word0.f.src0_rel ;
1560 *psrc_chan = alu_instruction_ptr->m_Word0.f.src0_chan;
1561 *psrc_neg = alu_instruction_ptr->m_Word0.f.src0_neg ;
1562 break;
1563
1564 case 1:
1565 *psrc_sel = alu_instruction_ptr->m_Word0.f.src1_sel ;
1566 *psrc_rel = alu_instruction_ptr->m_Word0.f.src1_rel ;
1567 *psrc_chan = alu_instruction_ptr->m_Word0.f.src1_chan;
1568 *psrc_neg = alu_instruction_ptr->m_Word0.f.src1_neg ;
1569 break;
1570
1571 case 2:
1572 *psrc_sel = alu_instruction_ptr->m_Word1_OP3.f.src2_sel;
1573 *psrc_rel = alu_instruction_ptr->m_Word1_OP3.f.src2_rel;
1574 *psrc_chan = alu_instruction_ptr->m_Word1_OP3.f.src2_chan;
1575 *psrc_neg = alu_instruction_ptr->m_Word1_OP3.f.src2_neg;
1576 break;
1577 }
1578 }
1579
1580 int is_cfile(BITS sel)
1581 {
1582 if (sel > 255 && sel < 512)
1583 {
1584 return 1;
1585 }
1586 return 0;
1587 }
1588
1589 int is_const(BITS sel)
1590 {
1591 if (is_cfile(sel))
1592 {
1593 return 1;
1594 }
1595 else if(sel >= SQ_ALU_SRC_0 && sel <= SQ_ALU_SRC_LITERAL)
1596 {
1597 return 1;
1598 }
1599 return 0;
1600 }
1601
1602 int is_gpr(BITS sel)
1603 {
1604 if (sel >= 0 && sel < 128)
1605 {
1606 return 1;
1607 }
1608 return 0;
1609 }
1610
1611 const GLuint BANK_SWIZZLE_VEC[8] = {SQ_ALU_VEC_210, //000
1612 SQ_ALU_VEC_120, //001
1613 SQ_ALU_VEC_102, //010
1614
1615 SQ_ALU_VEC_201, //011
1616 SQ_ALU_VEC_012, //100
1617 SQ_ALU_VEC_021, //101
1618
1619 SQ_ALU_VEC_012, //110
1620 SQ_ALU_VEC_012}; //111
1621
1622 const GLuint BANK_SWIZZLE_SCL[8] = {SQ_ALU_SCL_210, //000
1623 SQ_ALU_SCL_122, //001
1624 SQ_ALU_SCL_122, //010
1625
1626 SQ_ALU_SCL_221, //011
1627 SQ_ALU_SCL_212, //100
1628 SQ_ALU_SCL_122, //101
1629
1630 SQ_ALU_SCL_122, //110
1631 SQ_ALU_SCL_122}; //111
1632
1633 GLboolean reserve_cfile(r700_AssemblerBase* pAsm,
1634 GLuint sel,
1635 GLuint chan)
1636 {
1637 int res_match = (-1);
1638 int res_empty = (-1);
1639
1640 GLint res;
1641
1642 for (res=3; res>=0; res--)
1643 {
1644 if(pAsm->hw_cfile_addr[ res] < 0)
1645 {
1646 res_empty = res;
1647 }
1648 else if( (pAsm->hw_cfile_addr[res] == (int)sel)
1649 &&
1650 (pAsm->hw_cfile_chan[ res ] == (int) chan) )
1651 {
1652 res_match = res;
1653 }
1654 }
1655
1656 if(res_match >= 0)
1657 {
1658 // Read for this scalar component already reserved, nothing to do here.
1659 ;
1660 }
1661 else if(res_empty >= 0)
1662 {
1663 pAsm->hw_cfile_addr[ res_empty ] = sel;
1664 pAsm->hw_cfile_chan[ res_empty ] = chan;
1665 }
1666 else
1667 {
1668 radeon_error("All cfile read ports are used, cannot reference C$sel, channel $chan.\n");
1669 return GL_FALSE;
1670 }
1671 return GL_TRUE;
1672 }
1673
1674 GLboolean reserve_gpr(r700_AssemblerBase* pAsm, GLuint sel, GLuint chan, GLuint cycle)
1675 {
1676 if(pAsm->hw_gpr[cycle][chan] < 0)
1677 {
1678 pAsm->hw_gpr[cycle][chan] = sel;
1679 }
1680 else if(pAsm->hw_gpr[cycle][chan] != (int)sel)
1681 {
1682 radeon_error("Another scalar operation has already used GPR read port for given channel\n");
1683 return GL_FALSE;
1684 }
1685
1686 return GL_TRUE;
1687 }
1688
1689 GLboolean cycle_for_scalar_bank_swizzle(const int swiz, const int sel, GLuint* pCycle)
1690 {
1691 switch (swiz)
1692 {
1693 case SQ_ALU_SCL_210:
1694 {
1695 int table[3] = {2, 1, 0};
1696 *pCycle = table[sel];
1697 return GL_TRUE;
1698 }
1699 break;
1700 case SQ_ALU_SCL_122:
1701 {
1702 int table[3] = {1, 2, 2};
1703 *pCycle = table[sel];
1704 return GL_TRUE;
1705 }
1706 break;
1707 case SQ_ALU_SCL_212:
1708 {
1709 int table[3] = {2, 1, 2};
1710 *pCycle = table[sel];
1711 return GL_TRUE;
1712 }
1713 break;
1714 case SQ_ALU_SCL_221:
1715 {
1716 int table[3] = {2, 2, 1};
1717 *pCycle = table[sel];
1718 return GL_TRUE;
1719 }
1720 break;
1721 default:
1722 radeon_error("Bad Scalar bank swizzle value\n");
1723 break;
1724 }
1725
1726 return GL_FALSE;
1727 }
1728
1729 GLboolean cycle_for_vector_bank_swizzle(const int swiz, const int sel, GLuint* pCycle)
1730 {
1731 switch (swiz)
1732 {
1733 case SQ_ALU_VEC_012:
1734 {
1735 int table[3] = {0, 1, 2};
1736 *pCycle = table[sel];
1737 }
1738 break;
1739 case SQ_ALU_VEC_021:
1740 {
1741 int table[3] = {0, 2, 1};
1742 *pCycle = table[sel];
1743 }
1744 break;
1745 case SQ_ALU_VEC_120:
1746 {
1747 int table[3] = {1, 2, 0};
1748 *pCycle = table[sel];
1749 }
1750 break;
1751 case SQ_ALU_VEC_102:
1752 {
1753 int table[3] = {1, 0, 2};
1754 *pCycle = table[sel];
1755 }
1756 break;
1757 case SQ_ALU_VEC_201:
1758 {
1759 int table[3] = {2, 0, 1};
1760 *pCycle = table[sel];
1761 }
1762 break;
1763 case SQ_ALU_VEC_210:
1764 {
1765 int table[3] = {2, 1, 0};
1766 *pCycle = table[sel];
1767 }
1768 break;
1769 default:
1770 radeon_error("Bad Vec bank swizzle value\n");
1771 return GL_FALSE;
1772 break;
1773 }
1774
1775 return GL_TRUE;
1776 }
1777
1778 GLboolean check_scalar(r700_AssemblerBase* pAsm,
1779 R700ALUInstruction* alu_instruction_ptr)
1780 {
1781 GLuint cycle;
1782 GLuint bank_swizzle;
1783 GLuint const_count = 0;
1784
1785 BITS sel;
1786 BITS chan;
1787 BITS rel;
1788 BITS neg;
1789
1790 GLuint src;
1791
1792 BITS src_sel [3] = {0,0,0};
1793 BITS src_chan[3] = {0,0,0};
1794 BITS src_rel [3] = {0,0,0};
1795 BITS src_neg [3] = {0,0,0};
1796
1797 GLuint swizzle_key;
1798
1799 GLuint number_of_operands = r700GetNumOperands(pAsm);
1800
1801 for (src=0; src<number_of_operands; src++)
1802 {
1803 get_src_properties(alu_instruction_ptr,
1804 src,
1805 &(src_sel[src]),
1806 &(src_rel[src]),
1807 &(src_chan[src]),
1808 &(src_neg[src]) );
1809 }
1810
1811
1812 swizzle_key = ( (is_const( src_sel[0] ) ? 4 : 0) +
1813 (is_const( src_sel[1] ) ? 2 : 0) +
1814 (is_const( src_sel[2] ) ? 1 : 0) );
1815
1816 alu_instruction_ptr->m_Word1.f.bank_swizzle = BANK_SWIZZLE_SCL[ swizzle_key ];
1817
1818 for (src=0; src<number_of_operands; src++)
1819 {
1820 sel = src_sel [src];
1821 chan = src_chan[src];
1822 rel = src_rel [src];
1823 neg = src_neg [src];
1824
1825 if (is_const( sel ))
1826 {
1827 // Any constant, including literal and inline constants
1828 const_count++;
1829
1830 if (is_cfile( sel ))
1831 {
1832 reserve_cfile(pAsm, sel, chan);
1833 }
1834
1835 }
1836 }
1837
1838 for (src=0; src<number_of_operands; src++)
1839 {
1840 sel = src_sel [src];
1841 chan = src_chan[src];
1842 rel = src_rel [src];
1843 neg = src_neg [src];
1844
1845 if( is_gpr(sel) )
1846 {
1847 bank_swizzle = alu_instruction_ptr->m_Word1.f.bank_swizzle;
1848
1849 if( GL_FALSE == cycle_for_scalar_bank_swizzle(bank_swizzle, src, &cycle) )
1850 {
1851 return GL_FALSE;
1852 }
1853
1854 if(cycle < const_count)
1855 {
1856 if( GL_FALSE == reserve_gpr(pAsm, sel, chan, cycle) )
1857 {
1858 return GL_FALSE;
1859 }
1860 }
1861 }
1862 }
1863
1864 return GL_TRUE;
1865 }
1866
1867 GLboolean check_vector(r700_AssemblerBase* pAsm,
1868 R700ALUInstruction* alu_instruction_ptr)
1869 {
1870 GLuint cycle;
1871 GLuint bank_swizzle;
1872 GLuint const_count = 0;
1873
1874 GLuint src;
1875
1876 BITS sel;
1877 BITS chan;
1878 BITS rel;
1879 BITS neg;
1880
1881 BITS src_sel [3] = {0,0,0};
1882 BITS src_chan[3] = {0,0,0};
1883 BITS src_rel [3] = {0,0,0};
1884 BITS src_neg [3] = {0,0,0};
1885
1886 GLuint swizzle_key;
1887
1888 GLuint number_of_operands = r700GetNumOperands(pAsm);
1889
1890 for (src=0; src<number_of_operands; src++)
1891 {
1892 get_src_properties(alu_instruction_ptr,
1893 src,
1894 &(src_sel[src]),
1895 &(src_rel[src]),
1896 &(src_chan[src]),
1897 &(src_neg[src]) );
1898 }
1899
1900
1901 swizzle_key = ( (is_const( src_sel[0] ) ? 4 : 0) +
1902 (is_const( src_sel[1] ) ? 2 : 0) +
1903 (is_const( src_sel[2] ) ? 1 : 0)
1904 );
1905
1906 alu_instruction_ptr->m_Word1.f.bank_swizzle = BANK_SWIZZLE_VEC[swizzle_key];
1907
1908 for (src=0; src<number_of_operands; src++)
1909 {
1910 sel = src_sel [src];
1911 chan = src_chan[src];
1912 rel = src_rel [src];
1913 neg = src_neg [src];
1914
1915
1916 bank_swizzle = alu_instruction_ptr->m_Word1.f.bank_swizzle;
1917
1918 if( is_gpr(sel) )
1919 {
1920 if( GL_FALSE == cycle_for_vector_bank_swizzle(bank_swizzle, src, &cycle) )
1921 {
1922 return GL_FALSE;
1923 }
1924
1925 if ( (src == 1) &&
1926 (sel == src_sel[0]) &&
1927 (chan == src_chan[0]) )
1928 {
1929 }
1930 else
1931 {
1932 if( GL_FALSE == reserve_gpr(pAsm, sel, chan, cycle) )
1933 {
1934 return GL_FALSE;
1935 }
1936 }
1937 }
1938 else if( is_const(sel) )
1939 {
1940 const_count++;
1941
1942 if( is_cfile(sel) )
1943 {
1944 if( GL_FALSE == reserve_cfile(pAsm, sel, chan) )
1945 {
1946 return GL_FALSE;
1947 }
1948 }
1949 }
1950 }
1951
1952 return GL_TRUE;
1953 }
1954
1955 GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm)
1956 {
1957 GLuint number_of_scalar_operations;
1958 GLboolean is_single_scalar_operation;
1959 GLuint scalar_channel_index;
1960
1961 PVSSRC * pcurrent_source;
1962 int current_source_index;
1963 GLuint contiguous_slots_needed;
1964
1965 GLuint uNumSrc = r700GetNumOperands(pAsm);
1966 GLuint channel_swizzle, j;
1967 GLuint chan_counter[4] = {0, 0, 0, 0};
1968 PVSSRC * pSource[3];
1969 GLboolean bSplitInst = GL_FALSE;
1970
1971 if (1 == pAsm->D.dst.math)
1972 {
1973 is_single_scalar_operation = GL_TRUE;
1974 number_of_scalar_operations = 1;
1975 }
1976 else
1977 {
1978 is_single_scalar_operation = GL_FALSE;
1979 number_of_scalar_operations = 4;
1980
1981 /* current assembler doesn't do more than 1 register per source */
1982 #if 0
1983 /* check read port, only very preliminary algorithm, not count in
1984 src0/1 same comp case and prev slot repeat case; also not count relative
1985 addressing. TODO: improve performance. */
1986 for(j=0; j<uNumSrc; j++)
1987 {
1988 pSource[j] = &(pAsm->S[j].src);
1989 }
1990 for(scalar_channel_index=0; scalar_channel_index<4; scalar_channel_index++)
1991 {
1992 for(j=0; j<uNumSrc; j++)
1993 {
1994 switch (scalar_channel_index)
1995 {
1996 case 0: channel_swizzle = pSource[j]->swizzlex; break;
1997 case 1: channel_swizzle = pSource[j]->swizzley; break;
1998 case 2: channel_swizzle = pSource[j]->swizzlez; break;
1999 case 3: channel_swizzle = pSource[j]->swizzlew; break;
2000 default: channel_swizzle = SQ_SEL_MASK; break;
2001 }
2002 if ( ((pSource[j]->rtype == SRC_REG_TEMPORARY) ||
2003 (pSource[j]->rtype == SRC_REG_INPUT))
2004 && (channel_swizzle <= SQ_SEL_W) )
2005 {
2006 chan_counter[channel_swizzle]++;
2007 }
2008 }
2009 }
2010 if( (chan_counter[SQ_SEL_X] > 3)
2011 || (chan_counter[SQ_SEL_Y] > 3)
2012 || (chan_counter[SQ_SEL_Z] > 3)
2013 || (chan_counter[SQ_SEL_W] > 3) ) /* each chan bank has only 3 ports. */
2014 {
2015 bSplitInst = GL_TRUE;
2016 }
2017 #endif
2018 }
2019
2020 contiguous_slots_needed = 0;
2021
2022 if(GL_TRUE == is_reduction_opcode(&(pAsm->D)) )
2023 {
2024 contiguous_slots_needed = 4;
2025 }
2026
2027 initialize(pAsm);
2028
2029 for (scalar_channel_index=0;
2030 scalar_channel_index < number_of_scalar_operations;
2031 scalar_channel_index++)
2032 {
2033 R700ALUInstruction* alu_instruction_ptr = (R700ALUInstruction*) CALLOC_STRUCT(R700ALUInstruction);
2034 if (alu_instruction_ptr == NULL)
2035 {
2036 return GL_FALSE;
2037 }
2038 Init_R700ALUInstruction(alu_instruction_ptr);
2039
2040 //src 0
2041 current_source_index = 0;
2042 pcurrent_source = &(pAsm->S[0].src);
2043
2044 if (GL_FALSE == assemble_alu_src(alu_instruction_ptr,
2045 current_source_index,
2046 pcurrent_source,
2047 scalar_channel_index) )
2048 {
2049 return GL_FALSE;
2050 }
2051
2052 if (uNumSrc > 1)
2053 {
2054 // Process source 1
2055 current_source_index = 1;
2056 pcurrent_source = &(pAsm->S[current_source_index].src);
2057
2058 if (GL_FALSE == assemble_alu_src(alu_instruction_ptr,
2059 current_source_index,
2060 pcurrent_source,
2061 scalar_channel_index) )
2062 {
2063 return GL_FALSE;
2064 }
2065 }
2066
2067 //other bits
2068 alu_instruction_ptr->m_Word0.f.index_mode = SQ_INDEX_AR_X;
2069
2070 if( (is_single_scalar_operation == GL_TRUE)
2071 || (GL_TRUE == bSplitInst) )
2072 {
2073 alu_instruction_ptr->m_Word0.f.last = 1;
2074 }
2075 else
2076 {
2077 alu_instruction_ptr->m_Word0.f.last = (scalar_channel_index == 3) ? 1 : 0;
2078 }
2079
2080 alu_instruction_ptr->m_Word0.f.pred_sel = 0x0;
2081 alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x0;
2082 alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x0;
2083
2084 // dst
2085 if( (pAsm->D.dst.rtype == DST_REG_TEMPORARY) ||
2086 (pAsm->D.dst.rtype == DST_REG_OUT) )
2087 {
2088 alu_instruction_ptr->m_Word1.f.dst_gpr = pAsm->D.dst.reg;
2089 }
2090 else
2091 {
2092 radeon_error("Only temp destination registers supported for ALU dest regs.\n");
2093 return GL_FALSE;
2094 }
2095
2096 alu_instruction_ptr->m_Word1.f.dst_rel = SQ_ABSOLUTE; //D.rtype
2097
2098 if ( is_single_scalar_operation == GL_TRUE )
2099 {
2100 // Override scalar_channel_index since only one scalar value will be written
2101 if(pAsm->D.dst.writex)
2102 {
2103 scalar_channel_index = 0;
2104 }
2105 else if(pAsm->D.dst.writey)
2106 {
2107 scalar_channel_index = 1;
2108 }
2109 else if(pAsm->D.dst.writez)
2110 {
2111 scalar_channel_index = 2;
2112 }
2113 else if(pAsm->D.dst.writew)
2114 {
2115 scalar_channel_index = 3;
2116 }
2117 }
2118
2119 alu_instruction_ptr->m_Word1.f.dst_chan = scalar_channel_index;
2120
2121 alu_instruction_ptr->m_Word1.f.clamp = pAsm->pILInst[pAsm->uiCurInst].SaturateMode;
2122
2123 if (pAsm->D.dst.op3)
2124 {
2125 //op3
2126
2127 alu_instruction_ptr->m_Word1_OP3.f.alu_inst = pAsm->D.dst.opcode;
2128
2129 //There's 3rd src for op3
2130 current_source_index = 2;
2131 pcurrent_source = &(pAsm->S[current_source_index].src);
2132
2133 if ( GL_FALSE == assemble_alu_src(alu_instruction_ptr,
2134 current_source_index,
2135 pcurrent_source,
2136 scalar_channel_index) )
2137 {
2138 return GL_FALSE;
2139 }
2140 }
2141 else
2142 {
2143 //op2
2144 if (pAsm->bR6xx)
2145 {
2146 alu_instruction_ptr->m_Word1_OP2.f6.alu_inst = pAsm->D.dst.opcode;
2147
2148 alu_instruction_ptr->m_Word1_OP2.f6.src0_abs = 0x0;
2149 alu_instruction_ptr->m_Word1_OP2.f6.src1_abs = 0x0;
2150
2151 //alu_instruction_ptr->m_Word1_OP2.f6.update_execute_mask = 0x0;
2152 //alu_instruction_ptr->m_Word1_OP2.f6.update_pred = 0x0;
2153 switch (scalar_channel_index)
2154 {
2155 case 0:
2156 alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writex;
2157 break;
2158 case 1:
2159 alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writey;
2160 break;
2161 case 2:
2162 alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writez;
2163 break;
2164 case 3:
2165 alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writew;
2166 break;
2167 default:
2168 alu_instruction_ptr->m_Word1_OP2.f6.write_mask = 1; //SQ_SEL_MASK;
2169 break;
2170 }
2171 alu_instruction_ptr->m_Word1_OP2.f6.omod = SQ_ALU_OMOD_OFF;
2172 }
2173 else
2174 {
2175 alu_instruction_ptr->m_Word1_OP2.f.alu_inst = pAsm->D.dst.opcode;
2176
2177 alu_instruction_ptr->m_Word1_OP2.f.src0_abs = 0x0;
2178 alu_instruction_ptr->m_Word1_OP2.f.src1_abs = 0x0;
2179
2180 //alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x0;
2181 //alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x0;
2182 switch (scalar_channel_index)
2183 {
2184 case 0:
2185 alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writex;
2186 break;
2187 case 1:
2188 alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writey;
2189 break;
2190 case 2:
2191 alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writez;
2192 break;
2193 case 3:
2194 alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writew;
2195 break;
2196 default:
2197 alu_instruction_ptr->m_Word1_OP2.f.write_mask = 1; //SQ_SEL_MASK;
2198 break;
2199 }
2200 alu_instruction_ptr->m_Word1_OP2.f.omod = SQ_ALU_OMOD_OFF;
2201 }
2202 }
2203
2204 if(GL_FALSE == add_alu_instruction(pAsm, alu_instruction_ptr, contiguous_slots_needed) )
2205 {
2206 return GL_FALSE;
2207 }
2208
2209 /*
2210 * Judge the type of current instruction, is it vector or scalar
2211 * instruction.
2212 */
2213 if (is_single_scalar_operation)
2214 {
2215 if(GL_FALSE == check_scalar(pAsm, alu_instruction_ptr) )
2216 {
2217 return GL_FALSE;
2218 }
2219 }
2220 else
2221 {
2222 if(GL_FALSE == check_vector(pAsm, alu_instruction_ptr) )
2223 {
2224 return 1;
2225 }
2226 }
2227
2228 contiguous_slots_needed = 0;
2229 }
2230
2231 return GL_TRUE;
2232 }
2233
2234 GLboolean next_ins(r700_AssemblerBase *pAsm)
2235 {
2236 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
2237
2238 if( GL_TRUE == pAsm->is_tex )
2239 {
2240 if (pILInst->TexSrcTarget == TEXTURE_RECT_INDEX) {
2241 if( GL_FALSE == assemble_tex_instruction(pAsm, GL_FALSE) )
2242 {
2243 radeon_error("Error assembling TEX instruction\n");
2244 return GL_FALSE;
2245 }
2246 } else {
2247 if( GL_FALSE == assemble_tex_instruction(pAsm, GL_TRUE) )
2248 {
2249 radeon_error("Error assembling TEX instruction\n");
2250 return GL_FALSE;
2251 }
2252 }
2253 }
2254 else
2255 { //ALU
2256 if( GL_FALSE == assemble_alu_instruction(pAsm) )
2257 {
2258 radeon_error("Error assembling ALU instruction\n");
2259 return GL_FALSE;
2260 }
2261 }
2262
2263 if(pAsm->D.dst.rtype == DST_REG_OUT)
2264 {
2265 if(pAsm->D.dst.op3)
2266 {
2267 // There is no mask for OP3 instructions, so all channels are written
2268 pAsm->pucOutMask[pAsm->D.dst.reg - pAsm->starting_export_register_number] = 0xF;
2269 }
2270 else
2271 {
2272 pAsm->pucOutMask[pAsm->D.dst.reg - pAsm->starting_export_register_number]
2273 |= (unsigned char)pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask;
2274 }
2275 }
2276
2277 //reset for next inst.
2278 pAsm->D.bits = 0;
2279 pAsm->S[0].bits = 0;
2280 pAsm->S[1].bits = 0;
2281 pAsm->S[2].bits = 0;
2282 pAsm->is_tex = GL_FALSE;
2283 pAsm->need_tex_barrier = GL_FALSE;
2284 return GL_TRUE;
2285 }
2286
2287 GLboolean assemble_math_function(r700_AssemblerBase* pAsm, BITS opcode)
2288 {
2289 BITS tmp;
2290
2291 checkop1(pAsm);
2292
2293 tmp = gethelpr(pAsm);
2294
2295 // opcode tmp.x, a.x
2296 // MOV dst, tmp.x
2297
2298 pAsm->D.dst.opcode = opcode;
2299 pAsm->D.dst.math = 1;
2300
2301 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
2302 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
2303 pAsm->D.dst.reg = tmp;
2304 pAsm->D.dst.writex = 1;
2305
2306 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
2307 {
2308 return GL_FALSE;
2309 }
2310
2311 if ( GL_FALSE == next_ins(pAsm) )
2312 {
2313 return GL_FALSE;
2314 }
2315
2316 // Now replicate result to all necessary channels in destination
2317 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
2318
2319 if( GL_FALSE == assemble_dst(pAsm) )
2320 {
2321 return GL_FALSE;
2322 }
2323
2324 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
2325 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
2326 pAsm->S[0].src.reg = tmp;
2327
2328 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
2329 noneg_PVSSRC(&(pAsm->S[0].src));
2330
2331 if( GL_FALSE == next_ins(pAsm) )
2332 {
2333 return GL_FALSE;
2334 }
2335
2336 return GL_TRUE;
2337 }
2338
2339 GLboolean assemble_ABS(r700_AssemblerBase *pAsm)
2340 {
2341 checkop1(pAsm);
2342
2343 pAsm->D.dst.opcode = SQ_OP2_INST_MAX;
2344
2345 if( GL_FALSE == assemble_dst(pAsm) )
2346 {
2347 return GL_FALSE;
2348 }
2349 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
2350 {
2351 return GL_FALSE;
2352 }
2353
2354 pAsm->S[1].bits = pAsm->S[0].bits;
2355 flipneg_PVSSRC(&(pAsm->S[1].src));
2356
2357 if ( GL_FALSE == next_ins(pAsm) )
2358 {
2359 return GL_FALSE;
2360 }
2361
2362 return GL_TRUE;
2363 }
2364
2365 GLboolean assemble_ADD(r700_AssemblerBase *pAsm)
2366 {
2367 if( GL_FALSE == checkop2(pAsm) )
2368 {
2369 return GL_FALSE;
2370 }
2371
2372 pAsm->D.dst.opcode = SQ_OP2_INST_ADD;
2373
2374 if( GL_FALSE == assemble_dst(pAsm) )
2375 {
2376 return GL_FALSE;
2377 }
2378
2379 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
2380 {
2381 return GL_FALSE;
2382 }
2383
2384 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
2385 {
2386 return GL_FALSE;
2387 }
2388
2389 if(pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_SUB)
2390 {
2391 flipneg_PVSSRC(&(pAsm->S[1].src));
2392 }
2393
2394 if( GL_FALSE == next_ins(pAsm) )
2395 {
2396 return GL_FALSE;
2397 }
2398
2399 return GL_TRUE;
2400 }
2401
2402 GLboolean assemble_ARL(r700_AssemblerBase *pAsm)
2403 { /* TODO: ar values dont' persist between clauses */
2404 if( GL_FALSE == checkop1(pAsm) )
2405 {
2406 return GL_FALSE;
2407 }
2408
2409 pAsm->D.dst.opcode = SQ_OP2_INST_MOVA_FLOOR;
2410 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
2411 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
2412 pAsm->D.dst.reg = 0;
2413 pAsm->D.dst.writex = 0;
2414 pAsm->D.dst.writey = 0;
2415 pAsm->D.dst.writez = 0;
2416 pAsm->D.dst.writew = 0;
2417
2418 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
2419 {
2420 return GL_FALSE;
2421 }
2422
2423 if( GL_FALSE == next_ins(pAsm) )
2424 {
2425 return GL_FALSE;
2426 }
2427
2428 return GL_TRUE;
2429 }
2430
2431 GLboolean assemble_BAD(char *opcode_str)
2432 {
2433 radeon_error("Not yet implemented instruction (%s)\n", opcode_str);
2434 return GL_FALSE;
2435 }
2436
2437 GLboolean assemble_CMP(r700_AssemblerBase *pAsm)
2438 {
2439 int tmp;
2440
2441 if( GL_FALSE == checkop3(pAsm) )
2442 {
2443 return GL_FALSE;
2444 }
2445
2446 pAsm->D.dst.opcode = SQ_OP3_INST_CNDGE;
2447 pAsm->D.dst.op3 = 1;
2448
2449 tmp = (-1);
2450
2451 if(0xF != pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask)
2452 {
2453 //OP3 has no support for write mask
2454 tmp = gethelpr(pAsm);
2455
2456 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
2457 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
2458 pAsm->D.dst.reg = tmp;
2459
2460 nomask_PVSDST(&(pAsm->D.dst));
2461 }
2462 else
2463 {
2464 if( GL_FALSE == assemble_dst(pAsm) )
2465 {
2466 return GL_FALSE;
2467 }
2468 }
2469
2470 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
2471 {
2472 return GL_FALSE;
2473 }
2474
2475 if( GL_FALSE == assemble_src(pAsm, 2, 1) )
2476 {
2477 return GL_FALSE;
2478 }
2479
2480 if( GL_FALSE == assemble_src(pAsm, 1, 2) )
2481 {
2482 return GL_FALSE;
2483 }
2484
2485 if ( GL_FALSE == next_ins(pAsm) )
2486 {
2487 return GL_FALSE;
2488 }
2489
2490 if (0xF != pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask)
2491 {
2492 if( GL_FALSE == assemble_dst(pAsm) )
2493 {
2494 return GL_FALSE;
2495 }
2496
2497 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
2498
2499 //tmp for source
2500 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
2501 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
2502 pAsm->S[0].src.reg = tmp;
2503
2504 noneg_PVSSRC(&(pAsm->S[0].src));
2505 noswizzle_PVSSRC(&(pAsm->S[0].src));
2506
2507 if( GL_FALSE == next_ins(pAsm) )
2508 {
2509 return GL_FALSE;
2510 }
2511 }
2512
2513 return GL_TRUE;
2514 }
2515
2516 GLboolean assemble_COS(r700_AssemblerBase *pAsm)
2517 {
2518 return assemble_math_function(pAsm, SQ_OP2_INST_COS);
2519 }
2520
2521 GLboolean assemble_DOT(r700_AssemblerBase *pAsm)
2522 {
2523 if( GL_FALSE == checkop2(pAsm) )
2524 {
2525 return GL_FALSE;
2526 }
2527
2528 pAsm->D.dst.opcode = SQ_OP2_INST_DOT4;
2529
2530 if( GL_FALSE == assemble_dst(pAsm) )
2531 {
2532 return GL_FALSE;
2533 }
2534
2535 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
2536 {
2537 return GL_FALSE;
2538 }
2539
2540 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
2541 {
2542 return GL_FALSE;
2543 }
2544
2545 if(OPCODE_DP3 == pAsm->pILInst[pAsm->uiCurInst].Opcode)
2546 {
2547 zerocomp_PVSSRC(&(pAsm->S[0].src), 3);
2548 zerocomp_PVSSRC(&(pAsm->S[1].src), 3);
2549 }
2550 else if(pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_DPH)
2551 {
2552 onecomp_PVSSRC(&(pAsm->S[0].src), 3);
2553 }
2554
2555 if ( GL_FALSE == next_ins(pAsm) )
2556 {
2557 return GL_FALSE;
2558 }
2559
2560 return GL_TRUE;
2561 }
2562
2563 GLboolean assemble_DST(r700_AssemblerBase *pAsm)
2564 {
2565 if( GL_FALSE == checkop2(pAsm) )
2566 {
2567 return GL_FALSE;
2568 }
2569
2570 pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
2571
2572 if( GL_FALSE == assemble_dst(pAsm) )
2573 {
2574 return GL_FALSE;
2575 }
2576
2577 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
2578 {
2579 return GL_FALSE;
2580 }
2581
2582 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
2583 {
2584 return GL_FALSE;
2585 }
2586
2587 onecomp_PVSSRC(&(pAsm->S[0].src), 0);
2588 onecomp_PVSSRC(&(pAsm->S[0].src), 3);
2589
2590 onecomp_PVSSRC(&(pAsm->S[1].src), 0);
2591 onecomp_PVSSRC(&(pAsm->S[1].src), 2);
2592
2593 if ( GL_FALSE == next_ins(pAsm) )
2594 {
2595 return GL_FALSE;
2596 }
2597
2598 return GL_TRUE;
2599 }
2600
2601 GLboolean assemble_EX2(r700_AssemblerBase *pAsm)
2602 {
2603 return assemble_math_function(pAsm, SQ_OP2_INST_EXP_IEEE);
2604 }
2605
2606 GLboolean assemble_FLR(r700_AssemblerBase *pAsm)
2607 {
2608 checkop1(pAsm);
2609
2610 pAsm->D.dst.opcode = SQ_OP2_INST_FLOOR;
2611
2612 if ( GL_FALSE == assemble_dst(pAsm) )
2613 {
2614 return GL_FALSE;
2615 }
2616
2617 if ( GL_FALSE == assemble_src(pAsm, 0, -1) )
2618 {
2619 return GL_FALSE;
2620 }
2621
2622 if ( GL_FALSE == next_ins(pAsm) )
2623 {
2624 return GL_FALSE;
2625 }
2626
2627 return GL_TRUE;
2628 }
2629
2630 GLboolean assemble_FLR_INT(r700_AssemblerBase *pAsm)
2631 {
2632 return assemble_math_function(pAsm, SQ_OP2_INST_FLT_TO_INT);
2633 }
2634
2635 GLboolean assemble_FRC(r700_AssemblerBase *pAsm)
2636 {
2637 checkop1(pAsm);
2638
2639 pAsm->D.dst.opcode = SQ_OP2_INST_FRACT;
2640
2641 if ( GL_FALSE == assemble_dst(pAsm) )
2642 {
2643 return GL_FALSE;
2644 }
2645
2646 if ( GL_FALSE == assemble_src(pAsm, 0, -1) )
2647 {
2648 return GL_FALSE;
2649 }
2650
2651 if ( GL_FALSE == next_ins(pAsm) )
2652 {
2653 return GL_FALSE;
2654 }
2655
2656 return GL_TRUE;
2657 }
2658
2659 GLboolean assemble_KIL(r700_AssemblerBase *pAsm)
2660 {
2661 /* TODO: doc says KILL has to be last(end) ALU clause */
2662
2663 checkop1(pAsm);
2664
2665 pAsm->D.dst.opcode = SQ_OP2_INST_KILLGT;
2666
2667 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
2668 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
2669 pAsm->D.dst.reg = 0;
2670 pAsm->D.dst.writex = 0;
2671 pAsm->D.dst.writey = 0;
2672 pAsm->D.dst.writez = 0;
2673 pAsm->D.dst.writew = 0;
2674
2675 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
2676 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
2677 pAsm->S[0].src.reg = 0;
2678
2679 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_0);
2680 noneg_PVSSRC(&(pAsm->S[0].src));
2681
2682 if ( GL_FALSE == assemble_src(pAsm, 0, 1) )
2683 {
2684 return GL_FALSE;
2685 }
2686
2687 if ( GL_FALSE == next_ins(pAsm) )
2688 {
2689 return GL_FALSE;
2690 }
2691
2692 pAsm->pR700Shader->killIsUsed = GL_TRUE;
2693
2694 return GL_TRUE;
2695 }
2696
2697 GLboolean assemble_LG2(r700_AssemblerBase *pAsm)
2698 {
2699 return assemble_math_function(pAsm, SQ_OP2_INST_LOG_IEEE);
2700 }
2701
2702 GLboolean assemble_LRP(r700_AssemblerBase *pAsm)
2703 {
2704 BITS tmp;
2705
2706 if( GL_FALSE == checkop3(pAsm) )
2707 {
2708 return GL_FALSE;
2709 }
2710
2711 tmp = gethelpr(pAsm);
2712
2713 pAsm->D.dst.opcode = SQ_OP2_INST_ADD;
2714
2715 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
2716 pAsm->D.dst.reg = tmp;
2717 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
2718 nomask_PVSDST(&(pAsm->D.dst));
2719
2720
2721 if( GL_FALSE == assemble_src(pAsm, 1, 0) )
2722 {
2723 return GL_FALSE;
2724 }
2725
2726 if ( GL_FALSE == assemble_src(pAsm, 2, 1) )
2727 {
2728 return GL_FALSE;
2729 }
2730
2731 neg_PVSSRC(&(pAsm->S[1].src));
2732
2733 if( GL_FALSE == next_ins(pAsm) )
2734 {
2735 return GL_FALSE;
2736 }
2737
2738 pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
2739 pAsm->D.dst.op3 = 1;
2740
2741 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
2742 pAsm->D.dst.reg = tmp;
2743 nomask_PVSDST(&(pAsm->D.dst));
2744 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
2745
2746 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
2747 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
2748 pAsm->S[0].src.reg = tmp;
2749 noswizzle_PVSSRC(&(pAsm->S[0].src));
2750
2751
2752 if( GL_FALSE == assemble_src(pAsm, 0, 1) )
2753 {
2754 return GL_FALSE;
2755 }
2756 if( GL_FALSE == assemble_src(pAsm, 2, -1) )
2757 {
2758 return GL_FALSE;
2759 }
2760
2761 if( GL_FALSE == next_ins(pAsm) )
2762 {
2763 return GL_FALSE;
2764 }
2765
2766 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
2767
2768 if( GL_FALSE == assemble_dst(pAsm) )
2769 {
2770 return GL_FALSE;
2771 }
2772
2773 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
2774 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
2775 pAsm->S[0].src.reg = tmp;
2776 noswizzle_PVSSRC(&(pAsm->S[0].src));
2777
2778 if( GL_FALSE == next_ins(pAsm) )
2779 {
2780 return GL_FALSE;
2781 }
2782
2783 return GL_TRUE;
2784 }
2785
2786 GLboolean assemble_MAD(struct r700_AssemblerBase *pAsm)
2787 {
2788 int tmp, ii;
2789 GLboolean bReplaceDst = GL_FALSE;
2790 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
2791
2792 if( GL_FALSE == checkop3(pAsm) )
2793 {
2794 return GL_FALSE;
2795 }
2796
2797 pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
2798 pAsm->D.dst.op3 = 1;
2799
2800 tmp = (-1);
2801
2802 if(PROGRAM_TEMPORARY == pILInst->DstReg.File)
2803 { /* TODO : more investigation on MAD src and dst using same register */
2804 for(ii=0; ii<3; ii++)
2805 {
2806 if( (PROGRAM_TEMPORARY == pILInst->SrcReg[ii].File)
2807 && (pILInst->DstReg.Index == pILInst->SrcReg[ii].Index) )
2808 {
2809 bReplaceDst = GL_TRUE;
2810 break;
2811 }
2812 }
2813 }
2814 if(0xF != pILInst->DstReg.WriteMask)
2815 { /* OP3 has no support for write mask */
2816 bReplaceDst = GL_TRUE;
2817 }
2818
2819 if(GL_TRUE == bReplaceDst)
2820 {
2821 tmp = gethelpr(pAsm);
2822
2823 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
2824 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
2825 pAsm->D.dst.reg = tmp;
2826
2827 nomask_PVSDST(&(pAsm->D.dst));
2828 }
2829 else
2830 {
2831 if( GL_FALSE == assemble_dst(pAsm) )
2832 {
2833 return GL_FALSE;
2834 }
2835 }
2836
2837 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
2838 {
2839 return GL_FALSE;
2840 }
2841
2842 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
2843 {
2844 return GL_FALSE;
2845 }
2846
2847 if( GL_FALSE == assemble_src(pAsm, 2, -1) )
2848 {
2849 return GL_FALSE;
2850 }
2851
2852 if ( GL_FALSE == next_ins(pAsm) )
2853 {
2854 return GL_FALSE;
2855 }
2856
2857 if (GL_TRUE == bReplaceDst)
2858 {
2859 if( GL_FALSE == assemble_dst(pAsm) )
2860 {
2861 return GL_FALSE;
2862 }
2863
2864 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
2865
2866 //tmp for source
2867 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
2868 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
2869 pAsm->S[0].src.reg = tmp;
2870
2871 noneg_PVSSRC(&(pAsm->S[0].src));
2872 noswizzle_PVSSRC(&(pAsm->S[0].src));
2873
2874 if( GL_FALSE == next_ins(pAsm) )
2875 {
2876 return GL_FALSE;
2877 }
2878 }
2879
2880 return GL_TRUE;
2881 }
2882
2883 /* LIT dst, src */
2884 GLboolean assemble_LIT(r700_AssemblerBase *pAsm)
2885 {
2886 unsigned int dstReg;
2887 unsigned int dstType;
2888 unsigned int srcReg;
2889 unsigned int srcType;
2890 checkop1(pAsm);
2891 int tmp = gethelpr(pAsm);
2892
2893 if( GL_FALSE == assemble_dst(pAsm) )
2894 {
2895 return GL_FALSE;
2896 }
2897 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
2898 {
2899 return GL_FALSE;
2900 }
2901 dstReg = pAsm->D.dst.reg;
2902 dstType = pAsm->D.dst.rtype;
2903 srcReg = pAsm->S[0].src.reg;
2904 srcType = pAsm->S[0].src.rtype;
2905
2906 /* dst.xw, <- 1.0 */
2907 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
2908 pAsm->D.dst.rtype = dstType;
2909 pAsm->D.dst.reg = dstReg;
2910 pAsm->D.dst.writex = 1;
2911 pAsm->D.dst.writey = 0;
2912 pAsm->D.dst.writez = 0;
2913 pAsm->D.dst.writew = 1;
2914 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
2915 pAsm->S[0].src.reg = tmp;
2916 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
2917 noneg_PVSSRC(&(pAsm->S[0].src));
2918 pAsm->S[0].src.swizzlex = SQ_SEL_1;
2919 pAsm->S[0].src.swizzley = SQ_SEL_1;
2920 pAsm->S[0].src.swizzlez = SQ_SEL_1;
2921 pAsm->S[0].src.swizzlew = SQ_SEL_1;
2922 if( GL_FALSE == next_ins(pAsm) )
2923 {
2924 return GL_FALSE;
2925 }
2926
2927 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
2928 {
2929 return GL_FALSE;
2930 }
2931
2932 /* dst.y = max(src.x, 0.0) */
2933 pAsm->D.dst.opcode = SQ_OP2_INST_MAX;
2934 pAsm->D.dst.rtype = dstType;
2935 pAsm->D.dst.reg = dstReg;
2936 pAsm->D.dst.writex = 0;
2937 pAsm->D.dst.writey = 1;
2938 pAsm->D.dst.writez = 0;
2939 pAsm->D.dst.writew = 0;
2940 pAsm->S[0].src.rtype = srcType;
2941 pAsm->S[0].src.reg = srcReg;
2942 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
2943 swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X, SQ_SEL_X, SQ_SEL_X, SQ_SEL_X);
2944 pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
2945 pAsm->S[1].src.reg = tmp;
2946 setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
2947 noneg_PVSSRC(&(pAsm->S[1].src));
2948 pAsm->S[1].src.swizzlex = SQ_SEL_0;
2949 pAsm->S[1].src.swizzley = SQ_SEL_0;
2950 pAsm->S[1].src.swizzlez = SQ_SEL_0;
2951 pAsm->S[1].src.swizzlew = SQ_SEL_0;
2952 if( GL_FALSE == next_ins(pAsm) )
2953 {
2954 return GL_FALSE;
2955 }
2956
2957 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
2958 {
2959 return GL_FALSE;
2960 }
2961
2962 swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_Y, SQ_SEL_Y, SQ_SEL_Y, SQ_SEL_Y);
2963
2964 /* dst.z = log(src.y) */
2965 pAsm->D.dst.opcode = SQ_OP2_INST_LOG_CLAMPED;
2966 pAsm->D.dst.math = 1;
2967 pAsm->D.dst.rtype = dstType;
2968 pAsm->D.dst.reg = dstReg;
2969 pAsm->D.dst.writex = 0;
2970 pAsm->D.dst.writey = 0;
2971 pAsm->D.dst.writez = 1;
2972 pAsm->D.dst.writew = 0;
2973 pAsm->S[0].src.rtype = srcType;
2974 pAsm->S[0].src.reg = srcReg;
2975 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
2976 if( GL_FALSE == next_ins(pAsm) )
2977 {
2978 return GL_FALSE;
2979 }
2980
2981 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
2982 {
2983 return GL_FALSE;
2984 }
2985
2986 if( GL_FALSE == assemble_src(pAsm, 0, 2) )
2987 {
2988 return GL_FALSE;
2989 }
2990
2991 swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_W, SQ_SEL_W, SQ_SEL_W, SQ_SEL_W);
2992
2993 swizzleagain_PVSSRC(&(pAsm->S[2].src), SQ_SEL_X, SQ_SEL_X, SQ_SEL_X, SQ_SEL_X);
2994
2995 /* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */
2996 pAsm->D.dst.opcode = SQ_OP3_INST_MUL_LIT;
2997 pAsm->D.dst.math = 1;
2998 pAsm->D.dst.op3 = 1;
2999 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3000 pAsm->D.dst.reg = tmp;
3001 pAsm->D.dst.writex = 1;
3002 pAsm->D.dst.writey = 0;
3003 pAsm->D.dst.writez = 0;
3004 pAsm->D.dst.writew = 0;
3005
3006 pAsm->S[0].src.rtype = srcType;
3007 pAsm->S[0].src.reg = srcReg;
3008 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3009
3010 pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
3011 pAsm->S[1].src.reg = dstReg;
3012 setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
3013 noneg_PVSSRC(&(pAsm->S[1].src));
3014 pAsm->S[1].src.swizzlex = SQ_SEL_Z;
3015 pAsm->S[1].src.swizzley = SQ_SEL_Z;
3016 pAsm->S[1].src.swizzlez = SQ_SEL_Z;
3017 pAsm->S[1].src.swizzlew = SQ_SEL_Z;
3018
3019 pAsm->S[2].src.rtype = srcType;
3020 pAsm->S[2].src.reg = srcReg;
3021 setaddrmode_PVSSRC(&(pAsm->S[2].src), ADDR_ABSOLUTE);
3022
3023 if( GL_FALSE == next_ins(pAsm) )
3024 {
3025 return GL_FALSE;
3026 }
3027
3028 /* dst.z = exp(tmp.x) */
3029 pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE;
3030 pAsm->D.dst.math = 1;
3031 pAsm->D.dst.rtype = dstType;
3032 pAsm->D.dst.reg = dstReg;
3033 pAsm->D.dst.writex = 0;
3034 pAsm->D.dst.writey = 0;
3035 pAsm->D.dst.writez = 1;
3036 pAsm->D.dst.writew = 0;
3037
3038 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3039 pAsm->S[0].src.reg = tmp;
3040 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3041 noneg_PVSSRC(&(pAsm->S[0].src));
3042 pAsm->S[0].src.swizzlex = SQ_SEL_X;
3043 pAsm->S[0].src.swizzley = SQ_SEL_X;
3044 pAsm->S[0].src.swizzlez = SQ_SEL_X;
3045 pAsm->S[0].src.swizzlew = SQ_SEL_X;
3046
3047 if( GL_FALSE == next_ins(pAsm) )
3048 {
3049 return GL_FALSE;
3050 }
3051
3052 return GL_TRUE;
3053 }
3054
3055 GLboolean assemble_MAX(r700_AssemblerBase *pAsm)
3056 {
3057 if( GL_FALSE == checkop2(pAsm) )
3058 {
3059 return GL_FALSE;
3060 }
3061
3062 pAsm->D.dst.opcode = SQ_OP2_INST_MAX;
3063
3064 if( GL_FALSE == assemble_dst(pAsm) )
3065 {
3066 return GL_FALSE;
3067 }
3068
3069 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3070 {
3071 return GL_FALSE;
3072 }
3073
3074 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
3075 {
3076 return GL_FALSE;
3077 }
3078
3079 if( GL_FALSE == next_ins(pAsm) )
3080 {
3081 return GL_FALSE;
3082 }
3083
3084 return GL_TRUE;
3085 }
3086
3087 GLboolean assemble_MIN(r700_AssemblerBase *pAsm)
3088 {
3089 if( GL_FALSE == checkop2(pAsm) )
3090 {
3091 return GL_FALSE;
3092 }
3093
3094 pAsm->D.dst.opcode = SQ_OP2_INST_MIN;
3095
3096 if( GL_FALSE == assemble_dst(pAsm) )
3097 {
3098 return GL_FALSE;
3099 }
3100
3101 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3102 {
3103 return GL_FALSE;
3104 }
3105
3106 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
3107 {
3108 return GL_FALSE;
3109 }
3110
3111 if( GL_FALSE == next_ins(pAsm) )
3112 {
3113 return GL_FALSE;
3114 }
3115
3116 return GL_TRUE;
3117 }
3118
3119 GLboolean assemble_MOV(r700_AssemblerBase *pAsm)
3120 {
3121 checkop1(pAsm);
3122
3123 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
3124
3125 if (GL_FALSE == assemble_dst(pAsm))
3126 {
3127 return GL_FALSE;
3128 }
3129
3130 if (GL_FALSE == assemble_src(pAsm, 0, -1))
3131 {
3132 return GL_FALSE;
3133 }
3134
3135 if ( GL_FALSE == next_ins(pAsm) )
3136 {
3137 return GL_FALSE;
3138 }
3139
3140 return GL_TRUE;
3141 }
3142
3143 GLboolean assemble_MUL(r700_AssemblerBase *pAsm)
3144 {
3145 if( GL_FALSE == checkop2(pAsm) )
3146 {
3147 return GL_FALSE;
3148 }
3149
3150 pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
3151
3152 if( GL_FALSE == assemble_dst(pAsm) )
3153 {
3154 return GL_FALSE;
3155 }
3156
3157 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3158 {
3159 return GL_FALSE;
3160 }
3161
3162 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
3163 {
3164 return GL_FALSE;
3165 }
3166
3167 if( GL_FALSE == next_ins(pAsm) )
3168 {
3169 return GL_FALSE;
3170 }
3171
3172 return GL_TRUE;
3173 }
3174
3175 GLboolean assemble_POW(r700_AssemblerBase *pAsm)
3176 {
3177 BITS tmp;
3178
3179 checkop1(pAsm);
3180
3181 tmp = gethelpr(pAsm);
3182
3183 // LG2 tmp.x, a.swizzle
3184 pAsm->D.dst.opcode = SQ_OP2_INST_LOG_IEEE;
3185 pAsm->D.dst.math = 1;
3186
3187 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3188 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3189 pAsm->D.dst.reg = tmp;
3190 nomask_PVSDST(&(pAsm->D.dst));
3191
3192 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3193 {
3194 return GL_FALSE;
3195 }
3196
3197 if( GL_FALSE == next_ins(pAsm) )
3198 {
3199 return GL_FALSE;
3200 }
3201
3202 // MUL tmp.x, tmp.x, b.swizzle
3203 pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
3204
3205 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3206 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3207 pAsm->D.dst.reg = tmp;
3208 nomask_PVSDST(&(pAsm->D.dst));
3209
3210 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3211 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3212 pAsm->S[0].src.reg = tmp;
3213 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
3214 noneg_PVSSRC(&(pAsm->S[0].src));
3215
3216 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
3217 {
3218 return GL_FALSE;
3219 }
3220
3221 if( GL_FALSE == next_ins(pAsm) )
3222 {
3223 return GL_FALSE;
3224 }
3225
3226 // EX2 dst.mask, tmp.x
3227 // EX2 tmp.x, tmp.x
3228 pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE;
3229 pAsm->D.dst.math = 1;
3230
3231 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3232 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3233 pAsm->D.dst.reg = tmp;
3234 nomask_PVSDST(&(pAsm->D.dst));
3235
3236 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3237 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3238 pAsm->S[0].src.reg = tmp;
3239 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
3240 noneg_PVSSRC(&(pAsm->S[0].src));
3241
3242 if( GL_FALSE == next_ins(pAsm) )
3243 {
3244 return GL_FALSE;
3245 }
3246
3247 // Now replicate result to all necessary channels in destination
3248 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
3249
3250 if( GL_FALSE == assemble_dst(pAsm) )
3251 {
3252 return GL_FALSE;
3253 }
3254
3255 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3256 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
3257 pAsm->S[0].src.reg = tmp;
3258
3259 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
3260 noneg_PVSSRC(&(pAsm->S[0].src));
3261
3262 if( GL_FALSE == next_ins(pAsm) )
3263 {
3264 return GL_FALSE;
3265 }
3266
3267 return GL_TRUE;
3268 }
3269
3270 GLboolean assemble_RCP(r700_AssemblerBase *pAsm)
3271 {
3272 return assemble_math_function(pAsm, SQ_OP2_INST_RECIP_IEEE);
3273 }
3274
3275 GLboolean assemble_RSQ(r700_AssemblerBase *pAsm)
3276 {
3277 return assemble_math_function(pAsm, SQ_OP2_INST_RECIPSQRT_IEEE);
3278 }
3279
3280 GLboolean assemble_SIN(r700_AssemblerBase *pAsm)
3281 {
3282 return assemble_math_function(pAsm, SQ_OP2_INST_SIN);
3283 }
3284
3285 GLboolean assemble_SCS(r700_AssemblerBase *pAsm)
3286 {
3287 BITS tmp;
3288
3289 checkop1(pAsm);
3290
3291 tmp = gethelpr(pAsm);
3292
3293 // COS tmp.x, a.x
3294 pAsm->D.dst.opcode = SQ_OP2_INST_COS;
3295 pAsm->D.dst.math = 1;
3296
3297 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3298 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3299 pAsm->D.dst.reg = tmp;
3300 pAsm->D.dst.writex = 1;
3301
3302 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3303 {
3304 return GL_FALSE;
3305 }
3306
3307 if ( GL_FALSE == next_ins(pAsm) )
3308 {
3309 return GL_FALSE;
3310 }
3311
3312 // SIN tmp.y, a.x
3313 pAsm->D.dst.opcode = SQ_OP2_INST_SIN;
3314 pAsm->D.dst.math = 1;
3315
3316 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3317 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3318 pAsm->D.dst.reg = tmp;
3319 pAsm->D.dst.writey = 1;
3320
3321 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3322 {
3323 return GL_FALSE;
3324 }
3325
3326 if( GL_FALSE == next_ins(pAsm) )
3327 {
3328 return GL_FALSE;
3329 }
3330
3331 // MOV dst.mask, tmp
3332 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
3333
3334 if( GL_FALSE == assemble_dst(pAsm) )
3335 {
3336 return GL_FALSE;
3337 }
3338
3339 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3340 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
3341 pAsm->S[0].src.reg = tmp;
3342
3343 noswizzle_PVSSRC(&(pAsm->S[0].src));
3344 pAsm->S[0].src.swizzlez = SQ_SEL_0;
3345 pAsm->S[0].src.swizzlew = SQ_SEL_0;
3346
3347 if ( GL_FALSE == next_ins(pAsm) )
3348 {
3349 return GL_FALSE;
3350 }
3351
3352 return GL_TRUE;
3353 }
3354
3355 GLboolean assemble_SGE(r700_AssemblerBase *pAsm)
3356 {
3357 if( GL_FALSE == checkop2(pAsm) )
3358 {
3359 return GL_FALSE;
3360 }
3361
3362 pAsm->D.dst.opcode = SQ_OP2_INST_SETGE;
3363
3364 if( GL_FALSE == assemble_dst(pAsm) )
3365 {
3366 return GL_FALSE;
3367 }
3368
3369 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3370 {
3371 return GL_FALSE;
3372 }
3373
3374 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
3375 {
3376 return GL_FALSE;
3377 }
3378
3379 if( GL_FALSE == next_ins(pAsm) )
3380 {
3381 return GL_FALSE;
3382 }
3383
3384 return GL_TRUE;
3385 }
3386
3387 GLboolean assemble_SLT(r700_AssemblerBase *pAsm)
3388 {
3389 if( GL_FALSE == checkop2(pAsm) )
3390 {
3391 return GL_FALSE;
3392 }
3393
3394 pAsm->D.dst.opcode = SQ_OP2_INST_SETGT;
3395
3396 if( GL_FALSE == assemble_dst(pAsm) )
3397 {
3398 return GL_FALSE;
3399 }
3400
3401 if( GL_FALSE == assemble_src(pAsm, 0, 1) )
3402 {
3403 return GL_FALSE;
3404 }
3405
3406 if( GL_FALSE == assemble_src(pAsm, 1, 0) )
3407 {
3408 return GL_FALSE;
3409 }
3410
3411 if( GL_FALSE == next_ins(pAsm) )
3412 {
3413 return GL_FALSE;
3414 }
3415
3416 return GL_TRUE;
3417 }
3418
3419 GLboolean assemble_STP(r700_AssemblerBase *pAsm)
3420 {
3421 return GL_TRUE;
3422 }
3423
3424 GLboolean assemble_TEX(r700_AssemblerBase *pAsm)
3425 {
3426 GLboolean src_const;
3427 GLboolean need_barrier = GL_FALSE;
3428
3429 checkop1(pAsm);
3430
3431 switch (pAsm->pILInst[pAsm->uiCurInst].SrcReg[0].File)
3432 {
3433 case PROGRAM_CONSTANT:
3434 case PROGRAM_LOCAL_PARAM:
3435 case PROGRAM_ENV_PARAM:
3436 case PROGRAM_STATE_VAR:
3437 src_const = GL_TRUE;
3438 break;
3439 case PROGRAM_TEMPORARY:
3440 case PROGRAM_INPUT:
3441 default:
3442 src_const = GL_FALSE;
3443 break;
3444 }
3445
3446 if (GL_TRUE == src_const)
3447 {
3448 if ( GL_FALSE == mov_temp(pAsm, 0) )
3449 return GL_FALSE;
3450 need_barrier = GL_TRUE;
3451 }
3452
3453 if (pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_TXP)
3454 {
3455 GLuint tmp = gethelpr(pAsm);
3456 pAsm->D.dst.opcode = SQ_OP2_INST_RECIP_IEEE;
3457 pAsm->D.dst.math = 1;
3458 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3459 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3460 pAsm->D.dst.reg = tmp;
3461 pAsm->D.dst.writew = 1;
3462
3463 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3464 {
3465 return GL_FALSE;
3466 }
3467 swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_W, SQ_SEL_W, SQ_SEL_W, SQ_SEL_W);
3468 if( GL_FALSE == next_ins(pAsm) )
3469 {
3470 return GL_FALSE;
3471 }
3472
3473 pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
3474 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3475 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3476 pAsm->D.dst.reg = tmp;
3477 pAsm->D.dst.writex = 1;
3478 pAsm->D.dst.writey = 1;
3479 pAsm->D.dst.writez = 1;
3480 pAsm->D.dst.writew = 0;
3481
3482 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3483 {
3484 return GL_FALSE;
3485 }
3486 setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
3487 pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
3488 pAsm->S[1].src.reg = tmp;
3489 setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_W);
3490
3491 if( GL_FALSE == next_ins(pAsm) )
3492 {
3493 return GL_FALSE;
3494 }
3495
3496 pAsm->aArgSubst[1] = tmp;
3497 need_barrier = GL_TRUE;
3498 }
3499
3500 if (pAsm->pILInst[pAsm->uiCurInst].TexSrcTarget == TEXTURE_CUBE_INDEX )
3501 {
3502 GLuint tmp1 = gethelpr(pAsm);
3503 GLuint tmp2 = gethelpr(pAsm);
3504
3505 /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */
3506 pAsm->D.dst.opcode = SQ_OP2_INST_CUBE;
3507 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3508 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3509 pAsm->D.dst.reg = tmp1;
3510 nomask_PVSDST(&(pAsm->D.dst));
3511
3512 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3513 {
3514 return GL_FALSE;
3515 }
3516
3517 if( GL_FALSE == assemble_src(pAsm, 0, 1) )
3518 {
3519 return GL_FALSE;
3520 }
3521
3522 swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_Z, SQ_SEL_Z, SQ_SEL_X, SQ_SEL_Y);
3523 swizzleagain_PVSSRC(&(pAsm->S[1].src), SQ_SEL_Y, SQ_SEL_X, SQ_SEL_Z, SQ_SEL_Z);
3524
3525 if( GL_FALSE == next_ins(pAsm) )
3526 {
3527 return GL_FALSE;
3528 }
3529
3530 /* tmp1.z = ABS(tmp1.z) dont have abs support in assembler currently
3531 * have to do explicit instruction
3532 */
3533 pAsm->D.dst.opcode = SQ_OP2_INST_MAX;
3534 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3535 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3536 pAsm->D.dst.reg = tmp1;
3537 pAsm->D.dst.writez = 1;
3538
3539 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3540 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3541 pAsm->S[0].src.reg = tmp1;
3542 noswizzle_PVSSRC(&(pAsm->S[0].src));
3543 pAsm->S[1].bits = pAsm->S[0].bits;
3544 flipneg_PVSSRC(&(pAsm->S[1].src));
3545
3546 next_ins(pAsm);
3547
3548 /* tmp1.z = RCP_e(|tmp1.z|) */
3549 pAsm->D.dst.opcode = SQ_OP2_INST_RECIP_IEEE;
3550 pAsm->D.dst.math = 1;
3551 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3552 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3553 pAsm->D.dst.reg = tmp1;
3554 pAsm->D.dst.writez = 1;
3555
3556 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3557 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3558 pAsm->S[0].src.reg = tmp1;
3559 pAsm->S[0].src.swizzlex = SQ_SEL_Z;
3560
3561 next_ins(pAsm);
3562
3563 /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x
3564 * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x
3565 * muladd has no writemask, have to use another temp
3566 * also no support for imm constants, so add 1 here
3567 */
3568 pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
3569 pAsm->D.dst.op3 = 1;
3570 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3571 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3572 pAsm->D.dst.reg = tmp2;
3573
3574 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3575 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3576 pAsm->S[0].src.reg = tmp1;
3577 noswizzle_PVSSRC(&(pAsm->S[0].src));
3578 setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
3579 pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
3580 pAsm->S[1].src.reg = tmp1;
3581 setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_Z);
3582 setaddrmode_PVSSRC(&(pAsm->S[2].src), ADDR_ABSOLUTE);
3583 pAsm->S[2].src.rtype = SRC_REG_TEMPORARY;
3584 pAsm->S[2].src.reg = tmp1;
3585 setswizzle_PVSSRC(&(pAsm->S[2].src), SQ_SEL_1);
3586
3587 next_ins(pAsm);
3588
3589 /* ADD the remaining .5 */
3590 pAsm->D.dst.opcode = SQ_OP2_INST_ADD;
3591 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3592 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3593 pAsm->D.dst.reg = tmp2;
3594 pAsm->D.dst.writex = 1;
3595 pAsm->D.dst.writey = 1;
3596 pAsm->D.dst.writez = 0;
3597 pAsm->D.dst.writew = 0;
3598
3599 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3600 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3601 pAsm->S[0].src.reg = tmp2;
3602 noswizzle_PVSSRC(&(pAsm->S[0].src));
3603 setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
3604 pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
3605 pAsm->S[1].src.reg = 252; // SQ_ALU_SRC_0_5
3606 noswizzle_PVSSRC(&(pAsm->S[1].src));
3607
3608 next_ins(pAsm);
3609
3610 /* tmp1.xy = temp2.xy */
3611 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
3612 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3613 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3614 pAsm->D.dst.reg = tmp1;
3615 pAsm->D.dst.writex = 1;
3616 pAsm->D.dst.writey = 1;
3617 pAsm->D.dst.writez = 0;
3618 pAsm->D.dst.writew = 0;
3619
3620 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3621 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3622 pAsm->S[0].src.reg = tmp2;
3623 noswizzle_PVSSRC(&(pAsm->S[0].src));
3624
3625 next_ins(pAsm);
3626 pAsm->aArgSubst[1] = tmp1;
3627 need_barrier = GL_TRUE;
3628
3629 }
3630
3631 if(pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_TXB)
3632 {
3633 pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE_L;
3634 }
3635 else
3636 {
3637 pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE;
3638 }
3639
3640 pAsm->is_tex = GL_TRUE;
3641 if ( GL_TRUE == need_barrier )
3642 {
3643 pAsm->need_tex_barrier = GL_TRUE;
3644 }
3645 // Set src1 to tex unit id
3646 pAsm->S[1].src.reg = pAsm->pILInst[pAsm->uiCurInst].TexSrcUnit;
3647 pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
3648
3649 //No sw info from mesa compiler, so hard code here.
3650 pAsm->S[1].src.swizzlex = SQ_SEL_X;
3651 pAsm->S[1].src.swizzley = SQ_SEL_Y;
3652 pAsm->S[1].src.swizzlez = SQ_SEL_Z;
3653 pAsm->S[1].src.swizzlew = SQ_SEL_W;
3654
3655 if( GL_FALSE == tex_dst(pAsm) )
3656 {
3657 return GL_FALSE;
3658 }
3659
3660 if( GL_FALSE == tex_src(pAsm) )
3661 {
3662 return GL_FALSE;
3663 }
3664
3665 if(pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_TXP)
3666 {
3667 /* hopefully did swizzles before */
3668 noswizzle_PVSSRC(&(pAsm->S[0].src));
3669 }
3670
3671 if(pAsm->pILInst[pAsm->uiCurInst].TexSrcTarget == TEXTURE_CUBE_INDEX)
3672 {
3673 /* SAMPLE dst, tmp.yxwy, CUBE */
3674 pAsm->S[0].src.swizzlex = SQ_SEL_Y;
3675 pAsm->S[0].src.swizzley = SQ_SEL_X;
3676 pAsm->S[0].src.swizzlez = SQ_SEL_W;
3677 pAsm->S[0].src.swizzlew = SQ_SEL_Y;
3678 }
3679
3680 if ( GL_FALSE == next_ins(pAsm) )
3681 {
3682 return GL_FALSE;
3683 }
3684
3685 return GL_TRUE;
3686 }
3687
3688 GLboolean assemble_XPD(r700_AssemblerBase *pAsm)
3689 {
3690 BITS tmp;
3691
3692 if( GL_FALSE == checkop2(pAsm) )
3693 {
3694 return GL_FALSE;
3695 }
3696
3697 tmp = gethelpr(pAsm);
3698
3699 pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
3700
3701 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3702 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3703 pAsm->D.dst.reg = tmp;
3704 nomask_PVSDST(&(pAsm->D.dst));
3705
3706 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3707 {
3708 return GL_FALSE;
3709 }
3710
3711 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
3712 {
3713 return GL_FALSE;
3714 }
3715
3716 swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_Z, SQ_SEL_X, SQ_SEL_Y, SQ_SEL_0);
3717 swizzleagain_PVSSRC(&(pAsm->S[1].src), SQ_SEL_Y, SQ_SEL_Z, SQ_SEL_X, SQ_SEL_0);
3718
3719 if( GL_FALSE == next_ins(pAsm) )
3720 {
3721 return GL_FALSE;
3722 }
3723
3724 pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
3725 pAsm->D.dst.op3 = 1;
3726
3727 if(0xF != pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask)
3728 {
3729 tmp = gethelpr(pAsm);
3730
3731 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3732 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3733 pAsm->D.dst.reg = tmp;
3734
3735 nomask_PVSDST(&(pAsm->D.dst));
3736 }
3737 else
3738 {
3739 if( GL_FALSE == assemble_dst(pAsm) )
3740 {
3741 return GL_FALSE;
3742 }
3743 }
3744
3745 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3746 {
3747 return GL_FALSE;
3748 }
3749
3750 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
3751 {
3752 return GL_FALSE;
3753 }
3754
3755 swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_Y, SQ_SEL_Z, SQ_SEL_X, SQ_SEL_0);
3756 swizzleagain_PVSSRC(&(pAsm->S[1].src), SQ_SEL_Z, SQ_SEL_X, SQ_SEL_Y, SQ_SEL_0);
3757
3758 // result1 + (neg) result0
3759 setaddrmode_PVSSRC(&(pAsm->S[2].src),ADDR_ABSOLUTE);
3760 pAsm->S[2].src.rtype = SRC_REG_TEMPORARY;
3761 pAsm->S[2].src.reg = tmp;
3762
3763 neg_PVSSRC(&(pAsm->S[2].src));
3764 noswizzle_PVSSRC(&(pAsm->S[2].src));
3765
3766 if( GL_FALSE == next_ins(pAsm) )
3767 {
3768 return GL_FALSE;
3769 }
3770
3771
3772 if(0xF != pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask)
3773 {
3774 if( GL_FALSE == assemble_dst(pAsm) )
3775 {
3776 return GL_FALSE;
3777 }
3778
3779 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
3780
3781 // Use tmp as source
3782 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3783 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3784 pAsm->S[0].src.reg = tmp;
3785
3786 noneg_PVSSRC(&(pAsm->S[0].src));
3787 noswizzle_PVSSRC(&(pAsm->S[0].src));
3788
3789 if( GL_FALSE == next_ins(pAsm) )
3790 {
3791 return GL_FALSE;
3792 }
3793 }
3794
3795 return GL_TRUE;
3796 }
3797
3798 GLboolean assemble_EXPORT(r700_AssemblerBase *pAsm)
3799 {
3800 return GL_TRUE;
3801 }
3802
3803 GLboolean assemble_IF(r700_AssemblerBase *pAsm)
3804 {
3805 return GL_TRUE;
3806 }
3807
3808 GLboolean assemble_ENDIF(r700_AssemblerBase *pAsm)
3809 {
3810 return GL_TRUE;
3811 }
3812
3813 GLboolean AssembleInstr(GLuint uiNumberInsts,
3814 struct prog_instruction *pILInst,
3815 r700_AssemblerBase *pR700AsmCode)
3816 {
3817 GLuint i;
3818
3819 pR700AsmCode->pILInst = pILInst;
3820 for(i=0; i<uiNumberInsts; i++)
3821 {
3822 pR700AsmCode->uiCurInst = i;
3823
3824 switch (pILInst[i].Opcode)
3825 {
3826 case OPCODE_ABS:
3827 if ( GL_FALSE == assemble_ABS(pR700AsmCode) )
3828 return GL_FALSE;
3829 break;
3830 case OPCODE_ADD:
3831 case OPCODE_SUB:
3832 if ( GL_FALSE == assemble_ADD(pR700AsmCode) )
3833 return GL_FALSE;
3834 break;
3835
3836 case OPCODE_ARL:
3837 if ( GL_FALSE == assemble_ARL(pR700AsmCode) )
3838 return GL_FALSE;
3839 break;
3840 case OPCODE_ARR:
3841 radeon_error("Not yet implemented instruction OPCODE_ARR \n");
3842 //if ( GL_FALSE == assemble_BAD("ARR") )
3843 return GL_FALSE;
3844 break;
3845
3846 case OPCODE_CMP:
3847 if ( GL_FALSE == assemble_CMP(pR700AsmCode) )
3848 return GL_FALSE;
3849 break;
3850 case OPCODE_COS:
3851 if ( GL_FALSE == assemble_COS(pR700AsmCode) )
3852 return GL_FALSE;
3853 break;
3854
3855 case OPCODE_DP3:
3856 case OPCODE_DP4:
3857 case OPCODE_DPH:
3858 if ( GL_FALSE == assemble_DOT(pR700AsmCode) )
3859 return GL_FALSE;
3860 break;
3861
3862 case OPCODE_DST:
3863 if ( GL_FALSE == assemble_DST(pR700AsmCode) )
3864 return GL_FALSE;
3865 break;
3866
3867 case OPCODE_EX2:
3868 if ( GL_FALSE == assemble_EX2(pR700AsmCode) )
3869 return GL_FALSE;
3870 break;
3871 case OPCODE_EXP:
3872 radeon_error("Not yet implemented instruction OPCODE_EXP \n");
3873 //if ( GL_FALSE == assemble_BAD("EXP") )
3874 return GL_FALSE;
3875 break; // approx of EX2
3876
3877 case OPCODE_FLR:
3878 if ( GL_FALSE == assemble_FLR(pR700AsmCode) )
3879 return GL_FALSE;
3880 break;
3881 //case OP_FLR_INT:
3882 // if ( GL_FALSE == assemble_FLR_INT() )
3883 // return GL_FALSE;
3884 // break;
3885
3886 case OPCODE_FRC:
3887 if ( GL_FALSE == assemble_FRC(pR700AsmCode) )
3888 return GL_FALSE;
3889 break;
3890
3891 case OPCODE_KIL:
3892 if ( GL_FALSE == assemble_KIL(pR700AsmCode) )
3893 return GL_FALSE;
3894 break;
3895 case OPCODE_LG2:
3896 if ( GL_FALSE == assemble_LG2(pR700AsmCode) )
3897 return GL_FALSE;
3898 break;
3899 case OPCODE_LIT:
3900 if ( GL_FALSE == assemble_LIT(pR700AsmCode) )
3901 return GL_FALSE;
3902 break;
3903 case OPCODE_LRP:
3904 if ( GL_FALSE == assemble_LRP(pR700AsmCode) )
3905 return GL_FALSE;
3906 break;
3907 case OPCODE_LOG:
3908 radeon_error("Not yet implemented instruction OPCODE_LOG \n");
3909 //if ( GL_FALSE == assemble_BAD("LOG") )
3910 return GL_FALSE;
3911 break; // approx of LG2
3912
3913 case OPCODE_MAD:
3914 if ( GL_FALSE == assemble_MAD(pR700AsmCode) )
3915 return GL_FALSE;
3916 break;
3917 case OPCODE_MAX:
3918 if ( GL_FALSE == assemble_MAX(pR700AsmCode) )
3919 return GL_FALSE;
3920 break;
3921 case OPCODE_MIN:
3922 if ( GL_FALSE == assemble_MIN(pR700AsmCode) )
3923 return GL_FALSE;
3924 break;
3925
3926 case OPCODE_MOV:
3927 if ( GL_FALSE == assemble_MOV(pR700AsmCode) )
3928 return GL_FALSE;
3929 break;
3930 case OPCODE_MUL:
3931 if ( GL_FALSE == assemble_MUL(pR700AsmCode) )
3932 return GL_FALSE;
3933 break;
3934
3935 case OPCODE_POW:
3936 if ( GL_FALSE == assemble_POW(pR700AsmCode) )
3937 return GL_FALSE;
3938 break;
3939 case OPCODE_RCP:
3940 if ( GL_FALSE == assemble_RCP(pR700AsmCode) )
3941 return GL_FALSE;
3942 break;
3943 case OPCODE_RSQ:
3944 if ( GL_FALSE == assemble_RSQ(pR700AsmCode) )
3945 return GL_FALSE;
3946 break;
3947 case OPCODE_SIN:
3948 if ( GL_FALSE == assemble_SIN(pR700AsmCode) )
3949 return GL_FALSE;
3950 break;
3951 case OPCODE_SCS:
3952 if ( GL_FALSE == assemble_SCS(pR700AsmCode) )
3953 return GL_FALSE;
3954 break;
3955
3956 case OPCODE_SGE:
3957 if ( GL_FALSE == assemble_SGE(pR700AsmCode) )
3958 return GL_FALSE;
3959 break;
3960 case OPCODE_SLT:
3961 if ( GL_FALSE == assemble_SLT(pR700AsmCode) )
3962 return GL_FALSE;
3963 break;
3964
3965 //case OP_STP:
3966 // if ( GL_FALSE == assemble_STP(pR700AsmCode) )
3967 // return GL_FALSE;
3968 // break;
3969
3970 case OPCODE_SWZ:
3971 if ( GL_FALSE == assemble_MOV(pR700AsmCode) )
3972 {
3973 return GL_FALSE;
3974 }
3975 else
3976 {
3977 if( (i+1)<uiNumberInsts )
3978 {
3979 if(OPCODE_END != pILInst[i+1].Opcode)
3980 {
3981 if( GL_TRUE == IsTex(pILInst[i+1].Opcode) )
3982 {
3983 pR700AsmCode->pInstDeps[i+1].nDstDep = i+1; //=1?
3984 }
3985 }
3986 }
3987 }
3988 break;
3989
3990 case OPCODE_TEX:
3991 case OPCODE_TXB:
3992 case OPCODE_TXP:
3993 if ( GL_FALSE == assemble_TEX(pR700AsmCode) )
3994 return GL_FALSE;
3995 break;
3996
3997 case OPCODE_XPD:
3998 if ( GL_FALSE == assemble_XPD(pR700AsmCode) )
3999 return GL_FALSE;
4000 break;
4001
4002 case OPCODE_IF :
4003 if ( GL_FALSE == assemble_IF(pR700AsmCode) )
4004 return GL_FALSE;
4005 break;
4006 case OPCODE_ELSE :
4007 radeon_error("Not yet implemented instruction OPCODE_ELSE \n");
4008 //if ( GL_FALSE == assemble_BAD("ELSE") )
4009 return GL_FALSE;
4010 break;
4011 case OPCODE_ENDIF:
4012 if ( GL_FALSE == assemble_ENDIF(pR700AsmCode) )
4013 return GL_FALSE;
4014 break;
4015
4016 //case OPCODE_EXPORT:
4017 // if ( GL_FALSE == assemble_EXPORT() )
4018 // return GL_FALSE;
4019 // break;
4020
4021 case OPCODE_END:
4022 //pR700AsmCode->uiCurInst = i;
4023 //This is to remaind that if in later exoort there is depth/stencil
4024 //export, we need a mov to re-arrange DST channel, where using a
4025 //psuedo inst, we will use this end inst to do it.
4026 return GL_TRUE;
4027
4028 default:
4029 radeon_error("internal: unknown instruction\n");
4030 return GL_FALSE;
4031 }
4032 }
4033
4034 return GL_TRUE;
4035 }
4036
4037 GLboolean Process_Export(r700_AssemblerBase* pAsm,
4038 GLuint type,
4039 GLuint export_starting_index,
4040 GLuint export_count,
4041 GLuint starting_register_number,
4042 GLboolean is_depth_export)
4043 {
4044 unsigned char ucWriteMask;
4045
4046 check_current_clause(pAsm, CF_EMPTY_CLAUSE);
4047 check_current_clause(pAsm, CF_EXPORT_CLAUSE); //alloc the cf_current_export_clause_ptr
4048
4049 pAsm->cf_current_export_clause_ptr->m_Word0.f.type = type;
4050
4051 switch (type)
4052 {
4053 case SQ_EXPORT_PIXEL:
4054 if(GL_TRUE == is_depth_export)
4055 {
4056 pAsm->cf_current_export_clause_ptr->m_Word0.f.array_base = SQ_CF_PIXEL_Z;
4057 }
4058 else
4059 {
4060 pAsm->cf_current_export_clause_ptr->m_Word0.f.array_base = SQ_CF_PIXEL_MRT0 + export_starting_index;
4061 }
4062 break;
4063
4064 case SQ_EXPORT_POS:
4065 pAsm->cf_current_export_clause_ptr->m_Word0.f.array_base = SQ_CF_POS_0 + export_starting_index;
4066 break;
4067
4068 case SQ_EXPORT_PARAM:
4069 pAsm->cf_current_export_clause_ptr->m_Word0.f.array_base = 0x0 + export_starting_index;
4070 break;
4071
4072 default:
4073 radeon_error("Unknown export type: %d\n", type);
4074 return GL_FALSE;
4075 break;
4076 }
4077
4078 pAsm->cf_current_export_clause_ptr->m_Word0.f.rw_gpr = starting_register_number;
4079
4080 pAsm->cf_current_export_clause_ptr->m_Word0.f.rw_rel = SQ_ABSOLUTE;
4081 pAsm->cf_current_export_clause_ptr->m_Word0.f.index_gpr = 0x0;
4082 pAsm->cf_current_export_clause_ptr->m_Word0.f.elem_size = 0x3;
4083
4084 pAsm->cf_current_export_clause_ptr->m_Word1.f.burst_count = (export_count - 1);
4085 pAsm->cf_current_export_clause_ptr->m_Word1.f.end_of_program = 0x0;
4086 pAsm->cf_current_export_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
4087 pAsm->cf_current_export_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT; // _DONE
4088 pAsm->cf_current_export_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
4089 pAsm->cf_current_export_clause_ptr->m_Word1.f.barrier = 0x1;
4090
4091 if (export_count == 1)
4092 {
4093 ucWriteMask = pAsm->pucOutMask[starting_register_number - pAsm->starting_export_register_number];
4094 /* exports Z as a float into Red channel */
4095 if (GL_TRUE == is_depth_export)
4096 ucWriteMask = 0x1;
4097
4098 if( (ucWriteMask & 0x1) != 0)
4099 {
4100 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_x = SQ_SEL_X;
4101 }
4102 else
4103 {
4104 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_x = SQ_SEL_MASK;
4105 }
4106 if( ((ucWriteMask>>1) & 0x1) != 0)
4107 {
4108 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_y = SQ_SEL_Y;
4109 }
4110 else
4111 {
4112 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_y = SQ_SEL_MASK;
4113 }
4114 if( ((ucWriteMask>>2) & 0x1) != 0)
4115 {
4116 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_z = SQ_SEL_Z;
4117 }
4118 else
4119 {
4120 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_z = SQ_SEL_MASK;
4121 }
4122 if( ((ucWriteMask>>3) & 0x1) != 0)
4123 {
4124 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_w = SQ_SEL_W;
4125 }
4126 else
4127 {
4128 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_w = SQ_SEL_MASK;
4129 }
4130 }
4131 else
4132 {
4133 // This should only be used if all components for all registers have been written
4134 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_x = SQ_SEL_X;
4135 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_y = SQ_SEL_Y;
4136 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_z = SQ_SEL_Z;
4137 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_w = SQ_SEL_W;
4138 }
4139
4140 pAsm->cf_last_export_ptr = pAsm->cf_current_export_clause_ptr;
4141
4142 return GL_TRUE;
4143 }
4144
4145 GLboolean Move_Depth_Exports_To_Correct_Channels(r700_AssemblerBase *pAsm, BITS depth_channel_select)
4146 {
4147 gl_inst_opcode Opcode_save = pAsm->pILInst[pAsm->uiCurInst].Opcode; //Should be OPCODE_END
4148 pAsm->pILInst[pAsm->uiCurInst].Opcode = OPCODE_MOV;
4149
4150 // MOV depth_export_register.hw_depth_channel, depth_export_register.depth_channel_select
4151
4152 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
4153
4154 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4155 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4156 pAsm->D.dst.reg = pAsm->depth_export_register_number;
4157
4158 pAsm->D.dst.writex = 1; // depth goes in R channel for HW
4159
4160 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4161 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
4162 pAsm->S[0].src.reg = pAsm->depth_export_register_number;
4163
4164 setswizzle_PVSSRC(&(pAsm->S[0].src), depth_channel_select);
4165
4166 noneg_PVSSRC(&(pAsm->S[0].src));
4167
4168 if( GL_FALSE == next_ins(pAsm) )
4169 {
4170 return GL_FALSE;
4171 }
4172
4173 pAsm->pILInst[pAsm->uiCurInst].Opcode = Opcode_save;
4174
4175 return GL_TRUE;
4176 }
4177
4178 GLboolean Process_Fragment_Exports(r700_AssemblerBase *pR700AsmCode,
4179 GLbitfield OutputsWritten)
4180 {
4181 unsigned int unBit;
4182 GLuint export_count = 0;
4183
4184 if(pR700AsmCode->depth_export_register_number >= 0)
4185 {
4186 if( GL_FALSE == Move_Depth_Exports_To_Correct_Channels(pR700AsmCode, SQ_SEL_Z) ) // depth
4187 {
4188 return GL_FALSE;
4189 }
4190 }
4191
4192 unBit = 1 << FRAG_RESULT_COLOR;
4193 if(OutputsWritten & unBit)
4194 {
4195 if( GL_FALSE == Process_Export(pR700AsmCode,
4196 SQ_EXPORT_PIXEL,
4197 0,
4198 1,
4199 pR700AsmCode->uiFP_OutputMap[FRAG_RESULT_COLOR],
4200 GL_FALSE) )
4201 {
4202 return GL_FALSE;
4203 }
4204 export_count++;
4205 }
4206 unBit = 1 << FRAG_RESULT_DEPTH;
4207 if(OutputsWritten & unBit)
4208 {
4209 if( GL_FALSE == Process_Export(pR700AsmCode,
4210 SQ_EXPORT_PIXEL,
4211 0,
4212 1,
4213 pR700AsmCode->uiFP_OutputMap[FRAG_RESULT_DEPTH],
4214 GL_TRUE))
4215 {
4216 return GL_FALSE;
4217 }
4218 export_count++;
4219 }
4220 /* Need to export something, otherwise we'll hang
4221 * results are undefined anyway */
4222 if(export_count == 0)
4223 {
4224 Process_Export(pR700AsmCode, SQ_EXPORT_PIXEL, 0, 1, 0, GL_FALSE);
4225 }
4226
4227 if(pR700AsmCode->cf_last_export_ptr != NULL)
4228 {
4229 pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT_DONE;
4230 pR700AsmCode->cf_last_export_ptr->m_Word1.f.end_of_program = 0x1;
4231 }
4232
4233 return GL_TRUE;
4234 }
4235
4236 GLboolean Process_Vertex_Exports(r700_AssemblerBase *pR700AsmCode,
4237 GLbitfield OutputsWritten)
4238 {
4239 unsigned int unBit;
4240 unsigned int i;
4241
4242 GLuint export_starting_index = 0;
4243 GLuint export_count = pR700AsmCode->number_of_exports;
4244
4245 unBit = 1 << VERT_RESULT_HPOS;
4246 if(OutputsWritten & unBit)
4247 {
4248 if( GL_FALSE == Process_Export(pR700AsmCode,
4249 SQ_EXPORT_POS,
4250 export_starting_index,
4251 1,
4252 pR700AsmCode->ucVP_OutputMap[VERT_RESULT_HPOS],
4253 GL_FALSE) )
4254 {
4255 return GL_FALSE;
4256 }
4257
4258 export_count--;
4259
4260 pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT_DONE;
4261 }
4262
4263 pR700AsmCode->number_of_exports = export_count;
4264
4265 unBit = 1 << VERT_RESULT_COL0;
4266 if(OutputsWritten & unBit)
4267 {
4268 if( GL_FALSE == Process_Export(pR700AsmCode,
4269 SQ_EXPORT_PARAM,
4270 export_starting_index,
4271 1,
4272 pR700AsmCode->ucVP_OutputMap[VERT_RESULT_COL0],
4273 GL_FALSE) )
4274 {
4275 return GL_FALSE;
4276 }
4277
4278 export_starting_index++;
4279 }
4280
4281 unBit = 1 << VERT_RESULT_COL1;
4282 if(OutputsWritten & unBit)
4283 {
4284 if( GL_FALSE == Process_Export(pR700AsmCode,
4285 SQ_EXPORT_PARAM,
4286 export_starting_index,
4287 1,
4288 pR700AsmCode->ucVP_OutputMap[VERT_RESULT_COL1],
4289 GL_FALSE) )
4290 {
4291 return GL_FALSE;
4292 }
4293
4294 export_starting_index++;
4295 }
4296
4297 unBit = 1 << VERT_RESULT_FOGC;
4298 if(OutputsWritten & unBit)
4299 {
4300 if( GL_FALSE == Process_Export(pR700AsmCode,
4301 SQ_EXPORT_PARAM,
4302 export_starting_index,
4303 1,
4304 pR700AsmCode->ucVP_OutputMap[VERT_RESULT_FOGC],
4305 GL_FALSE) )
4306 {
4307 return GL_FALSE;
4308 }
4309
4310 export_starting_index++;
4311 }
4312
4313 for(i=0; i<8; i++)
4314 {
4315 unBit = 1 << (VERT_RESULT_TEX0 + i);
4316 if(OutputsWritten & unBit)
4317 {
4318 if( GL_FALSE == Process_Export(pR700AsmCode,
4319 SQ_EXPORT_PARAM,
4320 export_starting_index,
4321 1,
4322 pR700AsmCode->ucVP_OutputMap[VERT_RESULT_TEX0 + i],
4323 GL_FALSE) )
4324 {
4325 return GL_FALSE;
4326 }
4327
4328 export_starting_index++;
4329 }
4330 }
4331
4332 // At least one param should be exported
4333 if (export_count)
4334 {
4335 pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT_DONE;
4336 }
4337 else
4338 {
4339 if( GL_FALSE == Process_Export(pR700AsmCode,
4340 SQ_EXPORT_PARAM,
4341 0,
4342 1,
4343 pR700AsmCode->starting_export_register_number,
4344 GL_FALSE) )
4345 {
4346 return GL_FALSE;
4347 }
4348
4349 pR700AsmCode->cf_last_export_ptr->m_Word1_SWIZ.f.sel_x = SQ_SEL_0;
4350 pR700AsmCode->cf_last_export_ptr->m_Word1_SWIZ.f.sel_y = SQ_SEL_0;
4351 pR700AsmCode->cf_last_export_ptr->m_Word1_SWIZ.f.sel_z = SQ_SEL_0;
4352 pR700AsmCode->cf_last_export_ptr->m_Word1_SWIZ.f.sel_w = SQ_SEL_1;
4353 pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT_DONE;
4354 }
4355
4356 pR700AsmCode->cf_last_export_ptr->m_Word1.f.end_of_program = 0x1;
4357
4358 return GL_TRUE;
4359 }
4360
4361 GLboolean Clean_Up_Assembler(r700_AssemblerBase *pR700AsmCode)
4362 {
4363 FREE(pR700AsmCode->pucOutMask);
4364 FREE(pR700AsmCode->pInstDeps);
4365 return GL_TRUE;
4366 }
4367