/*
 * Copyright (C) 2009 Nicolai Haehnle.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial
 * portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
28 #include "radeon_dataflow.h"
30 #include "radeon_compiler.h"
33 struct dataflow_state
{
34 struct radeon_compiler
* C
;
36 unsigned int UpdateRunning
:1;
38 struct rc_dataflow_vector
* Input
[RC_REGISTER_MAX_INDEX
];
39 struct rc_dataflow_vector
* Output
[RC_REGISTER_MAX_INDEX
];
40 struct rc_dataflow_vector
* Temporary
[RC_REGISTER_MAX_INDEX
];
41 struct rc_dataflow_vector
* Address
;
43 struct rc_dataflow_vector
** UpdateStack
;
44 unsigned int UpdateStackSize
;
45 unsigned int UpdateStackReserved
;
/* Forward declaration: mark_ref_use() calls this ahead of its definition. */
static void mark_vector_use(struct dataflow_state *s,
		struct rc_dataflow_vector *vector, unsigned int mask);
50 static struct rc_dataflow_vector
* get_register_contents(struct dataflow_state
* s
,
51 rc_register_file file
, unsigned int index
)
53 if (file
== RC_FILE_INPUT
|| file
== RC_FILE_OUTPUT
|| file
== RC_FILE_TEMPORARY
) {
54 if (index
>= RC_REGISTER_MAX_INDEX
)
55 return 0; /* cannot happen, but be defensive */
57 if (file
== RC_FILE_TEMPORARY
)
58 return s
->Temporary
[index
];
59 if (file
== RC_FILE_INPUT
)
60 return s
->Input
[index
];
61 if (file
== RC_FILE_OUTPUT
)
62 return s
->Output
[index
];
65 if (file
== RC_FILE_ADDRESS
)
68 return 0; /* can happen, constant register file */
71 static void mark_ref_use(struct dataflow_state
* s
, struct rc_dataflow_ref
* ref
, unsigned int mask
)
73 if (!(mask
& ~ref
->UseMask
))
77 mark_vector_use(s
, ref
->Vector
, ref
->UseMask
);
80 static void mark_source_use(struct dataflow_state
* s
, struct rc_instruction
* inst
,
81 unsigned int src
, unsigned int srcmask
)
83 unsigned int refmask
= 0;
85 for(unsigned int i
= 0; i
< 4; ++i
) {
86 if (GET_BIT(srcmask
, i
))
87 refmask
|= 1 << GET_SWZ(inst
->I
.SrcReg
[src
].Swizzle
, i
);
90 /* get rid of spurious bits from ZERO, ONE, etc. swizzles */
91 refmask
&= RC_MASK_XYZW
;
94 return; /* can happen if the swizzle contains constant components */
96 if (inst
->Dataflow
.SrcReg
[src
])
97 mark_ref_use(s
, inst
->Dataflow
.SrcReg
[src
], refmask
);
99 if (inst
->Dataflow
.SrcRegAddress
[src
])
100 mark_ref_use(s
, inst
->Dataflow
.SrcRegAddress
[src
], RC_MASK_X
);
/**
 * Accumulate, for each source operand of \p inst, the channels that are
 * read when the destination components in \p writemask are written.
 *
 * \param inst       instruction to examine
 * \param writemask  destination components of interest
 * \param srcmasks   per-source channel masks; entries are indexed by source
 *                   operand and bits are OR-ed into them
 *
 * NOTE(review): only `|=` on srcmasks is visible in this listing, so the
 * entries must already hold defined values when they are OR-ed into;
 * confirm where they are zeroed. The `case` labels of the switch are not
 * visible either, so the opcode each group of masks belongs to cannot be
 * stated from here.
 */
103 static void compute_sources_for_writemask(
104 struct rc_instruction
* inst
,
105 unsigned int writemask
,
106 unsigned int *srcmasks
)
108 const struct rc_opcode_info
* opcode
= rc_get_opcode_info(inst
->I
.Opcode
);
/* KIL reads all four channels of its first source, whatever the writemask. */
114 if (inst
->I
.Opcode
== RC_OPCODE_KIL
)
115 srcmasks
[0] |= RC_MASK_XYZW
;
/* Componentwise opcodes: every source is read in exactly the written channels. */
120 if (opcode
->IsComponentwise
) {
121 for(unsigned int src
= 0; src
< opcode
->NumSrcRegs
; ++src
)
122 srcmasks
[src
] |= writemask
;
/* Standard scalar opcodes: every source is read in X only. */
123 } else if (opcode
->IsStandardScalar
) {
124 for(unsigned int src
= 0; src
< opcode
->NumSrcRegs
; ++src
)
125 srcmasks
[src
] |= RC_MASK_X
;
/* Remaining opcodes get fixed per-opcode masks (case labels not visible here). */
127 switch(inst
->I
.Opcode
) {
129 srcmasks
[0] |= RC_MASK_X
;
132 srcmasks
[0] |= RC_MASK_XYZ
;
133 srcmasks
[1] |= RC_MASK_XYZ
;
136 srcmasks
[0] |= RC_MASK_XYZW
;
137 srcmasks
[1] |= RC_MASK_XYZW
;
142 srcmasks
[0] |= RC_MASK_XYZW
;
150 srcmasks
[0] |= RC_MASK_XY
;
/**
 * Mark every source channel that \p inst reads in order to produce the
 * destination components in \p writemask.
 *
 * \param s          dataflow state
 * \param inst       instruction whose sources are marked
 * \param writemask  destination components that are known to be needed
 */
static void mark_instruction_source_use(struct dataflow_state *s,
		struct rc_instruction *inst, unsigned int writemask)
{
	/* Start from zero: the masks are built up by OR-ing bits in, so an
	 * uninitialised array would feed indeterminate bits downstream. */
	unsigned int srcmasks[3] = { 0, 0, 0 };
	unsigned int src;

	compute_sources_for_writemask(inst, writemask, srcmasks);

	for (src = 0; src < 3; ++src)
		mark_source_use(s, inst, src, srcmasks[src]);
}
172 static void run_update(struct dataflow_state
* s
)
174 s
->UpdateRunning
= 1;
176 while(s
->UpdateStackSize
) {
177 struct rc_dataflow_vector
* vector
= s
->UpdateStack
[--s
->UpdateStackSize
];
180 if (vector
->WriteInstruction
) {
181 struct rc_instruction
* inst
= vector
->WriteInstruction
;
183 if (inst
->Dataflow
.DstRegPrev
) {
184 unsigned int carryover
= vector
->UseMask
& ~inst
->I
.DstReg
.WriteMask
;
187 mark_ref_use(s
, inst
->Dataflow
.DstRegPrev
, carryover
);
190 mark_instruction_source_use(
191 s
, vector
->WriteInstruction
,
192 vector
->UseMask
& inst
->I
.DstReg
.WriteMask
);
196 s
->UpdateRunning
= 0;
199 static void mark_vector_use(struct dataflow_state
* s
, struct rc_dataflow_vector
* vector
, unsigned int mask
)
201 if (!(mask
& ~vector
->UseMask
))
202 return; /* no new used bits */
204 vector
->UseMask
|= mask
;
208 if (s
->UpdateStackSize
>= s
->UpdateStackReserved
) {
209 unsigned int new_reserve
= 2 * s
->UpdateStackReserved
;
210 struct rc_dataflow_vector
** new_stack
;
215 new_stack
= memory_pool_malloc(&s
->C
->Pool
, new_reserve
* sizeof(struct rc_dataflow_vector
*));
216 memcpy(new_stack
, s
->UpdateStack
, s
->UpdateStackSize
* sizeof(struct rc_dataflow_vector
*));
218 s
->UpdateStack
= new_stack
;
219 s
->UpdateStackReserved
= new_reserve
;
222 s
->UpdateStack
[s
->UpdateStackSize
++] = vector
;
225 if (!s
->UpdateRunning
)
/**
 * Attach dataflow annotations to one instruction.
 *
 * For each source operand a ref to the vector currently recorded for the
 * source register is stored in inst->Dataflow.SrcReg[]; relatively
 * addressed sources additionally get a ref to the address register in
 * inst->Dataflow.SrcRegAddress[]. For opcodes with a destination a new
 * vector is created, stored in inst->Dataflow.DstReg, and recorded in the
 * state as the new contents of the temporary or output register.
 *
 * NOTE(review): several lines of this function are not visible in this
 * listing (the declaration of `src`, closing braces, and possibly tests on
 * the values returned by get_register_contents()); the notes below flag the
 * places where that matters.
 */
229 static void annotate_instruction(struct dataflow_state
* s
, struct rc_instruction
* inst
)
231 const struct rc_opcode_info
* opcode
= rc_get_opcode_info(inst
->I
.Opcode
);
234 for(src
= 0; src
< opcode
->NumSrcRegs
; ++src
) {
/* NOTE(review): get_register_contents() returns 0 for untracked or
 * not-yet-written registers; confirm how a null vector is handled here. */
235 struct rc_dataflow_vector
* vector
= get_register_contents(s
, inst
->I
.SrcReg
[src
].File
, inst
->I
.SrcReg
[src
].Index
);
237 inst
->Dataflow
.SrcReg
[src
] = rc_dataflow_create_ref(s
->C
, vector
, inst
);
/* Relative addressing also reads the address register. */
239 if (inst
->I
.SrcReg
[src
].RelAddr
) {
240 struct rc_dataflow_vector
* addr
= get_register_contents(s
, RC_FILE_ADDRESS
, 0);
242 inst
->Dataflow
.SrcRegAddress
[src
] = rc_dataflow_create_ref(s
->C
, addr
, inst
);
/* With an empty writemask only the unconditional KIL source mask from
 * compute_sources_for_writemask() results in any marking. */
246 mark_instruction_source_use(s
, inst
, 0); /* for KIL */
248 if (opcode
->HasDstReg
) {
/* oldvec: what the destination register held before this instruction;
 * newvec: what it holds afterwards. */
249 struct rc_dataflow_vector
* oldvec
= get_register_contents(s
, inst
->I
.DstReg
.File
, inst
->I
.DstReg
.Index
);
250 struct rc_dataflow_vector
* newvec
= rc_dataflow_create_vector(s
->C
, inst
->I
.DstReg
.File
, inst
->I
.DstReg
.Index
, inst
);
252 newvec
->ValidMask
= inst
->I
.DstReg
.WriteMask
;
/* Components that were valid before and are not overwritten now carry over
 * into the new vector.
 * NOTE(review): oldvec may be 0 (see get_register_contents()); no null test
 * is visible before this dereference, confirm one exists. */
255 unsigned int carryover
= oldvec
->ValidMask
& ~inst
->I
.DstReg
.WriteMask
;
257 if (oldvec
->ValidMask
)
258 inst
->Dataflow
.DstRegAliased
= 1;
261 inst
->Dataflow
.DstRegPrev
= rc_dataflow_create_ref(s
->C
, oldvec
, inst
);
262 newvec
->ValidMask
|= carryover
;
265 mark_ref_use(s
, inst
->Dataflow
.DstRegPrev
, carryover
);
269 inst
->Dataflow
.DstReg
= newvec
;
/* Record the new vector as the current contents of its register. */
271 if (newvec
->File
== RC_FILE_TEMPORARY
)
272 s
->Temporary
[newvec
->Index
] = newvec
;
273 else if (newvec
->File
== RC_FILE_OUTPUT
)
274 s
->Output
[newvec
->Index
] = newvec
;
/* NOTE(review): as listed, every written component is marked used here
 * unconditionally, which would leave the redundant-write trimming in
 * rc_dataflow_annotate() nothing to remove; confirm whether a condition
 * belongs in front of this call. */
279 mark_vector_use(s
, newvec
, inst
->I
.DstReg
.WriteMask
);
283 static void init_inputs(struct dataflow_state
* s
)
287 for(index
= 0; index
< 32; ++index
) {
288 if (s
->C
->Program
.InputsRead
& (1 << index
)) {
289 s
->Input
[index
] = rc_dataflow_create_vector(s
->C
, RC_FILE_INPUT
, index
, 0);
290 s
->Input
[index
]->ValidMask
= RC_MASK_XYZW
;
295 static void mark_output_use(void * data
, unsigned int index
, unsigned int mask
)
297 struct dataflow_state
* s
= data
;
298 struct rc_dataflow_vector
* vec
= s
->Output
[index
];
301 mark_vector_use(s
, vec
, mask
);
/**
 * Annotate all instructions of a program with dataflow information and
 * remove computations whose results are never used.
 *
 * \param c         compiler whose program is processed
 * \param dce       callback that reports which output components are used;
 *                  it is called with \p userdata, the internal state and
 *                  mark_output_use
 * \param userdata  opaque pointer handed back to \p dce
 *
 * Visible steps: zero the state, annotate every instruction, let \p dce
 * mark the used outputs, then walk the instructions again to trim unused
 * destination components, remove instructions left with an empty write
 * mask, set unread source channels to RC_SWIZZLE_UNUSED, drop refs without
 * any use, and finally recompute the program's inputs and outputs.
 *
 * NOTE(review): some lines are not visible in this listing. In particular
 * no assignment to s.C is shown although helpers dereference s->C, no call
 * to init_inputs() is shown, and it cannot be seen whether \p dce may be
 * null; confirm against the complete file.
 */
304 void rc_dataflow_annotate(struct radeon_compiler
* c
, rc_dataflow_mark_outputs_fn dce
, void * userdata
)
306 struct dataflow_state s
;
307 struct rc_instruction
* inst
;
309 memset(&s
, 0, sizeof(s
));
/* Pass 1: build refs and vectors for every instruction, in program order. */
315 for(inst
= c
->Program
.Instructions
.Next
; inst
!= &c
->Program
.Instructions
; inst
= inst
->Next
) {
316 annotate_instruction(&s
, inst
);
/* The callback decides which outputs are used and reports them through
 * mark_output_use(). */
320 dce(userdata
, &s
, &mark_output_use
);
/* Pass 2: drop whatever turned out to be unused. */
322 for(inst
= c
->Program
.Instructions
.Next
; inst
!= &c
->Program
.Instructions
; inst
= inst
->Next
) {
323 const struct rc_opcode_info
* opcode
= rc_get_opcode_info(inst
->I
.Opcode
);
325 if (opcode
->HasDstReg
) {
/* Written components that nobody uses are removed from the write mask. */
326 unsigned int redundant_writes
= inst
->I
.DstReg
.WriteMask
& ~inst
->Dataflow
.DstReg
->UseMask
;
328 inst
->I
.DstReg
.WriteMask
&= ~redundant_writes
;
/* Nothing left to write: the instruction is removed.
 * NOTE(review): as listed, inst still refers to the removed instruction
 * when the loop advances; confirm that the iterator is moved off it and
 * that the rest of the loop body is skipped. */
330 if (!inst
->I
.DstReg
.WriteMask
) {
331 struct rc_instruction
* todelete
= inst
;
333 rc_remove_instruction(todelete
);
/* Source channels that are not read for the remaining write mask are
 * marked as unused in the swizzle. */
338 unsigned int srcmasks
[3];
339 compute_sources_for_writemask(inst
, inst
->I
.DstReg
.WriteMask
, srcmasks
);
341 for(unsigned int src
= 0; src
< 3; ++src
) {
342 for(unsigned int chan
= 0; chan
< 4; ++chan
) {
343 if (!GET_BIT(srcmasks
[src
], chan
))
344 SET_SWZ(inst
->I
.SrcReg
[src
].Swizzle
, chan
, RC_SWIZZLE_UNUSED
);
/* Refs that ended up with an empty use mask are removed and cleared. */
347 if (inst
->Dataflow
.SrcReg
[src
]) {
348 if (!inst
->Dataflow
.SrcReg
[src
]->UseMask
) {
349 rc_dataflow_remove_ref(inst
->Dataflow
.SrcReg
[src
]);
350 inst
->Dataflow
.SrcReg
[src
] = 0;
354 if (inst
->Dataflow
.SrcRegAddress
[src
]) {
355 if (!inst
->Dataflow
.SrcRegAddress
[src
]->UseMask
) {
356 rc_dataflow_remove_ref(inst
->Dataflow
.SrcRegAddress
[src
]);
357 inst
->Dataflow
.SrcRegAddress
[src
] = 0;
/* Instructions and operands may have changed, so refresh the program's
 * input/output bookkeeping. */
363 rc_calculate_inputs_outputs(c
);