2 * Copyright © 2011 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
26 #include "main/macros.h"
27 #include "program/prog_parameter.h"
// Sentinel instruction index, larger than any real one is expected to be.
// calculate_live_intervals() seeds every def[] entry with it, and the assert
// in virtual_grf_interferes() accepts a register without a recorded use only
// if its def still equals this value.
30 #define MAX_INSTRUCTION (1 << 30)
// Computes a def index and a use index for every virtual GRF by walking the
// instruction list once, then publishes the two arrays as virtual_grf_def and
// virtual_grf_use and sets live_intervals_valid.
//
// NOTE(review): only part of this function is visible in this excerpt. The
// declarations of ip, loop_depth and loop_start, the initialisation of use[],
// several branch bodies and the closing braces are not shown, so the notes
// below describe only what the visible lines do.
35 vec4_visitor::calculate_live_intervals()
// Fresh per-register arrays, one int per virtual GRF, allocated from mem_ctx.
37 int *def
= ralloc_array(mem_ctx
, int, virtual_grf_count
);
38 int *use
= ralloc_array(mem_ctx
, int, virtual_grf_count
);
// NOTE(review): this validity test comes after the two allocations above. If
// the branch body (not visible) returns early, both arrays stay unused until
// mem_ctx is released -- consider testing before allocating; confirm.
42 if (this->live_intervals_valid
)
// Seed every def with the sentinel so the MIN2 updates below can only lower it.
45 for (int i
= 0; i
< virtual_grf_count
; i
++) {
46 def
[i
] = MAX_INSTRUCTION
;
// Single pass over the instruction list.
51 foreach_list(node
, &this->instructions
) {
52 vec4_instruction
*inst
= (vec4_instruction
*)node
;
// Loop tracking. DO post-increments loop_depth, so the statement guarded by
// the == 0 test (not visible) runs only when an outermost loop is entered.
54 if (inst
->opcode
== BRW_OPCODE_DO
) {
55 if (loop_depth
++ == 0)
57 } else if (inst
->opcode
== BRW_OPCODE_WHILE
) {
// With loop_depth at 0 on a WHILE, every register whose use still equals
// loop_start is visited; the fix-up applied to it is not visible here.
60 if (loop_depth
== 0) {
61 /* Patches up the use of vars marked for being live across
64 for (int i
= 0; i
< virtual_grf_count
; i
++) {
65 if (use
[i
] == loop_start
) {
// Source operands: up to three per instruction, only GRF-file ones count.
71 for (unsigned int i
= 0; i
< 3; i
++) {
72 if (inst
->src
[i
].file
== GRF
) {
73 int reg
= inst
->src
[i
].reg
;
// def is pulled back to loop_start (never raised) and use is parked at
// loop_start. NOTE(review): the condition selecting this path is not visible;
// presumably it is the inside-a-loop case -- confirm.
78 def
[reg
] = MIN2(loop_start
, def
[reg
]);
79 use
[reg
] = loop_start
;
81 /* Nobody else is going to go smash our start to
82 * later in the loop now, because def[reg] now
83 * points before the bb header.
// Destination operand: a GRF write lowers def[reg] to ip or to loop_start.
// The condition choosing between the two MIN2 forms is not visible here.
88 if (inst
->dst
.file
== GRF
) {
89 int reg
= inst
->dst
.reg
;
92 def
[reg
] = MIN2(def
[reg
], ip
);
94 def
[reg
] = MIN2(def
[reg
], loop_start
);
// Release the previously published arrays and install the new ones.
102 ralloc_free(this->virtual_grf_def
);
103 ralloc_free(this->virtual_grf_use
);
104 this->virtual_grf_def
= def
;
105 this->virtual_grf_use
= use
;
// Mark the cached intervals valid; dead_code_eliminate() clears this flag.
107 this->live_intervals_valid
= true;
// Interference query for virtual GRFs a and b, based on the virtual_grf_def
// and virtual_grf_use arrays published by calculate_live_intervals().
//
// NOTE(review): the return type and the return statement are not visible in
// this excerpt; presumably the two registers interfere when start < end --
// confirm.
111 vec4_visitor::virtual_grf_interferes(int a
, int b
)
// start is the later of the two defs, end is the earlier of the two uses.
113 int start
= MAX2(this->virtual_grf_def
[a
], this->virtual_grf_def
[b
]);
114 int end
= MIN2(this->virtual_grf_use
[a
], this->virtual_grf_use
[b
]);
116 /* We can't handle dead register writes here, without iterating
117 * over the whole instruction stream to find every single dead
118 * write to that register to compare to the live interval of the
119 * other register. Just assert that dead_code_eliminate() has been
// For each of a and b: either a use was recorded (use != -1) or the def is
// still the MAX_INSTRUCTION sentinel.
122 assert((this->virtual_grf_use
[a
] != -1 ||
123 this->virtual_grf_def
[a
] == MAX_INSTRUCTION
) &&
124 (this->virtual_grf_use
[b
] != -1 ||
125 this->virtual_grf_def
[b
] == MAX_INSTRUCTION
));
131 * Must be called after calculate_live_intervals() to remove unused
132 * writes to registers -- register allocation will fail otherwise
133 * because something defined but not used won't be considered to
134 * interfere with other regs.
// Scans the instruction list for writes to a GRF whose recorded use index is
// not later than pc, then invalidates the cached live intervals.
//
// NOTE(review): the declaration and update of pc, the action taken on a
// matching instruction and the return statement are not visible in this
// excerpt. Presumably the instruction is removed, progress is set and
// progress is returned -- confirm.
137 vec4_visitor::dead_code_eliminate()
139 bool progress
= false;
// Refresh virtual_grf_def / virtual_grf_use before consulting them.
142 calculate_live_intervals();
// The _safe list iterator is used here, unlike the other walks in this file.
144 foreach_list_safe(node
, &this->instructions
) {
145 vec4_instruction
*inst
= (vec4_instruction
*)node
;
// Candidate: the destination is a GRF and its use index is at or before pc.
147 if (inst
->dst
.file
== GRF
&& this->virtual_grf_use
[inst
->dst
.reg
] <= pc
) {
// Clear the flag so the next calculate_live_intervals() call recomputes.
// NOTE(review): whether this is conditional on progress is not visible here.
156 live_intervals_valid
= false;
// Rewrites every UNIFORM source operand so that .reg alone addresses it
// (reg_offset is folded into reg and cleared), then sets uniform_size[i] to 1
// for every uniform.
//
// NOTE(review): the return type, the statement under the file != UNIFORM test
// and the closing braces are not visible in this excerpt.
162 vec4_visitor::split_uniform_registers()
164 /* Prior to this, uniforms have been in an array sized according to
165 * the number of vector uniforms present, sparsely filled (so an
166 * aggregate results in reg indices being skipped over). Now we're
167 * going to cut those aggregates up so each .reg index is one
168 * vector. The goal is to make elimination of unused uniform
169 * components easier later.
171 foreach_list(node
, &this->instructions
) {
172 vec4_instruction
*inst
= (vec4_instruction
*)node
;
// Up to three source operands per instruction.
174 for (int i
= 0 ; i
< 3; i
++) {
// Sources from other register files are not rewritten (the guarded statement
// is not visible; presumably continue).
175 if (inst
->src
[i
].file
!= UNIFORM
)
// A relative address on a uniform source is not expected at this point.
178 assert(!inst
->src
[i
].reladdr
);
// Fold the offset into the register index, then clear the offset.
180 inst
->src
[i
].reg
+= inst
->src
[i
].reg_offset
;
181 inst
->src
[i
].reg_offset
= 0;
185 /* Update that everything is now vector-sized. */
186 for (int i
= 0; i
< this->uniforms
; i
++) {
187 this->uniform_size
[i
] = 1;
// Compacts uniform storage in four visible steps:
//   1. record which uniform registers are referenced by any instruction,
//   2. for each referenced uniform, pick a destination slot and channel
//      offset and copy its prog_data.param entries there,
//   3. store the new slot count in this->uniforms,
//   4. rewrite the register number and swizzle of every UNIFORM source.
//
// NOTE(review): the declaration of dst, the assignment to new_loc[src] and
// several branch bodies are not visible in this excerpt.
192 vec4_visitor::pack_uniform_registers()
// Scratch tables indexed by the old uniform register number.
// NOTE(review): these are variable-length arrays, a compiler extension in
// C++; confirm this->uniforms cannot be 0 when this runs.
194 bool uniform_used
[this->uniforms
];
195 int new_loc
[this->uniforms
];
196 int new_chan
[this->uniforms
];
198 memset(uniform_used
, 0, sizeof(uniform_used
));
199 memset(new_loc
, 0, sizeof(new_loc
));
200 memset(new_chan
, 0, sizeof(new_chan
));
202 /* Find which uniform vectors are actually used by the program. We
203 * expect unused vector elements when we've moved array access out
204 * to pull constants, and from some GLSL code generators like wine.
206 foreach_list(node
, &this->instructions
) {
207 vec4_instruction
*inst
= (vec4_instruction
*)node
;
209 for (int i
= 0 ; i
< 3; i
++) {
210 if (inst
->src
[i
].file
!= UNIFORM
)
// Mark the referenced uniform register as live.
213 uniform_used
[inst
->src
[i
].reg
] = true;
217 int new_uniform_count
= 0;
219 /* Now, figure out a packing of the live uniform vectors into our
// src walks the old slots in ascending order; size is the value recorded in
// uniform_vector_size for that slot, which the <= 4 test below treats as a
// channel count.
222 for (int src
= 0; src
< uniforms
; src
++) {
223 int size
= this->uniform_vector_size
[src
];
// Unreferenced slots have their size zeroed (rest of the branch not visible).
225 if (!uniform_used
[src
]) {
226 this->uniform_vector_size
[src
] = 0;
231 /* Find the lowest place we can slot this uniform in. */
// Only slots before src are candidates; a slot fits when its current fill
// plus size stays within four channels.
232 for (dst
= 0; dst
< src
; dst
++) {
233 if (this->uniform_vector_size
[dst
] + size
<= 4)
// The uniform is placed right after the channels already occupying dst.
242 new_chan
[src
] = this->uniform_vector_size
[dst
];
244 /* Move the references to the data */
245 for (int j
= 0; j
< size
; j
++) {
246 c
->prog_data
.param
[dst
* 4 + new_chan
[src
] + j
] =
247 c
->prog_data
.param
[src
* 4 + j
];
// Size bookkeeping for the move. NOTE(review): if dst can equal src here, the
// second assignment would zero the size just updated; a guard is presumably
// present on a line not visible in this excerpt -- confirm.
250 this->uniform_vector_size
[dst
] += size
;
251 this->uniform_vector_size
[src
] = 0;
// Track the highest destination slot in use, plus one.
254 new_uniform_count
= MAX2(new_uniform_count
, dst
+ 1);
257 this->uniforms
= new_uniform_count
;
259 /* Now, update the instructions for our repacked uniforms. */
260 foreach_list(node
, &this->instructions
) {
261 vec4_instruction
*inst
= (vec4_instruction
*)node
;
263 for (int i
= 0 ; i
< 3; i
++) {
// The old register number is read before the file test below.
264 int src
= inst
->src
[i
].reg
;
266 if (inst
->src
[i
].file
!= UNIFORM
)
269 inst
->src
[i
].reg
= new_loc
[src
];
// Shift each of the four swizzle components by the channel offset the
// uniform was packed at.
271 int sx
= BRW_GET_SWZ(inst
->src
[i
].swizzle
, 0) + new_chan
[src
];
272 int sy
= BRW_GET_SWZ(inst
->src
[i
].swizzle
, 1) + new_chan
[src
];
273 int sz
= BRW_GET_SWZ(inst
->src
[i
].swizzle
, 2) + new_chan
[src
];
274 int sw
= BRW_GET_SWZ(inst
->src
[i
].swizzle
, 3) + new_chan
[src
];
275 inst
->src
[i
].swizzle
= BRW_SWIZZLE4(sx
, sy
, sz
, sw
);
280 } /* namespace brw */