2 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
50 typedef std::multimap
<node
*, error_info
> error_map
;
54 typedef std::vector
<shader_input
> inputs_vec
;
55 typedef std::vector
<gpr_array
*> gpr_array_vec
;
61 ra_edge(value
*a
, value
*b
, unsigned cost
) : a(a
), b(b
), cost(cost
) {}
// Bit-flag enumerators: every visible value is a distinct power of two.
// NOTE(review): the enum header and the (1 << 3) entry are not visible in
// this excerpt; RCF_FIXED is referenced further down and presumably fills
// that slot - confirm.
65 RCF_GLOBAL
= (1 << 0),
66 RCF_PIN_CHAN
= (1 << 1),
67 RCF_PIN_REG
= (1 << 2),
71 RCF_PREALLOC
= (1 << 4)
// Bit-flag enumerators occupying bits 0 and 1.
// NOTE(review): presumably members of dce_flags, whose header is not
// visible in this excerpt - confirm.
75 DF_REMOVE_DEAD
= (1 << 0),
76 DF_REMOVE_UNUSED
= (1 << 1),
80 inline dce_flags
operator |(dce_flags l
, dce_flags r
) {
81 return (dce_flags
)((unsigned)l
|(unsigned)r
);
84 inline chunk_flags
operator |(chunk_flags l
, chunk_flags r
) {
85 return (chunk_flags
)((unsigned)l
|(unsigned)r
);
// Compound OR-assignment for chunk_flags: takes the left operand by
// reference and returns a chunk_flags reference.
// NOTE(review): the function body is not visible in this excerpt.
87 inline chunk_flags
& operator |=(chunk_flags
&l
, chunk_flags r
) {
92 inline chunk_flags
& operator &=(chunk_flags
&l
, chunk_flags r
) {
93 l
= (chunk_flags
)((unsigned)l
& (unsigned)r
);
97 inline chunk_flags
operator ~(chunk_flags r
) {
98 return (chunk_flags
)~(unsigned)r
;
107 ra_chunk() : values(), flags(), cost(), pin() {}
109 bool is_fixed() { return flags
& RCF_FIXED
; }
110 void fix() { flags
|= RCF_FIXED
; }
112 bool is_global() { return flags
& RCF_GLOBAL
; }
113 void set_global() { flags
|= RCF_GLOBAL
; }
115 bool is_reg_pinned() { return flags
& RCF_PIN_REG
; }
116 bool is_chan_pinned() { return flags
& RCF_PIN_CHAN
; }
118 bool is_prealloc() { return flags
& RCF_PREALLOC
; }
119 void set_prealloc() { flags
|= RCF_PREALLOC
; }
122 typedef std::vector
<ra_chunk
*> chunk_vector
;
// A constraint tagged with a constraint_kind and carrying a cost.
// NOTE(review): only part of this class is visible in this excerpt
// (access specifiers, remaining members and the closing brace are missing).
124 class ra_constraint
{
// Constructs a constraint of the given kind; cost starts at 0.
126 ra_constraint(constraint_kind kind
) : kind(kind
), cost(0) {}
// Kind passed to the constructor.
128 constraint_kind kind
;
// Declaration only; the definition is not visible here.
132 void update_values();
136 typedef std::vector
<ra_constraint
*> constraint_vec
;
137 typedef std::vector
<ra_chunk
*> chunk_vec
;
// FIXME: use something more suitable, or a custom class?
// Comparison functor for pointer-like handles whose pointee exposes a
// `cost` member.
//
// Returns true when t1's cost is strictly greater than t2's, so a range
// kept sorted with this functor lists higher-cost elements first.
//
// T: a pointer-like type (dereferenced with ->) such as ra_chunk*.
template <class T>
struct cost_compare {
	// const-qualified so the functor can also be invoked through const
	// objects and references, as standard containers may do.
	bool operator ()(const T& t1, const T& t2) const {
		return t1->cost > t2->cost;
	}
};
// Sorted sequence of T kept in the order defined by Comp.
//
// Elements live in a std::vector that is always sorted with respect to
// Comp, so iterating from begin() to end() visits them in comparator order.
//
// T:    element type (copied into the container).
// Comp: default-constructible binary predicate giving the ordering.
template <class T, class Comp>
class queue {
public:
	typedef std::vector<T> container;
	typedef typename container::iterator iterator;

	// Iterator to the first element in comparator order.
	iterator begin() { return cont.begin(); }

	// Past-the-end iterator.
	iterator end() { return cont.end(); }

	// Inserts a copy of t after any elements equivalent to it and returns
	// an iterator to the newly stored element.
	iterator insert(const T& t) {
		iterator I = std::upper_bound(begin(), end(), t, Comp());
		// Return the iterator produced by the vector: the lookup iterator
		// may be invalidated by the insertion itself.
		return cont.insert(I, t);
	}

	// Removes one element equal to t (operator==) among the elements that
	// are equivalent to t under Comp; does nothing when there is none.
	void erase(const T& t) {
		std::pair<iterator, iterator> R =
				std::equal_range(begin(), end(), t, Comp());
		iterator F = std::find(R.first, R.second, t);
		if (F != R.second)
			cont.erase(F);
	}

private:
	container cont;
};
181 typedef queue
<ra_chunk
*, cost_compare
<ra_chunk
*> > chunk_queue
;
182 typedef queue
<ra_edge
*, cost_compare
<ra_edge
*> > edge_queue
;
183 typedef queue
<ra_constraint
*, cost_compare
<ra_constraint
*> > constraint_queue
;
185 typedef std::set
<ra_chunk
*> chunk_set
;
// Data members; the enclosing class header is not visible in this excerpt.
// Queue of constraints (constraint_queue is a cost_compare-ordered queue).
195 constraint_queue constraints
;
// Vector of ra_constraint pointers.
197 constraint_vec all_constraints
;
// Vector of ra_chunk pointers.
198 chunk_vec all_chunks
;
202 coalescer(shader
&sh
) : sh(sh
), edges(), chunks(), constraints() {}
// Member-function declarations; none of the definitions are visible in
// this excerpt.
// NOTE(review): presumably members of coalescer, whose constructor
// precedes them - confirm against the full header.

// Takes two values and a cost (the same parameter list as the ra_edge
// constructor).
207 void add_edge(value
*a
, value
*b
, unsigned cost
);
209 void build_constraint_queue();
210 void build_chunk_queue();
// Returns an int; its meaning is not visible here.
211 int color_constraints();
// Returns a pointer to an ra_constraint for the requested kind.
214 ra_constraint
* create_constraint(constraint_kind kind
);
223 void dump_constraint_queue();
// Static helpers: callable without an object.
225 static void dump_chunk(ra_chunk
*c
);
226 static void dump_constraint(ra_constraint
* c
);
// s is a non-const reference parameter.
228 void get_chunk_interferences(ra_chunk
*c
, val_set
&s
);
232 void create_chunk(value
*v
);
233 void unify_chunks(ra_edge
*e
);
234 bool chunks_interference(ra_chunk
*c1
, ra_chunk
*c2
);
// Returns an int; its meaning is not visible here.
236 int color_reg_constraint(ra_constraint
*c
);
237 void color_phi_constraint(ra_constraint
*c
);
// Both parameters are non-const references.
240 void init_reg_bitset(sb_bitset
&bs
, val_set
&vs
);
242 void color_chunk(ra_chunk
*c
, sel_chan color
);
// Returns a pointer to an ra_chunk.
244 ra_chunk
* detach_value(value
*v
);
// Type alias, data members and constructor declaration; the enclosing
// class header is not visible in this excerpt (the constructor below is
// named shader).

// Map from a uint32_t key to a value pointer.
253 typedef sb_map
<uint32_t, value
*> value_map
;
254 value_map reg_values
;
257 value_map const_values
; // immediate constants key -const value (uint32_t)
258 value_map special_ro_values
; // key - hw alu_sel & chan
259 value_map kcache_values
;
// Vector of gpr_array pointers.
261 gpr_array_vec gpr_arrays
;
263 unsigned next_temp_value_index
;
265 unsigned prep_regs_count
;
274 sb_value_pool val_pool
;
// Vector of node pointers.
277 std::vector
<node
*> all_nodes
;
// Two shader_stats objects.
// NOTE(review): presumably statistics before and after optimization - confirm.
280 shader_stats src_stats
, opt_stats
;
// Class-wide constant with value 512.
290 static const unsigned temp_regid_offset
= 512;
// const member: cannot be reassigned after construction.
294 const shader_target target
;
299 container_node
*root
;
301 bool compute_interferences
;
303 bool has_alu_predication
;
308 unsigned ngpr
, nstack
;
// Constructor declaration; the definition is not visible here.
312 shader(sb_context
&sctx
, shader_target t
, unsigned id
);
316 sb_context
&get_ctx() const { return ctx
; }
// Member-function declarations; the definitions are not visible in this
// excerpt.

318 value
* get_const_value(const literal
& v
);
// version defaults to 0.
319 value
* get_special_value(unsigned sv_id
, unsigned version
= 0);
320 value
* create_temp_value();
// version defaults to 0.
321 value
* get_gpr_value(bool src
, unsigned reg
, unsigned chan
, bool rel
,
322 unsigned version
= 0);
325 value
* get_special_ro_value(unsigned sel
);
326 value
* get_kcache_value(unsigned bank
, unsigned index
, unsigned chan
, alu_kcache_index_mode index_mode
);
328 value
* get_value_version(value
* v
, unsigned ver
);
// vec is a non-const reference parameter.
331 void add_pinned_gpr_values(vvec
& vec
, unsigned gpr
, unsigned comp_mask
, bool src
);
// NOTE(review): the rest of this parameter list is not visible in this
// excerpt.
335 void add_gpr_array(unsigned gpr_start
, unsigned gpr_count
,
338 value
* get_pred_sel(int sel
);
// slots is declared as an array of 5 alu_node pointers.
339 bool assign_slot(alu_node
*n
, alu_node
*slots
[5]);
341 gpr_array
* get_gpr_array(unsigned reg
, unsigned chan
);
// preloaded defaults to false and comp_mask to 0xF.
343 void add_input(unsigned gpr
, bool preloaded
= false,
344 unsigned comp_mask
= 0xF);
346 const inputs_vec
& get_inputs() {return inputs
; }
348 regions_vec
& get_regions() { return regions
; }
// Member-function declarations; the definitions are not visible in this
// excerpt.

350 void init_call_fs(cf_node
*cf
);
352 value
*get_undef_value();
// s is a non-const reference parameter.
353 void set_undef(val_set
&s
);
// flags defaults to NF_EMPTY.
355 node
* create_node(node_type nt
, node_subtype nst
,
356 node_flags flags
= NF_EMPTY
);
// Each create_* function below returns a pointer to the node type named
// in its return type.
357 alu_node
* create_alu();
358 alu_group_node
* create_alu_group();
359 alu_packed_node
* create_alu_packed();
// Two overloads: without arguments and with an unsigned op.
360 cf_node
* create_cf();
361 cf_node
* create_cf(unsigned op
);
362 fetch_node
* create_fetch();
363 region_node
* create_region();
364 depart_node
* create_depart(region_node
*target
);
365 repeat_node
* create_repeat(region_node
*target
);
// Defaults: nt = NT_LIST, nst = NST_LIST, flags = NF_EMPTY.
366 container_node
* create_container(node_type nt
= NT_LIST
,
367 node_subtype nst
= NST_LIST
,
368 node_flags flags
= NF_EMPTY
);
369 if_node
* create_if();
370 bb_node
* create_bb(unsigned id
, unsigned loop_level
);
372 value
* get_value_by_uid(unsigned id
) { return val_pool
[id
- 1]; }
// Member-function declarations; the definitions are not visible in this
// excerpt.

374 cf_node
* create_clause(node_subtype nst
);
379 alu_node
* create_mov(value
* dst
, value
* src
);
// affcost defaults to 1.
380 alu_node
* create_copy_mov(value
*dst
, value
*src
, unsigned affcost
= 1);
// Returns a C string.
382 const char * get_shader_target_name();
// Returns a std::string by value.
384 std::string
get_full_target_name();
// loop_level defaults to 0; bbs is a non-const reference parameter.
386 void create_bbs(container_node
* n
, bbs_vec
&bbs
, int loop_level
= 0);
387 void expand_bbs(bbs_vec
&bbs
);
389 sched_queue_id
get_queue_id(node
* n
);
391 void simplify_dep_rep(node
*dr
);
393 unsigned first_temp_gpr();
394 unsigned num_nontemp_gpr();
396 gpr_array_vec
& arrays() { return gpr_arrays
; }
// Member-function declarations; the definitions are not visible in this
// excerpt.

398 void set_uses_kill();
// vv is a non-const reference parameter.
400 void fill_array_values(gpr_array
*a
, vvec
&vv
);
// Takes and returns an alu_node pointer.
402 alu_node
* clone(alu_node
*n
);
404 sb_value_pool
& get_value_pool() { return val_pool
; }
// Member-function declarations; the definitions are not visible in this
// excerpt.

406 void collect_stats(bool opt
);
409 value
* create_value(value_kind k
, sel_chan regid
, unsigned ver
);
// version defaults to 0.
410 value
* get_value(value_kind kind
, sel_chan id
,
411 unsigned version
= 0);
// vm is a non-const value_map reference.
412 value
* get_ro_value(value_map
&vm
, value_kind vk
, unsigned key
);
417 #endif /* SHADER_H_ */