#include <algorithm>
#include <stdint.h>
-#include <iostream>
#include <vector>
#include <set>
#include <algorithm>
SV_ALU_PRED = 128,
SV_EXEC_MASK,
SV_AR_INDEX,
- SV_VALID_MASK
+ SV_VALID_MASK,
+ SV_GEOMETRY_EMIT,
+ SV_LDS_RW,
+ SV_LDS_OQA,
+ SV_LDS_OQB,
+ SV_SCRATCH
};
class node;
static unsigned sel(unsigned idx) { return (idx-1) >> 2; }
static unsigned chan(unsigned idx) { return (idx-1) & 3; }
+
+ sel_chan(unsigned bank, unsigned index, // Builds a kcache selector: bank, index and index_mode are packed into a single sel value.
+ unsigned chan, alu_kcache_index_mode index_mode)
+ : id(sel_chan((bank << 12) | index | ((unsigned)index_mode << 28), chan).id) {} // sel = index_mode << 28 | bank << 12 | index; a temporary sel_chan(sel, chan) supplies the id. NOTE(review): index is OR-ed in unmasked - presumably index < (1 << 12); confirm with callers.
+ unsigned kcache_index_mode() const { return sel() >> 28; } // bits 28 and up of sel(): where the kcache constructor places index_mode
+ unsigned kcache_sel() const { return sel() & 0x0fffffffu; } // low 28 bits of sel(), i.e. sel() with the index-mode bits masked off
+ unsigned kcache_bank() const { return kcache_sel() >> 12; } // bits 12..27 of sel(): where the kcache constructor places bank
};
-inline std::ostream& operator <<(std::ostream& o, sel_chan r) {
+inline sb_ostream& operator <<(sb_ostream& o, sel_chan r) { // Writes r to o as r.sel(), a dot, then the channel letter picked from xyzw by r.chan(); returns o for chaining.
static const char * ch = "xyzw";
o << r.sel() << "." << ch[r.chan()];
return o;
}
};
-class value;
-
enum value_kind {
VLK_REG,
VLK_REL_REG,
return l;
}
-struct value;
-
-std::ostream& operator << (std::ostream &o, value &v);
+sb_ostream& operator << (sb_ostream &o, value &v);
typedef uint32_t value_hash;
-enum use_kind {
- UK_SRC,
- UK_SRC_REL,
- UK_DST_REL,
- UK_MAYDEF,
- UK_MAYUSE,
- UK_PRED,
- UK_COND
-};
-
-struct use_info {
- use_info *next;
- node *op;
- use_kind kind;
- int arg;
-
- use_info(node *n, use_kind kind, int arg, use_info* next)
- : next(next), op(n), kind(kind), arg(arg) {}
-};
+typedef std::list< node * > uselist; // container type of value::uses (replaces the removed use_info linked list). NOTE(review): <list> is not among the includes visible in this patch - confirm it is provided by another header.
enum constraint_kind {
CK_SAME_REG,
class shader;
class sb_value_pool;
-class ra_chunk;
+struct ra_chunk;
class ra_constraint;
class value {
value_hash ghash;
node *def, *adef;
- use_info *uses;
+ uselist uses;
ra_constraint *constraint;
ra_chunk *chunk;
bool is_AR() {
return is_special_reg() && select == sel_chan(SV_AR_INDEX, 0);
}
+ bool is_geometry_emit() { // True when is_special_reg() holds and select is (SV_GEOMETRY_EMIT, channel 0).
+ return is_special_reg() && select == sel_chan(SV_GEOMETRY_EMIT, 0);
+ }
+ bool is_lds_access() { // True when is_special_reg() holds and select is (SV_LDS_RW, channel 0).
+ return is_special_reg() && select == sel_chan(SV_LDS_RW, 0);
+ }
+ bool is_lds_oq() { // True when is_special_reg() holds and select is channel 0 of either SV_LDS_OQA or SV_LDS_OQB.
+ return is_special_reg() && (select == sel_chan(SV_LDS_OQA, 0) || select == sel_chan(SV_LDS_OQB, 0));
+ }
node* any_def() {
assert(!(def && adef));
v = v->gvn_source;
return v;
}
+ bool is_scratch() { // True when is_special_reg() holds and select is (SV_SCRATCH, channel 0).
+ return is_special_reg() && select == sel_chan(SV_SCRATCH, 0);
+ }
bool is_float_0_or_1() {
value *v = gvalue();
&& literal_value != literal(1.0);
}
- void add_use(node *n, use_kind kind, int arg);
+ void add_use(node *n);
+ void remove_use(const node *n);
value_hash hash();
value_hash rel_hash();
}
}
+ /* Check whether copy-propagation of src into this would create an access
+ * conflict with relative addressing, i.e. an operation that tries to access
+ * array elements with different address register values.
+ */
+ bool no_reladdr_conflict_with(value *src);
+
val_set interferences;
unsigned uid;
};
NST_FETCH_INST,
NST_TEX_CLAUSE,
NST_VTX_CLAUSE,
+ NST_GDS_CLAUSE,
NST_BB,
NF_DONT_MOVE = (1 << 8),
// for KILLxx - we want to schedule them as early as possible
- NF_SCHEDULE_EARLY = (1 << 9)
+ NF_SCHEDULE_EARLY = (1 << 9),
+
+ // for ALU_PUSH_BEFORE - when set, replace with PUSH + ALU
+ NF_ALU_STACK_WORKAROUND = (1 << 10)
};
inline node_flags operator |(node_flags l, node_flags r) {
unsigned depart_count;
unsigned repeat_count;
unsigned if_count;
+ bool uses_ar;
node_stats() : alu_count(), alu_kill_count(), alu_copy_mov_count(),
cf_count(), fetch_count(), region_count(),
loop_count(), phi_count(), loop_phi_count(), depart_count(),
- repeat_count(), if_count() {}
+ repeat_count(), if_count(), uses_ar(false) {} // the count members are value-initialized by their empty initializers; the new uses_ar flag starts out false
void dump();
};
void replace_with(node *n);
void remove();
- virtual value_hash hash();
- value_hash hash_src();
+ virtual value_hash hash() const;
+ value_hash hash_src() const;
virtual bool fold_dispatch(expr_handler *ex);
bool is_alu_clause() { return subtype == NST_ALU_CLAUSE; }
bool is_fetch_clause() {
- return subtype == NST_TEX_CLAUSE || subtype == NST_VTX_CLAUSE;
+ return subtype == NST_TEX_CLAUSE || subtype == NST_VTX_CLAUSE || subtype == NST_GDS_CLAUSE; // NST_GDS_CLAUSE now counts as a fetch clause alongside TEX and VTX
}
bool is_copy() { return subtype == NST_COPY; }
return vec_uses_ar(dst) || vec_uses_ar(src);
}
+ bool vec_uses_lds_oq(vvec &vv) { // Returns true if any entry of vv is a value for which is_lds_oq() holds; false for an empty vector.
+ for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) {
+ value *v = *I;
+ if (v && v->is_lds_oq()) // null entries are skipped
+ return true;
+ }
+ return false;
+ }
+
+ bool consumes_lds_oq() { // True if any source operand (src) is an is_lds_oq() value.
+ return vec_uses_lds_oq(src);
+ }
+
+ bool produces_lds_oq() { // True if any destination operand (dst) is an is_lds_oq() value.
+ return vec_uses_lds_oq(dst);
+ }
region_node* get_parent_region();
class cf_node : public container_node {
protected:
cf_node() : container_node(NT_OP, NST_CF_INST), jump_target(),
- jump_after_target() {};
+ jump_after_target() { memset(&bc, 0, sizeof(bc_cf)); }; // zero-fills the bc member on construction. NOTE(review): assumes bc_cf is trivially copyable, and memset needs <string.h>/<cstring>, which is not among the includes visible in this patch - confirm both.
public:
bc_cf bc;
class alu_node : public node {
protected:
- alu_node() : node(NT_OP, NST_ALU_INST) {};
+ alu_node() : node(NT_OP, NST_ALU_INST) { memset(&bc, 0, sizeof(bc_alu)); }; // zero-fills the bc member on construction. NOTE(review): assumes bc_alu is trivially copyable - confirm.
public:
bc_alu bc;
return static_cast<alu_node*>(first)->bc.op_ptr;
}
unsigned op() { return static_cast<alu_node*>(first)->bc.op; }
- void init_args();
+ void init_args(bool repl);
virtual bool is_valid() { return subtype == NST_ALU_PACKED_INST; }
virtual bool accept(vpass &p, bool enter);
class fetch_node : public node {
protected:
- fetch_node() : node(NT_OP, NST_FETCH_INST) {};
+ fetch_node() : node(NT_OP, NST_FETCH_INST) { memset(&bc, 0, sizeof(bc_fetch)); }; // zero-fills the bc member on construction. NOTE(review): assumes bc_fetch is trivially copyable - confirm.
public:
bc_fetch bc;
class region_node : public container_node {
protected:
region_node(unsigned id) : container_node(NT_REGION, NST_LIST), region_id(id),
- loop_phi(), phi(), vars_defined(), departs(), repeats() {}
+ loop_phi(), phi(), vars_defined(), departs(), repeats(), src_loop() // src_loop() value-initializes the new bool flag, so it starts out false
+ {}
public:
unsigned region_id;
depart_vec departs;
repeat_vec repeats;
+ // True if the parser created this region for a loop. The repeat_node may
+ // later be optimized away, so this flag preserves the fact for is_loop().
+ bool src_loop;
+
virtual bool accept(vpass &p, bool enter);
unsigned dep_count() { return departs.size(); }
unsigned rep_count() { return repeats.size() + 1; }
- bool is_loop() { return !repeats.empty(); }
+ bool is_loop() { return src_loop || !repeats.empty(); } // a region is a loop if src_loop is set or if it still has at least one repeat
container_node* get_entry_code_location() {
node *p = first;