r600/sfn: Add support for shared atomics
[mesa.git] / src / gallium / drivers / r600 / sb / sb_psi_ops.cpp
1 /*
2 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 *
23 * Authors:
24 * Vadim Girlin
25 */
26
27 #include "sb_shader.h"
28
29 #include "sb_pass.h"
30
31 namespace r600_sb {
32
33 bool r600_sb::psi_ops::visit(alu_node& n, bool enter) {
34 if (enter) {
35 }
36 return false;
37 }
38
39 bool psi_ops::visit(node& n, bool enter) {
40 if (enter) {
41 assert(n.subtype == NST_PSI);
42
43 try_inline(n);
44
45 // TODO eliminate predication until there is full support in all passes
46 // unpredicate instructions and replace psi-nodes with conditional moves
47 eliminate(n);
48 }
49 return false;
50 }
51
52 value* get_pred_val(node &n) {
53 value *pred_val = NULL;
54
55 for (vvec::iterator I = n.src.begin(), E = n.src.end(); I != E; I += 3) {
56 value* &pred = *I;
57 if (pred) {
58 if (!pred_val)
59 pred_val = pred;
60 else {
61 assert(pred == pred_val);
62 }
63 }
64 }
65 return pred_val;
66 }
67
68 // for now we'll never inline psi's with different predicate values,
69 // so psi node may only contain the refs to one predicate value.
70 bool psi_ops::try_inline(node& n) {
71 assert(n.subtype == NST_PSI);
72
73 vvec &ns = n.src;
74
75 int sz = ns.size();
76 assert(sz && (sz % 3 == 0));
77
78 value *pred_val = get_pred_val(n);
79
80 int ps_mask = 0;
81
82 bool r = false;
83
84 for (int i = sz - 1; i >= 0; i -= 3) {
85
86 if (ps_mask == 3) {
87 ns.erase(ns.begin(), ns.begin() + i + 1);
88 return r;
89 }
90
91 value* val = ns[i];
92 value* predsel = ns[i-1];
93 int ps = !predsel ? 3 : predsel == sh.get_pred_sel(0) ? 1 : 2;
94
95 assert(val->def);
96
97 if (val->def->subtype == NST_PSI && ps == 3) {
98 if (get_pred_val(*val->def) != pred_val)
99 continue;
100
101 vvec &ds = val->def->src;
102
103 ns.insert(ns.begin() + i + 1, ds.begin(), ds.end());
104 ns.erase(ns.begin() + i - 2, ns.begin() + i + 1);
105 i += ds.size();
106 r = true;
107
108 } else {
109 if ((ps_mask & ps) == ps) {
110 // this predicate select is subsumed by already handled ops
111 ns.erase(ns.begin() + i - 2, ns.begin() + i + 1);
112 } else {
113 ps_mask |= ps;
114 }
115 }
116 }
117 return r;
118 }
119
120 bool psi_ops::try_reduce(node& n) {
121 assert(n.subtype == NST_PSI);
122 assert(n.src.size() % 3 == 0);
123
124 // TODO
125
126 return false;
127 }
128
129 void psi_ops::unpredicate(node *n) {
130
131 if (!n->is_alu_inst())
132 return;
133
134 alu_node *a = static_cast<alu_node*>(n);
135 a->pred = NULL;
136 }
137
138 bool psi_ops::eliminate(node& n) {
139 assert(n.subtype == NST_PSI);
140 assert(n.src.size() == 6);
141
142 value *d = n.dst[0];
143
144 value *s1 = n.src[2];
145 value *s2 = n.src[5];
146
147 value *pred = n.src[3];
148
149 bool psel = n.src[4] == sh.get_pred_sel(0);
150
151 value *sel = get_select_value_for_em(sh, pred);
152
153 if (s1->is_undef()) {
154 if (s2->is_undef()) {
155
156 } else {
157 n.insert_after(sh.create_mov(d, s2));
158 }
159 } else if (s2->is_undef()) {
160 n.insert_after(sh.create_mov(d, s1));
161 } else {
162 alu_node *a = sh.create_alu();
163 a->bc.set_op(ALU_OP3_CNDE_INT);
164
165 a->dst.push_back(d);
166 a->src.push_back(sel);
167
168 if (psel) {
169 a->src.push_back(s1);
170 a->src.push_back(s2);
171 } else {
172 a->src.push_back(s2);
173 a->src.push_back(s1);
174 }
175
176 n.insert_after(a);
177 }
178
179 n.remove();
180
181 if (s1->is_any_gpr() && !s1->is_undef() && s1->def)
182 unpredicate(s1->def);
183 if (s2->is_any_gpr() && !s2->is_undef() && s2->def)
184 unpredicate(s2->def);
185
186 return false;
187 }
188
189 } // namespace r600_sb