nvfx: fix GPU hardlocks when depth buffer is absent
[mesa.git] / src / gallium / drivers / nvfx / nvfx_state_fb.c
1 #include "nvfx_context.h"
2 #include "nvfx_resource.h"
3 #include "util/u_format.h"
4
5 static inline boolean
6 nvfx_surface_linear_renderable(struct pipe_surface* surf)
7 {
8 return (surf->texture->flags & NVFX_RESOURCE_FLAG_LINEAR)
9 && !(surf->offset & 63)
10 && !(((struct nvfx_surface*)surf)->pitch & 63);
11 }
12
13 static inline boolean
14 nvfx_surface_swizzled_renderable(struct pipe_framebuffer_state* fb, struct pipe_surface* surf)
15 {
16 /* TODO: return FALSE if we have a format not supporting swizzled rendering (e.g. r8); currently those are not supported at all */
17 return !((struct nvfx_miptree*)surf->texture)->linear_pitch
18 && (surf->texture->target != PIPE_TEXTURE_3D || u_minify(surf->texture->depth0, surf->level) <= 1)
19 && !(surf->offset & 127)
20 && (surf->width == fb->width)
21 && (surf->height == fb->height)
22 && !((struct nvfx_surface*)surf)->temp;
23 }
24
25 static boolean
26 nvfx_surface_get_render_target(struct pipe_surface* surf, int all_swizzled, struct nvfx_render_target* target)
27 {
28 struct nvfx_surface* ns = (struct nvfx_surface*)surf;
29 if(!ns->temp)
30 {
31 target->bo = ((struct nvfx_miptree*)surf->texture)->base.bo;
32 target->offset = surf->offset;
33 target->pitch = align(ns->pitch, 64);
34 assert(target->pitch);
35 return FALSE;
36 }
37 else
38 {
39 target->offset = 0;
40 target->pitch = ns->temp->linear_pitch;
41 target->bo = ns->temp->base.bo;
42 assert(target->pitch);
43 return TRUE;
44 }
45 }
46
47 int
48 nvfx_framebuffer_prepare(struct nvfx_context *nvfx)
49 {
50 struct pipe_framebuffer_state *fb = &nvfx->framebuffer;
51 int i, color_format = 0, zeta_format = 0;
52 int all_swizzled = 1;
53
54 if(!nvfx->is_nv4x)
55 assert(fb->nr_cbufs <= 2);
56 else
57 assert(fb->nr_cbufs <= 4);
58
59 for (i = 0; i < fb->nr_cbufs; i++) {
60 if (color_format) {
61 if(color_format != fb->cbufs[i]->format)
62 return -1;
63 } else
64 color_format = fb->cbufs[i]->format;
65
66 if(!nvfx_surface_swizzled_renderable(fb, fb->cbufs[i]))
67 all_swizzled = 0;
68 }
69
70 if (fb->zsbuf) {
71 /* TODO: return FALSE if we have a format not supporting a depth buffer (e.g. r8); currently those are not supported at all */
72 if(!nvfx_surface_swizzled_renderable(fb, fb->zsbuf))
73 all_swizzled = 0;
74
75 if(all_swizzled && util_format_get_blocksize(color_format) != util_format_get_blocksize(zeta_format))
76 all_swizzled = 0;
77 }
78
79 for (i = 0; i < fb->nr_cbufs; i++) {
80 if(!((struct nvfx_surface*)fb->cbufs[i])->temp && !all_swizzled && !nvfx_surface_linear_renderable(fb->cbufs[i]))
81 nvfx_surface_create_temp(&nvfx->pipe, fb->cbufs[i]);
82 }
83
84 if(fb->zsbuf) {
85 if(!((struct nvfx_surface*)fb->zsbuf)->temp && !all_swizzled && !nvfx_surface_linear_renderable(fb->zsbuf))
86 nvfx_surface_create_temp(&nvfx->pipe, fb->zsbuf);
87 }
88
89 return all_swizzled;
90 }
91
/*
 * Emit the framebuffer state to the hardware.
 *
 * prepare_result is the value returned by nvfx_framebuffer_prepare():
 * nonzero means all surfaces are rendered swizzled, zero means linear.
 * Fills nvfx->hw_rt[]/hw_zeta and nvfx->state.render_temps, then writes
 * the DMA objects, offsets, pitches, enables and viewport into the ring.
 */
void
nvfx_framebuffer_validate(struct nvfx_context *nvfx, unsigned prepare_result)
{
	struct pipe_framebuffer_state *fb = &nvfx->framebuffer;
	struct nouveau_channel *chan = nvfx->screen->base.channel;
	uint32_t rt_enable, rt_format;
	int i;
	unsigned rt_flags = NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM;
	unsigned w = fb->width;
	unsigned h = fb->height;

	/* one COLORn enable bit per bound color buffer; MRT when more than one */
	rt_enable = (NV34TCL_RT_ENABLE_COLOR0 << fb->nr_cbufs) - 1;
	if (rt_enable & (NV34TCL_RT_ENABLE_COLOR1 |
			 NV40TCL_RT_ENABLE_COLOR2 | NV40TCL_RT_ENABLE_COLOR3))
		rt_enable |= NV34TCL_RT_ENABLE_MRT;

	nvfx->state.render_temps = 0;

	/* bits 0..3 flag color buffers rendered via a temporary surface */
	for (i = 0; i < fb->nr_cbufs; i++)
		nvfx->state.render_temps |= nvfx_surface_get_render_target(fb->cbufs[i], prepare_result, &nvfx->hw_rt[i]) << i;

	/* clear the remaining hardware render target slots */
	for(; i < 4; ++i)
		nvfx->hw_rt[i].bo = 0;

	if (fb->zsbuf) {
		/* bit 7 flags a depth buffer rendered via a temporary surface */
		nvfx->state.render_temps |= nvfx_surface_get_render_target(fb->zsbuf, prepare_result, &nvfx->hw_zeta) << 7;

		assert(util_format_get_stride(fb->zsbuf->format, fb->width) <= nvfx->hw_zeta.pitch);
		assert(nvfx->hw_zeta.offset + nvfx->hw_zeta.pitch * fb->height <= nvfx->hw_zeta.bo->size);
	}

	if (prepare_result) {
		/* swizzled targets must have power-of-two dimensions */
		assert(!(fb->width & (fb->width - 1)) && !(fb->height & (fb->height - 1)));

		rt_format = NV34TCL_RT_FORMAT_TYPE_SWIZZLED |
			(util_logbase2(fb->width) << NV34TCL_RT_FORMAT_LOG2_WIDTH_SHIFT) |
			(util_logbase2(fb->height) << NV34TCL_RT_FORMAT_LOG2_HEIGHT_SHIFT);
	} else
		rt_format = NV34TCL_RT_FORMAT_TYPE_LINEAR;

	if(fb->nr_cbufs > 0) {
		switch (fb->cbufs[0]->format) {
		case PIPE_FORMAT_B8G8R8X8_UNORM:
			rt_format |= NV34TCL_RT_FORMAT_COLOR_X8R8G8B8;
			break;
		case PIPE_FORMAT_B8G8R8A8_UNORM:
		case 0:
			rt_format |= NV34TCL_RT_FORMAT_COLOR_A8R8G8B8;
			break;
		case PIPE_FORMAT_B5G6R5_UNORM:
			rt_format |= NV34TCL_RT_FORMAT_COLOR_R5G6B5;
			break;
		default:
			assert(0);
		}
	} else if(fb->zsbuf && util_format_get_blocksize(fb->zsbuf->format) == 2)
		/* no color buffer: pick a color format whose blocksize matches
		 * the depth buffer's */
		rt_format |= NV34TCL_RT_FORMAT_COLOR_R5G6B5;
	else
		rt_format |= NV34TCL_RT_FORMAT_COLOR_A8R8G8B8;

	if(fb->zsbuf) {
		switch (fb->zsbuf->format) {
		case PIPE_FORMAT_Z16_UNORM:
			rt_format |= NV34TCL_RT_FORMAT_ZETA_Z16;
			break;
		case PIPE_FORMAT_S8_USCALED_Z24_UNORM:
		case PIPE_FORMAT_X8Z24_UNORM:
		case 0:
			rt_format |= NV34TCL_RT_FORMAT_ZETA_Z24S8;
			break;
		default:
			assert(0);
		}
	} else if(fb->nr_cbufs && util_format_get_blocksize(fb->cbufs[0]->format) == 2)
		/* no depth buffer: pick a zeta format whose blocksize matches
		 * the color buffer's */
		rt_format |= NV34TCL_RT_FORMAT_ZETA_Z16;
	else
		rt_format |= NV34TCL_RT_FORMAT_ZETA_Z24S8;

	if ((rt_enable & NV34TCL_RT_ENABLE_COLOR0) || fb->zsbuf) {
		struct nvfx_render_target *rt0 = &nvfx->hw_rt[0];
		uint32_t pitch;

		/* with no color buffer bound, point RT0 at the depth buffer
		 * so the hardware still has a valid target (the hardlock fix
		 * from the commit title — depth-only rendering) */
		if(!(rt_enable & NV34TCL_RT_ENABLE_COLOR0))
			rt0 = &nvfx->hw_zeta;

		pitch = rt0->pitch;

		if(!nvfx->is_nv4x)
		{
			/* nv3x packs the zeta pitch into the high 16 bits of
			 * the COLOR0 pitch word */
			if (nvfx->hw_zeta.bo)
				pitch |= (nvfx->hw_zeta.pitch << 16);
			else
				pitch |= (pitch << 16);
		}

		//printf("rendering to bo %p [%i] at offset %i with pitch %i\n", rt0->bo, rt0->bo->handle, rt0->offset, pitch);

		OUT_RING(chan, RING_3D(NV34TCL_DMA_COLOR0, 1));
		OUT_RELOC(chan, rt0->bo, 0,
			      rt_flags | NOUVEAU_BO_OR,
			      chan->vram->handle, chan->gart->handle);
		OUT_RING(chan, RING_3D(NV34TCL_COLOR0_PITCH, 2));
		OUT_RING(chan, pitch);
		OUT_RELOC(chan, rt0->bo,
			      rt0->offset, rt_flags | NOUVEAU_BO_LOW,
			      0, 0);
	}

	if (rt_enable & NV34TCL_RT_ENABLE_COLOR1) {
		OUT_RING(chan, RING_3D(NV34TCL_DMA_COLOR1, 1));
		OUT_RELOC(chan, nvfx->hw_rt[1].bo, 0,
			      rt_flags | NOUVEAU_BO_OR,
			      chan->vram->handle, chan->gart->handle);
		OUT_RING(chan, RING_3D(NV34TCL_COLOR1_OFFSET, 2));
		OUT_RELOC(chan, nvfx->hw_rt[1].bo,
			      nvfx->hw_rt[1].offset, rt_flags | NOUVEAU_BO_LOW,
			      0, 0);
		OUT_RING(chan, nvfx->hw_rt[1].pitch);
	}

	if(nvfx->is_nv4x)
	{
		/* COLOR2/COLOR3 exist only on nv4x */
		if (rt_enable & NV40TCL_RT_ENABLE_COLOR2) {
			OUT_RING(chan, RING_3D(NV40TCL_DMA_COLOR2, 1));
			OUT_RELOC(chan, nvfx->hw_rt[2].bo, 0,
				      rt_flags | NOUVEAU_BO_OR,
				      chan->vram->handle, chan->gart->handle);
			OUT_RING(chan, RING_3D(NV40TCL_COLOR2_OFFSET, 1));
			OUT_RELOC(chan, nvfx->hw_rt[2].bo,
				      nvfx->hw_rt[2].offset, rt_flags | NOUVEAU_BO_LOW,
				      0, 0);
			OUT_RING(chan, RING_3D(NV40TCL_COLOR2_PITCH, 1));
			OUT_RING(chan, nvfx->hw_rt[2].pitch);
		}

		if (rt_enable & NV40TCL_RT_ENABLE_COLOR3) {
			OUT_RING(chan, RING_3D(NV40TCL_DMA_COLOR3, 1));
			OUT_RELOC(chan, nvfx->hw_rt[3].bo, 0,
				      rt_flags | NOUVEAU_BO_OR,
				      chan->vram->handle, chan->gart->handle);
			OUT_RING(chan, RING_3D(NV40TCL_COLOR3_OFFSET, 1));
			OUT_RELOC(chan, nvfx->hw_rt[3].bo,
				      nvfx->hw_rt[3].offset, rt_flags | NOUVEAU_BO_LOW,
				      0, 0);
			OUT_RING(chan, RING_3D(NV40TCL_COLOR3_PITCH, 1));
			OUT_RING(chan, nvfx->hw_rt[3].pitch);
		}
	}

	if (fb->zsbuf) {
		OUT_RING(chan, RING_3D(NV34TCL_DMA_ZETA, 1));
		OUT_RELOC(chan, nvfx->hw_zeta.bo, 0,
			      rt_flags | NOUVEAU_BO_OR,
			      chan->vram->handle, chan->gart->handle);
		OUT_RING(chan, RING_3D(NV34TCL_ZETA_OFFSET, 1));
		/* TODO: reverse engineer LMA */
		OUT_RELOC(chan, nvfx->hw_zeta.bo,
			     nvfx->hw_zeta.offset, rt_flags | NOUVEAU_BO_LOW, 0, 0);
		if(nvfx->is_nv4x) {
			OUT_RING(chan, RING_3D(NV40TCL_ZETA_PITCH, 1));
			OUT_RING(chan, nvfx->hw_zeta.pitch);
		}
	}
	else if(nvfx->is_nv4x) {
		/* no depth buffer: program a minimal valid zeta pitch
		 * (presumably to avoid hardware faults — see commit title) */
		OUT_RING(chan, RING_3D(NV40TCL_ZETA_PITCH, 1));
		OUT_RING(chan, 64);
	}

	OUT_RING(chan, RING_3D(NV34TCL_RT_ENABLE, 1));
	OUT_RING(chan, rt_enable);
	OUT_RING(chan, RING_3D(NV34TCL_RT_HORIZ, 3));
	OUT_RING(chan, (w << 16) | 0);
	OUT_RING(chan, (h << 16) | 0);
	OUT_RING(chan, rt_format);
	OUT_RING(chan, RING_3D(NV34TCL_VIEWPORT_HORIZ, 2));
	OUT_RING(chan, (w << 16) | 0);
	OUT_RING(chan, (h << 16) | 0);
	OUT_RING(chan, RING_3D(NV34TCL_VIEWPORT_CLIP_HORIZ(0), 2));
	OUT_RING(chan, ((w - 1) << 16) | 0);
	OUT_RING(chan, ((h - 1) << 16) | 0);
	OUT_RING(chan, RING_3D(0x1d88, 1));
	OUT_RING(chan, (1 << 12) | h);

	if(!nvfx->is_nv4x) {
		/* Wonder why this is needed, context should all be set to zero on init */
		/* TODO: we can most likely remove this, after putting it in context init */
		OUT_RING(chan, RING_3D(NV34TCL_VIEWPORT_TX_ORIGIN, 1));
		OUT_RING(chan, 0);
	}
}
282
/*
 * Re-emit the buffer-object relocations for the current render targets
 * and depth buffer after a channel flush, using dummy relocations
 * (NOUVEAU_BO_DUMMY) so the previously emitted commands are patched
 * rather than re-sent.
 */
void
nvfx_framebuffer_relocate(struct nvfx_context *nvfx)
{
	struct nouveau_channel *chan = nvfx->screen->base.channel;
	unsigned rt_flags = NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM;
	rt_flags |= NOUVEAU_BO_DUMMY;
	/* reserve ring space: up to 4 relocations per target, 5 targets */
	MARK_RING(chan, 20, 20);

/* Emit the DMA-object and OFFSET relocations for one render target.
 * `pfx` selects the NV34/NV40 method namespace and `name` the target
 * (COLORn or ZETA); token pasting builds the method names. Skipped
 * when no buffer object is bound in that slot. */
#define DO_(var, pfx, name) \
	if(var.bo) { \
		OUT_RELOC(chan, var.bo, RING_3D(pfx##TCL_DMA_##name, 1), rt_flags, 0, 0); \
		OUT_RELOC(chan, var.bo, 0, \
			rt_flags | NOUVEAU_BO_OR, \
			chan->vram->handle, chan->gart->handle); \
		OUT_RELOC(chan, var.bo, RING_3D(pfx##TCL_##name##_OFFSET, 1), rt_flags, 0, 0); \
		OUT_RELOC(chan, var.bo, \
			var.offset, rt_flags | NOUVEAU_BO_LOW, \
			0, 0); \
	}

#define DO(pfx, num) DO_(nvfx->hw_rt[num], pfx, COLOR##num)
	DO(NV34, 0);
	DO(NV34, 1);
	DO(NV40, 2);
	DO(NV40, 3);

	DO_(nvfx->hw_zeta, NV34, ZETA);
}