gallium: add a cap for VIEWPORT_SUBPIXEL_BITS (v2)
[mesa.git] / src / gallium / drivers / nouveau / nvc0 / nvc0_tex.c
1 /*
2 * Copyright 2008 Ben Skeggs
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22
23 #include "nvc0/nvc0_context.h"
24 #include "nvc0/nvc0_resource.h"
25 #include "nvc0/gm107_texture.xml.h"
26 #include "nvc0/nvc0_compute.xml.h"
27 #include "nv50/g80_texture.xml.h"
28 #include "nv50/g80_defs.xml.h"
29
30 #include "util/u_format.h"
31
/* Masks applied to the combined texture/sampler handles kept in
 * nvc0->tex_handles[][] by the NVE4 validation code in this file: the TIC id
 * is OR-ed into the low 20 bits and the TSC id is shifted into the upper
 * 12 bits.  OR-ing one of these masks into a handle marks the corresponding
 * half as having no entry bound; the mask is cleared again before a real id
 * is written.
 */
32 #define NVE4_TIC_ENTRY_INVALID 0x000fffff
33 #define NVE4_TSC_ENTRY_INVALID 0xfff00000
34
35 static inline uint32_t
36 nv50_tic_swizzle(const struct nvc0_format *fmt, unsigned swz, bool tex_int)
37 {
38 switch (swz) {
39 case PIPE_SWIZZLE_X : return fmt->tic.src_x;
40 case PIPE_SWIZZLE_Y: return fmt->tic.src_y;
41 case PIPE_SWIZZLE_Z : return fmt->tic.src_z;
42 case PIPE_SWIZZLE_W: return fmt->tic.src_w;
43 case PIPE_SWIZZLE_1:
44 return tex_int ? G80_TIC_SOURCE_ONE_INT : G80_TIC_SOURCE_ONE_FLOAT;
45 case PIPE_SWIZZLE_0:
46 default:
47 return G80_TIC_SOURCE_ZERO;
48 }
49 }
50
51 struct pipe_sampler_view *
52 nvc0_create_sampler_view(struct pipe_context *pipe,
53 struct pipe_resource *res,
54 const struct pipe_sampler_view *templ)
55 {
56 uint32_t flags = 0;
57
58 if (templ->target == PIPE_TEXTURE_RECT || templ->target == PIPE_BUFFER)
59 flags |= NV50_TEXVIEW_SCALED_COORDS;
60
61 return nvc0_create_texture_view(pipe, res, templ, flags, templ->target);
62 }
63
64 static struct pipe_sampler_view *
65 gm107_create_texture_view(struct pipe_context *pipe,
66 struct pipe_resource *texture,
67 const struct pipe_sampler_view *templ,
68 uint32_t flags,
69 enum pipe_texture_target target)
70 {
71 const struct util_format_description *desc;
72 const struct nvc0_format *fmt;
73 uint64_t address;
74 uint32_t *tic;
75 uint32_t swz[4];
76 uint32_t width, height;
77 uint32_t depth;
78 struct nv50_tic_entry *view;
79 struct nv50_miptree *mt;
80 bool tex_int;
81
82 view = MALLOC_STRUCT(nv50_tic_entry);
83 if (!view)
84 return NULL;
85 mt = nv50_miptree(texture);
86
87 view->pipe = *templ;
88 view->pipe.reference.count = 1;
89 view->pipe.texture = NULL;
90 view->pipe.context = pipe;
91
92 view->id = -1;
93
94 pipe_resource_reference(&view->pipe.texture, texture);
95
96 tic = &view->tic[0];
97
98 desc = util_format_description(view->pipe.format);
99 tex_int = util_format_is_pure_integer(view->pipe.format);
100
101 fmt = &nvc0_format_table[view->pipe.format];
102 swz[0] = nv50_tic_swizzle(fmt, view->pipe.swizzle_r, tex_int);
103 swz[1] = nv50_tic_swizzle(fmt, view->pipe.swizzle_g, tex_int);
104 swz[2] = nv50_tic_swizzle(fmt, view->pipe.swizzle_b, tex_int);
105 swz[3] = nv50_tic_swizzle(fmt, view->pipe.swizzle_a, tex_int);
106
107 tic[0] = fmt->tic.format << GM107_TIC2_0_COMPONENTS_SIZES__SHIFT;
108 tic[0] |= fmt->tic.type_r << GM107_TIC2_0_R_DATA_TYPE__SHIFT;
109 tic[0] |= fmt->tic.type_g << GM107_TIC2_0_G_DATA_TYPE__SHIFT;
110 tic[0] |= fmt->tic.type_b << GM107_TIC2_0_B_DATA_TYPE__SHIFT;
111 tic[0] |= fmt->tic.type_a << GM107_TIC2_0_A_DATA_TYPE__SHIFT;
112 tic[0] |= swz[0] << GM107_TIC2_0_X_SOURCE__SHIFT;
113 tic[0] |= swz[1] << GM107_TIC2_0_Y_SOURCE__SHIFT;
114 tic[0] |= swz[2] << GM107_TIC2_0_Z_SOURCE__SHIFT;
115 tic[0] |= swz[3] << GM107_TIC2_0_W_SOURCE__SHIFT;
116
117 address = mt->base.address;
118
119 tic[3] = GM107_TIC2_3_LOD_ANISO_QUALITY_2;
120 tic[4] = GM107_TIC2_4_SECTOR_PROMOTION_PROMOTE_TO_2_V;
121 tic[4] |= GM107_TIC2_4_BORDER_SIZE_SAMPLER_COLOR;
122
123 if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
124 tic[4] |= GM107_TIC2_4_SRGB_CONVERSION;
125
126 if (!(flags & NV50_TEXVIEW_SCALED_COORDS))
127 tic[5] = GM107_TIC2_5_NORMALIZED_COORDS;
128 else
129 tic[5] = 0;
130
131 /* check for linear storage type */
132 if (unlikely(!nouveau_bo_memtype(nv04_resource(texture)->bo))) {
133 if (texture->target == PIPE_BUFFER) {
134 assert(!(tic[5] & GM107_TIC2_5_NORMALIZED_COORDS));
135 width = view->pipe.u.buf.last_element - view->pipe.u.buf.first_element;
136 address +=
137 view->pipe.u.buf.first_element * desc->block.bits / 8;
138 tic[2] = GM107_TIC2_2_HEADER_VERSION_ONE_D_BUFFER;
139 tic[3] |= width >> 16;
140 tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_ONE_D_BUFFER;
141 tic[4] |= width & 0xffff;
142 } else {
143 assert(!(mt->level[0].pitch & 0x1f));
144 /* must be 2D texture without mip maps */
145 tic[2] = GM107_TIC2_2_HEADER_VERSION_PITCH;
146 tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_TWO_D_NO_MIPMAP;
147 tic[3] |= mt->level[0].pitch >> 5;
148 tic[4] |= mt->base.base.width0 - 1;
149 tic[5] |= 0 << GM107_TIC2_5_DEPTH_MINUS_ONE__SHIFT;
150 tic[5] |= mt->base.base.height0 - 1;
151 }
152 tic[1] = address;
153 tic[2] |= address >> 32;
154 tic[6] = 0;
155 tic[7] = 0;
156 return &view->pipe;
157 }
158
159 tic[2] = GM107_TIC2_2_HEADER_VERSION_BLOCKLINEAR;
160 tic[3] |=
161 ((mt->level[0].tile_mode & 0x0f0) >> 4 << 3) |
162 ((mt->level[0].tile_mode & 0xf00) >> 8 << 6);
163
164 depth = MAX2(mt->base.base.array_size, mt->base.base.depth0);
165
166 if (mt->base.base.array_size > 1) {
167 /* there doesn't seem to be a base layer field in TIC */
168 address += view->pipe.u.tex.first_layer * mt->layer_stride;
169 depth = view->pipe.u.tex.last_layer - view->pipe.u.tex.first_layer + 1;
170 }
171 tic[1] = address;
172 tic[2] |= address >> 32;
173
174 switch (target) {
175 case PIPE_TEXTURE_1D:
176 tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_ONE_D;
177 break;
178 case PIPE_TEXTURE_2D:
179 tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_TWO_D;
180 break;
181 case PIPE_TEXTURE_RECT:
182 tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_TWO_D;
183 break;
184 case PIPE_TEXTURE_3D:
185 tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_THREE_D;
186 break;
187 case PIPE_TEXTURE_CUBE:
188 depth /= 6;
189 tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_CUBEMAP;
190 break;
191 case PIPE_TEXTURE_1D_ARRAY:
192 tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_ONE_D_ARRAY;
193 break;
194 case PIPE_TEXTURE_2D_ARRAY:
195 tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_TWO_D_ARRAY;
196 break;
197 case PIPE_TEXTURE_CUBE_ARRAY:
198 depth /= 6;
199 tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_CUBE_ARRAY;
200 break;
201 default:
202 unreachable("unexpected/invalid texture target");
203 }
204
205 tic[3] |= (flags & NV50_TEXVIEW_FILTER_MSAA8) ?
206 GM107_TIC2_3_USE_HEADER_OPT_CONTROL :
207 GM107_TIC2_3_LOD_ANISO_QUALITY_HIGH |
208 GM107_TIC2_3_LOD_ISO_QUALITY_HIGH;
209
210 if (flags & NV50_TEXVIEW_ACCESS_RESOLVE) {
211 width = mt->base.base.width0 << mt->ms_x;
212 height = mt->base.base.height0 << mt->ms_y;
213 } else {
214 width = mt->base.base.width0;
215 height = mt->base.base.height0;
216 }
217
218 tic[4] |= width - 1;
219
220 tic[5] |= (height - 1) & 0xffff;
221 tic[5] |= (depth - 1) << GM107_TIC2_5_DEPTH_MINUS_ONE__SHIFT;
222 tic[3] |= mt->base.base.last_level << GM107_TIC2_3_MAX_MIP_LEVEL__SHIFT;
223
224 /* sampling points: (?) */
225 if ((flags & NV50_TEXVIEW_ACCESS_RESOLVE) && mt->ms_x > 1) {
226 tic[6] = GM107_TIC2_6_ANISO_FINE_SPREAD_MODIFIER_CONST_TWO;
227 tic[6] |= GM107_TIC2_6_MAX_ANISOTROPY_2_TO_1;
228 } else {
229 tic[6] = GM107_TIC2_6_ANISO_FINE_SPREAD_FUNC_TWO;
230 tic[6] |= GM107_TIC2_6_ANISO_COARSE_SPREAD_FUNC_ONE;
231 }
232
233 tic[7] = (view->pipe.u.tex.last_level << 4) | view->pipe.u.tex.first_level;
234 tic[7] |= mt->ms_mode << GM107_TIC2_7_MULTI_SAMPLE_COUNT__SHIFT;
235
236 return &view->pipe;
237 }
238
239 struct pipe_sampler_view *
240 gm107_create_texture_view_from_image(struct pipe_context *pipe,
241 const struct pipe_image_view *view)
242 {
243 struct nv04_resource *res = nv04_resource(view->resource);
244 struct pipe_sampler_view templ = {};
245 enum pipe_texture_target target;
246 uint32_t flags = 0;
247
248 if (!res)
249 return NULL;
250 target = res->base.target;
251
252 if (target == PIPE_TEXTURE_CUBE || target == PIPE_TEXTURE_CUBE_ARRAY)
253 target = PIPE_TEXTURE_2D_ARRAY;
254
255 templ.format = view->format;
256 templ.swizzle_r = PIPE_SWIZZLE_X;
257 templ.swizzle_g = PIPE_SWIZZLE_Y;
258 templ.swizzle_b = PIPE_SWIZZLE_Z;
259 templ.swizzle_a = PIPE_SWIZZLE_W;
260
261 if (target == PIPE_BUFFER) {
262 templ.u.buf.first_element = view->u.buf.first_element;
263 templ.u.buf.last_element = view->u.buf.last_element;
264 } else {
265 templ.u.tex.first_layer = view->u.tex.first_layer;
266 templ.u.tex.last_layer = view->u.tex.last_layer;
267 templ.u.tex.first_level = templ.u.tex.last_level = view->u.tex.level;
268 }
269
270 flags = NV50_TEXVIEW_SCALED_COORDS;
271
272 return nvc0_create_texture_view(pipe, &res->base, &templ, flags, target);
273 }
274
275 static struct pipe_sampler_view *
276 gf100_create_texture_view(struct pipe_context *pipe,
277 struct pipe_resource *texture,
278 const struct pipe_sampler_view *templ,
279 uint32_t flags,
280 enum pipe_texture_target target)
281 {
282 const struct util_format_description *desc;
283 const struct nvc0_format *fmt;
284 uint64_t address;
285 uint32_t *tic;
286 uint32_t swz[4];
287 uint32_t width, height;
288 uint32_t depth;
289 uint32_t tex_fmt;
290 struct nv50_tic_entry *view;
291 struct nv50_miptree *mt;
292 bool tex_int;
293
294 view = MALLOC_STRUCT(nv50_tic_entry);
295 if (!view)
296 return NULL;
297 mt = nv50_miptree(texture);
298
299 view->pipe = *templ;
300 view->pipe.reference.count = 1;
301 view->pipe.texture = NULL;
302 view->pipe.context = pipe;
303
304 view->id = -1;
305
306 pipe_resource_reference(&view->pipe.texture, texture);
307
308 tic = &view->tic[0];
309
310 desc = util_format_description(view->pipe.format);
311
312 fmt = &nvc0_format_table[view->pipe.format];
313
314 tex_int = util_format_is_pure_integer(view->pipe.format);
315 tex_fmt = fmt->tic.format & 0x3f;
316
317 swz[0] = nv50_tic_swizzle(fmt, view->pipe.swizzle_r, tex_int);
318 swz[1] = nv50_tic_swizzle(fmt, view->pipe.swizzle_g, tex_int);
319 swz[2] = nv50_tic_swizzle(fmt, view->pipe.swizzle_b, tex_int);
320 swz[3] = nv50_tic_swizzle(fmt, view->pipe.swizzle_a, tex_int);
321 tic[0] = (tex_fmt << G80_TIC_0_COMPONENTS_SIZES__SHIFT) |
322 (fmt->tic.type_r << G80_TIC_0_R_DATA_TYPE__SHIFT) |
323 (fmt->tic.type_g << G80_TIC_0_G_DATA_TYPE__SHIFT) |
324 (fmt->tic.type_b << G80_TIC_0_B_DATA_TYPE__SHIFT) |
325 (fmt->tic.type_a << G80_TIC_0_A_DATA_TYPE__SHIFT) |
326 (swz[0] << G80_TIC_0_X_SOURCE__SHIFT) |
327 (swz[1] << G80_TIC_0_Y_SOURCE__SHIFT) |
328 (swz[2] << G80_TIC_0_Z_SOURCE__SHIFT) |
329 (swz[3] << G80_TIC_0_W_SOURCE__SHIFT) |
330 ((fmt->tic.format & 0x40) << (GK20A_TIC_0_USE_COMPONENT_SIZES_EXTENDED__SHIFT - 6));
331
332 address = mt->base.address;
333
334 tic[2] = 0x10001000 | G80_TIC_2_BORDER_SOURCE_COLOR;
335
336 if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
337 tic[2] |= G80_TIC_2_SRGB_CONVERSION;
338
339 if (!(flags & NV50_TEXVIEW_SCALED_COORDS))
340 tic[2] |= G80_TIC_2_NORMALIZED_COORDS;
341
342 /* check for linear storage type */
343 if (unlikely(!nouveau_bo_memtype(nv04_resource(texture)->bo))) {
344 if (texture->target == PIPE_BUFFER) {
345 assert(!(tic[2] & G80_TIC_2_NORMALIZED_COORDS));
346 address +=
347 view->pipe.u.buf.first_element * desc->block.bits / 8;
348 tic[2] |= G80_TIC_2_LAYOUT_PITCH | G80_TIC_2_TEXTURE_TYPE_ONE_D_BUFFER;
349 tic[3] = 0;
350 tic[4] = /* width */
351 view->pipe.u.buf.last_element - view->pipe.u.buf.first_element + 1;
352 tic[5] = 0;
353 } else {
354 /* must be 2D texture without mip maps */
355 tic[2] |= G80_TIC_2_LAYOUT_PITCH | G80_TIC_2_TEXTURE_TYPE_TWO_D_NO_MIPMAP;
356 tic[3] = mt->level[0].pitch;
357 tic[4] = mt->base.base.width0;
358 tic[5] = (1 << 16) | mt->base.base.height0;
359 }
360 tic[6] =
361 tic[7] = 0;
362 tic[1] = address;
363 tic[2] |= address >> 32;
364 return &view->pipe;
365 }
366
367 tic[2] |=
368 ((mt->level[0].tile_mode & 0x0f0) << (22 - 4)) |
369 ((mt->level[0].tile_mode & 0xf00) << (25 - 8));
370
371 depth = MAX2(mt->base.base.array_size, mt->base.base.depth0);
372
373 if (mt->base.base.array_size > 1) {
374 /* there doesn't seem to be a base layer field in TIC */
375 address += view->pipe.u.tex.first_layer * mt->layer_stride;
376 depth = view->pipe.u.tex.last_layer - view->pipe.u.tex.first_layer + 1;
377 }
378 tic[1] = address;
379 tic[2] |= address >> 32;
380
381 switch (target) {
382 case PIPE_TEXTURE_1D:
383 tic[2] |= G80_TIC_2_TEXTURE_TYPE_ONE_D;
384 break;
385 case PIPE_TEXTURE_2D:
386 tic[2] |= G80_TIC_2_TEXTURE_TYPE_TWO_D;
387 break;
388 case PIPE_TEXTURE_RECT:
389 tic[2] |= G80_TIC_2_TEXTURE_TYPE_TWO_D;
390 break;
391 case PIPE_TEXTURE_3D:
392 tic[2] |= G80_TIC_2_TEXTURE_TYPE_THREE_D;
393 break;
394 case PIPE_TEXTURE_CUBE:
395 depth /= 6;
396 tic[2] |= G80_TIC_2_TEXTURE_TYPE_CUBEMAP;
397 break;
398 case PIPE_TEXTURE_1D_ARRAY:
399 tic[2] |= G80_TIC_2_TEXTURE_TYPE_ONE_D_ARRAY;
400 break;
401 case PIPE_TEXTURE_2D_ARRAY:
402 tic[2] |= G80_TIC_2_TEXTURE_TYPE_TWO_D_ARRAY;
403 break;
404 case PIPE_TEXTURE_CUBE_ARRAY:
405 depth /= 6;
406 tic[2] |= G80_TIC_2_TEXTURE_TYPE_CUBE_ARRAY;
407 break;
408 default:
409 unreachable("unexpected/invalid texture target");
410 }
411
412 tic[3] = (flags & NV50_TEXVIEW_FILTER_MSAA8) ? 0x20000000 : 0x00300000;
413
414 if (flags & NV50_TEXVIEW_ACCESS_RESOLVE) {
415 width = mt->base.base.width0 << mt->ms_x;
416 height = mt->base.base.height0 << mt->ms_y;
417 } else {
418 width = mt->base.base.width0;
419 height = mt->base.base.height0;
420 }
421
422 tic[4] = (1 << 31) | width;
423
424 tic[5] = height & 0xffff;
425 tic[5] |= depth << 16;
426 tic[5] |= mt->base.base.last_level << 28;
427
428 /* sampling points: (?) */
429 if (flags & NV50_TEXVIEW_ACCESS_RESOLVE)
430 tic[6] = (mt->ms_x > 1) ? 0x88000000 : 0x03000000;
431 else
432 tic[6] = 0x03000000;
433
434 tic[7] = (view->pipe.u.tex.last_level << 4) | view->pipe.u.tex.first_level;
435 tic[7] |= mt->ms_mode << 12;
436
437 return &view->pipe;
438 }
439
440 struct pipe_sampler_view *
441 nvc0_create_texture_view(struct pipe_context *pipe,
442 struct pipe_resource *texture,
443 const struct pipe_sampler_view *templ,
444 uint32_t flags,
445 enum pipe_texture_target target)
446 {
447 if (nvc0_context(pipe)->screen->tic.maxwell)
448 return gm107_create_texture_view(pipe, texture, templ, flags, target);
449 return gf100_create_texture_view(pipe, texture, templ, flags, target);
450 }
451
452 void
453 nvc0_update_tic(struct nvc0_context *nvc0, struct nv50_tic_entry *tic,
454 struct nv04_resource *res)
455 {
456 uint64_t address = res->address;
457 if (res->base.target != PIPE_BUFFER)
458 return;
459 address += tic->pipe.u.buf.first_element *
460 util_format_get_blocksize(tic->pipe.format);
461 if (tic->tic[1] == (uint32_t)address &&
462 (tic->tic[2] & 0xff) == address >> 32)
463 return;
464
465 nvc0_screen_tic_unlock(nvc0->screen, tic);
466 tic->id = -1;
467 tic->tic[1] = address;
468 tic->tic[2] &= 0xffffff00;
469 tic->tic[2] |= address >> 32;
470 }
471
472 bool
473 nvc0_validate_tic(struct nvc0_context *nvc0, int s)
474 {
475 uint32_t commands[32];
476 struct nouveau_pushbuf *push = nvc0->base.pushbuf;
477 struct nouveau_bo *txc = nvc0->screen->txc;
478 unsigned i;
479 unsigned n = 0;
480 bool need_flush = false;
481
482 for (i = 0; i < nvc0->num_textures[s]; ++i) {
483 struct nv50_tic_entry *tic = nv50_tic_entry(nvc0->textures[s][i]);
484 struct nv04_resource *res;
485 const bool dirty = !!(nvc0->textures_dirty[s] & (1 << i));
486
487 if (!tic) {
488 if (dirty)
489 commands[n++] = (i << 1) | 0;
490 continue;
491 }
492 res = nv04_resource(tic->pipe.texture);
493 nvc0_update_tic(nvc0, tic, res);
494
495 if (tic->id < 0) {
496 tic->id = nvc0_screen_tic_alloc(nvc0->screen, tic);
497
498 PUSH_SPACE(push, 17);
499 BEGIN_NVC0(push, NVC0_M2MF(OFFSET_OUT_HIGH), 2);
500 PUSH_DATAh(push, txc->offset + (tic->id * 32));
501 PUSH_DATA (push, txc->offset + (tic->id * 32));
502 BEGIN_NVC0(push, NVC0_M2MF(LINE_LENGTH_IN), 2);
503 PUSH_DATA (push, 32);
504 PUSH_DATA (push, 1);
505 BEGIN_NVC0(push, NVC0_M2MF(EXEC), 1);
506 PUSH_DATA (push, 0x100111);
507 BEGIN_NIC0(push, NVC0_M2MF(DATA), 8);
508 PUSH_DATAp(push, &tic->tic[0], 8);
509
510 need_flush = true;
511 } else
512 if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {
513 if (unlikely(s == 5))
514 BEGIN_NVC0(push, NVC0_CP(TEX_CACHE_CTL), 1);
515 else
516 BEGIN_NVC0(push, NVC0_3D(TEX_CACHE_CTL), 1);
517 PUSH_DATA (push, (tic->id << 4) | 1);
518 NOUVEAU_DRV_STAT(&nvc0->screen->base, tex_cache_flush_count, 1);
519 }
520 nvc0->screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32);
521
522 res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING;
523 res->status |= NOUVEAU_BUFFER_STATUS_GPU_READING;
524
525 if (!dirty)
526 continue;
527 commands[n++] = (tic->id << 9) | (i << 1) | 1;
528
529 if (unlikely(s == 5))
530 BCTX_REFN(nvc0->bufctx_cp, CP_TEX(i), res, RD);
531 else
532 BCTX_REFN(nvc0->bufctx_3d, 3D_TEX(s, i), res, RD);
533 }
534 for (; i < nvc0->state.num_textures[s]; ++i)
535 commands[n++] = (i << 1) | 0;
536
537 nvc0->state.num_textures[s] = nvc0->num_textures[s];
538
539 if (n) {
540 if (unlikely(s == 5))
541 BEGIN_NIC0(push, NVC0_CP(BIND_TIC), n);
542 else
543 BEGIN_NIC0(push, NVC0_3D(BIND_TIC(s)), n);
544 PUSH_DATAp(push, commands, n);
545 }
546 nvc0->textures_dirty[s] = 0;
547
548 return need_flush;
549 }
550
551 static bool
552 nve4_validate_tic(struct nvc0_context *nvc0, unsigned s)
553 {
554 struct nouveau_bo *txc = nvc0->screen->txc;
555 struct nouveau_pushbuf *push = nvc0->base.pushbuf;
556 unsigned i;
557 bool need_flush = false;
558
559 for (i = 0; i < nvc0->num_textures[s]; ++i) {
560 struct nv50_tic_entry *tic = nv50_tic_entry(nvc0->textures[s][i]);
561 struct nv04_resource *res;
562 const bool dirty = !!(nvc0->textures_dirty[s] & (1 << i));
563
564 if (!tic) {
565 nvc0->tex_handles[s][i] |= NVE4_TIC_ENTRY_INVALID;
566 continue;
567 }
568 res = nv04_resource(tic->pipe.texture);
569 nvc0_update_tic(nvc0, tic, res);
570
571 if (tic->id < 0) {
572 tic->id = nvc0_screen_tic_alloc(nvc0->screen, tic);
573
574 PUSH_SPACE(push, 16);
575 BEGIN_NVC0(push, NVE4_P2MF(UPLOAD_DST_ADDRESS_HIGH), 2);
576 PUSH_DATAh(push, txc->offset + (tic->id * 32));
577 PUSH_DATA (push, txc->offset + (tic->id * 32));
578 BEGIN_NVC0(push, NVE4_P2MF(UPLOAD_LINE_LENGTH_IN), 2);
579 PUSH_DATA (push, 32);
580 PUSH_DATA (push, 1);
581 BEGIN_1IC0(push, NVE4_P2MF(UPLOAD_EXEC), 9);
582 PUSH_DATA (push, 0x1001);
583 PUSH_DATAp(push, &tic->tic[0], 8);
584
585 need_flush = true;
586 } else
587 if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {
588 BEGIN_NVC0(push, NVC0_3D(TEX_CACHE_CTL), 1);
589 PUSH_DATA (push, (tic->id << 4) | 1);
590 }
591 nvc0->screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32);
592
593 res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING;
594 res->status |= NOUVEAU_BUFFER_STATUS_GPU_READING;
595
596 nvc0->tex_handles[s][i] &= ~NVE4_TIC_ENTRY_INVALID;
597 nvc0->tex_handles[s][i] |= tic->id;
598 if (dirty)
599 BCTX_REFN(nvc0->bufctx_3d, 3D_TEX(s, i), res, RD);
600 }
601 for (; i < nvc0->state.num_textures[s]; ++i) {
602 nvc0->tex_handles[s][i] |= NVE4_TIC_ENTRY_INVALID;
603 nvc0->textures_dirty[s] |= 1 << i;
604 }
605
606 nvc0->state.num_textures[s] = nvc0->num_textures[s];
607
608 return need_flush;
609 }
610
611 void nvc0_validate_textures(struct nvc0_context *nvc0)
612 {
613 bool need_flush = false;
614 int i;
615
616 for (i = 0; i < 5; i++) {
617 if (nvc0->screen->base.class_3d >= NVE4_3D_CLASS)
618 need_flush |= nve4_validate_tic(nvc0, i);
619 else
620 need_flush |= nvc0_validate_tic(nvc0, i);
621 }
622
623 if (need_flush) {
624 BEGIN_NVC0(nvc0->base.pushbuf, NVC0_3D(TIC_FLUSH), 1);
625 PUSH_DATA (nvc0->base.pushbuf, 0);
626 }
627
628 if (nvc0->screen->base.class_3d < NVE4_3D_CLASS) {
629 /* Invalidate all CP textures because they are aliased. */
630 for (int i = 0; i < nvc0->num_textures[5]; i++)
631 nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_CP_TEX(i));
632 nvc0->textures_dirty[5] = ~0;
633 nvc0->dirty_cp |= NVC0_NEW_CP_TEXTURES;
634 }
635 }
636
637 bool
638 nvc0_validate_tsc(struct nvc0_context *nvc0, int s)
639 {
640 uint32_t commands[16];
641 struct nouveau_pushbuf *push = nvc0->base.pushbuf;
642 unsigned i;
643 unsigned n = 0;
644 bool need_flush = false;
645
646 for (i = 0; i < nvc0->num_samplers[s]; ++i) {
647 struct nv50_tsc_entry *tsc = nv50_tsc_entry(nvc0->samplers[s][i]);
648
649 if (!(nvc0->samplers_dirty[s] & (1 << i)))
650 continue;
651 if (!tsc) {
652 commands[n++] = (i << 4) | 0;
653 continue;
654 }
655 nvc0->seamless_cube_map = tsc->seamless_cube_map;
656 if (tsc->id < 0) {
657 tsc->id = nvc0_screen_tsc_alloc(nvc0->screen, tsc);
658
659 nvc0_m2mf_push_linear(&nvc0->base, nvc0->screen->txc,
660 65536 + tsc->id * 32, NV_VRAM_DOMAIN(&nvc0->screen->base),
661 32, tsc->tsc);
662 need_flush = true;
663 }
664 nvc0->screen->tsc.lock[tsc->id / 32] |= 1 << (tsc->id % 32);
665
666 commands[n++] = (tsc->id << 12) | (i << 4) | 1;
667 }
668 for (; i < nvc0->state.num_samplers[s]; ++i)
669 commands[n++] = (i << 4) | 0;
670
671 nvc0->state.num_samplers[s] = nvc0->num_samplers[s];
672
673 if (n) {
674 if (unlikely(s == 5))
675 BEGIN_NIC0(push, NVC0_CP(BIND_TSC), n);
676 else
677 BEGIN_NIC0(push, NVC0_3D(BIND_TSC(s)), n);
678 PUSH_DATAp(push, commands, n);
679 }
680 nvc0->samplers_dirty[s] = 0;
681
682 return need_flush;
683 }
684
685 bool
686 nve4_validate_tsc(struct nvc0_context *nvc0, int s)
687 {
688 struct nouveau_bo *txc = nvc0->screen->txc;
689 struct nouveau_pushbuf *push = nvc0->base.pushbuf;
690 unsigned i;
691 bool need_flush = false;
692
693 for (i = 0; i < nvc0->num_samplers[s]; ++i) {
694 struct nv50_tsc_entry *tsc = nv50_tsc_entry(nvc0->samplers[s][i]);
695
696 if (!tsc) {
697 nvc0->tex_handles[s][i] |= NVE4_TSC_ENTRY_INVALID;
698 continue;
699 }
700 if (tsc->id < 0) {
701 tsc->id = nvc0_screen_tsc_alloc(nvc0->screen, tsc);
702
703 PUSH_SPACE(push, 16);
704 BEGIN_NVC0(push, NVE4_P2MF(UPLOAD_DST_ADDRESS_HIGH), 2);
705 PUSH_DATAh(push, txc->offset + 65536 + (tsc->id * 32));
706 PUSH_DATA (push, txc->offset + 65536 + (tsc->id * 32));
707 BEGIN_NVC0(push, NVE4_P2MF(UPLOAD_LINE_LENGTH_IN), 2);
708 PUSH_DATA (push, 32);
709 PUSH_DATA (push, 1);
710 BEGIN_1IC0(push, NVE4_P2MF(UPLOAD_EXEC), 9);
711 PUSH_DATA (push, 0x1001);
712 PUSH_DATAp(push, &tsc->tsc[0], 8);
713
714 need_flush = true;
715 }
716 nvc0->screen->tsc.lock[tsc->id / 32] |= 1 << (tsc->id % 32);
717
718 nvc0->tex_handles[s][i] &= ~NVE4_TSC_ENTRY_INVALID;
719 nvc0->tex_handles[s][i] |= tsc->id << 20;
720 }
721 for (; i < nvc0->state.num_samplers[s]; ++i) {
722 nvc0->tex_handles[s][i] |= NVE4_TSC_ENTRY_INVALID;
723 nvc0->samplers_dirty[s] |= 1 << i;
724 }
725
726 nvc0->state.num_samplers[s] = nvc0->num_samplers[s];
727
728 return need_flush;
729 }
730
731 void nvc0_validate_samplers(struct nvc0_context *nvc0)
732 {
733 bool need_flush = false;
734 int i;
735
736 for (i = 0; i < 5; i++) {
737 if (nvc0->screen->base.class_3d >= NVE4_3D_CLASS)
738 need_flush |= nve4_validate_tsc(nvc0, i);
739 else
740 need_flush |= nvc0_validate_tsc(nvc0, i);
741 }
742
743 if (need_flush) {
744 BEGIN_NVC0(nvc0->base.pushbuf, NVC0_3D(TSC_FLUSH), 1);
745 PUSH_DATA (nvc0->base.pushbuf, 0);
746 }
747
748 if (nvc0->screen->base.class_3d < NVE4_3D_CLASS) {
749 /* Invalidate all CP samplers because they are aliased. */
750 nvc0->samplers_dirty[5] = ~0;
751 nvc0->dirty_cp |= NVC0_NEW_CP_SAMPLERS;
752 }
753 }
754
755 /* Upload the "diagonal" entries for the possible texture sources ($t == $s).
756 * At some point we might want to get a list of the combinations used by a
757 * shader and fill in those entries instead of having it extract the handles.
758 */
759 void
760 nve4_set_tex_handles(struct nvc0_context *nvc0)
761 {
762 struct nouveau_pushbuf *push = nvc0->base.pushbuf;
763 struct nvc0_screen *screen = nvc0->screen;
764 unsigned s;
765
766 if (nvc0->screen->base.class_3d < NVE4_3D_CLASS)
767 return;
768
769 for (s = 0; s < 5; ++s) {
770 uint32_t dirty = nvc0->textures_dirty[s] | nvc0->samplers_dirty[s];
771 if (!dirty)
772 continue;
773 BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
774 PUSH_DATA (push, NVC0_CB_AUX_SIZE);
775 PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
776 PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
777 do {
778 int i = ffs(dirty) - 1;
779 dirty &= ~(1 << i);
780
781 BEGIN_NVC0(push, NVC0_3D(CB_POS), 2);
782 PUSH_DATA (push, (8 + i) * 4);
783 PUSH_DATA (push, nvc0->tex_handles[s][i]);
784 } while (dirty);
785
786 nvc0->textures_dirty[s] = 0;
787 nvc0->samplers_dirty[s] = 0;
788 }
789 }
790
791
/* Forward declarations of per-pipe-format lookup tables used by the surface
 * code below (presumably defined with their contents further down in this
 * file -- not visible here).
 *
 * As used by nve4_set_surface_info():
 *  - nve4_su_format_map: value stored in info[1]; a zero entry is treated
 *    as "format not supported".
 *  - nve4_su_format_aux_map: bits 15:12 are read as log2 of the bytes per
 *    pixel, bits 11:8 are OR-ed into info[1], bits 7:0 are shifted into
 *    info[2] at bit 22.
 *  - nve4_suldp_lib_offset: offset added to screen->lib_code->start for the
 *    fallback entry.
 */
792 static const uint8_t nve4_su_format_map[PIPE_FORMAT_COUNT];
793 static const uint16_t nve4_su_format_aux_map[PIPE_FORMAT_COUNT];
794 static const uint16_t nve4_suldp_lib_offset[PIPE_FORMAT_COUNT];
795
796 static void
797 nvc0_get_surface_dims(struct pipe_image_view *view, int *width, int *height,
798 int *depth)
799 {
800 struct nv04_resource *res = nv04_resource(view->resource);
801 int level;
802
803 *width = *height = *depth = 1;
804 if (res->base.target == PIPE_BUFFER) {
805 *width = view->u.buf.last_element - view->u.buf.first_element + 1;
806 return;
807 }
808
809 level = view->u.tex.level;
810 *width = u_minify(view->resource->width0, level);
811 *height = u_minify(view->resource->height0, level);
812 *depth = u_minify(view->resource->depth0, level);
813
814 switch (res->base.target) {
815 case PIPE_TEXTURE_1D_ARRAY:
816 case PIPE_TEXTURE_2D_ARRAY:
817 case PIPE_TEXTURE_CUBE:
818 case PIPE_TEXTURE_CUBE_ARRAY:
819 *depth = view->u.tex.last_layer - view->u.tex.first_layer + 1;
820 break;
821 case PIPE_TEXTURE_1D:
822 case PIPE_TEXTURE_2D:
823 case PIPE_TEXTURE_RECT:
824 case PIPE_TEXTURE_3D:
825 break;
826 default:
827 assert(!"unexpected texture target");
828 break;
829 }
830 }
831
832 void
833 nvc0_mark_image_range_valid(const struct pipe_image_view *view)
834 {
835 struct nv04_resource *res = (struct nv04_resource *)view->resource;
836 const struct util_format_description *desc;
837 unsigned stride;
838
839 assert(view->resource->target == PIPE_BUFFER);
840
841 desc = util_format_description(view->format);
842 stride = desc->block.bits / 8;
843
844 util_range_add(&res->valid_buffer_range,
845 stride * (view->u.buf.first_element),
846 stride * (view->u.buf.last_element + 1));
847 }
848
849 void
850 nve4_set_surface_info(struct nouveau_pushbuf *push,
851 struct pipe_image_view *view,
852 struct nvc0_context *nvc0)
853 {
854 struct nvc0_screen *screen = nvc0->screen;
855 struct nv04_resource *res;
856 uint64_t address;
857 uint32_t *const info = push->cur;
858 int width, height, depth;
859 uint8_t log2cpp;
860
861 if (view && !nve4_su_format_map[view->format])
862 NOUVEAU_ERR("unsupported surface format, try is_format_supported() !\n");
863
864 push->cur += 16;
865
866 if (!view || !nve4_su_format_map[view->format]) {
867 memset(info, 0, 16 * sizeof(*info));
868
869 info[0] = 0xbadf0000;
870 info[1] = 0x80004000;
871 info[12] = nve4_suldp_lib_offset[PIPE_FORMAT_R32G32B32A32_UINT] +
872 screen->lib_code->start;
873 return;
874 }
875 res = nv04_resource(view->resource);
876
877 address = res->address;
878
879 /* get surface dimensions based on the target. */
880 nvc0_get_surface_dims(view, &width, &height, &depth);
881
882 info[8] = width;
883 info[9] = height;
884 info[10] = depth;
885 switch (res->base.target) {
886 case PIPE_TEXTURE_1D_ARRAY:
887 info[11] = 1;
888 break;
889 case PIPE_TEXTURE_2D:
890 case PIPE_TEXTURE_RECT:
891 info[11] = 2;
892 break;
893 case PIPE_TEXTURE_3D:
894 info[11] = 3;
895 break;
896 case PIPE_TEXTURE_2D_ARRAY:
897 case PIPE_TEXTURE_CUBE:
898 case PIPE_TEXTURE_CUBE_ARRAY:
899 info[11] = 4;
900 break;
901 default:
902 info[11] = 0;
903 break;
904 }
905 log2cpp = (0xf000 & nve4_su_format_aux_map[view->format]) >> 12;
906
907 /* Stick the blockwidth (ie. number of bytes per pixel) to check if the
908 * format doesn't mismatch. */
909 info[12] = util_format_get_blocksize(view->format);
910
911 /* limit in bytes for raw access */
912 info[13] = (0x06 << 22) | ((width << log2cpp) - 1);
913
914 info[1] = nve4_su_format_map[view->format];
915
916 #if 0
917 switch (util_format_get_blocksizebits(view->format)) {
918 case 16: info[1] |= 1 << 16; break;
919 case 32: info[1] |= 2 << 16; break;
920 case 64: info[1] |= 3 << 16; break;
921 case 128: info[1] |= 4 << 16; break;
922 default:
923 break;
924 }
925 #else
926 info[1] |= log2cpp << 16;
927 info[1] |= 0x4000;
928 info[1] |= (0x0f00 & nve4_su_format_aux_map[view->format]);
929 #endif
930
931 if (res->base.target == PIPE_BUFFER) {
932 unsigned blocksize = util_format_get_blocksize(view->format);
933
934 address += view->u.buf.first_element * blocksize;
935
936 info[0] = address >> 8;
937 info[2] = width - 1;
938 info[2] |= (0xff & nve4_su_format_aux_map[view->format]) << 22;
939 info[3] = 0;
940 info[4] = 0;
941 info[5] = 0;
942 info[6] = 0;
943 info[7] = 0;
944 info[14] = 0;
945 info[15] = 0;
946 } else {
947 struct nv50_miptree *mt = nv50_miptree(&res->base);
948 struct nv50_miptree_level *lvl = &mt->level[view->u.tex.level];
949 const unsigned z = view->u.tex.first_layer;
950
951 if (z) {
952 if (mt->layout_3d) {
953 address += nvc0_mt_zslice_offset(mt, view->u.tex.level, z);
954 /* doesn't work if z passes z-tile boundary */
955 if (depth > 1) {
956 pipe_debug_message(&nvc0->base.debug, CONFORMANCE,
957 "3D images are not really supported!");
958 debug_printf("3D images are not really supported!\n");
959 }
960 } else {
961 address += mt->layer_stride * z;
962 }
963 }
964 address += lvl->offset;
965
966 info[0] = address >> 8;
967 info[2] = (width << mt->ms_x) - 1;
968 /* NOTE: this is really important: */
969 info[2] |= (0xff & nve4_su_format_aux_map[view->format]) << 22;
970 info[3] = (0x88 << 24) | (lvl->pitch / 64);
971 info[4] = (height << mt->ms_y) - 1;
972 info[4] |= (lvl->tile_mode & 0x0f0) << 25;
973 info[4] |= NVC0_TILE_SHIFT_Y(lvl->tile_mode) << 22;
974 info[5] = mt->layer_stride >> 8;
975 info[6] = depth - 1;
976 info[6] |= (lvl->tile_mode & 0xf00) << 21;
977 info[6] |= NVC0_TILE_SHIFT_Z(lvl->tile_mode) << 22;
978 info[7] = 0;
979 info[14] = mt->ms_x;
980 info[15] = mt->ms_y;
981 }
982 }
983
984 static inline void
985 nvc0_set_surface_info(struct nouveau_pushbuf *push,
986 struct pipe_image_view *view, uint64_t address,
987 int width, int height, int depth)
988 {
989 struct nv04_resource *res;
990 uint32_t *const info = push->cur;
991
992 push->cur += 16;
993
994 /* Make sure to always initialize the surface information area because it's
995 * used to check if the given image is bound or not. */
996 memset(info, 0, 16 * sizeof(*info));
997
998 if (!view || !view->resource)
999 return;
1000 res = nv04_resource(view->resource);
1001
1002 /* Stick the image dimensions for the imageSize() builtin. */
1003 info[8] = width;
1004 info[9] = height;
1005 info[10] = depth;
1006
1007 /* Stick the blockwidth (ie. number of bytes per pixel) to calculate pixel
1008 * offset and to check if the format doesn't mismatch. */
1009 info[12] = util_format_get_blocksize(view->format);
1010
1011 if (res->base.target == PIPE_BUFFER) {
1012 info[0] = address >> 8;
1013 info[2] = width;
1014 } else {
1015 struct nv50_miptree *mt = nv50_miptree(&res->base);
1016
1017 info[0] = address >> 8;
1018 info[2] = width;
1019 info[4] = height;
1020 info[5] = mt->layer_stride >> 8;
1021 info[6] = depth;
1022 info[14] = mt->ms_x;
1023 info[15] = mt->ms_y;
1024 }
1025 }
1026
1027 void
1028 nvc0_validate_suf(struct nvc0_context *nvc0, int s)
1029 {
1030 struct nouveau_pushbuf *push = nvc0->base.pushbuf;
1031 struct nvc0_screen *screen = nvc0->screen;
1032
1033 for (int i = 0; i < NVC0_MAX_IMAGES; ++i) {
1034 struct pipe_image_view *view = &nvc0->images[s][i];
1035 int width, height, depth;
1036 uint64_t address = 0;
1037
1038 if (s == 5)
1039 BEGIN_NVC0(push, NVC0_CP(IMAGE(i)), 6);
1040 else
1041 BEGIN_NVC0(push, NVC0_3D(IMAGE(i)), 6);
1042
1043 if (view->resource) {
1044 struct nv04_resource *res = nv04_resource(view->resource);
1045 unsigned rt = nvc0_format_table[view->format].rt;
1046
1047 if (util_format_is_depth_or_stencil(view->format))
1048 rt = rt << 12;
1049 else
1050 rt = (rt << 4) | (0x14 << 12);
1051
1052 /* get surface dimensions based on the target. */
1053 nvc0_get_surface_dims(view, &width, &height, &depth);
1054
1055 address = res->address;
1056 if (res->base.target == PIPE_BUFFER) {
1057 unsigned blocksize = util_format_get_blocksize(view->format);
1058
1059 address += view->u.buf.first_element * blocksize;
1060 assert(!(address & 0xff));
1061
1062 if (view->access & PIPE_IMAGE_ACCESS_WRITE)
1063 nvc0_mark_image_range_valid(view);
1064
1065 PUSH_DATAh(push, address);
1066 PUSH_DATA (push, address);
1067 PUSH_DATA (push, align(width * blocksize, 0x100));
1068 PUSH_DATA (push, NVC0_3D_IMAGE_HEIGHT_LINEAR | 1);
1069 PUSH_DATA (push, rt);
1070 PUSH_DATA (push, 0);
1071 } else {
1072 struct nv50_miptree *mt = nv50_miptree(view->resource);
1073 struct nv50_miptree_level *lvl = &mt->level[view->u.tex.level];
1074 const unsigned z = view->u.tex.first_layer;
1075
1076 if (mt->layout_3d) {
1077 address += nvc0_mt_zslice_offset(mt, view->u.tex.level, z);
1078 if (depth >= 1) {
1079 pipe_debug_message(&nvc0->base.debug, CONFORMANCE,
1080 "3D images are not supported!");
1081 debug_printf("3D images are not supported!\n");
1082 }
1083 } else {
1084 address += mt->layer_stride * z;
1085 }
1086 address += lvl->offset;
1087
1088 PUSH_DATAh(push, address);
1089 PUSH_DATA (push, address);
1090 PUSH_DATA (push, width << mt->ms_x);
1091 PUSH_DATA (push, height << mt->ms_y);
1092 PUSH_DATA (push, rt);
1093 PUSH_DATA (push, lvl->tile_mode & 0xff); /* mask out z-tiling */
1094 }
1095
1096 if (s == 5)
1097 BCTX_REFN(nvc0->bufctx_cp, CP_SUF, res, RDWR);
1098 else
1099 BCTX_REFN(nvc0->bufctx_3d, 3D_SUF, res, RDWR);
1100 } else {
1101 PUSH_DATA(push, 0);
1102 PUSH_DATA(push, 0);
1103 PUSH_DATA(push, 0);
1104 PUSH_DATA(push, 0);
1105 PUSH_DATA(push, 0x14000);
1106 PUSH_DATA(push, 0);
1107 }
1108
1109 /* stick surface information into the driver constant buffer */
1110 if (s == 5)
1111 BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3);
1112 else
1113 BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
1114 PUSH_DATA (push, NVC0_CB_AUX_SIZE);
1115 PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
1116 PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
1117 if (s == 5)
1118 BEGIN_1IC0(push, NVC0_CP(CB_POS), 1 + 16);
1119 else
1120 BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 16);
1121 PUSH_DATA (push, NVC0_CB_AUX_SU_INFO(i));
1122
1123 nvc0_set_surface_info(push, view, address, width, height, depth);
1124 }
1125 }
1126
1127 static inline void
1128 nvc0_update_surface_bindings(struct nvc0_context *nvc0)
1129 {
1130 nvc0_validate_suf(nvc0, 4);
1131
1132 /* Invalidate all COMPUTE images because they are aliased with FRAGMENT. */
1133 nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_SUF);
1134 nvc0->dirty_cp |= NVC0_NEW_CP_SURFACES;
1135 nvc0->images_dirty[5] |= nvc0->images_valid[5];
1136 }
1137
1138 static void
1139 gm107_validate_surfaces(struct nvc0_context *nvc0,
1140 struct pipe_image_view *view, int stage, int slot)
1141 {
1142 struct nv04_resource *res = nv04_resource(view->resource);
1143 struct nouveau_pushbuf *push = nvc0->base.pushbuf;
1144 struct nvc0_screen *screen = nvc0->screen;
1145 struct nouveau_bo *txc = nvc0->screen->txc;
1146 struct nv50_tic_entry *tic;
1147
1148 tic = nv50_tic_entry(nvc0->images_tic[stage][slot]);
1149
1150 res = nv04_resource(tic->pipe.texture);
1151 nvc0_update_tic(nvc0, tic, res);
1152
1153 if (tic->id < 0) {
1154 tic->id = nvc0_screen_tic_alloc(nvc0->screen, tic);
1155
1156 /* upload the texture view */
1157 PUSH_SPACE(push, 16);
1158 BEGIN_NVC0(push, NVE4_P2MF(UPLOAD_DST_ADDRESS_HIGH), 2);
1159 PUSH_DATAh(push, txc->offset + (tic->id * 32));
1160 PUSH_DATA (push, txc->offset + (tic->id * 32));
1161 BEGIN_NVC0(push, NVE4_P2MF(UPLOAD_LINE_LENGTH_IN), 2);
1162 PUSH_DATA (push, 32);
1163 PUSH_DATA (push, 1);
1164 BEGIN_1IC0(push, NVE4_P2MF(UPLOAD_EXEC), 9);
1165 PUSH_DATA (push, 0x1001);
1166 PUSH_DATAp(push, &tic->tic[0], 8);
1167
1168 BEGIN_NVC0(push, NVC0_3D(TIC_FLUSH), 1);
1169 PUSH_DATA (push, 0);
1170 } else
1171 if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {
1172 BEGIN_NVC0(push, NVC0_3D(TEX_CACHE_CTL), 1);
1173 PUSH_DATA (push, (tic->id << 4) | 1);
1174 }
1175 nvc0->screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32);
1176
1177 res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING;
1178 res->status |= NOUVEAU_BUFFER_STATUS_GPU_READING;
1179
1180 BCTX_REFN(nvc0->bufctx_3d, 3D_SUF, res, RD);
1181
1182 /* upload the texture handle */
1183 BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
1184 PUSH_DATA (push, NVC0_CB_AUX_SIZE);
1185 PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(stage));
1186 PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(stage));
1187 BEGIN_NVC0(push, NVC0_3D(CB_POS), 2);
1188 PUSH_DATA (push, NVC0_CB_AUX_TEX_INFO(slot + 32));
1189 PUSH_DATA (push, tic->id);
1190 }
1191
1192 static inline void
1193 nve4_update_surface_bindings(struct nvc0_context *nvc0)
1194 {
1195 struct nouveau_pushbuf *push = nvc0->base.pushbuf;
1196 struct nvc0_screen *screen = nvc0->screen;
1197 int i, j, s;
1198
1199 for (s = 0; s < 5; s++) {
1200 if (!nvc0->images_dirty[s])
1201 continue;
1202
1203 for (i = 0; i < NVC0_MAX_IMAGES; ++i) {
1204 struct pipe_image_view *view = &nvc0->images[s][i];
1205
1206 BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
1207 PUSH_DATA (push, NVC0_CB_AUX_SIZE);
1208 PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
1209 PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
1210 BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 16);
1211 PUSH_DATA (push, NVC0_CB_AUX_SU_INFO(i));
1212
1213 if (view->resource) {
1214 struct nv04_resource *res = nv04_resource(view->resource);
1215
1216 if (res->base.target == PIPE_BUFFER) {
1217 if (view->access & PIPE_IMAGE_ACCESS_WRITE)
1218 nvc0_mark_image_range_valid(view);
1219 }
1220
1221 nve4_set_surface_info(push, view, nvc0);
1222 BCTX_REFN(nvc0->bufctx_3d, 3D_SUF, res, RDWR);
1223
1224 if (nvc0->screen->base.class_3d >= GM107_3D_CLASS)
1225 gm107_validate_surfaces(nvc0, view, s, i);
1226 } else {
1227 for (j = 0; j < 16; j++)
1228 PUSH_DATA(push, 0);
1229 }
1230 }
1231 }
1232 }
1233
1234 void
1235 nvc0_validate_surfaces(struct nvc0_context *nvc0)
1236 {
1237 if (nvc0->screen->base.class_3d >= NVE4_3D_CLASS) {
1238 nve4_update_surface_bindings(nvc0);
1239 } else {
1240 nvc0_update_surface_bindings(nvc0);
1241 }
1242 }
1243
1244
1245 static const uint8_t nve4_su_format_map[PIPE_FORMAT_COUNT] =
1246 {
1247 [PIPE_FORMAT_R32G32B32A32_FLOAT] = GK104_IMAGE_FORMAT_RGBA32_FLOAT,
1248 [PIPE_FORMAT_R32G32B32A32_SINT] = GK104_IMAGE_FORMAT_RGBA32_SINT,
1249 [PIPE_FORMAT_R32G32B32A32_UINT] = GK104_IMAGE_FORMAT_RGBA32_UINT,
1250 [PIPE_FORMAT_R16G16B16A16_FLOAT] = GK104_IMAGE_FORMAT_RGBA16_FLOAT,
1251 [PIPE_FORMAT_R16G16B16A16_UNORM] = GK104_IMAGE_FORMAT_RGBA16_UNORM,
1252 [PIPE_FORMAT_R16G16B16A16_SNORM] = GK104_IMAGE_FORMAT_RGBA16_SNORM,
1253 [PIPE_FORMAT_R16G16B16A16_SINT] = GK104_IMAGE_FORMAT_RGBA16_SINT,
1254 [PIPE_FORMAT_R16G16B16A16_UINT] = GK104_IMAGE_FORMAT_RGBA16_UINT,
1255 [PIPE_FORMAT_B8G8R8A8_UNORM] = GK104_IMAGE_FORMAT_BGRA8_UNORM,
1256 [PIPE_FORMAT_R8G8B8A8_UNORM] = GK104_IMAGE_FORMAT_RGBA8_UNORM,
1257 [PIPE_FORMAT_R8G8B8A8_SNORM] = GK104_IMAGE_FORMAT_RGBA8_SNORM,
1258 [PIPE_FORMAT_R8G8B8A8_SINT] = GK104_IMAGE_FORMAT_RGBA8_SINT,
1259 [PIPE_FORMAT_R8G8B8A8_UINT] = GK104_IMAGE_FORMAT_RGBA8_UINT,
1260 [PIPE_FORMAT_R11G11B10_FLOAT] = GK104_IMAGE_FORMAT_R11G11B10_FLOAT,
1261 [PIPE_FORMAT_R10G10B10A2_UNORM] = GK104_IMAGE_FORMAT_RGB10_A2_UNORM,
1262 [PIPE_FORMAT_R10G10B10A2_UINT] = GK104_IMAGE_FORMAT_RGB10_A2_UINT,
1263 [PIPE_FORMAT_R32G32_FLOAT] = GK104_IMAGE_FORMAT_RG32_FLOAT,
1264 [PIPE_FORMAT_R32G32_SINT] = GK104_IMAGE_FORMAT_RG32_SINT,
1265 [PIPE_FORMAT_R32G32_UINT] = GK104_IMAGE_FORMAT_RG32_UINT,
1266 [PIPE_FORMAT_R16G16_FLOAT] = GK104_IMAGE_FORMAT_RG16_FLOAT,
1267 [PIPE_FORMAT_R16G16_UNORM] = GK104_IMAGE_FORMAT_RG16_UNORM,
1268 [PIPE_FORMAT_R16G16_SNORM] = GK104_IMAGE_FORMAT_RG16_SNORM,
1269 [PIPE_FORMAT_R16G16_SINT] = GK104_IMAGE_FORMAT_RG16_SINT,
1270 [PIPE_FORMAT_R16G16_UINT] = GK104_IMAGE_FORMAT_RG16_UINT,
1271 [PIPE_FORMAT_R8G8_UNORM] = GK104_IMAGE_FORMAT_RG8_UNORM,
1272 [PIPE_FORMAT_R8G8_SNORM] = GK104_IMAGE_FORMAT_RG8_SNORM,
1273 [PIPE_FORMAT_R8G8_SINT] = GK104_IMAGE_FORMAT_RG8_SINT,
1274 [PIPE_FORMAT_R8G8_UINT] = GK104_IMAGE_FORMAT_RG8_UINT,
1275 [PIPE_FORMAT_R32_FLOAT] = GK104_IMAGE_FORMAT_R32_FLOAT,
1276 [PIPE_FORMAT_R32_SINT] = GK104_IMAGE_FORMAT_R32_SINT,
1277 [PIPE_FORMAT_R32_UINT] = GK104_IMAGE_FORMAT_R32_UINT,
1278 [PIPE_FORMAT_R16_FLOAT] = GK104_IMAGE_FORMAT_R16_FLOAT,
1279 [PIPE_FORMAT_R16_UNORM] = GK104_IMAGE_FORMAT_R16_UNORM,
1280 [PIPE_FORMAT_R16_SNORM] = GK104_IMAGE_FORMAT_R16_SNORM,
1281 [PIPE_FORMAT_R16_SINT] = GK104_IMAGE_FORMAT_R16_SINT,
1282 [PIPE_FORMAT_R16_UINT] = GK104_IMAGE_FORMAT_R16_UINT,
1283 [PIPE_FORMAT_R8_UNORM] = GK104_IMAGE_FORMAT_R8_UNORM,
1284 [PIPE_FORMAT_R8_SNORM] = GK104_IMAGE_FORMAT_R8_SNORM,
1285 [PIPE_FORMAT_R8_SINT] = GK104_IMAGE_FORMAT_R8_SINT,
1286 [PIPE_FORMAT_R8_UINT] = GK104_IMAGE_FORMAT_R8_UINT,
1287 };
1288
1289 /* Auxiliary format description values for surface instructions.
1290 * (log2(bytes per pixel) << 12) | (unk8 << 8) | unk22
1291 */
1292 static const uint16_t nve4_su_format_aux_map[PIPE_FORMAT_COUNT] =
1293 {
1294 [PIPE_FORMAT_R32G32B32A32_FLOAT] = 0x4842,
1295 [PIPE_FORMAT_R32G32B32A32_SINT] = 0x4842,
1296 [PIPE_FORMAT_R32G32B32A32_UINT] = 0x4842,
1297
1298 [PIPE_FORMAT_R16G16B16A16_UNORM] = 0x3933,
1299 [PIPE_FORMAT_R16G16B16A16_SNORM] = 0x3933,
1300 [PIPE_FORMAT_R16G16B16A16_SINT] = 0x3933,
1301 [PIPE_FORMAT_R16G16B16A16_UINT] = 0x3933,
1302 [PIPE_FORMAT_R16G16B16A16_FLOAT] = 0x3933,
1303
1304 [PIPE_FORMAT_R32G32_FLOAT] = 0x3433,
1305 [PIPE_FORMAT_R32G32_SINT] = 0x3433,
1306 [PIPE_FORMAT_R32G32_UINT] = 0x3433,
1307
1308 [PIPE_FORMAT_R10G10B10A2_UNORM] = 0x2a24,
1309 [PIPE_FORMAT_R10G10B10A2_UINT] = 0x2a24,
1310 [PIPE_FORMAT_B8G8R8A8_UNORM] = 0x2a24,
1311 [PIPE_FORMAT_R8G8B8A8_UNORM] = 0x2a24,
1312 [PIPE_FORMAT_R8G8B8A8_SNORM] = 0x2a24,
1313 [PIPE_FORMAT_R8G8B8A8_SINT] = 0x2a24,
1314 [PIPE_FORMAT_R8G8B8A8_UINT] = 0x2a24,
1315 [PIPE_FORMAT_R11G11B10_FLOAT] = 0x2a24,
1316
1317 [PIPE_FORMAT_R16G16_UNORM] = 0x2524,
1318 [PIPE_FORMAT_R16G16_SNORM] = 0x2524,
1319 [PIPE_FORMAT_R16G16_SINT] = 0x2524,
1320 [PIPE_FORMAT_R16G16_UINT] = 0x2524,
1321 [PIPE_FORMAT_R16G16_FLOAT] = 0x2524,
1322
1323 [PIPE_FORMAT_R32_SINT] = 0x2024,
1324 [PIPE_FORMAT_R32_UINT] = 0x2024,
1325 [PIPE_FORMAT_R32_FLOAT] = 0x2024,
1326
1327 [PIPE_FORMAT_R8G8_UNORM] = 0x1615,
1328 [PIPE_FORMAT_R8G8_SNORM] = 0x1615,
1329 [PIPE_FORMAT_R8G8_SINT] = 0x1615,
1330 [PIPE_FORMAT_R8G8_UINT] = 0x1615,
1331
1332 [PIPE_FORMAT_R16_UNORM] = 0x1115,
1333 [PIPE_FORMAT_R16_SNORM] = 0x1115,
1334 [PIPE_FORMAT_R16_SINT] = 0x1115,
1335 [PIPE_FORMAT_R16_UINT] = 0x1115,
1336 [PIPE_FORMAT_R16_FLOAT] = 0x1115,
1337
1338 [PIPE_FORMAT_R8_UNORM] = 0x0206,
1339 [PIPE_FORMAT_R8_SNORM] = 0x0206,
1340 [PIPE_FORMAT_R8_SINT] = 0x0206,
1341 [PIPE_FORMAT_R8_UINT] = 0x0206
1342 };
1343
1344 /* NOTE: These are hardcoded offsets for the shader library.
1345 * TODO: Automate them.
1346 */
1347 static const uint16_t nve4_suldp_lib_offset[PIPE_FORMAT_COUNT] =
1348 {
1349 [PIPE_FORMAT_R32G32B32A32_FLOAT] = 0x218,
1350 [PIPE_FORMAT_R32G32B32A32_SINT] = 0x218,
1351 [PIPE_FORMAT_R32G32B32A32_UINT] = 0x218,
1352 [PIPE_FORMAT_R16G16B16A16_UNORM] = 0x248,
1353 [PIPE_FORMAT_R16G16B16A16_SNORM] = 0x2b8,
1354 [PIPE_FORMAT_R16G16B16A16_SINT] = 0x330,
1355 [PIPE_FORMAT_R16G16B16A16_UINT] = 0x388,
1356 [PIPE_FORMAT_R16G16B16A16_FLOAT] = 0x3d8,
1357 [PIPE_FORMAT_R32G32_FLOAT] = 0x428,
1358 [PIPE_FORMAT_R32G32_SINT] = 0x468,
1359 [PIPE_FORMAT_R32G32_UINT] = 0x468,
1360 [PIPE_FORMAT_R10G10B10A2_UNORM] = 0x4a8,
1361 [PIPE_FORMAT_R10G10B10A2_UINT] = 0x530,
1362 [PIPE_FORMAT_R8G8B8A8_UNORM] = 0x588,
1363 [PIPE_FORMAT_R8G8B8A8_SNORM] = 0x5f8,
1364 [PIPE_FORMAT_R8G8B8A8_SINT] = 0x670,
1365 [PIPE_FORMAT_R8G8B8A8_UINT] = 0x6c8,
1366 [PIPE_FORMAT_B5G6R5_UNORM] = 0x718,
1367 [PIPE_FORMAT_B5G5R5X1_UNORM] = 0x7a0,
1368 [PIPE_FORMAT_R16G16_UNORM] = 0x828,
1369 [PIPE_FORMAT_R16G16_SNORM] = 0x890,
1370 [PIPE_FORMAT_R16G16_SINT] = 0x8f0,
1371 [PIPE_FORMAT_R16G16_UINT] = 0x948,
1372 [PIPE_FORMAT_R16G16_FLOAT] = 0x998,
1373 [PIPE_FORMAT_R32_FLOAT] = 0x9e8,
1374 [PIPE_FORMAT_R32_SINT] = 0xa30,
1375 [PIPE_FORMAT_R32_UINT] = 0xa30,
1376 [PIPE_FORMAT_R8G8_UNORM] = 0xa78,
1377 [PIPE_FORMAT_R8G8_SNORM] = 0xae0,
1378 [PIPE_FORMAT_R8G8_UINT] = 0xb48,
1379 [PIPE_FORMAT_R8G8_SINT] = 0xb98,
1380 [PIPE_FORMAT_R16_UNORM] = 0xbe8,
1381 [PIPE_FORMAT_R16_SNORM] = 0xc48,
1382 [PIPE_FORMAT_R16_SINT] = 0xca0,
1383 [PIPE_FORMAT_R16_UINT] = 0xce8,
1384 [PIPE_FORMAT_R16_FLOAT] = 0xd30,
1385 [PIPE_FORMAT_R8_UNORM] = 0xd88,
1386 [PIPE_FORMAT_R8_SNORM] = 0xde0,
1387 [PIPE_FORMAT_R8_SINT] = 0xe38,
1388 [PIPE_FORMAT_R8_UINT] = 0xe88,
1389 [PIPE_FORMAT_R11G11B10_FLOAT] = 0xed0
1390 };