src/gallium/drivers/vc4/kernel/vc4_validate.c

   1 /*
   2  * Copyright © 2014 Broadcom
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21  * IN THE SOFTWARE.
  22  */
  23
  24 /**
  25  * Command list validator for VC4.
  26  *
  27  * The VC4 has no IOMMU between it and system memory.  So, a user with
  28  * access to execute command lists could escalate privilege by
  29  * overwriting system memory (drawing to it as a framebuffer) or
  30  * reading system memory it shouldn't (reading it as a texture, or
  31  * uniform data, or vertex data).
  32  *
  33  * This validates command lists to ensure that all accesses are within
  34  * the bounds of the GEM objects referenced.  It explicitly whitelists
  35  * packets, and looks at the offsets in any address fields to make
  36  * sure they're constrained within the BOs they reference.
  37  *
  38  * Note that because of the validation that's happening anyway, this
  39  * is where GEM relocation processing happens.
  40  */
  41
  42 #include "vc4_drv.h"
  43 #include "vc4_packet.h"
  44
  45 #define VALIDATE_ARGS \
  46         struct vc4_exec_info *exec,                     \
  47         void *validated,                                \
  48         void *untrusted
  49
  50 /** Return the width in pixels of a 64-byte microtile. */
  51 static uint32_t
  52 utile_width(int cpp)
  53 {
  54         switch (cpp) {
  55         case 1:
  56         case 2:
  57                 return 8;
  58         case 4:
  59                 return 4;
  60         case 8:
  61                 return 2;
  62         default:
  63                 DRM_ERROR("unknown cpp: %d\n", cpp);
  64                 return 1;
  65         }
  66 }
  67
  68 /** Return the height in pixels of a 64-byte microtile. */
  69 static uint32_t
  70 utile_height(int cpp)
  71 {
  72         switch (cpp) {
  73         case 1:
  74                 return 8;
  75         case 2:
  76         case 4:
  77         case 8:
  78                 return 4;
  79         default:
  80                 DRM_ERROR("unknown cpp: %d\n", cpp);
  81                 return 1;
  82         }
  83 }
  84
  85 /**
  86  * The texture unit decides what tiling format a particular miplevel is using
  87  * this function, so we lay out our miptrees accordingly.
  88  */
  89 static bool
  90 size_is_lt(uint32_t width, uint32_t height, int cpp)
  91 {
  92         return (width <= 4 * utile_width(cpp) ||
  93                 height <= 4 * utile_height(cpp));
  94 }
  95
  96 struct drm_gem_cma_object *
  97 vc4_use_bo(struct vc4_exec_info *exec, uint32_t hindex)
  98 {
  99         struct drm_gem_cma_object *obj;
 100         struct drm_vc4_bo *bo;
 101
 102         if (hindex >= exec->bo_count) {
 103                 DRM_ERROR("BO index %d greater than BO count %d\n",
 104                           hindex, exec->bo_count);
 105                 return NULL;
 106         }
 107         obj = exec->bo[hindex];
 108         bo = to_vc4_bo(&obj->base);
 109
 110         if (bo->validated_shader) {
 111                 DRM_ERROR("Trying to use shader BO as something other than "
 112                           "a shader\n");
 113                 return NULL;
 114         }
 115
 116         return obj;
 117 }
 118
 119 static struct drm_gem_cma_object *
 120 vc4_use_handle(struct vc4_exec_info *exec, uint32_t gem_handles_packet_index)
 121 {
 122         return vc4_use_bo(exec, exec->bo_index[gem_handles_packet_index]);
 123 }
 124
 125 static bool
 126 validate_bin_pos(struct vc4_exec_info *exec, void *untrusted, uint32_t pos)
 127 {
 128         /* Note that the untrusted pointer passed to these functions is
 129          * incremented past the packet byte.
 130          */
 131         return (untrusted - 1 == exec->bin_u + pos);
 132 }
 133
 134 static uint32_t
 135 gl_shader_rec_size(uint32_t pointer_bits)
 136 {
 137         uint32_t attribute_count = pointer_bits & 7;
 138         bool extended = pointer_bits & 8;
 139
 140         if (attribute_count == 0)
 141                 attribute_count = 8;
 142
 143         if (extended)
 144                 return 100 + attribute_count * 4;
 145         else
 146                 return 36 + attribute_count * 8;
 147 }
 148
 149 bool
 150 vc4_check_tex_size(struct vc4_exec_info *exec, struct drm_gem_cma_object *fbo,
 151                    uint32_t offset, uint8_t tiling_format,
 152                    uint32_t width, uint32_t height, uint8_t cpp)
 153 {
 154         uint32_t aligned_width, aligned_height, stride, size;
 155         uint32_t utile_w = utile_width(cpp);
 156         uint32_t utile_h = utile_height(cpp);
 157
 158         /* The shaded vertex format stores signed 12.4 fixed point
 159          * (-2048,2047) offsets from the viewport center, so we should
 160          * never have a render target larger than 4096.  The texture
 161          * unit can only sample from 2048x2048, so it's even more
 162          * restricted.  This lets us avoid worrying about overflow in
 163          * our math.
 164          */
 165         if (width > 4096 || height > 4096) {
 166                 DRM_ERROR("Surface dimesions (%d,%d) too large", width, height);
 167                 return false;
 168         }
 169
 170         switch (tiling_format) {
 171         case VC4_TILING_FORMAT_LINEAR:
 172                 aligned_width = round_up(width, utile_w);
 173                 aligned_height = height;
 174                 break;
 175         case VC4_TILING_FORMAT_T:
 176                 aligned_width = round_up(width, utile_w * 8);
 177                 aligned_height = round_up(height, utile_h * 8);
 178                 break;
 179         case VC4_TILING_FORMAT_LT:
 180                 aligned_width = round_up(width, utile_w);
 181                 aligned_height = round_up(height, utile_h);
 182                 break;
 183         default:
 184                 DRM_ERROR("buffer tiling %d unsupported\n", tiling_format);
 185                 return false;
 186         }
 187
 188         stride = aligned_width * cpp;
 189         size = stride * aligned_height;
 190
 191         if (size + offset < size ||
 192             size + offset > fbo->base.size) {
 193                 DRM_ERROR("Overflow in %dx%d (%dx%d) fbo size (%d + %d > %zd)\n",
 194                           width, height,
 195                           aligned_width, aligned_height,
 196                           size, offset, fbo->base.size);
 197                 return false;
 198         }
 199
 200         return true;
 201 }
 202
 203 static int
 204 validate_flush(VALIDATE_ARGS)
 205 {
 206         if (!validate_bin_pos(exec, untrusted, exec->args->bin_cl_size - 1)) {
 207                 DRM_ERROR("Bin CL must end with VC4_PACKET_FLUSH\n");
 208                 return -EINVAL;
 209         }
 210         exec->found_flush = true;
 211
 212         return 0;
 213 }
 214
 215 static int
 216 validate_start_tile_binning(VALIDATE_ARGS)
 217 {
 218         if (exec->found_start_tile_binning_packet) {
 219                 DRM_ERROR("Duplicate VC4_PACKET_START_TILE_BINNING\n");
 220                 return -EINVAL;
 221         }
 222         exec->found_start_tile_binning_packet = true;
 223
 224         if (!exec->found_tile_binning_mode_config_packet) {
 225                 DRM_ERROR("missing VC4_PACKET_TILE_BINNING_MODE_CONFIG\n");
 226                 return -EINVAL;
 227         }
 228
 229         return 0;
 230 }
 231
 232 static int
 233 validate_increment_semaphore(VALIDATE_ARGS)
 234 {
 235         if (!validate_bin_pos(exec, untrusted, exec->args->bin_cl_size - 2)) {
 236                 DRM_ERROR("Bin CL must end with "
 237                           "VC4_PACKET_INCREMENT_SEMAPHORE\n");
 238                 return -EINVAL;
 239         }
 240         exec->found_increment_semaphore_packet = true;
 241
 242         return 0;
 243 }
 244
 245 static int
 246 validate_indexed_prim_list(VALIDATE_ARGS)
 247 {
 248         struct drm_gem_cma_object *ib;
 249         uint32_t length = *(uint32_t *)(untrusted + 1);
 250         uint32_t offset = *(uint32_t *)(untrusted + 5);
 251         uint32_t max_index = *(uint32_t *)(untrusted + 9);
 252         uint32_t index_size = (*(uint8_t *)(untrusted + 0) >> 4) ? 2 : 1;
 253         struct vc4_shader_state *shader_state;
 254
 255         /* Check overflow condition */
 256         if (exec->shader_state_count == 0) {
 257                 DRM_ERROR("shader state must precede primitives\n");
 258                 return -EINVAL;
 259         }
 260         shader_state = &exec->shader_state[exec->shader_state_count - 1];
 261
 262         if (max_index > shader_state->max_index)
 263                 shader_state->max_index = max_index;
 264
 265         ib = vc4_use_handle(exec, 0);
 266         if (!ib)
 267                 return -EINVAL;
 268
 269         if (offset > ib->base.size ||
 270             (ib->base.size - offset) / index_size < length) {
 271                 DRM_ERROR("IB access overflow (%d + %d*%d > %zd)\n",
 272                           offset, length, index_size, ib->base.size);
 273                 return -EINVAL;
 274         }
 275
 276         *(uint32_t *)(validated + 5) = ib->paddr + offset;
 277
 278         return 0;
 279 }
 280
 281 static int
 282 validate_gl_array_primitive(VALIDATE_ARGS)
 283 {
 284         uint32_t length = *(uint32_t *)(untrusted + 1);
 285         uint32_t base_index = *(uint32_t *)(untrusted + 5);
 286         uint32_t max_index;
 287         struct vc4_shader_state *shader_state;
 288
 289         /* Check overflow condition */
 290         if (exec->shader_state_count == 0) {
 291                 DRM_ERROR("shader state must precede primitives\n");
 292                 return -EINVAL;
 293         }
 294         shader_state = &exec->shader_state[exec->shader_state_count - 1];
 295
 296         if (length + base_index < length) {
 297                 DRM_ERROR("primitive vertex count overflow\n");
 298                 return -EINVAL;
 299         }
 300         max_index = length + base_index - 1;
 301
 302         if (max_index > shader_state->max_index)
 303                 shader_state->max_index = max_index;
 304
 305         return 0;
 306 }
 307
 308 static int
 309 validate_gl_shader_state(VALIDATE_ARGS)
 310 {
 311         uint32_t i = exec->shader_state_count++;
 312
 313         if (i >= exec->shader_state_size) {
 314                 DRM_ERROR("More requests for shader states than declared\n");
 315                 return -EINVAL;
 316         }
 317
 318         exec->shader_state[i].addr = *(uint32_t *)untrusted;
 319         exec->shader_state[i].max_index = 0;
 320
 321         if (exec->shader_state[i].addr & ~0xf) {
 322                 DRM_ERROR("high bits set in GL shader rec reference\n");
 323                 return -EINVAL;
 324         }
 325
 326         *(uint32_t *)validated = (exec->shader_rec_p +
 327                                   exec->shader_state[i].addr);
 328
 329         exec->shader_rec_p +=
 330                 roundup(gl_shader_rec_size(exec->shader_state[i].addr), 16);
 331
 332         return 0;
 333 }
 334
 335 static int
 336 validate_tile_binning_config(VALIDATE_ARGS)
 337 {
 338         struct drm_device *dev = exec->exec_bo->base.dev;
 339         uint8_t flags;
 340         uint32_t tile_state_size, tile_alloc_size;
 341         uint32_t tile_count;
 342
 343         if (exec->found_tile_binning_mode_config_packet) {
 344                 DRM_ERROR("Duplicate VC4_PACKET_TILE_BINNING_MODE_CONFIG\n");
 345                 return -EINVAL;
 346         }
 347         exec->found_tile_binning_mode_config_packet = true;
 348
 349         exec->bin_tiles_x = *(uint8_t *)(untrusted + 12);
 350         exec->bin_tiles_y = *(uint8_t *)(untrusted + 13);
 351         tile_count = exec->bin_tiles_x * exec->bin_tiles_y;
 352         flags = *(uint8_t *)(untrusted + 14);
 353
 354         if (exec->bin_tiles_x == 0 ||
 355             exec->bin_tiles_y == 0) {
 356                 DRM_ERROR("Tile binning config of %dx%d too small\n",
 357                           exec->bin_tiles_x, exec->bin_tiles_y);
 358                 return -EINVAL;
 359         }
 360
 361         if (flags & (VC4_BIN_CONFIG_DB_NON_MS |
 362                      VC4_BIN_CONFIG_TILE_BUFFER_64BIT)) {
 363                 DRM_ERROR("unsupported binning config flags 0x%02x\n", flags);
 364                 return -EINVAL;
 365         }
 366
 367         /* The tile state data array is 48 bytes per tile, and we put it at
 368          * the start of a BO containing both it and the tile alloc.
 369          */
 370         tile_state_size = 48 * tile_count;
 371
 372         /* Since the tile alloc array will follow us, align. */
 373         exec->tile_alloc_offset = roundup(tile_state_size, 4096);
 374
 375         *(uint8_t *)(validated + 14) =
 376                 ((flags & ~(VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_MASK |
 377                             VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_MASK)) |
 378                  VC4_BIN_CONFIG_AUTO_INIT_TSDA |
 379                  VC4_SET_FIELD(VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_32,
 380                                VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE) |
 381                  VC4_SET_FIELD(VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_128,
 382                                VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE));
 383
 384         /* Initial block size. */
 385         tile_alloc_size = 32 * tile_count;
 386
 387         /*
 388          * The initial allocation gets rounded to the next 256 bytes before
 389          * the hardware starts fulfilling further allocations.
 390          */
 391         tile_alloc_size = roundup(tile_alloc_size, 256);
 392
 393         /* Add space for the extra allocations.  This is what gets used first,
 394          * before overflow memory.  It must have at least 4096 bytes, but we
 395          * want to avoid overflow memory usage if possible.
 396          */
 397         tile_alloc_size += 1024 * 1024;
 398
 399         exec->tile_bo = drm_gem_cma_create(dev, exec->tile_alloc_offset +
 400                                            tile_alloc_size);
 401         if (!exec->tile_bo)
 402                 return -ENOMEM;
 403         list_addtail(&to_vc4_bo(&exec->tile_bo->base)->unref_head,
 404                      &exec->unref_list);
 405
 406         /* tile alloc address. */
 407         *(uint32_t *)(validated + 0) = (exec->tile_bo->paddr +
 408                                         exec->tile_alloc_offset);
 409         /* tile alloc size. */
 410         *(uint32_t *)(validated + 4) = tile_alloc_size;
 411         /* tile state address. */
 412         *(uint32_t *)(validated + 8) = exec->tile_bo->paddr;
 413
 414         return 0;
 415 }
 416
 417 static int
 418 validate_gem_handles(VALIDATE_ARGS)
 419 {
 420         memcpy(exec->bo_index, untrusted, sizeof(exec->bo_index));
 421         return 0;
 422 }
 423
 424 #define VC4_DEFINE_PACKET(packet, func) \
 425         [packet] = { packet ## _SIZE, #packet, func }
 426
 427 static const struct cmd_info {
 428         uint16_t len;
 429         const char *name;
 430         int (*func)(struct vc4_exec_info *exec, void *validated,
 431                     void *untrusted);
 432 } cmd_info[] = {
 433         VC4_DEFINE_PACKET(VC4_PACKET_HALT, NULL),
 434         VC4_DEFINE_PACKET(VC4_PACKET_NOP, NULL),
 435         VC4_DEFINE_PACKET(VC4_PACKET_FLUSH, validate_flush),
 436         VC4_DEFINE_PACKET(VC4_PACKET_FLUSH_ALL, NULL),
 437         VC4_DEFINE_PACKET(VC4_PACKET_START_TILE_BINNING,
 438                           validate_start_tile_binning),
 439         VC4_DEFINE_PACKET(VC4_PACKET_INCREMENT_SEMAPHORE,
 440                           validate_increment_semaphore),
 441
 442         VC4_DEFINE_PACKET(VC4_PACKET_GL_INDEXED_PRIMITIVE,
 443                           validate_indexed_prim_list),
 444         VC4_DEFINE_PACKET(VC4_PACKET_GL_ARRAY_PRIMITIVE,
 445                           validate_gl_array_primitive),
 446
 447         VC4_DEFINE_PACKET(VC4_PACKET_PRIMITIVE_LIST_FORMAT, NULL),
 448
 449         VC4_DEFINE_PACKET(VC4_PACKET_GL_SHADER_STATE, validate_gl_shader_state),
 450
 451         VC4_DEFINE_PACKET(VC4_PACKET_CONFIGURATION_BITS, NULL),
 452         VC4_DEFINE_PACKET(VC4_PACKET_FLAT_SHADE_FLAGS, NULL),
 453         VC4_DEFINE_PACKET(VC4_PACKET_POINT_SIZE, NULL),
 454         VC4_DEFINE_PACKET(VC4_PACKET_LINE_WIDTH, NULL),
 455         VC4_DEFINE_PACKET(VC4_PACKET_RHT_X_BOUNDARY, NULL),
 456         VC4_DEFINE_PACKET(VC4_PACKET_DEPTH_OFFSET, NULL),
 457         VC4_DEFINE_PACKET(VC4_PACKET_CLIP_WINDOW, NULL),
 458         VC4_DEFINE_PACKET(VC4_PACKET_VIEWPORT_OFFSET, NULL),
 459         VC4_DEFINE_PACKET(VC4_PACKET_CLIPPER_XY_SCALING, NULL),
 460         /* Note: The docs say this was also 105, but it was 106 in the
 461          * initial userland code drop.
 462          */
 463         VC4_DEFINE_PACKET(VC4_PACKET_CLIPPER_Z_SCALING, NULL),
 464
 465         VC4_DEFINE_PACKET(VC4_PACKET_TILE_BINNING_MODE_CONFIG,
 466                           validate_tile_binning_config),
 467
 468         VC4_DEFINE_PACKET(VC4_PACKET_GEM_HANDLES, validate_gem_handles),
 469 };
 470
 471 int
 472 vc4_validate_bin_cl(struct drm_device *dev,
 473                     void *validated,
 474                     void *unvalidated,
 475                     struct vc4_exec_info *exec)
 476 {
 477         uint32_t len = exec->args->bin_cl_size;
 478         uint32_t dst_offset = 0;
 479         uint32_t src_offset = 0;
 480
 481         while (src_offset < len) {
 482                 void *dst_pkt = validated + dst_offset;
 483                 void *src_pkt = unvalidated + src_offset;
 484                 u8 cmd = *(uint8_t *)src_pkt;
 485                 const struct cmd_info *info;
 486
 487                 if (cmd >= ARRAY_SIZE(cmd_info)) {
 488                         DRM_ERROR("0x%08x: packet %d out of bounds\n",
 489                                   src_offset, cmd);
 490                         return -EINVAL;
 491                 }
 492
 493                 info = &cmd_info[cmd];
 494                 if (!info->name) {
 495                         DRM_ERROR("0x%08x: packet %d invalid\n",
 496                                   src_offset, cmd);
 497                         return -EINVAL;
 498                 }
 499
 500                 if (src_offset + info->len > len) {
 501                         DRM_ERROR("0x%08x: packet %d (%s) length 0x%08x "
 502                                   "exceeds bounds (0x%08x)\n",
 503                                   src_offset, cmd, info->name, info->len,
 504                                   src_offset + len);
 505                         return -EINVAL;
 506                 }
 507
 508                 if (cmd != VC4_PACKET_GEM_HANDLES)
 509                         memcpy(dst_pkt, src_pkt, info->len);
 510
 511                 if (info->func && info->func(exec,
 512                                              dst_pkt + 1,
 513                                              src_pkt + 1)) {
 514                         DRM_ERROR("0x%08x: packet %d (%s) failed to validate\n",
 515                                   src_offset, cmd, info->name);
 516                         return -EINVAL;
 517                 }
 518
 519                 src_offset += info->len;
 520                 /* GEM handle loading doesn't produce HW packets. */
 521                 if (cmd != VC4_PACKET_GEM_HANDLES)
 522                         dst_offset += info->len;
 523
 524                 /* When the CL hits halt, it'll stop reading anything else. */
 525                 if (cmd == VC4_PACKET_HALT)
 526                         break;
 527         }
 528
 529         exec->ct0ea = exec->ct0ca + dst_offset;
 530
 531         if (!exec->found_start_tile_binning_packet) {
 532                 DRM_ERROR("Bin CL missing VC4_PACKET_START_TILE_BINNING\n");
 533                 return -EINVAL;
 534         }
 535
 536         /* The bin CL must be ended with INCREMENT_SEMAPHORE and FLUSH.  The
 537          * semaphore is used to trigger the render CL to start up, and the
 538          * FLUSH is what caps the bin lists with
 539          * VC4_PACKET_RETURN_FROM_SUB_LIST (so they jump back to the main
 540          * render CL when they get called to) and actually triggers the queued
 541          * semaphore increment.
 542          */
 543         if (!exec->found_increment_semaphore_packet || !exec->found_flush) {
 544                 DRM_ERROR("Bin CL missing VC4_PACKET_INCREMENT_SEMAPHORE + "
 545                           "VC4_PACKET_FLUSH\n");
 546                 return -EINVAL;
 547         }
 548
 549         return 0;
 550 }
 551
 552 static bool
 553 reloc_tex(struct vc4_exec_info *exec,
 554           void *uniform_data_u,
 555           struct vc4_texture_sample_info *sample,
 556           uint32_t texture_handle_index)
 557
 558 {
 559         struct drm_gem_cma_object *tex;
 560         uint32_t p0 = *(uint32_t *)(uniform_data_u + sample->p_offset[0]);
 561         uint32_t p1 = *(uint32_t *)(uniform_data_u + sample->p_offset[1]);
 562         uint32_t p2 = (sample->p_offset[2] != ~0 ?
 563                        *(uint32_t *)(uniform_data_u + sample->p_offset[2]) : 0);
 564         uint32_t p3 = (sample->p_offset[3] != ~0 ?
 565                        *(uint32_t *)(uniform_data_u + sample->p_offset[3]) : 0);
 566         uint32_t *validated_p0 = exec->uniforms_v + sample->p_offset[0];
 567         uint32_t offset = p0 & VC4_TEX_P0_OFFSET_MASK;
 568         uint32_t miplevels = VC4_GET_FIELD(p0, VC4_TEX_P0_MIPLVLS);
 569         uint32_t width = VC4_GET_FIELD(p1, VC4_TEX_P1_WIDTH);
 570         uint32_t height = VC4_GET_FIELD(p1, VC4_TEX_P1_HEIGHT);
 571         uint32_t cpp, tiling_format, utile_w, utile_h;
 572         uint32_t i;
 573         uint32_t cube_map_stride = 0;
 574         enum vc4_texture_data_type type;
 575
 576         tex = vc4_use_bo(exec, texture_handle_index);
 577         if (!tex)
 578                 return false;
 579
 580         if (sample->is_direct) {
 581                 uint32_t remaining_size = tex->base.size - p0;
 582
 583                 if (p0 > tex->base.size - 4) {
 584                         DRM_ERROR("UBO offset greater than UBO size\n");
 585                         goto fail;
 586                 }
 587                 if (p1 > remaining_size - 4) {
 588                         DRM_ERROR("UBO clamp would allow reads "
 589                                   "outside of UBO\n");
 590                         goto fail;
 591                 }
 592                 *validated_p0 = tex->paddr + p0;
 593                 return true;
 594         }
 595
 596         if (width == 0)
 597                 width = 2048;
 598         if (height == 0)
 599                 height = 2048;
 600
 601         if (p0 & VC4_TEX_P0_CMMODE_MASK) {
 602                 if (VC4_GET_FIELD(p2, VC4_TEX_P2_PTYPE) ==
 603                     VC4_TEX_P2_PTYPE_CUBE_MAP_STRIDE)
 604                         cube_map_stride = p2 & VC4_TEX_P2_CMST_MASK;
 605                 if (VC4_GET_FIELD(p3, VC4_TEX_P2_PTYPE) ==
 606                     VC4_TEX_P2_PTYPE_CUBE_MAP_STRIDE) {
 607                         if (cube_map_stride) {
 608                                 DRM_ERROR("Cube map stride set twice\n");
 609                                 goto fail;
 610                         }
 611
 612                         cube_map_stride = p3 & VC4_TEX_P2_CMST_MASK;
 613                 }
 614                 if (!cube_map_stride) {
 615                         DRM_ERROR("Cube map stride not set\n");
 616                         goto fail;
 617                 }
 618         }
 619
 620         type = (VC4_GET_FIELD(p0, VC4_TEX_P0_TYPE) |
 621                 (VC4_GET_FIELD(p1, VC4_TEX_P1_TYPE4) << 4));
 622
 623         switch (type) {
 624         case VC4_TEXTURE_TYPE_RGBA8888:
 625         case VC4_TEXTURE_TYPE_RGBX8888:
 626         case VC4_TEXTURE_TYPE_RGBA32R:
 627                 cpp = 4;
 628                 break;
 629         case VC4_TEXTURE_TYPE_RGBA4444:
 630         case VC4_TEXTURE_TYPE_RGBA5551:
 631         case VC4_TEXTURE_TYPE_RGB565:
 632         case VC4_TEXTURE_TYPE_LUMALPHA:
 633         case VC4_TEXTURE_TYPE_S16F:
 634         case VC4_TEXTURE_TYPE_S16:
 635                 cpp = 2;
 636                 break;
 637         case VC4_TEXTURE_TYPE_LUMINANCE:
 638         case VC4_TEXTURE_TYPE_ALPHA:
 639         case VC4_TEXTURE_TYPE_S8:
 640                 cpp = 1;
 641                 break;
 642         case VC4_TEXTURE_TYPE_ETC1:
 643                 /* ETC1 is arranged as 64-bit blocks, where each block is 4x4
 644                  * pixels.
 645                  */
 646                 cpp = 8;
 647                 width = (width + 3) >> 2;
 648                 height = (height + 3) >> 2;
 649                 break;
 650         case VC4_TEXTURE_TYPE_BW1:
 651         case VC4_TEXTURE_TYPE_A4:
 652         case VC4_TEXTURE_TYPE_A1:
 653         case VC4_TEXTURE_TYPE_RGBA64:
 654         case VC4_TEXTURE_TYPE_YUV422R:
 655         default:
 656                 DRM_ERROR("Texture format %d unsupported\n", type);
 657                 goto fail;
 658         }
 659         utile_w = utile_width(cpp);
 660         utile_h = utile_height(cpp);
 661
 662         if (type == VC4_TEXTURE_TYPE_RGBA32R) {
 663                 tiling_format = VC4_TILING_FORMAT_LINEAR;
 664         } else {
 665                 if (size_is_lt(width, height, cpp))
 666                         tiling_format = VC4_TILING_FORMAT_LT;
 667                 else
 668                         tiling_format = VC4_TILING_FORMAT_T;
 669         }
 670
 671         if (!vc4_check_tex_size(exec, tex, offset + cube_map_stride * 5,
 672                                 tiling_format, width, height, cpp)) {
 673                 goto fail;
 674         }
 675
 676         /* The mipmap levels are stored before the base of the texture.  Make
 677          * sure there is actually space in the BO.
 678          */
 679         for (i = 1; i <= miplevels; i++) {
 680                 uint32_t level_width = max(width >> i, 1u);
 681                 uint32_t level_height = max(height >> i, 1u);
 682                 uint32_t aligned_width, aligned_height;
 683                 uint32_t level_size;
 684
 685                 /* Once the levels get small enough, they drop from T to LT. */
 686                 if (tiling_format == VC4_TILING_FORMAT_T &&
 687                     size_is_lt(level_width, level_height, cpp)) {
 688                         tiling_format = VC4_TILING_FORMAT_LT;
 689                 }
 690
 691                 switch (tiling_format) {
 692                 case VC4_TILING_FORMAT_T:
 693                         aligned_width = round_up(level_width, utile_w * 8);
 694                         aligned_height = round_up(level_height, utile_h * 8);
 695                         break;
 696                 case VC4_TILING_FORMAT_LT:
 697                         aligned_width = round_up(level_width, utile_w);
 698                         aligned_height = round_up(level_height, utile_h);
 699                         break;
 700                 default:
 701                         aligned_width = round_up(level_width, utile_w);
 702                         aligned_height = level_height;
 703                         break;
 704                 }
 705
 706                 level_size = aligned_width * cpp * aligned_height;
 707
 708                 if (offset < level_size) {
 709                         DRM_ERROR("Level %d (%dx%d -> %dx%d) size %db "
 710                                   "overflowed buffer bounds (offset %d)\n",
 711                                   i, level_width, level_height,
 712                                   aligned_width, aligned_height,
 713                                   level_size, offset);
 714                         goto fail;
 715                 }
 716
 717                 offset -= level_size;
 718         }
 719
 720         *validated_p0 = tex->paddr + p0;
 721
 722         return true;
 723  fail:
 724         DRM_INFO("Texture p0 at %d: 0x%08x\n", sample->p_offset[0], p0);
 725         DRM_INFO("Texture p1 at %d: 0x%08x\n", sample->p_offset[1], p1);
 726         DRM_INFO("Texture p2 at %d: 0x%08x\n", sample->p_offset[2], p2);
 727         DRM_INFO("Texture p3 at %d: 0x%08x\n", sample->p_offset[3], p3);
 728         return false;
 729 }
 730
 731 static int
 732 validate_gl_shader_rec(struct drm_device *dev,
 733                        struct vc4_exec_info *exec,
 734                        struct vc4_shader_state *state)
 735 {
 736         uint32_t *src_handles;
 737         void *pkt_u, *pkt_v;
 738         static const uint32_t shader_reloc_offsets[] = {
 739                 4, /* fs */
 740                 16, /* vs */
 741                 28, /* cs */
 742         };
 743         uint32_t shader_reloc_count = ARRAY_SIZE(shader_reloc_offsets);
 744         struct drm_gem_cma_object *bo[shader_reloc_count + 8];
 745         uint32_t nr_attributes, nr_relocs, packet_size;
 746         int i;
 747
 748         nr_attributes = state->addr & 0x7;
 749         if (nr_attributes == 0)
 750                 nr_attributes = 8;
 751         packet_size = gl_shader_rec_size(state->addr);
 752
 753         nr_relocs = ARRAY_SIZE(shader_reloc_offsets) + nr_attributes;
 754         if (nr_relocs * 4 > exec->shader_rec_size) {
 755                 DRM_ERROR("overflowed shader recs reading %d handles "
 756                           "from %d bytes left\n",
 757                           nr_relocs, exec->shader_rec_size);
 758                 return -EINVAL;
 759         }
 760         src_handles = exec->shader_rec_u;
 761         exec->shader_rec_u += nr_relocs * 4;
 762         exec->shader_rec_size -= nr_relocs * 4;
 763
 764         if (packet_size > exec->shader_rec_size) {
 765                 DRM_ERROR("overflowed shader recs copying %db packet "
 766                           "from %d bytes left\n",
 767                           packet_size, exec->shader_rec_size);
 768                 return -EINVAL;
 769         }
 770         pkt_u = exec->shader_rec_u;
 771         pkt_v = exec->shader_rec_v;
 772         memcpy(pkt_v, pkt_u, packet_size);
 773         exec->shader_rec_u += packet_size;
 774         /* Shader recs have to be aligned to 16 bytes (due to the attribute
 775          * flags being in the low bytes), so round the next validated shader
 776          * rec address up.  This should be safe, since we've got so many
 777          * relocations in a shader rec packet.
 778          */
 779         BUG_ON(roundup(packet_size, 16) - packet_size > nr_relocs * 4);
 780         exec->shader_rec_v += roundup(packet_size, 16);
 781         exec->shader_rec_size -= packet_size;
 782
 783         for (i = 0; i < shader_reloc_count; i++) {
 784                 if (src_handles[i] > exec->bo_count) {
 785                         DRM_ERROR("Shader handle %d too big\n", src_handles[i]);
 786                         return -EINVAL;
 787                 }
 788
 789                 bo[i] = exec->bo[src_handles[i]];
 790                 if (!bo[i])
 791                         return -EINVAL;
 792         }
 793         for (i = shader_reloc_count; i < nr_relocs; i++) {
 794                 bo[i] = vc4_use_bo(exec, src_handles[i]);
 795                 if (!bo[i])
 796                         return -EINVAL;
 797         }
 798
 799         if (((*(uint16_t *)pkt_u & VC4_SHADER_FLAG_FS_SINGLE_THREAD) == 0) !=
 800             to_vc4_bo(&bo[0]->base)->validated_shader->is_threaded) {
 801                 DRM_ERROR("Thread mode of CL and FS do not match\n");
 802                 return -EINVAL;
 803         }
 804
 805         if (to_vc4_bo(&bo[1]->base)->validated_shader->is_threaded ||
 806             to_vc4_bo(&bo[2]->base)->validated_shader->is_threaded) {
 807                 DRM_ERROR("cs and vs cannot be threaded\n");
 808                 return -EINVAL;
 809         }
 810
 811         for (i = 0; i < shader_reloc_count; i++) {
 812                 struct vc4_validated_shader_info *validated_shader;
 813                 uint32_t o = shader_reloc_offsets[i];
 814                 uint32_t src_offset = *(uint32_t *)(pkt_u + o);
 815                 uint32_t *texture_handles_u;
 816                 void *uniform_data_u;
 817                 uint32_t tex, uni;
 818
 819                 *(uint32_t *)(pkt_v + o) = bo[i]->paddr + src_offset;
 820
 821                 if (src_offset != 0) {
 822                         DRM_ERROR("Shaders must be at offset 0 of "
 823                                   "the BO.\n");
 824                         return -EINVAL;
 825                 }
 826
 827                 validated_shader = to_vc4_bo(&bo[i]->base)->validated_shader;
 828                 if (!validated_shader)
 829                         return -EINVAL;
 830
 831                 if (validated_shader->uniforms_src_size >
 832                     exec->uniforms_size) {
 833                         DRM_ERROR("Uniforms src buffer overflow\n");
 834                         return -EINVAL;
 835                 }
 836
 837                 texture_handles_u = exec->uniforms_u;
 838                 uniform_data_u = (texture_handles_u +
 839                                   validated_shader->num_texture_samples);
 840
 841                 memcpy(exec->uniforms_v, uniform_data_u,
 842                        validated_shader->uniforms_size);
 843
 844                 for (tex = 0;
 845                      tex < validated_shader->num_texture_samples;
 846                      tex++) {
 847                         if (!reloc_tex(exec,
 848                                        uniform_data_u,
 849                                        &validated_shader->texture_samples[tex],
 850                                        texture_handles_u[tex])) {
 851                                 return -EINVAL;
 852                         }
 853                 }
 854
 855                 /* Fill in the uniform slots that need this shader's
 856                  * start-of-uniforms address (used for resetting the uniform
 857                  * stream in the presence of control flow).
 858                  */
 859                 for (uni = 0;
 860                      uni < validated_shader->num_uniform_addr_offsets;
 861                      uni++) {
 862                         uint32_t o = validated_shader->uniform_addr_offsets[uni];
 863                         ((uint32_t *)exec->uniforms_v)[o] = exec->uniforms_p;
 864                 }
 865
 866                 *(uint32_t *)(pkt_v + o + 4) = exec->uniforms_p;
 867
 868                 exec->uniforms_u += validated_shader->uniforms_src_size;
 869                 exec->uniforms_v += validated_shader->uniforms_size;
 870                 exec->uniforms_p += validated_shader->uniforms_size;
 871         }
 872
 873         for (i = 0; i < nr_attributes; i++) {
 874                 struct drm_gem_cma_object *vbo =
 875                         bo[ARRAY_SIZE(shader_reloc_offsets) + i];
 876                 uint32_t o = 36 + i * 8;
 877                 uint32_t offset = *(uint32_t *)(pkt_u + o + 0);
 878                 uint32_t attr_size = *(uint8_t *)(pkt_u + o + 4) + 1;
 879                 uint32_t stride = *(uint8_t *)(pkt_u + o + 5);
 880                 uint32_t max_index;
 881
 882                 if (state->addr & 0x8)
 883                         stride |= (*(uint32_t *)(pkt_u + 100 + i * 4)) & ~0xff;
 884
 885                 if (vbo->base.size < offset ||
 886                     vbo->base.size - offset < attr_size) {
 887                         DRM_ERROR("BO offset overflow (%d + %d > %zd)\n",
 888                                   offset, attr_size, vbo->base.size);
 889                         return -EINVAL;
 890                 }
 891
 892                 if (stride != 0) {
 893                         max_index = ((vbo->base.size - offset - attr_size) /
 894                                      stride);
 895                         if (state->max_index > max_index) {
 896                                 DRM_ERROR("primitives use index %d out of "
 897                                           "supplied %d\n",
 898                                           state->max_index, max_index);
 899                                 return -EINVAL;
 900                         }
 901                 }
 902
 903                 *(uint32_t *)(pkt_v + o) = vbo->paddr + offset;
 904         }
 905
 906         return 0;
 907 }
 908
 909 int
 910 vc4_validate_shader_recs(struct drm_device *dev,
 911                          struct vc4_exec_info *exec)
 912 {
 913         uint32_t i;
 914         int ret = 0;
 915
 916         for (i = 0; i < exec->shader_state_count; i++) {
 917                 ret = validate_gl_shader_rec(dev, exec, &exec->shader_state[i]);
 918                 if (ret)
 919                         return ret;
 920         }
 921
 922         return ret;
 923 }