.framebuffer = framebuffer,
         };
 
-        /* Normally, there should be no padding. However, fragment jobs are
-         * shared with 64-bit Bifrost systems, and accordingly there is 4-bytes
-         * of zero padding in between. */
-
         struct panfrost_transfer transfer = panfrost_allocate_transient(batch, sizeof(header) + sizeof(payload));
         memcpy(transfer.cpu, &header, sizeof(header));
         memcpy(transfer.cpu + sizeof(header), &payload, sizeof(payload));
 
                 assert(batch->polygon_list->size >= size);
         } else {
                 /* Create the BO as invisible, as there's no reason to map */
+                size = util_next_power_of_two(size);
 
                 batch->polygon_list = panfrost_batch_create_bo(batch, size,
                                                                PAN_BO_INVISIBLE,
         /* Now that all draws are in, we can finally prepare the
          * FBD for the batch */
 
-        if (batch->framebuffer.gpu) {
+        if (batch->framebuffer.gpu && batch->first_job.gpu) {
                 struct panfrost_context *ctx = batch->ctx;
                 struct pipe_context *gallium = (struct pipe_context *) ctx;
                 struct panfrost_screen *screen = pan_screen(gallium->screen);