--- /dev/null
+/* { dg-do compile } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options "-O3 -msse2" } */
+
+typedef long v2di __attribute__((vector_size(16)));
+typedef int v4si __attribute__((vector_size(16)));
+
+void foo (v4si *p, v2di *q)
+{
+  union { v2di a; v4si b; } u;  /* Lets the V2DI value be read as V4SI.  */
+  u.a = *q;
+  (*p)[0] = u.b[0];  /* *p receives lanes 0, 2, 1, 3 of u.b, in that order.  */
+  (*p)[1] = u.b[2];
+  (*p)[2] = u.b[1];
+  (*p)[3] = u.b[3];
+}
+
+void bar (v4si *p, __int128_t *q)
+{
+  union { __int128_t a; v4si b; } u;  /* Lets the 128-bit scalar be read as V4SI.  */
+  u.a = *q;
+  (*p)[0] = u.b[1];  /* *p receives lanes 1, 2, 1, 3 of u.b; lane 1 is used twice.  */
+  (*p)[1] = u.b[2];
+  (*p)[2] = u.b[1];
+  (*p)[3] = u.b[3];
+}
+
+/* Both functions should end up with something like
+     [v]pshufd $val, (%rsi), %xmm0
+     [v]movdqa %xmm0, (%rdi)
+     ret
+   recognized by SLP vectorization involving an existing "vector".  */
+/* { dg-final { scan-assembler-not "punpck" } } */
+/* { dg-final { scan-assembler-times "pshufd" 2 } } */
 
              tree vec = TREE_OPERAND (gimple_assign_rhs1 (stmt), 0);
              if (!is_a <bb_vec_info> (vinfo)
                  || TREE_CODE (vec) != SSA_NAME
-                 || !types_compatible_p (vectype, TREE_TYPE (vec)))
+                 || !operand_equal_p (TYPE_SIZE (vectype),
+                                      TYPE_SIZE (TREE_TYPE (vec))))
                {
                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
          lperm.safe_push (std::make_pair (0, (unsigned)lane));
        }
       slp_tree vnode = vect_create_new_slp_node (vNULL);
-      SLP_TREE_VECTYPE (vnode) = TREE_TYPE (vec);
+      /* ???  We record vectype here but hide any punning that may be
+        necessary and instead rely on code generation to materialize
+        VIEW_CONVERT_EXPRs as needed.  We should instead make this
+        explicit somehow.  */
+      SLP_TREE_VECTYPE (vnode) = vectype;
       SLP_TREE_VEC_DEFS (vnode).safe_push (vec);
       /* We are always building a permutation node even if it is an identity
         permute to shield the rest of the vectorizer from the odd node
              slp_tree first_node = SLP_TREE_CHILDREN (node)[first_vec.first];
              tree first_def
                = vect_get_slp_vect_def (first_node, first_vec.second);
+             /* ???  We SLP match existing vector element extracts but
+                allow punning which we need to re-instantiate at uses
+                but have no good way of explicitly representing.  */
+             if (!types_compatible_p (TREE_TYPE (first_def), vectype))
+               {
+                 gassign *conv_stmt;
+                 conv_stmt = gimple_build_assign (make_ssa_name (vectype),
+                                                  build1 (VIEW_CONVERT_EXPR,
+                                                          vectype, first_def));
+                 vect_finish_stmt_generation (vinfo, NULL, conv_stmt, gsi);
+                 first_def = gimple_assign_lhs (conv_stmt);
+               }
              gassign *perm_stmt;
              tree perm_dest = make_ssa_name (vectype);
              if (!identity_p)
                    = SLP_TREE_CHILDREN (node)[second_vec.first];
                  tree second_def
                    = vect_get_slp_vect_def (second_node, second_vec.second);
+                 if (!types_compatible_p (TREE_TYPE (second_def), vectype))
+                   {
+                     gassign *conv_stmt;
+                     conv_stmt = gimple_build_assign (make_ssa_name (vectype),
+                                                      build1
+                                                        (VIEW_CONVERT_EXPR,
+                                                         vectype, second_def));
+                     vect_finish_stmt_generation (vinfo, NULL, conv_stmt, gsi);
+                     second_def = gimple_assign_lhs (conv_stmt);
+                   }
                  tree mask_vec = vect_gen_perm_mask_checked (vectype, indices);
                  perm_stmt = gimple_build_assign (perm_dest, VEC_PERM_EXPR,
                                                   first_def, second_def,