verify.cc (pop64): Removed.
[gcc.git] / libjava / verify.cc
1 // defineclass.cc - defining a class from .class format.
2
3 /* Copyright (C) 2001, 2002, 2003 Free Software Foundation
4
5 This file is part of libgcj.
6
7 This software is copyrighted work licensed under the terms of the
8 Libgcj License. Please consult the file "LIBGCJ_LICENSE" for
9 details. */
10
11 // Written by Tom Tromey <tromey@redhat.com>
12
13 // Define VERIFY_DEBUG to enable debugging output.
14
15 #include <config.h>
16
17 #include <jvm.h>
18 #include <gcj/cni.h>
19 #include <java-insns.h>
20 #include <java-interp.h>
21
22 #ifdef INTERPRETER
23
24 #include <java/lang/Class.h>
25 #include <java/lang/VerifyError.h>
26 #include <java/lang/Throwable.h>
27 #include <java/lang/reflect/Modifier.h>
28 #include <java/lang/StringBuffer.h>
29
30 #ifdef VERIFY_DEBUG
31 #include <stdio.h>
32 #endif /* VERIFY_DEBUG */
33
34
35 static void debug_print (const char *fmt, ...)
36 __attribute__ ((format (printf, 1, 2)));
37
38 static inline void
39 debug_print (const char *fmt, ...)
40 {
41 #ifdef VERIFY_DEBUG
42 va_list ap;
43 va_start (ap, fmt);
44 vfprintf (stderr, fmt, ap);
45 va_end (ap);
46 #endif /* VERIFY_DEBUG */
47 }
48
49 class _Jv_BytecodeVerifier
50 {
51 private:
52
53 static const int FLAG_INSN_START = 1;
54 static const int FLAG_BRANCH_TARGET = 2;
55
56 struct state;
57 struct type;
58 struct subr_info;
59 struct subr_entry_info;
60 struct linked_utf8;
61
62 // The current PC.
63 int PC;
64 // The PC corresponding to the start of the current instruction.
65 int start_PC;
66
67 // The current state of the stack, locals, etc.
68 state *current_state;
69
70 // We store the state at branch targets, for merging. This holds
71 // such states.
72 state **states;
73
74 // We keep a linked list of all the PCs which we must reverify.
75 // The link is done using the PC values. This is the head of the
76 // list.
77 int next_verify_pc;
78
79 // We keep some flags for each instruction. The values are the
80 // FLAG_* constants defined above.
81 char *flags;
82
83 // We need to keep track of which instructions can call a given
84 // subroutine. FIXME: this is inefficient. We keep a linked list
85 // of all calling `jsr's at at each jsr target.
86 subr_info **jsr_ptrs;
87
88 // We keep a linked list of entries which map each `ret' instruction
89 // to its unique subroutine entry point. We expect that there won't
90 // be many `ret' instructions, so a linked list is ok.
91 subr_entry_info *entry_points;
92
93 // The bytecode itself.
94 unsigned char *bytecode;
95 // The exceptions.
96 _Jv_InterpException *exception;
97
98 // Defining class.
99 jclass current_class;
100 // This method.
101 _Jv_InterpMethod *current_method;
102
103 // A linked list of utf8 objects we allocate. This is really ugly,
104 // but without this our utf8 objects would be collected.
105 linked_utf8 *utf8_list;
106
107 struct linked_utf8
108 {
109 _Jv_Utf8Const *val;
110 linked_utf8 *next;
111 };
112
113 _Jv_Utf8Const *make_utf8_const (char *s, int len)
114 {
115 _Jv_Utf8Const *val = _Jv_makeUtf8Const (s, len);
116 _Jv_Utf8Const *r = (_Jv_Utf8Const *) _Jv_Malloc (sizeof (_Jv_Utf8Const)
117 + val->length
118 + 1);
119 r->length = val->length;
120 r->hash = val->hash;
121 memcpy (r->data, val->data, val->length + 1);
122
123 linked_utf8 *lu = (linked_utf8 *) _Jv_Malloc (sizeof (linked_utf8));
124 lu->val = r;
125 lu->next = utf8_list;
126 utf8_list = lu;
127
128 return r;
129 }
130
131 __attribute__ ((__noreturn__)) void verify_fail (char *s, jint pc = -1)
132 {
133 using namespace java::lang;
134 StringBuffer *buf = new StringBuffer ();
135
136 buf->append (JvNewStringLatin1 ("verification failed"));
137 if (pc == -1)
138 pc = start_PC;
139 if (pc != -1)
140 {
141 buf->append (JvNewStringLatin1 (" at PC "));
142 buf->append (pc);
143 }
144
145 _Jv_InterpMethod *method = current_method;
146 buf->append (JvNewStringLatin1 (" in "));
147 buf->append (current_class->getName());
148 buf->append ((jchar) ':');
149 buf->append (JvNewStringUTF (method->get_method()->name->data));
150 buf->append ((jchar) '(');
151 buf->append (JvNewStringUTF (method->get_method()->signature->data));
152 buf->append ((jchar) ')');
153
154 buf->append (JvNewStringLatin1 (": "));
155 buf->append (JvNewStringLatin1 (s));
156 throw new java::lang::VerifyError (buf->toString ());
157 }
158
159 // This enum holds a list of tags for all the different types we
160 // need to handle. Reference types are treated specially by the
161 // type class.
162 enum type_val
163 {
164 void_type,
165
166 // The values for primitive types are chosen to correspond to values
167 // specified to newarray.
168 boolean_type = 4,
169 char_type = 5,
170 float_type = 6,
171 double_type = 7,
172 byte_type = 8,
173 short_type = 9,
174 int_type = 10,
175 long_type = 11,
176
177 // Used when overwriting second word of a double or long in the
178 // local variables. Also used after merging local variable states
179 // to indicate an unusable value.
180 unsuitable_type,
181 return_address_type,
182 continuation_type,
183
184 // There is an obscure special case which requires us to note when
185 // a local variable has not been used by a subroutine. See
186 // push_jump_merge for more information.
187 unused_by_subroutine_type,
188
189 // Everything after `reference_type' must be a reference type.
190 reference_type,
191 null_type,
192 unresolved_reference_type,
193 uninitialized_reference_type,
194 uninitialized_unresolved_reference_type
195 };
196
197 // Return the type_val corresponding to a primitive signature
198 // character. For instance `I' returns `int.class'.
199 type_val get_type_val_for_signature (jchar sig)
200 {
201 type_val rt;
202 switch (sig)
203 {
204 case 'Z':
205 rt = boolean_type;
206 break;
207 case 'B':
208 rt = byte_type;
209 break;
210 case 'C':
211 rt = char_type;
212 break;
213 case 'S':
214 rt = short_type;
215 break;
216 case 'I':
217 rt = int_type;
218 break;
219 case 'J':
220 rt = long_type;
221 break;
222 case 'F':
223 rt = float_type;
224 break;
225 case 'D':
226 rt = double_type;
227 break;
228 case 'V':
229 rt = void_type;
230 break;
231 default:
232 verify_fail ("invalid signature");
233 }
234 return rt;
235 }
236
237 // Return the type_val corresponding to a primitive class.
238 type_val get_type_val_for_signature (jclass k)
239 {
240 return get_type_val_for_signature ((jchar) k->method_count);
241 }
242
243 // This is like _Jv_IsAssignableFrom, but it works even if SOURCE or
244 // TARGET haven't been prepared.
245 static bool is_assignable_from_slow (jclass target, jclass source)
246 {
247 // This will terminate when SOURCE==Object.
248 while (true)
249 {
250 if (source == target)
251 return true;
252
253 if (target->isPrimitive () || source->isPrimitive ())
254 return false;
255
256 if (target->isArray ())
257 {
258 if (! source->isArray ())
259 return false;
260 target = target->getComponentType ();
261 source = source->getComponentType ();
262 }
263 else if (target->isInterface ())
264 {
265 for (int i = 0; i < source->interface_count; ++i)
266 {
267 // We use a recursive call because we also need to
268 // check superinterfaces.
269 if (is_assignable_from_slow (target, source->interfaces[i]))
270 return true;
271 }
272 source = source->getSuperclass ();
273 if (source == NULL)
274 return false;
275 }
276 // We must do this check before we check to see if SOURCE is
277 // an interface. This way we know that any interface is
278 // assignable to an Object.
279 else if (target == &java::lang::Object::class$)
280 return true;
281 else if (source->isInterface ())
282 {
283 for (int i = 0; i < target->interface_count; ++i)
284 {
285 // We use a recursive call because we also need to
286 // check superinterfaces.
287 if (is_assignable_from_slow (target->interfaces[i], source))
288 return true;
289 }
290 target = target->getSuperclass ();
291 if (target == NULL)
292 return false;
293 }
294 else if (source == &java::lang::Object::class$)
295 return false;
296 else
297 source = source->getSuperclass ();
298 }
299 }
300
301 // This is used to keep track of which `jsr's correspond to a given
302 // jsr target.
303 struct subr_info
304 {
305 // PC of the instruction just after the jsr.
306 int pc;
307 // Link.
308 subr_info *next;
309 };
310
311 // This is used to keep track of which subroutine entry point
312 // corresponds to which `ret' instruction.
313 struct subr_entry_info
314 {
315 // PC of the subroutine entry point.
316 int pc;
317 // PC of the `ret' instruction.
318 int ret_pc;
319 // Link.
320 subr_entry_info *next;
321 };
322
323 // The `type' class is used to represent a single type in the
324 // verifier.
325 struct type
326 {
327 // The type.
328 type_val key;
329 // Some associated data.
330 union
331 {
332 // For a resolved reference type, this is a pointer to the class.
333 jclass klass;
334 // For other reference types, this it the name of the class.
335 _Jv_Utf8Const *name;
336 } data;
337 // This is used when constructing a new object. It is the PC of the
338 // `new' instruction which created the object. We use the special
339 // value -2 to mean that this is uninitialized, and the special
340 // value -1 for the case where the current method is itself the
341 // <init> method.
342 int pc;
343
344 static const int UNINIT = -2;
345 static const int SELF = -1;
346
347 // Basic constructor.
348 type ()
349 {
350 key = unsuitable_type;
351 data.klass = NULL;
352 pc = UNINIT;
353 }
354
355 // Make a new instance given the type tag. We assume a generic
356 // `reference_type' means Object.
357 type (type_val k)
358 {
359 key = k;
360 data.klass = NULL;
361 if (key == reference_type)
362 data.klass = &java::lang::Object::class$;
363 pc = UNINIT;
364 }
365
366 // Make a new instance given a class.
367 type (jclass klass)
368 {
369 key = reference_type;
370 data.klass = klass;
371 pc = UNINIT;
372 }
373
374 // Make a new instance given the name of a class.
375 type (_Jv_Utf8Const *n)
376 {
377 key = unresolved_reference_type;
378 data.name = n;
379 pc = UNINIT;
380 }
381
382 // Copy constructor.
383 type (const type &t)
384 {
385 key = t.key;
386 data = t.data;
387 pc = t.pc;
388 }
389
390 // These operators are required because libgcj can't link in
391 // -lstdc++.
392 void *operator new[] (size_t bytes)
393 {
394 return _Jv_Malloc (bytes);
395 }
396
397 void operator delete[] (void *mem)
398 {
399 _Jv_Free (mem);
400 }
401
402 type& operator= (type_val k)
403 {
404 key = k;
405 data.klass = NULL;
406 pc = UNINIT;
407 return *this;
408 }
409
410 type& operator= (const type& t)
411 {
412 key = t.key;
413 data = t.data;
414 pc = t.pc;
415 return *this;
416 }
417
418 // Promote a numeric type.
419 type &promote ()
420 {
421 if (key == boolean_type || key == char_type
422 || key == byte_type || key == short_type)
423 key = int_type;
424 return *this;
425 }
426
427 // If *THIS is an unresolved reference type, resolve it.
428 void resolve (_Jv_BytecodeVerifier *verifier)
429 {
430 if (key != unresolved_reference_type
431 && key != uninitialized_unresolved_reference_type)
432 return;
433
434 using namespace java::lang;
435 java::lang::ClassLoader *loader
436 = verifier->current_class->getClassLoaderInternal();
437 // We might see either kind of name. Sigh.
438 if (data.name->data[0] == 'L'
439 && data.name->data[data.name->length - 1] == ';')
440 data.klass = _Jv_FindClassFromSignature (data.name->data, loader);
441 else
442 data.klass = Class::forName (_Jv_NewStringUtf8Const (data.name),
443 false, loader);
444 key = (key == unresolved_reference_type
445 ? reference_type
446 : uninitialized_reference_type);
447 }
448
449 // Mark this type as the uninitialized result of `new'.
450 void set_uninitialized (int npc, _Jv_BytecodeVerifier *verifier)
451 {
452 if (key == reference_type)
453 key = uninitialized_reference_type;
454 else if (key == unresolved_reference_type)
455 key = uninitialized_unresolved_reference_type;
456 else
457 verifier->verify_fail ("internal error in type::uninitialized");
458 pc = npc;
459 }
460
461 // Mark this type as now initialized.
462 void set_initialized (int npc)
463 {
464 if (npc != UNINIT && pc == npc
465 && (key == uninitialized_reference_type
466 || key == uninitialized_unresolved_reference_type))
467 {
468 key = (key == uninitialized_reference_type
469 ? reference_type
470 : unresolved_reference_type);
471 pc = UNINIT;
472 }
473 }
474
475
476 // Return true if an object of type K can be assigned to a variable
477 // of type *THIS. Handle various special cases too. Might modify
478 // *THIS or K. Note however that this does not perform numeric
479 // promotion.
480 bool compatible (type &k, _Jv_BytecodeVerifier *verifier)
481 {
482 // Any type is compatible with the unsuitable type.
483 if (key == unsuitable_type)
484 return true;
485
486 if (key < reference_type || k.key < reference_type)
487 return key == k.key;
488
489 // The `null' type is convertible to any initialized reference
490 // type.
491 if (key == null_type || k.key == null_type)
492 return true;
493
494 // Any reference type is convertible to Object. This is a special
495 // case so we don't need to unnecessarily resolve a class.
496 if (key == reference_type
497 && data.klass == &java::lang::Object::class$)
498 return true;
499
500 // An initialized type and an uninitialized type are not
501 // compatible.
502 if (isinitialized () != k.isinitialized ())
503 return false;
504
505 // Two uninitialized objects are compatible if either:
506 // * The PCs are identical, or
507 // * One PC is UNINIT.
508 if (! isinitialized ())
509 {
510 if (pc != k.pc && pc != UNINIT && k.pc != UNINIT)
511 return false;
512 }
513
514 // Two unresolved types are equal if their names are the same.
515 if (! isresolved ()
516 && ! k.isresolved ()
517 && _Jv_equalUtf8Consts (data.name, k.data.name))
518 return true;
519
520 // We must resolve both types and check assignability.
521 resolve (verifier);
522 k.resolve (verifier);
523 return is_assignable_from_slow (data.klass, k.data.klass);
524 }
525
526 bool isvoid () const
527 {
528 return key == void_type;
529 }
530
531 bool iswide () const
532 {
533 return key == long_type || key == double_type;
534 }
535
536 // Return number of stack or local variable slots taken by this
537 // type.
538 int depth () const
539 {
540 return iswide () ? 2 : 1;
541 }
542
543 bool isarray () const
544 {
545 // We treat null_type as not an array. This is ok based on the
546 // current uses of this method.
547 if (key == reference_type)
548 return data.klass->isArray ();
549 else if (key == unresolved_reference_type)
550 return data.name->data[0] == '[';
551 return false;
552 }
553
554 bool isnull () const
555 {
556 return key == null_type;
557 }
558
559 bool isinterface (_Jv_BytecodeVerifier *verifier)
560 {
561 resolve (verifier);
562 if (key != reference_type)
563 return false;
564 return data.klass->isInterface ();
565 }
566
567 bool isabstract (_Jv_BytecodeVerifier *verifier)
568 {
569 resolve (verifier);
570 if (key != reference_type)
571 return false;
572 using namespace java::lang::reflect;
573 return Modifier::isAbstract (data.klass->getModifiers ());
574 }
575
576 // Return the element type of an array.
577 type element_type (_Jv_BytecodeVerifier *verifier)
578 {
579 // FIXME: maybe should do string manipulation here.
580 resolve (verifier);
581 if (key != reference_type)
582 verifier->verify_fail ("programmer error in type::element_type()", -1);
583
584 jclass k = data.klass->getComponentType ();
585 if (k->isPrimitive ())
586 return type (verifier->get_type_val_for_signature (k));
587 return type (k);
588 }
589
590 // Return the array type corresponding to an initialized
591 // reference. We could expand this to work for other kinds of
592 // types, but currently we don't need to.
593 type to_array (_Jv_BytecodeVerifier *verifier)
594 {
595 // Resolving isn't ideal, because it might force us to load
596 // another class, but it's easy. FIXME?
597 if (key == unresolved_reference_type)
598 resolve (verifier);
599
600 if (key == reference_type)
601 return type (_Jv_GetArrayClass (data.klass,
602 data.klass->getClassLoaderInternal()));
603 else
604 verifier->verify_fail ("internal error in type::to_array()");
605 }
606
607 bool isreference () const
608 {
609 return key >= reference_type;
610 }
611
612 int get_pc () const
613 {
614 return pc;
615 }
616
617 bool isinitialized () const
618 {
619 return (key == reference_type
620 || key == null_type
621 || key == unresolved_reference_type);
622 }
623
624 bool isresolved () const
625 {
626 return (key == reference_type
627 || key == null_type
628 || key == uninitialized_reference_type);
629 }
630
631 void verify_dimensions (int ndims, _Jv_BytecodeVerifier *verifier)
632 {
633 // The way this is written, we don't need to check isarray().
634 if (key == reference_type)
635 {
636 jclass k = data.klass;
637 while (k->isArray () && ndims > 0)
638 {
639 k = k->getComponentType ();
640 --ndims;
641 }
642 }
643 else
644 {
645 // We know KEY == unresolved_reference_type.
646 char *p = data.name->data;
647 while (*p++ == '[' && ndims-- > 0)
648 ;
649 }
650
651 if (ndims > 0)
652 verifier->verify_fail ("array type has fewer dimensions than required");
653 }
654
655 // Merge OLD_TYPE into this. On error throw exception.
656 bool merge (type& old_type, bool local_semantics,
657 _Jv_BytecodeVerifier *verifier)
658 {
659 bool changed = false;
660 bool refo = old_type.isreference ();
661 bool refn = isreference ();
662 if (refo && refn)
663 {
664 if (old_type.key == null_type)
665 ;
666 else if (key == null_type)
667 {
668 *this = old_type;
669 changed = true;
670 }
671 else if (isinitialized () != old_type.isinitialized ())
672 verifier->verify_fail ("merging initialized and uninitialized types");
673 else
674 {
675 if (! isinitialized ())
676 {
677 if (pc == UNINIT)
678 pc = old_type.pc;
679 else if (old_type.pc == UNINIT)
680 ;
681 else if (pc != old_type.pc)
682 verifier->verify_fail ("merging different uninitialized types");
683 }
684
685 if (! isresolved ()
686 && ! old_type.isresolved ()
687 && _Jv_equalUtf8Consts (data.name, old_type.data.name))
688 {
689 // Types are identical.
690 }
691 else
692 {
693 resolve (verifier);
694 old_type.resolve (verifier);
695
696 jclass k = data.klass;
697 jclass oldk = old_type.data.klass;
698
699 int arraycount = 0;
700 while (k->isArray () && oldk->isArray ())
701 {
702 ++arraycount;
703 k = k->getComponentType ();
704 oldk = oldk->getComponentType ();
705 }
706
707 // Ordinarily this terminates when we hit Object...
708 while (k != NULL)
709 {
710 if (is_assignable_from_slow (k, oldk))
711 break;
712 k = k->getSuperclass ();
713 changed = true;
714 }
715 // ... but K could have been an interface, in which
716 // case we'll end up here. We just convert this
717 // into Object.
718 if (k == NULL)
719 k = &java::lang::Object::class$;
720
721 if (changed)
722 {
723 while (arraycount > 0)
724 {
725 java::lang::ClassLoader *loader
726 = verifier->current_class->getClassLoaderInternal();
727 k = _Jv_GetArrayClass (k, loader);
728 --arraycount;
729 }
730 data.klass = k;
731 }
732 }
733 }
734 }
735 else if (refo || refn || key != old_type.key)
736 {
737 if (local_semantics)
738 {
739 // If we're merging into an "unused" slot, then we
740 // simply accept whatever we're merging from.
741 if (key == unused_by_subroutine_type)
742 {
743 *this = old_type;
744 changed = true;
745 }
746 else if (old_type.key == unused_by_subroutine_type)
747 {
748 // Do nothing.
749 }
750 // If we already have an `unsuitable' type, then we
751 // don't need to change again.
752 else if (key != unsuitable_type)
753 {
754 key = unsuitable_type;
755 changed = true;
756 }
757 }
758 else
759 verifier->verify_fail ("unmergeable type");
760 }
761 return changed;
762 }
763
764 #ifdef VERIFY_DEBUG
765 void print (void) const
766 {
767 char c = '?';
768 switch (key)
769 {
770 case boolean_type: c = 'Z'; break;
771 case byte_type: c = 'B'; break;
772 case char_type: c = 'C'; break;
773 case short_type: c = 'S'; break;
774 case int_type: c = 'I'; break;
775 case long_type: c = 'J'; break;
776 case float_type: c = 'F'; break;
777 case double_type: c = 'D'; break;
778 case void_type: c = 'V'; break;
779 case unsuitable_type: c = '-'; break;
780 case return_address_type: c = 'r'; break;
781 case continuation_type: c = '+'; break;
782 case unused_by_subroutine_type: c = '_'; break;
783 case reference_type: c = 'L'; break;
784 case null_type: c = '@'; break;
785 case unresolved_reference_type: c = 'l'; break;
786 case uninitialized_reference_type: c = 'U'; break;
787 case uninitialized_unresolved_reference_type: c = 'u'; break;
788 }
789 debug_print ("%c", c);
790 }
791 #endif /* VERIFY_DEBUG */
792 };
793
794 // This class holds all the state information we need for a given
795 // location.
796 struct state
797 {
798 // The current top of the stack, in terms of slots.
799 int stacktop;
800 // The current depth of the stack. This will be larger than
801 // STACKTOP when wide types are on the stack.
802 int stackdepth;
803 // The stack.
804 type *stack;
805 // The local variables.
806 type *locals;
807 // This is used in subroutines to keep track of which local
808 // variables have been accessed.
809 bool *local_changed;
810 // If not 0, then we are in a subroutine. The value is the PC of
811 // the subroutine's entry point. We can use 0 as an exceptional
812 // value because PC=0 can never be a subroutine.
813 int subroutine;
814 // This is used to keep a linked list of all the states which
815 // require re-verification. We use the PC to keep track.
816 int next;
817 // We keep track of the type of `this' specially. This is used to
818 // ensure that an instance initializer invokes another initializer
819 // on `this' before returning. We must keep track of this
820 // specially because otherwise we might be confused by code which
821 // assigns to locals[0] (overwriting `this') and then returns
822 // without really initializing.
823 type this_type;
824 // This is a list of all subroutines that have been seen at this
825 // point. Ordinarily this is NULL; it is only allocated and used
826 // in relatively weird situations involving non-ret exit from a
827 // subroutine. We have to keep track of this in this way to avoid
828 // endless recursion in these cases.
829 subr_info *seen_subrs;
830
831 // INVALID marks a state which is not on the linked list of states
832 // requiring reverification.
833 static const int INVALID = -1;
834 // NO_NEXT marks the state at the end of the reverification list.
835 static const int NO_NEXT = -2;
836
837 // This is used to mark the stack depth at the instruction just
838 // after a `jsr' when we haven't yet processed the corresponding
839 // `ret'. See handle_jsr_insn for more information.
840 static const int NO_STACK = -1;
841
842 state ()
843 : this_type ()
844 {
845 stack = NULL;
846 locals = NULL;
847 local_changed = NULL;
848 seen_subrs = NULL;
849 }
850
851 state (int max_stack, int max_locals)
852 : this_type ()
853 {
854 stacktop = 0;
855 stackdepth = 0;
856 stack = new type[max_stack];
857 for (int i = 0; i < max_stack; ++i)
858 stack[i] = unsuitable_type;
859 locals = new type[max_locals];
860 local_changed = (bool *) _Jv_Malloc (sizeof (bool) * max_locals);
861 seen_subrs = NULL;
862 for (int i = 0; i < max_locals; ++i)
863 {
864 locals[i] = unsuitable_type;
865 local_changed[i] = false;
866 }
867 next = INVALID;
868 subroutine = 0;
869 }
870
871 state (const state *orig, int max_stack, int max_locals,
872 bool ret_semantics = false)
873 {
874 stack = new type[max_stack];
875 locals = new type[max_locals];
876 local_changed = (bool *) _Jv_Malloc (sizeof (bool) * max_locals);
877 seen_subrs = NULL;
878 copy (orig, max_stack, max_locals, ret_semantics);
879 next = INVALID;
880 }
881
882 ~state ()
883 {
884 if (stack)
885 delete[] stack;
886 if (locals)
887 delete[] locals;
888 if (local_changed)
889 _Jv_Free (local_changed);
890 clean_subrs ();
891 }
892
893 void *operator new[] (size_t bytes)
894 {
895 return _Jv_Malloc (bytes);
896 }
897
898 void operator delete[] (void *mem)
899 {
900 _Jv_Free (mem);
901 }
902
903 void *operator new (size_t bytes)
904 {
905 return _Jv_Malloc (bytes);
906 }
907
908 void operator delete (void *mem)
909 {
910 _Jv_Free (mem);
911 }
912
913 void clean_subrs ()
914 {
915 subr_info *info = seen_subrs;
916 while (info != NULL)
917 {
918 subr_info *next = info->next;
919 _Jv_Free (info);
920 info = next;
921 }
922 }
923
924 void copy (const state *copy, int max_stack, int max_locals,
925 bool ret_semantics = false)
926 {
927 stacktop = copy->stacktop;
928 stackdepth = copy->stackdepth;
929 subroutine = copy->subroutine;
930 for (int i = 0; i < max_stack; ++i)
931 stack[i] = copy->stack[i];
932 for (int i = 0; i < max_locals; ++i)
933 {
934 // See push_jump_merge to understand this case.
935 if (ret_semantics)
936 locals[i] = type (copy->local_changed[i]
937 ? unsuitable_type
938 : unused_by_subroutine_type);
939 else
940 locals[i] = copy->locals[i];
941 local_changed[i] = copy->local_changed[i];
942 }
943
944 clean_subrs ();
945 if (copy->seen_subrs)
946 {
947 for (subr_info *info = seen_subrs; info != NULL; info = info->next)
948 add_subr (info->pc);
949 }
950 else
951 seen_subrs = NULL;
952
953 this_type = copy->this_type;
954 // Don't modify `next'.
955 }
956
957 // Modify this state to reflect entry to an exception handler.
958 void set_exception (type t, int max_stack)
959 {
960 stackdepth = 1;
961 stacktop = 1;
962 stack[0] = t;
963 for (int i = stacktop; i < max_stack; ++i)
964 stack[i] = unsuitable_type;
965 }
966
967 // Modify this state to reflect entry into a subroutine.
968 void enter_subroutine (int npc, int max_locals)
969 {
970 subroutine = npc;
971 // Mark all items as unchanged. Each subroutine needs to keep
972 // track of its `changed' state independently. In the case of
973 // nested subroutines, this information will be merged back into
974 // parent by the `ret'.
975 for (int i = 0; i < max_locals; ++i)
976 local_changed[i] = false;
977 }
978
979 // Indicate that we've been in this this subroutine.
980 void add_subr (int pc)
981 {
982 subr_info *n = (subr_info *) _Jv_Malloc (sizeof (subr_info));
983 n->pc = pc;
984 n->next = seen_subrs;
985 seen_subrs = n;
986 }
987
988 // Merge STATE_OLD into this state. Destructively modifies this
989 // state. Returns true if the new state was in fact changed.
990 // Will throw an exception if the states are not mergeable.
991 bool merge (state *state_old, bool ret_semantics,
992 int max_locals, _Jv_BytecodeVerifier *verifier)
993 {
994 bool changed = false;
995
996 // Special handling for `this'. If one or the other is
997 // uninitialized, then the merge is uninitialized.
998 if (this_type.isinitialized ())
999 this_type = state_old->this_type;
1000
1001 // Merge subroutine states. Here we just keep track of what
1002 // subroutine we think we're in. We only check for a merge
1003 // (which is invalid) when we see a `ret'.
1004 if (subroutine == state_old->subroutine)
1005 {
1006 // Nothing.
1007 }
1008 else if (subroutine == 0)
1009 {
1010 subroutine = state_old->subroutine;
1011 changed = true;
1012 }
1013 else
1014 {
1015 // If the subroutines differ, and we haven't seen this
1016 // subroutine before, indicate that the state changed. This
1017 // is needed to detect when subroutines have merged.
1018 bool found = false;
1019 for (subr_info *info = seen_subrs; info != NULL; info = info->next)
1020 {
1021 if (info->pc == state_old->subroutine)
1022 {
1023 found = true;
1024 break;
1025 }
1026 }
1027 if (! found)
1028 {
1029 add_subr (state_old->subroutine);
1030 changed = true;
1031 }
1032 }
1033
1034 // Merge stacks. Special handling for NO_STACK case.
1035 if (state_old->stacktop == NO_STACK)
1036 {
1037 // Nothing to do in this case; we don't care about modifying
1038 // the old state.
1039 }
1040 else if (stacktop == NO_STACK)
1041 {
1042 stacktop = state_old->stacktop;
1043 stackdepth = state_old->stackdepth;
1044 for (int i = 0; i < stacktop; ++i)
1045 stack[i] = state_old->stack[i];
1046 changed = true;
1047 }
1048 else if (state_old->stacktop != stacktop)
1049 verifier->verify_fail ("stack sizes differ");
1050 else
1051 {
1052 for (int i = 0; i < state_old->stacktop; ++i)
1053 {
1054 if (stack[i].merge (state_old->stack[i], false, verifier))
1055 changed = true;
1056 }
1057 }
1058
1059 // Merge local variables.
1060 for (int i = 0; i < max_locals; ++i)
1061 {
1062 // If we're not processing a `ret', then we merge every
1063 // local variable. If we are processing a `ret', then we
1064 // only merge locals which changed in the subroutine. When
1065 // processing a `ret', STATE_OLD is the state at the point
1066 // of the `ret', and THIS is the state just after the `jsr'.
1067 if (! ret_semantics || state_old->local_changed[i])
1068 {
1069 if (locals[i].merge (state_old->locals[i], true, verifier))
1070 {
1071 // Note that we don't call `note_variable' here.
1072 // This change doesn't represent a real change to a
1073 // local, but rather a merge artifact. If we're in
1074 // a subroutine which is called with two
1075 // incompatible types in a slot that is unused by
1076 // the subroutine, then we don't want to mark that
1077 // variable as having been modified.
1078 changed = true;
1079 }
1080 }
1081
1082 // If we're in a subroutine, we must compute the union of
1083 // all the changed local variables.
1084 if (state_old->local_changed[i])
1085 note_variable (i);
1086 }
1087
1088 return changed;
1089 }
1090
1091 // Throw an exception if there is an uninitialized object on the
1092 // stack or in a local variable. EXCEPTION_SEMANTICS controls
1093 // whether we're using backwards-branch or exception-handing
1094 // semantics.
1095 void check_no_uninitialized_objects (int max_locals,
1096 _Jv_BytecodeVerifier *verifier,
1097 bool exception_semantics = false)
1098 {
1099 if (! exception_semantics)
1100 {
1101 for (int i = 0; i < stacktop; ++i)
1102 if (stack[i].isreference () && ! stack[i].isinitialized ())
1103 verifier->verify_fail ("uninitialized object on stack");
1104 }
1105
1106 for (int i = 0; i < max_locals; ++i)
1107 if (locals[i].isreference () && ! locals[i].isinitialized ())
1108 verifier->verify_fail ("uninitialized object in local variable");
1109
1110 check_this_initialized (verifier);
1111 }
1112
1113 // Ensure that `this' has been initialized.
1114 void check_this_initialized (_Jv_BytecodeVerifier *verifier)
1115 {
1116 if (this_type.isreference () && ! this_type.isinitialized ())
1117 verifier->verify_fail ("`this' is uninitialized");
1118 }
1119
1120 // Set type of `this'.
1121 void set_this_type (const type &k)
1122 {
1123 this_type = k;
1124 }
1125
1126 // Note that a local variable was modified.
1127 void note_variable (int index)
1128 {
1129 if (subroutine > 0)
1130 local_changed[index] = true;
1131 }
1132
1133 // Mark each `new'd object we know of that was allocated at PC as
1134 // initialized.
1135 void set_initialized (int pc, int max_locals)
1136 {
1137 for (int i = 0; i < stacktop; ++i)
1138 stack[i].set_initialized (pc);
1139 for (int i = 0; i < max_locals; ++i)
1140 locals[i].set_initialized (pc);
1141 this_type.set_initialized (pc);
1142 }
1143
1144 // Return true if this state is the unmerged result of a `ret'.
1145 bool is_unmerged_ret_state (int max_locals) const
1146 {
1147 if (stacktop == NO_STACK)
1148 return true;
1149 for (int i = 0; i < max_locals; ++i)
1150 {
1151 if (locals[i].key == unused_by_subroutine_type)
1152 return true;
1153 }
1154 return false;
1155 }
1156
1157 #ifdef VERIFY_DEBUG
1158 void print (const char *leader, int pc,
1159 int max_stack, int max_locals) const
1160 {
1161 debug_print ("%s [%4d]: [stack] ", leader, pc);
1162 int i;
1163 for (i = 0; i < stacktop; ++i)
1164 stack[i].print ();
1165 for (; i < max_stack; ++i)
1166 debug_print (".");
1167 debug_print (" [local] ");
1168 for (i = 0; i < max_locals; ++i)
1169 {
1170 locals[i].print ();
1171 debug_print (local_changed[i] ? "+" : " ");
1172 }
1173 if (subroutine == 0)
1174 debug_print (" | None");
1175 else
1176 debug_print (" | %4d", subroutine);
1177 debug_print (" | %p\n", this);
1178 }
1179 #else
1180 inline void print (const char *, int, int, int) const
1181 {
1182 }
1183 #endif /* VERIFY_DEBUG */
1184 };
1185
1186 type pop_raw ()
1187 {
1188 if (current_state->stacktop <= 0)
1189 verify_fail ("stack empty");
1190 type r = current_state->stack[--current_state->stacktop];
1191 current_state->stackdepth -= r.depth ();
1192 if (current_state->stackdepth < 0)
1193 verify_fail ("stack empty", start_PC);
1194 return r;
1195 }
1196
1197 type pop32 ()
1198 {
1199 type r = pop_raw ();
1200 if (r.iswide ())
1201 verify_fail ("narrow pop of wide type");
1202 return r;
1203 }
1204
1205 type pop_type (type match)
1206 {
1207 match.promote ();
1208 type t = pop_raw ();
1209 if (! match.compatible (t, this))
1210 verify_fail ("incompatible type on stack");
1211 return t;
1212 }
1213
1214 // Pop a reference which is guaranteed to be initialized. MATCH
1215 // doesn't have to be a reference type; in this case this acts like
1216 // pop_type.
1217 type pop_init_ref (type match)
1218 {
1219 type t = pop_raw ();
1220 if (t.isreference () && ! t.isinitialized ())
1221 verify_fail ("initialized reference required");
1222 else if (! match.compatible (t, this))
1223 verify_fail ("incompatible type on stack");
1224 return t;
1225 }
1226
1227 // Pop a reference type or a return address.
1228 type pop_ref_or_return ()
1229 {
1230 type t = pop_raw ();
1231 if (! t.isreference () && t.key != return_address_type)
1232 verify_fail ("expected reference or return address on stack");
1233 return t;
1234 }
1235
1236 void push_type (type t)
1237 {
1238 // If T is a numeric type like short, promote it to int.
1239 t.promote ();
1240
1241 int depth = t.depth ();
1242 if (current_state->stackdepth + depth > current_method->max_stack)
1243 verify_fail ("stack overflow");
1244 current_state->stack[current_state->stacktop++] = t;
1245 current_state->stackdepth += depth;
1246 }
1247
1248 void set_variable (int index, type t)
1249 {
1250 // If T is a numeric type like short, promote it to int.
1251 t.promote ();
1252
1253 int depth = t.depth ();
1254 if (index > current_method->max_locals - depth)
1255 verify_fail ("invalid local variable");
1256 current_state->locals[index] = t;
1257 current_state->note_variable (index);
1258
1259 if (depth == 2)
1260 {
1261 current_state->locals[index + 1] = continuation_type;
1262 current_state->note_variable (index + 1);
1263 }
1264 if (index > 0 && current_state->locals[index - 1].iswide ())
1265 {
1266 current_state->locals[index - 1] = unsuitable_type;
1267 // There's no need to call note_variable here.
1268 }
1269 }
1270
1271 type get_variable (int index, type t)
1272 {
1273 int depth = t.depth ();
1274 if (index > current_method->max_locals - depth)
1275 verify_fail ("invalid local variable");
1276 if (! t.compatible (current_state->locals[index], this))
1277 verify_fail ("incompatible type in local variable");
1278 if (depth == 2)
1279 {
1280 type t (continuation_type);
1281 if (! current_state->locals[index + 1].compatible (t, this))
1282 verify_fail ("invalid local variable");
1283 }
1284 return current_state->locals[index];
1285 }
1286
1287 // Make sure ARRAY is an array type and that its elements are
1288 // compatible with type ELEMENT. Returns the actual element type.
1289 type require_array_type (type array, type element)
1290 {
1291 // An odd case. Here we just pretend that everything went ok. If
1292 // the requested element type is some kind of reference, return
1293 // the null type instead.
1294 if (array.isnull ())
1295 return element.isreference () ? type (null_type) : element;
1296
1297 if (! array.isarray ())
1298 verify_fail ("array required");
1299
1300 type t = array.element_type (this);
1301 if (! element.compatible (t, this))
1302 {
1303 // Special case for byte arrays, which must also be boolean
1304 // arrays.
1305 bool ok = true;
1306 if (element.key == byte_type)
1307 {
1308 type e2 (boolean_type);
1309 ok = e2.compatible (t, this);
1310 }
1311 if (! ok)
1312 verify_fail ("incompatible array element type");
1313 }
1314
1315 // Return T and not ELEMENT, because T might be specialized.
1316 return t;
1317 }
1318
1319 jint get_byte ()
1320 {
1321 if (PC >= current_method->code_length)
1322 verify_fail ("premature end of bytecode");
1323 return (jint) bytecode[PC++] & 0xff;
1324 }
1325
1326 jint get_ushort ()
1327 {
1328 jint b1 = get_byte ();
1329 jint b2 = get_byte ();
1330 return (jint) ((b1 << 8) | b2) & 0xffff;
1331 }
1332
1333 jint get_short ()
1334 {
1335 jint b1 = get_byte ();
1336 jint b2 = get_byte ();
1337 jshort s = (b1 << 8) | b2;
1338 return (jint) s;
1339 }
1340
1341 jint get_int ()
1342 {
1343 jint b1 = get_byte ();
1344 jint b2 = get_byte ();
1345 jint b3 = get_byte ();
1346 jint b4 = get_byte ();
1347 return (b1 << 24) | (b2 << 16) | (b3 << 8) | b4;
1348 }
1349
1350 int compute_jump (int offset)
1351 {
1352 int npc = start_PC + offset;
1353 if (npc < 0 || npc >= current_method->code_length)
1354 verify_fail ("branch out of range", start_PC);
1355 return npc;
1356 }
1357
1358 // Merge the indicated state into the state at the branch target and
1359 // schedule a new PC if there is a change. If RET_SEMANTICS is
1360 // true, then we are merging from a `ret' instruction into the
1361 // instruction after a `jsr'. This is a special case with its own
1362 // modified semantics.
1363 void push_jump_merge (int npc, state *nstate, bool ret_semantics = false)
1364 {
1365 bool changed = true;
1366 if (states[npc] == NULL)
1367 {
1368 // There's a weird situation here. If are examining the
1369 // branch that results from a `ret', and there is not yet a
1370 // state available at the branch target (the instruction just
1371 // after the `jsr'), then we have to construct a special kind
1372 // of state at that point for future merging. This special
1373 // state has the type `unused_by_subroutine_type' in each slot
1374 // which was not modified by the subroutine.
1375 states[npc] = new state (nstate, current_method->max_stack,
1376 current_method->max_locals, ret_semantics);
1377 debug_print ("== New state in push_jump_merge\n");
1378 states[npc]->print ("New", npc, current_method->max_stack,
1379 current_method->max_locals);
1380 }
1381 else
1382 {
1383 debug_print ("== Merge states in push_jump_merge\n");
1384 nstate->print ("Frm", start_PC, current_method->max_stack,
1385 current_method->max_locals);
1386 states[npc]->print (" To", npc, current_method->max_stack,
1387 current_method->max_locals);
1388 changed = states[npc]->merge (nstate, ret_semantics,
1389 current_method->max_locals, this);
1390 states[npc]->print ("New", npc, current_method->max_stack,
1391 current_method->max_locals);
1392 }
1393
1394 if (changed && states[npc]->next == state::INVALID)
1395 {
1396 // The merge changed the state, and the new PC isn't yet on our
1397 // list of PCs to re-verify.
1398 states[npc]->next = next_verify_pc;
1399 next_verify_pc = npc;
1400 }
1401 }
1402
1403 void push_jump (int offset)
1404 {
1405 int npc = compute_jump (offset);
1406 if (npc < PC)
1407 current_state->check_no_uninitialized_objects (current_method->max_locals, this);
1408 push_jump_merge (npc, current_state);
1409 }
1410
1411 void push_exception_jump (type t, int pc)
1412 {
1413 current_state->check_no_uninitialized_objects (current_method->max_locals,
1414 this, true);
1415 state s (current_state, current_method->max_stack,
1416 current_method->max_locals);
1417 if (current_method->max_stack < 1)
1418 verify_fail ("stack overflow at exception handler");
1419 s.set_exception (t, current_method->max_stack);
1420 push_jump_merge (pc, &s);
1421 }
1422
1423 int pop_jump ()
1424 {
1425 int *prev_loc = &next_verify_pc;
1426 int npc = next_verify_pc;
1427
1428 while (npc != state::NO_NEXT)
1429 {
1430 // If the next available PC is an unmerged `ret' state, then
1431 // we aren't yet ready to handle it. That's because we would
1432 // need all kind of special cases to do so. So instead we
1433 // defer this jump until after we've processed it via a
1434 // fall-through. This has to happen because the instruction
1435 // before this one must be a `jsr'.
1436 if (! states[npc]->is_unmerged_ret_state (current_method->max_locals))
1437 {
1438 *prev_loc = states[npc]->next;
1439 states[npc]->next = state::INVALID;
1440 return npc;
1441 }
1442
1443 prev_loc = &states[npc]->next;
1444 npc = states[npc]->next;
1445 }
1446
1447 // Note that we might have gotten here even when there are
1448 // remaining states to process. That can happen if we find a
1449 // `jsr' without a `ret'.
1450 return state::NO_NEXT;
1451 }
1452
1453 void invalidate_pc ()
1454 {
1455 PC = state::NO_NEXT;
1456 }
1457
1458 void note_branch_target (int pc, bool is_jsr_target = false)
1459 {
1460 // Don't check `pc <= PC', because we've advanced PC after
1461 // fetching the target and we haven't yet checked the next
1462 // instruction.
1463 if (pc < PC && ! (flags[pc] & FLAG_INSN_START))
1464 verify_fail ("branch not to instruction start", start_PC);
1465 flags[pc] |= FLAG_BRANCH_TARGET;
1466 if (is_jsr_target)
1467 {
1468 // Record the jsr which called this instruction.
1469 subr_info *info = (subr_info *) _Jv_Malloc (sizeof (subr_info));
1470 info->pc = PC;
1471 info->next = jsr_ptrs[pc];
1472 jsr_ptrs[pc] = info;
1473 }
1474 }
1475
1476 void skip_padding ()
1477 {
1478 while ((PC % 4) > 0)
1479 if (get_byte () != 0)
1480 verify_fail ("found nonzero padding byte");
1481 }
1482
1483 // Return the subroutine to which the instruction at PC belongs.
1484 int get_subroutine (int pc)
1485 {
1486 if (states[pc] == NULL)
1487 return 0;
1488 return states[pc]->subroutine;
1489 }
1490
1491 // Do the work for a `ret' instruction. INDEX is the index into the
1492 // local variables.
1493 void handle_ret_insn (int index)
1494 {
1495 get_variable (index, return_address_type);
1496
1497 int csub = current_state->subroutine;
1498 if (csub == 0)
1499 verify_fail ("no subroutine");
1500
1501 // Check to see if we've merged subroutines.
1502 subr_entry_info *entry;
1503 for (entry = entry_points; entry != NULL; entry = entry->next)
1504 {
1505 if (entry->ret_pc == start_PC)
1506 break;
1507 }
1508 if (entry == NULL)
1509 {
1510 entry = (subr_entry_info *) _Jv_Malloc (sizeof (subr_entry_info));
1511 entry->pc = csub;
1512 entry->ret_pc = start_PC;
1513 entry->next = entry_points;
1514 entry_points = entry;
1515 }
1516 else if (entry->pc != csub)
1517 verify_fail ("subroutines merged");
1518
1519 for (subr_info *subr = jsr_ptrs[csub]; subr != NULL; subr = subr->next)
1520 {
1521 // We might be returning to a `jsr' that is at the end of the
1522 // bytecode. This is ok if we never return from the called
1523 // subroutine, but if we see this here it is an error.
1524 if (subr->pc >= current_method->code_length)
1525 verify_fail ("fell off end");
1526
1527 // Temporarily modify the current state so it looks like we're
1528 // in the enclosing context.
1529 current_state->subroutine = get_subroutine (subr->pc);
1530 if (subr->pc < PC)
1531 current_state->check_no_uninitialized_objects (current_method->max_locals, this);
1532 push_jump_merge (subr->pc, current_state, true);
1533 }
1534
1535 current_state->subroutine = csub;
1536 invalidate_pc ();
1537 }
1538
1539 // We're in the subroutine SUB, calling a subroutine at DEST. Make
1540 // sure this subroutine isn't already on the stack.
1541 void check_nonrecursive_call (int sub, int dest)
1542 {
1543 if (sub == 0)
1544 return;
1545 if (sub == dest)
1546 verify_fail ("recursive subroutine call");
1547 for (subr_info *info = jsr_ptrs[sub]; info != NULL; info = info->next)
1548 check_nonrecursive_call (get_subroutine (info->pc), dest);
1549 }
1550
1551 void handle_jsr_insn (int offset)
1552 {
1553 int npc = compute_jump (offset);
1554
1555 if (npc < PC)
1556 current_state->check_no_uninitialized_objects (current_method->max_locals, this);
1557 check_nonrecursive_call (current_state->subroutine, npc);
1558
1559 // Modify our state as appropriate for entry into a subroutine.
1560 push_type (return_address_type);
1561 push_jump_merge (npc, current_state);
1562 // Clean up.
1563 pop_type (return_address_type);
1564
1565 // On entry to the subroutine, the subroutine number must be set
1566 // and the locals must be marked as cleared. We do this after
1567 // merging state so that we don't erroneously "notice" a variable
1568 // change merely on entry.
1569 states[npc]->enter_subroutine (npc, current_method->max_locals);
1570
1571 // Indicate that we don't know the stack depth of the instruction
1572 // following the `jsr'. The idea here is that we need to merge
1573 // the local variable state across the jsr, but the subroutine
1574 // might change the stack depth, so we can't make any assumptions
1575 // about it. So we have yet another special case. We know that
1576 // at this point PC points to the instruction after the jsr. Note
1577 // that it is ok to have a `jsr' at the end of the bytecode,
1578 // provided that the called subroutine never returns. So, we have
1579 // a special case here and another one when we handle the ret.
1580 if (PC < current_method->code_length)
1581 {
1582 current_state->stacktop = state::NO_STACK;
1583 push_jump_merge (PC, current_state);
1584 }
1585 invalidate_pc ();
1586 }
1587
1588 jclass construct_primitive_array_type (type_val prim)
1589 {
1590 jclass k = NULL;
1591 switch (prim)
1592 {
1593 case boolean_type:
1594 k = JvPrimClass (boolean);
1595 break;
1596 case char_type:
1597 k = JvPrimClass (char);
1598 break;
1599 case float_type:
1600 k = JvPrimClass (float);
1601 break;
1602 case double_type:
1603 k = JvPrimClass (double);
1604 break;
1605 case byte_type:
1606 k = JvPrimClass (byte);
1607 break;
1608 case short_type:
1609 k = JvPrimClass (short);
1610 break;
1611 case int_type:
1612 k = JvPrimClass (int);
1613 break;
1614 case long_type:
1615 k = JvPrimClass (long);
1616 break;
1617
1618 // These aren't used here but we call them out to avoid
1619 // warnings.
1620 case void_type:
1621 case unsuitable_type:
1622 case return_address_type:
1623 case continuation_type:
1624 case unused_by_subroutine_type:
1625 case reference_type:
1626 case null_type:
1627 case unresolved_reference_type:
1628 case uninitialized_reference_type:
1629 case uninitialized_unresolved_reference_type:
1630 default:
1631 verify_fail ("unknown type in construct_primitive_array_type");
1632 }
1633 k = _Jv_GetArrayClass (k, NULL);
1634 return k;
1635 }
1636
1637 // This pass computes the location of branch targets and also
1638 // instruction starts.
1639 void branch_prepass ()
1640 {
1641 flags = (char *) _Jv_Malloc (current_method->code_length);
1642 jsr_ptrs = (subr_info **) _Jv_Malloc (sizeof (subr_info *)
1643 * current_method->code_length);
1644
1645 for (int i = 0; i < current_method->code_length; ++i)
1646 {
1647 flags[i] = 0;
1648 jsr_ptrs[i] = NULL;
1649 }
1650
1651 bool last_was_jsr = false;
1652
1653 PC = 0;
1654 while (PC < current_method->code_length)
1655 {
1656 // Set `start_PC' early so that error checking can have the
1657 // correct value.
1658 start_PC = PC;
1659 flags[PC] |= FLAG_INSN_START;
1660
1661 // If the previous instruction was a jsr, then the next
1662 // instruction is a branch target -- the branch being the
1663 // corresponding `ret'.
1664 if (last_was_jsr)
1665 note_branch_target (PC);
1666 last_was_jsr = false;
1667
1668 java_opcode opcode = (java_opcode) bytecode[PC++];
1669 switch (opcode)
1670 {
1671 case op_nop:
1672 case op_aconst_null:
1673 case op_iconst_m1:
1674 case op_iconst_0:
1675 case op_iconst_1:
1676 case op_iconst_2:
1677 case op_iconst_3:
1678 case op_iconst_4:
1679 case op_iconst_5:
1680 case op_lconst_0:
1681 case op_lconst_1:
1682 case op_fconst_0:
1683 case op_fconst_1:
1684 case op_fconst_2:
1685 case op_dconst_0:
1686 case op_dconst_1:
1687 case op_iload_0:
1688 case op_iload_1:
1689 case op_iload_2:
1690 case op_iload_3:
1691 case op_lload_0:
1692 case op_lload_1:
1693 case op_lload_2:
1694 case op_lload_3:
1695 case op_fload_0:
1696 case op_fload_1:
1697 case op_fload_2:
1698 case op_fload_3:
1699 case op_dload_0:
1700 case op_dload_1:
1701 case op_dload_2:
1702 case op_dload_3:
1703 case op_aload_0:
1704 case op_aload_1:
1705 case op_aload_2:
1706 case op_aload_3:
1707 case op_iaload:
1708 case op_laload:
1709 case op_faload:
1710 case op_daload:
1711 case op_aaload:
1712 case op_baload:
1713 case op_caload:
1714 case op_saload:
1715 case op_istore_0:
1716 case op_istore_1:
1717 case op_istore_2:
1718 case op_istore_3:
1719 case op_lstore_0:
1720 case op_lstore_1:
1721 case op_lstore_2:
1722 case op_lstore_3:
1723 case op_fstore_0:
1724 case op_fstore_1:
1725 case op_fstore_2:
1726 case op_fstore_3:
1727 case op_dstore_0:
1728 case op_dstore_1:
1729 case op_dstore_2:
1730 case op_dstore_3:
1731 case op_astore_0:
1732 case op_astore_1:
1733 case op_astore_2:
1734 case op_astore_3:
1735 case op_iastore:
1736 case op_lastore:
1737 case op_fastore:
1738 case op_dastore:
1739 case op_aastore:
1740 case op_bastore:
1741 case op_castore:
1742 case op_sastore:
1743 case op_pop:
1744 case op_pop2:
1745 case op_dup:
1746 case op_dup_x1:
1747 case op_dup_x2:
1748 case op_dup2:
1749 case op_dup2_x1:
1750 case op_dup2_x2:
1751 case op_swap:
1752 case op_iadd:
1753 case op_isub:
1754 case op_imul:
1755 case op_idiv:
1756 case op_irem:
1757 case op_ishl:
1758 case op_ishr:
1759 case op_iushr:
1760 case op_iand:
1761 case op_ior:
1762 case op_ixor:
1763 case op_ladd:
1764 case op_lsub:
1765 case op_lmul:
1766 case op_ldiv:
1767 case op_lrem:
1768 case op_lshl:
1769 case op_lshr:
1770 case op_lushr:
1771 case op_land:
1772 case op_lor:
1773 case op_lxor:
1774 case op_fadd:
1775 case op_fsub:
1776 case op_fmul:
1777 case op_fdiv:
1778 case op_frem:
1779 case op_dadd:
1780 case op_dsub:
1781 case op_dmul:
1782 case op_ddiv:
1783 case op_drem:
1784 case op_ineg:
1785 case op_i2b:
1786 case op_i2c:
1787 case op_i2s:
1788 case op_lneg:
1789 case op_fneg:
1790 case op_dneg:
1791 case op_i2l:
1792 case op_i2f:
1793 case op_i2d:
1794 case op_l2i:
1795 case op_l2f:
1796 case op_l2d:
1797 case op_f2i:
1798 case op_f2l:
1799 case op_f2d:
1800 case op_d2i:
1801 case op_d2l:
1802 case op_d2f:
1803 case op_lcmp:
1804 case op_fcmpl:
1805 case op_fcmpg:
1806 case op_dcmpl:
1807 case op_dcmpg:
1808 case op_monitorenter:
1809 case op_monitorexit:
1810 case op_ireturn:
1811 case op_lreturn:
1812 case op_freturn:
1813 case op_dreturn:
1814 case op_areturn:
1815 case op_return:
1816 case op_athrow:
1817 case op_arraylength:
1818 break;
1819
1820 case op_bipush:
1821 case op_ldc:
1822 case op_iload:
1823 case op_lload:
1824 case op_fload:
1825 case op_dload:
1826 case op_aload:
1827 case op_istore:
1828 case op_lstore:
1829 case op_fstore:
1830 case op_dstore:
1831 case op_astore:
1832 case op_ret:
1833 case op_newarray:
1834 get_byte ();
1835 break;
1836
1837 case op_iinc:
1838 case op_sipush:
1839 case op_ldc_w:
1840 case op_ldc2_w:
1841 case op_getstatic:
1842 case op_getfield:
1843 case op_putfield:
1844 case op_putstatic:
1845 case op_new:
1846 case op_anewarray:
1847 case op_instanceof:
1848 case op_checkcast:
1849 case op_invokespecial:
1850 case op_invokestatic:
1851 case op_invokevirtual:
1852 get_short ();
1853 break;
1854
1855 case op_multianewarray:
1856 get_short ();
1857 get_byte ();
1858 break;
1859
1860 case op_jsr:
1861 last_was_jsr = true;
1862 // Fall through.
1863 case op_ifeq:
1864 case op_ifne:
1865 case op_iflt:
1866 case op_ifge:
1867 case op_ifgt:
1868 case op_ifle:
1869 case op_if_icmpeq:
1870 case op_if_icmpne:
1871 case op_if_icmplt:
1872 case op_if_icmpge:
1873 case op_if_icmpgt:
1874 case op_if_icmple:
1875 case op_if_acmpeq:
1876 case op_if_acmpne:
1877 case op_ifnull:
1878 case op_ifnonnull:
1879 case op_goto:
1880 note_branch_target (compute_jump (get_short ()), last_was_jsr);
1881 break;
1882
1883 case op_tableswitch:
1884 {
1885 skip_padding ();
1886 note_branch_target (compute_jump (get_int ()));
1887 jint low = get_int ();
1888 jint hi = get_int ();
1889 if (low > hi)
1890 verify_fail ("invalid tableswitch", start_PC);
1891 for (int i = low; i <= hi; ++i)
1892 note_branch_target (compute_jump (get_int ()));
1893 }
1894 break;
1895
1896 case op_lookupswitch:
1897 {
1898 skip_padding ();
1899 note_branch_target (compute_jump (get_int ()));
1900 int npairs = get_int ();
1901 if (npairs < 0)
1902 verify_fail ("too few pairs in lookupswitch", start_PC);
1903 while (npairs-- > 0)
1904 {
1905 get_int ();
1906 note_branch_target (compute_jump (get_int ()));
1907 }
1908 }
1909 break;
1910
1911 case op_invokeinterface:
1912 get_short ();
1913 get_byte ();
1914 get_byte ();
1915 break;
1916
1917 case op_wide:
1918 {
1919 opcode = (java_opcode) get_byte ();
1920 get_short ();
1921 if (opcode == op_iinc)
1922 get_short ();
1923 }
1924 break;
1925
1926 case op_jsr_w:
1927 last_was_jsr = true;
1928 // Fall through.
1929 case op_goto_w:
1930 note_branch_target (compute_jump (get_int ()), last_was_jsr);
1931 break;
1932
1933 // These are unused here, but we call them out explicitly
1934 // so that -Wswitch-enum doesn't complain.
1935 case op_putfield_1:
1936 case op_putfield_2:
1937 case op_putfield_4:
1938 case op_putfield_8:
1939 case op_putfield_a:
1940 case op_putstatic_1:
1941 case op_putstatic_2:
1942 case op_putstatic_4:
1943 case op_putstatic_8:
1944 case op_putstatic_a:
1945 case op_getfield_1:
1946 case op_getfield_2s:
1947 case op_getfield_2u:
1948 case op_getfield_4:
1949 case op_getfield_8:
1950 case op_getfield_a:
1951 case op_getstatic_1:
1952 case op_getstatic_2s:
1953 case op_getstatic_2u:
1954 case op_getstatic_4:
1955 case op_getstatic_8:
1956 case op_getstatic_a:
1957 default:
1958 verify_fail ("unrecognized instruction in branch_prepass",
1959 start_PC);
1960 }
1961
1962 // See if any previous branch tried to branch to the middle of
1963 // this instruction.
1964 for (int pc = start_PC + 1; pc < PC; ++pc)
1965 {
1966 if ((flags[pc] & FLAG_BRANCH_TARGET))
1967 verify_fail ("branch to middle of instruction", pc);
1968 }
1969 }
1970
1971 // Verify exception handlers.
1972 for (int i = 0; i < current_method->exc_count; ++i)
1973 {
1974 if (! (flags[exception[i].handler_pc.i] & FLAG_INSN_START))
1975 verify_fail ("exception handler not at instruction start",
1976 exception[i].handler_pc.i);
1977 if (! (flags[exception[i].start_pc.i] & FLAG_INSN_START))
1978 verify_fail ("exception start not at instruction start",
1979 exception[i].start_pc.i);
1980 if (exception[i].end_pc.i != current_method->code_length
1981 && ! (flags[exception[i].end_pc.i] & FLAG_INSN_START))
1982 verify_fail ("exception end not at instruction start",
1983 exception[i].end_pc.i);
1984
1985 flags[exception[i].handler_pc.i] |= FLAG_BRANCH_TARGET;
1986 }
1987 }
1988
1989 void check_pool_index (int index)
1990 {
1991 if (index < 0 || index >= current_class->constants.size)
1992 verify_fail ("constant pool index out of range", start_PC);
1993 }
1994
1995 type check_class_constant (int index)
1996 {
1997 check_pool_index (index);
1998 _Jv_Constants *pool = &current_class->constants;
1999 if (pool->tags[index] == JV_CONSTANT_ResolvedClass)
2000 return type (pool->data[index].clazz);
2001 else if (pool->tags[index] == JV_CONSTANT_Class)
2002 return type (pool->data[index].utf8);
2003 verify_fail ("expected class constant", start_PC);
2004 }
2005
2006 type check_constant (int index)
2007 {
2008 check_pool_index (index);
2009 _Jv_Constants *pool = &current_class->constants;
2010 if (pool->tags[index] == JV_CONSTANT_ResolvedString
2011 || pool->tags[index] == JV_CONSTANT_String)
2012 return type (&java::lang::String::class$);
2013 else if (pool->tags[index] == JV_CONSTANT_Integer)
2014 return type (int_type);
2015 else if (pool->tags[index] == JV_CONSTANT_Float)
2016 return type (float_type);
2017 verify_fail ("String, int, or float constant expected", start_PC);
2018 }
2019
2020 type check_wide_constant (int index)
2021 {
2022 check_pool_index (index);
2023 _Jv_Constants *pool = &current_class->constants;
2024 if (pool->tags[index] == JV_CONSTANT_Long)
2025 return type (long_type);
2026 else if (pool->tags[index] == JV_CONSTANT_Double)
2027 return type (double_type);
2028 verify_fail ("long or double constant expected", start_PC);
2029 }
2030
2031 // Helper for both field and method. These are laid out the same in
2032 // the constant pool.
2033 type handle_field_or_method (int index, int expected,
2034 _Jv_Utf8Const **name,
2035 _Jv_Utf8Const **fmtype)
2036 {
2037 check_pool_index (index);
2038 _Jv_Constants *pool = &current_class->constants;
2039 if (pool->tags[index] != expected)
2040 verify_fail ("didn't see expected constant", start_PC);
2041 // Once we know we have a Fieldref or Methodref we assume that it
2042 // is correctly laid out in the constant pool. I think the code
2043 // in defineclass.cc guarantees this.
2044 _Jv_ushort class_index, name_and_type_index;
2045 _Jv_loadIndexes (&pool->data[index],
2046 class_index,
2047 name_and_type_index);
2048 _Jv_ushort name_index, desc_index;
2049 _Jv_loadIndexes (&pool->data[name_and_type_index],
2050 name_index, desc_index);
2051
2052 *name = pool->data[name_index].utf8;
2053 *fmtype = pool->data[desc_index].utf8;
2054
2055 return check_class_constant (class_index);
2056 }
2057
2058 // Return field's type, compute class' type if requested.
2059 type check_field_constant (int index, type *class_type = NULL)
2060 {
2061 _Jv_Utf8Const *name, *field_type;
2062 type ct = handle_field_or_method (index,
2063 JV_CONSTANT_Fieldref,
2064 &name, &field_type);
2065 if (class_type)
2066 *class_type = ct;
2067 if (field_type->data[0] == '[' || field_type->data[0] == 'L')
2068 return type (field_type);
2069 return get_type_val_for_signature (field_type->data[0]);
2070 }
2071
2072 type check_method_constant (int index, bool is_interface,
2073 _Jv_Utf8Const **method_name,
2074 _Jv_Utf8Const **method_signature)
2075 {
2076 return handle_field_or_method (index,
2077 (is_interface
2078 ? JV_CONSTANT_InterfaceMethodref
2079 : JV_CONSTANT_Methodref),
2080 method_name, method_signature);
2081 }
2082
2083 type get_one_type (char *&p)
2084 {
2085 char *start = p;
2086
2087 int arraycount = 0;
2088 while (*p == '[')
2089 {
2090 ++arraycount;
2091 ++p;
2092 }
2093
2094 char v = *p++;
2095
2096 if (v == 'L')
2097 {
2098 while (*p != ';')
2099 ++p;
2100 ++p;
2101 _Jv_Utf8Const *name = make_utf8_const (start, p - start);
2102 return type (name);
2103 }
2104
2105 // Casting to jchar here is ok since we are looking at an ASCII
2106 // character.
2107 type_val rt = get_type_val_for_signature (jchar (v));
2108
2109 if (arraycount == 0)
2110 {
2111 // Callers of this function eventually push their arguments on
2112 // the stack. So, promote them here.
2113 return type (rt).promote ();
2114 }
2115
2116 jclass k = construct_primitive_array_type (rt);
2117 while (--arraycount > 0)
2118 k = _Jv_GetArrayClass (k, NULL);
2119 return type (k);
2120 }
2121
2122 void compute_argument_types (_Jv_Utf8Const *signature,
2123 type *types)
2124 {
2125 char *p = signature->data;
2126 // Skip `('.
2127 ++p;
2128
2129 int i = 0;
2130 while (*p != ')')
2131 types[i++] = get_one_type (p);
2132 }
2133
2134 type compute_return_type (_Jv_Utf8Const *signature)
2135 {
2136 char *p = signature->data;
2137 while (*p != ')')
2138 ++p;
2139 ++p;
2140 return get_one_type (p);
2141 }
2142
2143 void check_return_type (type onstack)
2144 {
2145 type rt = compute_return_type (current_method->self->signature);
2146 if (! rt.compatible (onstack, this))
2147 verify_fail ("incompatible return type");
2148 }
2149
2150 // Initialize the stack for the new method. Returns true if this
2151 // method is an instance initializer.
2152 bool initialize_stack ()
2153 {
2154 int var = 0;
2155 bool is_init = _Jv_equalUtf8Consts (current_method->self->name,
2156 gcj::init_name);
2157 bool is_clinit = _Jv_equalUtf8Consts (current_method->self->name,
2158 gcj::clinit_name);
2159
2160 using namespace java::lang::reflect;
2161 if (! Modifier::isStatic (current_method->self->accflags))
2162 {
2163 type kurr (current_class);
2164 if (is_init)
2165 {
2166 kurr.set_uninitialized (type::SELF, this);
2167 is_init = true;
2168 }
2169 else if (is_clinit)
2170 verify_fail ("<clinit> method must be static");
2171 set_variable (0, kurr);
2172 current_state->set_this_type (kurr);
2173 ++var;
2174 }
2175 else
2176 {
2177 if (is_init)
2178 verify_fail ("<init> method must be non-static");
2179 }
2180
2181 // We have to handle wide arguments specially here.
2182 int arg_count = _Jv_count_arguments (current_method->self->signature);
2183 type arg_types[arg_count];
2184 compute_argument_types (current_method->self->signature, arg_types);
2185 for (int i = 0; i < arg_count; ++i)
2186 {
2187 set_variable (var, arg_types[i]);
2188 ++var;
2189 if (arg_types[i].iswide ())
2190 ++var;
2191 }
2192
2193 return is_init;
2194 }
2195
2196 void verify_instructions_0 ()
2197 {
2198 current_state = new state (current_method->max_stack,
2199 current_method->max_locals);
2200
2201 PC = 0;
2202 start_PC = 0;
2203
2204 // True if we are verifying an instance initializer.
2205 bool this_is_init = initialize_stack ();
2206
2207 states = (state **) _Jv_Malloc (sizeof (state *)
2208 * current_method->code_length);
2209 for (int i = 0; i < current_method->code_length; ++i)
2210 states[i] = NULL;
2211
2212 next_verify_pc = state::NO_NEXT;
2213
2214 while (true)
2215 {
2216 // If the PC was invalidated, get a new one from the work list.
2217 if (PC == state::NO_NEXT)
2218 {
2219 PC = pop_jump ();
2220 if (PC == state::INVALID)
2221 verify_fail ("can't happen: saw state::INVALID");
2222 if (PC == state::NO_NEXT)
2223 break;
2224 debug_print ("== State pop from pending list\n");
2225 // Set up the current state.
2226 current_state->copy (states[PC], current_method->max_stack,
2227 current_method->max_locals);
2228 }
2229 else
2230 {
2231 // Control can't fall off the end of the bytecode. We
2232 // only need to check this in the fall-through case,
2233 // because branch bounds are checked when they are
2234 // pushed.
2235 if (PC >= current_method->code_length)
2236 verify_fail ("fell off end");
2237
2238 // We only have to do this checking in the situation where
2239 // control flow falls through from the previous
2240 // instruction. Otherwise merging is done at the time we
2241 // push the branch.
2242 if (states[PC] != NULL)
2243 {
2244 // We've already visited this instruction. So merge
2245 // the states together. If this yields no change then
2246 // we don't have to re-verify. However, if the new
2247 // state is an the result of an unmerged `ret', we
2248 // must continue through it.
2249 debug_print ("== Fall through merge\n");
2250 states[PC]->print ("Old", PC, current_method->max_stack,
2251 current_method->max_locals);
2252 current_state->print ("Cur", PC, current_method->max_stack,
2253 current_method->max_locals);
2254 if (! current_state->merge (states[PC], false,
2255 current_method->max_locals, this)
2256 && ! states[PC]->is_unmerged_ret_state (current_method->max_locals))
2257 {
2258 debug_print ("== Fall through optimization\n");
2259 invalidate_pc ();
2260 continue;
2261 }
2262 // Save a copy of it for later.
2263 states[PC]->copy (current_state, current_method->max_stack,
2264 current_method->max_locals);
2265 current_state->print ("New", PC, current_method->max_stack,
2266 current_method->max_locals);
2267 }
2268 }
2269
2270 // We only have to keep saved state at branch targets. If
2271 // we're at a branch target and the state here hasn't been set
2272 // yet, we set it now.
2273 if (states[PC] == NULL && (flags[PC] & FLAG_BRANCH_TARGET))
2274 {
2275 states[PC] = new state (current_state, current_method->max_stack,
2276 current_method->max_locals);
2277 }
2278
2279 // Set this before handling exceptions so that debug output is
2280 // sane.
2281 start_PC = PC;
2282
2283 // Update states for all active exception handlers. Ordinarily
2284 // there are not many exception handlers. So we simply run
2285 // through them all.
2286 for (int i = 0; i < current_method->exc_count; ++i)
2287 {
2288 if (PC >= exception[i].start_pc.i && PC < exception[i].end_pc.i)
2289 {
2290 type handler (&java::lang::Throwable::class$);
2291 if (exception[i].handler_type.i != 0)
2292 handler = check_class_constant (exception[i].handler_type.i);
2293 push_exception_jump (handler, exception[i].handler_pc.i);
2294 }
2295 }
2296
2297 current_state->print (" ", PC, current_method->max_stack,
2298 current_method->max_locals);
2299 java_opcode opcode = (java_opcode) bytecode[PC++];
2300 switch (opcode)
2301 {
2302 case op_nop:
2303 break;
2304
2305 case op_aconst_null:
2306 push_type (null_type);
2307 break;
2308
2309 case op_iconst_m1:
2310 case op_iconst_0:
2311 case op_iconst_1:
2312 case op_iconst_2:
2313 case op_iconst_3:
2314 case op_iconst_4:
2315 case op_iconst_5:
2316 push_type (int_type);
2317 break;
2318
2319 case op_lconst_0:
2320 case op_lconst_1:
2321 push_type (long_type);
2322 break;
2323
2324 case op_fconst_0:
2325 case op_fconst_1:
2326 case op_fconst_2:
2327 push_type (float_type);
2328 break;
2329
2330 case op_dconst_0:
2331 case op_dconst_1:
2332 push_type (double_type);
2333 break;
2334
2335 case op_bipush:
2336 get_byte ();
2337 push_type (int_type);
2338 break;
2339
2340 case op_sipush:
2341 get_short ();
2342 push_type (int_type);
2343 break;
2344
2345 case op_ldc:
2346 push_type (check_constant (get_byte ()));
2347 break;
2348 case op_ldc_w:
2349 push_type (check_constant (get_ushort ()));
2350 break;
2351 case op_ldc2_w:
2352 push_type (check_wide_constant (get_ushort ()));
2353 break;
2354
2355 case op_iload:
2356 push_type (get_variable (get_byte (), int_type));
2357 break;
2358 case op_lload:
2359 push_type (get_variable (get_byte (), long_type));
2360 break;
2361 case op_fload:
2362 push_type (get_variable (get_byte (), float_type));
2363 break;
2364 case op_dload:
2365 push_type (get_variable (get_byte (), double_type));
2366 break;
2367 case op_aload:
2368 push_type (get_variable (get_byte (), reference_type));
2369 break;
2370
2371 case op_iload_0:
2372 case op_iload_1:
2373 case op_iload_2:
2374 case op_iload_3:
2375 push_type (get_variable (opcode - op_iload_0, int_type));
2376 break;
2377 case op_lload_0:
2378 case op_lload_1:
2379 case op_lload_2:
2380 case op_lload_3:
2381 push_type (get_variable (opcode - op_lload_0, long_type));
2382 break;
2383 case op_fload_0:
2384 case op_fload_1:
2385 case op_fload_2:
2386 case op_fload_3:
2387 push_type (get_variable (opcode - op_fload_0, float_type));
2388 break;
2389 case op_dload_0:
2390 case op_dload_1:
2391 case op_dload_2:
2392 case op_dload_3:
2393 push_type (get_variable (opcode - op_dload_0, double_type));
2394 break;
2395 case op_aload_0:
2396 case op_aload_1:
2397 case op_aload_2:
2398 case op_aload_3:
2399 push_type (get_variable (opcode - op_aload_0, reference_type));
2400 break;
2401 case op_iaload:
2402 pop_type (int_type);
2403 push_type (require_array_type (pop_init_ref (reference_type),
2404 int_type));
2405 break;
2406 case op_laload:
2407 pop_type (int_type);
2408 push_type (require_array_type (pop_init_ref (reference_type),
2409 long_type));
2410 break;
2411 case op_faload:
2412 pop_type (int_type);
2413 push_type (require_array_type (pop_init_ref (reference_type),
2414 float_type));
2415 break;
2416 case op_daload:
2417 pop_type (int_type);
2418 push_type (require_array_type (pop_init_ref (reference_type),
2419 double_type));
2420 break;
2421 case op_aaload:
2422 pop_type (int_type);
2423 push_type (require_array_type (pop_init_ref (reference_type),
2424 reference_type));
2425 break;
2426 case op_baload:
2427 pop_type (int_type);
2428 require_array_type (pop_init_ref (reference_type), byte_type);
2429 push_type (int_type);
2430 break;
2431 case op_caload:
2432 pop_type (int_type);
2433 require_array_type (pop_init_ref (reference_type), char_type);
2434 push_type (int_type);
2435 break;
2436 case op_saload:
2437 pop_type (int_type);
2438 require_array_type (pop_init_ref (reference_type), short_type);
2439 push_type (int_type);
2440 break;
2441 case op_istore:
2442 set_variable (get_byte (), pop_type (int_type));
2443 break;
2444 case op_lstore:
2445 set_variable (get_byte (), pop_type (long_type));
2446 break;
2447 case op_fstore:
2448 set_variable (get_byte (), pop_type (float_type));
2449 break;
2450 case op_dstore:
2451 set_variable (get_byte (), pop_type (double_type));
2452 break;
2453 case op_astore:
2454 set_variable (get_byte (), pop_ref_or_return ());
2455 break;
2456 case op_istore_0:
2457 case op_istore_1:
2458 case op_istore_2:
2459 case op_istore_3:
2460 set_variable (opcode - op_istore_0, pop_type (int_type));
2461 break;
2462 case op_lstore_0:
2463 case op_lstore_1:
2464 case op_lstore_2:
2465 case op_lstore_3:
2466 set_variable (opcode - op_lstore_0, pop_type (long_type));
2467 break;
2468 case op_fstore_0:
2469 case op_fstore_1:
2470 case op_fstore_2:
2471 case op_fstore_3:
2472 set_variable (opcode - op_fstore_0, pop_type (float_type));
2473 break;
2474 case op_dstore_0:
2475 case op_dstore_1:
2476 case op_dstore_2:
2477 case op_dstore_3:
2478 set_variable (opcode - op_dstore_0, pop_type (double_type));
2479 break;
2480 case op_astore_0:
2481 case op_astore_1:
2482 case op_astore_2:
2483 case op_astore_3:
2484 set_variable (opcode - op_astore_0, pop_ref_or_return ());
2485 break;
2486 case op_iastore:
2487 pop_type (int_type);
2488 pop_type (int_type);
2489 require_array_type (pop_init_ref (reference_type), int_type);
2490 break;
2491 case op_lastore:
2492 pop_type (long_type);
2493 pop_type (int_type);
2494 require_array_type (pop_init_ref (reference_type), long_type);
2495 break;
2496 case op_fastore:
2497 pop_type (float_type);
2498 pop_type (int_type);
2499 require_array_type (pop_init_ref (reference_type), float_type);
2500 break;
2501 case op_dastore:
2502 pop_type (double_type);
2503 pop_type (int_type);
2504 require_array_type (pop_init_ref (reference_type), double_type);
2505 break;
2506 case op_aastore:
2507 pop_type (reference_type);
2508 pop_type (int_type);
2509 require_array_type (pop_init_ref (reference_type), reference_type);
2510 break;
2511 case op_bastore:
2512 pop_type (int_type);
2513 pop_type (int_type);
2514 require_array_type (pop_init_ref (reference_type), byte_type);
2515 break;
2516 case op_castore:
2517 pop_type (int_type);
2518 pop_type (int_type);
2519 require_array_type (pop_init_ref (reference_type), char_type);
2520 break;
2521 case op_sastore:
2522 pop_type (int_type);
2523 pop_type (int_type);
2524 require_array_type (pop_init_ref (reference_type), short_type);
2525 break;
2526 case op_pop:
2527 pop32 ();
2528 break;
2529 case op_pop2:
2530 {
2531 type t = pop_raw ();
2532 if (! t.iswide ())
2533 pop32 ();
2534 }
2535 break;
2536 case op_dup:
2537 {
2538 type t = pop32 ();
2539 push_type (t);
2540 push_type (t);
2541 }
2542 break;
2543 case op_dup_x1:
2544 {
2545 type t1 = pop32 ();
2546 type t2 = pop32 ();
2547 push_type (t1);
2548 push_type (t2);
2549 push_type (t1);
2550 }
2551 break;
2552 case op_dup_x2:
2553 {
2554 type t1 = pop32 ();
2555 type t2 = pop_raw ();
2556 if (! t2.iswide ())
2557 {
2558 type t3 = pop32 ();
2559 push_type (t1);
2560 push_type (t3);
2561 }
2562 else
2563 push_type (t1);
2564 push_type (t2);
2565 push_type (t1);
2566 }
2567 break;
2568 case op_dup2:
2569 {
2570 type t = pop_raw ();
2571 if (! t.iswide ())
2572 {
2573 type t2 = pop32 ();
2574 push_type (t2);
2575 push_type (t);
2576 push_type (t2);
2577 }
2578 else
2579 push_type (t);
2580 push_type (t);
2581 }
2582 break;
2583 case op_dup2_x1:
2584 {
2585 type t1 = pop_raw ();
2586 type t2 = pop32 ();
2587 if (! t1.iswide ())
2588 {
2589 type t3 = pop32 ();
2590 push_type (t2);
2591 push_type (t1);
2592 push_type (t3);
2593 }
2594 else
2595 push_type (t1);
2596 push_type (t2);
2597 push_type (t1);
2598 }
2599 break;
2600 case op_dup2_x2:
2601 {
2602 type t1 = pop_raw ();
2603 if (t1.iswide ())
2604 {
2605 type t2 = pop_raw ();
2606 if (t2.iswide ())
2607 {
2608 push_type (t1);
2609 push_type (t2);
2610 }
2611 else
2612 {
2613 type t3 = pop32 ();
2614 push_type (t1);
2615 push_type (t3);
2616 push_type (t2);
2617 }
2618 push_type (t1);
2619 }
2620 else
2621 {
2622 type t2 = pop32 ();
2623 type t3 = pop_raw ();
2624 if (t3.iswide ())
2625 {
2626 push_type (t2);
2627 push_type (t1);
2628 }
2629 else
2630 {
2631 type t4 = pop32 ();
2632 push_type (t2);
2633 push_type (t1);
2634 push_type (t4);
2635 }
2636 push_type (t3);
2637 push_type (t2);
2638 push_type (t1);
2639 }
2640 }
2641 break;
2642 case op_swap:
2643 {
2644 type t1 = pop32 ();
2645 type t2 = pop32 ();
2646 push_type (t1);
2647 push_type (t2);
2648 }
2649 break;
2650 case op_iadd:
2651 case op_isub:
2652 case op_imul:
2653 case op_idiv:
2654 case op_irem:
2655 case op_ishl:
2656 case op_ishr:
2657 case op_iushr:
2658 case op_iand:
2659 case op_ior:
2660 case op_ixor:
2661 pop_type (int_type);
2662 push_type (pop_type (int_type));
2663 break;
2664 case op_ladd:
2665 case op_lsub:
2666 case op_lmul:
2667 case op_ldiv:
2668 case op_lrem:
2669 case op_land:
2670 case op_lor:
2671 case op_lxor:
2672 pop_type (long_type);
2673 push_type (pop_type (long_type));
2674 break;
2675 case op_lshl:
2676 case op_lshr:
2677 case op_lushr:
2678 pop_type (int_type);
2679 push_type (pop_type (long_type));
2680 break;
2681 case op_fadd:
2682 case op_fsub:
2683 case op_fmul:
2684 case op_fdiv:
2685 case op_frem:
2686 pop_type (float_type);
2687 push_type (pop_type (float_type));
2688 break;
2689 case op_dadd:
2690 case op_dsub:
2691 case op_dmul:
2692 case op_ddiv:
2693 case op_drem:
2694 pop_type (double_type);
2695 push_type (pop_type (double_type));
2696 break;
2697 case op_ineg:
2698 case op_i2b:
2699 case op_i2c:
2700 case op_i2s:
2701 push_type (pop_type (int_type));
2702 break;
2703 case op_lneg:
2704 push_type (pop_type (long_type));
2705 break;
2706 case op_fneg:
2707 push_type (pop_type (float_type));
2708 break;
2709 case op_dneg:
2710 push_type (pop_type (double_type));
2711 break;
2712 case op_iinc:
2713 get_variable (get_byte (), int_type);
2714 get_byte ();
2715 break;
2716 case op_i2l:
2717 pop_type (int_type);
2718 push_type (long_type);
2719 break;
2720 case op_i2f:
2721 pop_type (int_type);
2722 push_type (float_type);
2723 break;
2724 case op_i2d:
2725 pop_type (int_type);
2726 push_type (double_type);
2727 break;
2728 case op_l2i:
2729 pop_type (long_type);
2730 push_type (int_type);
2731 break;
2732 case op_l2f:
2733 pop_type (long_type);
2734 push_type (float_type);
2735 break;
2736 case op_l2d:
2737 pop_type (long_type);
2738 push_type (double_type);
2739 break;
2740 case op_f2i:
2741 pop_type (float_type);
2742 push_type (int_type);
2743 break;
2744 case op_f2l:
2745 pop_type (float_type);
2746 push_type (long_type);
2747 break;
2748 case op_f2d:
2749 pop_type (float_type);
2750 push_type (double_type);
2751 break;
2752 case op_d2i:
2753 pop_type (double_type);
2754 push_type (int_type);
2755 break;
2756 case op_d2l:
2757 pop_type (double_type);
2758 push_type (long_type);
2759 break;
2760 case op_d2f:
2761 pop_type (double_type);
2762 push_type (float_type);
2763 break;
2764 case op_lcmp:
2765 pop_type (long_type);
2766 pop_type (long_type);
2767 push_type (int_type);
2768 break;
2769 case op_fcmpl:
2770 case op_fcmpg:
2771 pop_type (float_type);
2772 pop_type (float_type);
2773 push_type (int_type);
2774 break;
2775 case op_dcmpl:
2776 case op_dcmpg:
2777 pop_type (double_type);
2778 pop_type (double_type);
2779 push_type (int_type);
2780 break;
2781 case op_ifeq:
2782 case op_ifne:
2783 case op_iflt:
2784 case op_ifge:
2785 case op_ifgt:
2786 case op_ifle:
2787 pop_type (int_type);
2788 push_jump (get_short ());
2789 break;
2790 case op_if_icmpeq:
2791 case op_if_icmpne:
2792 case op_if_icmplt:
2793 case op_if_icmpge:
2794 case op_if_icmpgt:
2795 case op_if_icmple:
2796 pop_type (int_type);
2797 pop_type (int_type);
2798 push_jump (get_short ());
2799 break;
2800 case op_if_acmpeq:
2801 case op_if_acmpne:
2802 pop_type (reference_type);
2803 pop_type (reference_type);
2804 push_jump (get_short ());
2805 break;
2806 case op_goto:
2807 push_jump (get_short ());
2808 invalidate_pc ();
2809 break;
2810 case op_jsr:
2811 handle_jsr_insn (get_short ());
2812 break;
2813 case op_ret:
2814 handle_ret_insn (get_byte ());
2815 break;
2816 case op_tableswitch:
2817 {
2818 pop_type (int_type);
2819 skip_padding ();
2820 push_jump (get_int ());
2821 jint low = get_int ();
2822 jint high = get_int ();
2823 // Already checked LOW -vs- HIGH.
2824 for (int i = low; i <= high; ++i)
2825 push_jump (get_int ());
2826 invalidate_pc ();
2827 }
2828 break;
2829
2830 case op_lookupswitch:
2831 {
2832 pop_type (int_type);
2833 skip_padding ();
2834 push_jump (get_int ());
2835 jint npairs = get_int ();
2836 // Already checked NPAIRS >= 0.
2837 jint lastkey = 0;
2838 for (int i = 0; i < npairs; ++i)
2839 {
2840 jint key = get_int ();
2841 if (i > 0 && key <= lastkey)
2842 verify_fail ("lookupswitch pairs unsorted", start_PC);
2843 lastkey = key;
2844 push_jump (get_int ());
2845 }
2846 invalidate_pc ();
2847 }
2848 break;
2849 case op_ireturn:
2850 check_return_type (pop_type (int_type));
2851 invalidate_pc ();
2852 break;
2853 case op_lreturn:
2854 check_return_type (pop_type (long_type));
2855 invalidate_pc ();
2856 break;
2857 case op_freturn:
2858 check_return_type (pop_type (float_type));
2859 invalidate_pc ();
2860 break;
2861 case op_dreturn:
2862 check_return_type (pop_type (double_type));
2863 invalidate_pc ();
2864 break;
2865 case op_areturn:
2866 check_return_type (pop_init_ref (reference_type));
2867 invalidate_pc ();
2868 break;
2869 case op_return:
2870 // We only need to check this when the return type is
2871 // void, because all instance initializers return void.
2872 if (this_is_init)
2873 current_state->check_this_initialized (this);
2874 check_return_type (void_type);
2875 invalidate_pc ();
2876 break;
2877 case op_getstatic:
2878 push_type (check_field_constant (get_ushort ()));
2879 break;
2880 case op_putstatic:
2881 pop_type (check_field_constant (get_ushort ()));
2882 break;
2883 case op_getfield:
2884 {
2885 type klass;
2886 type field = check_field_constant (get_ushort (), &klass);
2887 pop_type (klass);
2888 push_type (field);
2889 }
2890 break;
2891 case op_putfield:
2892 {
2893 type klass;
2894 type field = check_field_constant (get_ushort (), &klass);
2895 pop_type (field);
2896
2897 // We have an obscure special case here: we can use
2898 // `putfield' on a field declared in this class, even if
2899 // `this' has not yet been initialized.
2900 if (! current_state->this_type.isinitialized ()
2901 && current_state->this_type.pc == type::SELF)
2902 klass.set_uninitialized (type::SELF, this);
2903 pop_type (klass);
2904 }
2905 break;
2906
2907 case op_invokevirtual:
2908 case op_invokespecial:
2909 case op_invokestatic:
2910 case op_invokeinterface:
2911 {
2912 _Jv_Utf8Const *method_name, *method_signature;
2913 type class_type
2914 = check_method_constant (get_ushort (),
2915 opcode == op_invokeinterface,
2916 &method_name,
2917 &method_signature);
2918 // NARGS is only used when we're processing
2919 // invokeinterface. It is simplest for us to compute it
2920 // here and then verify it later.
2921 int nargs = 0;
2922 if (opcode == op_invokeinterface)
2923 {
2924 nargs = get_byte ();
2925 if (get_byte () != 0)
2926 verify_fail ("invokeinterface dummy byte is wrong");
2927 }
2928
2929 bool is_init = false;
2930 if (_Jv_equalUtf8Consts (method_name, gcj::init_name))
2931 {
2932 is_init = true;
2933 if (opcode != op_invokespecial)
2934 verify_fail ("can't invoke <init>");
2935 }
2936 else if (method_name->data[0] == '<')
2937 verify_fail ("can't invoke method starting with `<'");
2938
2939 // Pop arguments and check types.
2940 int arg_count = _Jv_count_arguments (method_signature);
2941 type arg_types[arg_count];
2942 compute_argument_types (method_signature, arg_types);
2943 for (int i = arg_count - 1; i >= 0; --i)
2944 {
2945 // This is only used for verifying the byte for
2946 // invokeinterface.
2947 nargs -= arg_types[i].depth ();
2948 pop_init_ref (arg_types[i]);
2949 }
2950
2951 if (opcode == op_invokeinterface
2952 && nargs != 1)
2953 verify_fail ("wrong argument count for invokeinterface");
2954
2955 if (opcode != op_invokestatic)
2956 {
2957 type t = class_type;
2958 if (is_init)
2959 {
2960 // In this case the PC doesn't matter.
2961 t.set_uninitialized (type::UNINIT, this);
2962 }
2963 type raw = pop_raw ();
2964 bool ok = false;
2965 if (! is_init && ! raw.isinitialized ())
2966 {
2967 // This is a failure.
2968 }
2969 else if (is_init && raw.isnull ())
2970 {
2971 // Another failure.
2972 }
2973 else if (t.compatible (raw, this))
2974 {
2975 ok = true;
2976 }
2977 else if (opcode == op_invokeinterface)
2978 {
2979 // This is a hack. We might have merged two
2980 // items and gotten `Object'. This can happen
2981 // because we don't keep track of where merges
2982 // come from. This is safe as long as the
2983 // interpreter checks interfaces at runtime.
2984 type obj (&java::lang::Object::class$);
2985 ok = raw.compatible (obj, this);
2986 }
2987
2988 if (! ok)
2989 verify_fail ("incompatible type on stack");
2990
2991 if (is_init)
2992 current_state->set_initialized (raw.get_pc (),
2993 current_method->max_locals);
2994 }
2995
2996 type rt = compute_return_type (method_signature);
2997 if (! rt.isvoid ())
2998 push_type (rt);
2999 }
3000 break;
3001
3002 case op_new:
3003 {
3004 type t = check_class_constant (get_ushort ());
3005 if (t.isarray () || t.isinterface (this) || t.isabstract (this))
3006 verify_fail ("type is array, interface, or abstract");
3007 t.set_uninitialized (start_PC, this);
3008 push_type (t);
3009 }
3010 break;
3011
3012 case op_newarray:
3013 {
3014 int atype = get_byte ();
3015 // We intentionally have chosen constants to make this
3016 // valid.
3017 if (atype < boolean_type || atype > long_type)
3018 verify_fail ("type not primitive", start_PC);
3019 pop_type (int_type);
3020 push_type (construct_primitive_array_type (type_val (atype)));
3021 }
3022 break;
3023 case op_anewarray:
3024 pop_type (int_type);
3025 push_type (check_class_constant (get_ushort ()).to_array (this));
3026 break;
3027 case op_arraylength:
3028 {
3029 type t = pop_init_ref (reference_type);
3030 if (! t.isarray () && ! t.isnull ())
3031 verify_fail ("array type expected");
3032 push_type (int_type);
3033 }
3034 break;
3035 case op_athrow:
3036 pop_type (type (&java::lang::Throwable::class$));
3037 invalidate_pc ();
3038 break;
3039 case op_checkcast:
3040 pop_init_ref (reference_type);
3041 push_type (check_class_constant (get_ushort ()));
3042 break;
3043 case op_instanceof:
3044 pop_init_ref (reference_type);
3045 check_class_constant (get_ushort ());
3046 push_type (int_type);
3047 break;
3048 case op_monitorenter:
3049 pop_init_ref (reference_type);
3050 break;
3051 case op_monitorexit:
3052 pop_init_ref (reference_type);
3053 break;
3054 case op_wide:
3055 {
3056 switch (get_byte ())
3057 {
3058 case op_iload:
3059 push_type (get_variable (get_ushort (), int_type));
3060 break;
3061 case op_lload:
3062 push_type (get_variable (get_ushort (), long_type));
3063 break;
3064 case op_fload:
3065 push_type (get_variable (get_ushort (), float_type));
3066 break;
3067 case op_dload:
3068 push_type (get_variable (get_ushort (), double_type));
3069 break;
3070 case op_aload:
3071 push_type (get_variable (get_ushort (), reference_type));
3072 break;
3073 case op_istore:
3074 set_variable (get_ushort (), pop_type (int_type));
3075 break;
3076 case op_lstore:
3077 set_variable (get_ushort (), pop_type (long_type));
3078 break;
3079 case op_fstore:
3080 set_variable (get_ushort (), pop_type (float_type));
3081 break;
3082 case op_dstore:
3083 set_variable (get_ushort (), pop_type (double_type));
3084 break;
3085 case op_astore:
3086 set_variable (get_ushort (), pop_init_ref (reference_type));
3087 break;
3088 case op_ret:
3089 handle_ret_insn (get_short ());
3090 break;
3091 case op_iinc:
3092 get_variable (get_ushort (), int_type);
3093 get_short ();
3094 break;
3095 default:
3096 verify_fail ("unrecognized wide instruction", start_PC);
3097 }
3098 }
3099 break;
3100 case op_multianewarray:
3101 {
3102 type atype = check_class_constant (get_ushort ());
3103 int dim = get_byte ();
3104 if (dim < 1)
3105 verify_fail ("too few dimensions to multianewarray", start_PC);
3106 atype.verify_dimensions (dim, this);
3107 for (int i = 0; i < dim; ++i)
3108 pop_type (int_type);
3109 push_type (atype);
3110 }
3111 break;
3112 case op_ifnull:
3113 case op_ifnonnull:
3114 pop_type (reference_type);
3115 push_jump (get_short ());
3116 break;
3117 case op_goto_w:
3118 push_jump (get_int ());
3119 invalidate_pc ();
3120 break;
3121 case op_jsr_w:
3122 handle_jsr_insn (get_int ());
3123 break;
3124
3125 // These are unused here, but we call them out explicitly
3126 // so that -Wswitch-enum doesn't complain.
3127 case op_putfield_1:
3128 case op_putfield_2:
3129 case op_putfield_4:
3130 case op_putfield_8:
3131 case op_putfield_a:
3132 case op_putstatic_1:
3133 case op_putstatic_2:
3134 case op_putstatic_4:
3135 case op_putstatic_8:
3136 case op_putstatic_a:
3137 case op_getfield_1:
3138 case op_getfield_2s:
3139 case op_getfield_2u:
3140 case op_getfield_4:
3141 case op_getfield_8:
3142 case op_getfield_a:
3143 case op_getstatic_1:
3144 case op_getstatic_2s:
3145 case op_getstatic_2u:
3146 case op_getstatic_4:
3147 case op_getstatic_8:
3148 case op_getstatic_a:
3149 default:
3150 // Unrecognized opcode.
3151 verify_fail ("unrecognized instruction in verify_instructions_0",
3152 start_PC);
3153 }
3154 }
3155 }
3156
3157 public:
3158
3159 void verify_instructions ()
3160 {
3161 branch_prepass ();
3162 verify_instructions_0 ();
3163 }
3164
3165 _Jv_BytecodeVerifier (_Jv_InterpMethod *m)
3166 {
3167 // We just print the text as utf-8. This is just for debugging
3168 // anyway.
3169 debug_print ("--------------------------------\n");
3170 debug_print ("-- Verifying method `%s'\n", m->self->name->data);
3171
3172 current_method = m;
3173 bytecode = m->bytecode ();
3174 exception = m->exceptions ();
3175 current_class = m->defining_class;
3176
3177 states = NULL;
3178 flags = NULL;
3179 jsr_ptrs = NULL;
3180 utf8_list = NULL;
3181 entry_points = NULL;
3182 }
3183
3184 ~_Jv_BytecodeVerifier ()
3185 {
3186 if (states)
3187 _Jv_Free (states);
3188 if (flags)
3189 _Jv_Free (flags);
3190
3191 if (jsr_ptrs)
3192 {
3193 for (int i = 0; i < current_method->code_length; ++i)
3194 {
3195 if (jsr_ptrs[i] != NULL)
3196 {
3197 subr_info *info = jsr_ptrs[i];
3198 while (info != NULL)
3199 {
3200 subr_info *next = info->next;
3201 _Jv_Free (info);
3202 info = next;
3203 }
3204 }
3205 }
3206 _Jv_Free (jsr_ptrs);
3207 }
3208
3209 while (utf8_list != NULL)
3210 {
3211 linked_utf8 *n = utf8_list->next;
3212 _Jv_Free (utf8_list->val);
3213 _Jv_Free (utf8_list);
3214 utf8_list = n;
3215 }
3216
3217 while (entry_points != NULL)
3218 {
3219 subr_entry_info *next = entry_points->next;
3220 _Jv_Free (entry_points);
3221 entry_points = next;
3222 }
3223 }
3224 };
3225
3226 void
3227 _Jv_VerifyMethod (_Jv_InterpMethod *meth)
3228 {
3229 _Jv_BytecodeVerifier v (meth);
3230 v.verify_instructions ();
3231 }
3232 #endif /* INTERPRETER */