# HG changeset patch
# User asaha
# Date 1403068524 25200
# Node ID bba95ce6b634baaf39b6edff8c314e637b73fad8
# Parent  a2221bbf68125c1e9e9eadb5de8431c9b2003fb4
# Parent  7ef8ab2bf2b094dd83399786945d9c8067fb0b1b
Merge

diff -r a2221bbf6812 -r bba95ce6b634 .hgtags
--- a/.hgtags	Tue Jun 17 16:12:09 2014 -0700
+++ b/.hgtags	Tue Jun 17 22:15:24 2014 -0700
@@ -486,6 +486,7 @@
 8ea4732884ccd5586f0afe9478b80add90231455 jdk8u20-b17
 b685b4e870b159ea5731984199d275879d427038 hs25.20-b18
 11159d7ec80462a422e39c9b3a39ae932923622d jdk8u20-b18
+3e1cec358ab95ef985f821219104141b9ffda83f hs25.20-b19
 a4d44dfb7d30eea54bc172e4429a655454ae0bbf jdk8u25-b00
 9a2152fbd929b0d8b2f5c326a5526214ae71731a jdk8u25-b01
 d3d5604ea0dea3812e87ba76ac199d0a8be6f49f jdk8u25-b02
diff -r a2221bbf6812 -r bba95ce6b634 make/hotspot_version
diff -r a2221bbf6812 -r bba95ce6b634 src/cpu/ppc/vm/cppInterpreter_ppc.cpp
--- a/src/cpu/ppc/vm/cppInterpreter_ppc.cpp	Tue Jun 17 16:12:09 2014 -0700
+++ b/src/cpu/ppc/vm/cppInterpreter_ppc.cpp	Tue Jun 17 22:15:24 2014 -0700
@@ -1,7 +1,7 @@
 /*
- * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2012, 2013 SAP AG. All rights reserved.
+ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2014 SAP AG. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -2948,17 +2948,60 @@
   istate->_last_Java_fp = last_Java_fp;
 }

-int AbstractInterpreter::layout_activation(Method* method,
-                                           int temps,        // Number of slots on java expression stack in use.
-                                           int popframe_args,
-                                           int monitors,     // Number of active monitors.
-                                           int caller_actual_parameters,
-                                           int callee_params,// Number of slots for callee parameters.
-                                           int callee_locals,// Number of slots for locals.
-                                           frame* caller,
-                                           frame* interpreter_frame,
-                                           bool is_top_frame,
-                                           bool is_bottom_frame) {
+// Computes monitor_size and top_frame_size in bytes.
+static void frame_size_helper(int max_stack,
+                              int monitors,
+                              int& monitor_size,
+                              int& top_frame_size) {
+  monitor_size = frame::interpreter_frame_monitor_size_in_bytes() * monitors;
+  top_frame_size = round_to(frame::interpreter_frame_cinterpreterstate_size_in_bytes()
+                            + monitor_size
+                            + max_stack * Interpreter::stackElementSize
+                            + 2 * Interpreter::stackElementSize,
+                            frame::alignment_in_bytes)
+                   + frame::top_ijava_frame_abi_size;
+}
+
+// Returns number of stackElementWords needed for the interpreter frame with the
+// given sections.
+int AbstractInterpreter::size_activation(int max_stack,
+                                         int temps,
+                                         int extra_args,
+                                         int monitors,
+                                         int callee_params,
+                                         int callee_locals,
+                                         bool is_top_frame) {
+  int monitor_size = 0;
+  int top_frame_size = 0;
+  frame_size_helper(max_stack, monitors, monitor_size, top_frame_size);
+
+  int frame_size;
+  if (is_top_frame) {
+    frame_size = top_frame_size;
+  } else {
+    frame_size = round_to(frame::interpreter_frame_cinterpreterstate_size_in_bytes()
+                          + monitor_size
+                          + (temps - callee_params + callee_locals) * Interpreter::stackElementSize
+                          + 2 * Interpreter::stackElementSize,
+                          frame::alignment_in_bytes)
+                 + frame::parent_ijava_frame_abi_size;
+    assert(extra_args == 0, "non-zero for top_frame only");
+  }
+
+  return frame_size / Interpreter::stackElementSize;
+}
+
+void AbstractInterpreter::layout_activation(Method* method,
+                                            int temps,        // Number of slots on java expression stack in use.
+                                            int popframe_args,
+                                            int monitors,     // Number of active monitors.
+                                            int caller_actual_parameters,
+                                            int callee_params,// Number of slots for callee parameters.
+                                            int callee_locals,// Number of slots for locals.
+                                            frame* caller,
+                                            frame* interpreter_frame,
+                                            bool is_top_frame,
+                                            bool is_bottom_frame) {
   // NOTE this code must exactly mimic what
   // InterpreterGenerator::generate_compute_interpreter_state() does
@@ -2968,86 +3011,64 @@
   // both the abi scratch area and a place to hold a result from a
   // callee on its way to the callers stack.

-  int monitor_size = frame::interpreter_frame_monitor_size_in_bytes() * monitors;
-  int frame_size;
-  int top_frame_size = round_to(frame::interpreter_frame_cinterpreterstate_size_in_bytes()
-                                + monitor_size
-                                + (method->max_stack() *Interpreter::stackElementWords * BytesPerWord)
-                                + 2*BytesPerWord,
-                                frame::alignment_in_bytes)
-                       + frame::top_ijava_frame_abi_size;
-  if (is_top_frame) {
-    frame_size = top_frame_size;
+  int monitor_size = 0;
+  int top_frame_size = 0;
+  frame_size_helper(method->max_stack(), monitors, monitor_size, top_frame_size);
+
+  intptr_t sp = (intptr_t)interpreter_frame->sp();
+  intptr_t fp = *(intptr_t *)sp;
+  assert(fp == (intptr_t)caller->sp(), "fp must match");
+  interpreterState cur_state =
+    (interpreterState)(fp - frame::interpreter_frame_cinterpreterstate_size_in_bytes());
+
+  // Now fill in the interpreterState object.
+
+  intptr_t* locals;
+  if (caller->is_interpreted_frame()) {
+    // Locals must agree with the caller because it will be used to set the
+    // caller's tos when we return.
+    interpreterState prev = caller->get_interpreterState();
+    // Calculate start of "locals" for MH calls. For MH calls, the
+    // current method() (= MH target) and prev->callee() (=
+    // MH.invoke*()) are different and especially have different
+    // signatures. To pop the arguments of the caller, we must use
+    // the prev->callee()->size_of_arguments() because that's what
+    // the caller actually pushed. Currently, for synthetic MH
+    // calls (deoptimized from inlined MH calls), detected by
+    // is_method_handle_invoke(), we use the callee's arguments
+    // because here, the caller's and callee's signature match.
+    if (true /*!caller->is_at_mh_callsite()*/) {
+      locals = prev->stack() + method->size_of_parameters();
+    } else {
+      // Normal MH call.
+      locals = prev->stack() + prev->callee()->size_of_parameters();
+    }
   } else {
-    frame_size = round_to(frame::interpreter_frame_cinterpreterstate_size_in_bytes()
-                          + monitor_size
-                          + ((temps - callee_params + callee_locals) *
-                             Interpreter::stackElementWords * BytesPerWord)
-                          + 2*BytesPerWord,
-                          frame::alignment_in_bytes)
-                 + frame::parent_ijava_frame_abi_size;
-    assert(popframe_args==0, "non-zero for top_frame only");
+    bool is_deopted;
+    locals = (intptr_t*) (fp + ((method->max_locals() - 1) * BytesPerWord) +
+                          frame::parent_ijava_frame_abi_size);
   }

-  // If we actually have a frame to layout we must now fill in all the pieces.
-  if (interpreter_frame != NULL) {
-
-    intptr_t sp = (intptr_t)interpreter_frame->sp();
-    intptr_t fp = *(intptr_t *)sp;
-    assert(fp == (intptr_t)caller->sp(), "fp must match");
-    interpreterState cur_state =
-      (interpreterState)(fp - frame::interpreter_frame_cinterpreterstate_size_in_bytes());
-
-    // Now fill in the interpreterState object.
-
-    intptr_t* locals;
-    if (caller->is_interpreted_frame()) {
-      // Locals must agree with the caller because it will be used to set the
-      // caller's tos when we return.
-      interpreterState prev = caller->get_interpreterState();
-      // Calculate start of "locals" for MH calls. For MH calls, the
-      // current method() (= MH target) and prev->callee() (=
-      // MH.invoke*()) are different and especially have different
-      // signatures. To pop the argumentsof the caller, we must use
-      // the prev->callee()->size_of_arguments() because that's what
-      // the caller actually pushed. Currently, for synthetic MH
-      // calls (deoptimized from inlined MH calls), detected by
-      // is_method_handle_invoke(), we use the callee's arguments
-      // because here, the caller's and callee's signature match.
-      if (true /*!caller->is_at_mh_callsite()*/) {
-        locals = prev->stack() + method->size_of_parameters();
-      } else {
-        // Normal MH call.
-        locals = prev->stack() + prev->callee()->size_of_parameters();
-      }
-    } else {
-      bool is_deopted;
-      locals = (intptr_t*) (fp + ((method->max_locals() - 1) * BytesPerWord) +
-                            frame::parent_ijava_frame_abi_size);
-    }
-
-    intptr_t* monitor_base = (intptr_t*) cur_state;
-    intptr_t* stack_base = (intptr_t*) ((intptr_t) monitor_base - monitor_size);
-
-    // Provide pop_frame capability on PPC64, add popframe_args.
-    // +1 because stack is always prepushed.
-    intptr_t* stack = (intptr_t*) ((intptr_t) stack_base - (temps + popframe_args + 1) * BytesPerWord);
-
-    BytecodeInterpreter::layout_interpreterState(cur_state,
-                                                 caller,
-                                                 interpreter_frame,
-                                                 method,
-                                                 locals,
-                                                 stack,
-                                                 stack_base,
-                                                 monitor_base,
-                                                 (intptr_t*)(((intptr_t)fp)-top_frame_size),
-                                                 is_top_frame);
-
-    BytecodeInterpreter::pd_layout_interpreterState(cur_state, interpreter_return_address,
-                                                    interpreter_frame->fp());
-  }
-  return frame_size/BytesPerWord;
+  intptr_t* monitor_base = (intptr_t*) cur_state;
+  intptr_t* stack_base = (intptr_t*) ((intptr_t) monitor_base - monitor_size);
+
+  // Provide pop_frame capability on PPC64, add popframe_args.
+  // +1 because stack is always prepushed.
+  intptr_t* stack = (intptr_t*) ((intptr_t) stack_base - (temps + popframe_args + 1) * BytesPerWord);
+
+  BytecodeInterpreter::layout_interpreterState(cur_state,
+                                               caller,
+                                               interpreter_frame,
+                                               method,
+                                               locals,
+                                               stack,
+                                               stack_base,
+                                               monitor_base,
+                                               (intptr_t*)(((intptr_t)fp) - top_frame_size),
+                                               is_top_frame);
+
+  BytecodeInterpreter::pd_layout_interpreterState(cur_state, interpreter_return_address,
+                                                  interpreter_frame->fp());
 }

 #endif // CC_INTERP
diff -r a2221bbf6812 -r bba95ce6b634 src/cpu/ppc/vm/ppc.ad
--- a/src/cpu/ppc/vm/ppc.ad	Tue Jun 17 16:12:09 2014 -0700
+++ b/src/cpu/ppc/vm/ppc.ad	Tue Jun 17 22:15:24 2014 -0700
@@ -1,6 +1,6 @@
 //
-// Copyright (c) 2011, 2013, Oracle and/or its affiliates. All rights reserved.
-// Copyright 2012, 2013 SAP AG. All rights reserved.
+// Copyright (c) 2011, 2014, Oracle and/or its affiliates. All rights reserved.
+// Copyright 2012, 2014 SAP AG. All rights reserved.
 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 //
 // This code is free software; you can redistribute it and/or modify it
@@ -1363,8 +1363,8 @@
   Compile* C = ra_->C;
   MacroAssembler _masm(&cbuf);

-  const long framesize = ((long)C->frame_slots()) << LogBytesPerInt;
-  assert(framesize%(2*wordSize) == 0, "must preserve 2*wordSize alignment");
+  const long framesize = C->frame_size_in_bytes();
+  assert(framesize % (2 * wordSize) == 0, "must preserve 2*wordSize alignment");

   const bool method_is_frameless = false /* TODO: PPC port C->is_frameless_method()*/;

@@ -1389,19 +1389,22 @@
   // careful, because some VM calls (such as call site linkage) can
   // use several kilobytes of stack. But the stack safety zone should
   // account for that. See bugs 4446381, 4468289, 4497237.
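Editor's note (not part of the patch): the hunk below switches the PPC prolog from banging the stack by the method's own frame size to banging by C->bang_size_in_bytes(), which the new assert allows to exceed the frame size. A minimal sketch of the bang arithmetic the changed code emits, with touch_page() as a hypothetical stand-in for the stores the real code generates:

    // Pages in (bang_end_safe, bang_end] are touched one page at a time.
    // Growing bang_end by bangsize rather than framesize also covers the
    // larger interpreter frames that deoptimization may push in place of
    // this compiled frame.
    void bang_stack(int bangsize) {
      const int page_size = os::vm_page_size();
      int bang_end = StackShadowPages * page_size;  // banged by the previous frame
      const int bang_end_safe = bang_end;
      if (bangsize > page_size) {
        bang_end += bangsize;
      }
      for (int bang_offset = bang_end_safe; bang_offset <= bang_end; bang_offset += page_size) {
        touch_page(bang_offset);  // hypothetical helper, illustration only
      }
    }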
-  if (C->need_stack_bang(framesize) && UseStackBanging) {
+
+  int bangsize = C->bang_size_in_bytes();
+  assert(bangsize >= framesize || bangsize <= 0, "stack bang size incorrect");
+  if (C->need_stack_bang(bangsize) && UseStackBanging) {
     // Unfortunately we cannot use the function provided in
     // assembler.cpp as we have to emulate the pipes. So I had to
     // insert the code of generate_stack_overflow_check(), see
     // assembler.cpp for some illuminative comments.
     const int page_size = os::vm_page_size();
-    int bang_end = StackShadowPages*page_size;
+    int bang_end = StackShadowPages * page_size;

     // This is how far the previous frame's stack banging extended.
     const int bang_end_safe = bang_end;

-    if (framesize > page_size) {
-      bang_end += framesize;
+    if (bangsize > page_size) {
+      bang_end += bangsize;
     }

     int bang_offset = bang_end_safe;
@@ -1447,7 +1450,7 @@
     unsigned int bytes = (unsigned int)framesize;
     long offset = Assembler::align_addr(bytes, frame::alignment_in_bytes);
-    ciMethod *currMethod = C -> method();
+    ciMethod *currMethod = C->method();

     // Optimized version for most common case.
     if (UsePower6SchedulerPPC64 &&
diff -r a2221bbf6812 -r bba95ce6b634 src/cpu/ppc/vm/templateInterpreter_ppc.cpp
--- a/src/cpu/ppc/vm/templateInterpreter_ppc.cpp	Tue Jun 17 16:12:09 2014 -0700
+++ b/src/cpu/ppc/vm/templateInterpreter_ppc.cpp	Tue Jun 17 22:15:24 2014 -0700
@@ -1328,21 +1328,42 @@
 int AbstractInterpreter::size_top_interpreter_activation(Method* method) {
   const int max_alignment_size = 2;
   const int abi_scratch = frame::abi_reg_args_size;
-  return method->max_locals() + method->max_stack() + frame::interpreter_frame_monitor_size() + max_alignment_size + abi_scratch;
+  return method->max_locals() + method->max_stack() +
+         frame::interpreter_frame_monitor_size() + max_alignment_size + abi_scratch;
 }

-// Fills a sceletal interpreter frame generated during deoptimizations
-// and returns the frame size in slots.
+// Returns number of stackElementWords needed for the interpreter frame with the
+// given sections.
+// This overestimates the stack by one slot in case of alignments.
+int AbstractInterpreter::size_activation(int max_stack,
+                                         int temps,
+                                         int extra_args,
+                                         int monitors,
+                                         int callee_params,
+                                         int callee_locals,
+                                         bool is_top_frame) {
+  // Note: This calculation must exactly parallel the frame setup
+  // in AbstractInterpreterGenerator::generate_method_entry.
+  assert(Interpreter::stackElementWords == 1, "sanity");
+  const int max_alignment_space = StackAlignmentInBytes / Interpreter::stackElementSize;
+  const int abi_scratch = is_top_frame ? (frame::abi_reg_args_size / Interpreter::stackElementSize) :
+                                         (frame::abi_minframe_size / Interpreter::stackElementSize);
+  const int size =
+    max_stack +
+    (callee_locals - callee_params) +
+    monitors * frame::interpreter_frame_monitor_size() +
+    max_alignment_space +
+    abi_scratch +
+    frame::ijava_state_size / Interpreter::stackElementSize;
+
+  // Fixed size of an interpreter frame, aligned to 16 bytes.
+  return (size & -2);
+}
+
+// Fills a skeletal interpreter frame generated during deoptimizations.
 //
 // Parameters:
 //
-// interpreter_frame == NULL:
-//   Only calculate the size of an interpreter activation, no actual layout.
-//   Note: This calculation must exactly parallel the frame setup
-//   in TemplateInterpreter::generate_normal_entry. But it does not
-//   account for the SP alignment, that might further enhance the
-//   frame size, depending on FP.
-//
 // interpreter_frame != NULL:
 //   set up the method, locals, and monitors.
 //   The frame interpreter_frame, if not NULL, is guaranteed to be the
@@ -1359,59 +1380,41 @@
 // the arguments off advance the esp by dummy popframe_extra_args slots.
 // Popping off those will establish the stack layout as it was before the call.
 //
-int AbstractInterpreter::layout_activation(Method* method,
-                                           int tempcount,
-                                           int popframe_extra_args,
-                                           int moncount,
-                                           int caller_actual_parameters,
-                                           int callee_param_count,
-                                           int callee_locals,
-                                           frame* caller,
-                                           frame* interpreter_frame,
-                                           bool is_top_frame,
-                                           bool is_bottom_frame) {
+void AbstractInterpreter::layout_activation(Method* method,
+                                            int tempcount,
+                                            int popframe_extra_args,
+                                            int moncount,
+                                            int caller_actual_parameters,
+                                            int callee_param_count,
+                                            int callee_locals_count,
+                                            frame* caller,
+                                            frame* interpreter_frame,
+                                            bool is_top_frame,
+                                            bool is_bottom_frame) {

-  const int max_alignment_space = 2;
   const int abi_scratch = is_top_frame ? (frame::abi_reg_args_size / Interpreter::stackElementSize) :
-                                         (frame::abi_minframe_size / Interpreter::stackElementSize) ;
-  const int conservative_framesize_in_slots =
-    method->max_stack() + callee_locals - callee_param_count +
-    (moncount * frame::interpreter_frame_monitor_size()) + max_alignment_space +
-    abi_scratch + frame::ijava_state_size / Interpreter::stackElementSize;
-
-  assert(!is_top_frame || conservative_framesize_in_slots * 8 > frame::abi_reg_args_size + frame::ijava_state_size, "frame too small");
+                                         (frame::abi_minframe_size / Interpreter::stackElementSize);

-  if (interpreter_frame == NULL) {
-    // Since we don't know the exact alignment, we return the conservative size.
-    return (conservative_framesize_in_slots & -2);
-  } else {
-    // Now we know our caller, calc the exact frame layout and size.
-    intptr_t* locals_base = (caller->is_interpreted_frame()) ?
-      caller->interpreter_frame_esp() + caller_actual_parameters :
-      caller->sp() + method->max_locals() - 1 + (frame::abi_minframe_size / Interpreter::stackElementSize) ;
+  intptr_t* locals_base = (caller->is_interpreted_frame()) ?
+    caller->interpreter_frame_esp() + caller_actual_parameters :
+    caller->sp() + method->max_locals() - 1 + (frame::abi_minframe_size / Interpreter::stackElementSize) ;

-    intptr_t* monitor_base = caller->sp() - frame::ijava_state_size / Interpreter::stackElementSize ;
-    intptr_t* monitor = monitor_base - (moncount * frame::interpreter_frame_monitor_size());
-    intptr_t* esp_base = monitor - 1;
-    intptr_t* esp = esp_base - tempcount - popframe_extra_args;
-    intptr_t* sp = (intptr_t *) (((intptr_t) (esp_base- callee_locals + callee_param_count - method->max_stack()- abi_scratch)) & -StackAlignmentInBytes);
-    intptr_t* sender_sp = caller->sp() + (frame::abi_minframe_size - frame::abi_reg_args_size) / Interpreter::stackElementSize;
-    intptr_t* top_frame_sp = is_top_frame ? sp : sp + (frame::abi_minframe_size - frame::abi_reg_args_size) / Interpreter::stackElementSize;
+  intptr_t* monitor_base = caller->sp() - frame::ijava_state_size / Interpreter::stackElementSize ;
+  intptr_t* monitor = monitor_base - (moncount * frame::interpreter_frame_monitor_size());
+  intptr_t* esp_base = monitor - 1;
+  intptr_t* esp = esp_base - tempcount - popframe_extra_args;
+  intptr_t* sp = (intptr_t *) (((intptr_t) (esp_base - callee_locals_count + callee_param_count - method->max_stack()- abi_scratch)) & -StackAlignmentInBytes);
+  intptr_t* sender_sp = caller->sp() + (frame::abi_minframe_size - frame::abi_reg_args_size) / Interpreter::stackElementSize;
+  intptr_t* top_frame_sp = is_top_frame ? sp : sp + (frame::abi_minframe_size - frame::abi_reg_args_size) / Interpreter::stackElementSize;

-    interpreter_frame->interpreter_frame_set_method(method);
-    interpreter_frame->interpreter_frame_set_locals(locals_base);
-    interpreter_frame->interpreter_frame_set_cpcache(method->constants()->cache());
-    interpreter_frame->interpreter_frame_set_esp(esp);
-    interpreter_frame->interpreter_frame_set_monitor_end((BasicObjectLock *)monitor);
-    interpreter_frame->interpreter_frame_set_top_frame_sp(top_frame_sp);
-    if (!is_bottom_frame) {
-      interpreter_frame->interpreter_frame_set_sender_sp(sender_sp);
-    }
-
-    int framesize_in_slots = caller->sp() - sp;
-    assert(!is_top_frame ||framesize_in_slots >= (frame::abi_reg_args_size / Interpreter::stackElementSize) + frame::ijava_state_size / Interpreter::stackElementSize, "frame too small");
-    assert(framesize_in_slots <= conservative_framesize_in_slots, "exact frame size must be smaller than the convervative size!");
-    return framesize_in_slots;
+  interpreter_frame->interpreter_frame_set_method(method);
+  interpreter_frame->interpreter_frame_set_locals(locals_base);
+  interpreter_frame->interpreter_frame_set_cpcache(method->constants()->cache());
+  interpreter_frame->interpreter_frame_set_esp(esp);
+  interpreter_frame->interpreter_frame_set_monitor_end((BasicObjectLock *)monitor);
+  interpreter_frame->interpreter_frame_set_top_frame_sp(top_frame_sp);
+  if (!is_bottom_frame) {
+    interpreter_frame->interpreter_frame_set_sender_sp(sender_sp);
   }
 }

diff -r a2221bbf6812 -r bba95ce6b634 src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp
--- a/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp	Tue Jun 17 16:12:09 2014 -0700
+++ b/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp	Tue Jun 17 22:15:24 2014 -0700
@@ -152,7 +152,7 @@
 }


-int LIR_Assembler::initial_frame_size_in_bytes() {
+int LIR_Assembler::initial_frame_size_in_bytes() const {
   return in_bytes(frame_map()->framesize_in_bytes());
 }

@@ -182,7 +182,7 @@
   int number_of_locks = entry_state->locks_size();

   // Create a frame for the compiled activation.
-  __ build_frame(initial_frame_size_in_bytes());
+  __ build_frame(initial_frame_size_in_bytes(), bang_size_in_bytes());

   // OSR buffer is
   //
diff -r a2221bbf6812 -r bba95ce6b634 src/cpu/sparc/vm/c1_MacroAssembler_sparc.cpp
--- a/src/cpu/sparc/vm/c1_MacroAssembler_sparc.cpp	Tue Jun 17 16:12:09 2014 -0700
+++ b/src/cpu/sparc/vm/c1_MacroAssembler_sparc.cpp	Tue Jun 17 22:15:24 2014 -0700
@@ -55,9 +55,9 @@
 }


-void C1_MacroAssembler::build_frame(int frame_size_in_bytes) {
-
-  generate_stack_overflow_check(frame_size_in_bytes);
+void C1_MacroAssembler::build_frame(int frame_size_in_bytes, int bang_size_in_bytes) {
+  assert(bang_size_in_bytes >= frame_size_in_bytes, "stack bang size incorrect");
+  generate_stack_overflow_check(bang_size_in_bytes);
   // Create the frame.
   save_frame_c1(frame_size_in_bytes);
 }
diff -r a2221bbf6812 -r bba95ce6b634 src/cpu/sparc/vm/cppInterpreter_sparc.cpp
--- a/src/cpu/sparc/vm/cppInterpreter_sparc.cpp	Tue Jun 17 16:12:09 2014 -0700
+++ b/src/cpu/sparc/vm/cppInterpreter_sparc.cpp	Tue Jun 17 22:15:24 2014 -0700
@@ -2101,7 +2101,7 @@
   int monitor_size    = method->is_synchronized() ?
                                 1*frame::interpreter_frame_monitor_size() : 0;
   return size_activation_helper(method->max_locals(), method->max_stack(),
-                                 monitor_size) + call_stub_size;
+                                monitor_size) + call_stub_size;
 }

 void BytecodeInterpreter::layout_interpreterState(interpreterState to_fill,
@@ -2185,31 +2185,31 @@
   istate->_last_Java_pc = (intptr_t*) last_Java_pc;
 }

-
-int AbstractInterpreter::layout_activation(Method* method,
-                                           int tempcount, // Number of slots on java expression stack in use
-                                           int popframe_extra_args,
-                                           int moncount,  // Number of active monitors
-                                           int caller_actual_parameters,
-                                           int callee_param_size,
-                                           int callee_locals_size,
-                                           frame* caller,
-                                           frame* interpreter_frame,
-                                           bool is_top_frame,
-                                           bool is_bottom_frame) {
+static int frame_size_helper(int max_stack,
+                             int moncount,
+                             int callee_param_size,
+                             int callee_locals_size,
+                             bool is_top_frame,
+                             int& monitor_size,
+                             int& full_frame_words) {
+  int extra_locals_size = callee_locals_size - callee_param_size;
+  monitor_size = (sizeof(BasicObjectLock) * moncount) / wordSize;
+  full_frame_words = size_activation_helper(extra_locals_size, max_stack, monitor_size);
+  int short_frame_words = size_activation_helper(extra_locals_size, max_stack, monitor_size);
+  int frame_words = is_top_frame ? full_frame_words : short_frame_words;

-  assert(popframe_extra_args == 0, "NEED TO FIX");
-  // NOTE this code must exactly mimic what InterpreterGenerator::generate_compute_interpreter_state()
-  // does as far as allocating an interpreter frame.
-  // If interpreter_frame!=NULL, set up the method, locals, and monitors.
-  // The frame interpreter_frame, if not NULL, is guaranteed to be the right size,
-  // as determined by a previous call to this method.
-  // It is also guaranteed to be walkable even though it is in a skeletal state
+  return frame_words;
+}
+
+int AbstractInterpreter::size_activation(int max_stack,
+                                         int tempcount,
+                                         int extra_args,
+                                         int moncount,
+                                         int callee_param_size,
+                                         int callee_locals_size,
+                                         bool is_top_frame) {
+  assert(extra_args == 0, "NEED TO FIX");
   // NOTE: return size is in words not bytes
-  // NOTE: tempcount is the current size of the java expression stack. For top most
-  // frames we will allocate a full sized expression stack and not the curback
-  // version that non-top frames have.
-
   // Calculate the amount our frame will be adjust by the callee. For top frame
   // this is zero.
@@ -2218,87 +2218,108 @@
   // to it. So it ignores last_frame_adjust value. Seems suspicious as far
   // as getting sender_sp correct.

-  int extra_locals_size = callee_locals_size - callee_param_size;
-  int monitor_size = (sizeof(BasicObjectLock) * moncount) / wordSize;
-  int full_frame_words = size_activation_helper(extra_locals_size, method->max_stack(), monitor_size);
-  int short_frame_words = size_activation_helper(extra_locals_size, method->max_stack(), monitor_size);
-  int frame_words = is_top_frame ? full_frame_words : short_frame_words;
+  int unused_monitor_size = 0;
+  int unused_full_frame_words = 0;
+  return frame_size_helper(max_stack, moncount, callee_param_size, callee_locals_size, is_top_frame,
+                           unused_monitor_size, unused_full_frame_words);
+}
+
+void AbstractInterpreter::layout_activation(Method* method,
+                                            int tempcount, // Number of slots on java expression stack in use
+                                            int popframe_extra_args,
+                                            int moncount,  // Number of active monitors
+                                            int caller_actual_parameters,
+                                            int callee_param_size,
+                                            int callee_locals_size,
+                                            frame* caller,
+                                            frame* interpreter_frame,
+                                            bool is_top_frame,
+                                            bool is_bottom_frame) {
+  assert(popframe_extra_args == 0, "NEED TO FIX");
+  // NOTE this code must exactly mimic what InterpreterGenerator::generate_compute_interpreter_state()
+  // does as far as allocating an interpreter frame.
+  // Set up the method, locals, and monitors.
+  // The frame interpreter_frame is guaranteed to be the right size,
+  // as determined by a previous call to the size_activation() method.
+  // It is also guaranteed to be walkable even though it is in a skeletal state
+  // NOTE: tempcount is the current size of the java expression stack. For top most
+  // frames we will allocate a full sized expression stack and not the cutback
+  // version that non-top frames have.
+  int monitor_size = 0;
+  int full_frame_words = 0;
+  int frame_words = frame_size_helper(method->max_stack(), moncount, callee_param_size, callee_locals_size,
+                                      is_top_frame, monitor_size, full_frame_words);
   /*
-    if we actually have a frame to layout we must now fill in all the pieces. This means both
+    We must now fill in all the pieces of the frame. This means both
     the interpreterState and the registers.
   */
-  if (interpreter_frame != NULL) {
-    // MUCHO HACK
+  // MUCHO HACK

-    intptr_t* frame_bottom = interpreter_frame->sp() - (full_frame_words - frame_words);
-    // 'interpreter_frame->sp()' is unbiased while 'frame_bottom' must be a biased value in 64bit mode.
-    assert(((intptr_t)frame_bottom & 0xf) == 0, "SP biased in layout_activation");
-    frame_bottom = (intptr_t*)((intptr_t)frame_bottom - STACK_BIAS);
+  intptr_t* frame_bottom = interpreter_frame->sp() - (full_frame_words - frame_words);
+  // 'interpreter_frame->sp()' is unbiased while 'frame_bottom' must be a biased value in 64bit mode.
+  assert(((intptr_t)frame_bottom & 0xf) == 0, "SP biased in layout_activation");
+  frame_bottom = (intptr_t*)((intptr_t)frame_bottom - STACK_BIAS);

-    /* Now fillin the interpreterState object */
+  /* Now fill in the interpreterState object */

-    interpreterState cur_state = (interpreterState) ((intptr_t)interpreter_frame->fp() - sizeof(BytecodeInterpreter));
+  interpreterState cur_state = (interpreterState) ((intptr_t)interpreter_frame->fp() - sizeof(BytecodeInterpreter));

-    intptr_t* locals;
+  intptr_t* locals;
+
+  // Calculate the position of locals[0]. This is painful because of
+  // stack alignment (same as ia64). The problem is that we can
+  // not compute the location of locals from fp(). fp() will account
+  // for the extra locals but it also accounts for aligning the stack
+  // and we can't determine if the locals[0] was misaligned but max_locals
+  // was enough to have the
+  // calculate position of locals. fp already accounts for extra locals.
+  // +2 for the static long no_params() issue.

-    // Calculate the postion of locals[0]. This is painful because of
-    // stack alignment (same as ia64). The problem is that we can
-    // not compute the location of locals from fp(). fp() will account
-    // for the extra locals but it also accounts for aligning the stack
-    // and we can't determine if the locals[0] was misaligned but max_locals
-    // was enough to have the
-    // calculate postion of locals. fp already accounts for extra locals.
-    // +2 for the static long no_params() issue.
+  if (caller->is_interpreted_frame()) {
+    // locals must agree with the caller because it will be used to set the
+    // caller's tos when we return.
+    interpreterState prev = caller->get_interpreterState();
+    // stack() is prepushed.
+    locals = prev->stack() + method->size_of_parameters();
+  } else {
+    // Lay out locals block in the caller adjacent to the register window save area.
+    //
+    // Compiled frames do not allocate a varargs area which is why this if
+    // statement is needed.
+    //
+    intptr_t* fp = interpreter_frame->fp();
+    int local_words = method->max_locals() * Interpreter::stackElementWords;

-    if (caller->is_interpreted_frame()) {
-      // locals must agree with the caller because it will be used to set the
-      // caller's tos when we return.
-      interpreterState prev = caller->get_interpreterState();
-      // stack() is prepushed.
-      locals = prev->stack() + method->size_of_parameters();
+    if (caller->is_compiled_frame()) {
+      locals = fp + frame::register_save_words + local_words - 1;
     } else {
-      assert(caller->is_compiled_frame() || caller->is_entry_frame(), "only possible cases");
-      // Don't have Lesp available; lay out locals block in the caller
-      // adjacent to the register window save area.
-      //
-      // Compiled frames do not allocate a varargs area which is why this if
-      // statement is needed.
-      //
-      intptr_t* fp = interpreter_frame->fp();
-      int local_words = method->max_locals() * Interpreter::stackElementWords;
+      locals = fp + frame::memory_parameter_word_sp_offset + local_words - 1;
+    }

-      if (caller->is_compiled_frame()) {
-        locals = fp + frame::register_save_words + local_words - 1;
-      } else {
-        locals = fp + frame::memory_parameter_word_sp_offset + local_words - 1;
-      }
+  }
+  // END MUCHO HACK

-    }
-    // END MUCHO HACK
-
-    intptr_t* monitor_base = (intptr_t*) cur_state;
-    intptr_t* stack_base = monitor_base - monitor_size;
-    /* +1 because stack is always prepushed */
-    intptr_t* stack = stack_base - (tempcount + 1);
+  intptr_t* monitor_base = (intptr_t*) cur_state;
+  intptr_t* stack_base = monitor_base - monitor_size;
+  /* +1 because stack is always prepushed */
+  intptr_t* stack = stack_base - (tempcount + 1);

-    BytecodeInterpreter::layout_interpreterState(cur_state,
-                                                 caller,
-                                                 interpreter_frame,
-                                                 method,
-                                                 locals,
-                                                 stack,
-                                                 stack_base,
-                                                 monitor_base,
-                                                 frame_bottom,
-                                                 is_top_frame);
+  BytecodeInterpreter::layout_interpreterState(cur_state,
+                                               caller,
+                                               interpreter_frame,
+                                               method,
+                                               locals,
+                                               stack,
+                                               stack_base,
+                                               monitor_base,
+                                               frame_bottom,
+                                               is_top_frame);

-    BytecodeInterpreter::pd_layout_interpreterState(cur_state, interpreter_return_address, interpreter_frame->fp());
-
-  }
-  return frame_words;
+  BytecodeInterpreter::pd_layout_interpreterState(cur_state, interpreter_return_address, interpreter_frame->fp());
 }

 #endif // CC_INTERP
diff -r a2221bbf6812 -r bba95ce6b634 src/cpu/sparc/vm/macroAssembler_sparc.cpp
--- a/src/cpu/sparc/vm/macroAssembler_sparc.cpp	Tue Jun 17 16:12:09 2014 -0700
+++ b/src/cpu/sparc/vm/macroAssembler_sparc.cpp	Tue Jun 17 22:15:24 2014 -0700
@@ -3531,7 +3531,7 @@
   // was post-decremented.)  Skip this address by starting at i=1, and
   // touch a few more pages below.  N.B.  It is important to touch all
   // the way down to and including i=StackShadowPages.
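Editor's note (not part of the patch): the hunk below tightens the shadow-page touch loop from i <= StackShadowPages to i < StackShadowPages, so one fewer page is touched; the retained comment above it still says "down to and including i=StackShadowPages", which no longer matches the code literally. A sketch of what the emitted loop does, with touch_word_at() as an illustrative stand-in for the set()/st() pair:

    // Touch one word in each shadow page below the last banged address
    // so the guard region is mapped in before the method body runs.
    for (int i = 1; i < StackShadowPages; i++) {
      touch_word_at(-i * os::vm_page_size());  // illustrative only
    }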
-  for (int i = 1; i <= StackShadowPages; i++) {
+  for (int i = 1; i < StackShadowPages; i++) {
     set((-i*offset)+STACK_BIAS, Rscratch);
     st(G0, Rtsp, Rscratch);
   }
diff -r a2221bbf6812 -r bba95ce6b634 src/cpu/sparc/vm/sharedRuntime_sparc.cpp
--- a/src/cpu/sparc/vm/sharedRuntime_sparc.cpp	Tue Jun 17 16:12:09 2014 -0700
+++ b/src/cpu/sparc/vm/sharedRuntime_sparc.cpp	Tue Jun 17 22:15:24 2014 -0700
@@ -3355,13 +3355,16 @@
   Register O4array_size = O4;
   Label loop;

-  // Before we make new frames, check to see if stack is available.
-  // Do this after the caller's return address is on top of stack
+#ifdef ASSERT
+  // Compilers generate code that bangs the stack by as much as the
+  // interpreter would need. So this stack banging should never
+  // trigger a fault. Verify that it does not on non product builds.
   if (UseStackBanging) {
     // Get total frame size for interpreted frames
     __ ld(O2UnrollBlock, Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes(), O4);
     __ bang_stack_size(O4, O3, G3_scratch);
   }
+#endif

   __ ld(O2UnrollBlock, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes(), O4array_size);
   __ ld_ptr(O2UnrollBlock, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes(), G3pcs);
@@ -3409,9 +3412,11 @@
   ResourceMark rm;
   // setup code generation tools
   int pad = VerifyThread ? 512 : 0;// Extra slop space for more verify code
+#ifdef ASSERT
   if (UseStackBanging) {
     pad += StackShadowPages*16 + 32;
   }
+#endif
 #ifdef _LP64
   CodeBuffer buffer("deopt_blob", 2100+pad, 512);
 #else
@@ -3632,9 +3637,11 @@
   ResourceMark rm;
   // setup code generation tools
   int pad = VerifyThread ? 512 : 0;
+#ifdef ASSERT
   if (UseStackBanging) {
     pad += StackShadowPages*16 + 32;
   }
+#endif
 #ifdef _LP64
   CodeBuffer buffer("uncommon_trap_blob", 2700+pad, 512);
 #else
diff -r a2221bbf6812 -r bba95ce6b634 src/cpu/sparc/vm/sparc.ad
--- a/src/cpu/sparc/vm/sparc.ad	Tue Jun 17 16:12:09 2014 -0700
+++ b/src/cpu/sparc/vm/sparc.ad	Tue Jun 17 22:15:24 2014 -0700
@@ -1193,15 +1193,16 @@
     st->print_cr("Verify_Thread"); st->print("\t");
   }

-  size_t framesize = C->frame_slots() << LogBytesPerInt;
+  size_t framesize = C->frame_size_in_bytes();
+  int bangsize = C->bang_size_in_bytes();

   // Calls to C2R adapters often do not accept exceptional returns.
   // We require that their callers must bang for them. But be careful, because
   // some VM calls (such as call site linkage) can use several kilobytes of
   // stack. But the stack safety zone should account for that.
   // See bugs 4446381, 4468289, 4497237.
-  if (C->need_stack_bang(framesize)) {
-    st->print_cr("! stack bang"); st->print("\t");
+  if (C->need_stack_bang(bangsize)) {
+    st->print_cr("! stack bang (%d bytes)", bangsize); st->print("\t");
   }

   if (Assembler::is_simm13(-framesize)) {
@@ -1225,17 +1226,18 @@

   __ verify_thread();

-  size_t framesize = C->frame_slots() << LogBytesPerInt;
+  size_t framesize = C->frame_size_in_bytes();
   assert(framesize >= 16*wordSize, "must have room for reg. save area");
   assert(framesize%(2*wordSize) == 0, "must preserve 2*wordSize alignment");
+  int bangsize = C->bang_size_in_bytes();

   // Calls to C2R adapters often do not accept exceptional returns.
   // We require that their callers must bang for them. But be careful, because
   // some VM calls (such as call site linkage) can use several kilobytes of
   // stack. But the stack safety zone should account for that.
   // See bugs 4446381, 4468289, 4497237.
-  if (C->need_stack_bang(framesize)) {
-    __ generate_stack_overflow_check(framesize);
+  if (C->need_stack_bang(bangsize)) {
+    __ generate_stack_overflow_check(bangsize);
   }

   if (Assembler::is_simm13(-framesize)) {
@@ -2547,7 +2549,7 @@
   enc_class call_epilog %{
     if( VerifyStackAtCalls ) {
       MacroAssembler _masm(&cbuf);
-      int framesize = ra_->C->frame_slots() << LogBytesPerInt;
+      int framesize = ra_->C->frame_size_in_bytes();
       Register temp_reg = G3;
       __ add(SP, framesize, temp_reg);
       __ cmp(temp_reg, FP);
diff -r a2221bbf6812 -r bba95ce6b634 src/cpu/sparc/vm/templateInterpreter_sparc.cpp
--- a/src/cpu/sparc/vm/templateInterpreter_sparc.cpp	Tue Jun 17 16:12:09 2014 -0700
+++ b/src/cpu/sparc/vm/templateInterpreter_sparc.cpp	Tue Jun 17 22:15:24 2014 -0700
@@ -1564,37 +1564,23 @@
   int monitor_size    = method->is_synchronized() ?
                                 1*frame::interpreter_frame_monitor_size() : 0;
   return size_activation_helper(method->max_locals(), method->max_stack(),
-                                 monitor_size) + call_stub_size;
+                                monitor_size) + call_stub_size;
 }

-int AbstractInterpreter::layout_activation(Method* method,
-                                           int tempcount,
-                                           int popframe_extra_args,
-                                           int moncount,
-                                           int caller_actual_parameters,
-                                           int callee_param_count,
-                                           int callee_local_count,
-                                           frame* caller,
-                                           frame* interpreter_frame,
-                                           bool is_top_frame,
-                                           bool is_bottom_frame) {
+int AbstractInterpreter::size_activation(int max_stack,
+                                         int temps,
+                                         int extra_args,
+                                         int monitors,
+                                         int callee_params,
+                                         int callee_locals,
+                                         bool is_top_frame) {
   // Note: This calculation must exactly parallel the frame setup
   // in InterpreterGenerator::generate_fixed_frame.
-  // If f!=NULL, set up the following variables:
-  //   - Lmethod
-  //   - Llocals
-  //   - Lmonitors (to the indicated number of monitors)
-  //   - Lesp (to the indicated number of temps)
-  // The frame f (if not NULL) on entry is a description of the caller of the frame
-  // we are about to layout. We are guaranteed that we will be able to fill in a
-  // new interpreter frame as its callee (i.e. the stack space is allocated and
-  // the amount was determined by an earlier call to this method with f == NULL).
-  // On return f (if not NULL) while describe the interpreter frame we just layed out.
-  int monitor_size = moncount * frame::interpreter_frame_monitor_size();
-  int rounded_vm_local_words = round_to(frame::interpreter_frame_vm_local_words,WordsPerLong);
+  int monitor_size = monitors * frame::interpreter_frame_monitor_size();

   assert(monitor_size == round_to(monitor_size, WordsPerLong), "must align");

+  //
   // Note: if you look closely this appears to be doing something much different
   // than generate_fixed_frame. What is happening is this. On sparc we have to do
@@ -1619,146 +1605,171 @@
   // there is no sense in messing working code.
   //

-  int rounded_cls = round_to((callee_local_count - callee_param_count), WordsPerLong);
+  int rounded_cls = round_to((callee_locals - callee_params), WordsPerLong);
   assert(rounded_cls == round_to(rounded_cls, WordsPerLong), "must align");

-  int raw_frame_size = size_activation_helper(rounded_cls, method->max_stack(),
-                                              monitor_size);
+  int raw_frame_size = size_activation_helper(rounded_cls, max_stack, monitor_size);

-  if (interpreter_frame != NULL) {
-    // The skeleton frame must already look like an interpreter frame
-    // even if not fully filled out.
-    assert(interpreter_frame->is_interpreted_frame(), "Must be interpreted frame");
-
-    intptr_t* fp = interpreter_frame->fp();
+  return raw_frame_size;
+}

-    JavaThread* thread = JavaThread::current();
-    RegisterMap map(thread, false);
-    // More verification that skeleton frame is properly walkable
-    assert(fp == caller->sp(), "fp must match");
-
-    intptr_t* montop = fp - rounded_vm_local_words;
+void AbstractInterpreter::layout_activation(Method* method,
+                                            int tempcount,
+                                            int popframe_extra_args,
+                                            int moncount,
+                                            int caller_actual_parameters,
+                                            int callee_param_count,
+                                            int callee_local_count,
+                                            frame* caller,
+                                            frame* interpreter_frame,
+                                            bool is_top_frame,
+                                            bool is_bottom_frame) {
+  // Set up the following variables:
+  //   - Lmethod
+  //   - Llocals
+  //   - Lmonitors (to the indicated number of monitors)
+  //   - Lesp (to the indicated number of temps)
+  // The frame caller on entry is a description of the caller of the
+  // frame we are about to lay out. We are guaranteed that we will be
+  // able to fill in a new interpreter frame as its callee (i.e. the
+  // stack space is allocated and the amount was determined by an
+  // earlier call to the size_activation() method). On return caller
+  // will describe the interpreter frame we just laid out.

-    // preallocate monitors (cf. __ add_monitor_to_stack)
-    intptr_t* monitors = montop - monitor_size;
+  // The skeleton frame must already look like an interpreter frame
+  // even if not fully filled out.
+  assert(interpreter_frame->is_interpreted_frame(), "Must be interpreted frame");
+
+  int rounded_vm_local_words = round_to(frame::interpreter_frame_vm_local_words,WordsPerLong);
+  int monitor_size = moncount * frame::interpreter_frame_monitor_size();
+  assert(monitor_size == round_to(monitor_size, WordsPerLong), "must align");
+
+  intptr_t* fp = interpreter_frame->fp();

-    // preallocate stack space
-    intptr_t* esp = monitors - 1 -
-                    (tempcount * Interpreter::stackElementWords) -
-                    popframe_extra_args;
+  JavaThread* thread = JavaThread::current();
+  RegisterMap map(thread, false);
+  // More verification that skeleton frame is properly walkable
+  assert(fp == caller->sp(), "fp must match");
+
+  intptr_t* montop = fp - rounded_vm_local_words;
+
+  // preallocate monitors (cf. __ add_monitor_to_stack)
+  intptr_t* monitors = montop - monitor_size;
+
+  // preallocate stack space
+  intptr_t* esp = monitors - 1 -
+                  (tempcount * Interpreter::stackElementWords) -
+                  popframe_extra_args;

-    int local_words = method->max_locals() * Interpreter::stackElementWords;
-    NEEDS_CLEANUP;
-    intptr_t* locals;
-    if (caller->is_interpreted_frame()) {
-      // Can force the locals area to end up properly overlapping the top of the expression stack.
-      intptr_t* Lesp_ptr = caller->interpreter_frame_tos_address() - 1;
-      // Note that this computation means we replace size_of_parameters() values from the caller
-      // interpreter frame's expression stack with our argument locals
-      int parm_words = caller_actual_parameters * Interpreter::stackElementWords;
-      locals = Lesp_ptr + parm_words;
-      int delta = local_words - parm_words;
-      int computed_sp_adjustment = (delta > 0) ? round_to(delta, WordsPerLong) : 0;
-      *interpreter_frame->register_addr(I5_savedSP) = (intptr_t) (fp + computed_sp_adjustment) - STACK_BIAS;
-      if (!is_bottom_frame) {
-        // Llast_SP is set below for the current frame to SP (with the
-        // extra space for the callee's locals). Here we adjust
-        // Llast_SP for the caller's frame, removing the extra space
-        // for the current method's locals.
-        *caller->register_addr(Llast_SP) = *interpreter_frame->register_addr(I5_savedSP);
-      } else {
-        assert(*caller->register_addr(Llast_SP) >= *interpreter_frame->register_addr(I5_savedSP), "strange Llast_SP");
-      }
+  int local_words = method->max_locals() * Interpreter::stackElementWords;
+  NEEDS_CLEANUP;
+  intptr_t* locals;
+  if (caller->is_interpreted_frame()) {
+    // Can force the locals area to end up properly overlapping the top of the expression stack.
+    intptr_t* Lesp_ptr = caller->interpreter_frame_tos_address() - 1;
+    // Note that this computation means we replace size_of_parameters() values from the caller
+    // interpreter frame's expression stack with our argument locals
+    int parm_words = caller_actual_parameters * Interpreter::stackElementWords;
+    locals = Lesp_ptr + parm_words;
+    int delta = local_words - parm_words;
+    int computed_sp_adjustment = (delta > 0) ? round_to(delta, WordsPerLong) : 0;
+    *interpreter_frame->register_addr(I5_savedSP) = (intptr_t) (fp + computed_sp_adjustment) - STACK_BIAS;
+    if (!is_bottom_frame) {
+      // Llast_SP is set below for the current frame to SP (with the
+      // extra space for the callee's locals). Here we adjust
+      // Llast_SP for the caller's frame, removing the extra space
+      // for the current method's locals.
+      *caller->register_addr(Llast_SP) = *interpreter_frame->register_addr(I5_savedSP);
     } else {
-      assert(caller->is_compiled_frame() || caller->is_entry_frame(), "only possible cases");
-      // Don't have Lesp available; lay out locals block in the caller
-      // adjacent to the register window save area.
-      //
-      // Compiled frames do not allocate a varargs area which is why this if
-      // statement is needed.
-      //
-      if (caller->is_compiled_frame()) {
-        locals = fp + frame::register_save_words + local_words - 1;
-      } else {
-        locals = fp + frame::memory_parameter_word_sp_offset + local_words - 1;
-      }
-      if (!caller->is_entry_frame()) {
-        // Caller wants his own SP back
-        int caller_frame_size = caller->cb()->frame_size();
-        *interpreter_frame->register_addr(I5_savedSP) = (intptr_t)(caller->fp() - caller_frame_size) - STACK_BIAS;
+      assert(*caller->register_addr(Llast_SP) >= *interpreter_frame->register_addr(I5_savedSP), "strange Llast_SP");
+    }
+  } else {
+    assert(caller->is_compiled_frame() || caller->is_entry_frame(), "only possible cases");
+    // Don't have Lesp available; lay out locals block in the caller
+    // adjacent to the register window save area.
+    //
+    // Compiled frames do not allocate a varargs area which is why this if
+    // statement is needed.
+    //
+    if (caller->is_compiled_frame()) {
+      locals = fp + frame::register_save_words + local_words - 1;
+    } else {
+      locals = fp + frame::memory_parameter_word_sp_offset + local_words - 1;
+    }
+    if (!caller->is_entry_frame()) {
+      // Caller wants his own SP back
+      int caller_frame_size = caller->cb()->frame_size();
+      *interpreter_frame->register_addr(I5_savedSP) = (intptr_t)(caller->fp() - caller_frame_size) - STACK_BIAS;
+    }
+  }
+  if (TraceDeoptimization) {
+    if (caller->is_entry_frame()) {
+      // make sure I5_savedSP and the entry frame's notion of saved SP
+      // agree. This assertion duplicates a check in entry frame code
+      // but catches the failure earlier.
+      assert(*caller->register_addr(Lscratch) == *interpreter_frame->register_addr(I5_savedSP),
+             "would change callers SP");
+    }
+    if (caller->is_entry_frame()) {
+      tty->print("entry ");
+    }
+    if (caller->is_compiled_frame()) {
+      tty->print("compiled ");
+      if (caller->is_deoptimized_frame()) {
+        tty->print("(deopt) ");
       }
     }
-    if (TraceDeoptimization) {
-      if (caller->is_entry_frame()) {
-        // make sure I5_savedSP and the entry frames notion of saved SP
-        // agree. This assertion duplicate a check in entry frame code
-        // but catches the failure earlier.
-        assert(*caller->register_addr(Lscratch) == *interpreter_frame->register_addr(I5_savedSP),
-               "would change callers SP");
-      }
-      if (caller->is_entry_frame()) {
-        tty->print("entry ");
-      }
-      if (caller->is_compiled_frame()) {
-        tty->print("compiled ");
-        if (caller->is_deoptimized_frame()) {
-          tty->print("(deopt) ");
-        }
-      }
-      if (caller->is_interpreted_frame()) {
-        tty->print("interpreted ");
-      }
-      tty->print_cr("caller fp=0x%x sp=0x%x", caller->fp(), caller->sp());
-      tty->print_cr("save area = 0x%x, 0x%x", caller->sp(), caller->sp() + 16);
-      tty->print_cr("save area = 0x%x, 0x%x", caller->fp(), caller->fp() + 16);
-      tty->print_cr("interpreter fp=0x%x sp=0x%x", interpreter_frame->fp(), interpreter_frame->sp());
-      tty->print_cr("save area = 0x%x, 0x%x", interpreter_frame->sp(), interpreter_frame->sp() + 16);
-      tty->print_cr("save area = 0x%x, 0x%x", interpreter_frame->fp(), interpreter_frame->fp() + 16);
-      tty->print_cr("Llocals = 0x%x", locals);
-      tty->print_cr("Lesp = 0x%x", esp);
-      tty->print_cr("Lmonitors = 0x%x", monitors);
+    if (caller->is_interpreted_frame()) {
+      tty->print("interpreted ");
     }
+    tty->print_cr("caller fp=0x%x sp=0x%x", caller->fp(), caller->sp());
+    tty->print_cr("save area = 0x%x, 0x%x", caller->sp(), caller->sp() + 16);
+    tty->print_cr("save area = 0x%x, 0x%x", caller->fp(), caller->fp() + 16);
+    tty->print_cr("interpreter fp=0x%x sp=0x%x", interpreter_frame->fp(), interpreter_frame->sp());
+    tty->print_cr("save area = 0x%x, 0x%x", interpreter_frame->sp(), interpreter_frame->sp() + 16);
+    tty->print_cr("save area = 0x%x, 0x%x", interpreter_frame->fp(), interpreter_frame->fp() + 16);
+    tty->print_cr("Llocals = 0x%x", locals);
+    tty->print_cr("Lesp = 0x%x", esp);
+    tty->print_cr("Lmonitors = 0x%x", monitors);
+  }

-    if (method->max_locals() > 0) {
-      assert(locals < caller->sp() || locals >= (caller->sp() + 16), "locals in save area");
-      assert(locals < caller->fp() || locals > (caller->fp() + 16), "locals in save area");
-      assert(locals < interpreter_frame->sp() || locals > (interpreter_frame->sp() + 16), "locals in save area");
-      assert(locals < interpreter_frame->fp() || locals >= (interpreter_frame->fp() + 16), "locals in save area");
-    }
+  if (method->max_locals() > 0) {
+    assert(locals < caller->sp() || locals >= (caller->sp() + 16), "locals in save area");
+    assert(locals < caller->fp() || locals > (caller->fp() + 16), "locals in save area");
+    assert(locals < interpreter_frame->sp() || locals > (interpreter_frame->sp() + 16), "locals in save area");
+    assert(locals < interpreter_frame->fp() || locals >= (interpreter_frame->fp() + 16), "locals in save area");
+  }
 #ifdef _LP64
-    assert(*interpreter_frame->register_addr(I5_savedSP) & 1, "must be odd");
+  assert(*interpreter_frame->register_addr(I5_savedSP) & 1, "must be odd");
 #endif

-    *interpreter_frame->register_addr(Lmethod) = (intptr_t) method;
-    *interpreter_frame->register_addr(Llocals) = (intptr_t) locals;
-    *interpreter_frame->register_addr(Lmonitors) = (intptr_t) monitors;
-    *interpreter_frame->register_addr(Lesp) = (intptr_t) esp;
-    // Llast_SP will be same as SP as there is no adapter space
-    *interpreter_frame->register_addr(Llast_SP) = (intptr_t) interpreter_frame->sp() - STACK_BIAS;
-    *interpreter_frame->register_addr(LcpoolCache) = (intptr_t) method->constants()->cache();
+  *interpreter_frame->register_addr(Lmethod) = (intptr_t) method;
+  *interpreter_frame->register_addr(Llocals) = (intptr_t) locals;
+  *interpreter_frame->register_addr(Lmonitors) = (intptr_t) monitors;
+  *interpreter_frame->register_addr(Lesp) = (intptr_t) esp;
+  // Llast_SP will be the same as SP as there is no adapter space
+  *interpreter_frame->register_addr(Llast_SP) = (intptr_t) interpreter_frame->sp() - STACK_BIAS;
+  *interpreter_frame->register_addr(LcpoolCache) = (intptr_t) method->constants()->cache();
 #ifdef FAST_DISPATCH
-    *interpreter_frame->register_addr(IdispatchTables) = (intptr_t) Interpreter::dispatch_table();
+  *interpreter_frame->register_addr(IdispatchTables) = (intptr_t) Interpreter::dispatch_table();
 #endif

 #ifdef ASSERT
-    BasicObjectLock* mp = (BasicObjectLock*)monitors;
-
-    assert(interpreter_frame->interpreter_frame_method() == method, "method matches");
-    assert(interpreter_frame->interpreter_frame_local_at(9) == (intptr_t *)((intptr_t)locals - (9 * Interpreter::stackElementSize)), "locals match");
-    assert(interpreter_frame->interpreter_frame_monitor_end() == mp, "monitor_end matches");
-    assert(((intptr_t *)interpreter_frame->interpreter_frame_monitor_begin()) == ((intptr_t *)mp)+monitor_size, "monitor_begin matches");
-    assert(interpreter_frame->interpreter_frame_tos_address()-1 == esp, "esp matches");
+  BasicObjectLock* mp = (BasicObjectLock*)monitors;

-    // check bounds
-    intptr_t* lo = interpreter_frame->sp() + (frame::memory_parameter_word_sp_offset - 1);
-    intptr_t* hi = interpreter_frame->fp() - rounded_vm_local_words;
-    assert(lo < monitors && montop <= hi, "monitors in bounds");
-    assert(lo <= esp && esp < monitors, "esp in bounds");
+  assert(interpreter_frame->interpreter_frame_method() == method, "method matches");
+  assert(interpreter_frame->interpreter_frame_local_at(9) == (intptr_t *)((intptr_t)locals - (9 * Interpreter::stackElementSize)), "locals match");
+  assert(interpreter_frame->interpreter_frame_monitor_end() == mp, "monitor_end matches");
+  assert(((intptr_t *)interpreter_frame->interpreter_frame_monitor_begin()) == ((intptr_t *)mp)+monitor_size, "monitor_begin matches");
+  assert(interpreter_frame->interpreter_frame_tos_address()-1 == esp, "esp matches");
+
+  // check bounds
+  intptr_t* lo = interpreter_frame->sp() + (frame::memory_parameter_word_sp_offset - 1);
+  intptr_t* hi = interpreter_frame->fp() - rounded_vm_local_words;
+  assert(lo < monitors && montop <= hi, "monitors in bounds");
+  assert(lo <= esp && esp < monitors, "esp in bounds");
 #endif // ASSERT
-  }
-
-  return raw_frame_size;
 }

 //----------------------------------------------------------------------------------------------------
diff -r a2221bbf6812 -r bba95ce6b634 src/cpu/x86/vm/c1_LIRAssembler_x86.cpp
--- a/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp	Tue Jun 17 16:12:09 2014 -0700
+++ b/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp	Tue Jun 17 22:15:24 2014 -0700
@@ -288,7 +288,7 @@

   // build frame
   ciMethod* m = compilation()->method();
-  __ build_frame(initial_frame_size_in_bytes());
+  __ build_frame(initial_frame_size_in_bytes(), bang_size_in_bytes());

   // OSR buffer is
   //
@@ -376,7 +376,7 @@
 }

 // This specifies the rsp decrement needed to build the frame
-int LIR_Assembler::initial_frame_size_in_bytes() {
LIR_Assembler::initial_frame_size_in_bytes() { +int LIR_Assembler::initial_frame_size_in_bytes() const { // if rounding, must let FrameMap know! // The frame_map records size in slots (32bit word) diff -r a2221bbf6812 -r bba95ce6b634 src/cpu/x86/vm/c1_MacroAssembler_x86.cpp --- a/src/cpu/x86/vm/c1_MacroAssembler_x86.cpp Tue Jun 17 16:12:09 2014 -0700 +++ b/src/cpu/x86/vm/c1_MacroAssembler_x86.cpp Tue Jun 17 22:15:24 2014 -0700 @@ -349,13 +349,14 @@ } -void C1_MacroAssembler::build_frame(int frame_size_in_bytes) { +void C1_MacroAssembler::build_frame(int frame_size_in_bytes, int bang_size_in_bytes) { + assert(bang_size_in_bytes >= frame_size_in_bytes, "stack bang size incorrect"); // Make sure there is enough stack space for this method's activation. // Note that we do this before doing an enter(). This matches the // ordering of C2's stack overflow check / rsp decrement and allows // the SharedRuntime stack overflow handling to be consistent // between the two compilers. - generate_stack_overflow_check(frame_size_in_bytes); + generate_stack_overflow_check(bang_size_in_bytes); push(rbp); #ifdef TIERED diff -r a2221bbf6812 -r bba95ce6b634 src/cpu/x86/vm/cppInterpreter_x86.cpp --- a/src/cpu/x86/vm/cppInterpreter_x86.cpp Tue Jun 17 16:12:09 2014 -0700 +++ b/src/cpu/x86/vm/cppInterpreter_x86.cpp Tue Jun 17 22:15:24 2014 -0700 @@ -2336,29 +2336,42 @@ "Stack top out of range"); } -int AbstractInterpreter::layout_activation(Method* method, - int tempcount, // - int popframe_extra_args, - int moncount, - int caller_actual_parameters, - int callee_param_count, - int callee_locals, - frame* caller, - frame* interpreter_frame, - bool is_top_frame, - bool is_bottom_frame) { - - assert(popframe_extra_args == 0, "FIX ME"); - // NOTE this code must exactly mimic what InterpreterGenerator::generate_compute_interpreter_state() - // does as far as allocating an interpreter frame. - // If interpreter_frame!=NULL, set up the method, locals, and monitors. - // The frame interpreter_frame, if not NULL, is guaranteed to be the right size, - // as determined by a previous call to this method. - // It is also guaranteed to be walkable even though it is in a skeletal state + +static int frame_size_helper(int max_stack, + int tempcount, + int moncount, + int callee_param_count, + int callee_locals, + bool is_top_frame, + int& monitor_size, + int& full_frame_size) { + int extra_locals_size = (callee_locals - callee_param_count) * BytesPerWord; + monitor_size = sizeof(BasicObjectLock) * moncount; + + // First calculate the frame size without any java expression stack + int short_frame_size = size_activation_helper(extra_locals_size, + monitor_size); + + // Now with full size expression stack + full_frame_size = short_frame_size + max_stack * BytesPerWord; + + // and now with only live portion of the expression stack + short_frame_size = short_frame_size + tempcount * BytesPerWord; + + // the size the activation is right now. Only top frame is full size + int frame_size = (is_top_frame ? full_frame_size : short_frame_size); + return frame_size; +} + +int AbstractInterpreter::size_activation(int max_stack, + int tempcount, + int extra_args, + int moncount, + int callee_param_count, + int callee_locals, + bool is_top_frame) { + assert(extra_args == 0, "FIX ME"); // NOTE: return size is in words not bytes - // NOTE: tempcount is the current size of the java expression stack. For top most - // frames we will allocate a full sized expression stack and not the curback - // version that non-top frames have. 
// Calculate the amount our frame will be adjust by the callee. For top frame // this is zero. @@ -2368,87 +2381,102 @@ // to it. So it ignores last_frame_adjust value. Seems suspicious as far // as getting sender_sp correct. - int extra_locals_size = (callee_locals - callee_param_count) * BytesPerWord; - int monitor_size = sizeof(BasicObjectLock) * moncount; - - // First calculate the frame size without any java expression stack - int short_frame_size = size_activation_helper(extra_locals_size, - monitor_size); - - // Now with full size expression stack - int full_frame_size = short_frame_size + method->max_stack() * BytesPerWord; - - // and now with only live portion of the expression stack - short_frame_size = short_frame_size + tempcount * BytesPerWord; - - // the size the activation is right now. Only top frame is full size - int frame_size = (is_top_frame ? full_frame_size : short_frame_size); - - if (interpreter_frame != NULL) { + int unused_monitor_size = 0; + int unused_full_frame_size = 0; + return frame_size_helper(max_stack, tempcount, moncount, callee_param_count, callee_locals, + is_top_frame, unused_monitor_size, unused_full_frame_size)/BytesPerWord; +} + +void AbstractInterpreter::layout_activation(Method* method, + int tempcount, // + int popframe_extra_args, + int moncount, + int caller_actual_parameters, + int callee_param_count, + int callee_locals, + frame* caller, + frame* interpreter_frame, + bool is_top_frame, + bool is_bottom_frame) { + + assert(popframe_extra_args == 0, "FIX ME"); + // NOTE this code must exactly mimic what InterpreterGenerator::generate_compute_interpreter_state() + // does as far as allocating an interpreter frame. + // Set up the method, locals, and monitors. + // The frame interpreter_frame is guaranteed to be the right size, + // as determined by a previous call to the size_activation() method. + // It is also guaranteed to be walkable even though it is in a skeletal state + // NOTE: tempcount is the current size of the java expression stack. For top most + // frames we will allocate a full sized expression stack and not the curback + // version that non-top frames have. + + int monitor_size = 0; + int full_frame_size = 0; + int frame_size = frame_size_helper(method->max_stack(), tempcount, moncount, callee_param_count, callee_locals, + is_top_frame, monitor_size, full_frame_size); + #ifdef ASSERT - assert(caller->unextended_sp() == interpreter_frame->interpreter_frame_sender_sp(), "Frame not properly walkable"); + assert(caller->unextended_sp() == interpreter_frame->interpreter_frame_sender_sp(), "Frame not properly walkable"); #endif - // MUCHO HACK - - intptr_t* frame_bottom = (intptr_t*) ((intptr_t)interpreter_frame->sp() - (full_frame_size - frame_size)); - - /* Now fillin the interpreterState object */ - - // The state object is the first thing on the frame and easily located - - interpreterState cur_state = (interpreterState) ((intptr_t)interpreter_frame->fp() - sizeof(BytecodeInterpreter)); - - - // Find the locals pointer. This is rather simple on x86 because there is no - // confusing rounding at the callee to account for. We can trivially locate - // our locals based on the current fp(). - // Note: the + 2 is for handling the "static long no_params() method" issue. - // (too bad I don't really remember that issue well...) - - intptr_t* locals; - // If the caller is interpreted we need to make sure that locals points to the first - // argument that the caller passed and not in an area where the stack might have been extended. 
- // because the stack to stack to converter needs a proper locals value in order to remove the - // arguments from the caller and place the result in the proper location. Hmm maybe it'd be - // simpler if we simply stored the result in the BytecodeInterpreter object and let the c++ code - // adjust the stack?? HMMM QQQ - // - if (caller->is_interpreted_frame()) { - // locals must agree with the caller because it will be used to set the - // caller's tos when we return. - interpreterState prev = caller->get_interpreterState(); - // stack() is prepushed. - locals = prev->stack() + method->size_of_parameters(); - // locals = caller->unextended_sp() + (method->size_of_parameters() - 1); - if (locals != interpreter_frame->fp() + frame::sender_sp_offset + (method->max_locals() - 1) + 2) { - // os::breakpoint(); - } - } else { - // this is where a c2i would have placed locals (except for the +2) - locals = interpreter_frame->fp() + frame::sender_sp_offset + (method->max_locals() - 1) + 2; + // MUCHO HACK + + intptr_t* frame_bottom = (intptr_t*) ((intptr_t)interpreter_frame->sp() - (full_frame_size - frame_size)); + + /* Now fillin the interpreterState object */ + + // The state object is the first thing on the frame and easily located + + interpreterState cur_state = (interpreterState) ((intptr_t)interpreter_frame->fp() - sizeof(BytecodeInterpreter)); + + + // Find the locals pointer. This is rather simple on x86 because there is no + // confusing rounding at the callee to account for. We can trivially locate + // our locals based on the current fp(). + // Note: the + 2 is for handling the "static long no_params() method" issue. + // (too bad I don't really remember that issue well...) + + intptr_t* locals; + // If the caller is interpreted we need to make sure that locals points to the first + // argument that the caller passed and not in an area where the stack might have been extended. + // because the stack to stack to converter needs a proper locals value in order to remove the + // arguments from the caller and place the result in the proper location. Hmm maybe it'd be + // simpler if we simply stored the result in the BytecodeInterpreter object and let the c++ code + // adjust the stack?? HMMM QQQ + // + if (caller->is_interpreted_frame()) { + // locals must agree with the caller because it will be used to set the + // caller's tos when we return. + interpreterState prev = caller->get_interpreterState(); + // stack() is prepushed. 
+ locals = prev->stack() + method->size_of_parameters(); + // locals = caller->unextended_sp() + (method->size_of_parameters() - 1); + if (locals != interpreter_frame->fp() + frame::sender_sp_offset + (method->max_locals() - 1) + 2) { + // os::breakpoint(); } - - intptr_t* monitor_base = (intptr_t*) cur_state; - intptr_t* stack_base = (intptr_t*) ((intptr_t) monitor_base - monitor_size); - /* +1 because stack is always prepushed */ - intptr_t* stack = (intptr_t*) ((intptr_t) stack_base - (tempcount + 1) * BytesPerWord); - - - BytecodeInterpreter::layout_interpreterState(cur_state, - caller, - interpreter_frame, - method, - locals, - stack, - stack_base, - monitor_base, - frame_bottom, - is_top_frame); - - // BytecodeInterpreter::pd_layout_interpreterState(cur_state, interpreter_return_address, interpreter_frame->fp()); + } else { + // this is where a c2i would have placed locals (except for the +2) + locals = interpreter_frame->fp() + frame::sender_sp_offset + (method->max_locals() - 1) + 2; } - return frame_size/BytesPerWord; + + intptr_t* monitor_base = (intptr_t*) cur_state; + intptr_t* stack_base = (intptr_t*) ((intptr_t) monitor_base - monitor_size); + /* +1 because stack is always prepushed */ + intptr_t* stack = (intptr_t*) ((intptr_t) stack_base - (tempcount + 1) * BytesPerWord); + + + BytecodeInterpreter::layout_interpreterState(cur_state, + caller, + interpreter_frame, + method, + locals, + stack, + stack_base, + monitor_base, + frame_bottom, + is_top_frame); + + // BytecodeInterpreter::pd_layout_interpreterState(cur_state, interpreter_return_address, interpreter_frame->fp()); } #endif // CC_INTERP (all) diff -r a2221bbf6812 -r bba95ce6b634 src/cpu/x86/vm/macroAssembler_x86.cpp --- a/src/cpu/x86/vm/macroAssembler_x86.cpp Tue Jun 17 16:12:09 2014 -0700 +++ b/src/cpu/x86/vm/macroAssembler_x86.cpp Tue Jun 17 22:15:24 2014 -0700 @@ -1052,7 +1052,7 @@ // was post-decremented.) Skip this address by starting at i=1, and // touch a few more pages below. N.B. It is important to touch all // the way down to and including i=StackShadowPages. - for (int i = 1; i <= StackShadowPages; i++) { + for (int i = 1; i < StackShadowPages; i++) { // this could be any sized move but this can be a debugging crumb // so the bigger the better. movptr(Address(tmp, (-i*os::vm_page_size())), size ); @@ -6096,7 +6096,7 @@ // C2 compiled method's prolog code. -void MacroAssembler::verified_entry(int framesize, bool stack_bang, bool fp_mode_24b) { +void MacroAssembler::verified_entry(int framesize, int stack_bang_size, bool fp_mode_24b) { // WARNING: Initial instruction MUST be 5 bytes or longer so that // NativeJump::patch_verified_entry will be able to patch out the entry @@ -6104,18 +6104,20 @@ // the frame allocation can be either 3 or 6 bytes. So if we don't do // stack bang then we must use the 6 byte frame allocation even if // we have no frame. :-( + assert(stack_bang_size >= framesize || stack_bang_size <= 0, "stack bang size incorrect"); assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); // Remove word for return addr framesize -= wordSize; + stack_bang_size -= wordSize; // Calls to C2R adapters often do not accept exceptional returns. // We require that their callers must bang for them. But be careful, because // some VM calls (such as call site linkage) can use several kilobytes of // stack. But the stack safety zone should account for that. // See bugs 4446381, 4468289, 4497237.
- if (stack_bang) { - generate_stack_overflow_check(framesize); + if (stack_bang_size > 0) { + generate_stack_overflow_check(stack_bang_size); // We always push rbp, so that on return to interpreter rbp, will be // restored correctly and we can correct the stack. diff -r a2221bbf6812 -r bba95ce6b634 src/cpu/x86/vm/macroAssembler_x86.hpp --- a/src/cpu/x86/vm/macroAssembler_x86.hpp Tue Jun 17 16:12:09 2014 -0700 +++ b/src/cpu/x86/vm/macroAssembler_x86.hpp Tue Jun 17 22:15:24 2014 -0700 @@ -1170,7 +1170,7 @@ void movl2ptr(Register dst, Register src) { LP64_ONLY(movslq(dst, src)) NOT_LP64(if (dst != src) movl(dst, src)); } // C2 compiled method's prolog code. - void verified_entry(int framesize, bool stack_bang, bool fp_mode_24b); + void verified_entry(int framesize, int stack_bang_size, bool fp_mode_24b); // clear memory of size 'cnt' qwords, starting at 'base'. void clear_mem(Register base, Register cnt, Register rtmp); diff -r a2221bbf6812 -r bba95ce6b634 src/cpu/x86/vm/sharedRuntime_x86_32.cpp --- a/src/cpu/x86/vm/sharedRuntime_x86_32.cpp Tue Jun 17 16:12:09 2014 -0700 +++ b/src/cpu/x86/vm/sharedRuntime_x86_32.cpp Tue Jun 17 22:15:24 2014 -0700 @@ -3014,11 +3014,15 @@ // restore rbp before stack bang because if stack overflow is thrown it needs to be pushed (and preserved) __ movptr(rbp, Address(rdi, Deoptimization::UnrollBlock::initial_info_offset_in_bytes())); - // Stack bang to make sure there's enough room for these interpreter frames. +#ifdef ASSERT + // Compilers generate code that bangs the stack by as much as the + // interpreter would need. So this stack banging should never + // trigger a fault. Verify that it does not on non-product builds. if (UseStackBanging) { __ movl(rbx, Address(rdi ,Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes())); __ bang_stack_size(rbx, rcx); } +#endif // Load array of frame pcs into ECX __ movptr(rcx,Address(rdi,Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes())); @@ -3240,12 +3244,15 @@ // restore rbp before stack bang because if stack overflow is thrown it needs to be pushed (and preserved) __ movptr(rbp, Address(rdi, Deoptimization::UnrollBlock::initial_info_offset_in_bytes())); - // Stack bang to make sure there's enough room for these interpreter frames. +#ifdef ASSERT + // Compilers generate code that bangs the stack by as much as the + // interpreter would need. So this stack banging should never + // trigger a fault. Verify that it does not on non-product builds. if (UseStackBanging) { __ movl(rbx, Address(rdi ,Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes())); __ bang_stack_size(rbx, rcx); } - +#endif // Load array of frame pcs into ECX __ movl(rcx,Address(rdi,Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes())); diff -r a2221bbf6812 -r bba95ce6b634 src/cpu/x86/vm/sharedRuntime_x86_64.cpp --- a/src/cpu/x86/vm/sharedRuntime_x86_64.cpp Tue Jun 17 16:12:09 2014 -0700 +++ b/src/cpu/x86/vm/sharedRuntime_x86_64.cpp Tue Jun 17 22:15:24 2014 -0700 @@ -3484,11 +3484,15 @@ // restore rbp before stack bang because if stack overflow is thrown it needs to be pushed (and preserved) __ movptr(rbp, Address(rdi, Deoptimization::UnrollBlock::initial_info_offset_in_bytes())); - // Stack bang to make sure there's enough room for these interpreter frames. +#ifdef ASSERT + // Compilers generate code that bangs the stack by as much as the + // interpreter would need. So this stack banging should never + // trigger a fault. Verify that it does not on non-product builds.
if (UseStackBanging) { __ movl(rbx, Address(rdi, Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes())); __ bang_stack_size(rbx, rcx); } +#endif // Load address of array of frame pcs into rcx __ movptr(rcx, Address(rdi, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes())); @@ -3682,11 +3686,15 @@ // restore rbp before stack bang because if stack overflow is thrown it needs to be pushed (and preserved) __ movptr(rbp, Address(rdi, Deoptimization::UnrollBlock::initial_info_offset_in_bytes())); - // Stack bang to make sure there's enough room for these interpreter frames. +#ifdef ASSERT + // Compilers generate code that bangs the stack by as much as the + // interpreter would need. So this stack banging should never + // trigger a fault. Verify that it does not on non-product builds. if (UseStackBanging) { __ movl(rbx, Address(rdi ,Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes())); __ bang_stack_size(rbx, rcx); } +#endif // Load address of array of frame pcs into rcx (address*) __ movptr(rcx, Address(rdi, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes())); diff -r a2221bbf6812 -r bba95ce6b634 src/cpu/x86/vm/templateInterpreter_x86.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/cpu/x86/vm/templateInterpreter_x86.cpp Tue Jun 17 22:15:24 2014 -0700 @@ -0,0 +1,124 @@ +/* + * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "ci/ciMethod.hpp" +#include "interpreter/interpreter.hpp" +#include "runtime/frame.inline.hpp" + +#ifndef CC_INTERP + +// asm based interpreter deoptimization helpers +int AbstractInterpreter::size_activation(int max_stack, + int temps, + int extra_args, + int monitors, + int callee_params, + int callee_locals, + bool is_top_frame) { + // Note: This calculation must exactly parallel the frame setup + // in AbstractInterpreterGenerator::generate_method_entry. + + // fixed size of an interpreter frame: + int overhead = frame::sender_sp_offset - + frame::interpreter_frame_initial_sp_offset; + // Our locals were accounted for by the caller (or last_frame_adjust + // on the transition). Since the callee parameters already account + // for the callee's params we only need to account for the extra + // locals.
+ int size = overhead + + (callee_locals - callee_params)*Interpreter::stackElementWords + + monitors * frame::interpreter_frame_monitor_size() + + temps* Interpreter::stackElementWords + extra_args; + + return size; +} + +void AbstractInterpreter::layout_activation(Method* method, + int tempcount, + int popframe_extra_args, + int moncount, + int caller_actual_parameters, + int callee_param_count, + int callee_locals, + frame* caller, + frame* interpreter_frame, + bool is_top_frame, + bool is_bottom_frame) { + // The frame interpreter_frame is guaranteed to be the right size, + // as determined by a previous call to the size_activation() method. + // It is also guaranteed to be walkable even though it is in a + // skeletal state + + int max_locals = method->max_locals() * Interpreter::stackElementWords; + int extra_locals = (method->max_locals() - method->size_of_parameters()) * + Interpreter::stackElementWords; + +#ifdef ASSERT + if (!EnableInvokeDynamic) { + // @@@ FIXME: Should we correct interpreter_frame_sender_sp in the calling sequences? + // Probably, since deoptimization doesn't work yet. + assert(caller->unextended_sp() == interpreter_frame->interpreter_frame_sender_sp(), "Frame not properly walkable"); + } + assert(caller->sp() == interpreter_frame->sender_sp(), "Frame not properly walkable(2)"); +#endif + + interpreter_frame->interpreter_frame_set_method(method); + // NOTE the difference in using sender_sp and + // interpreter_frame_sender_sp interpreter_frame_sender_sp is + // the original sp of the caller (the unextended_sp) and + // sender_sp is fp+8/16 (32bit/64bit) XXX + intptr_t* locals = interpreter_frame->sender_sp() + max_locals - 1; + +#ifdef ASSERT + if (caller->is_interpreted_frame()) { + assert(locals < caller->fp() + frame::interpreter_frame_initial_sp_offset, "bad placement"); + } +#endif + + interpreter_frame->interpreter_frame_set_locals(locals); + BasicObjectLock* montop = interpreter_frame->interpreter_frame_monitor_begin(); + BasicObjectLock* monbot = montop - moncount; + interpreter_frame->interpreter_frame_set_monitor_end(monbot); + + // Set last_sp + intptr_t* esp = (intptr_t*) monbot - + tempcount*Interpreter::stackElementWords - + popframe_extra_args; + interpreter_frame->interpreter_frame_set_last_sp(esp); + + // All frames but the initial (oldest) interpreter frame we fill in have + // a value for sender_sp that allows walking the stack but isn't + // truly correct. Correct the value here. 
+ if (extra_locals != 0 && + interpreter_frame->sender_sp() == + interpreter_frame->interpreter_frame_sender_sp()) { + interpreter_frame->set_interpreter_frame_sender_sp(caller->sp() + + extra_locals); + } + *interpreter_frame->interpreter_frame_cache_addr() = + method->constants()->cache(); +} + +#endif // CC_INTERP diff -r a2221bbf6812 -r bba95ce6b634 src/cpu/x86/vm/templateInterpreter_x86_32.cpp --- a/src/cpu/x86/vm/templateInterpreter_x86_32.cpp Tue Jun 17 16:12:09 2014 -0700 +++ b/src/cpu/x86/vm/templateInterpreter_x86_32.cpp Tue Jun 17 22:15:24 2014 -0700 @@ -1686,91 +1686,6 @@ return overhead_size + method_stack + stub_code; } -// asm based interpreter deoptimization helpers - -int AbstractInterpreter::layout_activation(Method* method, - int tempcount, - int popframe_extra_args, - int moncount, - int caller_actual_parameters, - int callee_param_count, - int callee_locals, - frame* caller, - frame* interpreter_frame, - bool is_top_frame, - bool is_bottom_frame) { - // Note: This calculation must exactly parallel the frame setup - // in AbstractInterpreterGenerator::generate_method_entry. - // If interpreter_frame!=NULL, set up the method, locals, and monitors. - // The frame interpreter_frame, if not NULL, is guaranteed to be the right size, - // as determined by a previous call to this method. - // It is also guaranteed to be walkable even though it is in a skeletal state - // NOTE: return size is in words not bytes - - // fixed size of an interpreter frame: - int max_locals = method->max_locals() * Interpreter::stackElementWords; - int extra_locals = (method->max_locals() - method->size_of_parameters()) * - Interpreter::stackElementWords; - - int overhead = frame::sender_sp_offset - frame::interpreter_frame_initial_sp_offset; - - // Our locals were accounted for by the caller (or last_frame_adjust on the transistion) - // Since the callee parameters already account for the callee's params we only need to account for - // the extra locals. - - - int size = overhead + - ((callee_locals - callee_param_count)*Interpreter::stackElementWords) + - (moncount*frame::interpreter_frame_monitor_size()) + - tempcount*Interpreter::stackElementWords + popframe_extra_args; - - if (interpreter_frame != NULL) { -#ifdef ASSERT - if (!EnableInvokeDynamic) - // @@@ FIXME: Should we correct interpreter_frame_sender_sp in the calling sequences? - // Probably, since deoptimization doesn't work yet. 
- assert(caller->unextended_sp() == interpreter_frame->interpreter_frame_sender_sp(), "Frame not properly walkable"); - assert(caller->sp() == interpreter_frame->sender_sp(), "Frame not properly walkable(2)"); -#endif - - interpreter_frame->interpreter_frame_set_method(method); - // NOTE the difference in using sender_sp and interpreter_frame_sender_sp - // interpreter_frame_sender_sp is the original sp of the caller (the unextended_sp) - // and sender_sp is fp+8 - intptr_t* locals = interpreter_frame->sender_sp() + max_locals - 1; - -#ifdef ASSERT - if (caller->is_interpreted_frame()) { - assert(locals < caller->fp() + frame::interpreter_frame_initial_sp_offset, "bad placement"); - } -#endif - - interpreter_frame->interpreter_frame_set_locals(locals); - BasicObjectLock* montop = interpreter_frame->interpreter_frame_monitor_begin(); - BasicObjectLock* monbot = montop - moncount; - interpreter_frame->interpreter_frame_set_monitor_end(monbot); - - // Set last_sp - intptr_t* rsp = (intptr_t*) monbot - - tempcount*Interpreter::stackElementWords - - popframe_extra_args; - interpreter_frame->interpreter_frame_set_last_sp(rsp); - - // All frames but the initial (oldest) interpreter frame we fill in have a - // value for sender_sp that allows walking the stack but isn't - // truly correct. Correct the value here. - - if (extra_locals != 0 && - interpreter_frame->sender_sp() == interpreter_frame->interpreter_frame_sender_sp() ) { - interpreter_frame->set_interpreter_frame_sender_sp(caller->sp() + extra_locals); - } - *interpreter_frame->interpreter_frame_cache_addr() = - method->constants()->cache(); - } - return size; -} - - //------------------------------------------------------------------------------------------------------------------------ // Exceptions diff -r a2221bbf6812 -r bba95ce6b634 src/cpu/x86/vm/templateInterpreter_x86_64.cpp --- a/src/cpu/x86/vm/templateInterpreter_x86_64.cpp Tue Jun 17 16:12:09 2014 -0700 +++ b/src/cpu/x86/vm/templateInterpreter_x86_64.cpp Tue Jun 17 22:15:24 2014 -0700 @@ -1695,87 +1695,6 @@ return (overhead_size + method_stack + stub_code); } -int AbstractInterpreter::layout_activation(Method* method, - int tempcount, - int popframe_extra_args, - int moncount, - int caller_actual_parameters, - int callee_param_count, - int callee_locals, - frame* caller, - frame* interpreter_frame, - bool is_top_frame, - bool is_bottom_frame) { - // Note: This calculation must exactly parallel the frame setup - // in AbstractInterpreterGenerator::generate_method_entry. - // If interpreter_frame!=NULL, set up the method, locals, and monitors. - // The frame interpreter_frame, if not NULL, is guaranteed to be the - // right size, as determined by a previous call to this method. - // It is also guaranteed to be walkable even though it is in a skeletal state - - // fixed size of an interpreter frame: - int max_locals = method->max_locals() * Interpreter::stackElementWords; - int extra_locals = (method->max_locals() - method->size_of_parameters()) * - Interpreter::stackElementWords; - - int overhead = frame::sender_sp_offset - - frame::interpreter_frame_initial_sp_offset; - // Our locals were accounted for by the caller (or last_frame_adjust - // on the transistion) Since the callee parameters already account - // for the callee's params we only need to account for the extra - // locals. 
- int size = overhead + - (callee_locals - callee_param_count)*Interpreter::stackElementWords + - moncount * frame::interpreter_frame_monitor_size() + - tempcount* Interpreter::stackElementWords + popframe_extra_args; - if (interpreter_frame != NULL) { -#ifdef ASSERT - if (!EnableInvokeDynamic) - // @@@ FIXME: Should we correct interpreter_frame_sender_sp in the calling sequences? - // Probably, since deoptimization doesn't work yet. - assert(caller->unextended_sp() == interpreter_frame->interpreter_frame_sender_sp(), "Frame not properly walkable"); - assert(caller->sp() == interpreter_frame->sender_sp(), "Frame not properly walkable(2)"); -#endif - - interpreter_frame->interpreter_frame_set_method(method); - // NOTE the difference in using sender_sp and - // interpreter_frame_sender_sp interpreter_frame_sender_sp is - // the original sp of the caller (the unextended_sp) and - // sender_sp is fp+16 XXX - intptr_t* locals = interpreter_frame->sender_sp() + max_locals - 1; - -#ifdef ASSERT - if (caller->is_interpreted_frame()) { - assert(locals < caller->fp() + frame::interpreter_frame_initial_sp_offset, "bad placement"); - } -#endif - - interpreter_frame->interpreter_frame_set_locals(locals); - BasicObjectLock* montop = interpreter_frame->interpreter_frame_monitor_begin(); - BasicObjectLock* monbot = montop - moncount; - interpreter_frame->interpreter_frame_set_monitor_end(monbot); - - // Set last_sp - intptr_t* esp = (intptr_t*) monbot - - tempcount*Interpreter::stackElementWords - - popframe_extra_args; - interpreter_frame->interpreter_frame_set_last_sp(esp); - - // All frames but the initial (oldest) interpreter frame we fill in have - // a value for sender_sp that allows walking the stack but isn't - // truly correct. Correct the value here. - if (extra_locals != 0 && - interpreter_frame->sender_sp() == - interpreter_frame->interpreter_frame_sender_sp()) { - interpreter_frame->set_interpreter_frame_sender_sp(caller->sp() + - extra_locals); - } - *interpreter_frame->interpreter_frame_cache_addr() = - method->constants()->cache(); - } - return size; -} - //----------------------------------------------------------------------------- // Exceptions diff -r a2221bbf6812 -r bba95ce6b634 src/cpu/x86/vm/x86_32.ad --- a/src/cpu/x86/vm/x86_32.ad Tue Jun 17 16:12:09 2014 -0700 +++ b/src/cpu/x86/vm/x86_32.ad Tue Jun 17 22:15:24 2014 -0700 @@ -512,14 +512,15 @@ void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const { Compile* C = ra_->C; - int framesize = C->frame_slots() << LogBytesPerInt; + int framesize = C->frame_size_in_bytes(); + int bangsize = C->bang_size_in_bytes(); assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); // Remove wordSize for return addr which is already pushed. 
framesize -= wordSize; - if (C->need_stack_bang(framesize)) { + if (C->need_stack_bang(bangsize)) { framesize -= wordSize; - st->print("# stack bang"); + st->print("# stack bang (%d bytes)", bangsize); st->print("\n\t"); st->print("PUSH EBP\t# Save EBP"); if (framesize) { @@ -563,9 +564,10 @@ Compile* C = ra_->C; MacroAssembler _masm(&cbuf); - int framesize = C->frame_slots() << LogBytesPerInt; - - __ verified_entry(framesize, C->need_stack_bang(framesize), C->in_24_bit_fp_mode()); + int framesize = C->frame_size_in_bytes(); + int bangsize = C->bang_size_in_bytes(); + + __ verified_entry(framesize, C->need_stack_bang(bangsize)?bangsize:0, C->in_24_bit_fp_mode()); C->set_frame_complete(cbuf.insts_size()); @@ -589,7 +591,7 @@ #ifndef PRODUCT void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { Compile *C = ra_->C; - int framesize = C->frame_slots() << LogBytesPerInt; + int framesize = C->frame_size_in_bytes(); assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); // Remove two words for return addr and rbp, framesize -= 2*wordSize; @@ -629,7 +631,7 @@ masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std())); } - int framesize = C->frame_slots() << LogBytesPerInt; + int framesize = C->frame_size_in_bytes(); assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); // Remove two words for return addr and rbp, framesize -= 2*wordSize; @@ -663,7 +665,7 @@ if (C->max_vector_size() > 16) size += 3; // vzeroupper if (do_polling() && C->is_method_compilation()) size += 6; - int framesize = C->frame_slots() << LogBytesPerInt; + int framesize = C->frame_size_in_bytes(); assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); // Remove two words for return addr and rbp, framesize -= 2*wordSize; diff -r a2221bbf6812 -r bba95ce6b634 src/cpu/x86/vm/x86_64.ad --- a/src/cpu/x86/vm/x86_64.ad Tue Jun 17 16:12:09 2014 -0700 +++ b/src/cpu/x86/vm/x86_64.ad Tue Jun 17 22:15:24 2014 -0700 @@ -713,14 +713,15 @@ void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const { Compile* C = ra_->C; - int framesize = C->frame_slots() << LogBytesPerInt; + int framesize = C->frame_size_in_bytes(); + int bangsize = C->bang_size_in_bytes(); assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); // Remove wordSize for return addr which is already pushed. 
framesize -= wordSize; - if (C->need_stack_bang(framesize)) { + if (C->need_stack_bang(bangsize)) { framesize -= wordSize; - st->print("# stack bang"); + st->print("# stack bang (%d bytes)", bangsize); st->print("\n\t"); st->print("pushq rbp\t# Save rbp"); if (framesize) { @@ -751,9 +752,10 @@ Compile* C = ra_->C; MacroAssembler _masm(&cbuf); - int framesize = C->frame_slots() << LogBytesPerInt; - - __ verified_entry(framesize, C->need_stack_bang(framesize), false); + int framesize = C->frame_size_in_bytes(); + int bangsize = C->bang_size_in_bytes(); + + __ verified_entry(framesize, C->need_stack_bang(bangsize)?bangsize:0, false); C->set_frame_complete(cbuf.insts_size()); @@ -786,7 +788,7 @@ st->cr(); st->print("\t"); } - int framesize = C->frame_slots() << LogBytesPerInt; + int framesize = C->frame_size_in_bytes(); assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); // Remove word for return adr already pushed // and RBP @@ -822,7 +824,7 @@ __ vzeroupper(); } - int framesize = C->frame_slots() << LogBytesPerInt; + int framesize = C->frame_size_in_bytes(); assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); // Remove word for return adr already pushed // and RBP diff -r a2221bbf6812 -r bba95ce6b634 src/cpu/zero/vm/cppInterpreter_zero.cpp --- a/src/cpu/zero/vm/cppInterpreter_zero.cpp Tue Jun 17 16:12:09 2014 -0700 +++ b/src/cpu/zero/vm/cppInterpreter_zero.cpp Tue Jun 17 22:15:24 2014 -0700 @@ -916,17 +916,32 @@ return (InterpreterFrame *) fp; } -int AbstractInterpreter::layout_activation(Method* method, - int tempcount, - int popframe_extra_args, - int moncount, - int caller_actual_parameters, - int callee_param_count, - int callee_locals, - frame* caller, - frame* interpreter_frame, - bool is_top_frame, - bool is_bottom_frame) { +int AbstractInterpreter::size_activation(int max_stack, + int tempcount, + int extra_args, + int moncount, + int callee_param_count, + int callee_locals, + bool is_top_frame) { + int header_words = InterpreterFrame::header_words; + int monitor_words = moncount * frame::interpreter_frame_monitor_size(); + int stack_words = is_top_frame ? max_stack : tempcount; + int callee_extra_locals = callee_locals - callee_param_count; + + return header_words + monitor_words + stack_words + callee_extra_locals; +} + +void AbstractInterpreter::layout_activation(Method* method, + int tempcount, + int popframe_extra_args, + int moncount, + int caller_actual_parameters, + int callee_param_count, + int callee_locals, + frame* caller, + frame* interpreter_frame, + bool is_top_frame, + bool is_bottom_frame) { assert(popframe_extra_args == 0, "what to do?"); assert(!is_top_frame || (!callee_locals && !callee_param_count), "top frame should have no caller"); @@ -935,39 +950,31 @@ // does (the full InterpreterFrame::build, that is, not the // one that creates empty frames for the deoptimizer). // - // If interpreter_frame is not NULL then it will be filled in. - // It's size is determined by a previous call to this method, - // so it should be correct. + // interpreter_frame will be filled in. Its size is determined by + // a previous call to the size_activation() method. // // Note that tempcount is the current size of the expression // stack. For top most frames we will allocate a full sized // expression stack and not the trimmed version that non-top // frames have. - int header_words = InterpreterFrame::header_words; int monitor_words = moncount * frame::interpreter_frame_monitor_size(); - int stack_words = is_top_frame ?
method->max_stack() : tempcount; - int callee_extra_locals = callee_locals - callee_param_count; - - if (interpreter_frame) { - intptr_t *locals = interpreter_frame->fp() + method->max_locals(); - interpreterState istate = interpreter_frame->get_interpreterState(); - intptr_t *monitor_base = (intptr_t*) istate; - intptr_t *stack_base = monitor_base - monitor_words; - intptr_t *stack = stack_base - tempcount - 1; + intptr_t *locals = interpreter_frame->fp() + method->max_locals(); + interpreterState istate = interpreter_frame->get_interpreterState(); + intptr_t *monitor_base = (intptr_t*) istate; + intptr_t *stack_base = monitor_base - monitor_words; + intptr_t *stack = stack_base - tempcount - 1; - BytecodeInterpreter::layout_interpreterState(istate, - caller, - NULL, - method, - locals, - stack, - stack_base, - monitor_base, - NULL, - is_top_frame); - } - return header_words + monitor_words + stack_words + callee_extra_locals; + BytecodeInterpreter::layout_interpreterState(istate, + caller, + NULL, + method, + locals, + stack, + stack_base, + monitor_base, + NULL, + is_top_frame); } void BytecodeInterpreter::layout_interpreterState(interpreterState istate, diff -r a2221bbf6812 -r bba95ce6b634 src/share/vm/c1/c1_Compilation.cpp --- a/src/share/vm/c1/c1_Compilation.cpp Tue Jun 17 16:12:09 2014 -0700 +++ b/src/share/vm/c1/c1_Compilation.cpp Tue Jun 17 22:15:24 2014 -0700 @@ -546,6 +546,7 @@ , _code(buffer_blob) , _has_access_indexed(false) , _current_instruction(NULL) +, _interpreter_frame_size(0) #ifndef PRODUCT , _last_instruction_printed(NULL) #endif // PRODUCT diff -r a2221bbf6812 -r bba95ce6b634 src/share/vm/c1/c1_Compilation.hpp --- a/src/share/vm/c1/c1_Compilation.hpp Tue Jun 17 16:12:09 2014 -0700 +++ b/src/share/vm/c1/c1_Compilation.hpp Tue Jun 17 22:15:24 2014 -0700 @@ -88,6 +88,7 @@ CodeOffsets _offsets; CodeBuffer _code; bool _has_access_indexed; + int _interpreter_frame_size; // Stack space needed in case of a deoptimization // compilation helpers void initialize(); @@ -262,6 +263,18 @@ // Dump inlining replay data to the stream. void dump_inline_data(outputStream* out) { /* do nothing now */ } + + // How much stack space would the interpreter need in case of a + // deoptimization (worst case) + void update_interpreter_frame_size(int size) { + if (_interpreter_frame_size < size) { + _interpreter_frame_size = size; + } + } + + int interpreter_frame_size() const { + return _interpreter_frame_size; + } }; diff -r a2221bbf6812 -r bba95ce6b634 src/share/vm/c1/c1_IR.cpp --- a/src/share/vm/c1/c1_IR.cpp Tue Jun 17 16:12:09 2014 -0700 +++ b/src/share/vm/c1/c1_IR.cpp Tue Jun 17 22:15:24 2014 -0700 @@ -226,8 +226,38 @@ _oop_map->set_oop(name); } +// Mirror the stack size calculation in the deopt code +// How much stack space would we need at this point in the program in +// case of deoptimization? 
+int CodeEmitInfo::interpreter_frame_size() const { + ValueStack* state = _stack; + int size = 0; + int callee_parameters = 0; + int callee_locals = 0; + int extra_args = state->scope()->method()->max_stack() - state->stack_size(); + while (state != NULL) { + int locks = state->locks_size(); + int temps = state->stack_size(); + bool is_top_frame = (state == _stack); + ciMethod* method = state->scope()->method(); + int frame_size = BytesPerWord * Interpreter::size_activation(method->max_stack(), + temps + callee_parameters, + extra_args, + locks, + callee_parameters, + callee_locals, + is_top_frame); + size += frame_size; + + callee_parameters = method->size_of_parameters(); + callee_locals = method->max_locals(); + extra_args = 0; + state = state->caller_state(); + } + return size + Deoptimization::last_frame_adjust(0, callee_locals) * BytesPerWord; +} // Implementation of IR diff -r a2221bbf6812 -r bba95ce6b634 src/share/vm/c1/c1_IR.hpp --- a/src/share/vm/c1/c1_IR.hpp Tue Jun 17 16:12:09 2014 -0700 +++ b/src/share/vm/c1/c1_IR.hpp Tue Jun 17 22:15:24 2014 -0700 @@ -280,6 +280,8 @@ bool is_method_handle_invoke() const { return _is_method_handle_invoke; } void set_is_method_handle_invoke(bool x) { _is_method_handle_invoke = x; } + + int interpreter_frame_size() const; }; diff -r a2221bbf6812 -r bba95ce6b634 src/share/vm/c1/c1_LIRAssembler.cpp --- a/src/share/vm/c1/c1_LIRAssembler.cpp Tue Jun 17 16:12:09 2014 -0700 +++ b/src/share/vm/c1/c1_LIRAssembler.cpp Tue Jun 17 22:15:24 2014 -0700 @@ -185,6 +185,13 @@ return _masm->pc(); } +// To bang the stack of this compiled method we use the stack size +// that the interpreter would need in case of a deoptimization. This +// removes the need to bang the stack in the deoptimization blob which +// in turn simplifies stack overflow handling. 
+int LIR_Assembler::bang_size_in_bytes() const { + return MAX2(initial_frame_size_in_bytes(), _compilation->interpreter_frame_size()); +} void LIR_Assembler::emit_exception_entries(ExceptionInfoList* info_list) { for (int i = 0; i < info_list->length(); i++) { @@ -792,7 +799,7 @@ void LIR_Assembler::build_frame() { - _masm->build_frame(initial_frame_size_in_bytes()); + _masm->build_frame(initial_frame_size_in_bytes(), bang_size_in_bytes()); } diff -r a2221bbf6812 -r bba95ce6b634 src/share/vm/c1/c1_LIRAssembler.hpp --- a/src/share/vm/c1/c1_LIRAssembler.hpp Tue Jun 17 16:12:09 2014 -0700 +++ b/src/share/vm/c1/c1_LIRAssembler.hpp Tue Jun 17 22:15:24 2014 -0700 @@ -132,7 +132,8 @@ int code_offset() const; address pc() const; - int initial_frame_size_in_bytes(); + int initial_frame_size_in_bytes() const; + int bang_size_in_bytes() const; // test for constants which can be encoded directly in instructions static bool is_small_constant(LIR_Opr opr); diff -r a2221bbf6812 -r bba95ce6b634 src/share/vm/c1/c1_LinearScan.cpp --- a/src/share/vm/c1/c1_LinearScan.cpp Tue Jun 17 16:12:09 2014 -0700 +++ b/src/share/vm/c1/c1_LinearScan.cpp Tue Jun 17 22:15:24 2014 -0700 @@ -2441,6 +2441,9 @@ CodeEmitInfo* info = visitor.info_at(i); OopMap* oop_map = first_oop_map; + // compute worst case interpreter size in case of a deoptimization + _compilation->update_interpreter_frame_size(info->interpreter_frame_size()); + if (info->stack()->locks_size() != first_info->stack()->locks_size()) { // this info has a different number of locks then the precomputed oop map // (possible for lock and unlock instructions) -> compute oop map with diff -r a2221bbf6812 -r bba95ce6b634 src/share/vm/c1/c1_MacroAssembler.hpp --- a/src/share/vm/c1/c1_MacroAssembler.hpp Tue Jun 17 16:12:09 2014 -0700 +++ b/src/share/vm/c1/c1_MacroAssembler.hpp Tue Jun 17 22:15:24 2014 -0700 @@ -39,7 +39,7 @@ void explicit_null_check(Register base); void inline_cache_check(Register receiver, Register iCache); - void build_frame(int frame_size_in_bytes); + void build_frame(int frame_size_in_bytes, int bang_size_in_bytes); void remove_frame(int frame_size_in_bytes); void unverified_entry(Register receiver, Register ic_klass); diff -r a2221bbf6812 -r bba95ce6b634 src/share/vm/ci/ciMethod.cpp --- a/src/share/vm/ci/ciMethod.cpp Tue Jun 17 16:12:09 2014 -0700 +++ b/src/share/vm/ci/ciMethod.cpp Tue Jun 17 22:15:24 2014 -0700 @@ -80,6 +80,7 @@ _code_size = h_m()->code_size(); _intrinsic_id = h_m()->intrinsic_id(); _handler_count = h_m()->exception_table_length(); + _size_of_parameters = h_m()->size_of_parameters(); _uses_monitors = h_m()->access_flags().has_monitor_bytecodes(); _balanced_monitors = !_uses_monitors || h_m()->access_flags().is_monitor_matching(); _is_c1_compilable = !h_m()->is_not_c1_compilable(); diff -r a2221bbf6812 -r bba95ce6b634 src/share/vm/ci/ciMethod.hpp --- a/src/share/vm/ci/ciMethod.hpp Tue Jun 17 16:12:09 2014 -0700 +++ b/src/share/vm/ci/ciMethod.hpp Tue Jun 17 22:15:24 2014 -0700 @@ -71,6 +71,7 @@ int _interpreter_invocation_count; int _interpreter_throwout_count; int _instructions_size; + int _size_of_parameters; bool _uses_monitors; bool _balanced_monitors; @@ -166,6 +167,7 @@ int exception_table_length() const { check_is_loaded(); return _handler_count; } int interpreter_invocation_count() const { check_is_loaded(); return _interpreter_invocation_count; } int interpreter_throwout_count() const { check_is_loaded(); return _interpreter_throwout_count; } + int size_of_parameters() const { check_is_loaded(); return _size_of_parameters; } 
// Code size for inlining decisions. int code_size_for_inlining(); @@ -241,7 +243,6 @@ ciField* get_field_at_bci( int bci, bool &will_link); ciMethod* get_method_at_bci(int bci, bool &will_link, ciSignature* *declared_signature); - // Given a certain calling environment, find the monomorphic target // for the call. Return NULL if the call is not monomorphic in // its calling environment. diff -r a2221bbf6812 -r bba95ce6b634 src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp --- a/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp Tue Jun 17 16:12:09 2014 -0700 +++ b/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp Tue Jun 17 22:15:24 2014 -0700 @@ -310,8 +310,7 @@ _cmsGen->refs_discovery_is_mt(), // mt discovery (int) MAX2(ConcGCThreads, ParallelGCThreads), // mt discovery degree _cmsGen->refs_discovery_is_atomic(), // discovery is not atomic - &_is_alive_closure, // closure for liveness info - false); // next field updates do not need write barrier + &_is_alive_closure); // closure for liveness info // Initialize the _ref_processor field of CMSGen _cmsGen->set_ref_processor(_ref_processor); diff -r a2221bbf6812 -r bba95ce6b634 src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp --- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp Tue Jun 17 16:12:09 2014 -0700 +++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp Tue Jun 17 22:15:24 2014 -0700 @@ -2258,12 +2258,9 @@ // degree of mt discovery false, // Reference discovery is not atomic - &_is_alive_closure_cm, + &_is_alive_closure_cm); // is alive closure // (for efficiency/performance) - true); - // Setting next fields of discovered - // lists requires a barrier. // STW ref processor _ref_processor_stw = @@ -2278,12 +2275,9 @@ // degree of mt discovery true, // Reference discovery is atomic - &_is_alive_closure_stw, + &_is_alive_closure_stw); // is alive closure // (for efficiency/performance) - false); - // Setting next fields of discovered - // lists does not require a barrier. } size_t G1CollectedHeap::capacity() const { diff -r a2221bbf6812 -r bba95ce6b634 src/share/vm/gc_implementation/parNew/parNewGeneration.cpp --- a/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp Tue Jun 17 16:12:09 2014 -0700 +++ b/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp Tue Jun 17 22:15:24 2014 -0700 @@ -1638,8 +1638,7 @@ refs_discovery_is_mt(), // mt discovery (int) ParallelGCThreads, // mt discovery degree refs_discovery_is_atomic(), // atomic_discovery - NULL, // is_alive_non_header - false); // write barrier for next field updates + NULL); // is_alive_non_header } } diff -r a2221bbf6812 -r bba95ce6b634 src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp --- a/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp Tue Jun 17 16:12:09 2014 -0700 +++ b/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp Tue Jun 17 22:15:24 2014 -0700 @@ -853,8 +853,7 @@ true, // mt discovery (int) ParallelGCThreads, // mt discovery degree true, // atomic_discovery - &_is_alive_closure, // non-header is alive closure - false); // write barrier for next field updates + &_is_alive_closure); // non-header is alive closure _counters = new CollectorCounters("PSParallelCompact", 1); // Initialize static fields in ParCompactionManager. 
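For illustration, the collector hunks above and below all make the same mechanical change: the trailing "needs write barrier for next field updates" flag is dropped from the ReferenceProcessor constructor, because the post barrier is now issued in a single place (the enqueue walk in referenceProcessor.cpp further down). A minimal sketch of the resulting call shape follows; the argument names are placeholders, not any one collector's values:

// Sketch only -- per-collector argument values differ; see the hunks above and below.
ReferenceProcessor* rp =
    new ReferenceProcessor(span,                  // memory span scanned for references
                           mt_processing,         // mt processing
                           processing_degree,     // mt processing degree
                           mt_discovery,          // mt discovery
                           discovery_degree,      // mt discovery degree
                           atomic_discovery,      // atomic discovery
                           is_alive_non_header);  // liveness closure, may be NULL
// Note: there is no longer a discovered_list_needs_post_barrier argument.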
diff -r a2221bbf6812 -r bba95ce6b634 src/share/vm/gc_implementation/parallelScavenge/psScavenge.cpp --- a/src/share/vm/gc_implementation/parallelScavenge/psScavenge.cpp Tue Jun 17 16:12:09 2014 -0700 +++ b/src/share/vm/gc_implementation/parallelScavenge/psScavenge.cpp Tue Jun 17 22:15:24 2014 -0700 @@ -861,8 +861,7 @@ true, // mt discovery (int) ParallelGCThreads, // mt discovery degree true, // atomic_discovery - NULL, // header provides liveness info - false); // next field updates do not need write barrier + NULL); // header provides liveness info // Cache the cardtable BarrierSet* bs = Universe::heap()->barrier_set(); diff -r a2221bbf6812 -r bba95ce6b634 src/share/vm/interpreter/abstractInterpreter.hpp --- a/src/share/vm/interpreter/abstractInterpreter.hpp Tue Jun 17 16:12:09 2014 -0700 +++ b/src/share/vm/interpreter/abstractInterpreter.hpp Tue Jun 17 22:15:24 2014 -0700 @@ -181,30 +181,16 @@ // Deoptimization should reexecute this bytecode static bool bytecode_should_reexecute(Bytecodes::Code code); - // share implementation of size_activation and layout_activation: - static int size_activation(Method* method, + // deoptimization support + static int size_activation(int max_stack, int temps, - int popframe_args, + int extra_args, int monitors, - int caller_actual_parameters, int callee_params, int callee_locals, - bool is_top_frame, - bool is_bottom_frame) { - return layout_activation(method, - temps, - popframe_args, - monitors, - caller_actual_parameters, - callee_params, - callee_locals, - (frame*)NULL, - (frame*)NULL, - is_top_frame, - is_bottom_frame); - } + bool is_top_frame); - static int layout_activation(Method* method, + static void layout_activation(Method* method, int temps, int popframe_args, int monitors, diff -r a2221bbf6812 -r bba95ce6b634 src/share/vm/memory/metaspace.cpp --- a/src/share/vm/memory/metaspace.cpp Tue Jun 17 16:12:09 2014 -0700 +++ b/src/share/vm/memory/metaspace.cpp Tue Jun 17 22:15:24 2014 -0700 @@ -1423,6 +1423,17 @@ return (size_t)Atomic::add_ptr(-(intptr_t)v, &_capacity_until_GC); } +void MetaspaceGC::initialize() { + // Set the high-water mark to MaxMetaspaceSize during VM initialization since + // we can't do a GC during initialization. + _capacity_until_GC = MaxMetaspaceSize; +} + +void MetaspaceGC::post_initialize() { + // Reset the high-water mark once the VM initialization is done. + _capacity_until_GC = MAX2(MetaspaceAux::committed_bytes(), MetaspaceSize); +} + bool MetaspaceGC::can_expand(size_t word_size, bool is_class) { // Check if the compressed class space is full. if (is_class && Metaspace::using_class_space()) { @@ -1443,21 +1454,13 @@ size_t MetaspaceGC::allowed_expansion() { size_t committed_bytes = MetaspaceAux::committed_bytes(); + size_t capacity_until_gc = capacity_until_GC(); + + assert(capacity_until_gc >= committed_bytes, + err_msg("capacity_until_gc: " SIZE_FORMAT " < committed_bytes: " SIZE_FORMAT, + capacity_until_gc, committed_bytes)); size_t left_until_max = MaxMetaspaceSize - committed_bytes; - - // Always grant expansion if we are initiating the JVM, - // or if the GC_locker is preventing GCs.
- if (!is_init_completed() || GC_locker::is_active_and_needs_gc()) { - return left_until_max / BytesPerWord; - } - - size_t capacity_until_gc = capacity_until_GC(); - - if (capacity_until_gc <= committed_bytes) { - return 0; - } - size_t left_until_GC = capacity_until_gc - committed_bytes; size_t left_to_commit = MIN2(left_until_GC, left_until_max); @@ -1469,7 +1472,15 @@ uint current_shrink_factor = _shrink_factor; _shrink_factor = 0; - const size_t used_after_gc = MetaspaceAux::capacity_bytes(); + // Using committed_bytes() for used_after_gc is an overestimation, since the + // chunk free lists are included in committed_bytes() and the memory in an + // un-fragmented chunk free list is available for future allocations. + // However, if the chunk free lists become fragmented, then the memory may + // not be available for future allocations and the memory is therefore "in use". + // Including the chunk free lists in the definition of "in use" is therefore + // necessary. Not including the chunk free lists can cause capacity_until_GC to + // shrink below committed_bytes() and this has caused serious bugs in the past. + const size_t used_after_gc = MetaspaceAux::committed_bytes(); const size_t capacity_until_GC = MetaspaceGC::capacity_until_GC(); const double minimum_free_percentage = MinMetaspaceFreeRatio / 100.0; @@ -3093,6 +3104,8 @@ } void Metaspace::global_initialize() { + MetaspaceGC::initialize(); + // Initialize the alignment for shared spaces. int max_alignment = os::vm_page_size(); size_t cds_total = 0; @@ -3200,10 +3213,13 @@ } } - MetaspaceGC::initialize(); _tracer = new MetaspaceTracer(); } +void Metaspace::post_initialize() { + MetaspaceGC::post_initialize(); +} + Metachunk* Metaspace::get_initialization_chunk(MetadataType mdtype, size_t chunk_word_size, size_t chunk_bunch) { diff -r a2221bbf6812 -r bba95ce6b634 src/share/vm/memory/metaspace.hpp --- a/src/share/vm/memory/metaspace.hpp Tue Jun 17 16:12:09 2014 -0700 +++ b/src/share/vm/memory/metaspace.hpp Tue Jun 17 22:15:24 2014 -0700 @@ -208,6 +208,7 @@ static void ergo_initialize(); static void global_initialize(); + static void post_initialize(); static size_t first_chunk_word_size() { return _first_chunk_word_size; } static size_t first_class_chunk_word_size() { return _first_class_chunk_word_size; } @@ -398,7 +399,8 @@ public: - static void initialize() { _capacity_until_GC = MetaspaceSize; } + static void initialize(); + static void post_initialize(); static size_t capacity_until_GC(); static size_t inc_capacity_until_GC(size_t v); diff -r a2221bbf6812 -r bba95ce6b634 src/share/vm/memory/referenceProcessor.cpp --- a/src/share/vm/memory/referenceProcessor.cpp Tue Jun 17 16:12:09 2014 -0700 +++ b/src/share/vm/memory/referenceProcessor.cpp Tue Jun 17 22:15:24 2014 -0700 @@ -96,12 +96,10 @@ bool mt_discovery, uint mt_discovery_degree, bool atomic_discovery, - BoolObjectClosure* is_alive_non_header, - bool discovered_list_needs_post_barrier) : + BoolObjectClosure* is_alive_non_header) : _discovering_refs(false), _enqueuing_is_done(false), _is_alive_non_header(is_alive_non_header), - _discovered_list_needs_post_barrier(discovered_list_needs_post_barrier), _processing_is_mt(mt_processing), _next_id(0) { @@ -340,10 +338,18 @@ // (java.lang.ref.Reference.discovered), self-loop their "next" field // thus distinguishing them from active References, then // prepend them to the pending list. + // + // The Java threads will see the Reference objects linked together through + // the discovered field.
Instead of trying to do the write barrier updates + // in all places in the reference processor where we manipulate the discovered + // field we make sure to do the barrier here where we anyway iterate through + // all linked Reference objects. Note that it is important to not dirty any + // cards during reference processing since this will cause card table + // verification to fail for G1. + // // BKWRD COMPATIBILITY NOTE: For older JDKs (prior to the fix for 4956777), // the "next" field is used to chain the pending list, not the discovered // field. - if (TraceReferenceGC && PrintGCDetails) { gclog_or_tty->print_cr("ReferenceProcessor::enqueue_discovered_reflist list " INTPTR_FORMAT, (address)refs_list.head()); @@ -351,7 +357,7 @@ oop obj = NULL; oop next_d = refs_list.head(); - if (pending_list_uses_discovered_field()) { // New behaviour + if (pending_list_uses_discovered_field()) { // New behavior // Walk down the list, self-looping the next field // so that the References are not considered active. while (obj != next_d) { @@ -365,15 +371,15 @@ assert(java_lang_ref_Reference::next(obj) == NULL, "Reference not active; should not be discovered"); // Self-loop next, so as to make Ref not active. - // Post-barrier not needed when looping to self. java_lang_ref_Reference::set_next_raw(obj, obj); - if (next_d == obj) { // obj is last - // Swap refs_list into pendling_list_addr and + if (next_d != obj) { + oopDesc::bs()->write_ref_field(java_lang_ref_Reference::discovered_addr(obj), next_d); + } else { + // This is the last object. + // Swap refs_list into pending_list_addr and // set obj's discovered to what we read from pending_list_addr. oop old = oopDesc::atomic_exchange_oop(refs_list.head(), pending_list_addr); - // Need post-barrier on pending_list_addr above; - // see special post-barrier code at the end of - // enqueue_discovered_reflists() further below. + // Need post-barrier on pending_list_addr. See enqueue_discovered_ref_helper() above. java_lang_ref_Reference::set_discovered_raw(obj, old); // old may be NULL oopDesc::bs()->write_ref_field(java_lang_ref_Reference::discovered_addr(obj), old); } @@ -496,20 +502,15 @@ // pre-barrier here because we know the Reference has already been found/marked, // that's how it ended up in the discovered list in the first place. oop_store_raw(_prev_next, new_next); - if (_discovered_list_needs_post_barrier && _prev_next != _refs_list.adr_head()) { - // Needs post-barrier and this is not the list head (which is not on the heap) - oopDesc::bs()->write_ref_field(_prev_next, new_next); - } NOT_PRODUCT(_removed++); _refs_list.dec_length(1); } // Make the Reference object active again. void DiscoveredListIterator::make_active() { - // For G1 we don't want to use set_next - it - // will dirty the card for the next field of - // the reference object and will fail - // CT verification. + // The pre barrier for G1 is probably just needed for the old + // reference processing behavior. Should we guard this with + // ReferenceProcessor::pending_list_uses_discovered_field() ? 
if (UseG1GC) { HeapWord* next_addr = java_lang_ref_Reference::next_addr(_ref); if (UseCompressedOops) { @@ -517,10 +518,8 @@ } else { oopDesc::bs()->write_ref_field_pre((oop*)next_addr, NULL); } - java_lang_ref_Reference::set_next_raw(_ref, NULL); - } else { - java_lang_ref_Reference::set_next(_ref, NULL); } + java_lang_ref_Reference::set_next_raw(_ref, NULL); } void DiscoveredListIterator::clear_referent() { @@ -546,7 +545,7 @@ OopClosure* keep_alive, VoidClosure* complete_gc) { assert(policy != NULL, "Must have a non-NULL policy"); - DiscoveredListIterator iter(refs_list, keep_alive, is_alive, _discovered_list_needs_post_barrier); + DiscoveredListIterator iter(refs_list, keep_alive, is_alive); // Decide which softly reachable refs should be kept alive. while (iter.has_next()) { iter.load_ptrs(DEBUG_ONLY(!discovery_is_atomic() /* allow_null_referent */)); @@ -586,7 +585,7 @@ BoolObjectClosure* is_alive, OopClosure* keep_alive) { assert(discovery_is_atomic(), "Error"); - DiscoveredListIterator iter(refs_list, keep_alive, is_alive, _discovered_list_needs_post_barrier); + DiscoveredListIterator iter(refs_list, keep_alive, is_alive); while (iter.has_next()) { iter.load_ptrs(DEBUG_ONLY(false /* allow_null_referent */)); DEBUG_ONLY(oop next = java_lang_ref_Reference::next(iter.obj());) @@ -623,7 +622,7 @@ OopClosure* keep_alive, VoidClosure* complete_gc) { assert(!discovery_is_atomic(), "Error"); - DiscoveredListIterator iter(refs_list, keep_alive, is_alive, _discovered_list_needs_post_barrier); + DiscoveredListIterator iter(refs_list, keep_alive, is_alive); while (iter.has_next()) { iter.load_ptrs(DEBUG_ONLY(true /* allow_null_referent */)); HeapWord* next_addr = java_lang_ref_Reference::next_addr(iter.obj()); @@ -666,7 +665,7 @@ OopClosure* keep_alive, VoidClosure* complete_gc) { ResourceMark rm; - DiscoveredListIterator iter(refs_list, keep_alive, is_alive, _discovered_list_needs_post_barrier); + DiscoveredListIterator iter(refs_list, keep_alive, is_alive); while (iter.has_next()) { iter.update_discovered(); iter.load_ptrs(DEBUG_ONLY(false /* allow_null_referent */)); @@ -782,13 +781,6 @@ bool _clear_referent; }; -void ReferenceProcessor::set_discovered(oop ref, oop value) { - java_lang_ref_Reference::set_discovered_raw(ref, value); - if (_discovered_list_needs_post_barrier) { - oopDesc::bs()->write_ref_field(java_lang_ref_Reference::discovered_addr(ref), value); - } -} - // Balances reference queues. // Move entries from all queues[0, 1, ..., _max_num_q-1] to // queues[0, 1, ..., _num_q-1] because only the first _num_q @@ -846,9 +838,9 @@ // Add the chain to the to list. if (ref_lists[to_idx].head() == NULL) { // to list is empty. Make a loop at the end. 
- set_discovered(move_tail, move_tail); + java_lang_ref_Reference::set_discovered_raw(move_tail, move_tail); } else { - set_discovered(move_tail, ref_lists[to_idx].head()); + java_lang_ref_Reference::set_discovered_raw(move_tail, ref_lists[to_idx].head()); } ref_lists[to_idx].set_head(move_head); ref_lists[to_idx].inc_length(refs_to_move); @@ -982,7 +974,7 @@ void ReferenceProcessor::clean_up_discovered_reflist(DiscoveredList& refs_list) { assert(!discovery_is_atomic(), "Else why call this method?"); - DiscoveredListIterator iter(refs_list, NULL, NULL, _discovered_list_needs_post_barrier); + DiscoveredListIterator iter(refs_list, NULL, NULL); while (iter.has_next()) { iter.load_ptrs(DEBUG_ONLY(true /* allow_null_referent */)); oop next = java_lang_ref_Reference::next(iter.obj()); @@ -1071,16 +1063,6 @@ // The last ref must have its discovered field pointing to itself. oop next_discovered = (current_head != NULL) ? current_head : obj; - // Note: In the case of G1, this specific pre-barrier is strictly - // not necessary because the only case we are interested in - // here is when *discovered_addr is NULL (see the CAS further below), - // so this will expand to nothing. As a result, we have manually - // elided this out for G1, but left in the test for some future - // collector that might have need for a pre-barrier here, e.g.:- - // oopDesc::bs()->write_ref_field_pre((oop* or narrowOop*)discovered_addr, next_discovered); - assert(!_discovered_list_needs_post_barrier || UseG1GC, - "Need to check non-G1 collector: " - "may need a pre-write-barrier for CAS from NULL below"); oop retest = oopDesc::atomic_compare_exchange_oop(next_discovered, discovered_addr, NULL); if (retest == NULL) { @@ -1089,9 +1071,6 @@ // is necessary. refs_list.set_head(obj); refs_list.inc_length(1); - if (_discovered_list_needs_post_barrier) { - oopDesc::bs()->write_ref_field((void*)discovered_addr, next_discovered); - } if (TraceReferenceGC) { gclog_or_tty->print_cr("Discovered reference (mt) (" INTPTR_FORMAT ": %s)", @@ -1242,24 +1221,14 @@ if (_discovery_is_mt) { add_to_discovered_list_mt(*list, obj, discovered_addr); } else { - // If "_discovered_list_needs_post_barrier", we do write barriers when - // updating the discovered reference list. Otherwise, we do a raw store - // here: the field will be visited later when processing the discovered - // references. + // We do a raw store here: the field will be visited later when processing + // the discovered references. oop current_head = list->head(); // The last ref must have its discovered field pointing to itself. oop next_discovered = (current_head != NULL) ? current_head : obj; - // As in the case further above, since we are over-writing a NULL - // pre-value, we can safely elide the pre-barrier here for the case of G1. 
- // e.g.:- oopDesc::bs()->write_ref_field_pre((oop* or narrowOop*)discovered_addr, next_discovered); assert(discovered == NULL, "control point invariant"); - assert(!_discovered_list_needs_post_barrier || UseG1GC, - "For non-G1 collector, may need a pre-write-barrier for CAS from NULL below"); oop_store_raw(discovered_addr, next_discovered); - if (_discovered_list_needs_post_barrier) { - oopDesc::bs()->write_ref_field((void*)discovered_addr, next_discovered); - } list->set_head(obj); list->inc_length(1); @@ -1353,7 +1322,7 @@ OopClosure* keep_alive, VoidClosure* complete_gc, YieldClosure* yield) { - DiscoveredListIterator iter(refs_list, keep_alive, is_alive, _discovered_list_needs_post_barrier); + DiscoveredListIterator iter(refs_list, keep_alive, is_alive); while (iter.has_next()) { iter.load_ptrs(DEBUG_ONLY(true /* allow_null_referent */)); oop obj = iter.obj(); diff -r a2221bbf6812 -r bba95ce6b634 src/share/vm/memory/referenceProcessor.hpp --- a/src/share/vm/memory/referenceProcessor.hpp Tue Jun 17 16:12:09 2014 -0700 +++ b/src/share/vm/memory/referenceProcessor.hpp Tue Jun 17 22:15:24 2014 -0700 @@ -99,7 +99,6 @@ oop _referent; OopClosure* _keep_alive; BoolObjectClosure* _is_alive; - bool _discovered_list_needs_post_barrier; DEBUG_ONLY( oop _first_seen; // cyclic linked list check @@ -113,8 +112,7 @@ public: inline DiscoveredListIterator(DiscoveredList& refs_list, OopClosure* keep_alive, - BoolObjectClosure* is_alive, - bool discovered_list_needs_post_barrier = false): + BoolObjectClosure* is_alive): _refs_list(refs_list), _prev_next(refs_list.adr_head()), _prev(NULL), @@ -128,8 +126,7 @@ #endif _next(NULL), _keep_alive(keep_alive), - _is_alive(is_alive), - _discovered_list_needs_post_barrier(discovered_list_needs_post_barrier) + _is_alive(is_alive) { } // End Of List. @@ -230,14 +227,6 @@ // other collectors in configuration bool _discovery_is_mt; // true if reference discovery is MT. - // If true, setting "next" field of a discovered refs list requires - // write post barrier. (Must be true if used in a collector in which - // elements of a discovered list may be moved during discovery: for - // example, a collector like Garbage-First that moves objects during a - // long-term concurrent marking phase that does weak reference - // discovery.) - bool _discovered_list_needs_post_barrier; - bool _enqueuing_is_done; // true if all weak references enqueued bool _processing_is_mt; // true during phases when // reference processing is MT. @@ -382,11 +371,6 @@ void enqueue_discovered_reflists(HeapWord* pending_list_addr, AbstractRefProcTaskExecutor* task_executor); protected: - // Set the 'discovered' field of the given reference to - // the given value - emitting post barriers depending upon - // the value of _discovered_list_needs_post_barrier. - void set_discovered(oop ref, oop value); - // "Preclean" the given discovered reference list // by removing references with strongly reachable referents. // Currently used in support of CMS only. 
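To make the new barrier discipline concrete: discovery and list balancing now write the discovered field raw, and the one place that must dirty cards is the enqueue walk in the referenceProcessor.cpp hunks above, which visits every Reference anyway. A condensed sketch of that loop, using identifiers from the hunk (a paraphrase, not the verbatim patch text):

while (obj != next_d) {
  obj = next_d;
  next_d = java_lang_ref_Reference::discovered(obj);
  // Self-loop next, so as to make Ref not active.
  java_lang_ref_Reference::set_next_raw(obj, obj);
  if (next_d != obj) {
    // Interior element: apply the post barrier for the discovered link here.
    oopDesc::bs()->write_ref_field(java_lang_ref_Reference::discovered_addr(obj), next_d);
  } else {
    // Last element: swap refs_list into pending_list_addr and apply the
    // post barrier on pending_list_addr (see the hunk above).
  }
}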
@@ -427,8 +411,7 @@ bool mt_processing = false, uint mt_processing_degree = 1, bool mt_discovery = false, uint mt_discovery_degree = 1, bool atomic_discovery = true, - BoolObjectClosure* is_alive_non_header = NULL, - bool discovered_list_needs_post_barrier = false); + BoolObjectClosure* is_alive_non_header = NULL); // RefDiscoveryPolicy values enum DiscoveryPolicy { diff -r a2221bbf6812 -r bba95ce6b634 src/share/vm/oops/cpCache.cpp --- a/src/share/vm/oops/cpCache.cpp Tue Jun 17 16:12:09 2014 -0700 +++ b/src/share/vm/oops/cpCache.cpp Tue Jun 17 22:15:24 2014 -0700 @@ -406,7 +406,7 @@ oop ConstantPoolCacheEntry::appendix_if_resolved(constantPoolHandle cpool) { - if (is_f1_null() || !has_appendix()) + if (!has_appendix()) return NULL; const int ref_index = f2_as_index() + _indy_resolved_references_appendix_offset; objArrayOop resolved_references = cpool->resolved_references(); @@ -415,7 +415,7 @@ oop ConstantPoolCacheEntry::method_type_if_resolved(constantPoolHandle cpool) { - if (is_f1_null() || !has_method_type()) + if (!has_method_type()) return NULL; const int ref_index = f2_as_index() + _indy_resolved_references_method_type_offset; objArrayOop resolved_references = cpool->resolved_references(); diff -r a2221bbf6812 -r bba95ce6b634 src/share/vm/oops/cpCache.hpp --- a/src/share/vm/oops/cpCache.hpp Tue Jun 17 16:12:09 2014 -0700 +++ b/src/share/vm/oops/cpCache.hpp Tue Jun 17 22:15:24 2014 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1998, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1998, 2014, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -346,8 +346,8 @@ bool is_final() const { return (_flags & (1 << is_final_shift)) != 0; } bool is_forced_virtual() const { return (_flags & (1 << is_forced_virtual_shift)) != 0; } bool is_vfinal() const { return (_flags & (1 << is_vfinal_shift)) != 0; } - bool has_appendix() const { return (_flags & (1 << has_appendix_shift)) != 0; } - bool has_method_type() const { return (_flags & (1 << has_method_type_shift)) != 0; } + bool has_appendix() const { return (!is_f1_null()) && (_flags & (1 << has_appendix_shift)) != 0; } + bool has_method_type() const { return (!is_f1_null()) && (_flags & (1 << has_method_type_shift)) != 0; } bool is_method_entry() const { return (_flags & (1 << is_field_entry_shift)) == 0; } bool is_field_entry() const { return (_flags & (1 << is_field_entry_shift)) != 0; } bool is_byte() const { return flag_state() == btos; } diff -r a2221bbf6812 -r bba95ce6b634 src/share/vm/oops/instanceKlass.cpp --- a/src/share/vm/oops/instanceKlass.cpp Tue Jun 17 16:12:09 2014 -0700 +++ b/src/share/vm/oops/instanceKlass.cpp Tue Jun 17 22:15:24 2014 -0700 @@ -2771,7 +2771,7 @@ Method* m = n->method(); // Search for match while(cur != NULL && cur != n) { - if (TieredCompilation) { + if (TieredCompilation && m == cur->method()) { // Find max level before n max_level = MAX2(max_level, cur->comp_level()); } @@ -2793,7 +2793,9 @@ cur = next; while (cur != NULL) { // Find max level after n - max_level = MAX2(max_level, cur->comp_level()); + if (m == cur->method()) { + max_level = MAX2(max_level, cur->comp_level()); + } cur = cur->osr_link(); } m->set_highest_osr_comp_level(max_level); diff -r a2221bbf6812 -r bba95ce6b634 src/share/vm/opto/callnode.cpp --- a/src/share/vm/opto/callnode.cpp Tue Jun 17 16:12:09 2014 -0700 +++ b/src/share/vm/opto/callnode.cpp Tue Jun 17 22:15:24 2014 -0700 @@ 
-607,6 +607,39 @@ } } +// Mirror the stack size calculation in the deopt code +// How much stack space would we need at this point in the program in +// case of deoptimization? +int JVMState::interpreter_frame_size() const { + const JVMState* jvms = this; + int size = 0; + int callee_parameters = 0; + int callee_locals = 0; + int extra_args = method()->max_stack() - stk_size(); + + while (jvms != NULL) { + int locks = jvms->nof_monitors(); + int temps = jvms->stk_size(); + bool is_top_frame = (jvms == this); + ciMethod* method = jvms->method(); + + int frame_size = BytesPerWord * Interpreter::size_activation(method->max_stack(), + temps + callee_parameters, + extra_args, + locks, + callee_parameters, + callee_locals, + is_top_frame); + size += frame_size; + + callee_parameters = method->size_of_parameters(); + callee_locals = method->max_locals(); + extra_args = 0; + jvms = jvms->caller(); + } + return size + Deoptimization::last_frame_adjust(0, callee_locals) * BytesPerWord; +} + //============================================================================= uint CallNode::cmp( const Node &n ) const { return _tf == ((CallNode&)n)._tf && _jvms == ((CallNode&)n)._jvms; } diff -r a2221bbf6812 -r bba95ce6b634 src/share/vm/opto/callnode.hpp --- a/src/share/vm/opto/callnode.hpp Tue Jun 17 16:12:09 2014 -0700 +++ b/src/share/vm/opto/callnode.hpp Tue Jun 17 22:15:24 2014 -0700 @@ -300,6 +300,7 @@ JVMState* clone_shallow(Compile* C) const; // retains uncloned caller void set_map_deep(SafePointNode *map);// reset map for all callers void adapt_position(int delta); // Adapt offsets in in-array after adding an edge. + int interpreter_frame_size() const; #ifndef PRODUCT void format(PhaseRegAlloc *regalloc, const Node *n, outputStream* st) const; diff -r a2221bbf6812 -r bba95ce6b634 src/share/vm/opto/compile.cpp --- a/src/share/vm/opto/compile.cpp Tue Jun 17 16:12:09 2014 -0700 +++ b/src/share/vm/opto/compile.cpp Tue Jun 17 22:15:24 2014 -0700 @@ -439,6 +439,14 @@ return words; } +// To bang the stack of this compiled method we use the stack size +// that the interpreter would need in case of a deoptimization. This +// removes the need to bang the stack in the deoptimization blob which +// in turn simplifies stack overflow handling. 
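The new JVMState::interpreter_frame_size() above walks the inlining chain from the youngest JVM state to the root, accumulating one interpreter activation per level; only the youngest frame reserves room for the extra arguments the deopt code may push. Below is a minimal sketch of that accumulation, assuming illustrative stand-ins: InlinedFrame and size_activation_words are not HotSpot API, and the toy size model does not reflect any real platform's interpreter frame layout.

#include <cstddef>

const int kBytesPerWord = (int)sizeof(void*);

// Illustrative stand-in for one inlined activation at a safepoint.
struct InlinedFrame {
  int max_stack;    // callee's max expression stack, in slots
  int live_slots;   // expression-stack slots live at this safepoint
  int monitors;     // active locks
  int params;       // size_of_parameters()
  int max_locals;
  const InlinedFrame* caller;  // NULL at the root of the inlining chain
};

// Toy model of Interpreter::size_activation(): fixed header words plus
// monitor slots plus the live body of the frame. Real VMs compute this
// from the platform's interpreter frame layout.
static int size_activation_words(int max_stack, int temps, int extra_args,
                                 int monitors, int callee_params,
                                 int callee_locals, bool is_top_frame) {
  const int header_words = 16;
  int body = is_top_frame ? (temps + extra_args)
                          : (temps - callee_params + callee_locals);
  return header_words + 2 * monitors + body;
}

int interpreter_frames_size_in_bytes(const InlinedFrame* top) {
  int size = 0;
  int callee_params = 0, callee_locals = 0;
  // Only the youngest frame may need room for arguments the deopt
  // code pushes on top of what is live at the safepoint.
  int extra_args = top->max_stack - top->live_slots;
  for (const InlinedFrame* f = top; f != NULL; f = f->caller) {
    size += kBytesPerWord *
            size_activation_words(f->max_stack, f->live_slots + callee_params,
                                  extra_args, f->monitors,
                                  callee_params, callee_locals,
                                  /* is_top_frame */ f == top);
    callee_params = f->params;
    callee_locals = f->max_locals;
    extra_args = 0;
  }
  return size;
}

Compile::bang_size_in_bytes() below then simply takes the maximum of this worst case and the compiled frame size.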
+int Compile::bang_size_in_bytes() const { + return MAX2(_interpreter_frame_size, frame_size_in_bytes()); +} + // ============================================================================ //------------------------------CompileWrapper--------------------------------- class CompileWrapper : public StackObj { @@ -662,7 +670,8 @@ _inlining_incrementally(false), _print_inlining_list(NULL), _print_inlining_idx(0), - _preserve_jvm_state(0) { + _preserve_jvm_state(0), + _interpreter_frame_size(0) { C = this; CompileWrapper cw(this); @@ -969,7 +978,8 @@ _print_inlining_list(NULL), _print_inlining_idx(0), _preserve_jvm_state(0), - _allowed_reasons(0) { + _allowed_reasons(0), + _interpreter_frame_size(0) { C = this; #ifndef PRODUCT @@ -3078,8 +3088,12 @@ Node* m = n->in(i); ++i; if (m != NULL && !frc._visited.test_set(m->_idx)) { - if (m->is_SafePoint() && m->as_SafePoint()->jvms() != NULL) + if (m->is_SafePoint() && m->as_SafePoint()->jvms() != NULL) { + // compute worst case interpreter size in case of a deoptimization + update_interpreter_frame_size(m->as_SafePoint()->jvms()->interpreter_frame_size()); + sfpt.push(m); + } cnt = m->req(); nstack.push(n, i); // put on stack parent and next input's index n = m; diff -r a2221bbf6812 -r bba95ce6b634 src/share/vm/opto/compile.hpp --- a/src/share/vm/opto/compile.hpp Tue Jun 17 16:12:09 2014 -0700 +++ b/src/share/vm/opto/compile.hpp Tue Jun 17 22:15:24 2014 -0700 @@ -487,6 +487,7 @@ RegMask _FIRST_STACK_mask; // All stack slots usable for spills (depends on frame layout) Arena* _indexSet_arena; // control IndexSet allocation within PhaseChaitin void* _indexSet_free_block_list; // free list of IndexSet bit blocks + int _interpreter_frame_size; uint _node_bundling_limit; Bundle* _node_bundling_base; // Information for instruction bundling @@ -946,6 +947,7 @@ PhaseRegAlloc* regalloc() { return _regalloc; } int frame_slots() const { return _frame_slots; } int frame_size_in_words() const; // frame_slots in units of the polymorphic 'words' + int frame_size_in_bytes() const { return _frame_slots << LogBytesPerInt; } RegMask& FIRST_STACK_mask() { return _FIRST_STACK_mask; } Arena* indexSet_arena() { return _indexSet_arena; } void* indexSet_free_block_list() { return _indexSet_free_block_list; } @@ -957,6 +959,13 @@ bool need_stack_bang(int frame_size_in_bytes) const; bool need_register_stack_bang() const; + void update_interpreter_frame_size(int size) { + if (_interpreter_frame_size < size) { + _interpreter_frame_size = size; + } + } + int bang_size_in_bytes() const; + void set_matcher(Matcher* m) { _matcher = m; } //void set_regalloc(PhaseRegAlloc* ra) { _regalloc = ra; } void set_indexSet_arena(Arena* a) { _indexSet_arena = a; } diff -r a2221bbf6812 -r bba95ce6b634 src/share/vm/opto/loopopts.cpp --- a/src/share/vm/opto/loopopts.cpp Tue Jun 17 16:12:09 2014 -0700 +++ b/src/share/vm/opto/loopopts.cpp Tue Jun 17 22:15:24 2014 -0700 @@ -1401,7 +1401,8 @@ // loop. Happens if people set a loop-exit flag; then test the flag // in the loop to break the loop, then test it again outside of the // loop to determine which way the loop exited. - if( use->is_If() || use->is_CMove() ) { + // Loop predicate If node connects to Bool node through Opaque1 node. + if (use->is_If() || use->is_CMove() || C->is_predicate_opaq(use)) { // Since this code is highly unlikely, we lazily build the worklist // of such Nodes to go split. if( !split_if_set ) @@ -2768,11 +2769,11 @@ // Hit! Refactor use to use the post-incremented tripcounter. // Compute a post-increment tripcounter. 
Node *opaq = new (C) Opaque2Node( C, cle->incr() ); - register_new_node( opaq, u_ctrl ); + register_new_node(opaq, exit); Node *neg_stride = _igvn.intcon(-cle->stride_con()); set_ctrl(neg_stride, C->root()); Node *post = new (C) AddINode( opaq, neg_stride); - register_new_node( post, u_ctrl ); + register_new_node(post, exit); _igvn.rehash_node_delayed(use); for (uint j = 1; j < use->req(); j++) { if (use->in(j) == phi) diff -r a2221bbf6812 -r bba95ce6b634 src/share/vm/opto/output.cpp --- a/src/share/vm/opto/output.cpp Tue Jun 17 16:12:09 2014 -0700 +++ b/src/share/vm/opto/output.cpp Tue Jun 17 22:15:24 2014 -0700 @@ -165,8 +165,13 @@ // Determine if we need to generate a stack overflow check. // Do it if the method is not a stub function and // has java calls or has frame size > vm_page_size/8. + // The debug VM checks that deoptimization doesn't trigger an + // unexpected stack overflow (compiled method stack banging should + // guarantee it doesn't happen) so we always need the stack bang in + // a debug VM. return (UseStackBanging && stub_function() == NULL && - (has_java_calls() || frame_size_in_bytes > os::vm_page_size()>>3)); + (has_java_calls() || frame_size_in_bytes > os::vm_page_size()>>3 + DEBUG_ONLY(|| true))); } bool Compile::need_register_stack_bang() const { diff -r a2221bbf6812 -r bba95ce6b634 src/share/vm/runtime/deoptimization.cpp --- a/src/share/vm/runtime/deoptimization.cpp Tue Jun 17 16:12:09 2014 -0700 +++ b/src/share/vm/runtime/deoptimization.cpp Tue Jun 17 22:15:24 2014 -0700 @@ -422,15 +422,9 @@ // frame[number_of_frames - 1 ] = on_stack_size(youngest) // frame[number_of_frames - 2 ] = on_stack_size(sender(youngest)) // frame[number_of_frames - 3 ] = on_stack_size(sender(sender(youngest))) - int caller_parms = callee_parameters; - if ((index == array->frames() - 1) && caller_was_method_handle) { - caller_parms = 0; - } - frame_sizes[number_of_frames - 1 - index] = BytesPerWord * array->element(index)->on_stack_size(caller_parms, - callee_parameters, + frame_sizes[number_of_frames - 1 - index] = BytesPerWord * array->element(index)->on_stack_size(callee_parameters, callee_locals, index == 0, - index == array->frames() - 1, popframe_extra_args); // This pc doesn't have to be perfect just good enough to identify the frame // as interpreted so the skeleton frame will be walkable diff -r a2221bbf6812 -r bba95ce6b634 src/share/vm/runtime/sharedRuntime.cpp --- a/src/share/vm/runtime/sharedRuntime.cpp Tue Jun 17 16:12:09 2014 -0700 +++ b/src/share/vm/runtime/sharedRuntime.cpp Tue Jun 17 22:15:24 2014 -0700 @@ -785,10 +785,13 @@ // going to be unwound. Dispatch to a shared runtime stub // which will cause the StackOverflowError to be fabricated // and processed. - // For stack overflow in deoptimization blob, cleanup thread. - if (thread->deopt_mark() != NULL) { - Deoptimization::cleanup_deopt_info(thread, NULL); - } + // Stack overflow should never occur during deoptimization: + // the compiled method bangs the stack by as much as the + // interpreter would need in case of a deoptimization. The + // deoptimization blob and uncommon trap blob bang the stack + // in a debug VM to verify the correctness of the compiled + // method stack banging. 
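A note on the DEBUG_ONLY(|| true) in Compile::need_stack_bang() above: HotSpot's DEBUG_ONLY macro (defined in utilities/macros.hpp) expands its argument only in debug (ASSERT) builds, so a debug VM bangs every compiled frame while product builds keep the cheap heuristic. A condensed, self-contained sketch of the same pattern follows; need_stack_bang_demo is an illustrative stand-in, not the HotSpot function.

// Same shape as HotSpot's DEBUG_ONLY from utilities/macros.hpp.
#ifdef ASSERT
#define DEBUG_ONLY(code) code
#else
#define DEBUG_ONLY(code)
#endif

// Product build: bang only when the method makes Java calls or has a
// large frame. Debug build: the condition ends in "|| true", so every
// compiled method bangs its stack, and the deopt/uncommon-trap blobs
// can verify that the banging covered the interpreter's worst case.
static bool need_stack_bang_demo(bool use_banging, bool is_stub,
                                 bool has_java_calls,
                                 int frame_size_in_bytes, int page_size) {
  return use_banging && !is_stub &&
         (has_java_calls || frame_size_in_bytes > (page_size >> 3)
          DEBUG_ONLY(|| true));
}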
+ assert(thread->deopt_mark() == NULL, "no stack overflow from deopt blob/uncommon trap"); Events::log_exception(thread, "StackOverflowError at " INTPTR_FORMAT, pc); return StubRoutines::throw_StackOverflowError_entry(); } diff -r a2221bbf6812 -r bba95ce6b634 src/share/vm/runtime/thread.cpp --- a/src/share/vm/runtime/thread.cpp Tue Jun 17 16:12:09 2014 -0700 +++ b/src/share/vm/runtime/thread.cpp Tue Jun 17 22:15:24 2014 -0700 @@ -3574,6 +3574,8 @@ // debug stuff, that does not work until all basic classes have been initialized. set_init_completed(); + Metaspace::post_initialize(); + #ifndef USDT2 HS_DTRACE_PROBE(hotspot, vm__init__end); #else /* USDT2 */ diff -r a2221bbf6812 -r bba95ce6b634 src/share/vm/runtime/vframeArray.cpp --- a/src/share/vm/runtime/vframeArray.cpp Tue Jun 17 16:12:09 2014 -0700 +++ b/src/share/vm/runtime/vframeArray.cpp Tue Jun 17 22:15:24 2014 -0700 @@ -419,24 +419,20 @@ } -int vframeArrayElement::on_stack_size(int caller_actual_parameters, - int callee_parameters, +int vframeArrayElement::on_stack_size(int callee_parameters, int callee_locals, bool is_top_frame, - bool is_bottom_frame, int popframe_extra_stack_expression_els) const { assert(method()->max_locals() == locals()->size(), "just checking"); int locks = monitors() == NULL ? 0 : monitors()->number_of_monitors(); int temps = expressions()->size(); - return Interpreter::size_activation(method(), + return Interpreter::size_activation(method()->max_stack(), temps + callee_parameters, popframe_extra_stack_expression_els, locks, - caller_actual_parameters, callee_parameters, callee_locals, - is_top_frame, - is_bottom_frame); + is_top_frame); } diff -r a2221bbf6812 -r bba95ce6b634 src/share/vm/runtime/vframeArray.hpp --- a/src/share/vm/runtime/vframeArray.hpp Tue Jun 17 16:12:09 2014 -0700 +++ b/src/share/vm/runtime/vframeArray.hpp Tue Jun 17 22:15:24 2014 -0700 @@ -85,10 +85,8 @@ // Returns the on stack word size for this frame // callee_parameters is the number of callee locals residing inside this frame - int on_stack_size(int caller_actual_parameters, - int callee_parameters, + int on_stack_size(int callee_parameters, int callee_locals, - bool is_bottom_frame, bool is_top_frame, int popframe_extra_stack_expression_els) const; diff -r a2221bbf6812 -r bba95ce6b634 test/compiler/loopopts/TestLogSum.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/compiler/loopopts/TestLogSum.java Tue Jun 17 22:15:24 2014 -0700 @@ -0,0 +1,111 @@ +/* + * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ */ + +/* + * @test + * @bug 8046516 + * @summary Segmentation fault in JVM (easily reproducible) + * @run main/othervm -XX:-TieredCompilation -Xbatch TestLogSum + * @author jackkamm@gmail.com + */ + +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +public class TestLogSum { + public static void main(String[] args) { + double sum; + + for (int i = 0; i < 6; i++) { + for (int n = 2; n < 30; n++) { + for (int j = 1; j <= n; j++) { + for (int k = 1; k <= j; k++) { + // System.out.println(computeSum(k, j)); + sum = computeSum(k, j); + } + } + } + } + } + + private static Map<List<Integer>, Double> cache = new HashMap<List<Integer>, Double>(); + public static double computeSum(int x, int y) { + List<Integer> key = Arrays.asList(new Integer[] {x, y}); + + if (!cache.containsKey(key)) { + + // explicitly creating/updating a double[] array, instead of using the LogSumArray wrapper object, will prevent the error + LogSumArray toReturn = new LogSumArray(x); + + // changing loop indices will prevent the error + // in particular, for(z=0; z [... lines lost in extraction: the remainder of TestLogSum.java and the start of the next new test file, TestMetaspaceInitialization.java ...] + List<Internal> l = new ArrayList<>(); + l.add(new Internal(17)); + } + + public static void main(String[] args) { + new TestMetaspaceInitialization().test(); + } +} diff -r a2221bbf6812 -r bba95ce6b634 test/runtime/Thread/TestThreadDumpMonitorContention.java --- a/test/runtime/Thread/TestThreadDumpMonitorContention.java Tue Jun 17 16:12:09 2014 -0700 +++ b/test/runtime/Thread/TestThreadDumpMonitorContention.java Tue Jun 17 22:15:24 2014 -0700 @@ -24,6 +24,7 @@ /* * @test * @bug 8036823 + * @bug 8046287 * @summary Creates two threads contending for the same lock and checks * whether jstack reports "locked" by more than one thread. * @@ -52,10 +53,13 @@ // looking for header lines with these patterns: // "ContendingThread-1" #19 prio=5 os_prio=64 tid=0x000000000079c000 nid=0x23 runnable [0xffff80ffb8b87000] // "ContendingThread-2" #21 prio=5 os_prio=64 tid=0x0000000000780000 nid=0x2f waiting for monitor entry [0xfffffd7fc1111000] + // "ContendingThread-2" #24 prio=5 os_prio=64 tid=0x0000000000ec8800 nid=0x31 waiting on condition [0xfffffd7bbfffe000] final static Pattern HEADER_PREFIX_PATTERN = Pattern.compile( "^\"ContendingThread-.*"); - final static Pattern HEADER_WAITING_PATTERN = Pattern.compile( + final static Pattern HEADER_WAITING_PATTERN1 = Pattern.compile( "^\"ContendingThread-.* waiting for monitor entry .*"); + final static Pattern HEADER_WAITING_PATTERN2 = Pattern.compile( + "^\"ContendingThread-.* waiting on condition .*"); final static Pattern HEADER_RUNNABLE_PATTERN = Pattern.compile( "^\"ContendingThread-.* runnable .*"); @@ -80,17 +84,34 @@ final static Pattern WAITING_PATTERN = Pattern.compile( ".* waiting to lock \\<.*\\(a TestThreadDumpMonitorContention.*"); + final static Object barrier = new Object(); volatile static boolean done = false; + static int barrier_cnt = 0; + static int blank_line_match_cnt = 0; static int error_cnt = 0; - static String header_line = null; static boolean have_header_line = false; static boolean have_thread_state_line = false; - static int match_cnt = 0; - static String[] match_list = new String[2]; + static String header_line = null; + static int header_prefix_match_cnt = 0; + static int locked_line_match_cnt = 0; + static String[] locked_match_list = new String[2]; static int n_samples = 15; + static int sum_both_running_cnt = 0; + static int sum_both_waiting_cnt = 0; + static int sum_contended_cnt = 0; + static int sum_locked_hdr_runnable_cnt = 0; + static int sum_locked_hdr_waiting1_cnt = 0; + static int 
sum_locked_hdr_waiting2_cnt = 0; + static int sum_locked_thr_state_blocked_cnt = 0; + static int sum_locked_thr_state_runnable_cnt = 0; + static int sum_one_waiting_cnt = 0; + static int sum_uncontended_cnt = 0; + static int sum_waiting_hdr_waiting1_cnt = 0; + static int sum_waiting_thr_state_blocked_cnt = 0; static String thread_state_line = null; static boolean verbose = false; + static int waiting_line_match_cnt = 0; public static void main(String[] args) throws Exception { if (args.length != 0) { @@ -110,6 +131,11 @@ Runnable runnable = new Runnable() { public void run() { + synchronized (barrier) { + // let the main thread know we're running + barrier_cnt++; + barrier.notify(); + } while (!done) { synchronized (this) { } } @@ -118,8 +144,16 @@ Thread[] thread_list = new Thread[2]; thread_list[0] = new Thread(runnable, "ContendingThread-1"); thread_list[1] = new Thread(runnable, "ContendingThread-2"); - thread_list[0].start(); - thread_list[1].start(); + synchronized (barrier) { + thread_list[0].start(); + thread_list[1].start(); + + // Wait until the contending threads are running so that + // we don't sample any thread init states. + while (barrier_cnt < 2) { + barrier.wait(); + } + } doSamples(); @@ -143,11 +177,12 @@ // Example: // "ContendingThread-1" #21 prio=5 os_prio=64 tid=0x00000000007b9000 nid=0x2f runnable [0xfffffd7fc1111000] // java.lang.Thread.State: RUNNABLE - // at TestThreadDumpMonitorContention$1.run(TestThreadDumpMonitorContention.java:67) + // at TestThreadDumpMonitorContention$1.run(TestThreadDumpMonitorContention.java:140) // at java.lang.Thread.run(Thread.java:745) // static boolean checkBlankLine(String line) { if (line.length() == 0) { + blank_line_match_cnt++; have_header_line = false; have_thread_state_line = false; return true; @@ -161,49 +196,73 @@ // Example 1: // "ContendingThread-1" #21 prio=5 os_prio=64 tid=0x00000000007b9000 nid=0x2f runnable [0xfffffd7fc1111000] // java.lang.Thread.State: RUNNABLE - // at TestThreadDumpMonitorContention$1.run(TestThreadDumpMonitorContention.java:67) + // at TestThreadDumpMonitorContention$1.run(TestThreadDumpMonitorContention.java:140) // - locked <0xfffffd7e6a2912f8> (a TestThreadDumpMonitorContention$1) // at java.lang.Thread.run(Thread.java:745) // // Example 2: // "ContendingThread-1" #21 prio=5 os_prio=64 tid=0x00000000007b9000 nid=0x2f waiting for monitor entry [0xfffffd7fc1111000] // java.lang.Thread.State: BLOCKED (on object monitor) - // at TestThreadDumpMonitorContention$1.run(TestThreadDumpMonitorContention.java:67) + // at TestThreadDumpMonitorContention$1.run(TestThreadDumpMonitorContention.java:140) // - locked <0xfffffd7e6a2912f8> (a TestThreadDumpMonitorContention$1) // at java.lang.Thread.run(Thread.java:745) // + // Example 3: + // "ContendingThread-2" #24 prio=5 os_prio=64 tid=0x0000000000ec8800 nid=0x31 waiting on condition [0xfffffd7bbfffe000] + // java.lang.Thread.State: RUNNABLE + // JavaThread state: _thread_blocked + // Thread: 0x0000000000ec8800 [0x31] State: _at_safepoint _has_called_back 0 _at_poll_safepoint 0 + // JavaThread state: _thread_blocked + // at TestThreadDumpMonitorContention$1.run(TestThreadDumpMonitorContention.java:140) + // - locked <0xfffffd7e6d03eb28> (a TestThreadDumpMonitorContention$1) + // at java.lang.Thread.run(Thread.java:745) + // static boolean checkLockedLine(String line) { Matcher matcher = LOCK_PATTERN.matcher(line); if (matcher.matches()) { if (verbose) { System.out.println("locked_line='" + line + "'"); } - match_list[match_cnt] = new String(line); - 
match_cnt++; + locked_match_list[locked_line_match_cnt] = new String(line); + locked_line_match_cnt++; matcher = HEADER_RUNNABLE_PATTERN.matcher(header_line); - if (!matcher.matches()) { + if (matcher.matches()) { + sum_locked_hdr_runnable_cnt++; + } else { // It's strange, but a locked line can also - // match the HEADER_WAITING_PATTERN. - matcher = HEADER_WAITING_PATTERN.matcher(header_line); - if (!matcher.matches()) { - System.err.println(); - System.err.println("ERROR: header line does " + - "not match runnable or waiting patterns."); - System.err.println("ERROR: header_line='" + - header_line + "'"); - System.err.println("ERROR: locked_line='" + line + "'"); - error_cnt++; + // match the HEADER_WAITING_PATTERN{1,2}. + matcher = HEADER_WAITING_PATTERN1.matcher(header_line); + if (matcher.matches()) { + sum_locked_hdr_waiting1_cnt++; + } else { + matcher = HEADER_WAITING_PATTERN2.matcher(header_line); + if (matcher.matches()) { + sum_locked_hdr_waiting2_cnt++; + } else { + System.err.println(); + System.err.println("ERROR: header line does " + + "not match runnable or waiting patterns."); + System.err.println("ERROR: header_line='" + + header_line + "'"); + System.err.println("ERROR: locked_line='" + line + + "'"); + error_cnt++; + } } } matcher = THREAD_STATE_RUNNABLE_PATTERN.matcher(thread_state_line); - if (!matcher.matches()) { + if (matcher.matches()) { + sum_locked_thr_state_runnable_cnt++; + } else { // It's strange, but a locked line can also // match the THREAD_STATE_BLOCKED_PATTERN. matcher = THREAD_STATE_BLOCKED_PATTERN.matcher( thread_state_line); - if (!matcher.matches()) { + if (matcher.matches()) { + sum_locked_thr_state_blocked_cnt++; + } else { System.err.println(); System.err.println("ERROR: thread state line does not " + "match runnable or waiting patterns."); @@ -229,19 +288,22 @@ // Example: // "ContendingThread-2" #22 prio=5 os_prio=64 tid=0x00000000007b9800 nid=0x30 waiting for monitor entry [0xfffffd7fc1010000] // java.lang.Thread.State: BLOCKED (on object monitor) - // at TestThreadDumpMonitorContention$1.run(TestThreadDumpMonitorContention.java:67) + // at TestThreadDumpMonitorContention$1.run(TestThreadDumpMonitorContention.java:140) // - waiting to lock <0xfffffd7e6a2912f8> (a TestThreadDumpMonitorContention$1) // at java.lang.Thread.run(Thread.java:745) // static boolean checkWaitingLine(String line) { Matcher matcher = WAITING_PATTERN.matcher(line); if (matcher.matches()) { + waiting_line_match_cnt++; if (verbose) { System.out.println("waiting_line='" + line + "'"); } - matcher = HEADER_WAITING_PATTERN.matcher(header_line); - if (!matcher.matches()) { + matcher = HEADER_WAITING_PATTERN1.matcher(header_line); + if (matcher.matches()) { + sum_waiting_hdr_waiting1_cnt++; + } else { System.err.println(); System.err.println("ERROR: header line does " + "not match a waiting pattern."); @@ -251,7 +313,9 @@ } matcher = THREAD_STATE_BLOCKED_PATTERN.matcher(thread_state_line); - if (!matcher.matches()) { + if (matcher.matches()) { + sum_waiting_thr_state_blocked_cnt++; + } else { System.err.println(); System.err.println("ERROR: thread state line " + "does not match a waiting pattern."); @@ -273,7 +337,10 @@ static void doSamples() throws Exception { for (int count = 0; count < n_samples; count++) { - match_cnt = 0; + blank_line_match_cnt = 0; + header_prefix_match_cnt = 0; + locked_line_match_cnt = 0; + waiting_line_match_cnt = 0; // verbose mode or an error has a lot of output so add more space if (verbose || error_cnt > 0) System.out.println(); 
System.out.println("Sample #" + count); @@ -290,12 +357,12 @@ // a failure and we report it // - for a stack trace that matches LOCKED_PATTERN, we verify: // - the header line matches HEADER_RUNNABLE_PATTERN - // or HEADER_WAITING_PATTERN + // or HEADER_WAITING_PATTERN{1,2} // - the thread state line matches THREAD_STATE_BLOCKED_PATTERN // or THREAD_STATE_RUNNABLE_PATTERN // - we report any mismatches as failures // - for a stack trace that matches WAITING_PATTERN, we verify: - // - the header line matches HEADER_WAITING_PATTERN + // - the header line matches HEADER_WAITING_PATTERN1 // - the thread state line matches THREAD_STATE_BLOCKED_PATTERN // - we report any mismatches as failures // - the stack traces that match HEADER_PREFIX_PATTERN may @@ -324,6 +391,7 @@ if (!have_header_line) { matcher = HEADER_PREFIX_PATTERN.matcher(line); if (matcher.matches()) { + header_prefix_match_cnt++; if (verbose) { System.out.println(); System.out.println("header='" + line + "'"); @@ -366,19 +434,80 @@ } process.waitFor(); - if (match_cnt == 2) { - if (match_list[0].equals(match_list[1])) { - System.err.println(); - System.err.println("ERROR: matching lock lines:"); - System.err.println("ERROR: line[0]'" + match_list[0] + "'"); - System.err.println("ERROR: line[1]'" + match_list[1] + "'"); - error_cnt++; - } - } + if (header_prefix_match_cnt != 2) { + System.err.println(); + System.err.println("ERROR: should match exactly two headers."); + System.err.println("ERROR: header_prefix_match_cnt=" + + header_prefix_match_cnt); + error_cnt++; + } + + if (locked_line_match_cnt == 2) { + if (locked_match_list[0].equals(locked_match_list[1])) { + System.err.println(); + System.err.println("ERROR: matching lock lines:"); + System.err.println("ERROR: line[0]'" + + locked_match_list[0] + "'"); + System.err.println("ERROR: line[1]'" + + locked_match_list[1] + "'"); + error_cnt++; + } + } + + if (locked_line_match_cnt == 1) { + // one thread has the lock + if (waiting_line_match_cnt == 1) { + // and the other contended for it + sum_contended_cnt++; + } else { + // and the other is just running + sum_uncontended_cnt++; + } + } else if (waiting_line_match_cnt == 1) { + // one thread is waiting + sum_one_waiting_cnt++; + } else if (waiting_line_match_cnt == 2) { + // both threads are waiting + sum_both_waiting_cnt++; + } else { + // both threads are running + sum_both_running_cnt++; + } // slight delay between jstack launches Thread.sleep(500); } + + if (error_cnt != 0) { + // skip summary info since there were errors + return; + } + + System.out.println("INFO: Summary for all samples:"); + System.out.println("INFO: both_running_cnt=" + sum_both_running_cnt); + System.out.println("INFO: both_waiting_cnt=" + sum_both_waiting_cnt); + System.out.println("INFO: contended_cnt=" + sum_contended_cnt); + System.out.println("INFO: one_waiting_cnt=" + sum_one_waiting_cnt); + System.out.println("INFO: uncontended_cnt=" + sum_uncontended_cnt); + System.out.println("INFO: locked_hdr_runnable_cnt=" + + sum_locked_hdr_runnable_cnt); + System.out.println("INFO: locked_hdr_waiting1_cnt=" + + sum_locked_hdr_waiting1_cnt); + System.out.println("INFO: locked_hdr_waiting2_cnt=" + + sum_locked_hdr_waiting2_cnt); + System.out.println("INFO: locked_thr_state_blocked_cnt=" + + sum_locked_thr_state_blocked_cnt); + System.out.println("INFO: locked_thr_state_runnable_cnt=" + + sum_locked_thr_state_runnable_cnt); + System.out.println("INFO: waiting_hdr_waiting1_cnt=" + + sum_waiting_hdr_waiting1_cnt); + System.out.println("INFO: 
waiting_thr_state_blocked_cnt=" + + sum_waiting_thr_state_blocked_cnt); + + if (sum_contended_cnt == 0) { + System.err.println("WARNING: the primary scenario for 8036823" + + " has not been exercised by this test run."); + } } // This helper relies on RuntimeMXBean.getName() returning a string diff -r a2221bbf6812 -r bba95ce6b634 test/serviceability/ParserTest.java --- a/test/serviceability/ParserTest.java Tue Jun 17 16:12:09 2014 -0700 +++ b/test/serviceability/ParserTest.java Tue Jun 17 22:15:24 2014 -0700 @@ -22,10 +22,10 @@ */ /* - * @test ParserTest + * @test * @summary Test that the diagnostic command argument parser works * @library /testlibrary /testlibrary/whitebox - * @build ParserTest + * @build ClassFileInstaller sun.hotspot.WhiteBox sun.hotspot.parser.* * @run main ClassFileInstaller sun.hotspot.WhiteBox * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI ParserTest */ diff -r a2221bbf6812 -r bba95ce6b634 test/serviceability/attach/AttachWithStalePidFile.java --- a/test/serviceability/attach/AttachWithStalePidFile.java Tue Jun 17 16:12:09 2014 -0700 +++ b/test/serviceability/attach/AttachWithStalePidFile.java Tue Jun 17 22:15:24 2014 -0700 @@ -27,7 +27,7 @@ * @key regression * @summary Regression test for attach issue where stale pid files in /tmp lead to connection issues * @library /testlibrary - * @compile AttachWithStalePidFileTarget.java + * @build com.oracle.java.testlibrary.* AttachWithStalePidFileTarget * @run main AttachWithStalePidFile */ diff -r a2221bbf6812 -r bba95ce6b634 test/serviceability/jvmti/GetObjectSizeOverflow.java --- a/test/serviceability/jvmti/GetObjectSizeOverflow.java Tue Jun 17 16:12:09 2014 -0700 +++ b/test/serviceability/jvmti/GetObjectSizeOverflow.java Tue Jun 17 22:15:24 2014 -0700 @@ -29,7 +29,7 @@ * @test * @bug 8027230 * @library /testlibrary - * @build GetObjectSizeOverflowAgent + * @build ClassFileInstaller com.oracle.java.testlibrary.* GetObjectSizeOverflowAgent * @run main ClassFileInstaller GetObjectSizeOverflowAgent * @run main GetObjectSizeOverflow */ diff -r a2221bbf6812 -r bba95ce6b634 test/serviceability/jvmti/TestRedefineWithUnresolvedClass.java --- a/test/serviceability/jvmti/TestRedefineWithUnresolvedClass.java Tue Jun 17 16:12:09 2014 -0700 +++ b/test/serviceability/jvmti/TestRedefineWithUnresolvedClass.java Tue Jun 17 22:15:24 2014 -0700 @@ -26,7 +26,7 @@ * @summary Redefine a class with an UnresolvedClass reference in the constant pool. * @bug 8035150 * @library /testlibrary - * @build UnresolvedClassAgent com.oracle.java.testlibrary.ProcessTools com.oracle.java.testlibrary.OutputAnalyzer + * @build com.oracle.java.testlibrary.* UnresolvedClassAgent * @run main TestRedefineWithUnresolvedClass */ diff -r a2221bbf6812 -r bba95ce6b634 test/serviceability/sa/jmap-hashcode/Test8028623.java --- a/test/serviceability/sa/jmap-hashcode/Test8028623.java Tue Jun 17 16:12:09 2014 -0700 +++ b/test/serviceability/sa/jmap-hashcode/Test8028623.java Tue Jun 17 22:15:24 2014 -0700 @@ -26,6 +26,7 @@ * @bug 8028623 * @summary Test hashing of extended characters in Serviceability Agent. 
* @library /testlibrary + * @build com.oracle.java.testlibrary.* * @compile -encoding utf8 Test8028623.java * @run main Test8028623 */ diff -r a2221bbf6812 -r bba95ce6b634 test/serviceability/sa/jmap-hprof/JMapHProfLargeHeapTest.java --- a/test/serviceability/sa/jmap-hprof/JMapHProfLargeHeapTest.java Tue Jun 17 16:12:09 2014 -0700 +++ b/test/serviceability/sa/jmap-hprof/JMapHProfLargeHeapTest.java Tue Jun 17 22:15:24 2014 -0700 @@ -44,7 +44,7 @@ * @key regression * @summary Regression test for hprof export issue due to large heaps (>2G) * @library /testlibrary - * @compile JMapHProfLargeHeapProc.java + * @build com.oracle.java.testlibrary.* JMapHProfLargeHeapProc * @run main JMapHProfLargeHeapTest */ diff -r a2221bbf6812 -r bba95ce6b634 test/testlibrary/ctw/test/ClassesDirTest.java --- a/test/testlibrary/ctw/test/ClassesDirTest.java Tue Jun 17 16:12:09 2014 -0700 +++ b/test/testlibrary/ctw/test/ClassesDirTest.java Tue Jun 17 22:15:24 2014 -0700 @@ -22,10 +22,10 @@ */ /* - * @test ClassesDirTest + * @test * @bug 8012447 * @library /testlibrary /testlibrary/whitebox /testlibrary/ctw/src - * @build sun.hotspot.tools.ctw.CompileTheWorld sun.hotspot.WhiteBox ClassesDirTest Foo Bar + * @build ClassFileInstaller sun.hotspot.tools.ctw.CompileTheWorld sun.hotspot.WhiteBox Foo Bar * @run main ClassFileInstaller sun.hotspot.WhiteBox Foo Bar * @run main ClassesDirTest prepare * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -Dsun.hotspot.tools.ctw.logfile=ctw.log sun.hotspot.tools.ctw.CompileTheWorld classes diff -r a2221bbf6812 -r bba95ce6b634 test/testlibrary/ctw/test/ClassesListTest.java --- a/test/testlibrary/ctw/test/ClassesListTest.java Tue Jun 17 16:12:09 2014 -0700 +++ b/test/testlibrary/ctw/test/ClassesListTest.java Tue Jun 17 22:15:24 2014 -0700 @@ -22,10 +22,10 @@ */ /* - * @test ClassesListTest + * @test * @bug 8012447 * @library /testlibrary /testlibrary/whitebox /testlibrary/ctw/src - * @build sun.hotspot.tools.ctw.CompileTheWorld sun.hotspot.WhiteBox ClassesListTest Foo Bar + * @build ClassFileInstaller sun.hotspot.tools.ctw.CompileTheWorld sun.hotspot.WhiteBox Foo Bar * @run main ClassFileInstaller sun.hotspot.WhiteBox Foo Bar * @run main ClassesListTest prepare * @run main/othervm/timeout=600 -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -Dsun.hotspot.tools.ctw.logfile=ctw.log sun.hotspot.tools.ctw.CompileTheWorld classes.lst diff -r a2221bbf6812 -r bba95ce6b634 test/testlibrary/ctw/test/JarDirTest.java --- a/test/testlibrary/ctw/test/JarDirTest.java Tue Jun 17 16:12:09 2014 -0700 +++ b/test/testlibrary/ctw/test/JarDirTest.java Tue Jun 17 22:15:24 2014 -0700 @@ -22,10 +22,10 @@ */ /* - * @test JarDirTest + * @test * @bug 8012447 * @library /testlibrary /testlibrary/whitebox /testlibrary/ctw/src - * @build sun.hotspot.tools.ctw.CompileTheWorld sun.hotspot.WhiteBox JarDirTest Foo Bar + * @build ClassFileInstaller com.oracle.java.testlibrary.* sun.hotspot.tools.ctw.CompileTheWorld sun.hotspot.WhiteBox Foo Bar * @run main ClassFileInstaller sun.hotspot.WhiteBox Foo Bar * @run main JarDirTest prepare * @run main/othervm -Xbootclasspath/a:. 
-XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -Dsun.hotspot.tools.ctw.logfile=ctw.log sun.hotspot.tools.ctw.CompileTheWorld jars/* diff -r a2221bbf6812 -r bba95ce6b634 test/testlibrary/ctw/test/JarsTest.java --- a/test/testlibrary/ctw/test/JarsTest.java Tue Jun 17 16:12:09 2014 -0700 +++ b/test/testlibrary/ctw/test/JarsTest.java Tue Jun 17 22:15:24 2014 -0700 @@ -22,10 +22,10 @@ */ /* - * @test JarsTest + * @test * @bug 8012447 * @library /testlibrary /testlibrary/whitebox /testlibrary/ctw/src - * @build sun.hotspot.tools.ctw.CompileTheWorld sun.hotspot.WhiteBox JarsTest Foo Bar + * @build ClassFileInstaller com.oracle.java.testlibrary.* sun.hotspot.tools.ctw.CompileTheWorld sun.hotspot.WhiteBox Foo Bar * @run main ClassFileInstaller sun.hotspot.WhiteBox Foo Bar * @run main JarsTest prepare * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -Dsun.hotspot.tools.ctw.logfile=ctw.log sun.hotspot.tools.ctw.CompileTheWorld foo.jar bar.jar