comparison src/share/vm/opto/macro.cpp @ 2100:b1a2afa37ec4

7003271: Hotspot should track cumulative Java heap bytes allocated on a per-thread basis. Summary: Track allocated bytes in Threads, update on TLAB retirement and direct allocation in Eden and tenured, add JNI methods for ThreadMXBean. Reviewed-by: coleenp, kvn, dholmes, ysr
author phh
date Fri, 07 Jan 2011 10:42:32 -0500
parents f95d63e2154a
children 149bb459be66
comparison
equal deleted inserted replaced
2097:039eb4201e06 2100:b1a2afa37ec4
1 /* 1 /*
2 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 2 * Copyright (c) 2005, 2011, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 * 4 *
5 * This code is free software; you can redistribute it and/or modify it 5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as 6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. 7 * published by the Free Software Foundation.
1156 // Load Eden::end. Loop invariant and hoisted. 1156 // Load Eden::end. Loop invariant and hoisted.
1157 // 1157 //
1158 // Note: We set the control input on "eden_end" and "old_eden_top" when using 1158 // Note: We set the control input on "eden_end" and "old_eden_top" when using
1159 // a TLAB to work around a bug where these values were being moved across 1159 // a TLAB to work around a bug where these values were being moved across
1160 // a safepoint. These are not oops, so they cannot be included in the oop 1160 // a safepoint. These are not oops, so they cannot be included in the oop
1161 // map, but the can be changed by a GC. The proper way to fix this would 1161 // map, but they can be changed by a GC. The proper way to fix this would
1162 // be to set the raw memory state when generating a SafepointNode. However 1162 // be to set the raw memory state when generating a SafepointNode. However
1163 // this will require extensive changes to the loop optimization in order to 1163 // this will require extensive changes to the loop optimization in order to
1164 // prevent a degradation of the optimization. 1164 // prevent a degradation of the optimization.
1165 // See comment in memnode.hpp, around line 227 in class LoadPNode. 1165 // See comment in memnode.hpp, around line 227 in class LoadPNode.
1166 Node *eden_end = make_load(ctrl, mem, eden_end_adr, 0, TypeRawPtr::BOTTOM, T_ADDRESS); 1166 Node *eden_end = make_load(ctrl, mem, eden_end_adr, 0, TypeRawPtr::BOTTOM, T_ADDRESS);
1167 1167
1168 // allocate the Region and Phi nodes for the result 1168 // allocate the Region and Phi nodes for the result
1169 result_region = new (C, 3) RegionNode(3); 1169 result_region = new (C, 3) RegionNode(3);
1170 result_phi_rawmem = new (C, 3) PhiNode( result_region, Type::MEMORY, TypeRawPtr::BOTTOM ); 1170 result_phi_rawmem = new (C, 3) PhiNode(result_region, Type::MEMORY, TypeRawPtr::BOTTOM);
1171 result_phi_rawoop = new (C, 3) PhiNode( result_region, TypeRawPtr::BOTTOM ); 1171 result_phi_rawoop = new (C, 3) PhiNode(result_region, TypeRawPtr::BOTTOM);
1172 result_phi_i_o = new (C, 3) PhiNode( result_region, Type::ABIO ); // I/O is used for Prefetch 1172 result_phi_i_o = new (C, 3) PhiNode(result_region, Type::ABIO); // I/O is used for Prefetch
1173 1173
1174 // We need a Region for the loop-back contended case. 1174 // We need a Region for the loop-back contended case.
1175 enum { fall_in_path = 1, contended_loopback_path = 2 }; 1175 enum { fall_in_path = 1, contended_loopback_path = 2 };
1176 Node *contended_region; 1176 Node *contended_region;
1177 Node *contended_phi_rawmem; 1177 Node *contended_phi_rawmem;
1178 if( UseTLAB ) { 1178 if (UseTLAB) {
1179 contended_region = toobig_false; 1179 contended_region = toobig_false;
1180 contended_phi_rawmem = mem; 1180 contended_phi_rawmem = mem;
1181 } else { 1181 } else {
1182 contended_region = new (C, 3) RegionNode(3); 1182 contended_region = new (C, 3) RegionNode(3);
1183 contended_phi_rawmem = new (C, 3) PhiNode( contended_region, Type::MEMORY, TypeRawPtr::BOTTOM); 1183 contended_phi_rawmem = new (C, 3) PhiNode(contended_region, Type::MEMORY, TypeRawPtr::BOTTOM);
1184 // Now handle the passing-too-big test. We fall into the contended 1184 // Now handle the passing-too-big test. We fall into the contended
1185 // loop-back merge point. 1185 // loop-back merge point.
1186 contended_region ->init_req( fall_in_path, toobig_false ); 1186 contended_region ->init_req(fall_in_path, toobig_false);
1187 contended_phi_rawmem->init_req( fall_in_path, mem ); 1187 contended_phi_rawmem->init_req(fall_in_path, mem);
1188 transform_later(contended_region); 1188 transform_later(contended_region);
1189 transform_later(contended_phi_rawmem); 1189 transform_later(contended_phi_rawmem);
1190 } 1190 }
1191 1191
1192 // Load(-locked) the heap top. 1192 // Load(-locked) the heap top.
1193 // See note above concerning the control input when using a TLAB 1193 // See note above concerning the control input when using a TLAB
1194 Node *old_eden_top = UseTLAB 1194 Node *old_eden_top = UseTLAB
1195 ? new (C, 3) LoadPNode ( ctrl, contended_phi_rawmem, eden_top_adr, TypeRawPtr::BOTTOM, TypeRawPtr::BOTTOM ) 1195 ? new (C, 3) LoadPNode (ctrl, contended_phi_rawmem, eden_top_adr, TypeRawPtr::BOTTOM, TypeRawPtr::BOTTOM)
1196 : new (C, 3) LoadPLockedNode( contended_region, contended_phi_rawmem, eden_top_adr ); 1196 : new (C, 3) LoadPLockedNode(contended_region, contended_phi_rawmem, eden_top_adr);
1197 1197
1198 transform_later(old_eden_top); 1198 transform_later(old_eden_top);
1199 // Add to heap top to get a new heap top 1199 // Add to heap top to get a new heap top
1200 Node *new_eden_top = new (C, 4) AddPNode( top(), old_eden_top, size_in_bytes ); 1200 Node *new_eden_top = new (C, 4) AddPNode(top(), old_eden_top, size_in_bytes);
1201 transform_later(new_eden_top); 1201 transform_later(new_eden_top);
1202 // Check for needing a GC; compare against heap end 1202 // Check for needing a GC; compare against heap end
1203 Node *needgc_cmp = new (C, 3) CmpPNode( new_eden_top, eden_end ); 1203 Node *needgc_cmp = new (C, 3) CmpPNode(new_eden_top, eden_end);
1204 transform_later(needgc_cmp); 1204 transform_later(needgc_cmp);
1205 Node *needgc_bol = new (C, 2) BoolNode( needgc_cmp, BoolTest::ge ); 1205 Node *needgc_bol = new (C, 2) BoolNode(needgc_cmp, BoolTest::ge);
1206 transform_later(needgc_bol); 1206 transform_later(needgc_bol);
1207 IfNode *needgc_iff = new (C, 2) IfNode(contended_region, needgc_bol, PROB_UNLIKELY_MAG(4), COUNT_UNKNOWN ); 1207 IfNode *needgc_iff = new (C, 2) IfNode(contended_region, needgc_bol, PROB_UNLIKELY_MAG(4), COUNT_UNKNOWN);
1208 transform_later(needgc_iff); 1208 transform_later(needgc_iff);
1209 1209
1210 // Plug the failing-heap-space-need-gc test into the slow-path region 1210 // Plug the failing-heap-space-need-gc test into the slow-path region
1211 Node *needgc_true = new (C, 1) IfTrueNode( needgc_iff ); 1211 Node *needgc_true = new (C, 1) IfTrueNode(needgc_iff);
1212 transform_later(needgc_true); 1212 transform_later(needgc_true);
1213 if( initial_slow_test ) { 1213 if (initial_slow_test) {
1214 slow_region ->init_req( need_gc_path, needgc_true ); 1214 slow_region->init_req(need_gc_path, needgc_true);
1215 // This completes all paths into the slow merge point 1215 // This completes all paths into the slow merge point
1216 transform_later(slow_region); 1216 transform_later(slow_region);
1217 } else { // No initial slow path needed! 1217 } else { // No initial slow path needed!
1218 // Just fall from the need-GC path straight into the VM call. 1218 // Just fall from the need-GC path straight into the VM call.
1219 slow_region = needgc_true; 1219 slow_region = needgc_true;
1220 } 1220 }
1221 // No need for a GC. Setup for the Store-Conditional 1221 // No need for a GC. Setup for the Store-Conditional
1222 Node *needgc_false = new (C, 1) IfFalseNode( needgc_iff ); 1222 Node *needgc_false = new (C, 1) IfFalseNode(needgc_iff);
1223 transform_later(needgc_false); 1223 transform_later(needgc_false);
1224 1224
1225 // Grab regular I/O before optional prefetch may change it. 1225 // Grab regular I/O before optional prefetch may change it.
1226 // Slow-path does no I/O so just set it to the original I/O. 1226 // Slow-path does no I/O so just set it to the original I/O.
1227 result_phi_i_o->init_req( slow_result_path, i_o ); 1227 result_phi_i_o->init_req(slow_result_path, i_o);
1228 1228
1229 i_o = prefetch_allocation(i_o, needgc_false, contended_phi_rawmem, 1229 i_o = prefetch_allocation(i_o, needgc_false, contended_phi_rawmem,
1230 old_eden_top, new_eden_top, length); 1230 old_eden_top, new_eden_top, length);
1231
1232 // Name successful fast-path variables
1233 Node* fast_oop = old_eden_top;
1234 Node* fast_oop_ctrl;
1235 Node* fast_oop_rawmem;
1231 1236
1232 // Store (-conditional) the modified eden top back down. 1237 // Store (-conditional) the modified eden top back down.
1233 // StorePConditional produces flags for a test PLUS a modified raw 1238 // StorePConditional produces flags for a test PLUS a modified raw
1234 // memory state. 1239 // memory state.
1235 Node *store_eden_top; 1240 if (UseTLAB) {
1236 Node *fast_oop_ctrl; 1241 Node* store_eden_top =
1237 if( UseTLAB ) { 1242 new (C, 4) StorePNode(needgc_false, contended_phi_rawmem, eden_top_adr,
1238 store_eden_top = new (C, 4) StorePNode( needgc_false, contended_phi_rawmem, eden_top_adr, TypeRawPtr::BOTTOM, new_eden_top ); 1243 TypeRawPtr::BOTTOM, new_eden_top);
1239 transform_later(store_eden_top); 1244 transform_later(store_eden_top);
1240 fast_oop_ctrl = needgc_false; // No contention, so this is the fast path 1245 fast_oop_ctrl = needgc_false; // No contention, so this is the fast path
1246 fast_oop_rawmem = store_eden_top;
1241 } else { 1247 } else {
1242 store_eden_top = new (C, 5) StorePConditionalNode( needgc_false, contended_phi_rawmem, eden_top_adr, new_eden_top, old_eden_top ); 1248 Node* store_eden_top =
1249 new (C, 5) StorePConditionalNode(needgc_false, contended_phi_rawmem, eden_top_adr,
1250 new_eden_top, fast_oop/*old_eden_top*/);
1243 transform_later(store_eden_top); 1251 transform_later(store_eden_top);
1244 Node *contention_check = new (C, 2) BoolNode( store_eden_top, BoolTest::ne ); 1252 Node *contention_check = new (C, 2) BoolNode(store_eden_top, BoolTest::ne);
1245 transform_later(contention_check); 1253 transform_later(contention_check);
1246 store_eden_top = new (C, 1) SCMemProjNode(store_eden_top); 1254 store_eden_top = new (C, 1) SCMemProjNode(store_eden_top);
1247 transform_later(store_eden_top); 1255 transform_later(store_eden_top);
1248 1256
1249 // If not using TLABs, check to see if there was contention. 1257 // If not using TLABs, check to see if there was contention.
1250 IfNode *contention_iff = new (C, 2) IfNode ( needgc_false, contention_check, PROB_MIN, COUNT_UNKNOWN ); 1258 IfNode *contention_iff = new (C, 2) IfNode (needgc_false, contention_check, PROB_MIN, COUNT_UNKNOWN);
1251 transform_later(contention_iff); 1259 transform_later(contention_iff);
1252 Node *contention_true = new (C, 1) IfTrueNode( contention_iff ); 1260 Node *contention_true = new (C, 1) IfTrueNode(contention_iff);
1253 transform_later(contention_true); 1261 transform_later(contention_true);
1254 // If contention, loopback and try again. 1262 // If contention, loopback and try again.
1255 contended_region->init_req( contended_loopback_path, contention_true ); 1263 contended_region->init_req(contended_loopback_path, contention_true);
1256 contended_phi_rawmem->init_req( contended_loopback_path, store_eden_top ); 1264 contended_phi_rawmem->init_req(contended_loopback_path, store_eden_top);
1257 1265
1258 // Fast-path succeeded with no contention! 1266 // Fast-path succeeded with no contention!
1259 Node *contention_false = new (C, 1) IfFalseNode( contention_iff ); 1267 Node *contention_false = new (C, 1) IfFalseNode(contention_iff);
1260 transform_later(contention_false); 1268 transform_later(contention_false);
1261 fast_oop_ctrl = contention_false; 1269 fast_oop_ctrl = contention_false;
1262 } 1270
1263 1271 // Bump total allocated bytes for this thread
1264 // Rename successful fast-path variables to make meaning more obvious 1272 Node* thread = new (C, 1) ThreadLocalNode();
1265 Node* fast_oop = old_eden_top; 1273 transform_later(thread);
1266 Node* fast_oop_rawmem = store_eden_top; 1274 Node* alloc_bytes_adr = basic_plus_adr(top()/*not oop*/, thread,
1275 in_bytes(JavaThread::allocated_bytes_offset()));
1276 Node* alloc_bytes = make_load(fast_oop_ctrl, store_eden_top, alloc_bytes_adr,
1277 0, TypeLong::LONG, T_LONG);
1278 #ifdef _LP64
1279 Node* alloc_size = size_in_bytes;
1280 #else
1281 Node* alloc_size = new (C, 2) ConvI2LNode(size_in_bytes);
1282 transform_later(alloc_size);
1283 #endif
1284 Node* new_alloc_bytes = new (C, 3) AddLNode(alloc_bytes, alloc_size);
1285 transform_later(new_alloc_bytes);
1286 fast_oop_rawmem = make_store(fast_oop_ctrl, store_eden_top, alloc_bytes_adr,
1287 0, new_alloc_bytes, T_LONG);
1288 }
1289
1267 fast_oop_rawmem = initialize_object(alloc, 1290 fast_oop_rawmem = initialize_object(alloc,
1268 fast_oop_ctrl, fast_oop_rawmem, fast_oop, 1291 fast_oop_ctrl, fast_oop_rawmem, fast_oop,
1269 klass_node, length, size_in_bytes); 1292 klass_node, length, size_in_bytes);
1270 1293
1271 if (C->env()->dtrace_extended_probes()) { 1294 if (C->env()->dtrace_extended_probes()) {
1280 Node* thread = new (C, 1) ThreadLocalNode(); 1303 Node* thread = new (C, 1) ThreadLocalNode();
1281 transform_later(thread); 1304 transform_later(thread);
1282 1305
1283 call->init_req(TypeFunc::Parms+0, thread); 1306 call->init_req(TypeFunc::Parms+0, thread);
1284 call->init_req(TypeFunc::Parms+1, fast_oop); 1307 call->init_req(TypeFunc::Parms+1, fast_oop);
1285 call->init_req( TypeFunc::Control, fast_oop_ctrl ); 1308 call->init_req(TypeFunc::Control, fast_oop_ctrl);
1286 call->init_req( TypeFunc::I_O , top() ) ; // does no i/o 1309 call->init_req(TypeFunc::I_O , top()); // does no i/o
1287 call->init_req( TypeFunc::Memory , fast_oop_rawmem ); 1310 call->init_req(TypeFunc::Memory , fast_oop_rawmem);
1288 call->init_req( TypeFunc::ReturnAdr, alloc->in(TypeFunc::ReturnAdr) ); 1311 call->init_req(TypeFunc::ReturnAdr, alloc->in(TypeFunc::ReturnAdr));
1289 call->init_req( TypeFunc::FramePtr, alloc->in(TypeFunc::FramePtr) ); 1312 call->init_req(TypeFunc::FramePtr, alloc->in(TypeFunc::FramePtr));
1290 transform_later(call); 1313 transform_later(call);
1291 fast_oop_ctrl = new (C, 1) ProjNode(call,TypeFunc::Control); 1314 fast_oop_ctrl = new (C, 1) ProjNode(call,TypeFunc::Control);
1292 transform_later(fast_oop_ctrl); 1315 transform_later(fast_oop_ctrl);
1293 fast_oop_rawmem = new (C, 1) ProjNode(call,TypeFunc::Memory); 1316 fast_oop_rawmem = new (C, 1) ProjNode(call,TypeFunc::Memory);
1294 transform_later(fast_oop_rawmem); 1317 transform_later(fast_oop_rawmem);
1295 } 1318 }
1296 1319
1297 // Plug in the successful fast-path into the result merge point 1320 // Plug in the successful fast-path into the result merge point
1298 result_region ->init_req( fast_result_path, fast_oop_ctrl ); 1321 result_region ->init_req(fast_result_path, fast_oop_ctrl);
1299 result_phi_rawoop->init_req( fast_result_path, fast_oop ); 1322 result_phi_rawoop->init_req(fast_result_path, fast_oop);
1300 result_phi_i_o ->init_req( fast_result_path, i_o ); 1323 result_phi_i_o ->init_req(fast_result_path, i_o);
1301 result_phi_rawmem->init_req( fast_result_path, fast_oop_rawmem ); 1324 result_phi_rawmem->init_req(fast_result_path, fast_oop_rawmem);
1302 } else { 1325 } else {
1303 slow_region = ctrl; 1326 slow_region = ctrl;
1304 } 1327 }
1305 1328
1306 // Generate slow-path call 1329 // Generate slow-path call