annotate src/share/vm/runtime/orderAccess.hpp @ 8804:91bf0bdae37b

8008217: CDS: Class data sharing limits the malloc heap on Solaris
Summary: In 64bit VM move CDS archive address to 32G on all platforms using new flag SharedBaseAddress. In 32bit VM set CDS archive address to 3Gb on Linux and let other OSs pick the address.
Reviewed-by: kvn, dcubed, zgu, hseigel

author:   coleenp
date:     Wed, 20 Mar 2013 08:04:54 -0400
parents:  f95d63e2154a
children: 63a4eb8bcd23
/*
 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#ifndef SHARE_VM_RUNTIME_ORDERACCESS_HPP
#define SHARE_VM_RUNTIME_ORDERACCESS_HPP

#include "memory/allocation.hpp"

// Memory Access Ordering Model
//
// This interface is based on the JSR-133 Cookbook for Compiler Writers
// and on the IA64 memory model. It is the dynamic equivalent of the
// C/C++ volatile specifier. I.e., volatility restricts compile-time
// memory access reordering in a way similar to what we want to occur
// at runtime.
//
// In the following, the terms 'previous', 'subsequent', 'before',
// 'after', 'preceding' and 'succeeding' refer to program order. The
// terms 'down' and 'below' refer to forward load or store motion
// relative to program order, while 'up' and 'above' refer to backward
// motion.
//
//
// We define four primitive memory barrier operations.
//
// LoadLoad:   Load1(s); LoadLoad; Load2
//
// Ensures that Load1 completes (obtains the value it loads from memory)
// before Load2 and any subsequent load operations. Loads before Load1
// may *not* float below Load2 and any subsequent load operations.
//
// StoreStore: Store1(s); StoreStore; Store2
//
// Ensures that Store1 completes (the effect on memory of Store1 is made
// visible to other processors) before Store2 and any subsequent store
// operations. Stores before Store1 may *not* float below Store2 and any
// subsequent store operations.
//
// LoadStore:  Load1(s); LoadStore; Store2
//
// Ensures that Load1 completes before Store2 and any subsequent store
// operations. Loads before Load1 may *not* float below Store2 and any
// subsequent store operations.
//
// StoreLoad:  Store1(s); StoreLoad; Load2
//
// Ensures that Store1 completes before Load2 and any subsequent load
// operations. Stores before Store1 may *not* float below Load2 and any
// subsequent load operations.
//
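// For example, StoreLoad is what a Dekker-style handshake needs between
// each thread's own store and its subsequent load. A sketch (_flag1 and
// _flag2 are hypothetical volatile jint fields, not part of this
// interface):
//
//   // Thread 1:                      // Thread 2:
//   _flag1 = 1;                       _flag2 = 1;
//   OrderAccess::storeload();         OrderAccess::storeload();
//   if (_flag2 == 0) { /* enter */ }  if (_flag1 == 0) { /* enter */ }
//
// Without the StoreLoad, each thread's load could be satisfied before its
// own store became visible to the other, so both threads could read 0 and
// enter the guarded region at the same time.
//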
//
// We define two further operations, 'release' and 'acquire'. They are
// mirror images of each other.
//
// Execution by a processor of release makes the effect of all memory
// accesses issued by it previous to the release visible to all
// processors *before* the release completes. The effect of subsequent
// memory accesses issued by it *may* be made visible *before* the
// release. I.e., subsequent memory accesses may float above the
// release, but prior ones may not float below it.
//
// Execution by a processor of acquire makes the effect of all memory
// accesses issued by it subsequent to the acquire visible to all
// processors *after* the acquire completes. The effect of prior memory
// accesses issued by it *may* be made visible *after* the acquire.
// I.e., prior memory accesses may float below the acquire, but
// subsequent ones may not float above it.
//
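// For example, release and acquire pair up to publish data. A sketch
// (_data and _ready are hypothetical volatile fields, not part of this
// interface):
//
//   // Writer:                        // Reader:
//   _data = 42;                       while (_ready == 0) ;
//   OrderAccess::release();           OrderAccess::acquire();
//   _ready = 1;                       tmp = _data; // sees 42
//
// The release keeps the store to _data from floating below the store to
// _ready; the acquire keeps the load of _data from floating above the
// load that saw _ready == 1. The composite operations described below
// package each pairing with its adjacent access.
//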
// Finally, we define a 'fence' operation, which conceptually is a
// release combined with an acquire. In the real world these operations
// require one or more machine instructions which can float above and
// below the release or acquire, so we usually can't just issue the
// release-acquire back-to-back. All machines we know of implement some
// sort of memory fence instruction.
//
//
// The standalone implementations of release and acquire need an associated
// dummy volatile store or load respectively. To avoid redundant operations,
// we can define the composite operators: 'release_store', 'store_fence' and
// 'load_acquire'. Here's a summary of the machine instructions corresponding
// to each operation.
//
//               sparc RMO            ia64             x86
// ---------------------------------------------------------------------
// fence         membar #LoadStore |  mf               lock addl 0,(sp)
//                      #StoreStore |
//                      #LoadLoad |
//                      #StoreLoad
//
// release       membar #LoadStore |  st.rel [sp]=r0   movl $0,<dummy>
//                      #StoreStore
//               st %g0,[]
//
// acquire       ld [%sp],%g0         ld.acq <r>=[sp]  movl (sp),<r>
//               membar #LoadLoad |
//                      #LoadStore
//
// release_store membar #LoadStore |  st.rel           <store>
//                      #StoreStore
//               st
//
// store_fence   st                   st               lock xchg
//               fence                mf
//
// load_acquire  ld                   ld.acq           <load>
//               membar #LoadLoad |
//                      #LoadStore
//
// Using only release_store and load_acquire, we can implement the
// following ordered sequences.
//
// 1. load, load   == load_acquire,  load
//                 or load_acquire,  load_acquire
// 2. load, store  == load,          release_store
//                 or load_acquire,  store
//                 or load_acquire,  release_store
// 3. store, store == store,         release_store
//                 or release_store, release_store
//
// These require no membar instructions for sparc-TSO and no extra
// instructions for ia64.
//
// Ordering a load relative to preceding stores requires a store_fence,
// which implies a membar #StoreLoad between the store and load under
// sparc-TSO. A fence is required by ia64. On x86, we use locked xchg.
//
// 4. store, load  == store_fence, load
//
// Use store_fence to make sure all stores done in an 'interesting'
// region are made visible prior to both subsequent loads and stores.
//
// Conventional usage is to issue a load_acquire for ordered loads. Use
// release_store for ordered stores when you care only that prior stores
// are visible before the release_store, but don't care exactly when the
// store associated with the release_store becomes visible. Use
// release_store_fence to update values like the thread state, where we
// don't want the current thread to continue until all our prior memory
// accesses (including the new thread state) are visible to other threads.
//
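// For example, a state transition in that style might look like this
// (a sketch; _thread_state and the state values are hypothetical, not
// code from this file):
//
//   // Transitioning thread: all prior accesses, including the store of
//   // the new state, must be visible before this thread continues.
//   OrderAccess::release_store_fence(&_thread_state, _thread_blocked);
//
//   // Observing thread: read the state, then safely read anything the
//   // transitioning thread wrote before it changed state.
//   jint s = OrderAccess::load_acquire(&_thread_state);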
//
//
// C++ Volatility
//
// C++ guarantees ordering at operations termed 'sequence points' (defined
// to be volatile accesses and calls to library I/O functions). 'Side
// effects' (defined as volatile accesses, calls to library I/O functions
// and object modification) previous to a sequence point must be visible
// at that sequence point. See the C++ standard, section 1.9, titled
// "Program Execution". This means that all barrier implementations,
// including standalone loadload, storestore, loadstore, storeload, acquire
// and release must include a sequence point, usually via a volatile memory
// access. Other ways to guarantee a sequence point are, e.g., use of
// indirect calls and linux's __asm__ volatile.
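//
// For example, the usual gcc/linux compiler-only barrier is an empty
// __asm__ volatile with a "memory" clobber (a sketch of the common
// idiom, not code from this file):
//
//   __asm__ volatile ("" : : : "memory");
//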
// Note: as of 6973570, we have replaced the originally static "dummy" field
// (see above) by a volatile store to the stack. All of the versions of the
// compilers that we currently use (SunStudio, gcc and VC++) respect the
// semantics of volatile here. If you build HotSpot using other
// compilers, you may need to verify that no compiler reordering occurs
// across the sequence point represented by the volatile access.
//
//
// os::is_MP Considered Redundant
//
// Callers of this interface do not need to test os::is_MP() before
// issuing an operation. The test is taken care of by the implementation
// of the interface (depending on the vm version and platform, the test
// may or may not be actually done by the implementation).
//
//
// A Note on Memory Ordering and Cache Coherency
//
// Cache coherency and memory ordering are orthogonal concepts, though they
// interact. E.g., all existing itanium machines are cache-coherent, but
// the hardware can freely reorder loads wrt other loads unless it sees a
// load-acquire instruction. All existing sparc machines are cache-coherent
// and, unlike itanium, TSO guarantees that the hardware orders loads wrt
// loads and stores, and stores wrt each other.
//
// Consider the implementation of loadload. *If* your platform *isn't*
// cache-coherent, then loadload must not only prevent hardware load
// instruction reordering, but it must *also* ensure that subsequent
// loads from addresses that could be written by other processors (i.e.,
// that are broadcast by other processors) go all the way to the first
// level of memory shared by those processors and the one issuing
// the loadload.
//
// So if we have an MP that has, say, a per-processor D$ that doesn't see
// writes by other processors, and has a shared E$ that does, the loadload
// barrier would have to make sure that either
//
// 1. cache lines in the issuing processor's D$ that contained data from
// addresses that could be written by other processors are invalidated, so
// subsequent loads from those addresses go to the E$ (it could do this
// by tagging such cache lines as 'shared', though how to tell the hardware
// to do the tagging is an interesting problem), or
//
// 2. there never are such cache lines in the issuing processor's D$, which
// means all references to shared data (however identified: see above)
// bypass the D$ (i.e., are satisfied from the E$).
//
// If your machine doesn't have an E$, substitute 'main memory' for 'E$'.
//
// Either of these alternatives is a pain, so no current machine we know of
// has incoherent caches.
//
// If loadload didn't have these properties, the store-release sequence for
// publishing a shared data structure wouldn't work, because a processor
// trying to read data newly published by another processor might go to
// its own incoherent caches to satisfy the read instead of to the newly
// written shared memory.
//
//
// NOTE WELL!!
//
// A Note on MutexLocker and Friends
//
// See mutexLocker.hpp. We assume throughout the VM that MutexLocker's
// and friends' constructors do a fence, a lock and an acquire *in that
// order*. And that their destructors do a release and unlock, in *that*
// order. If their implementations change such that these assumptions
// are violated, a whole lot of code will break.
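//
// For example (a sketch of the assumed ordering only; SomeGlobal_lock is
// hypothetical, and mutexLocker.hpp has the real interface):
//
//   {
//     MutexLocker ml(SomeGlobal_lock); // constructor: fence, lock, acquire
//     // accesses here cannot float out of the locked region
//   }                                  // destructor: release, unlock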

class OrderAccess : AllStatic {
 public:
  static void     loadload();
  static void     storestore();
  static void     loadstore();
  static void     storeload();

  static void     acquire();
  static void     release();
  static void     fence();

  static jbyte    load_acquire(volatile jbyte*   p);
  static jshort   load_acquire(volatile jshort*  p);
  static jint     load_acquire(volatile jint*    p);
  static jlong    load_acquire(volatile jlong*   p);
  static jubyte   load_acquire(volatile jubyte*  p);
  static jushort  load_acquire(volatile jushort* p);
  static juint    load_acquire(volatile juint*   p);
  static julong   load_acquire(volatile julong*  p);
  static jfloat   load_acquire(volatile jfloat*  p);
  static jdouble  load_acquire(volatile jdouble* p);

  static intptr_t load_ptr_acquire(volatile intptr_t*   p);
  static void*    load_ptr_acquire(volatile void*       p);
  static void*    load_ptr_acquire(const volatile void* p);

  static void     release_store(volatile jbyte*   p, jbyte   v);
  static void     release_store(volatile jshort*  p, jshort  v);
  static void     release_store(volatile jint*    p, jint    v);
  static void     release_store(volatile jlong*   p, jlong   v);
  static void     release_store(volatile jubyte*  p, jubyte  v);
  static void     release_store(volatile jushort* p, jushort v);
  static void     release_store(volatile juint*   p, juint   v);
  static void     release_store(volatile julong*  p, julong  v);
  static void     release_store(volatile jfloat*  p, jfloat  v);
  static void     release_store(volatile jdouble* p, jdouble v);

  static void     release_store_ptr(volatile intptr_t* p, intptr_t v);
  static void     release_store_ptr(volatile void*     p, void*    v);

  static void     store_fence(jbyte*   p, jbyte   v);
  static void     store_fence(jshort*  p, jshort  v);
  static void     store_fence(jint*    p, jint    v);
  static void     store_fence(jlong*   p, jlong   v);
  static void     store_fence(jubyte*  p, jubyte  v);
  static void     store_fence(jushort* p, jushort v);
  static void     store_fence(juint*   p, juint   v);
  static void     store_fence(julong*  p, julong  v);
  static void     store_fence(jfloat*  p, jfloat  v);
  static void     store_fence(jdouble* p, jdouble v);

  static void     store_ptr_fence(intptr_t* p, intptr_t v);
  static void     store_ptr_fence(void**    p, void*    v);

  static void     release_store_fence(volatile jbyte*   p, jbyte   v);
  static void     release_store_fence(volatile jshort*  p, jshort  v);
  static void     release_store_fence(volatile jint*    p, jint    v);
  static void     release_store_fence(volatile jlong*   p, jlong   v);
  static void     release_store_fence(volatile jubyte*  p, jubyte  v);
  static void     release_store_fence(volatile jushort* p, jushort v);
  static void     release_store_fence(volatile juint*   p, juint   v);
  static void     release_store_fence(volatile julong*  p, julong  v);
  static void     release_store_fence(volatile jfloat*  p, jfloat  v);
  static void     release_store_fence(volatile jdouble* p, jdouble v);

  static void     release_store_ptr_fence(volatile intptr_t* p, intptr_t v);
  static void     release_store_ptr_fence(volatile void*     p, void*    v);

 private:
  // This is a helper that invokes the StubRoutines::fence_entry()
  // routine if it exists. It should only be used by platforms that
  // don't have another way to do the inline assembly.
  static void StubRoutines_fence();
};

#endif // SHARE_VM_RUNTIME_ORDERACCESS_HPP