0
|
1 /*
|
|
2 * Copyright 2006-2007 Sun Microsystems, Inc. All Rights Reserved.
|
|
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
4 *
|
|
5 * This code is free software; you can redistribute it and/or modify it
|
|
6 * under the terms of the GNU General Public License version 2 only, as
|
|
7 * published by the Free Software Foundation.
|
|
8 *
|
|
9 * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
12 * version 2 for more details (a copy is included in the LICENSE file that
|
|
13 * accompanied this code).
|
|
14 *
|
|
15 * You should have received a copy of the GNU General Public License version
|
|
16 * 2 along with this work; if not, write to the Free Software Foundation,
|
|
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
18 *
|
|
19 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
|
|
20 * CA 95054 USA or visit www.sun.com if you need additional information or
|
|
21 * have any questions.
|
|
22 *
|
|
23 */
|
|
24
|
|
25 /*
|
|
26 * The NUMA-aware allocator (MutableNUMASpace) is basically a modification
|
|
27 * of MutableSpace which preserves interfaces but implements different
|
|
28 * functionality. The space is split into chunks for each locality group
|
|
29 * (resizing for adaptive size policy is also supported). For each thread
|
|
30 * allocations are performed in the chunk corresponding to the home locality
|
|
31 * group of the thread. Whenever any chunk fills up, a young generation
|
|
32 * collection occurs.
|
|
33 * The chunks can also be adaptively resized. The idea behind the adaptive
|
|
34 * sizing is to reduce the loss of the space in the eden due to fragmentation.
|
|
35 * The main cause of fragmentation is uneven allocation rates of threads.
|
|
36 * The allocation rate difference between locality groups may be caused either by
|
|
37 * application specifics or by uneven LWP distribution by the OS. Besides,
|
|
38 * an application can have fewer threads than the number of locality groups.
|
|
39 * In order to resize the chunk we measure the allocation rate of the
|
|
40 * application between collections. After that we reshape the chunks to reflect
|
|
41 * the allocation rate pattern. The AdaptiveWeightedAverage exponentially
|
|
42 * decaying average is used to smooth the measurements. The NUMASpaceResizeRate
|
|
43 * parameter is used to control the adaptation speed by restricting the number of
|
|
44 * bytes that can be moved during the adaptation phase.
|
|
45 * Chunks may contain pages from a wrong locality group. The page-scanner has
|
|
46 * been introduced to address the problem. Remote pages typically appear due to
|
|
47 * the memory shortage in the target locality group. Besides, Solaris would
|
|
48 * allocate a large page from the remote locality group even if there are small
|
|
49 * local pages available. The page-scanner scans the pages right after the
|
|
50 * collection and frees remote pages in hope that subsequent reallocation would
|
|
51 * be more successful. This approach proved to be useful on systems with high
|
|
52 * load where multiple processes are competing for the memory.
|
|
53 */
|
|
54
|
|
55 class MutableNUMASpace : public MutableSpace {
|
|
56 friend class VMStructs;
|
|
57
|
|
58 class LGRPSpace : public CHeapObj {
|
|
59 int _lgrp_id;
|
|
60 MutableSpace* _space;
|
|
61 MemRegion _invalid_region;
|
|
62 AdaptiveWeightedAverage *_alloc_rate;
|
|
63
|
|
64 struct SpaceStats {
|
|
65 size_t _local_space, _remote_space, _unbiased_space, _uncommited_space;
|
|
66 size_t _large_pages, _small_pages;
|
|
67
|
|
68 SpaceStats() {
|
|
69 _local_space = 0;
|
|
70 _remote_space = 0;
|
|
71 _unbiased_space = 0;
|
|
72 _uncommited_space = 0;
|
|
73 _large_pages = 0;
|
|
74 _small_pages = 0;
|
|
75 }
|
|
76 };
|
|
77
|
|
78 SpaceStats _space_stats;
|
|
79
|
|
80 char* _last_page_scanned;
|
|
81 char* last_page_scanned() { return _last_page_scanned; }
|
|
82 void set_last_page_scanned(char* p) { _last_page_scanned = p; }
|
|
83 public:
|
|
84 LGRPSpace(int l) : _lgrp_id(l), _last_page_scanned(NULL) {
|
|
85 _space = new MutableSpace();
|
|
86 _alloc_rate = new AdaptiveWeightedAverage(NUMAChunkResizeWeight);
|
|
87 }
|
|
88 ~LGRPSpace() {
|
|
89 delete _space;
|
|
90 delete _alloc_rate;
|
|
91 }
|
|
92
|
|
93 void add_invalid_region(MemRegion r) {
|
|
94 if (!_invalid_region.is_empty()) {
|
|
95 _invalid_region.set_start(MIN2(_invalid_region.start(), r.start()));
|
|
96 _invalid_region.set_end(MAX2(_invalid_region.end(), r.end()));
|
|
97 } else {
|
|
98 _invalid_region = r;
|
|
99 }
|
|
100 }
|
|
101
|
|
102 static bool equals(void* lgrp_id_value, LGRPSpace* p) {
|
|
103 return *(int*)lgrp_id_value == p->lgrp_id();
|
|
104 }
|
|
105
|
|
106 void sample() {
|
|
107 alloc_rate()->sample(space()->used_in_bytes());
|
|
108 }
|
|
109
|
|
110 MemRegion invalid_region() const { return _invalid_region; }
|
|
111 void set_invalid_region(MemRegion r) { _invalid_region = r; }
|
|
112 int lgrp_id() const { return _lgrp_id; }
|
|
113 MutableSpace* space() const { return _space; }
|
|
114 AdaptiveWeightedAverage* alloc_rate() const { return _alloc_rate; }
|
|
115 SpaceStats* space_stats() { return &_space_stats; }
|
|
116 void clear_space_stats() { _space_stats = SpaceStats(); }
|
|
117
|
|
118 void accumulate_statistics(size_t page_size);
|
|
119 void scan_pages(size_t page_size, size_t page_count);
|
|
120 };
|
|
121
|
|
122 GrowableArray<LGRPSpace*>* _lgrp_spaces;
|
|
123 size_t _page_size;
|
|
124 unsigned _adaptation_cycles, _samples_count;
|
|
125
|
|
126 void set_page_size(size_t psz) { _page_size = psz; }
|
|
127 size_t page_size() const { return _page_size; }
|
|
128
|
|
129 unsigned adaptation_cycles() { return _adaptation_cycles; }
|
|
130 void set_adaptation_cycles(int v) { _adaptation_cycles = v; }
|
|
131
|
|
132 unsigned samples_count() { return _samples_count; }
|
|
133 void increment_samples_count() { ++_samples_count; }
|
|
134
|
|
135 size_t _base_space_size;
|
|
136 void set_base_space_size(size_t v) { _base_space_size = v; }
|
|
137 size_t base_space_size() const { return _base_space_size; }
|
|
138
|
|
139 // Check if the NUMA topology has changed. Add and remove spaces if needed.
|
|
140 // The update can be forced by setting the force parameter equal to true.
|
|
141 bool update_layout(bool force);
|
141
|
142 // Bias region towards the lgrp.
|
|
143 void bias_region(MemRegion mr, int lgrp_id);
|
0
|
144 // Free pages in a given region.
|
|
145 void free_region(MemRegion mr);
|
|
146 // Get current chunk size.
|
|
147 size_t current_chunk_size(int i);
|
|
148 // Get default chunk size (equally divide the space).
|
|
149 size_t default_chunk_size();
|
|
150 // Adapt the chunk size to follow the allocation rate.
|
|
151 size_t adaptive_chunk_size(int i, size_t limit);
|
|
152 // Scan and free invalid pages.
|
|
153 void scan_pages(size_t page_count);
|
|
154 // Return the bottom_region and the top_region. Align them to page_size() boundary.
|
|
155 // |------------------new_region---------------------------------|
|
|
156 // |----bottom_region--|---intersection---|------top_region------|
|
|
157 void select_tails(MemRegion new_region, MemRegion intersection,
|
|
158 MemRegion* bottom_region, MemRegion *top_region);
|
|
159 // Try to merge the invalid region with the bottom or top region by decreasing
|
|
160 // the intersection area. Return the invalid_region aligned to the page_size()
|
|
161 // boundary if it's inside the intersection. Return non-empty invalid_region
|
|
162 // if it lies inside the intersection (also page-aligned).
|
|
163 // |------------------new_region---------------------------------|
|
|
164 // |----------------|-------invalid---|--------------------------|
|
|
165 // |----bottom_region--|---intersection---|------top_region------|
|
|
166 void merge_regions(MemRegion new_region, MemRegion* intersection,
|
|
167 MemRegion *invalid_region);
|
|
168
|
|
169 public:
|
|
170 GrowableArray<LGRPSpace*>* lgrp_spaces() const { return _lgrp_spaces; }
|
|
171 MutableNUMASpace();
|
|
172 virtual ~MutableNUMASpace();
|
|
173 // Space initialization.
|
|
174 virtual void initialize(MemRegion mr, bool clear_space);
|
|
175 // Update space layout if necessary. Do all adaptive resizing job.
|
|
176 virtual void update();
|
|
177 // Update allocation rate averages.
|
|
178 virtual void accumulate_statistics();
|
|
179
|
|
180 virtual void clear();
|
|
181 virtual void mangle_unused_area();
|
|
182 virtual void ensure_parsability();
|
|
183 virtual size_t used_in_words() const;
|
|
184 virtual size_t free_in_words() const;
|
|
185 virtual size_t tlab_capacity(Thread* thr) const;
|
|
186 virtual size_t unsafe_max_tlab_alloc(Thread* thr) const;
|
|
187
|
|
188 // Allocation (return NULL if full)
|
|
189 virtual HeapWord* allocate(size_t word_size);
|
|
190 virtual HeapWord* cas_allocate(size_t word_size);
|
|
191
|
|
192 // Debugging
|
|
193 virtual void print_on(outputStream* st) const;
|
|
194 virtual void print_short_on(outputStream* st) const;
|
|
195 virtual void verify(bool allow_dirty) const;
|
|
196
|
|
197 virtual void set_top(HeapWord* value);
|
|
198 };
|