graal-compiler: src/gpu/hsail/vm/gpu_hsail

comparison src/gpu/hsail/vm/gpu_hsail_Tlab.hpp @ 16795:a29e6e7b7a86

Replace hsail donor threads with hsail tlabs

author	Tom Rodriguez <tom.rodriguez@oracle.com>
date	Tue, 12 Aug 2014 16:30:17 -0700
parents	f1d1ec9bcf24
children	82e5b5ccdb0c

comparison

equal deleted inserted replaced

-:74c02c90a3f9
+:a29e6e7b7a86
 HeapWord* _start;
 HeapWord* _top;
 HeapWord* _end;
 HeapWord* _last_good_top;
 HeapWord* _original_top;
-JavaThread* _donor_thread;         // donor thread associated with this tlabInfo
+ThreadLocalAllocBuffer* _tlab;      // tlab associated with this tlabInfo
 HSAILAllocationInfo* _alloc_info;   // same as what is in HSAILDeoptimizationInfo
 // Accessors
 HeapWord* start() { return _start; }
 HeapWord* top() { return _top; }
 HeapWord* end() { return _end; }
 HeapWord* last_good_top() { return _last_good_top; }
 HeapWord* original_top() { return _original_top; }
-void initialize(HeapWord* start, HeapWord* top, HeapWord* end, JavaThread* donorThread, HSAILAllocationInfo* allocInfo) {
+ThreadLocalAllocBuffer* tlab() { return _tlab; }
+void initialize(HeapWord* start, HeapWord* top, HeapWord* end, ThreadLocalAllocBuffer* tlab, HSAILAllocationInfo* allocInfo) {
 _start = start;
 _top = _original_top = top;
 _end = end;
-_donor_thread = donorThread;
+_tlab = tlab;
 _alloc_info = allocInfo;
 }
 };
 class HSAILAllocationInfo : public CHeapObj<mtInternal> {
 friend class VMStructs;
 private:
-JavaThread** donorThreads;
+jint   _num_tlabs;
-jint _num_donor_threads;
+size_t _tlab_align_reserve_bytes;         // filled in from ThreadLocalAllocBuffer::alignment_reserve_in_bytes()
-size_t _tlab_align_reserve_bytes;    // filled in from ThreadLocalAllocBuffer::alignment_reserve_in_bytes()
+HSAILTlabInfo** _cur_tlab_infos;          // array of current tlab info pointers, one per num_tlabs
-HSAILTlabInfo** _cur_tlab_infos;    // array of current tlab info pointers, one per donor_thread
 HSAILTlabInfo* _tlab_infos_pool_start;    // pool for new tlab_infos
 HSAILTlabInfo* _tlab_infos_pool_next;     // where next will be allocated from
 HSAILTlabInfo* _tlab_infos_pool_end;      // end of the pool (one past the last tlab_info slot)
 public:
-HSAILAllocationInfo(jobject donor_threads_jobj, int dimX, int allocBytesPerWorkitem) {
+HSAILAllocationInfo(jint num_tlabs, int dimX, int allocBytesPerWorkitem) {
-// fill in the donorThreads array
+_num_tlabs = num_tlabs;
-objArrayOop donorThreadObjects = (objArrayOop) JNIHandles::resolve(donor_threads_jobj);
+// if this thread doesn't have gpu_hsail_tlabs allocated yet, do so now
-_num_donor_threads = donorThreadObjects->length();
+JavaThread* thread = JavaThread::current();
-guarantee(_num_donor_threads > 0, "need at least one donor thread");
+if (thread->get_gpu_hsail_tlabs_count() == 0) {
-donorThreads = NEW_C_HEAP_ARRAY(JavaThread*, _num_donor_threads, mtInternal);
+thread->initialize_gpu_hsail_tlabs(num_tlabs);
-for (int i = 0; i < _num_donor_threads; i++) {
+if (TraceGPUInteraction) {
-donorThreads[i] = java_lang_Thread::thread(donorThreadObjects->obj_at(i));
+for (int i = 0; i < num_tlabs; i++) {
-}
+ThreadLocalAllocBuffer* tlab = thread->get_gpu_hsail_tlab_at(i);
+tty->print("initialized gpu_hsail_tlab %d at %p -> ", i, tlab);
+printTlabInfoFromThread(tlab);
+}
+}
+}
 // Compute max_tlab_infos based on amount of free heap space
 size_t max_tlab_infos;
 {
-JavaThread* donorThread = donorThreads[0];
+ThreadLocalAllocBuffer* tlab = &thread->tlab();
-ThreadLocalAllocBuffer* tlab = &donorThread->tlab();
 size_t new_tlab_size = tlab->compute_size(0);
-size_t heap_bytes_free = Universe::heap()->unsafe_max_tlab_alloc(donorThread);
+size_t heap_bytes_free = Universe::heap()->unsafe_max_tlab_alloc(thread);
 if (new_tlab_size != 0) {
-max_tlab_infos = MIN2(heap_bytes_free / new_tlab_size, (size_t)(64 * _num_donor_threads));
+max_tlab_infos = MIN2(heap_bytes_free / new_tlab_size, (size_t)(64 * _num_tlabs));
 } else {
-max_tlab_infos = 8 * _num_donor_threads;   // an arbitrary multiple
+max_tlab_infos = 8 * _num_tlabs;   // an arbitrary multiple
 }
 if (TraceGPUInteraction) {
 tty->print_cr("heapFree = %ld, newTlabSize=%ld, tlabInfos allocated = %ld", heap_bytes_free, new_tlab_size, max_tlab_infos);
 }
 }
-_cur_tlab_infos = NEW_C_HEAP_ARRAY(HSAILTlabInfo*, _num_donor_threads, mtInternal);
+_cur_tlab_infos = NEW_C_HEAP_ARRAY(HSAILTlabInfo*, _num_tlabs, mtInternal);
 _tlab_infos_pool_start = NEW_C_HEAP_ARRAY(HSAILTlabInfo, max_tlab_infos, mtInternal);
-_tlab_infos_pool_next = &_tlab_infos_pool_start[_num_donor_threads];
+_tlab_infos_pool_next = &_tlab_infos_pool_start[_num_tlabs];
 _tlab_infos_pool_end = &_tlab_infos_pool_start[max_tlab_infos];
 _tlab_align_reserve_bytes = ThreadLocalAllocBuffer::alignment_reserve_in_bytes();
-// we will fill the first N tlabInfos from the donor threads
+// we will fill the first N tlabInfos from the gpu_hsail_tlabs
-for (int i = 0; i < _num_donor_threads; i++) {
+for (int i = 0; i < _num_tlabs; i++) {
-JavaThread* donorThread = donorThreads[i];
+ThreadLocalAllocBuffer* tlab = thread->get_gpu_hsail_tlab_at(i);
-ThreadLocalAllocBuffer* tlab = &donorThread->tlab();
+if (TraceGPUInteraction) {
-if (TraceGPUInteraction) {
+tty->print("gpu_hsail_tlab %d at %p -> ", i, tlab);
-tty->print("donorThread %d, is %p, tlab at %p -> ", i, donorThread, tlab);
 printTlabInfoFromThread(tlab);
 }
 // Here we try to get a new tlab if current one is null. Note:
 // eventually we may want to test if the size is too small based
 // on what we expect to allocate, but for now we can just let it overflow and let the
 // GPU allocate new tlabs. Actually, if we can't prime a tlab
 // here, it might make sense to do a gc now rather than to start
 // the kernel and have it deoptimize.  How to do that?
 if (tlab->end() == NULL) {
-bool success = getNewTlabForDonorThread(tlab, i);
+bool success = getNewGpuHsailTlab(tlab);
 if (TraceGPUInteraction) {
 if (success) {
-tty->print("donorThread %d, refilled tlab, -> ", i);
+tty->print("gpu_hsail_tlab %d, refilled tlab, -> ", i);
 printTlabInfoFromThread(tlab);
 } else {
-tty->print("donorThread %d, could not refill tlab, left as ", i);
+tty->print("gpu_hsail_tlab %d, could not refill tlab, left as ", i);
 printTlabInfoFromThread(tlab);
 }
 }
 }
 // extract the necessary tlab fields into a TlabInfo record
 HSAILTlabInfo* pTlabInfo = &_tlab_infos_pool_start[i];
 _cur_tlab_infos[i] = pTlabInfo;
-pTlabInfo->initialize(tlab->start(), tlab->top(), tlab->end(), donorThread, this);
+pTlabInfo->initialize(tlab->start(), tlab->top(), tlab->end(), tlab, this);
-// reset the real tlab fields to zero so we are sure the thread doesn't use it
-tlab->set_start(NULL);
-tlab->set_top(NULL);
-tlab->set_pf_top(NULL);
-tlab->set_end(NULL);
 }
 }
 ~HSAILAllocationInfo() {
 FREE_C_HEAP_ARRAY(HSAILTlabInfo*, _cur_tlab_infos, mtInternal);
 FREE_C_HEAP_ARRAY(HSAILTlabInfo, _tlab_infos_pool_start, mtInternal);
-FREE_C_HEAP_ARRAY(JavaThread*, donorThreads, mtInternal);
 }
 void postKernelCleanup() {
 // go thru all the tlabInfos, fix up any tlab tops that overflowed
 // complete the tlabs if they overflowed
-// update the donor threads tlabs when appropriate
+// update the gpu_hsail_tlabs when appropriate
 bool anyOverflows = false;
 size_t bytesAllocated = 0;
 // if there was an overflow in allocating tlabInfos, correct it here
 if (_tlab_infos_pool_next > _tlab_infos_pool_end) {
 if (TraceGPUInteraction) {
 for (HSAILTlabInfo* tlabInfo = _tlab_infos_pool_start; tlabInfo < _tlab_infos_pool_next; tlabInfo++) {
 if (TraceGPUInteraction) {
 tty->print_cr("postprocess tlabInfo %p, start=%p, top=%p, end=%p, last_good_top=%p", tlabInfo,
 tlabInfo->start(), tlabInfo->top(), tlabInfo->end(), tlabInfo->last_good_top());
 }
-JavaThread* donorThread = tlabInfo->_donor_thread;
+ThreadLocalAllocBuffer* tlab = tlabInfo->tlab();
-ThreadLocalAllocBuffer* tlab = &donorThread->tlab();
 bool overflowed = false;
 // a tlabInfo has NULL fields if we could not prime it on entry
 // or could not get a tlab from the gpu; ignore such a tlabInfo here
 if (tlabInfo->start() != NULL) {
 if (tlabInfo->top() > tlabInfo->end()) {
 anyOverflows = true;
 overflowed = true;
 if (TraceGPUInteraction) {
 long overflowAmount = (long) tlabInfo->top() - (long) tlabInfo->last_good_top();
-tty->print_cr("tlabInfo %p (donorThread = %p) overflowed by %ld bytes, setting last good top to %p", tlabInfo, donorThread, overflowAmount, tlabInfo->last_good_top());
+tty->print_cr("tlabInfo %p (tlab = %p) overflowed by %ld bytes, setting last good top to %p", tlabInfo, tlab, overflowAmount, tlabInfo->last_good_top());
 }
 tlabInfo->_top = tlabInfo->last_good_top();
 }
-// if the donor thread allocated anything while we were running
+// fill the gpu_hsail_tlab with the tlabInfo information
-// we will retire its tlab before overwriting with our new one
-if (tlab->top() != NULL) {
-if (TraceGPUInteraction) {
-tty->print("Donor Thread allocated new tlab");
-printTlabInfoFromThread(tlab);
-}
-tlab->make_parsable(true);
-}
-// fill the donor thread tlab with the tlabInfo information
 // we do this even if it will get overwritten by a later tlabinfo
-// because it helps with tlab statistics for that donor thread
+// because it helps with tlab statistics for that tlab
 tlab->fill(tlabInfo->start(), tlabInfo->top(), (tlabInfo->end() - tlabInfo->start()) + tlab->alignment_reserve());
 // if there was an overflow, make it parsable with retire = true
 if (overflowed) {
 tlab->make_parsable(true);
 }
 private:
 // fill and retire old tlab and get a new one
 // if we can't get one, no problem: someone will eventually do a gc
-bool getNewTlabForDonorThread(ThreadLocalAllocBuffer* tlab, int idx) {
+bool getNewGpuHsailTlab(ThreadLocalAllocBuffer* tlab) {
 tlab->clear_before_allocation();    // fill and retire old tlab (will also check for null)
 // get a size for a new tlab that is based on the desired_size
 size_t new_tlab_size = tlab->compute_size(0);

Mercurial > hg > graal-compiler

comparison src/gpu/hsail/vm/gpu_hsail_Tlab.hpp @ 16795:a29e6e7b7a86