C++ - pthread and multicore on Windows
My question relates to the pthread library and making use of a multicore system. The system seems to improve under the proper parameters and small data sizes, with the improvement being around a data size of 65000. The data suggests that when I increase the number of threads, the time begins to decrease, but it increases again shortly afterward. When the thread number is 1, 2 or 4 the time might increase, and at 8 or 16 the time begins decreasing again. With large data sizes there is no improvement, and the times remain close together. If someone could tell me whether I am forcing the threads to act sequentially, or whether there is some other issue, that would be awesome.
Here's the data:
1395525080 0 num thread: 1 data size: 1024 0 1395525080 1395525080 0 num thread: 2 data size: 1024 0 1395525080 1395525080 0 num thread: 4 data size: 1024 0 1395525080 1395525080 15 num thread: 8 data size: 1024 0 1395525080 1395525080 47 num thread: 16 data size: 1024 0 1395525080 1395525080 31 num thread: 32 data size: 1024 0 1395525080 1395525080 16 num thread: 1 data size: 4096 0 1395525080 1395525080 0 num thread: 2 data size: 4096 0 1395525080 1395525080 0 num thread: 4 data size: 4096 0 1395525080 1395525080 15 num thread: 8 data size: 4096 0 1395525080 1395525080 78 num thread: 16 data size: 4096 0 1395525080 1395525080 31 num thread: 32 data size: 4096 0 1395525080 1395525080 140 num thread: 1 data size: 65536 0 1395525080 1395525081 156 num thread: 2 data size: 65536 0 1395525081 1395525081 109 num thread: 4 data size: 65536 0 1395525081 1395525081 94 num thread: 8 data size: 65536 0 1395525081 1395525081 93 num thread: 16 data size: 65536 0 1395525081 1395525081 187 num thread: 32 data size: 65536 0 1395525082 1395525082 171 num thread: 1 data size: 75536 0 1395525082 1395525082 172 num thread: 2 data size: 75536 0 1395525082 1395525082 141 num thread: 4 data size: 75536 0 1395525083 1395525083 109 num thread: 8 data size: 75536 0 1395525083 1395525083 140 num thread: 16 data size: 75536 0 1395525083 1395525083 234 num thread: 32 data size: 75536 0 1395525084 1395525084 203 num thread: 1 data size: 85536 0 1395525084 1395525084 203 num thread: 2 data size: 85536 0 1395525084 1395525084 172 num thread: 4 data size: 85536 0 1395525085 1395525085 202 num thread: 8 data size: 85536 0 1395525085 1395525085 125 num thread: 16 data size: 85536 0 1395525085 1395525085 187 num thread: 32 data size: 85536 0 1395525086 1395525086 125 num thread: 1 data size: 55536 0 1395525086 1395525086 109 num thread: 2 data size: 55536 0 1395525086 1395525086 141 num thread: 4 data size: 55536 0 1395525086 1395525086 78 num thread: 8 data size: 55536 0 1395525086 1395525087 140 
num thread: 16 data size: 55536 0 1395525087 1395525087 156 num thread: 32 data size: 55536 0 1395525087 1395525120 153271 num thread: 1 data size: 70000000 153 1395525274 1395525398 152630 num thread: 2 data size: 70000000 152 1395525551 1395525675 154846 num thread: 4 data size: 70000000 154 1395525830 1395525956 153988 num thread: 8 data size: 70000000 153 1395526110 1395526236 153956 num thread: 16 data size: 70000000 153 1395526390 1395526515 157935 num thread: 32 data size: 70000000 157 1395526673
Here's the code, a traditional bucket sort. I have 2 other similar bucket sorts that produce similar data, and the sequential code generates the same values.
struct bucket { std::vector<int> data; } ; void *sort_bucket(void *unsorted_bucket); int _tmain(int argc, _tchar* argv[]) { int array_n[] = {1024, 4096, 65536,75536,85536,55536, 70000000, 16777216}; int array_number_of_threads[] = {1, 2, 4, 8, 16, 32}; std::vector<int> n; std::vector<int> number_of_threads; number_of_threads.assign(array_number_of_threads, array_number_of_threads+6); n.assign(array_n, array_n+7); for(int size_index = 0; size_index < n.size(); size_index++) { for(int thread_index = 0; thread_index < number_of_threads.size(); thread_index++) { std::vector<int> unsorted_data; std::vector<int> sorted_data; std::vector<std::thread> thread_array; std::vector<bucket> buckets; std::vector<pthread_t> thread; while(buckets.size() < number_of_threads[thread_index]){ // checks against number of threads , creates number of buckets bucket new_bucket; pthread_t new_thread; buckets.push_back(new_bucket); thread.push_back(new_thread); } for(int index = 0; index < n[size_index]; index++) // gathers data { unsorted_data.push_back(rand() % n[size_index]); } clock_t t = 0; t = clock(); time_t start = 0; time_t end = 0; time(&start); std::cout << start << " "; int difference = n[size_index]/number_of_threads[thread_index]; int placeholder = 0; for(int index = 0; index < n[size_index]; index++) {//calculates bucket data belong in , places data in bucket //std::cout << unsorted_data[index] << " " << difference << " "; placeholder = unsorted_data[index]/difference; //std::cout << placeholder << std::endl; buckets[placeholder].data.push_back(unsorted_data[index]); } for(int index = 0; index < number_of_threads[thread_index]; index++){ // sends data threads //thread_array.push_back(std::thread(sort_bucket ,buckets[index])); pthread_create(&thread[index], null, sort_bucket , (void*) &buckets[index].data); } // bring data root process for(int index = 0; index < number_of_threads[thread_index]; index++) { void *data; struct bucket *ret_bucket; 
pthread_join(thread[index],(void**) &data); ret_bucket = (struct bucket *) data; sorted_data.insert(sorted_data.end(), ret_bucket->data.begin(), ret_bucket->data.end()); //sorted_data.assign(ret_bucket->data.begin(), ret_bucket->data.end()); } /* for(int index = 0; index < sorted_data.size(); index++) { std::cout << sorted_data[index] << " "; } */ t = clock() - t; std::cout << t << " "; t = t/clocks_per_sec; std::cout << "num thread: " << number_of_threads[thread_index] << " "; std::cout << "data size: " << n[size_index] << " "; std::cout << t << " "; time(&end); std::cout << end << std::endl; sort(unsorted_data.begin(), unsorted_data.end()); for(int index = 0; index < unsorted_data.size(); index++) { if(unsorted_data[index] != sorted_data[index]) { std::cout << "data sorting failed" << std::endl; } } } } int placeholder; std::cin >> placeholder; return 0; } void *sort_bucket(void *unsorted_bucket) { bucket *temp_sorted_bucket = (struct bucket *) unsorted_bucket; std::sort(temp_sorted_bucket->data.begin(), temp_sorted_bucket->data.end()); /*for(int index = 0; index < temp_sorted_bucket->data.size(); index++) { std::cout << temp_sorted_bucket->data.at(index) << " "; }*/ pthread_exit(temp_sorted_bucket); return 0; }
Remember that threads are limited by the number of physical cores on the CPU. Once you hit that limit, the system must use resources to switch between threads on the same core, which takes time. For example, an i3 processor has 2 physical cores, but hyperthreading provides 4 virtual cores on the CPU, so going past 4 threads will result in no benefit.
Comments
Post a Comment