/* * Copyright (c) 2024 Intel Corporation * * SPDX-License-Identifier: Apache-2.0 */ /* * @file * This file contains tests that will measure the length of time required * to add and remove threads from a wait queue that holds a varying number of * threads. Each thread added to (and removed from) the wait queue is a dummy * thread. As these dummy threads are inherently non-executable, this helps * prevent the addition/removal of threads to/from the ready queue from being * included in these measurements. Furthermore, the use of dummy threads helps * reduce the memory footprint as not only are thread stacks not required, * but we also do not need the full k_thread structure for each of these * dummy threads. */ #include #include #include #include "utils.h" #include #include #include #include uint32_t tm_off; static struct _thread_base dummy_thread[CONFIG_BENCHMARK_NUM_THREADS]; static _wait_q_t wait_q; uint64_t add_cycles[CONFIG_BENCHMARK_NUM_THREADS]; uint64_t remove_cycles[CONFIG_BENCHMARK_NUM_THREADS]; /** * Initialize each dummy thread. */ static void dummy_threads_init(unsigned int num_threads) { unsigned int i; unsigned int bucket_size; bucket_size = (num_threads / CONFIG_NUM_PREEMPT_PRIORITIES) + 1; for (i = 0; i < num_threads; i++) { z_init_thread_base(&dummy_thread[i], i / bucket_size, _THREAD_DUMMY, 0); } } static void cycles_reset(unsigned int num_threads) { unsigned int i; for (i = 0; i < num_threads; i++) { add_cycles[i] = 0ULL; remove_cycles[i] = 0ULL; } } /** * Each successive dummy thread added to the wait queue is either of the * same or lower priority. Each dummy thread removed from the wait queue * is of the same or lower priority than the one previous. */ static void test_decreasing_priority(_wait_q_t *q, unsigned int num_threads) { unsigned int i; timing_t start; timing_t finish; /* Add to tail of wait queue */ for (i = 0; i < num_threads; i++) { start = timing_counter_get(); z_pend_thread((struct k_thread *)&dummy_thread[i], q, K_FOREVER); finish = timing_counter_get(); add_cycles[i] += timing_cycles_get(&start, &finish); } /* Remove from head of wait queue */ for (i = 0; i < num_threads; i++) { start = timing_counter_get(); z_unpend_thread((struct k_thread *)&dummy_thread[i]); finish = timing_counter_get(); remove_cycles[i] += timing_cycles_get(&start, &finish); } } static void test_increasing_priority(_wait_q_t *q, unsigned int num_threads) { unsigned int i; timing_t start; timing_t finish; struct k_thread *thread; /* Add to head of wait queue */ for (i = 0; i < num_threads; i++) { start = timing_counter_get(); thread = (struct k_thread *) &dummy_thread[num_threads - i - 1]; z_pend_thread(thread, q, K_FOREVER); finish = timing_counter_get(); add_cycles[i] += timing_cycles_get(&start, &finish); } /* Remove from tail of wait queue */ for (i = 0; i < num_threads; i++) { start = timing_counter_get(); thread = (struct k_thread *) &dummy_thread[num_threads - i - 1]; z_unpend_thread(thread); finish = timing_counter_get(); remove_cycles[i] += timing_cycles_get(&start, &finish); } } static uint64_t sqrt_u64(uint64_t square) { if (square > 1) { uint64_t lo = sqrt_u64(square >> 2) << 1; uint64_t hi = lo + 1; return ((hi * hi) > square) ? lo : hi; } return square; } static void compute_and_report_stats(unsigned int num_threads, unsigned int num_iterations, uint64_t *cycles, const char *tag, const char *str) { uint64_t minimum = cycles[0]; uint64_t maximum = cycles[0]; uint64_t total = cycles[0]; uint64_t average; uint64_t std_dev = 0; uint64_t tmp; uint64_t diff; unsigned int i; for (i = 1; i < num_threads; i++) { if (cycles[i] > maximum) { maximum = cycles[i]; } if (cycles[i] < minimum) { minimum = cycles[i]; } total += cycles[i]; } minimum /= (uint64_t)num_iterations; maximum /= (uint64_t)num_iterations; average = total / (num_threads * num_iterations); /* Calculate standard deviation */ for (i = 0; i < num_threads; i++) { tmp = cycles[i] / num_iterations; diff = (average > tmp) ? (average - tmp) : (tmp - average); std_dev += (diff * diff); } std_dev /= num_threads; std_dev = sqrt_u64(std_dev); #ifdef CONFIG_BENCHMARK_RECORDING int tag_len = strlen(tag); int descr_len = strlen(str); int stag_len = strlen(".stddev"); int sdescr_len = strlen(", stddev."); stag_len = (tag_len + stag_len < 40) ? 40 - tag_len : stag_len; sdescr_len = (descr_len + sdescr_len < 50) ? 50 - descr_len : sdescr_len; printk("REC: %s%-*s - %s%-*s : %7llu cycles , %7u ns :\n", tag, stag_len, ".min", str, sdescr_len, ", min.", minimum, (uint32_t)timing_cycles_to_ns(minimum)); printk("REC: %s%-*s - %s%-*s : %7llu cycles , %7u ns :\n", tag, stag_len, ".max", str, sdescr_len, ", max.", maximum, (uint32_t)timing_cycles_to_ns(maximum)); printk("REC: %s%-*s - %s%-*s : %7llu cycles , %7u ns :\n", tag, stag_len, ".avg", str, sdescr_len, ", avg.", average, (uint32_t)timing_cycles_to_ns(average)); printk("REC: %s%-*s - %s%-*s : %7llu cycles , %7u ns :\n", tag, stag_len, ".stddev", str, sdescr_len, ", stddev.", std_dev, (uint32_t)timing_cycles_to_ns(std_dev)); #else ARG_UNUSED(tag); printk("------------------------------------\n"); printk("%s\n", str); printk(" Minimum : %7llu cycles (%7u nsec)\n", minimum, (uint32_t)timing_cycles_to_ns(minimum)); printk(" Maximum : %7llu cycles (%7u nsec)\n", maximum, (uint32_t)timing_cycles_to_ns(maximum)); printk(" Average : %7llu cycles (%7u nsec)\n", average, (uint32_t)timing_cycles_to_ns(average)); printk(" Std Deviation: %7llu cycles (%7u nsec)\n", std_dev, (uint32_t)timing_cycles_to_ns(std_dev)); #endif } int main(void) { unsigned int i; unsigned int freq; #ifdef CONFIG_BENCHMARK_VERBOSE char description[120]; char tag[50]; struct k_thread *thread; #endif timing_init(); bench_test_init(); freq = timing_freq_get_mhz(); printk("Time Measurements for %s wait queues\n", IS_ENABLED(CONFIG_WAITQ_DUMB) ? "dumb" : "scalable"); printk("Timing results: Clock frequency: %u MHz\n", freq); z_waitq_init(&wait_q); dummy_threads_init(CONFIG_BENCHMARK_NUM_THREADS); timing_start(); cycles_reset(CONFIG_BENCHMARK_NUM_THREADS); for (i = 0; i < CONFIG_BENCHMARK_NUM_ITERATIONS; i++) { test_decreasing_priority(&wait_q, CONFIG_BENCHMARK_NUM_THREADS); } compute_and_report_stats(CONFIG_BENCHMARK_NUM_THREADS, CONFIG_BENCHMARK_NUM_ITERATIONS, add_cycles, "sched.add.thread.WaitQ_tail", "Add threads of decreasing priority"); #ifdef CONFIG_BENCHMARK_VERBOSE for (i = 0; i < CONFIG_BENCHMARK_NUM_THREADS; i++) { snprintf(tag, sizeof(tag), "WaitQ.add.to.tail.%04u.waiters", i); snprintf(description, sizeof(description), "%-40s - Add thread of priority %u", tag, dummy_thread[i].prio); PRINT_STATS_AVG(description, (uint32_t)add_cycles[i], CONFIG_BENCHMARK_NUM_ITERATIONS); } #endif compute_and_report_stats(CONFIG_BENCHMARK_NUM_THREADS, CONFIG_BENCHMARK_NUM_ITERATIONS, remove_cycles, "sched.remove.thread.WaitQ_head", "Remove threads of decreasing priority"); #ifdef CONFIG_BENCHMARK_VERBOSE for (i = 0; i < CONFIG_BENCHMARK_NUM_THREADS; i++) { snprintf(tag, sizeof(tag), "WaitQ.remove.from.head.%04u.waiters", CONFIG_BENCHMARK_NUM_THREADS - i); snprintf(description, sizeof(description), "%-40s - Remove thread of priority %u", tag, dummy_thread[i].prio); PRINT_STATS_AVG(description, (uint32_t)remove_cycles[i], CONFIG_BENCHMARK_NUM_ITERATIONS); } #endif cycles_reset(CONFIG_BENCHMARK_NUM_THREADS); for (i = 0; i < CONFIG_BENCHMARK_NUM_ITERATIONS; i++) { test_increasing_priority(&wait_q, CONFIG_BENCHMARK_NUM_THREADS); } compute_and_report_stats(CONFIG_BENCHMARK_NUM_THREADS, CONFIG_BENCHMARK_NUM_ITERATIONS, add_cycles, "sched.add.thread.WaitQ_head", "Add threads of increasing priority"); #ifdef CONFIG_BENCHMARK_VERBOSE for (i = 0; i < CONFIG_BENCHMARK_NUM_THREADS; i++) { snprintf(tag, sizeof(tag), "WaitQ.add.to.head.%04u.waiters", i); thread = (struct k_thread *) &dummy_thread[CONFIG_BENCHMARK_NUM_THREADS - i - 1]; snprintf(description, sizeof(description), "%-40s - Add priority %u to waitq", tag, thread->base.prio); PRINT_STATS_AVG(description, (uint32_t)add_cycles[i], CONFIG_BENCHMARK_NUM_ITERATIONS); } #endif compute_and_report_stats(CONFIG_BENCHMARK_NUM_THREADS, CONFIG_BENCHMARK_NUM_ITERATIONS, remove_cycles, "sched.remove.thread.WaitQ_tail", "Remove threads of increasing priority"); #ifdef CONFIG_BENCHMARK_VERBOSE for (i = 0; i < CONFIG_BENCHMARK_NUM_THREADS; i++) { snprintf(tag, sizeof(tag), "WaitQ.remove.from.tail.%04u.waiters", CONFIG_BENCHMARK_NUM_THREADS - i); thread = (struct k_thread *) &dummy_thread[CONFIG_BENCHMARK_NUM_THREADS - i - 1]; snprintf(description, sizeof(description), "%-40s - Remove priority %u from waitq", tag, thread->base.prio); PRINT_STATS_AVG(description, (uint32_t)remove_cycles[i], CONFIG_BENCHMARK_NUM_ITERATIONS); } #endif timing_stop(); TC_END_REPORT(0); return 0; }