123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439 |
- /*
- * This confidential and proprietary software may be used only as
- * authorised by a licensing agreement from ARM Limited
- * (C) COPYRIGHT 2010-2013 ARM Limited
- * ALL RIGHTS RESERVED
- * The entire notice above must be reproduced on all authorised
- * copies and copies may only be made to the extent permitted
- * by a licensing agreement from ARM Limited.
- */
- #include "mali_kernel_utilization.h"
- #include "mali_osk.h"
- #include "mali_osk_mali.h"
- #include "mali_kernel_common.h"
- #include "mali_session.h"
- #include "mali_scheduler.h"
- /* Thresholds for GP bound detection. */
- #define MALI_GP_BOUND_GP_UTILIZATION_THRESHOLD 240
- #define MALI_GP_BOUND_PP_UTILIZATION_THRESHOLD 250
- /* Define how often to calculate and report GPU utilization, in milliseconds */
- static _mali_osk_spinlock_irq_t *time_data_lock;
- static u32 num_running_gp_cores;
- static u32 num_running_pp_cores;
- static u64 work_start_time_gpu = 0;
- static u64 work_start_time_gp = 0;
- static u64 work_start_time_pp = 0;
- static u64 accumulated_work_time_gpu = 0;
- static u64 accumulated_work_time_gp = 0;
- static u64 accumulated_work_time_pp = 0;
- static u64 period_start_time = 0;
- static _mali_osk_timer_t *utilization_timer = NULL;
- static mali_bool timer_running = MALI_FALSE;
- static u32 last_utilization_gpu = 0 ;
- static u32 last_utilization_gp = 0 ;
- static u32 last_utilization_pp = 0 ;
- static u32 mali_utilization_timeout = 1000;
- void (*mali_utilization_callback)(struct mali_gpu_utilization_data *data) = NULL;
#if defined(CONFIG_MALI400_POWER_PERFORMANCE_POLICY)
extern void mali_power_performance_policy_callback(struct mali_gpu_utilization_data *data);

#define NUMBER_OF_NANOSECONDS_PER_SECOND 1000000000ULL

/**
 * Compute the window render rate for one sampling period.
 *
 * The value returned by mali_session_max_window_num() is scaled up by the
 * number of nanoseconds per second and divided by the period length, so the
 * result is "windows per second" without any floating point. Both operands
 * are shifted down by the same amount first so that a 32-bit divide suffices.
 *
 * @param time_period Length of the sampling period, in nanoseconds.
 * @return Window count per second, or 0 if the (shifted) period is zero and
 *         no meaningful rate can be computed.
 */
static u32 calculate_window_render_fps(u64 time_period)
{
	u32 max_window_number;
	u64 tmp;
	u64 max;
	u32 shift_val;
	u32 time_period_shift;
	u32 max_window_number_shift;

	max_window_number = mali_session_max_window_num();

	/* To avoid float division, extend the dividend to ns unit */
	tmp = (u64)max_window_number * NUMBER_OF_NANOSECONDS_PER_SECOND;

	/* The larger of the two operands decides how far both must be shifted. */
	max = (tmp > time_period) ? tmp : time_period;

	/*
	 * We may have 64-bit values, a dividend or a divisor or both.
	 * To avoid dependencies to a 64-bit divider, we shift down the two values
	 * equally first.
	 * NOTE(review): this presumes _mali_osk_clz(0) yields 32 so that values
	 * already fitting in 32 bits are not shifted - confirm against the OSK.
	 */
	shift_val = 32 - _mali_osk_clz((u32)(max >> 32));
	time_period_shift = (u32)(time_period >> shift_val);
	max_window_number_shift = (u32)(tmp >> shift_val);

	/*
	 * A zero-length period, or one that is tiny compared to the scaled
	 * window count, ends up as a zero divisor; report 0 instead of dividing.
	 */
	if (0 == time_period_shift) {
		return 0;
	}

	return max_window_number_shift / time_period_shift;
}
#endif /* defined(CONFIG_MALI400_POWER_PERFORMANCE_POLICY) */
- static void calculate_gpu_utilization(void* arg)
- {
- u64 time_now;
- u64 time_period;
- u32 leading_zeroes;
- u32 shift_val;
- u32 work_normalized_gpu;
- u32 work_normalized_gp;
- u32 work_normalized_pp;
- u32 period_normalized;
- u32 utilization_gpu;
- u32 utilization_gp;
- u32 utilization_pp;
- #if defined(CONFIG_MALI400_POWER_PERFORMANCE_POLICY)
- u32 window_render_fps;
- #endif
- _mali_osk_spinlock_irq_lock(time_data_lock);
- if (accumulated_work_time_gpu == 0 && work_start_time_gpu == 0) {
- /*
- * No work done for this period
- * - No need to reschedule timer
- * - Report zero usage
- */
- timer_running = MALI_FALSE;
- last_utilization_gpu = 0;
- last_utilization_gp = 0;
- last_utilization_pp = 0;
- _mali_osk_spinlock_irq_unlock(time_data_lock);
- if (NULL != mali_utilization_callback) {
- struct mali_gpu_utilization_data data = { 0, };
- mali_utilization_callback(&data);
- }
- mali_scheduler_hint_disable(MALI_SCHEDULER_HINT_GP_BOUND);
- return;
- }
- time_now = _mali_osk_time_get_ns();
- time_period = time_now - period_start_time;
- /* If we are currently busy, update working period up to now */
- if (work_start_time_gpu != 0) {
- accumulated_work_time_gpu += (time_now - work_start_time_gpu);
- work_start_time_gpu = time_now;
- /* GP and/or PP will also be busy if the GPU is busy at this point */
- if (work_start_time_gp != 0) {
- accumulated_work_time_gp += (time_now - work_start_time_gp);
- work_start_time_gp = time_now;
- }
- if (work_start_time_pp != 0) {
- accumulated_work_time_pp += (time_now - work_start_time_pp);
- work_start_time_pp = time_now;
- }
- }
- /*
- * We have two 64-bit values, a dividend and a divisor.
- * To avoid dependencies to a 64-bit divider, we shift down the two values
- * equally first.
- * We shift the dividend up and possibly the divisor down, making the result X in 256.
- */
- /* Shift the 64-bit values down so they fit inside a 32-bit integer */
- leading_zeroes = _mali_osk_clz((u32)(time_period >> 32));
- shift_val = 32 - leading_zeroes;
- work_normalized_gpu = (u32)(accumulated_work_time_gpu >> shift_val);
- work_normalized_gp = (u32)(accumulated_work_time_gp >> shift_val);
- work_normalized_pp = (u32)(accumulated_work_time_pp >> shift_val);
- period_normalized = (u32)(time_period >> shift_val);
- /*
- * Now, we should report the usage in parts of 256
- * this means we must shift up the dividend or down the divisor by 8
- * (we could do a combination, but we just use one for simplicity,
- * but the end result should be good enough anyway)
- */
- if (period_normalized > 0x00FFFFFF) {
- /* The divisor is so big that it is safe to shift it down */
- period_normalized >>= 8;
- } else {
- /*
- * The divisor is so small that we can shift up the dividend, without loosing any data.
- * (dividend is always smaller than the divisor)
- */
- work_normalized_gpu <<= 8;
- work_normalized_gp <<= 8;
- work_normalized_pp <<= 8;
- }
- utilization_gpu = work_normalized_gpu / period_normalized;
- utilization_gp = work_normalized_gp / period_normalized;
- utilization_pp = work_normalized_pp / period_normalized;
- #if defined(CONFIG_MALI400_POWER_PERFORMANCE_POLICY)
- window_render_fps = calculate_window_render_fps(time_period);
- #endif
- last_utilization_gpu = utilization_gpu;
- last_utilization_gp = utilization_gp;
- last_utilization_pp = utilization_pp;
- if ((MALI_GP_BOUND_GP_UTILIZATION_THRESHOLD < last_utilization_gp) &&
- (MALI_GP_BOUND_PP_UTILIZATION_THRESHOLD > last_utilization_pp)) {
- mali_scheduler_hint_enable(MALI_SCHEDULER_HINT_GP_BOUND);
- } else {
- mali_scheduler_hint_disable(MALI_SCHEDULER_HINT_GP_BOUND);
- }
- /* starting a new period */
- accumulated_work_time_gpu = 0;
- accumulated_work_time_gp = 0;
- accumulated_work_time_pp = 0;
- period_start_time = time_now;
- _mali_osk_spinlock_irq_unlock(time_data_lock);
- _mali_osk_timer_add(utilization_timer, _mali_osk_time_mstoticks(mali_utilization_timeout));
- if (NULL != mali_utilization_callback) {
- struct mali_gpu_utilization_data data = {
- utilization_gpu, utilization_gp, utilization_pp,
- #if defined(CONFIG_MALI400_POWER_PERFORMANCE_POLICY)
- window_render_fps, window_render_fps
- #endif
- };
- mali_utilization_callback(&data);
- }
- }
- _mali_osk_errcode_t mali_utilization_init(void)
- {
- #if USING_GPU_UTILIZATION
- struct _mali_osk_device_data data;
- if (_MALI_OSK_ERR_OK == _mali_osk_device_data_get(&data)) {
- /* Use device specific settings (if defined) */
- if (0 != data.utilization_interval) {
- mali_utilization_timeout = data.utilization_interval;
- }
- if (NULL != data.utilization_callback) {
- mali_utilization_callback = data.utilization_callback;
- MALI_DEBUG_PRINT(2, ("Mali GPU Utilization: Platform has it's own policy \n"));
- MALI_DEBUG_PRINT(2, ("Mali GPU Utilization: Utilization handler installed with interval %u\n", mali_utilization_timeout));
- }
- }
- #endif
- #if defined(CONFIG_MALI400_POWER_PERFORMANCE_POLICY)
- if (mali_utilization_callback == NULL) {
- MALI_DEBUG_PRINT(2, ("Mali GPU Utilization: MALI Power Performance Policy Algorithm \n"));
- mali_utilization_callback = mali_power_performance_policy_callback;
- }
- #endif
- if (NULL == mali_utilization_callback) {
- MALI_DEBUG_PRINT(2, ("Mali GPU Utilization: No utilization handler installed\n"));
- }
- time_data_lock = _mali_osk_spinlock_irq_init(_MALI_OSK_LOCKFLAG_ORDERED, _MALI_OSK_LOCK_ORDER_UTILIZATION);
- if (NULL == time_data_lock) {
- return _MALI_OSK_ERR_FAULT;
- }
- num_running_gp_cores = 0;
- num_running_pp_cores = 0;
- utilization_timer = _mali_osk_timer_init();
- if (NULL == utilization_timer) {
- _mali_osk_spinlock_irq_term(time_data_lock);
- return _MALI_OSK_ERR_FAULT;
- }
- _mali_osk_timer_setcallback(utilization_timer, calculate_gpu_utilization, NULL);
- return _MALI_OSK_ERR_OK;
- }
- void mali_utilization_suspend(void)
- {
- _mali_osk_spinlock_irq_lock(time_data_lock);
- if (timer_running == MALI_TRUE) {
- timer_running = MALI_FALSE;
- _mali_osk_spinlock_irq_unlock(time_data_lock);
- _mali_osk_timer_del(utilization_timer);
- return;
- }
- _mali_osk_spinlock_irq_unlock(time_data_lock);
- }
- void mali_utilization_term(void)
- {
- if (NULL != utilization_timer) {
- _mali_osk_timer_del(utilization_timer);
- timer_running = MALI_FALSE;
- _mali_osk_timer_term(utilization_timer);
- utilization_timer = NULL;
- }
- _mali_osk_spinlock_irq_term(time_data_lock);
- }
- void mali_utilization_gp_start(void)
- {
- _mali_osk_spinlock_irq_lock(time_data_lock);
- ++num_running_gp_cores;
- if (1 == num_running_gp_cores) {
- u64 time_now = _mali_osk_time_get_ns();
- /* First GP core started, consider GP busy from now and onwards */
- work_start_time_gp = time_now;
- if (0 == num_running_pp_cores) {
- /*
- * There are no PP cores running, so this is also the point
- * at which we consider the GPU to be busy as well.
- */
- work_start_time_gpu = time_now;
- }
- /* Start a new period (and timer) if needed */
- if (timer_running != MALI_TRUE) {
- timer_running = MALI_TRUE;
- period_start_time = time_now;
- /* Clear session->number_of_window_jobs */
- #if defined(CONFIG_MALI400_POWER_PERFORMANCE_POLICY)
- mali_session_max_window_num();
- #endif
- _mali_osk_spinlock_irq_unlock(time_data_lock);
- _mali_osk_timer_add(utilization_timer, _mali_osk_time_mstoticks(mali_utilization_timeout));
- } else {
- _mali_osk_spinlock_irq_unlock(time_data_lock);
- }
- } else {
- /* Nothing to do */
- _mali_osk_spinlock_irq_unlock(time_data_lock);
- }
- }
- void mali_utilization_pp_start(void)
- {
- _mali_osk_spinlock_irq_lock(time_data_lock);
- ++num_running_pp_cores;
- if (1 == num_running_pp_cores) {
- u64 time_now = _mali_osk_time_get_ns();
- /* First PP core started, consider PP busy from now and onwards */
- work_start_time_pp = time_now;
- if (0 == num_running_gp_cores) {
- /*
- * There are no GP cores running, so this is also the point
- * at which we consider the GPU to be busy as well.
- */
- work_start_time_gpu = time_now;
- }
- /* Start a new period (and timer) if needed */
- if (timer_running != MALI_TRUE) {
- timer_running = MALI_TRUE;
- period_start_time = time_now;
- /* Clear session->number_of_window_jobs */
- #if defined(CONFIG_MALI400_POWER_PERFORMANCE_POLICY)
- mali_session_max_window_num();
- #endif
- _mali_osk_spinlock_irq_unlock(time_data_lock);
- _mali_osk_timer_add(utilization_timer, _mali_osk_time_mstoticks(mali_utilization_timeout));
- } else {
- _mali_osk_spinlock_irq_unlock(time_data_lock);
- }
- } else {
- /* Nothing to do */
- _mali_osk_spinlock_irq_unlock(time_data_lock);
- }
- }
- void mali_utilization_gp_end(void)
- {
- _mali_osk_spinlock_irq_lock(time_data_lock);
- --num_running_gp_cores;
- if (0 == num_running_gp_cores) {
- u64 time_now = _mali_osk_time_get_ns();
- /* Last GP core ended, consider GP idle from now and onwards */
- accumulated_work_time_gp += (time_now - work_start_time_gp);
- work_start_time_gp = 0;
- if (0 == num_running_pp_cores) {
- /*
- * There are no PP cores running, so this is also the point
- * at which we consider the GPU to be idle as well.
- */
- accumulated_work_time_gpu += (time_now - work_start_time_gpu);
- work_start_time_gpu = 0;
- }
- }
- _mali_osk_spinlock_irq_unlock(time_data_lock);
- }
- void mali_utilization_pp_end(void)
- {
- _mali_osk_spinlock_irq_lock(time_data_lock);
- --num_running_pp_cores;
- if (0 == num_running_pp_cores) {
- u64 time_now = _mali_osk_time_get_ns();
- /* Last PP core ended, consider PP idle from now and onwards */
- accumulated_work_time_pp += (time_now - work_start_time_pp);
- work_start_time_pp = 0;
- if (0 == num_running_gp_cores) {
- /*
- * There are no GP cores running, so this is also the point
- * at which we consider the GPU to be idle as well.
- */
- accumulated_work_time_gpu += (time_now - work_start_time_gpu);
- work_start_time_gpu = 0;
- }
- }
- _mali_osk_spinlock_irq_unlock(time_data_lock);
- }
- u32 _mali_ukk_utilization_gp_pp(void)
- {
- return last_utilization_gpu;
- }
- u32 _mali_ukk_utilization_gp(void)
- {
- return last_utilization_gp;
- }
- u32 _mali_ukk_utilization_pp(void)
- {
- return last_utilization_pp;
- }
|