mali_kernel_utilization.c 12 KB


  1. /*
  2. * This confidential and proprietary software may be used only as
  3. * authorised by a licensing agreement from ARM Limited
  4. * (C) COPYRIGHT 2010-2013 ARM Limited
  5. * ALL RIGHTS RESERVED
  6. * The entire notice above must be reproduced on all authorised
  7. * copies and copies may only be made to the extent permitted
  8. * by a licensing agreement from ARM Limited.
  9. */
  10. #include "mali_kernel_utilization.h"
  11. #include "mali_osk.h"
  12. #include "mali_osk_mali.h"
  13. #include "mali_kernel_common.h"
  14. #include "mali_session.h"
  15. #include "mali_scheduler.h"
  16. /* Thresholds for GP bound detection. They are compared against utilization values expressed in parts of 256. */
  17. #define MALI_GP_BOUND_GP_UTILIZATION_THRESHOLD 240
  18. #define MALI_GP_BOUND_PP_UTILIZATION_THRESHOLD 250
  19. /* State shared by the utilization timer callback and the GP/PP start/end hooks. How often utilization is calculated and reported, in milliseconds, is held in mali_utilization_timeout below. */
  20. static _mali_osk_spinlock_irq_t *time_data_lock; /* held by the start/end hooks and the timer callback while they update the state below */
  21. static u32 num_running_gp_cores; /* incremented by mali_utilization_gp_start(), decremented by mali_utilization_gp_end() */
  22. static u32 num_running_pp_cores; /* incremented by mali_utilization_pp_start(), decremented by mali_utilization_pp_end() */
  23. static u64 work_start_time_gpu = 0; /* timestamp at which the GPU became busy; 0 while idle */
  24. static u64 work_start_time_gp = 0; /* timestamp at which the first GP core started; 0 while GP is idle */
  25. static u64 work_start_time_pp = 0; /* timestamp at which the first PP core started; 0 while PP is idle */
  26. static u64 accumulated_work_time_gpu = 0; /* GPU busy time summed so far in the current period */
  27. static u64 accumulated_work_time_gp = 0; /* GP busy time summed so far in the current period */
  28. static u64 accumulated_work_time_pp = 0; /* PP busy time summed so far in the current period */
  29. static u64 period_start_time = 0; /* timestamp at which the current measurement period began */
  30. static _mali_osk_timer_t *utilization_timer = NULL; /* runs calculate_gpu_utilization(); created in mali_utilization_init() */
  31. static mali_bool timer_running = MALI_FALSE; /* set when a start hook arms the timer; cleared on an idle period, suspend and term */
  32. static u32 last_utilization_gpu = 0 ; /* most recent GPU result, in parts of 256 */
  33. static u32 last_utilization_gp = 0 ; /* most recent GP result, in parts of 256 */
  34. static u32 last_utilization_pp = 0 ; /* most recent PP result, in parts of 256 */
  35. static u32 mali_utilization_timeout = 1000; /* reporting interval in ms; mali_utilization_init() may replace it with the device-specific value */
  36. void (*mali_utilization_callback)(struct mali_gpu_utilization_data *data) = NULL; /* receives each period's result; NULL means no handler is installed */
  37. #if defined(CONFIG_MALI400_POWER_PERFORMANCE_POLICY)
  38. extern void mali_power_performance_policy_callback(struct mali_gpu_utilization_data *data);
  39. #define NUMBER_OF_NANOSECONDS_PER_SECOND 1000000000ULL
  40. static u32 calculate_window_render_fps(u64 time_period)
  41. {
  42. u32 max_window_number;
  43. u64 tmp;
  44. u64 max = time_period;
  45. u32 leading_zeroes;
  46. u32 shift_val;
  47. u32 time_period_shift;
  48. u32 max_window_number_shift;
  49. u32 ret_val;
  50. max_window_number = mali_session_max_window_num();
  51. /* To avoid float division, extend the dividend to ns unit */
  52. tmp = (u64)max_window_number * NUMBER_OF_NANOSECONDS_PER_SECOND;
  53. if (tmp > time_period) {
  54. max = tmp;
  55. }
  56. /*
  57. * We may have 64-bit values, a dividend or a divisor or both
  58. * To avoid dependencies to a 64-bit divider, we shift down the two values
  59. * equally first.
  60. */
  61. leading_zeroes = _mali_osk_clz((u32)(max >> 32));
  62. shift_val = 32 - leading_zeroes;
  63. time_period_shift = (u32)(time_period >> shift_val);
  64. max_window_number_shift = (u32)(tmp >> shift_val);
  65. ret_val = max_window_number_shift / time_period_shift;
  66. return ret_val;
  67. }
  68. #endif /* defined(CONFIG_MALI400_POWER_PERFORMANCE_POLICY) */
  69. static void calculate_gpu_utilization(void* arg)
  70. {
  71. u64 time_now;
  72. u64 time_period;
  73. u32 leading_zeroes;
  74. u32 shift_val;
  75. u32 work_normalized_gpu;
  76. u32 work_normalized_gp;
  77. u32 work_normalized_pp;
  78. u32 period_normalized;
  79. u32 utilization_gpu;
  80. u32 utilization_gp;
  81. u32 utilization_pp;
  82. #if defined(CONFIG_MALI400_POWER_PERFORMANCE_POLICY)
  83. u32 window_render_fps;
  84. #endif
  85. _mali_osk_spinlock_irq_lock(time_data_lock);
  86. if (accumulated_work_time_gpu == 0 && work_start_time_gpu == 0) {
  87. /*
  88. * No work done for this period
  89. * - No need to reschedule timer
  90. * - Report zero usage
  91. */
  92. timer_running = MALI_FALSE;
  93. last_utilization_gpu = 0;
  94. last_utilization_gp = 0;
  95. last_utilization_pp = 0;
  96. _mali_osk_spinlock_irq_unlock(time_data_lock);
  97. if (NULL != mali_utilization_callback) {
  98. struct mali_gpu_utilization_data data = { 0, };
  99. mali_utilization_callback(&data);
  100. }
  101. mali_scheduler_hint_disable(MALI_SCHEDULER_HINT_GP_BOUND);
  102. return;
  103. }
  104. time_now = _mali_osk_time_get_ns();
  105. time_period = time_now - period_start_time;
  106. /* If we are currently busy, update working period up to now */
  107. if (work_start_time_gpu != 0) {
  108. accumulated_work_time_gpu += (time_now - work_start_time_gpu);
  109. work_start_time_gpu = time_now;
  110. /* GP and/or PP will also be busy if the GPU is busy at this point */
  111. if (work_start_time_gp != 0) {
  112. accumulated_work_time_gp += (time_now - work_start_time_gp);
  113. work_start_time_gp = time_now;
  114. }
  115. if (work_start_time_pp != 0) {
  116. accumulated_work_time_pp += (time_now - work_start_time_pp);
  117. work_start_time_pp = time_now;
  118. }
  119. }
  120. /*
  121. * We have two 64-bit values, a dividend and a divisor.
  122. * To avoid dependencies to a 64-bit divider, we shift down the two values
  123. * equally first.
  124. * We shift the dividend up and possibly the divisor down, making the result X in 256.
  125. */
  126. /* Shift the 64-bit values down so they fit inside a 32-bit integer */
  127. leading_zeroes = _mali_osk_clz((u32)(time_period >> 32));
  128. shift_val = 32 - leading_zeroes;
  129. work_normalized_gpu = (u32)(accumulated_work_time_gpu >> shift_val);
  130. work_normalized_gp = (u32)(accumulated_work_time_gp >> shift_val);
  131. work_normalized_pp = (u32)(accumulated_work_time_pp >> shift_val);
  132. period_normalized = (u32)(time_period >> shift_val);
  133. /*
  134. * Now, we should report the usage in parts of 256
  135. * this means we must shift up the dividend or down the divisor by 8
  136. * (we could do a combination, but we just use one for simplicity,
  137. * but the end result should be good enough anyway)
  138. */
  139. if (period_normalized > 0x00FFFFFF) {
  140. /* The divisor is so big that it is safe to shift it down */
  141. period_normalized >>= 8;
  142. } else {
  143. /*
  144. * The divisor is so small that we can shift up the dividend, without loosing any data.
  145. * (dividend is always smaller than the divisor)
  146. */
  147. work_normalized_gpu <<= 8;
  148. work_normalized_gp <<= 8;
  149. work_normalized_pp <<= 8;
  150. }
  151. utilization_gpu = work_normalized_gpu / period_normalized;
  152. utilization_gp = work_normalized_gp / period_normalized;
  153. utilization_pp = work_normalized_pp / period_normalized;
  154. #if defined(CONFIG_MALI400_POWER_PERFORMANCE_POLICY)
  155. window_render_fps = calculate_window_render_fps(time_period);
  156. #endif
  157. last_utilization_gpu = utilization_gpu;
  158. last_utilization_gp = utilization_gp;
  159. last_utilization_pp = utilization_pp;
  160. if ((MALI_GP_BOUND_GP_UTILIZATION_THRESHOLD < last_utilization_gp) &&
  161. (MALI_GP_BOUND_PP_UTILIZATION_THRESHOLD > last_utilization_pp)) {
  162. mali_scheduler_hint_enable(MALI_SCHEDULER_HINT_GP_BOUND);
  163. } else {
  164. mali_scheduler_hint_disable(MALI_SCHEDULER_HINT_GP_BOUND);
  165. }
  166. /* starting a new period */
  167. accumulated_work_time_gpu = 0;
  168. accumulated_work_time_gp = 0;
  169. accumulated_work_time_pp = 0;
  170. period_start_time = time_now;
  171. _mali_osk_spinlock_irq_unlock(time_data_lock);
  172. _mali_osk_timer_add(utilization_timer, _mali_osk_time_mstoticks(mali_utilization_timeout));
  173. if (NULL != mali_utilization_callback) {
  174. struct mali_gpu_utilization_data data = {
  175. utilization_gpu, utilization_gp, utilization_pp,
  176. #if defined(CONFIG_MALI400_POWER_PERFORMANCE_POLICY)
  177. window_render_fps, window_render_fps
  178. #endif
  179. };
  180. mali_utilization_callback(&data);
  181. }
  182. }
  183. _mali_osk_errcode_t mali_utilization_init(void)
  184. {
  185. #if USING_GPU_UTILIZATION
  186. struct _mali_osk_device_data data;
  187. if (_MALI_OSK_ERR_OK == _mali_osk_device_data_get(&data)) {
  188. /* Use device specific settings (if defined) */
  189. if (0 != data.utilization_interval) {
  190. mali_utilization_timeout = data.utilization_interval;
  191. }
  192. if (NULL != data.utilization_callback) {
  193. mali_utilization_callback = data.utilization_callback;
  194. MALI_DEBUG_PRINT(2, ("Mali GPU Utilization: Platform has it's own policy \n"));
  195. MALI_DEBUG_PRINT(2, ("Mali GPU Utilization: Utilization handler installed with interval %u\n", mali_utilization_timeout));
  196. }
  197. }
  198. #endif
  199. #if defined(CONFIG_MALI400_POWER_PERFORMANCE_POLICY)
  200. if (mali_utilization_callback == NULL) {
  201. MALI_DEBUG_PRINT(2, ("Mali GPU Utilization: MALI Power Performance Policy Algorithm \n"));
  202. mali_utilization_callback = mali_power_performance_policy_callback;
  203. }
  204. #endif
  205. if (NULL == mali_utilization_callback) {
  206. MALI_DEBUG_PRINT(2, ("Mali GPU Utilization: No utilization handler installed\n"));
  207. }
  208. time_data_lock = _mali_osk_spinlock_irq_init(_MALI_OSK_LOCKFLAG_ORDERED, _MALI_OSK_LOCK_ORDER_UTILIZATION);
  209. if (NULL == time_data_lock) {
  210. return _MALI_OSK_ERR_FAULT;
  211. }
  212. num_running_gp_cores = 0;
  213. num_running_pp_cores = 0;
  214. utilization_timer = _mali_osk_timer_init();
  215. if (NULL == utilization_timer) {
  216. _mali_osk_spinlock_irq_term(time_data_lock);
  217. return _MALI_OSK_ERR_FAULT;
  218. }
  219. _mali_osk_timer_setcallback(utilization_timer, calculate_gpu_utilization, NULL);
  220. return _MALI_OSK_ERR_OK;
  221. }
  222. void mali_utilization_suspend(void)
  223. {
  224. _mali_osk_spinlock_irq_lock(time_data_lock);
  225. if (timer_running == MALI_TRUE) {
  226. timer_running = MALI_FALSE;
  227. _mali_osk_spinlock_irq_unlock(time_data_lock);
  228. _mali_osk_timer_del(utilization_timer);
  229. return;
  230. }
  231. _mali_osk_spinlock_irq_unlock(time_data_lock);
  232. }
  233. void mali_utilization_term(void)
  234. {
  235. if (NULL != utilization_timer) {
  236. _mali_osk_timer_del(utilization_timer);
  237. timer_running = MALI_FALSE;
  238. _mali_osk_timer_term(utilization_timer);
  239. utilization_timer = NULL;
  240. }
  241. _mali_osk_spinlock_irq_term(time_data_lock);
  242. }
  243. void mali_utilization_gp_start(void)
  244. {
  245. _mali_osk_spinlock_irq_lock(time_data_lock);
  246. ++num_running_gp_cores;
  247. if (1 == num_running_gp_cores) {
  248. u64 time_now = _mali_osk_time_get_ns();
  249. /* First GP core started, consider GP busy from now and onwards */
  250. work_start_time_gp = time_now;
  251. if (0 == num_running_pp_cores) {
  252. /*
  253. * There are no PP cores running, so this is also the point
  254. * at which we consider the GPU to be busy as well.
  255. */
  256. work_start_time_gpu = time_now;
  257. }
  258. /* Start a new period (and timer) if needed */
  259. if (timer_running != MALI_TRUE) {
  260. timer_running = MALI_TRUE;
  261. period_start_time = time_now;
  262. /* Clear session->number_of_window_jobs */
  263. #if defined(CONFIG_MALI400_POWER_PERFORMANCE_POLICY)
  264. mali_session_max_window_num();
  265. #endif
  266. _mali_osk_spinlock_irq_unlock(time_data_lock);
  267. _mali_osk_timer_add(utilization_timer, _mali_osk_time_mstoticks(mali_utilization_timeout));
  268. } else {
  269. _mali_osk_spinlock_irq_unlock(time_data_lock);
  270. }
  271. } else {
  272. /* Nothing to do */
  273. _mali_osk_spinlock_irq_unlock(time_data_lock);
  274. }
  275. }
  276. void mali_utilization_pp_start(void)
  277. {
  278. _mali_osk_spinlock_irq_lock(time_data_lock);
  279. ++num_running_pp_cores;
  280. if (1 == num_running_pp_cores) {
  281. u64 time_now = _mali_osk_time_get_ns();
  282. /* First PP core started, consider PP busy from now and onwards */
  283. work_start_time_pp = time_now;
  284. if (0 == num_running_gp_cores) {
  285. /*
  286. * There are no GP cores running, so this is also the point
  287. * at which we consider the GPU to be busy as well.
  288. */
  289. work_start_time_gpu = time_now;
  290. }
  291. /* Start a new period (and timer) if needed */
  292. if (timer_running != MALI_TRUE) {
  293. timer_running = MALI_TRUE;
  294. period_start_time = time_now;
  295. /* Clear session->number_of_window_jobs */
  296. #if defined(CONFIG_MALI400_POWER_PERFORMANCE_POLICY)
  297. mali_session_max_window_num();
  298. #endif
  299. _mali_osk_spinlock_irq_unlock(time_data_lock);
  300. _mali_osk_timer_add(utilization_timer, _mali_osk_time_mstoticks(mali_utilization_timeout));
  301. } else {
  302. _mali_osk_spinlock_irq_unlock(time_data_lock);
  303. }
  304. } else {
  305. /* Nothing to do */
  306. _mali_osk_spinlock_irq_unlock(time_data_lock);
  307. }
  308. }
  309. void mali_utilization_gp_end(void)
  310. {
  311. _mali_osk_spinlock_irq_lock(time_data_lock);
  312. --num_running_gp_cores;
  313. if (0 == num_running_gp_cores) {
  314. u64 time_now = _mali_osk_time_get_ns();
  315. /* Last GP core ended, consider GP idle from now and onwards */
  316. accumulated_work_time_gp += (time_now - work_start_time_gp);
  317. work_start_time_gp = 0;
  318. if (0 == num_running_pp_cores) {
  319. /*
  320. * There are no PP cores running, so this is also the point
  321. * at which we consider the GPU to be idle as well.
  322. */
  323. accumulated_work_time_gpu += (time_now - work_start_time_gpu);
  324. work_start_time_gpu = 0;
  325. }
  326. }
  327. _mali_osk_spinlock_irq_unlock(time_data_lock);
  328. }
  329. void mali_utilization_pp_end(void)
  330. {
  331. _mali_osk_spinlock_irq_lock(time_data_lock);
  332. --num_running_pp_cores;
  333. if (0 == num_running_pp_cores) {
  334. u64 time_now = _mali_osk_time_get_ns();
  335. /* Last PP core ended, consider PP idle from now and onwards */
  336. accumulated_work_time_pp += (time_now - work_start_time_pp);
  337. work_start_time_pp = 0;
  338. if (0 == num_running_gp_cores) {
  339. /*
  340. * There are no GP cores running, so this is also the point
  341. * at which we consider the GPU to be idle as well.
  342. */
  343. accumulated_work_time_gpu += (time_now - work_start_time_gpu);
  344. work_start_time_gpu = 0;
  345. }
  346. }
  347. _mali_osk_spinlock_irq_unlock(time_data_lock);
  348. }
  349. u32 _mali_ukk_utilization_gp_pp(void)
  350. {
  351. return last_utilization_gpu;
  352. }
  353. u32 _mali_ukk_utilization_gp(void)
  354. {
  355. return last_utilization_gp;
  356. }
  357. u32 _mali_ukk_utilization_pp(void)
  358. {
  359. return last_utilization_pp;
  360. }