udivsi3.S 1.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960
  1. /* SPDX-License-Identifier: GPL-2.0 */
  2. .global __udivsi3 /* export the entry point so other objects can link to it */
  3. .section .text..SHmedia32,"ax" /* dedicated code section; "ax" = allocatable + executable */
  4. .align 2 /* NOTE(review): unit (bytes vs. power of two) depends on the target assembler - confirm */
  5. /*
     __udivsi3: unsigned division helper.
  6. inputs: r4,r5
     r4 is the dividend and r5 the divisor: the code below multiplies
     partial quotients by r5, subtracts the products from r4, and sums
     the partial quotients into the result.
     NOTE(review): no test for a zero divisor is visible in this routine -
     confirm what callers expect in that case.
  7. clobbered: r18,r19,r20,r21,r22,r25,tr0
     r18 is read once (ptabs r18,tr0) to set up the return branch before
     it is reused as scratch.
  8. result in r0.
     The per-instruction notes below paraphrase the mnemonics; check the
     SHmedia instruction manual for exact semantics. r63 is used as a
     source/sink operand throughout and is assumed to read as zero.
  9. */
  10. __udivsi3:
  11. addz.l r5,r63,r22 /* r22 = divisor (r5 + r63, 32-bit, zero-extended per the mnemonic) */
  12. nsb r22,r0 /* r0 = bit count of r22, used next as a left-shift amount */
  13. shlld r22,r0,r25 /* r25 = divisor shifted left by r0 (presumably normalised) */
  14. shlri r25,48,r25 /* keep only what remains after shifting right by 48 bits */
  15. movi 0xffffffffffffbb0c,r20 /* shift count equiv 76 (presumably taken modulo 64: low six bits are 12) */
  16. sub r20,r25,r21 /* r21 = constant - r25: presumably the first reciprocal estimate */
  17. mmulfx.w r21,r21,r19 /* r19 = r21 * r21 (fixed-point word multiply) */
  18. mshflo.w r21,r63,r21 /* rearrange r21 with r63 (word shuffle) */
  19. ptabs r18,tr0 /* capture the return target from r18 before r18 is overwritten below */
  20. mmulfx.w r25,r19,r19 /* r19 = r25 * r19 (fixed-point word multiply) */
  21. sub r20,r0,r0 /* r0 = constant - bit count: right-shift amount for the shlrd steps */
  22. /* bubble */
  23. msub.w r21,r19,r19 /* r19 = r21 - r19 (word subtract) */
  24. /*
  25. * It would be nice for scheduling to do this add to r21 before
  26. * the msub.w, but we need a different value for r19 to keep
  27. * errors under control.
  28. */
  29. addi r19,-2,r21 /* r21 = r19 - 2 */
  30. mulu.l r4,r21,r18 /* r18 = dividend * r21 */
  31. mmulfx.w r19,r19,r19 /* r19 = r19 * r19 (fixed-point word multiply) */
  32. shlli r21,15,r21 /* r21 <<= 15 */
  33. shlrd r18,r0,r18 /* r18 = first partial quotient (product shifted right by r0) */
  34. mulu.l r18,r22,r20 /* r20 = first partial quotient * divisor */
  35. mmacnfx.wl r25,r19,r21 /* adjust r21 using r25 and r19 (presumably refines the estimate) */
  36. /* bubble */
  37. sub r4,r20,r25 /* r25 = dividend - r20: remainder after the first step */
  38. mulu.l r25,r21,r19 /* r19 = remainder * r21 */
  39. addi r0,14,r0 /* shift amount += 14 for the remaining steps */
  40. /* bubble */
  41. shlrd r19,r0,r19 /* r19 = second partial quotient */
  42. mulu.l r19,r22,r20 /* r20 = second partial quotient * divisor */
  43. add r18,r19,r18 /* accumulate quotient: r18 += r19 */
  44. /* bubble */
  45. sub.l r25,r20,r25 /* r25 = remainder after the second step (32-bit subtract) */
  46. mulu.l r25,r21,r19 /* r19 = remainder * r21 */
  47. addz.l r25,r63,r25 /* zero-extend the 32-bit remainder */
  48. sub r25,r22,r25 /* r25 = remainder - divisor */
  49. shlrd r19,r0,r19 /* r19 = third partial quotient */
  50. mulu.l r19,r22,r20 /* r20 = third partial quotient * divisor */
  51. addi r25,1,r25 /* r25 = remainder - divisor + 1 */
  52. add r18,r19,r18 /* accumulate quotient: r18 += r19 */
  53. cmpgt r25,r20,r25 /* r25 = 1 if remainder - r20 is still >= divisor, else 0 */
  54. add.l r18,r25,r0 /* result = accumulated quotient + final correction */
  55. blink tr0,r63 /* return through tr0; the link value goes to r63 (discarded) */