xor_simd.c 1.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293
  1. // SPDX-License-Identifier: GPL-2.0-or-later
  2. /*
  3. * LoongArch SIMD XOR operations
  4. *
  5. * Copyright (C) 2023 WANG Xuerui <git@xen0n.name>
  6. */
  7. #include "xor_simd.h"
  8. /*
  9. * Each loop iteration processes one cache line (64 bytes). This assumes that
  10. * future mainstream LoongArch cores will have performance characteristics
  11. * similar to those of the current models.
  12. */
  13. #define LINE_WIDTH 64
  /*
   * LSX flavour. The macros below expand to string fragments of inline
   * assembly. They are defined, xor_template.c is included, and then they are
   * all undefined again so that a later section can redefine them.
   * NOTE(review): xor_template.c is not visible from here -- presumably it
   * builds the functions named by XOR_FUNC_NAME() out of these fragments;
   * confirm against the template.
   */
  14. #ifdef CONFIG_CPU_HAS_LSX
  /*
   * LD: emit "vld $vr<reg>, %[<base>], <offset>", i.e. a vector load into
   * register $vr<reg> from the named asm operand <base> plus <offset>.
   * All three arguments are stringified, so they must be literal tokens.
   */
  15. #define LD(reg, base, offset) \
  16. "vld $vr" #reg ", %[" #base "], " #offset "\n\t"
  /* ST: emit the matching "vst" (vector store) with the same operand layout. */
  17. #define ST(reg, base, offset) \
  18. "vst $vr" #reg ", %[" #base "], " #offset "\n\t"
  /*
   * XOR: emit "vxor.v $vr<dj>, $vr<dj>, $vr<k>". The destination and the
   * first source are the same register, so this is $vr<dj> ^= $vr<k>.
   */
  19. #define XOR(dj, k) "vxor.v $vr" #dj ", $vr" #dj ", $vr" #k "\n\t"
  /*
   * Load one line from <base> into $vr0..$vr3 using four loads at offsets
   * 0, 16, 32 and 48. These registers are the ones later XORed into and
   * stored back by ST_LINE.
   */
  20. #define LD_INOUT_LINE(base) \
  21. LD(0, base, 0) \
  22. LD(1, base, 16) \
  23. LD(2, base, 32) \
  24. LD(3, base, 48)
  /*
   * Load one line from <base> into the scratch registers $vr4..$vr7, then
   * XOR each of them into the corresponding register $vr0..$vr3. All four
   * loads are issued before the first XOR.
   */
  25. #define LD_AND_XOR_LINE(base) \
  26. LD(4, base, 0) \
  27. LD(5, base, 16) \
  28. LD(6, base, 32) \
  29. LD(7, base, 48) \
  30. XOR(0, 4) \
  31. XOR(1, 5) \
  32. XOR(2, 6) \
  33. XOR(3, 7)
  /* Store $vr0..$vr3 to <base> at offsets 0, 16, 32 and 48. */
  34. #define ST_LINE(base) \
  35. ST(0, base, 0) \
  36. ST(1, base, 16) \
  37. ST(2, base, 32) \
  38. ST(3, base, 48)
  /* Token-paste <nr> onto the prefix: XOR_FUNC_NAME(2) becomes __xor_lsx_2. */
  39. #define XOR_FUNC_NAME(nr) __xor_lsx_##nr
  /* Textual inclusion of a .c file: it is expanded with the macros above in effect. */
  40. #include "xor_template.c"
  /* Drop every per-flavour macro so the names are free for redefinition. */
  41. #undef LD
  42. #undef ST
  43. #undef XOR
  44. #undef LD_INOUT_LINE
  45. #undef LD_AND_XOR_LINE
  46. #undef ST_LINE
  47. #undef XOR_FUNC_NAME
  48. #endif /* CONFIG_CPU_HAS_LSX */
  /*
   * LASX flavour. The macros below expand to string fragments of inline
   * assembly using the "xv" mnemonics and $xr registers. They are defined,
   * xor_template.c is included, and then they are all undefined again.
   * NOTE(review): xor_template.c is not visible from here -- presumably it
   * builds the functions named by XOR_FUNC_NAME() out of these fragments;
   * confirm against the template.
   */
  49. #ifdef CONFIG_CPU_HAS_LASX
  /*
   * LD: emit "xvld $xr<reg>, %[<base>], <offset>", i.e. a vector load into
   * register $xr<reg> from the named asm operand <base> plus <offset>.
   * All three arguments are stringified, so they must be literal tokens.
   */
  50. #define LD(reg, base, offset) \
  51. "xvld $xr" #reg ", %[" #base "], " #offset "\n\t"
  /* ST: emit the matching "xvst" (vector store) with the same operand layout. */
  52. #define ST(reg, base, offset) \
  53. "xvst $xr" #reg ", %[" #base "], " #offset "\n\t"
  /*
   * XOR: emit "xvxor.v $xr<dj>, $xr<dj>, $xr<k>". The destination and the
   * first source are the same register, so this is $xr<dj> ^= $xr<k>.
   */
  54. #define XOR(dj, k) "xvxor.v $xr" #dj ", $xr" #dj ", $xr" #k "\n\t"
  /*
   * Load one line from <base> into $xr0 and $xr1 using two loads at offsets
   * 0 and 32. These registers are the ones later XORed into and stored back
   * by ST_LINE.
   */
  55. #define LD_INOUT_LINE(base) \
  56. LD(0, base, 0) \
  57. LD(1, base, 32)
  /*
   * Load one line from <base> into the scratch registers $xr2 and $xr3, then
   * XOR them into $xr0 and $xr1 respectively. Both loads are issued before
   * the first XOR.
   */
  58. #define LD_AND_XOR_LINE(base) \
  59. LD(2, base, 0) \
  60. LD(3, base, 32) \
  61. XOR(0, 2) \
  62. XOR(1, 3)
  /* Store $xr0 and $xr1 to <base> at offsets 0 and 32. */
  63. #define ST_LINE(base) \
  64. ST(0, base, 0) \
  65. ST(1, base, 32)
  /* Token-paste <nr> onto the prefix: XOR_FUNC_NAME(2) becomes __xor_lasx_2. */
  66. #define XOR_FUNC_NAME(nr) __xor_lasx_##nr
  /* Textual inclusion of a .c file: it is expanded with the macros above in effect. */
  67. #include "xor_template.c"
  /* Drop every per-flavour macro so the names do not leak past this section. */
  68. #undef LD
  69. #undef ST
  70. #undef XOR
  71. #undef LD_INOUT_LINE
  72. #undef LD_AND_XOR_LINE
  73. #undef ST_LINE
  74. #undef XOR_FUNC_NAME
  75. #endif /* CONFIG_CPU_HAS_LASX */