spdxcheck.py 9.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286
  1. #!/usr/bin/env python
  2. # SPDX-License-Identifier: GPL-2.0
  3. # Copyright Thomas Gleixner <tglx@linutronix.de>
  4. from argparse import ArgumentParser
  5. from ply import lex, yacc
  6. import locale
  7. import traceback
  8. import sys
  9. import git
  10. import re
  11. import os
  12. class ParserException(Exception):
  13. def __init__(self, tok, txt):
  14. self.tok = tok
  15. self.txt = txt
  16. class SPDXException(Exception):
  17. def __init__(self, el, txt):
  18. self.el = el
  19. self.txt = txt
  20. class SPDXdata(object):
  21. def __init__(self):
  22. self.license_files = 0
  23. self.exception_files = 0
  24. self.licenses = [ ]
  25. self.exceptions = { }
  26. # Read the spdx data from the LICENSES directory
  27. def read_spdxdata(repo):
  28. # The subdirectories of LICENSES in the kernel source
  29. license_dirs = [ "preferred", "other", "exceptions" ]
  30. lictree = repo.head.commit.tree['LICENSES']
  31. spdx = SPDXdata()
  32. for d in license_dirs:
  33. for el in lictree[d].traverse():
  34. if not os.path.isfile(el.path):
  35. continue
  36. exception = None
  37. for l in open(el.path).readlines():
  38. if l.startswith('Valid-License-Identifier:'):
  39. lid = l.split(':')[1].strip().upper()
  40. if lid in spdx.licenses:
  41. raise SPDXException(el, 'Duplicate License Identifier: %s' %lid)
  42. else:
  43. spdx.licenses.append(lid)
  44. elif l.startswith('SPDX-Exception-Identifier:'):
  45. exception = l.split(':')[1].strip().upper()
  46. spdx.exceptions[exception] = []
  47. elif l.startswith('SPDX-Licenses:'):
  48. for lic in l.split(':')[1].upper().strip().replace(' ', '').replace('\t', '').split(','):
  49. if not lic in spdx.licenses:
  50. raise SPDXException(None, 'Exception %s missing license %s' %(ex, lic))
  51. spdx.exceptions[exception].append(lic)
  52. elif l.startswith("License-Text:"):
  53. if exception:
  54. if not len(spdx.exceptions[exception]):
  55. raise SPDXException(el, 'Exception %s is missing SPDX-Licenses' %excid)
  56. spdx.exception_files += 1
  57. else:
  58. spdx.license_files += 1
  59. break
  60. return spdx
  61. class id_parser(object):
  62. reserved = [ 'AND', 'OR', 'WITH' ]
  63. tokens = [ 'LPAR', 'RPAR', 'ID', 'EXC' ] + reserved
  64. precedence = ( ('nonassoc', 'AND', 'OR'), )
  65. t_ignore = ' \t'
  66. def __init__(self, spdx):
  67. self.spdx = spdx
  68. self.lasttok = None
  69. self.lastid = None
  70. self.lexer = lex.lex(module = self, reflags = re.UNICODE)
  71. # Initialize the parser. No debug file and no parser rules stored on disk
  72. # The rules are small enough to be generated on the fly
  73. self.parser = yacc.yacc(module = self, write_tables = False, debug = False)
  74. self.lines_checked = 0
  75. self.checked = 0
  76. self.spdx_valid = 0
  77. self.spdx_errors = 0
  78. self.curline = 0
  79. self.deepest = 0
  80. # Validate License and Exception IDs
  81. def validate(self, tok):
  82. id = tok.value.upper()
  83. if tok.type == 'ID':
  84. if not id in self.spdx.licenses:
  85. raise ParserException(tok, 'Invalid License ID')
  86. self.lastid = id
  87. elif tok.type == 'EXC':
  88. if id not in self.spdx.exceptions:
  89. raise ParserException(tok, 'Invalid Exception ID')
  90. if self.lastid not in self.spdx.exceptions[id]:
  91. raise ParserException(tok, 'Exception not valid for license %s' %self.lastid)
  92. self.lastid = None
  93. elif tok.type != 'WITH':
  94. self.lastid = None
  95. # Lexer functions
  96. def t_RPAR(self, tok):
  97. r'\)'
  98. self.lasttok = tok.type
  99. return tok
  100. def t_LPAR(self, tok):
  101. r'\('
  102. self.lasttok = tok.type
  103. return tok
  104. def t_ID(self, tok):
  105. r'[A-Za-z.0-9\-+]+'
  106. if self.lasttok == 'EXC':
  107. print(tok)
  108. raise ParserException(tok, 'Missing parentheses')
  109. tok.value = tok.value.strip()
  110. val = tok.value.upper()
  111. if val in self.reserved:
  112. tok.type = val
  113. elif self.lasttok == 'WITH':
  114. tok.type = 'EXC'
  115. self.lasttok = tok.type
  116. self.validate(tok)
  117. return tok
  118. def t_error(self, tok):
  119. raise ParserException(tok, 'Invalid token')
  120. def p_expr(self, p):
  121. '''expr : ID
  122. | ID WITH EXC
  123. | expr AND expr
  124. | expr OR expr
  125. | LPAR expr RPAR'''
  126. pass
  127. def p_error(self, p):
  128. if not p:
  129. raise ParserException(None, 'Unfinished license expression')
  130. else:
  131. raise ParserException(p, 'Syntax error')
  132. def parse(self, expr):
  133. self.lasttok = None
  134. self.lastid = None
  135. self.parser.parse(expr, lexer = self.lexer)
  136. def parse_lines(self, fd, maxlines, fname):
  137. self.checked += 1
  138. self.curline = 0
  139. try:
  140. for line in fd:
  141. line = line.decode(locale.getpreferredencoding(False), errors='ignore')
  142. self.curline += 1
  143. if self.curline > maxlines:
  144. break
  145. self.lines_checked += 1
  146. if line.find("SPDX-License-Identifier:") < 0:
  147. continue
  148. expr = line.split(':')[1].replace('*/', '').strip()
  149. self.parse(expr)
  150. self.spdx_valid += 1
  151. #
  152. # Should we check for more SPDX ids in the same file and
  153. # complain if there are any?
  154. #
  155. break
  156. except ParserException as pe:
  157. if pe.tok:
  158. col = line.find(expr) + pe.tok.lexpos
  159. tok = pe.tok.value
  160. sys.stdout.write('%s: %d:%d %s: %s\n' %(fname, self.curline, col, pe.txt, tok))
  161. else:
  162. sys.stdout.write('%s: %d:0 %s\n' %(fname, self.curline, col, pe.txt))
  163. self.spdx_errors += 1
  164. def scan_git_tree(tree):
  165. for el in tree.traverse():
  166. # Exclude stuff which would make pointless noise
  167. # FIXME: Put this somewhere more sensible
  168. if el.path.startswith("LICENSES"):
  169. continue
  170. if el.path.find("license-rules.rst") >= 0:
  171. continue
  172. if not os.path.isfile(el.path):
  173. continue
  174. with open(el.path, 'rb') as fd:
  175. parser.parse_lines(fd, args.maxlines, el.path)
  176. def scan_git_subtree(tree, path):
  177. for p in path.strip('/').split('/'):
  178. tree = tree[p]
  179. scan_git_tree(tree)
  180. if __name__ == '__main__':
  181. ap = ArgumentParser(description='SPDX expression checker')
  182. ap.add_argument('path', nargs='*', help='Check path or file. If not given full git tree scan. For stdin use "-"')
  183. ap.add_argument('-m', '--maxlines', type=int, default=15,
  184. help='Maximum number of lines to scan in a file. Default 15')
  185. ap.add_argument('-v', '--verbose', action='store_true', help='Verbose statistics output')
  186. args = ap.parse_args()
  187. # Sanity check path arguments
  188. if '-' in args.path and len(args.path) > 1:
  189. sys.stderr.write('stdin input "-" must be the only path argument\n')
  190. sys.exit(1)
  191. try:
  192. # Use git to get the valid license expressions
  193. repo = git.Repo(os.getcwd())
  194. assert not repo.bare
  195. # Initialize SPDX data
  196. spdx = read_spdxdata(repo)
  197. # Initilize the parser
  198. parser = id_parser(spdx)
  199. except SPDXException as se:
  200. if se.el:
  201. sys.stderr.write('%s: %s\n' %(se.el.path, se.txt))
  202. else:
  203. sys.stderr.write('%s\n' %se.txt)
  204. sys.exit(1)
  205. except Exception as ex:
  206. sys.stderr.write('FAIL: %s\n' %ex)
  207. sys.stderr.write('%s\n' %traceback.format_exc())
  208. sys.exit(1)
  209. try:
  210. if len(args.path) and args.path[0] == '-':
  211. stdin = os.fdopen(sys.stdin.fileno(), 'rb')
  212. parser.parse_lines(stdin, args.maxlines, '-')
  213. else:
  214. if args.path:
  215. for p in args.path:
  216. if os.path.isfile(p):
  217. parser.parse_lines(open(p, 'rb'), args.maxlines, p)
  218. elif os.path.isdir(p):
  219. scan_git_subtree(repo.head.reference.commit.tree, p)
  220. else:
  221. sys.stderr.write('path %s does not exist\n' %p)
  222. sys.exit(1)
  223. else:
  224. # Full git tree scan
  225. scan_git_tree(repo.head.commit.tree)
  226. if args.verbose:
  227. sys.stderr.write('\n')
  228. sys.stderr.write('License files: %12d\n' %spdx.license_files)
  229. sys.stderr.write('Exception files: %12d\n' %spdx.exception_files)
  230. sys.stderr.write('License IDs %12d\n' %len(spdx.licenses))
  231. sys.stderr.write('Exception IDs %12d\n' %len(spdx.exceptions))
  232. sys.stderr.write('\n')
  233. sys.stderr.write('Files checked: %12d\n' %parser.checked)
  234. sys.stderr.write('Lines checked: %12d\n' %parser.lines_checked)
  235. sys.stderr.write('Files with SPDX: %12d\n' %parser.spdx_valid)
  236. sys.stderr.write('Files with errors: %12d\n' %parser.spdx_errors)
  237. sys.exit(0)
  238. except Exception as ex:
  239. sys.stderr.write('FAIL: %s\n' %ex)
  240. sys.stderr.write('%s\n' %traceback.format_exc())
  241. sys.exit(1)