patchstream.py 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526
  1. # SPDX-License-Identifier: GPL-2.0+
  2. # Copyright (c) 2011 The Chromium OS Authors.
  3. #
  4. import math
  5. import os
  6. import re
  7. import shutil
  8. import tempfile
  9. import command
  10. import commit
  11. import gitutil
  12. from series import Series
  13. # Tags that we detect and remove
  14. re_remove = re.compile('^BUG=|^TEST=|^BRANCH=|^Change-Id:|^Review URL:'
  15. '|Reviewed-on:|Commit-\w*:')
  16. # Lines which are allowed after a TEST= line
  17. re_allowed_after_test = re.compile('^Signed-off-by:')
  18. # Signoffs
  19. re_signoff = re.compile('^Signed-off-by: *(.*)')
  20. # The start of the cover letter
  21. re_cover = re.compile('^Cover-letter:')
  22. # A cover letter Cc
  23. re_cover_cc = re.compile('^Cover-letter-cc: *(.*)')
  24. # Patch series tag
  25. re_series_tag = re.compile('^Series-([a-z-]*): *(.*)')
  26. # Commit series tag
  27. re_commit_tag = re.compile('^Commit-([a-z-]*): *(.*)')
  28. # Commit tags that we want to collect and keep
  29. re_tag = re.compile('^(Tested-by|Acked-by|Reviewed-by|Patch-cc): (.*)')
  30. # The start of a new commit in the git log
  31. re_commit = re.compile('^commit ([0-9a-f]*)$')
  32. # We detect these since checkpatch doesn't always do it
  33. re_space_before_tab = re.compile('^[+].* \t')
  34. # States we can be in - can we use range() and still have comments?
  35. STATE_MSG_HEADER = 0 # Still in the message header
  36. STATE_PATCH_SUBJECT = 1 # In patch subject (first line of log for a commit)
  37. STATE_PATCH_HEADER = 2 # In patch header (after the subject)
  38. STATE_DIFFS = 3 # In the diff part (past --- line)
  39. class PatchStream:
  40. """Class for detecting/injecting tags in a patch or series of patches
  41. We support processing the output of 'git log' to read out the tags we
  42. are interested in. We can also process a patch file in order to remove
  43. unwanted tags or inject additional ones. These correspond to the two
  44. phases of processing.
  45. """
  46. def __init__(self, series, name=None, is_log=False):
  47. self.skip_blank = False # True to skip a single blank line
  48. self.found_test = False # Found a TEST= line
  49. self.lines_after_test = 0 # MNumber of lines found after TEST=
  50. self.warn = [] # List of warnings we have collected
  51. self.linenum = 1 # Output line number we are up to
  52. self.in_section = None # Name of start...END section we are in
  53. self.notes = [] # Series notes
  54. self.section = [] # The current section...END section
  55. self.series = series # Info about the patch series
  56. self.is_log = is_log # True if indent like git log
  57. self.in_change = 0 # Non-zero if we are in a change list
  58. self.blank_count = 0 # Number of blank lines stored up
  59. self.state = STATE_MSG_HEADER # What state are we in?
  60. self.signoff = [] # Contents of signoff line
  61. self.commit = None # Current commit
  62. def AddToSeries(self, line, name, value):
  63. """Add a new Series-xxx tag.
  64. When a Series-xxx tag is detected, we come here to record it, if we
  65. are scanning a 'git log'.
  66. Args:
  67. line: Source line containing tag (useful for debug/error messages)
  68. name: Tag name (part after 'Series-')
  69. value: Tag value (part after 'Series-xxx: ')
  70. """
  71. if name == 'notes':
  72. self.in_section = name
  73. self.skip_blank = False
  74. if self.is_log:
  75. self.series.AddTag(self.commit, line, name, value)
  76. def AddToCommit(self, line, name, value):
  77. """Add a new Commit-xxx tag.
  78. When a Commit-xxx tag is detected, we come here to record it.
  79. Args:
  80. line: Source line containing tag (useful for debug/error messages)
  81. name: Tag name (part after 'Commit-')
  82. value: Tag value (part after 'Commit-xxx: ')
  83. """
  84. if name == 'notes':
  85. self.in_section = 'commit-' + name
  86. self.skip_blank = False
  87. def CloseCommit(self):
  88. """Save the current commit into our commit list, and reset our state"""
  89. if self.commit and self.is_log:
  90. self.series.AddCommit(self.commit)
  91. self.commit = None
  92. # If 'END' is missing in a 'Cover-letter' section, and that section
  93. # happens to show up at the very end of the commit message, this is
  94. # the chance for us to fix it up.
  95. if self.in_section == 'cover' and self.is_log:
  96. self.series.cover = self.section
  97. self.in_section = None
  98. self.skip_blank = True
  99. self.section = []
  100. def ProcessLine(self, line):
  101. """Process a single line of a patch file or commit log
  102. This process a line and returns a list of lines to output. The list
  103. may be empty or may contain multiple output lines.
  104. This is where all the complicated logic is located. The class's
  105. state is used to move between different states and detect things
  106. properly.
  107. We can be in one of two modes:
  108. self.is_log == True: This is 'git log' mode, where most output is
  109. indented by 4 characters and we are scanning for tags
  110. self.is_log == False: This is 'patch' mode, where we already have
  111. all the tags, and are processing patches to remove junk we
  112. don't want, and add things we think are required.
  113. Args:
  114. line: text line to process
  115. Returns:
  116. list of output lines, or [] if nothing should be output
  117. """
  118. # Initially we have no output. Prepare the input line string
  119. out = []
  120. line = line.rstrip('\n')
  121. commit_match = re_commit.match(line) if self.is_log else None
  122. if self.is_log:
  123. if line[:4] == ' ':
  124. line = line[4:]
  125. # Handle state transition and skipping blank lines
  126. series_tag_match = re_series_tag.match(line)
  127. commit_tag_match = re_commit_tag.match(line)
  128. cover_match = re_cover.match(line)
  129. cover_cc_match = re_cover_cc.match(line)
  130. signoff_match = re_signoff.match(line)
  131. tag_match = None
  132. if self.state == STATE_PATCH_HEADER:
  133. tag_match = re_tag.match(line)
  134. is_blank = not line.strip()
  135. if is_blank:
  136. if (self.state == STATE_MSG_HEADER
  137. or self.state == STATE_PATCH_SUBJECT):
  138. self.state += 1
  139. # We don't have a subject in the text stream of patch files
  140. # It has its own line with a Subject: tag
  141. if not self.is_log and self.state == STATE_PATCH_SUBJECT:
  142. self.state += 1
  143. elif commit_match:
  144. self.state = STATE_MSG_HEADER
  145. # If a tag is detected, or a new commit starts
  146. if series_tag_match or commit_tag_match or \
  147. cover_match or cover_cc_match or signoff_match or \
  148. self.state == STATE_MSG_HEADER:
  149. # but we are already in a section, this means 'END' is missing
  150. # for that section, fix it up.
  151. if self.in_section:
  152. self.warn.append("Missing 'END' in section '%s'" % self.in_section)
  153. if self.in_section == 'cover':
  154. self.series.cover = self.section
  155. elif self.in_section == 'notes':
  156. if self.is_log:
  157. self.series.notes += self.section
  158. elif self.in_section == 'commit-notes':
  159. if self.is_log:
  160. self.commit.notes += self.section
  161. else:
  162. self.warn.append("Unknown section '%s'" % self.in_section)
  163. self.in_section = None
  164. self.skip_blank = True
  165. self.section = []
  166. # but we are already in a change list, that means a blank line
  167. # is missing, fix it up.
  168. if self.in_change:
  169. self.warn.append("Missing 'blank line' in section 'Series-changes'")
  170. self.in_change = 0
  171. # If we are in a section, keep collecting lines until we see END
  172. if self.in_section:
  173. if line == 'END':
  174. if self.in_section == 'cover':
  175. self.series.cover = self.section
  176. elif self.in_section == 'notes':
  177. if self.is_log:
  178. self.series.notes += self.section
  179. elif self.in_section == 'commit-notes':
  180. if self.is_log:
  181. self.commit.notes += self.section
  182. else:
  183. self.warn.append("Unknown section '%s'" % self.in_section)
  184. self.in_section = None
  185. self.skip_blank = True
  186. self.section = []
  187. else:
  188. self.section.append(line)
  189. # Detect the commit subject
  190. elif not is_blank and self.state == STATE_PATCH_SUBJECT:
  191. self.commit.subject = line
  192. # Detect the tags we want to remove, and skip blank lines
  193. elif re_remove.match(line) and not commit_tag_match:
  194. self.skip_blank = True
  195. # TEST= should be the last thing in the commit, so remove
  196. # everything after it
  197. if line.startswith('TEST='):
  198. self.found_test = True
  199. elif self.skip_blank and is_blank:
  200. self.skip_blank = False
  201. # Detect the start of a cover letter section
  202. elif cover_match:
  203. self.in_section = 'cover'
  204. self.skip_blank = False
  205. elif cover_cc_match:
  206. value = cover_cc_match.group(1)
  207. self.AddToSeries(line, 'cover-cc', value)
  208. # If we are in a change list, key collected lines until a blank one
  209. elif self.in_change:
  210. if is_blank:
  211. # Blank line ends this change list
  212. self.in_change = 0
  213. elif line == '---':
  214. self.in_change = 0
  215. out = self.ProcessLine(line)
  216. else:
  217. if self.is_log:
  218. self.series.AddChange(self.in_change, self.commit, line)
  219. self.skip_blank = False
  220. # Detect Series-xxx tags
  221. elif series_tag_match:
  222. name = series_tag_match.group(1)
  223. value = series_tag_match.group(2)
  224. if name == 'changes':
  225. # value is the version number: e.g. 1, or 2
  226. try:
  227. value = int(value)
  228. except ValueError as str:
  229. raise ValueError("%s: Cannot decode version info '%s'" %
  230. (self.commit.hash, line))
  231. self.in_change = int(value)
  232. else:
  233. self.AddToSeries(line, name, value)
  234. self.skip_blank = True
  235. # Detect Commit-xxx tags
  236. elif commit_tag_match:
  237. name = commit_tag_match.group(1)
  238. value = commit_tag_match.group(2)
  239. if name == 'notes':
  240. self.AddToCommit(line, name, value)
  241. self.skip_blank = True
  242. # Detect the start of a new commit
  243. elif commit_match:
  244. self.CloseCommit()
  245. self.commit = commit.Commit(commit_match.group(1))
  246. # Detect tags in the commit message
  247. elif tag_match:
  248. # Remove Tested-by self, since few will take much notice
  249. if (tag_match.group(1) == 'Tested-by' and
  250. tag_match.group(2).find(os.getenv('USER') + '@') != -1):
  251. self.warn.append("Ignoring %s" % line)
  252. elif tag_match.group(1) == 'Patch-cc':
  253. self.commit.AddCc(tag_match.group(2).split(','))
  254. else:
  255. out = [line]
  256. # Suppress duplicate signoffs
  257. elif signoff_match:
  258. if (self.is_log or not self.commit or
  259. self.commit.CheckDuplicateSignoff(signoff_match.group(1))):
  260. out = [line]
  261. # Well that means this is an ordinary line
  262. else:
  263. # Look for space before tab
  264. m = re_space_before_tab.match(line)
  265. if m:
  266. self.warn.append('Line %d/%d has space before tab' %
  267. (self.linenum, m.start()))
  268. # OK, we have a valid non-blank line
  269. out = [line]
  270. self.linenum += 1
  271. self.skip_blank = False
  272. if self.state == STATE_DIFFS:
  273. pass
  274. # If this is the start of the diffs section, emit our tags and
  275. # change log
  276. elif line == '---':
  277. self.state = STATE_DIFFS
  278. # Output the tags (signeoff first), then change list
  279. out = []
  280. log = self.series.MakeChangeLog(self.commit)
  281. out += [line]
  282. if self.commit:
  283. out += self.commit.notes
  284. out += [''] + log
  285. elif self.found_test:
  286. if not re_allowed_after_test.match(line):
  287. self.lines_after_test += 1
  288. return out
  289. def Finalize(self):
  290. """Close out processing of this patch stream"""
  291. self.CloseCommit()
  292. if self.lines_after_test:
  293. self.warn.append('Found %d lines after TEST=' %
  294. self.lines_after_test)
  295. def ProcessStream(self, infd, outfd):
  296. """Copy a stream from infd to outfd, filtering out unwanting things.
  297. This is used to process patch files one at a time.
  298. Args:
  299. infd: Input stream file object
  300. outfd: Output stream file object
  301. """
  302. # Extract the filename from each diff, for nice warnings
  303. fname = None
  304. last_fname = None
  305. re_fname = re.compile('diff --git a/(.*) b/.*')
  306. while True:
  307. line = infd.readline()
  308. if not line:
  309. break
  310. out = self.ProcessLine(line)
  311. # Try to detect blank lines at EOF
  312. for line in out:
  313. match = re_fname.match(line)
  314. if match:
  315. last_fname = fname
  316. fname = match.group(1)
  317. if line == '+':
  318. self.blank_count += 1
  319. else:
  320. if self.blank_count and (line == '-- ' or match):
  321. self.warn.append("Found possible blank line(s) at "
  322. "end of file '%s'" % last_fname)
  323. outfd.write('+\n' * self.blank_count)
  324. outfd.write(line + '\n')
  325. self.blank_count = 0
  326. self.Finalize()
  327. def GetMetaDataForList(commit_range, git_dir=None, count=None,
  328. series = None, allow_overwrite=False):
  329. """Reads out patch series metadata from the commits
  330. This does a 'git log' on the relevant commits and pulls out the tags we
  331. are interested in.
  332. Args:
  333. commit_range: Range of commits to count (e.g. 'HEAD..base')
  334. git_dir: Path to git repositiory (None to use default)
  335. count: Number of commits to list, or None for no limit
  336. series: Series object to add information into. By default a new series
  337. is started.
  338. allow_overwrite: Allow tags to overwrite an existing tag
  339. Returns:
  340. A Series object containing information about the commits.
  341. """
  342. if not series:
  343. series = Series()
  344. series.allow_overwrite = allow_overwrite
  345. params = gitutil.LogCmd(commit_range, reverse=True, count=count,
  346. git_dir=git_dir)
  347. stdout = command.RunPipe([params], capture=True).stdout
  348. ps = PatchStream(series, is_log=True)
  349. for line in stdout.splitlines():
  350. ps.ProcessLine(line)
  351. ps.Finalize()
  352. return series
  353. def GetMetaData(start, count):
  354. """Reads out patch series metadata from the commits
  355. This does a 'git log' on the relevant commits and pulls out the tags we
  356. are interested in.
  357. Args:
  358. start: Commit to start from: 0=HEAD, 1=next one, etc.
  359. count: Number of commits to list
  360. """
  361. return GetMetaDataForList('HEAD~%d' % start, None, count)
  362. def GetMetaDataForTest(text):
  363. """Process metadata from a file containing a git log. Used for tests
  364. Args:
  365. text:
  366. """
  367. series = Series()
  368. ps = PatchStream(series, is_log=True)
  369. for line in text.splitlines():
  370. ps.ProcessLine(line)
  371. ps.Finalize()
  372. return series
  373. def FixPatch(backup_dir, fname, series, commit):
  374. """Fix up a patch file, by adding/removing as required.
  375. We remove our tags from the patch file, insert changes lists, etc.
  376. The patch file is processed in place, and overwritten.
  377. A backup file is put into backup_dir (if not None).
  378. Args:
  379. fname: Filename to patch file to process
  380. series: Series information about this patch set
  381. commit: Commit object for this patch file
  382. Return:
  383. A list of errors, or [] if all ok.
  384. """
  385. handle, tmpname = tempfile.mkstemp()
  386. outfd = os.fdopen(handle, 'w')
  387. infd = open(fname, 'r')
  388. ps = PatchStream(series)
  389. ps.commit = commit
  390. ps.ProcessStream(infd, outfd)
  391. infd.close()
  392. outfd.close()
  393. # Create a backup file if required
  394. if backup_dir:
  395. shutil.copy(fname, os.path.join(backup_dir, os.path.basename(fname)))
  396. shutil.move(tmpname, fname)
  397. return ps.warn
  398. def FixPatches(series, fnames):
  399. """Fix up a list of patches identified by filenames
  400. The patch files are processed in place, and overwritten.
  401. Args:
  402. series: The series object
  403. fnames: List of patch files to process
  404. """
  405. # Current workflow creates patches, so we shouldn't need a backup
  406. backup_dir = None #tempfile.mkdtemp('clean-patch')
  407. count = 0
  408. for fname in fnames:
  409. commit = series.commits[count]
  410. commit.patch = fname
  411. result = FixPatch(backup_dir, fname, series, commit)
  412. if result:
  413. print('%d warnings for %s:' % (len(result), fname))
  414. for warn in result:
  415. print('\t', warn)
  416. print
  417. count += 1
  418. print('Cleaned %d patches' % count)
  419. def InsertCoverLetter(fname, series, count):
  420. """Inserts a cover letter with the required info into patch 0
  421. Args:
  422. fname: Input / output filename of the cover letter file
  423. series: Series object
  424. count: Number of patches in the series
  425. """
  426. fd = open(fname, 'r')
  427. lines = fd.readlines()
  428. fd.close()
  429. fd = open(fname, 'w')
  430. text = series.cover
  431. prefix = series.GetPatchPrefix()
  432. for line in lines:
  433. if line.startswith('Subject:'):
  434. # if more than 10 or 100 patches, it should say 00/xx, 000/xxx, etc
  435. zero_repeat = int(math.log10(count)) + 1
  436. zero = '0' * zero_repeat
  437. line = 'Subject: [%s %s/%d] %s\n' % (prefix, zero, count, text[0])
  438. # Insert our cover letter
  439. elif line.startswith('*** BLURB HERE ***'):
  440. # First the blurb test
  441. line = '\n'.join(text[1:]) + '\n'
  442. if series.get('notes'):
  443. line += '\n'.join(series.notes) + '\n'
  444. # Now the change list
  445. out = series.MakeChangeLog(None)
  446. line += '\n' + '\n'.join(out)
  447. fd.write(line)
  448. fd.close()