@@ -107,86 +107,102 @@ def split_paragraphs(text: str):
107107def resolve_reviewer (login : str ) -> tuple :
108108 """Map a GitHub login to (name, email).
109109
110- Tries three tiers in order: repo commit history, GitHub user profile,
111- and past `Reviewers:` trailers in git log (matched by name).
112- Noreply emails (@users.noreply.github.com) are treated as missing since
113- they are GitHub privacy placeholders that do not identify the reviewer.
114- Returns (name, None) when no usable email is found; the caller falls
115- back to the '(@login)' form in the Reviewers trailer.
110+ Tries reviewer email sources in order: repo commit author email, past
111+ `Reviewers:` trailers searched via GitHub commit search API (matched
112+ by name and verified by PR review login), and GitHub user profile
113+ public email. Noreply emails (@users.noreply.github.com) are treated
114+ as missing since they are GitHub privacy placeholders that do not
115+ identify the reviewer. Returns (name, None) when no usable email is
116+ found; the caller falls back to the '(github:login)' form in the
117+ Reviewers trailer.
116118 """
117119 def _usable_email (e ):
118120 if not e or e .endswith ("@users.noreply.github.com" ):
119121 return None
120122 return e
121123
122- name = None
123- email = None
124-
125- # Tier 1: find from repo commit history. Misses when the reviewer has no
126- # merged commit in apache/kafka, or had "Keep my email private" enabled
127- # at commit time (GitHub rewrites the author to the noreply form).
128- try :
129- cmd = f"gh api repos/apache/kafka/commits?author={ login } &per_page=1"
130- p = subprocess .run (shlex .split (cmd ), capture_output = True , text = True )
131- if p .returncode == 0 :
132- commits = json .loads (p .stdout )
133- if commits :
134- author = commits [0 ].get ("commit" , {}).get ("author" , {})
135- name = author .get ("name" )
136- email = _usable_email (author .get ("email" ))
137- except Exception as e :
138- logger .debug (f"Failed to resolve { login } from commit history: { e } " )
139-
140- # Tier 2: GitHub user profile. Only exposes an email when the reviewer
141- # has set a Public email in their profile settings.
142- if not name or not email :
143- try :
144- cmd = f"gh api users/{ login } "
145- p = subprocess .run (shlex .split (cmd ), capture_output = True , text = True )
146- if p .returncode == 0 :
147- user = json .loads (p .stdout )
148- if not name :
149- name = user .get ("name" )
150- if not email :
151- email = _usable_email (user .get ("email" ))
152- except Exception as e :
153- logger .debug (f"Failed to resolve { login } from GitHub profile: { e } " )
154-
155- # Tier 3: past Reviewers: trailers in git log, matched by name. Catches
156- # pure reviewers (no commits in apache/kafka, no public profile email)
157- # who have been credited with a real email in an earlier merged PR.
158- # git log is newest-first, so the first usable match is the most recent.
159- if name and not email :
124+ def _run_json (cmd , source ):
160125 try :
161- p = subprocess .run (
162- ["git" , "log" ,
163- "--pretty=format:%(trailers:key=Reviewers,valueonly=true,unfold=true)" ],
164- capture_output = True , text = True ,
165- )
126+ p = subprocess .run (cmd , capture_output = True , text = True )
166127 if p .returncode == 0 :
167- pattern = re .compile (rf"{ re .escape (name )} \s*<([^>]+)>" )
168- for line in p .stdout .splitlines ():
169- for m in pattern .finditer (line ):
170- candidate = _usable_email (m .group (1 ))
171- if candidate :
172- email = candidate
173- break
174- if email :
175- break
128+ return json .loads (p .stdout )
129+ logger .debug (f"Failed to resolve { login } from { source } : { p .stderr } " )
176130 except Exception as e :
177- logger .debug (f"Failed to resolve { login } from past Reviewers trailers: { e } " )
178-
179- if not name :
180- name = login
131+ logger .debug (f"Failed to resolve { login } from { source } : { e } " )
132+ return None
181133
182- return (name , email )
134+ def _has_pr_review_from_login (commit_sha ):
135+ pulls = _run_json (["gh" , "api" , f"repos/apache/kafka/commits/{ commit_sha } /pulls" ],
136+ f"associated PRs for commit { commit_sha } " ) or []
137+ for pull in pulls :
138+ pr_number = pull .get ("number" )
139+ if not pr_number :
140+ continue
141+ reviews = _run_json (["gh" , "api" , f"repos/apache/kafka/pulls/{ pr_number } /reviews?per_page=100" ],
142+ f"reviews for PR { pr_number } " ) or []
143+ if any ((review .get ("user" ) or {}).get ("login" , "" ).lower () == login .lower ()
144+ for review in reviews ):
145+ return True
146+ return False
147+
148+ commits = _run_json (["gh" , "api" , f"repos/apache/kafka/commits?author={ login } &per_page=1" ],
149+ "commit history" ) or []
150+ author = commits [0 ].get ("commit" , {}).get ("author" , {}) if commits else {}
151+
152+ # Tier 1: latest repo commit authored by this GitHub login. Misses
153+ # when the reviewer has no merged commit in apache/kafka, or had
154+ # "Keep my email private" enabled at commit time (GitHub rewrites
155+ # the author to the noreply form).
156+ email = _usable_email (author .get ("email" ))
157+ if email :
158+ return (author .get ("name" ) or login , email )
159+
160+ user = _run_json (["gh" , "api" , f"users/{ login } " ], "GitHub profile" ) or {}
161+
162+ name_candidates = []
163+ for candidate in (user .get ("name" ), author .get ("name" ), login ):
164+ if candidate and candidate not in name_candidates :
165+ name_candidates .append (candidate )
166+
167+ name = name_candidates [0 ] if name_candidates else login
168+
169+ # Tier 2: past Reviewers: trailers in commit history, matched by name,
170+ # via the GitHub commit search API. Catches pure reviewers (no commits
171+ # in apache/kafka, no public profile email) who have been credited
172+ # with a real email in an earlier merged PR. Sort by committer-date
173+ # desc so the most recent email wins if a reviewer has changed it.
174+ # Full-text search is tokenized (not strict substring), so we re-verify
175+ # with a regex client-side. To avoid same-name matches, we only accept
176+ # a trailer email when the matched commit's associated PR includes a
177+ # review from this GitHub login.
178+ for candidate in name_candidates :
179+ results = _run_json (["gh" , "search" , "commits" ,
180+ "--repo" , "apache/kafka" ,
181+ f'"{ candidate } <"' ,
182+ "--limit" , "10" ,
183+ "--sort" , "committer-date" ,
184+ "--order" , "desc" ,
185+ "--json" , "sha,commit" ],
186+ "commit search" ) or []
187+ pattern = re .compile (rf"{ re .escape (candidate )} \s*<([^>]+)>" )
188+ for result in results :
189+ msg = result .get ("commit" , {}).get ("message" , "" )
190+ commit_sha = result .get ("sha" )
191+ for match in pattern .finditer (msg ):
192+ candidate_email = _usable_email (match .group (1 ))
193+ if candidate_email and commit_sha and _has_pr_review_from_login (commit_sha ):
194+ return (candidate , candidate_email )
195+
196+ # Tier 3: GitHub user profile. Only exposes an email when the reviewer
197+ # has set a Public email in their profile settings.
198+ return (name , _usable_email (user .get ("email" )))
183199
184200
185201def already_exists (identity : str , existing_reviewers : List [str ]) -> bool :
186202 """Check if a reviewer identity is already in the existing reviewers list.
187203
188204 identity is the delimited token that uniquely identifies a reviewer, either
189- '<email>' (for the email form) or '(@ login)' (for the login fallback).
205+ '<email>' (for the email form) or '(github: login)' (for the login fallback).
190206 """
191207 return identity .lower () in ", " .join (existing_reviewers ).lower ()
192208
@@ -246,7 +262,8 @@ def update_reviewers_trailer(body: str, trailer: str) -> str:
246262 if email :
247263 identity = f"<{ email } >"
248264 else :
249- identity = f"(@{ reviewer_login } )"
265+ # Tier 4: fall back to the GitHub handle without tagging the reviewer.
266+ identity = f"(github:{ reviewer_login } )"
250267 resolved = f"{ name } { identity } "
251268 existing_reviewers = parse_trailers (title , body ).get ("Reviewers" , [])
252269 if not already_exists (identity , existing_reviewers ):
0 commit comments