Skip to content

Commit be94b47

Browse files
committed
Merge branch 'trunk' into KAFKA-20167
2 parents 3a3646a + d0e0ec4 commit be94b47

1,604 files changed

Lines changed: 81793 additions & 30479 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.asf.yaml

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,9 +25,17 @@ notifications:
2525
pullrequests: jira@kafka.apache.org
2626
jira_options: link label
2727

28-
# This list allows you to triage pull requests and trigger workflow runs on GitHub Actions. It can have a maximum of 10 collaborators.
29-
# Read more here: https://github.com/apache/infrastructure-asfyaml
3028
github:
29+
description: "Apache Kafka - A distributed event streaming platform"
30+
homepage: https://kafka.apache.org/
31+
labels:
32+
- java
33+
- scala
34+
- kafka
35+
- streaming
36+
37+
# This list allows you to triage pull requests and trigger workflow runs on GitHub Actions. It can have a maximum of 10 collaborators.
38+
# Read more here: https://github.com/apache/infrastructure-asfyaml
3139
collaborators:
3240
- brandboat
3341
- chirag-wadhwa5

.github/dependabot.yml

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one or more
2+
# contributor license agreements. See the NOTICE file distributed with
3+
# this work for additional information regarding copyright ownership.
4+
# The ASF licenses this file to You under the Apache License, Version 2.0
5+
# (the "License"); you may not use this file except in compliance with
6+
# the License. You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
16+
version: 2
updates:
  # Keep the pinned GitHub Actions used by the workflows under .github/ up to date.
  - package-ecosystem: "github-actions"
    directory: "/"
    schedule:
      # 'daily' only runs on weekdays, so use an explicit cron expression
      # to check every day of the week at 20:00.
      interval: "cron"
      cronjob: "00 20 * * *"
    open-pull-requests-limit: 10
    cooldown:
      # Wait this many days after a new version is published before proposing it.
      # The documented option name is 'default-days' ('default' is not a valid key).
      default-days: 4

.github/scripts/pr-format.py

Lines changed: 122 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,12 +18,13 @@
1818
import json
1919
import logging
2020
import os
21+
import re
2122
import subprocess
2223
import shlex
2324
import sys
2425
import tempfile
2526
import textwrap
26-
from typing import Dict, Optional, TextIO
27+
from typing import Dict, List, Optional, TextIO
2728

2829
logger = logging.getLogger()
2930
logger.setLevel(logging.DEBUG)
@@ -103,6 +104,108 @@ def split_paragraphs(text: str):
103104
yield paragraph, markdown
104105

105106

107+
def _usable_email(email):
    """Return `email`, or None when it is empty or a GitHub noreply placeholder."""
    if not email or email.endswith("@users.noreply.github.com"):
        return None
    return email


def _gh_api_json(endpoint: str):
    """Run `gh api <endpoint>` and return the decoded JSON, or None on a non-zero exit."""
    # Pass an argv list so the endpoint is always exactly one argument, whatever
    # characters the (environment-provided) login happens to contain.
    p = subprocess.run(["gh", "api", endpoint], capture_output=True, text=True)
    if p.returncode != 0:
        return None
    return json.loads(p.stdout)


def _email_from_trailer_lines(name: str, lines):
    """Return the first usable email credited to exactly `name` in Reviewers trailer values."""
    # Anchor the name at the start of a reviewer entry (start of line or right
    # after a separator). Without the anchor, "Li" would also match the entry
    # "Wei Li <wei@example.com>" and pick up somebody else's address.
    pattern = re.compile(rf"(?:^|[,;])\s*{re.escape(name)}\s*<([^>]+)>")
    for line in lines:
        for m in pattern.finditer(line):
            candidate = _usable_email(m.group(1))
            if candidate:
                return candidate
    return None


def resolve_reviewer(login: str) -> tuple:
    """Map a GitHub login to (name, email).

    Tries three tiers in order: repo commit history, GitHub user profile,
    and past `Reviewers:` trailers in git log (matched by name).
    Noreply emails (@users.noreply.github.com) are treated as missing since
    they are GitHub privacy placeholders that do not identify the reviewer.

    Every tier is best-effort: any failure is logged at debug level and the
    next tier is tried.

    Args:
        login: GitHub login of the reviewer.

    Returns:
        A (name, email) tuple. `name` falls back to `login` when no display
        name is found. `email` is None when no usable email is found; the
        caller then falls back to the '(@login)' form in the Reviewers trailer.
    """
    name = None
    email = None

    # Tier 1: find from repo commit history. Misses when the reviewer has no
    # merged commit in apache/kafka, or had "Keep my email private" enabled
    # at commit time (GitHub rewrites the author to the noreply form).
    try:
        commits = _gh_api_json(f"repos/apache/kafka/commits?author={login}&per_page=1")
        if commits:
            author = commits[0].get("commit", {}).get("author", {})
            name = author.get("name")
            email = _usable_email(author.get("email"))
    except Exception as e:
        logger.debug(f"Failed to resolve {login} from commit history: {e}")

    # Tier 2: GitHub user profile. Only exposes an email when the reviewer
    # has set a Public email in their profile settings. Only fills in the
    # parts that tier 1 did not provide.
    if not name or not email:
        try:
            user = _gh_api_json(f"users/{login}")
            if user:
                name = name or user.get("name")
                email = email or _usable_email(user.get("email"))
        except Exception as e:
            logger.debug(f"Failed to resolve {login} from GitHub profile: {e}")

    # Tier 3: past Reviewers: trailers in git log, matched by name. Catches
    # pure reviewers (no commits in apache/kafka, no public profile email)
    # who have been credited with a real email in an earlier merged PR.
    # git log is newest-first, so the first usable match is the most recent.
    if name and not email:
        try:
            p = subprocess.run(
                ["git", "log",
                 "--pretty=format:%(trailers:key=Reviewers,valueonly=true,unfold=true)"],
                capture_output=True, text=True,
            )
            if p.returncode == 0:
                email = _email_from_trailer_lines(name, p.stdout.splitlines())
        except Exception as e:
            logger.debug(f"Failed to resolve {login} from past Reviewers trailers: {e}")

    if not name:
        name = login

    return (name, email)
183+
184+
185+
def already_exists(identity: str, existing_reviewers: List[str]) -> bool:
    """Check if a reviewer identity is already in the existing reviewers list.

    identity is the delimited token that uniquely identifies a reviewer, either
    '<email>' (for the email form) or '(@login)' (for the login fallback).
    The comparison is case-insensitive and is done per reviewer entry.

    Args:
        identity: The delimited identity token to look for.
        existing_reviewers: Values of the existing Reviewers trailer(s).

    Returns:
        True if any existing entry contains the identity token. An empty or
        whitespace-only identity never matches.
    """
    needle = identity.strip().lower()
    # An empty string is a substring of everything; without this guard an
    # empty identity would be reported as "already present".
    if not needle:
        return False
    return any(needle in reviewer.lower() for reviewer in existing_reviewers)
192+
193+
194+
def update_reviewers_trailer(body: str, trailer: str) -> str:
    """Update the Reviewers trailer in the body using git interpret-trailers.

    The stripped body is written to a temporary file and passed to
    `git interpret-trailers --if-exists replace`, so an existing trailer with
    the same key is replaced rather than duplicated.

    Args:
        body: The PR body text.
        trailer: The full trailer line, e.g. "Reviewers: Jane Doe <jane@example.com>".

    Returns:
        The body as rewritten by git, or `body` unchanged if git exits non-zero.
    """
    with tempfile.NamedTemporaryFile() as fp:
        fp.write(body.strip().encode())
        fp.write(b"\n")
        fp.flush()
        # Build argv directly: formatting a command string and re-splitting it
        # breaks when the temp file path contains whitespace.
        p = subprocess.run(
            ["git", "interpret-trailers", "--if-exists", "replace",
             "--trailer", trailer, fp.name],
            capture_output=True,
        )

    if p.returncode == 0:
        return p.stdout.decode()
    return body
207+
208+
106209
if __name__ == "__main__":
107210
"""
108211
This script performs some basic linting of our PR titles and body. The PR number is read from the PR_NUMBER
@@ -123,7 +226,7 @@ def split_paragraphs(text: str):
123226
"""
124227

125228
pr_number = get_env("PR_NUMBER")
126-
cmd = f"gh pr view {pr_number} --json 'title,body,reviews'"
229+
cmd = f"gh pr view {pr_number} --json 'title,body,reviews,author'"
127230
p = subprocess.run(shlex.split(cmd), capture_output=True)
128231
if p.returncode != 0:
129232
logger.error(f"GitHub CLI failed with exit code {p.returncode}.\nSTDOUT: {p.stdout.decode()}\nSTDERR:{p.stderr.decode()}")
@@ -134,6 +237,23 @@ def split_paragraphs(text: str):
134237
body = gh_json["body"]
135238
reviews = gh_json["reviews"]
136239

240+
# Auto-fill reviewer from the current review event.
241+
# Approvals are also review events, so approvers are automatically added.
242+
reviewer_login = get_env("REVIEWER_LOGIN")
243+
pr_author = (gh_json.get("author") or {}).get("login")
244+
if reviewer_login and reviewer_login != pr_author:
245+
name, email = resolve_reviewer(reviewer_login)
246+
if email:
247+
identity = f"<{email}>"
248+
else:
249+
identity = f"(@{reviewer_login})"
250+
resolved = f"{name} {identity}"
251+
existing_reviewers = parse_trailers(title, body).get("Reviewers", [])
252+
if not already_exists(identity, existing_reviewers):
253+
existing_value = ", ".join(existing_reviewers)
254+
new_value = f"{existing_value}, {resolved}" if existing_value else resolved
255+
body = update_reviewers_trailer(body, f"Reviewers: {new_value}")
256+
137257
checks = [] # (bool (0=ok, 1=error), message)
138258

139259
def check(positive_assertion, ok_msg, err_msg):

.github/scripts/requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,4 +16,4 @@
1616
# Note: Ensure the 'requests' version here matches the version in tests/setup.py
1717
PyYAML~=6.0
1818
pytz==2024.2
19-
requests==2.32.4
19+
requests==2.33.0

.github/workflows/ci-complete.yml

Lines changed: 42 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -32,30 +32,54 @@ run-name: Build Scans for ${{ github.event.workflow_run.display_title}}
3232
# the repository secrets. Here we can download the build scan files produced by a PR and publish
3333
# them to develocity.apache.org.
3434
#
35+
# The CI workflow itself runs according to the branch of the PR or push event. However, this
36+
# ci-complete workflow always runs using the trunk branch, regardless of the source branch of the
37+
# CI workflow.
38+
#
3539
# If we need to do things like comment on, label, or otherwise modify PRs from public forks, this
3640
# workflow is the place to do it. PR number is ${{ github.event.workflow_run.pull_requests[0].number }}
3741

3842
jobs:
43+
get-build-scan-names:
44+
if: (github.event.workflow_run.conclusion == 'success' || github.event.workflow_run.conclusion == 'failure')
45+
runs-on: ubuntu-latest
46+
outputs:
47+
build-scan-names: ${{ steps.build-scan-names.outputs.build-scan-names }}
48+
steps:
49+
- name: List buildscan artifacts
50+
id: build-scan-names
51+
env:
52+
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
53+
RUN_ID: ${{ github.event.workflow_run.id }}
54+
run: |
55+
names=$(gh api \
56+
-H "Accept: application/vnd.github+json" \
57+
/repos/${{ github.repository }}/actions/runs/$RUN_ID/artifacts \
58+
--jq '[.artifacts[] | select(.name | startswith("build-scan-")) | .name]')
59+
echo "Found: $names"
60+
61+
echo "build-scan-names=$names" >> $GITHUB_OUTPUT
62+
3963
upload-build-scan:
64+
needs: get-build-scan-names
4065
# Skip this workflow if the CI run was skipped or cancelled
4166
if: (github.event.workflow_run.conclusion == 'success' || github.event.workflow_run.conclusion == 'failure')
4267
runs-on: ubuntu-latest
4368
strategy:
4469
fail-fast: false
4570
matrix:
46-
# Make sure these match build.yml and also keep in mind that GitHub Actions build will always use this file from the trunk branch.
47-
java: [ 25, 17 ]
48-
run-flaky: [ true, false ]
49-
run-new: [ true, false ]
50-
exclude:
51-
- run-flaky: true
52-
run-new: true
53-
54-
env:
55-
job-variation: ${{ matrix.java }}-${{ matrix.run-flaky == true && 'flaky' || 'noflaky' }}-${{ matrix.run-new == true && 'new' || 'nonew' }}
56-
status-context: Java ${{ matrix.java }}${{ matrix.run-flaky == true && ' / Flaky' || '' }}${{ matrix.run-new == true && ' / New' || '' }}
57-
71+
build-scan-name: ${{ fromJson(needs.get-build-scan-names.outputs.build-scan-names) }}
5872
steps:
73+
- name: Setup build scan info
74+
run: |
75+
BUILD_SCAN_NAME="${{ matrix.build-scan-name }}"
76+
JAVA=$(echo "$BUILD_SCAN_NAME" | grep -oE '[0-9]+')
77+
STATUS_CONTEXT="Java $JAVA"
78+
[[ "$BUILD_SCAN_NAME" == *"-flaky"* ]] && STATUS_CONTEXT="$STATUS_CONTEXT / Flaky"
79+
[[ "$BUILD_SCAN_NAME" == *"-new"* ]] && STATUS_CONTEXT="$STATUS_CONTEXT / New"
80+
81+
echo "JAVA=$JAVA" >> $GITHUB_ENV
82+
echo "STATUS_CONTEXT=$STATUS_CONTEXT" >> $GITHUB_ENV
5983
- name: Env
6084
run: printenv
6185
env:
@@ -68,7 +92,7 @@ jobs:
6892
- name: Setup Gradle
6993
uses: ./.github/actions/setup-gradle
7094
with:
71-
java-version: ${{ matrix.java }}
95+
java-version: ${{ env.JAVA }}
7296
develocity-access-key: ${{ secrets.DEVELOCITY_ACCESS_KEY }}
7397
- name: Download build scan archive
7498
id: download-build-scan
@@ -77,7 +101,7 @@ jobs:
77101
with:
78102
github-token: ${{ github.token }}
79103
run-id: ${{ github.event.workflow_run.id }}
80-
name: build-scan-${{ env.job-variation }}
104+
name: ${{ matrix.build-scan-name }}
81105
path: ~/.gradle/build-scan-data # This is where Gradle buffers unpublished build scan data when --no-scan is given
82106
- name: Handle missing scan
83107
if: ${{ steps.download-build-scan.outcome == 'failure' }}
@@ -88,8 +112,8 @@ jobs:
88112
commit_sha: ${{ github.event.workflow_run.head_sha }}
89113
url: '${{ github.event.workflow_run.html_url }}'
90114
description: 'Could not find build scan'
91-
context: Gradle Build Scan / ${{ env.status-context }}
92-
state: 'success' # Always mark as successful as a temporary fix; non-trunk branches will miss build scan. Real fix in KAFKA-19768
115+
context: Gradle Build Scan / ${{ env.STATUS_CONTEXT }}
116+
state: 'error'
93117
- name: Publish Scan
94118
id: publish-build-scan
95119
if: ${{ steps.download-build-scan.outcome == 'success' }}
@@ -116,7 +140,7 @@ jobs:
116140
commit_sha: ${{ github.event.workflow_run.head_sha }}
117141
url: '${{ github.event.repository.html_url }}/actions/runs/${{ github.run_id }}'
118142
description: 'The build scan failed to be published'
119-
context: Gradle Build Scan / ${{ env.status-context }}
143+
context: Gradle Build Scan / ${{ env.STATUS_CONTEXT }}
120144
state: 'error'
121145
- name: Update Status Check
122146
if: ${{ steps.publish-build-scan.outcome == 'success' }}
@@ -127,5 +151,5 @@ jobs:
127151
commit_sha: ${{ github.event.workflow_run.head_sha }}
128152
url: ${{ steps.publish-build-scan.outputs.build-scan-url }}
129153
description: 'The build scan was successfully published'
130-
context: Gradle Build Scan / ${{ env.status-context }}
154+
context: Gradle Build Scan / ${{ env.STATUS_CONTEXT }}
131155
state: 'success'

.github/workflows/ci.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ on:
1919
push:
2020
branches:
2121
- 'trunk'
22-
- '4.0'
22+
- '4.3'
2323

2424
schedule:
2525
- cron: '0 0 * * 6,0' # Run on Saturday and Sunday at midnight UTC
@@ -28,7 +28,7 @@ on:
2828
types: [ opened, synchronize, ready_for_review, reopened ]
2929
branches:
3030
- 'trunk'
31-
- '4.0'
31+
- '4.3'
3232

3333
concurrency:
3434
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}

.github/workflows/docker_build_and_test.yml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -54,9 +54,10 @@ jobs:
5454
run: |
5555
python docker_build_test.py kafka/test -tag=test -type=$IMAGE_TYPE -u=$KAFKA_URL
5656
- name: Run CVE scan
57-
uses: aquasecurity/trivy-action@b6643a29fecd7f34b3597bc6acb0a98b03d33ff8 # v0.33.1
57+
uses: lhotari/sandboxed-trivy-action@555963036b2012b44c1071508a236e569db28ebb # v1.0.1
5858
with:
59-
image-ref: 'kafka/test:test'
59+
scan-type: 'image'
60+
scan-ref: 'kafka/test:test'
6061
format: 'table'
6162
severity: 'CRITICAL,HIGH'
6263
output: scan_report_${{ github.event.inputs.image_type }}.txt

.github/workflows/docker_official_image_build_and_test.yml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,9 +53,10 @@ jobs:
5353
run: |
5454
python docker_official_image_build_test.py kafka/test -tag=test -type=$IMAGE_TYPE -v=$KAFKA_VERSION
5555
- name: Run CVE scan
56-
uses: aquasecurity/trivy-action@b6643a29fecd7f34b3597bc6acb0a98b03d33ff8 # v0.33.1
56+
uses: lhotari/sandboxed-trivy-action@555963036b2012b44c1071508a236e569db28ebb # v1.0.1
5757
with:
58-
image-ref: 'kafka/test:test'
58+
scan-type: 'image'
59+
scan-ref: 'kafka/test:test'
5960
format: 'table'
6061
severity: 'CRITICAL,HIGH'
6162
output: scan_report_${{ github.event.inputs.image_type }}.txt

.github/workflows/docker_promote.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,11 +31,11 @@ jobs:
3131
runs-on: ubuntu-latest
3232
steps:
3333
- name: Set up QEMU
34-
uses: docker/setup-qemu-action@c7c53464625b32c7a7e944ae62b3e17d2b600130 # v3.7.0
34+
uses: docker/setup-qemu-action@ce360397dd3f832beb865e1373c09c0e9f86d70a # v4.0.0
3535
- name: Set up Docker Buildx
36-
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # v3.11.1
36+
uses: docker/setup-buildx-action@4d04d5d9486b7bd6fa91e7baf45bbb4f8b9deedd # v4.0.0
3737
- name: Login to Docker Hub
38-
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # v3.6.0
38+
uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
3939
with:
4040
username: ${{ secrets.DOCKERHUB_USER }}
4141
password: ${{ secrets.DOCKERHUB_TOKEN }}

.github/workflows/docker_rc_release.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -47,11 +47,11 @@ jobs:
4747
python -m pip install --upgrade pip
4848
pip install -r docker/requirements.txt
4949
- name: Set up QEMU
50-
uses: docker/setup-qemu-action@c7c53464625b32c7a7e944ae62b3e17d2b600130 # v3.7.0
50+
uses: docker/setup-qemu-action@ce360397dd3f832beb865e1373c09c0e9f86d70a # v4.0.0
5151
- name: Set up Docker Buildx
52-
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # v3.11.1
52+
uses: docker/setup-buildx-action@4d04d5d9486b7bd6fa91e7baf45bbb4f8b9deedd # v4.0.0
5353
- name: Login to Docker Hub
54-
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # v3.6.0
54+
uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
5555
with:
5656
username: ${{ secrets.DOCKERHUB_USER }}
5757
password: ${{ secrets.DOCKERHUB_TOKEN }}

0 commit comments

Comments
 (0)