Skip to content

Commit 0413d68

Browse files
Copilotlpcoxgithub-advanced-security[bot]
authored
feat: publish versioned schemas for JSONL audit/telemetry artifacts (#2380)
* Initial plan * feat: publish versioned schemas for JSONL audit/telemetry artifacts - Add schemas/token-usage.v1.schema.json and schemas/audit.v1.schema.json - Add schemas/README.md documenting schema versioning policy - Embed _schema field in token-usage.jsonl records (token-tracker.js) - Add lightweight validateTokenUsageRecord() validation before writing - Embed _schema:"audit/v1" in Squid audit_jsonl logformat - Update sample audit.jsonl to include _schema field - Add validateTokenUsageRecord tests and schema-field integration tests - Add squid-config tests for audit_jsonl schema fields - Add log-parser test for parsing records with _schema field Closes #<issue>" Agent-Logs-Url: https://github.com/github/gh-aw-firewall/sessions/5a50cced-80d7-463b-9c96-c0662581a136 * refactor: remove redundant requires in token-tracker tests per code review * Potential fix for pull request finding 'CodeQL / Unused variable, import, function or class' Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com> * fix: address code review feedback on schema tests and null guard - Add null/non-object guard to validateTokenUsageRecord to prevent TypeError on writeTokenUsage(null/undefined) - Rewrite 'token-usage JSONL record schema field' tests to actually capture the bytes written to the JSONL stream via jest.spyOn(fs, 'createWriteStream'), instead of the weaker not.toThrow() / metrics.increment assertions - Add null/undefined/primitive rejection tests for validateTokenUsageRecord Agent-Logs-Url: https://github.com/github/gh-aw-firewall/sessions/4a05b5f0-4b15-4797-96da-617484b64562 --------- Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: Landon Cox <landon.cox@microsoft.com> Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com>
1 parent 34b4e73 commit 0413d68

9 files changed

Lines changed: 527 additions & 4 deletions

File tree

containers/api-proxy/token-tracker.js

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,11 +84,76 @@ function getLogStream() {
8484
}
8585
}
8686

87+
/**
88+
* Validate a token usage record against the token-usage/v1 schema contract.
89+
*
90+
* Checks that all required fields are present and have the expected types.
91+
* Logs a warning and returns false if the record is non-conformant; does
92+
* not throw, so a bad record is dropped rather than crashing the proxy.
93+
*
94+
* @param {object} record - The record to validate
95+
* @returns {boolean} true if the record is valid, false otherwise
96+
*/
97+
function validateTokenUsageRecord(record) {
98+
if (!record || typeof record !== 'object') {
99+
logRequest('warn', 'token_record_schema_violation', {
100+
field: 'record',
101+
expected: 'object',
102+
actual: record === null ? 'null' : typeof record,
103+
});
104+
return false;
105+
}
106+
107+
const required = [
108+
['_schema', 'string'],
109+
['timestamp', 'string'],
110+
['request_id', 'string'],
111+
['provider', 'string'],
112+
['model', 'string'],
113+
['path', 'string'],
114+
['status', 'number'],
115+
['streaming', 'boolean'],
116+
['input_tokens', 'number'],
117+
['output_tokens', 'number'],
118+
['cache_read_tokens', 'number'],
119+
['cache_write_tokens', 'number'],
120+
['duration_ms', 'number'],
121+
];
122+
123+
for (const [field, expectedType] of required) {
124+
// eslint-disable-next-line valid-typeof
125+
if (typeof record[field] !== expectedType) {
126+
logRequest('warn', 'token_record_schema_violation', {
127+
request_id: record.request_id,
128+
field,
129+
expected: expectedType,
130+
actual: typeof record[field],
131+
});
132+
return false;
133+
}
134+
}
135+
136+
if (record._schema !== 'token-usage/v1') {
137+
logRequest('warn', 'token_record_schema_violation', {
138+
request_id: record.request_id,
139+
field: '_schema',
140+
expected: 'token-usage/v1',
141+
actual: record._schema,
142+
});
143+
return false;
144+
}
145+
146+
return true;
147+
}
148+
87149
/**
88150
* Write a token usage record to the JSONL log file.
151+
* Validates the record against the token-usage/v1 schema before writing.
89152
* Handles backpressure by dropping writes when the stream buffer is full.
90153
*/
91154
function writeTokenUsage(record) {
155+
if (!validateTokenUsageRecord(record)) return;
156+
92157
const stream = getLogStream();
93158
if (stream && !stream.writableEnded) {
94159
const ok = stream.write(JSON.stringify(record) + '\n');
@@ -478,6 +543,7 @@ function trackTokenUsage(proxyRes, opts) {
478543

479544
// Build log record
480545
const record = {
546+
_schema: 'token-usage/v1',
481547
timestamp: new Date().toISOString(),
482548
request_id: requestId,
483549
provider,
@@ -693,6 +759,7 @@ function trackWebSocketTokenUsage(upstreamSocket, opts) {
693759
}
694760

695761
const record = {
762+
_schema: 'token-usage/v1',
696763
timestamp: new Date().toISOString(),
697764
request_id: requestId,
698765
provider,
@@ -759,6 +826,7 @@ module.exports = {
759826
normalizeUsage,
760827
isStreamingResponse,
761828
isCompressedResponse,
829+
validateTokenUsageRecord,
762830
writeTokenUsage,
763831
TOKEN_LOG_FILE,
764832
};

containers/api-proxy/token-tracker.test.js

Lines changed: 208 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,9 @@ const {
1212
isCompressedResponse,
1313
trackTokenUsage,
1414
trackWebSocketTokenUsage,
15+
validateTokenUsageRecord,
16+
writeTokenUsage,
17+
closeLogStream,
1518
} = require('./token-tracker');
1619
const { EventEmitter } = require('events');
1720
const os = require('os');
@@ -1044,3 +1047,208 @@ describe('trackWebSocketTokenUsage', () => {
10441047
}, 10);
10451048
});
10461049
});
1050+
1051+
// ── validateTokenUsageRecord ─────────────────────────────────────────
1052+
1053+
describe('validateTokenUsageRecord', () => {
1054+
const validRecord = {
1055+
_schema: 'token-usage/v1',
1056+
timestamp: '2025-01-01T00:00:00.000Z',
1057+
request_id: 'req-123',
1058+
provider: 'anthropic',
1059+
model: 'claude-sonnet-4-20250514',
1060+
path: '/v1/messages',
1061+
status: 200,
1062+
streaming: false,
1063+
input_tokens: 100,
1064+
output_tokens: 50,
1065+
cache_read_tokens: 0,
1066+
cache_write_tokens: 0,
1067+
duration_ms: 1234,
1068+
};
1069+
1070+
test('accepts a valid record', () => {
1071+
expect(validateTokenUsageRecord(validRecord)).toBe(true);
1072+
});
1073+
1074+
test('accepts a record with optional response_bytes', () => {
1075+
expect(validateTokenUsageRecord({ ...validRecord, response_bytes: 512 })).toBe(true);
1076+
});
1077+
1078+
test('rejects a record with wrong _schema', () => {
1079+
expect(validateTokenUsageRecord({ ...validRecord, _schema: 'wrong/v99' })).toBe(false);
1080+
});
1081+
1082+
test('rejects a record missing _schema', () => {
1083+
const { _schema, ...noSchema } = validRecord;
1084+
expect(validateTokenUsageRecord(noSchema)).toBe(false);
1085+
});
1086+
1087+
test('rejects a record with non-string timestamp', () => {
1088+
expect(validateTokenUsageRecord({ ...validRecord, timestamp: 1234567890 })).toBe(false);
1089+
});
1090+
1091+
test('rejects a record with non-number input_tokens', () => {
1092+
expect(validateTokenUsageRecord({ ...validRecord, input_tokens: '100' })).toBe(false);
1093+
});
1094+
1095+
test('rejects a record with non-boolean streaming', () => {
1096+
expect(validateTokenUsageRecord({ ...validRecord, streaming: 'true' })).toBe(false);
1097+
});
1098+
1099+
test('rejects a record missing a required field', () => {
1100+
const { model, ...noModel } = validRecord;
1101+
expect(validateTokenUsageRecord(noModel)).toBe(false);
1102+
});
1103+
1104+
test('rejects null without throwing', () => {
1105+
expect(validateTokenUsageRecord(null)).toBe(false);
1106+
});
1107+
1108+
test('rejects undefined without throwing', () => {
1109+
expect(validateTokenUsageRecord(undefined)).toBe(false);
1110+
});
1111+
1112+
test('rejects a non-object primitive without throwing', () => {
1113+
expect(validateTokenUsageRecord('not-an-object')).toBe(false);
1114+
expect(validateTokenUsageRecord(42)).toBe(false);
1115+
});
1116+
});
1117+
1118+
// ── JSONL records include _schema field ───────────────────────────────
1119+
1120+
/**
1121+
* Build a writable mock stream that captures all written chunks.
1122+
* The `written` getter parses the accumulated JSONL and returns records.
1123+
*/
1124+
function makeMockStream() {
1125+
const chunks = [];
1126+
const stream = {
1127+
writableEnded: false,
1128+
write: jest.fn((chunk) => { chunks.push(chunk); return true; }),
1129+
end: jest.fn((cb) => { stream.writableEnded = true; if (cb) cb(); }),
1130+
on: jest.fn(),
1131+
get writtenRecords() {
1132+
return chunks.map(c => JSON.parse(c.trim()));
1133+
},
1134+
};
1135+
return stream;
1136+
}
1137+
1138+
describe('token-usage JSONL record schema field', () => {
1139+
let mockStream;
1140+
let mkdirSyncSpy;
1141+
let createWriteStreamSpy;
1142+
1143+
beforeEach(async () => {
1144+
// Close any open log stream so the next getLogStream() call creates a fresh one.
1145+
await closeLogStream();
1146+
1147+
mockStream = makeMockStream();
1148+
1149+
// Redirect fs.mkdirSync and fs.createWriteStream so the module writes to our
1150+
// in-memory stream rather than the unwritable /var/log/api-proxy path.
1151+
mkdirSyncSpy = jest.spyOn(fs, 'mkdirSync').mockReturnValue(undefined);
1152+
createWriteStreamSpy = jest.spyOn(fs, 'createWriteStream').mockReturnValue(mockStream);
1153+
});
1154+
1155+
afterEach(async () => {
1156+
mkdirSyncSpy.mockRestore();
1157+
createWriteStreamSpy.mockRestore();
1158+
await closeLogStream();
1159+
});
1160+
1161+
test('writeTokenUsage serializes _schema:"token-usage/v1" into the JSONL stream', () => {
1162+
const record = {
1163+
_schema: 'token-usage/v1',
1164+
timestamp: new Date().toISOString(),
1165+
request_id: 'direct-write-test',
1166+
provider: 'openai',
1167+
model: 'gpt-4o',
1168+
path: '/v1/chat/completions',
1169+
status: 200,
1170+
streaming: false,
1171+
input_tokens: 1,
1172+
output_tokens: 1,
1173+
cache_read_tokens: 0,
1174+
cache_write_tokens: 0,
1175+
duration_ms: 10,
1176+
};
1177+
1178+
writeTokenUsage(record);
1179+
1180+
expect(mockStream.write).toHaveBeenCalledTimes(1);
1181+
const parsed = mockStream.writtenRecords[0];
1182+
expect(parsed._schema).toBe('token-usage/v1');
1183+
expect(parsed.request_id).toBe('direct-write-test');
1184+
});
1185+
1186+
test('trackTokenUsage HTTP path writes _schema:"token-usage/v1" to the stream', (done) => {
1187+
const proxyRes = new EventEmitter();
1188+
proxyRes.headers = { 'content-type': 'application/json' };
1189+
proxyRes.statusCode = 200;
1190+
1191+
trackTokenUsage(proxyRes, {
1192+
requestId: 'schema-field-http',
1193+
provider: 'openai',
1194+
path: '/v1/chat/completions',
1195+
startTime: Date.now(),
1196+
metrics: null,
1197+
});
1198+
1199+
proxyRes.emit('data', Buffer.from(JSON.stringify({
1200+
model: 'gpt-4o',
1201+
usage: { prompt_tokens: 10, completion_tokens: 5, total_tokens: 15 },
1202+
})));
1203+
proxyRes.emit('end');
1204+
1205+
setTimeout(() => {
1206+
expect(mockStream.write).toHaveBeenCalledTimes(1);
1207+
const parsed = mockStream.writtenRecords[0];
1208+
expect(parsed._schema).toBe('token-usage/v1');
1209+
expect(parsed.request_id).toBe('schema-field-http');
1210+
done();
1211+
}, 20);
1212+
});
1213+
1214+
test('trackWebSocketTokenUsage path writes _schema:"token-usage/v1" to the stream', (done) => {
1215+
const socket = new EventEmitter();
1216+
1217+
function buildFrame(text) {
1218+
const payload = Buffer.from(text, 'utf8');
1219+
const header = Buffer.alloc(2);
1220+
header[0] = 0x81;
1221+
header[1] = payload.length;
1222+
return Buffer.concat([header, payload]);
1223+
}
1224+
1225+
const httpHeader = Buffer.from('HTTP/1.1 101 Switching Protocols\r\nUpgrade: websocket\r\n\r\n');
1226+
const frame1 = buildFrame(JSON.stringify({
1227+
type: 'message_start',
1228+
message: { model: 'claude-sonnet-4-20250514', usage: { input_tokens: 20, output_tokens: 0 } },
1229+
}));
1230+
const frame2 = buildFrame(JSON.stringify({
1231+
type: 'message_delta',
1232+
usage: { output_tokens: 8 },
1233+
}));
1234+
1235+
trackWebSocketTokenUsage(socket, {
1236+
requestId: 'schema-field-ws',
1237+
provider: 'anthropic',
1238+
path: '/v1/messages',
1239+
startTime: Date.now(),
1240+
metrics: null,
1241+
});
1242+
1243+
socket.emit('data', Buffer.concat([httpHeader, frame1, frame2]));
1244+
socket.emit('close');
1245+
1246+
setTimeout(() => {
1247+
expect(mockStream.write).toHaveBeenCalledTimes(1);
1248+
const parsed = mockStream.writtenRecords[0];
1249+
expect(parsed._schema).toBe('token-usage/v1');
1250+
expect(parsed.request_id).toBe('schema-field-ws');
1251+
done();
1252+
}, 20);
1253+
});
1254+
});

samples/audit/audit.jsonl

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
1-
{"ts":1774290908.910,"client":"172.30.0.20","host":"api.github.com:443","dest":"140.82.116.5:443","method":"CONNECT","status":200,"decision":"TCP_TUNNEL","url":"api.github.com:443"}
2-
{"ts":1774290909.180,"client":"172.30.0.20","host":"api.github.com:443","dest":"140.82.116.5:443","method":"CONNECT","status":200,"decision":"TCP_TUNNEL","url":"api.github.com:443"}
3-
{"ts":1774290909.186,"client":"172.30.0.20","host":"evil.example.com:443","dest":"-:-","method":"CONNECT","status":403,"decision":"TCP_DENIED","url":"evil.example.com:443"}
1+
{"_schema":"audit/v1","ts":1774290908.910,"client":"172.30.0.20","host":"api.github.com:443","dest":"140.82.116.5:443","method":"CONNECT","status":200,"decision":"TCP_TUNNEL","url":"api.github.com:443"}
2+
{"_schema":"audit/v1","ts":1774290909.180,"client":"172.30.0.20","host":"api.github.com:443","dest":"140.82.116.5:443","method":"CONNECT","status":200,"decision":"TCP_TUNNEL","url":"api.github.com:443"}
3+
{"_schema":"audit/v1","ts":1774290909.186,"client":"172.30.0.20","host":"evil.example.com:443","dest":"-:-","method":"CONNECT","status":403,"decision":"TCP_DENIED","url":"evil.example.com:443"}

schemas/README.md

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
# AWF JSONL Schemas
2+
3+
This directory contains versioned [JSON Schema](https://json-schema.org/) files for the JSONL artifact files emitted by AWF at runtime.
4+
5+
## Files
6+
7+
| Schema file | JSONL file | Writer |
8+
|---|---|---|
9+
| [`token-usage.v1.schema.json`](token-usage.v1.schema.json) | `token-usage.jsonl` | `containers/api-proxy/token-tracker.js` |
10+
| [`audit.v1.schema.json`](audit.v1.schema.json) | `audit.jsonl` | Squid proxy (`src/squid-config.ts`) |
11+
12+
## Schema versioning policy
13+
14+
- **Additive changes** (new optional fields) → update the existing `v1` schema, no version bump required.
15+
- **Breaking changes** (field removal, rename, type change, new required field) → create a new `v2` schema file and bump the `_schema` value in the writer.
16+
17+
## Record identification
18+
19+
Every JSONL record includes a `_schema` field that identifies the schema name and version:
20+
21+
```json
22+
{ "_schema": "token-usage/v1", "timestamp": "2025-01-01T00:00:00.000Z", ... }
23+
{ "_schema": "audit/v1", "ts": 1761074374.646, ... }
24+
```
25+
26+
Consumers should check `_schema` before parsing fields so they can handle future versions gracefully.
27+
28+
## Validation
29+
30+
You can validate a JSONL file against its schema using any JSON Schema validator. Example using [`ajv-cli`](https://github.com/ajv-validator/ajv-cli):
31+
32+
```bash
33+
# Install validator
34+
npm install -g ajv-cli
35+
36+
# Validate all records in audit.jsonl
37+
while IFS= read -r line; do
38+
echo "$line" | ajv validate -s schemas/audit.v1.schema.json -d /dev/stdin
39+
done < /path/to/audit.jsonl
40+
```

0 commit comments

Comments
 (0)