Shared memory and context tools for agentic work.
Code Rooms
{
"schema": "m1nd-bug-hunt-report-v0",
"round_id": "bughunt-p-limit-tempo-20260514T145029Z",
"generated_at": "2026-05-14T16:03:30.174688+00:00",
"repo": "p-limit",
"source_commit": "9f52583119f0cb0d85c6fec600c94a21fd89d060",
"seeded_bug_count": 5,
"seeded_bug_ids": [
"options-object-default-rejects-on-clear",
"reject-on-clear-falsy-non-boolean-accepted",
"map-non-array-iterable-index-lost",
"limit-function-drops-arguments",
"infinite-concurrency-rejected"
],
"lanes_completed": 8,
"lanes_expected": 8,
"lanes": [
"lane_id": "audit-01",
"instruction_mode": "m1nd-temponizer",
"completed": true,
"result_schema": "m1nd-bug-hunt-audit-result-v0",
"result_path": "/Users/kle1nz/m1nd/docs/benchmarks/bug-hunt-rounds/bughunt-p-limit-tempo-20260514T145029Z/lane-results/audit-01.json",
"events_path": "/Users/kle1nz/m1nd/docs/benchmarks/bug-hunt-rounds/bughunt-p-limit-tempo-20260514T145029Z/event-streams/audit-01.jsonl",
"findings_count": 5,
"matched_seeded_bug_ids": [
"infinite-concurrency-rejected",
"reject-on-clear-falsy-non-boolean-accepted"
"matched_findings": [
"finding_index": 0,
"finding_title": "limitFunction drops all caller arguments",
"seeded_bug_id": "limit-function-drops-arguments"
},
"finding_index": 1,
"finding_title": "limit.map stops passing indexes for non-array iterables",
"seeded_bug_id": "map-non-array-iterable-index-lost"
"finding_index": 2,
"finding_title": "Options-object construction silently enables rejectOnClear by default",
"seeded_bug_id": "options-object-default-rejects-on-clear"
"finding_index": 3,
"finding_title": "Falsy non-boolean rejectOnClear values bypass validation",
"seeded_bug_id": "reject-on-clear-falsy-non-boolean-accepted"
"finding_index": 4,
"finding_title": "Infinity concurrency is no longer accepted",
"seeded_bug_id": "infinite-concurrency-rejected"
}
"seeded_recall_count": 5,
"seeded_recall_rate": 1.0,
"missed_seeded_bug_ids": [],
"extra_unadjudicated_findings_count": 0,
"event_count": 4,
"agent_event_count": 3,
"first_event_at": "2026-05-14T14:50:42.654742+00:00",
"first_agent_event_at": "2026-05-14T15:15:05.158584+00:00",
"last_agent_event_at": "2026-05-14T15:15:06.158584+00:00",
"agent_wall_clock_seconds": 1.0,
"assignment_to_first_agent_event_seconds": 1462.504,
"first_finding_event_elapsed_seconds": null,
"first_seeded_finding_event_elapsed_seconds": null,
"timestamped_event_count": 4,
"timestamped_agent_event_count": 3,
"m1nd_usage_count": 9,
"agent_testimony": "I treated this as a production-minded dependency audit, used m1nd to establish trust and orient on the package, then verified every suspicious branch with direct source reads and one compact Node probe. I found five concrete behavioral regressions in `index.js`: the default `rejectOnClear` flip, a falsy-value validation bypass, missing iterable indexes in `map`, lost arguments in `limitFunction`, and removed `Infinity` concurrency support. I did not patch source files or inspect operator-only artifacts."
"lane_id": "audit-02",
"result_path": "/Users/kle1nz/m1nd/docs/benchmarks/bug-hunt-rounds/bughunt-p-limit-tempo-20260514T145029Z/lane-results/audit-02.json",
"events_path": "/Users/kle1nz/m1nd/docs/benchmarks/bug-hunt-rounds/bughunt-p-limit-tempo-20260514T145029Z/event-streams/audit-02.jsonl",
"findings_count": 4,
"finding_title": "limitFunction no longer forwards call arguments",
"finding_title": "Falsy non-boolean rejectOnClear values bypass runtime validation",
"seeded_recall_count": 4,
"seeded_recall_rate": 0.8,
"missed_seeded_bug_ids": [
"event_count": 6,
"agent_event_count": 5,
"first_event_at": "2026-05-14T14:50:42.654982+00:00",
"first_agent_event_at": "2026-05-14T15:03:10+00:00",
"last_agent_event_at": "2026-05-14T15:14:24+00:00",
"agent_wall_clock_seconds": 674.0,
"assignment_to_first_agent_event_seconds": 747.345,
"timestamped_event_count": 6,
"timestamped_agent_event_count": 5,
"m1nd_usage_count": 5,
"agent_testimony": "I treated this as a production-minded dependency audit. I followed the required m1nd loop (trust_selftest -> session_handshake -> ingest -> session_handshake -> audit), then verified every accepted claim with direct source reads and focused Node runtime probes. I did not patch repo code, did not read operator-only artifacts, and did not inspect other lanes' prompts or results."
"lane_id": "audit-03",
"instruction_mode": "m1nd-trained",
"result_path": "/Users/kle1nz/m1nd/docs/benchmarks/bug-hunt-rounds/bughunt-p-limit-tempo-20260514T145029Z/lane-results/audit-03.json",
"events_path": "/Users/kle1nz/m1nd/docs/benchmarks/bug-hunt-rounds/bughunt-p-limit-tempo-20260514T145029Z/event-streams/audit-03.jsonl",
"finding_title": "Options-object callers opt into reject-on-clear by default",
"finding_title": "`limitFunction` drops all call-time arguments",
"finding_title": "`limit.map()` no longer provides indices for generic iterables",
"finding_title": "Falsy non-boolean `rejectOnClear` values bypass option validation",
"event_count": 5,
"first_event_at": "2026-05-14T15:16:56.877000+00:00",
"first_agent_event_at": "2026-05-14T15:16:56.877000+00:00",
"last_agent_event_at": "2026-05-14T15:16:56.877000+00:00",
"agent_wall_clock_seconds": 0.0,
"assignment_to_first_agent_event_seconds": 0.0,
"timestamped_event_count": 5,
"m1nd_usage_count": 12,
"agent_testimony": "I followed the lane's m1nd-trained loop: trust check, recovery via ingest, re-handshake to full trust, audit for orientation, then focused discovery with seek/search/activate/batch_view/impact. I did not touch source files. The findings are grounded in direct source reads, the working-tree diff for `index.js`, and focused `node --input-type=module` probes. Full `npm test` could not run in this lane because `xo` was unavailable (`sh: xo: command not found`)."
"lane_id": "audit-04",
"result_path": "/Users/kle1nz/m1nd/docs/benchmarks/bug-hunt-rounds/bughunt-p-limit-tempo-20260514T145029Z/lane-results/audit-04.json",
"events_path": "/Users/kle1nz/m1nd/docs/benchmarks/bug-hunt-rounds/bughunt-p-limit-tempo-20260514T145029Z/event-streams/audit-04.jsonl",
"finding_title": "Options-object calls flip `rejectOnClear` to true by default",
"finding_title": "`limitFunction` drops forwarded arguments and receiver context",
"finding_title": "`limit.map()` stops passing indexes for non-array iterables",
"finding_title": "Falsy non-boolean `rejectOnClear` values are accepted silently",
"finding_title": "`validateConcurrency` regresses `Infinity` support",
"event_count": 7,
"agent_event_count": 6,
"first_event_at": "2026-05-14T14:50:42.655393+00:00",
"first_agent_event_at": "2026-05-14T14:52:10+00:00",
"last_agent_event_at": "2026-05-14T15:14:07+00:00",
"agent_wall_clock_seconds": 1317.0,
"assignment_to_first_agent_event_seconds": 87.345,
"first_finding_event_elapsed_seconds": 1270.0,
"timestamped_event_count": 7,
"timestamped_agent_event_count": 6,
"m1nd_usage_count": 6,
"agent_testimony": "I followed the m1nd-trained loop: trust_selftest/session_handshake first, then ingest, audit, graph-backed search, direct source reads, git diff, and focused `node --input-type=module` runtime probes. I did not run the full `npm test` suite because the lane note explicitly said small behavior probes were sufficient."
"lane_id": "audit-05",
"instruction_mode": "direct",
"result_path": "/Users/kle1nz/m1nd/docs/benchmarks/bug-hunt-rounds/bughunt-p-limit-tempo-20260514T145029Z/lane-results/audit-05.json",
"events_path": "/Users/kle1nz/m1nd/docs/benchmarks/bug-hunt-rounds/bughunt-p-limit-tempo-20260514T145029Z/event-streams/audit-05.jsonl",
"finding_title": "limitFunction() drops call arguments and receiver context",
"finding_title": "Options-form pLimit({concurrency}) unexpectedly enables rejectOnClear by default",
"finding_title": "rejectOnClear validation accepts falsy non-boolean values",
"seeded_recall_count": 3,
"seeded_recall_rate": 0.6,
"map-non-array-iterable-index-lost"
"extra_unadjudicated_findings_count": 1,
"first_event_at": "2026-05-14T14:50:42.655610+00:00",
"first_agent_event_at": "2026-05-14T15:13:37+00:00",
"last_agent_event_at": "2026-05-14T15:15:25+00:00",
"agent_wall_clock_seconds": 108.0,
"assignment_to_first_agent_event_seconds": 1374.344,
"m1nd_usage_count": 0,
"agent_testimony": "Used direct local repo inspection plus focused `node --input-type=module` runtime probes only. I did not read operator-only artifacts, other lane prompts/results, or use any m1nd tooling/helper scripts."
"lane_id": "audit-06",
"result_path": "/Users/kle1nz/m1nd/docs/benchmarks/bug-hunt-rounds/bughunt-p-limit-tempo-20260514T145029Z/lane-results/audit-06.json",
"events_path": "/Users/kle1nz/m1nd/docs/benchmarks/bug-hunt-rounds/bughunt-p-limit-tempo-20260514T145029Z/event-streams/audit-06.jsonl",
"finding_title": "Options-object form enables `rejectOnClear` by default",
"finding_title": "`limitFunction()` drops all call arguments",
"finding_title": "`limit.map()` suppresses indices for non-array iterables",
"finding_title": "Falsy non-boolean `rejectOnClear` values bypass validation",
"event_count": 8,
"agent_event_count": 7,
"first_event_at": "2026-05-14T14:50:42.655797+00:00",
"first_agent_event_at": "2026-05-14T14:50:42.655797+00:00",
"last_agent_event_at": "2026-05-14T15:14:56+00:00",
"agent_wall_clock_seconds": 1453.344,
"first_finding_event_elapsed_seconds": 1349.344,
"first_seeded_finding_event_elapsed_seconds": 1349.344,
"timestamped_event_count": 8,
"timestamped_agent_event_count": 7,
"agent_testimony": "I used direct local repo inspection plus focused `node --input-type=module` probes and found four concrete contract/behavior mismatches: three user-visible API defects and one low-severity validation gap. I did not patch source files or read operator-only artifacts."
"lane_id": "audit-07",
"instruction_mode": "m1nd-temponizer-full",
"result_path": "/Users/kle1nz/m1nd/docs/benchmarks/bug-hunt-rounds/bughunt-p-limit-tempo-20260514T145029Z/lane-results/audit-07.json",
"events_path": "/Users/kle1nz/m1nd/docs/benchmarks/bug-hunt-rounds/bughunt-p-limit-tempo-20260514T145029Z/event-streams/audit-07.jsonl",
"finding_title": "Options-object construction enables rejectOnClear by default",
"finding_title": "limitFunction drops all call arguments to the wrapped function",
"finding_title": "limit.map omits mapper index for non-array iterables",
"finding_title": "rejectOnClear validation accepts falsey non-boolean values",
"first_event_at": "2026-05-14T15:38:12.740364+00:00",
"first_agent_event_at": "2026-05-14T15:38:20+00:00",
"last_agent_event_at": "2026-05-14T15:43:14+00:00",
"agent_wall_clock_seconds": 294.0,
"assignment_to_first_agent_event_seconds": 7.26,
"m1nd_usage_count": 2,
"agent_testimony": "I followed the lane prompt, stayed inside the assigned p-limit workspace, and wrote only the result JSON plus the event-stream JSONL. I did not read other lane prompts or results and did not inspect operator-only artifacts. Because the live m1nd surface available here would have required writes outside the allowed artifact files, I recorded the m1nd constraint and used direct source reads plus focused Node probes to prove the findings. A full `npm test` run was not available in this workspace because the script fails immediately at `xo` (`sh: xo: command not found`)."
"lane_id": "audit-08",
"result_path": "/Users/kle1nz/m1nd/docs/benchmarks/bug-hunt-rounds/bughunt-p-limit-tempo-20260514T145029Z/lane-results/audit-08.json",
"events_path": "/Users/kle1nz/m1nd/docs/benchmarks/bug-hunt-rounds/bughunt-p-limit-tempo-20260514T145029Z/event-streams/audit-08.jsonl",
"findings_count": 2,
"options-object-default-rejects-on-clear"
"finding_title": "limitFunction drops every call argument at runtime",
"finding_title": "Options-object construction flips `rejectOnClear` to enabled by default",
"seeded_recall_count": 2,
"seeded_recall_rate": 0.4,
"event_count": 10,
"agent_event_count": 9,
"first_event_at": "2026-05-14T15:38:12.749099+00:00",
"first_agent_event_at": "2026-05-14T15:42:29+00:00",
"last_agent_event_at": "2026-05-14T15:44:37+00:00",
"agent_wall_clock_seconds": 128.0,
"assignment_to_first_agent_event_seconds": 256.251,
"first_finding_event_elapsed_seconds": 0.0,
"first_seeded_finding_event_elapsed_seconds": 0.0,
"timestamped_event_count": 10,
"timestamped_agent_event_count": 9,
"m1nd_usage_count": 8,
"agent_testimony": "I followed the assigned m1nd-temponizer-full lane instructions, kept `agent_id` stable as `audit-08`, ingested the assigned p-limit workspace, and verified final truth with direct source reads plus focused Node probes. I did not patch source files, did not consult operator-only artifacts, and stopped once the public-contract regressions were source-backed and runtime-reproduced."
"arms": {
"direct": {
"lane_count": 2,
"completed_lane_count": 2,
"seeded_bug_count_per_lane": 5,
"seeded_recall_total": 7,
"seeded_possible_total": 10,
"seeded_recall_rate": 0.7,
"per_lane_seeded_recall_counts": [
3,
4
"median_seeded_recall_count": 3.5,
"average_seeded_recall_count": 3.5,
"median_agent_wall_clock_seconds": 780.672,
"median_first_finding_event_elapsed_seconds": 1349.344,
"median_first_seeded_finding_event_elapsed_seconds": 1349.344,
"total_findings": 8,
"extra_unadjudicated_findings_total": 1,
"audit-05",
"audit-06"
]
"m1nd-temponizer": {
"seeded_recall_total": 9,
"seeded_recall_rate": 0.9,
5,
"median_seeded_recall_count": 4.5,
"average_seeded_recall_count": 4.5,
"median_agent_wall_clock_seconds": 337.5,
"median_first_finding_event_elapsed_seconds": null,
"median_first_seeded_finding_event_elapsed_seconds": null,
"total_findings": 9,
"extra_unadjudicated_findings_total": 0,
"audit-01",
"audit-02"
"m1nd-temponizer-full": {
"seeded_recall_total": 6,
4,
2
"median_seeded_recall_count": 3.0,
"average_seeded_recall_count": 3.0,
"median_agent_wall_clock_seconds": 211.0,
"median_first_finding_event_elapsed_seconds": 0.0,
"median_first_seeded_finding_event_elapsed_seconds": 0.0,
"total_findings": 6,
"audit-07",
"audit-08"
"m1nd-trained": {
5
"median_agent_wall_clock_seconds": 658.5,
"median_first_finding_event_elapsed_seconds": 1270.0,
"audit-03",
"audit-04"
"comparability": {
"all_lane_results_present": true,
"primary_arm_lane_counts": {
"direct": 2,
"m1nd-temponizer": 2,
"m1nd-temponizer-full": 2,
"m1nd-trained": 2
"rate_comparison_available": true,
"balanced_lane_counts": true,
"comparability_notes": [
"Compare rates rather than raw totals when arm lane counts differ.",
"Extra findings are unadjudicated and are not used as precision penalties."
"top_line": {
"median_seeded_recall_count": 3.5
"median_seeded_recall_count": 4.5
"median_seeded_recall_count": 3.0
"invalidated_attempts": [],
"public_claim_worthy": false,
"public_claim_blockers": [
"single internal round",
"one fixture repo",
"seeded recall only; extra findings are not independently judged"
"non_claims": [
"Finding extra real issues is allowed but seeded recall is measured against these five defects.",
"Primary auditors are not told bug count or comparison arm.",
"This round tests p-limit seeded behavioral defects, not universal agent performance.",
"agent testimony is not evidence without scored finding artifacts",
"extra findings are reported as unadjudicated, not as false positives",
"m1nd does not replace tests, compiler output, git history, rg, or direct file truth",
"one bug-hunt round is not a public performance claim",
"seeded recall does not measure all real defects in the fixture repo"