posthog/pi-ci-integrator.html

<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">

  <title>Pi as CI Integrator: Agents That Fix Their Own Builds — Tinqs Blog</title>
  <meta name="description" content="Most coding agents stop at git push. Our Pi fork watches CI, reads failure logs, and fixes its own code until the pipeline goes green.">
  <meta name="robots" content="index, follow">
  <link rel="canonical" href="https://www.tinqs.com/blog/pi-ci-integrator">

  <meta property="og:type" content="article">
  <meta property="og:url" content="https://www.tinqs.com/blog/pi-ci-integrator">
  <meta property="og:title" content="Pi as CI Integrator: Agents That Fix Their Own Builds">
  <meta property="og:description" content="Coding agents that watch CI and fix their own builds.">
  <meta property="og:image" content="https://www.tinqs.com/img/og-cover.jpg">

  <meta name="twitter:card" content="summary_large_image">
  <meta name="twitter:title" content="Pi as CI Integrator: Agents That Fix Their Own Builds">
  <meta name="twitter:description" content="Coding agents that watch CI and fix their own builds.">
  <meta name="twitter:image" content="https://www.tinqs.com/img/og-cover.jpg">

  <script type="application/ld+json">
  {
    "@context": "https://schema.org",
    "@type": "BlogPosting",
    "headline": "Pi as CI Integrator: Agents That Fix Their Own Builds",
    "datePublished": "2026-05-25",
    "author": {
      "@type": "Person",
      "name": "Ozan Bozkurt"
    },
    "publisher": {
      "@type": "Organization",
      "name": "Tinqs Limited",
      "url": "https://www.tinqs.com"
    },
    "description": "Most coding agents stop at git push. Our Pi fork watches CI, reads failure logs, and fixes its own code until the pipeline goes green."
  }
  </script>

  <!--
    PostHog (EU) analytics.
    Statement 1 is the minified loader stub. It sets window.posthog to a stub
    object and defines init on it. When init is called, the stub injects an
    async script tag for /static/array.js on the assets host derived from
    api_host (".i.posthog.com" becomes "-assets.i.posthog.com") and installs
    the listed methods (capture, identify, ...) so that each call is pushed
    onto the stub as a queued entry. Presumably array.js replays that queue
    once it has loaded; verify against the PostHog docs.
    Statement 2 calls posthog.init with the project key and the EU api_host
    and ui_host.
    NOTE(review): person_profiles 'identified_only' and the dated 'defaults'
    value are PostHog config options; confirm their meaning in the PostHog
    docs before changing them. The stub looks like the stock vendor snippet,
    so avoid hand-editing it.
  -->
  <script>
    !function(t,e){var o,n,p,r;e.__SV||(window.posthog=e,e._i=[],e.init=function(i,s,a){function g(t,e){var o=e.split(".");2==o.length&&(t=t[o[0]],e=o[1]),t[e]=function(){t.push([e].concat(Array.prototype.slice.call(arguments,0)))}}(p=t.createElement("script")).type="text/javascript",p.crossOrigin="anonymous",p.async=!0,p.src=s.api_host.replace(".i.posthog.com","-assets.i.posthog.com")+"/static/array.js",(r=t.getElementsByTagName("script")[0]).parentNode.insertBefore(p,r);var u=e;for(void 0!==a?u=e[a]=[]:a="posthog",u.people=u.people||[],u.toString=function(t){var e="posthog";return"posthog"!==a&&(e+="."+a),t||(e+=" (stub)"),e},u.people.toString=function(){return u.toString(1)+".people (stub)"},o="init capture register register_once register_for_session unregister unregister_for_session getFeatureFlag getFeatureFlagPayload isFeatureEnabled reloadFeatureFlags updateEarlyAccessFeatureEnrollment getEarlyAccessFeatures on onFeatureFlags onSessionId getSurveys getActiveMatchingSurveys renderSurvey canRenderSurvey getNextSurveyStep identify setPersonProperties group resetGroups setPersonPropertiesForFlags resetPersonPropertiesForFlags setGroupPropertiesForFlags resetGroupPropertiesForFlags reset get_distinct_id getGroups get_session_id get_session_replay_url alias set_config startSessionRecording stopSessionRecording sessionRecordingStarted captureException loadToolbar get_property getSessionProperty createPersonProfile opt_in_capturing opt_out_capturing has_opted_in_capturing has_opted_out_capturing clear_opt_in_out_capturing debug".split(" "),n=0;n<o.length;n++)g(u,o[n]);e._i.push([i,s,a])},e.__SV=1)}(document,window.posthog||[]);
    posthog.init('phc_teG6p5oxf6poQHPThq5AGKzWQNhw4bHW9arLwWAVXm3f',{api_host:'https://eu.i.posthog.com',ui_host:'https://eu.posthog.com',person_profiles:'identified_only',defaults:'2026-01-30'})
  </script>

  <link rel="icon" type="image/svg+xml" href="/img/favicon.svg">
  <link rel="preconnect" href="https://fonts.googleapis.com">
  <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
  <link href="https://fonts.googleapis.com/css2?family=IBM+Plex+Sans:ital,wght@0,300;0,400;0,500;0,600;0,700;1,300;1,400;1,500;1,600;1,700&display=swap" rel="stylesheet">
  <link rel="stylesheet" href="../style.css">
</head>
<body>

  <!-- NAV -->
  <nav class="nav nav--scrolled" id="nav">
    <a href="/" class="nav__logo" aria-label="Tinqs home">
      <span class="nav__wordmark">TINQS</span>
    </a>
    <div class="nav__links">
      <a href="/#game" class="nav__link">Games</a>
      <a href="/#tech" class="nav__link">Technology</a>
      <a href="/#about" class="nav__link">About</a>
      <a href="/blog/" class="nav__link" style="color: var(--c-accent-l);">Blog</a>
      <a href="/#signup" class="nav__link">Contact</a>
      <a href="/press" class="nav__link">Press</a>
    </div>
    <button class="nav__burger" aria-label="Open menu" id="navBurger">
      <span></span><span></span><span></span>
    </button>
  </nav>

  <!-- MOBILE MENU -->
  <div class="mobile-menu" id="mobileMenu">
    <a href="/#game" class="mobile-menu__link">Games</a>
    <a href="/#tech" class="mobile-menu__link">Technology</a>
    <a href="/#about" class="mobile-menu__link">About</a>
    <a href="/blog/" class="mobile-menu__link">Blog</a>
    <a href="/#signup" class="mobile-menu__link">Contact</a>
    <a href="/press" class="mobile-menu__link">Press</a>
  </div>

  <!-- POST -->
  <article class="post">
    <a href="/blog/" class="post__back">&larr; All Posts</a>
    <span class="post__date">25 May 2026</span>
    <h1 class="post__title">Pi as CI Integrator: Agents That Fix Their Own Builds</h1>
    <p class="post__lead">Most coding agents have a dirty secret: they don't care if the code compiles. They write, they push, they walk away. The human discovers the broken build an hour later. We built a Pi extension that closes the loop &mdash; agents that watch CI, read failure logs, and fix their own mistakes.</p>

    <div class="post__body">
<h2>The Gap</h2>
<p>Every agent demo looks the same. The AI writes code, commits, pushes. The presenter says "and now we have a pull request!" Cut. End of demo.</p>
<p>What happens next? The CI pipeline runs. Tests fail. Linting screams. The build breaks because someone forgot an import. A human opens the PR, reads the red badge, clicks into the logs, finds the error, fixes it, pushes again. The agent did 90% of the work but left the last 10% &mdash; the most tedious part &mdash; for a person.</p>
<p>We wanted agents that finish the job.</p>
<h2>The tinqs-ci Extension</h2>
<p>Our <a href="https://tinqs.com/tinqs/pi" style="color: var(--c-accent-l);">Pi fork</a> has a <code>tinqs-ci</code> extension &mdash; a single TypeScript file, about 200 lines &mdash; that gives the agent three tools:</p>
<ul>
  <li><strong>ci_status</strong> &mdash; checks the current pipeline state for a branch (pending, running, success, failure)</li>
  <li><strong>ci_logs</strong> &mdash; fetches the full build log from the most recent failed run</li>
  <li><strong>ci_wait</strong> &mdash; polls the pipeline every 15 seconds until it finishes, then returns the result</li>
</ul>
<p>These are Gitea Actions API calls under the hood. The agent authenticates with the same PAT it uses for git push. No extra credentials, no special CI service account.</p>
<h2>The Loop</h2>
<p>Here's what a Pi task looks like end to end:</p>
<pre><code>Agent receives task brief
  → reads codebase, plans approach
  → writes code
  → runs local tests (bash tool)
  → commits and pushes branch
  → calls ci_wait
  → CI passes → opens PR via Gitea API
  → CI fails → calls ci_logs
  → reads error output
  → fixes the issue
  → pushes again
  → calls ci_wait again
  → repeats until green (max 3 retries)</code></pre>
<p>The key is that <code>ci_logs</code> returns the raw build output &mdash; compiler errors, test failures, lint violations &mdash; as plain text in the agent's context. DeepSeek V4 is surprisingly good at reading build logs. It parses a Go compiler error, identifies the file and line, and fixes it. It reads a test assertion failure, understands what the test expected, and corrects the implementation.</p>
<p>Three retries is the hard limit. If the agent can't fix it in three rounds, it opens the PR anyway with a comment explaining what failed and why. A human takes over from there. In practice, most failures resolve on the first retry &mdash; it's usually a missing import or a type mismatch.</p>
<h2>What This Actually Looks Like</h2>
<p>A real run from last week. The task: add a health check endpoint to a Go service.</p>
<ul>
  <li><strong>Turn 1:</strong> Agent reads the codebase, writes the handler and test, pushes. CI fails &mdash; the test imports a package that doesn't exist on the runner.</li>
  <li><strong>Turn 2:</strong> Agent reads <code>ci_logs</code>, sees the <code>go: module not found</code> error, adds the missing <code>go.mod</code> replace directive, pushes. CI passes.</li>
  <li><strong>Turn 3:</strong> Agent opens PR with passing checks.</li>
</ul>
<p>Total time: 4 minutes. Total cost: $0.06. No human touched the keyboard.</p>
<p>Without the CI extension, this would have been a PR with a red badge and a Slack message saying "hey, the agent's PR is broken again." Someone would have context-switched, opened the logs, seen the trivial error, fixed it, and lost 20 minutes of flow state.</p>
<h2>Why This Matters More Than You Think</h2>
<p>CI integration isn't a feature. It's the difference between an agent that helps and an agent that creates work.</p>
<p>An agent that pushes broken code is worse than no agent at all. It creates a false sense of progress &mdash; "the PR is up!" &mdash; while actually adding a task to someone's plate. Every broken PR is an interruption. Every interruption costs 15 minutes of context-switching.</p>
<p>An agent that watches CI and fixes its own builds is genuinely autonomous. You submit a task, you walk away, you come back to a green PR ready for review. The agent handled the mechanical iteration that a human would have done anyway &mdash; the fix-push-wait-check cycle that eats hours of developer time every week.</p>
<h2>The Guardrail Problem</h2>
<p>Letting an agent retry its own builds sounds dangerous. What if it enters an infinite loop? What if it starts making increasingly wild changes to get the build to pass?</p>
<p>Three safeguards:</p>
<p><strong>Retry limit.</strong> Three attempts maximum. After that, the agent stops and reports. This is a hard limit in the orchestrator, not a suggestion to the model.</p>
<p><strong>Diff budget.</strong> Each retry can only touch files that were already in the original changeset. The agent can't "fix" a build failure by rewriting the test suite or disabling the linter. If the fix requires touching new files, it fails and escalates.</p>
<p><strong>Hallucination detection.</strong> The guardrail extension monitors every turn. If the agent claims "the build passed" without having called <code>ci_status</code> or <code>ci_wait</code>, it gets corrected. Agents are not allowed to guess the CI result.</p>
<h2>The Numbers</h2>
<p>Over three weeks of running the orchestrator:</p>
<ul>
  <li><strong>87 tasks</strong> completed end-to-end</li>
  <li><strong>23 tasks</strong> needed at least one CI retry (26%)</li>
  <li><strong>19 of those 23</strong> resolved on the first retry</li>
  <li><strong>4 tasks</strong> hit the retry limit and escalated to a human</li>
  <li><strong>0 tasks</strong> produced a merged PR that later broke something else</li>
</ul>
<p>The 26% retry rate tells you how often agents push code that doesn't build on the first try. That's not a bad number &mdash; it's the same rate you'd see from a junior developer. The difference is the agent fixes it in 30 seconds instead of 20 minutes.</p>
<hr>
<p><em>The CI extension is part of our <a href="https://tinqs.com/tinqs/pi" style="color: var(--c-accent-l);">Pi fork</a>, which runs inside <a href="https://tinqs.com" style="color: var(--c-accent-l);">Tinqs Studio</a> &mdash; a Gitea-based platform for game development with built-in AI agents. The whole thing is MIT licensed.</em></p>

    </div>

    <div class="post__author">
      <div class="post__author-avatar">OB</div>
      <div class="post__author-info">
        <span class="post__author-name">Ozan Bozkurt</span><br>
        CTO &amp; Developer, Tinqs
      </div>
    </div>
  </article>

  <!-- FOOTER -->
  <footer class="footer">
    <div class="footer__inner">
      <span class="footer__wordmark">TINQS</span>
      <div class="footer__links">
        <a href="/#game">Games</a>
        <a href="/#tech">Technology</a>
        <a href="/#about">About</a>
        <a href="/blog/">Blog</a>
        <a href="mailto:hello@tinqs.com">hello@tinqs.com</a>
        <a href="/press">Press Kit</a>
      </div>
      <p class="footer__copy">Tinqs Limited &mdash; London, est. 2020</p>
    </div>
  </footer>

  <script>
    // Mobile navigation: the burger button shows and hides the #mobileMenu panel.
    const burger = document.getElementById('navBurger');
    const mobileMenu = document.getElementById('mobileMenu');

    // Single place that applies the open/closed state, so the menu class, the
    // burger class, the ARIA state and the body scroll lock cannot drift apart.
    const setMenuOpen = (open) => {
      mobileMenu.classList.toggle('mobile-menu--open', open);
      burger.classList.toggle('nav__burger--open', open);
      // Expose the state to assistive technology (previously only CSS classes changed).
      burger.setAttribute('aria-expanded', String(open));
      // Setting overflow to 'hidden' stops the page behind the menu from scrolling.
      document.body.style.overflow = open ? 'hidden' : '';
    };

    // Tie the button to the panel it controls and publish the initial (closed) state.
    burger.setAttribute('aria-controls', 'mobileMenu');
    burger.setAttribute('aria-expanded', 'false');

    // The burger flips whatever state the menu is currently in.
    burger.addEventListener('click', () => {
      setMenuOpen(!mobileMenu.classList.contains('mobile-menu--open'));
    });

    // Clicking any link inside the menu closes it and restores scrolling.
    mobileMenu.querySelectorAll('a').forEach((link) => {
      link.addEventListener('click', () => setMenuOpen(false));
    });
  </script>

</body>
</html>
feat: blog build system + all HTML generated by Pi agent 2026-05-26 11:12:08 +01:00			`<!DOCTYPE html>`
			`<html lang="en">`
			`<head>`
			`<meta charset="UTF-8">`
			`<meta name="viewport" content="width=device-width, initial-scale=1.0">`

			`<title>Pi as CI Integrator: Agents That Fix Their Own Builds — Tinqs Blog</title>`
			`<meta name="description" content="Most coding agents stop at git push. Our Pi fork watches CI, reads failure logs, and fixes its own code until the pipeline goes green.">`
			`<meta name="robots" content="index, follow">`
			`<link rel="canonical" href="https://www.tinqs.com/blog/pi-ci-integrator">`

			`<meta property="og:type" content="article">`
			`<meta property="og:url" content="https://www.tinqs.com/blog/pi-ci-integrator">`
			`<meta property="og:title" content="Pi as CI Integrator: Agents That Fix Their Own Builds">`
			`<meta property="og:description" content="Coding agents that watch CI and fix their own builds.">`
			`<meta property="og:image" content="https://www.tinqs.com/img/og-cover.jpg">`

			`<meta name="twitter:card" content="summary_large_image">`
			`<meta name="twitter:title" content="Pi as CI Integrator: Agents That Fix Their Own Builds">`
			`<meta name="twitter:description" content="Coding agents that watch CI and fix their own builds.">`
			`<meta name="twitter:image" content="https://www.tinqs.com/img/og-cover.jpg">`

			`<script type="application/ld+json">`
			`{`
			`"@context": "https://schema.org",`
			`"@type": "BlogPosting",`
			`"headline": "Pi as CI Integrator: Agents That Fix Their Own Builds",`
			`"datePublished": "2026-05-25",`
			`"author": {`
			`"@type": "Person",`
			`"name": "Ozan Bozkurt"`
			`},`
			`"publisher": {`
			`"@type": "Organization",`
			`"name": "Tinqs Limited",`
			`"url": "https://www.tinqs.com"`
			`},`
			`"description": "Most coding agents stop at git push. Our Pi fork watches CI, reads failure logs, and fixes its own code until the pipeline goes green."`
			`}`
			`</script>`

feat(analytics): add PostHog (EU) to blog templates + rebuild 2026-06-02 22:19:34 +01:00			`<!-- PostHog (EU) -->`
			`<script>`
			!function(t,e){var o,n,p,r;e.__SV||(window.posthog=e,e._i=[],e.init=function(i,s,a){function g(t,e){var o=e.split(".");2==o.length&&(t=t[o[0]],e=o[1]),t[e]=function(){t.push([e].concat(Array.prototype.slice.call(arguments,0)))}}(p=t.createElement("script")).type="text/javascript",p.crossOrigin="anonymous",p.async=!0,p.src=s.api_host.replace(".i.posthog.com","-assets.i.posthog.com")+"/static/array.js",(r=t.getElementsByTagName("script")[0]).parentNode.insertBefore(p,r);var u=e;for(void 0!==a?u=e[a]=[]:a="posthog",u.people=u.people||[],u.toString=function(t){var e="posthog";return"posthog"!==a&&(e+="."+a),t||(e+=" (stub)"),e},u.people.toString=function(){return u.toString(1)+".people (stub)"},o="init capture register register_once register_for_session unregister unregister_for_session getFeatureFlag getFeatureFlagPayload isFeatureEnabled reloadFeatureFlags updateEarlyAccessFeatureEnrollment getEarlyAccessFeatures on onFeatureFlags onSessionId getSurveys getActiveMatchingSurveys renderSurvey canRenderSurvey getNextSurveyStep identify setPersonProperties group resetGroups setPersonPropertiesForFlags resetPersonPropertiesForFlags setGroupPropertiesForFlags resetGroupPropertiesForFlags reset get_distinct_id getGroups get_session_id get_session_replay_url alias set_config startSessionRecording stopSessionRecording sessionRecordingStarted captureException loadToolbar get_property getSessionProperty createPersonProfile opt_in_capturing opt_out_capturing has_opted_in_capturing has_opted_out_capturing clear_opt_in_out_capturing debug".split(" "),n=0;n<o.length;n++)g(u,o[n]);e._i.push([i,s,a])},e.__SV=1)}(document,window.posthog||[]);
			`posthog.init('phc_teG6p5oxf6poQHPThq5AGKzWQNhw4bHW9arLwWAVXm3f',{api_host:'https://eu.i.posthog.com',ui_host:'https://eu.posthog.com',person_profiles:'identified_only',defaults:'2026-01-30'})`
			`</script>`

feat: blog build system + all HTML generated by Pi agent 2026-05-26 11:12:08 +01:00			`<link rel="icon" type="image/svg+xml" href="/img/favicon.svg">`
			`<link rel="preconnect" href="https://fonts.googleapis.com">`
			`<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>`
			`<link href="https://fonts.googleapis.com/css2?family=IBM+Plex+Sans:ital,wght@0,300;0,400;0,500;0,600;0,700;1,300;1,400;1,500;1,600;1,700&display=swap" rel="stylesheet">`
			`<link rel="stylesheet" href="../style.css">`
			`</head>`
			`<body>`

			`<!-- NAV -->`
			`<nav class="nav nav--scrolled" id="nav">`
			`<a href="/" class="nav__logo" aria-label="Tinqs home">`
			`<span class="nav__wordmark">TINQS</span>`
			`</a>`
			`<div class="nav__links">`
			`<a href="/#game" class="nav__link">Games</a>`
			`<a href="/#tech" class="nav__link">Technology</a>`
			`<a href="/#about" class="nav__link">About</a>`
			`<a href="/blog/" class="nav__link" style="color: var(--c-accent-l);">Blog</a>`
			`<a href="/#signup" class="nav__link">Contact</a>`
			`<a href="/press" class="nav__link">Press</a>`
			`</div>`
			`<button class="nav__burger" aria-label="Open menu" id="navBurger">`
			`<span></span><span></span><span></span>`
			`</button>`
			`</nav>`

			`<!-- MOBILE MENU -->`
			`<div class="mobile-menu" id="mobileMenu">`
			`<a href="/#game" class="mobile-menu__link">Games</a>`
			`<a href="/#tech" class="mobile-menu__link">Technology</a>`
			`<a href="/#about" class="mobile-menu__link">About</a>`
			`<a href="/blog/" class="mobile-menu__link">Blog</a>`
			`<a href="/#signup" class="mobile-menu__link">Contact</a>`
			`<a href="/press" class="mobile-menu__link">Press</a>`
			`</div>`

			`<!-- POST -->`
			`<article class="post">`
			`<a href="/blog/" class="post__back">← All Posts</a>`
			`<span class="post__date">25 May 2026</span>`
			`<h1 class="post__title">Pi as CI Integrator: Agents That Fix Their Own Builds</h1>`
			`<p class="post__lead">Most coding agents have a dirty secret: they don't care if the code compiles. They write, they push, they walk away. The human discovers the broken build an hour later. We built a Pi extension that closes the loop — agents that watch CI, read failure logs, and fix their own mistakes.</p>`

			`<div class="post__body">`
			`<h2>The Gap</h2>`
			`<p>Every agent demo looks the same. The AI writes code, commits, pushes. The presenter says "and now we have a pull request!" Cut. End of demo.</p>`
			`<p>What happens next? The CI pipeline runs. Tests fail. Linting screams. The build breaks because someone forgot an import. A human opens the PR, reads the red badge, clicks into the logs, finds the error, fixes it, pushes again. The agent did 90% of the work but left the last 10% — the most tedious part — for a person.</p>`
			`<p>We wanted agents that finish the job.</p>`
			`<h2>The tinqs-ci Extension</h2>`
			`<p>Our <a href="https://tinqs.com/tinqs/pi" style="color: var(--c-accent-l);">Pi fork</a> has a <code>tinqs-ci</code> extension — a single TypeScript file, about 200 lines — that gives the agent three tools:</p>`
			`<ul>`
			`<li><strong>ci_status</strong> — checks the current pipeline state for a branch (pending, running, success, failure)</li>`
			`<li><strong>ci_logs</strong> — fetches the full build log from the most recent failed run</li>`
			`<li><strong>ci_wait</strong> — polls the pipeline every 15 seconds until it finishes, then returns the result</li>`
			`</ul>`
			`<p>These are Gitea Actions API calls under the hood. The agent authenticates with the same PAT it uses for git push. No extra credentials, no special CI service account.</p>`
			`<h2>The Loop</h2>`
			`<p>Here's what a Pi task looks like end to end:</p>`
			`<pre><code>Agent receives task brief`
			`→ reads codebase, plans approach`
			`→ writes code`
			`→ runs local tests (bash tool)`
			`→ commits and pushes branch`
			`→ calls ci_wait`
			`→ CI passes → opens PR via Gitea API`
			`→ CI fails → calls ci_logs`
			`→ reads error output`
			`→ fixes the issue`
			`→ pushes again`
			`→ calls ci_wait again`
			`→ repeats until green (max 3 retries)</code></pre>`
			`<p>The key is that <code>ci_logs</code> returns the raw build output — compiler errors, test failures, lint violations — as plain text in the agent's context. DeepSeek V4 is surprisingly good at reading build logs. It parses a Go compiler error, identifies the file and line, and fixes it. It reads a test assertion failure, understands what the test expected, and corrects the implementation.</p>`
			`<p>Three retries is the hard limit. If the agent can't fix it in three rounds, it opens the PR anyway with a comment explaining what failed and why. A human takes over from there. In practice, most failures resolve on the first retry — it's usually a missing import or a type mismatch.</p>`
			`<h2>What This Actually Looks Like</h2>`
			`<p>A real run from last week. The task: add a health check endpoint to a Go service.</p>`
			`<ul>`
			`<li><strong>Turn 1:</strong> Agent reads the codebase, writes the handler and test, pushes. CI fails — the test imports a package that doesn't exist on the runner.</li>`
			`<li><strong>Turn 2:</strong> Agent reads <code>ci_logs</code>, sees the <code>go: module not found</code> error, adds the missing <code>go.mod</code> replace directive, pushes. CI passes.</li>`
			`<li><strong>Turn 3:</strong> Agent opens PR with passing checks.</li>`
			`</ul>`
			`<p>Total time: 4 minutes. Total cost: $0.06. No human touched the keyboard.</p>`
			`<p>Without the CI extension, this would have been a PR with a red badge and a Slack message saying "hey, the agent's PR is broken again." Someone would have context-switched, opened the logs, seen the trivial error, fixed it, and lost 20 minutes of flow state.</p>`
			`<h2>Why This Matters More Than You Think</h2>`
			`<p>CI integration isn't a feature. It's the difference between an agent that helps and an agent that creates work.</p>`
			`<p>An agent that pushes broken code is worse than no agent at all. It creates a false sense of progress — "the PR is up!" — while actually adding a task to someone's plate. Every broken PR is an interruption. Every interruption costs 15 minutes of context-switching.</p>`
			`<p>An agent that watches CI and fixes its own builds is genuinely autonomous. You submit a task, you walk away, you come back to a green PR ready for review. The agent handled the mechanical iteration that a human would have done anyway — the fix-push-wait-check cycle that eats hours of developer time every week.</p>`
			`<h2>The Guardrail Problem</h2>`
			`<p>Letting an agent retry its own builds sounds dangerous. What if it enters an infinite loop? What if it starts making increasingly wild changes to get the build to pass?</p>`
			`<p>Three safeguards:</p>`
			`<p><strong>Retry limit.</strong> Three attempts maximum. After that, the agent stops and reports. This is a hard limit in the orchestrator, not a suggestion to the model.</p>`
			`<p><strong>Diff budget.</strong> Each retry can only touch files that were already in the original changeset. The agent can't "fix" a build failure by rewriting the test suite or disabling the linter. If the fix requires touching new files, it fails and escalates.</p>`
			`<p><strong>Hallucination detection.</strong> The guardrail extension monitors every turn. If the agent claims "the build passed" without having called <code>ci_status</code> or <code>ci_wait</code>, it gets corrected. Agents are not allowed to guess the CI result.</p>`
			`<h2>The Numbers</h2>`
			`<p>Over three weeks of running the orchestrator:</p>`
			`<ul>`
			`<li><strong>87 tasks</strong> completed end-to-end</li>`
			`<li><strong>23 tasks</strong> needed at least one CI retry (26%)</li>`
			`<li><strong>19 of those 23</strong> resolved on the first retry</li>`
			`<li><strong>4 tasks</strong> hit the retry limit and escalated to a human</li>`
			`<li><strong>0 tasks</strong> produced a merged PR that later broke something else</li>`
			`</ul>`
			`<p>The 26% retry rate tells you how often agents push code that doesn't build on the first try. That's not a bad number — it's the same rate you'd see from a junior developer. The difference is the agent fixes it in 30 seconds instead of 20 minutes.</p>`
			`<hr>`
			`<p><em>The CI extension is part of our <a href="https://tinqs.com/tinqs/pi" style="color: var(--c-accent-l);">Pi fork</a>, which runs inside <a href="https://tinqs.com" style="color: var(--c-accent-l);">Tinqs Studio</a> — a Gitea-based platform for game development with built-in AI agents. The whole thing is MIT licensed.</em></p>`

			`</div>`

			`<div class="post__author">`
			`<div class="post__author-avatar">OB</div>`
			`<div class="post__author-info">`
			`<span class="post__author-name">Ozan Bozkurt</span><br>`
			`CTO & Developer, Tinqs`
			`</div>`
			`</div>`
			`</article>`

			`<!-- FOOTER -->`
			`<footer class="footer">`
			`<div class="footer__inner">`
			`<span class="footer__wordmark">TINQS</span>`
			`<div class="footer__links">`
			`<a href="/#game">Games</a>`
			`<a href="/#tech">Technology</a>`
			`<a href="/#about">About</a>`
			`<a href="/blog/">Blog</a>`
			`<a href="mailto:hello@tinqs.com">hello@tinqs.com</a>`
			`<a href="/press">Press Kit</a>`
			`</div>`
			`<p class="footer__copy">Tinqs Limited — London, est. 2020</p>`
			`</div>`
			`</footer>`

			`<script>`
			`const burger = document.getElementById('navBurger');`
			`const mobileMenu = document.getElementById('mobileMenu');`
			`burger.addEventListener('click', () => {`
			`const open = mobileMenu.classList.toggle('mobile-menu--open');`
			`burger.classList.toggle('nav__burger--open', open);`
			`document.body.style.overflow = open ? 'hidden' : '';`
			`});`
			`mobileMenu.querySelectorAll('a').forEach(link => {`
			`link.addEventListener('click', () => {`
			`mobileMenu.classList.remove('mobile-menu--open');`
			`burger.classList.remove('nav__burger--open');`
			`document.body.style.overflow = '';`
			`});`
			`});`
			`</script>`

			`</body>`
			`</html>`