<?xml version="1.0" encoding="UTF-8" ?>
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom">
  <channel>
    <title>PACKWOLF Engineering</title>
    <link>https://packwolf.ai/engineering</link>
    <atom:link href="https://packwolf.ai/feed.xml" rel="self" type="application/rss+xml" />
    <description>How we build PACKWOLF. Posts on the agent runtime, observability, and scheduling.</description>
    <language>en-us</language>
    <lastBuildDate>Tue, 21 Apr 2026 00:00:00 GMT</lastBuildDate>
    
    <item>
      <title>Why we wrote our own context-compaction stack</title>
      <link>https://packwolf.ai/engineering/context-compaction</link>
      <guid isPermaLink="true">https://packwolf.ai/engineering/context-compaction</guid>
      <pubDate>Tue, 21 Apr 2026 00:00:00 GMT</pubDate>
      <author>noreply@packwolf.ai (PACKWOLF engineering)</author>
      <category>Context</category>
      <description>Long contexts cost more, run slower, and degrade recall of material in the middle. Compaction keeps prompts focused.</description>
    </item>
    <item>
      <title>Building a flame graph for agent execution</title>
      <link>https://packwolf.ai/engineering/flame-graph-for-agents</link>
      <guid isPermaLink="true">https://packwolf.ai/engineering/flame-graph-for-agents</guid>
      <pubDate>Wed, 18 Mar 2026 00:00:00 GMT</pubDate>
      <author>noreply@packwolf.ai (PACKWOLF engineering)</author>
      <category>Observability</category>
      <description>Agent runtimes have a different failure shape than web apps: the model can emit a tool name with no arguments.</description>
    </item>
    <item>
      <title>A priority queue for shared local LLMs</title>
      <link>https://packwolf.ai/engineering/priority-queue-for-local-llms</link>
      <guid isPermaLink="true">https://packwolf.ai/engineering/priority-queue-for-local-llms</guid>
      <pubDate>Mon, 09 Feb 2026 00:00:00 GMT</pubDate>
      <author>noreply@packwolf.ai (PACKWOLF engineering)</author>
      <category>Models</category>
      <description>Most local inference servers handle one request at a time per model. Concurrent requests cause crashes, model-swap thrashing, or both.</description>
    </item>
  </channel>
</rss>