<mxfile host="Electron" agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) draw.io/29.6.6 Chrome/144.0.7559.236 Electron/40.8.4 Safari/537.36" version="29.6.6">
  <diagram name="调度全景" id="sched_overview">
    <mxGraphModel dx="1645" dy="1064" grid="1" gridSize="10" guides="1" tooltips="1" connect="1" arrows="1" fold="1" page="0" pageScale="1" pageWidth="3200" pageHeight="2400" math="0" shadow="0">
      <root>
        <mxCell id="0" />
        <mxCell id="1" parent="0" />
        <mxCell id="2" parent="1" style="text;fontSize=24;fontStyle=1;fillColor=none;strokeColor=none;align=left;" value="vLLM Scheduler — Qwen3VL 64路视频理解 调度全景" vertex="1">
          <mxGeometry height="36" width="900" x="40" y="20" as="geometry" />
        </mxCell>
        <mxCell id="3" parent="1" style="text;fontSize=13;fontStyle=2;fontColor=#666666;fillColor=none;strokeColor=none;align=left;" value="以实际数字演示：64个视频请求如何被调度、如何 Continuous Batching、Prefill 与 Decode 如何组合推理" vertex="1">
          <mxGeometry height="20" width="1100" x="40" y="56" as="geometry" />
        </mxCell>
        <mxCell id="10" parent="1" style="rounded=1;fillColor=#FFF8E1;strokeColor=#F9A825;strokeWidth=2;" value="" vertex="1">
          <mxGeometry height="380" width="980" x="40" y="90" as="geometry" />
        </mxCell>
        <mxCell id="11" parent="1" style="text;fontSize=16;fontStyle=1;fontColor=#E65100;fillColor=none;strokeColor=none;align=left;" value="A. 单个 Qwen3VL 视频请求的 Token 构成" vertex="1">
          <mxGeometry height="26" width="600" x="60" y="100" as="geometry" />
        </mxCell>
        <mxCell id="12" parent="1" style="rounded=1;fillColor=#FFE0B2;strokeColor=#F57C00;fontSize=12;fontStyle=1;align=left;spacingLeft=10;" value="输入: 视频 64 帧, 分辨率 448×448" vertex="1">
          <mxGeometry height="32" width="440" x="60" y="138" as="geometry" />
        </mxCell>
        <mxCell id="13" parent="1" style="rounded=1;fillColor=#FFFFFF;strokeColor=#F57C00;fontSize=12;align=left;spacingLeft=10;whiteSpace=wrap;" value="temporal_patch_size = 2  →  grid_t = 64 / 2 = 32 组&#xa;patch_size = 16  →  grid_h = grid_w = 448 / 16 = 28&#xa;spatial_merge_size = 2  →  每组 merged tokens = (28×28) / 2² = 196" vertex="1">
          <mxGeometry height="72" width="440" x="60" y="182" as="geometry" />
        </mxCell>
        <mxCell id="14" parent="1" style="rounded=1;fillColor=#FFE0B2;strokeColor=#F57C00;fontSize=13;fontStyle=1;align=center;" value="Vision Tokens (video_token_id): 32 × 196 = 6,272" vertex="1">
          <mxGeometry height="32" width="440" x="60" y="266" as="geometry" />
        </mxCell>
        <mxCell id="15" parent="1" style="rounded=1;fillColor=#FFFFFF;strokeColor=#BDBDBD;fontSize=11;align=left;spacingLeft=10;whiteSpace=wrap;" value="时间戳 tokens: ~10 tokens/组 × 32 组 ≈ 320&#xa;vision_start / vision_end: 2 × 32 = 64&#xa;文本 prompt (问题描述): ~50&#xa;系统/角色 tokens: ~44" vertex="1">
          <mxGeometry height="72" width="440" x="60" y="310" as="geometry" />
        </mxCell>
        <mxCell id="16" parent="1" style="rounded=1;fillColor=#E65100;strokeColor=#BF360C;fontColor=#FFFFFF;fontSize=14;fontStyle=1;align=center;" value="总 prompt ≈ 6,272 + 478 ≈ 6,750 (下文按 7,000 估算)" vertex="1">
          <mxGeometry height="36" width="440" x="60" y="396" as="geometry" />
        </mxCell>
        <mxCell id="17" parent="1" style="rounded=0;fillColor=#FFE0B2;strokeColor=#F57C00;" value="" vertex="1">
          <mxGeometry height="40" width="448" x="540" y="160" as="geometry" />
        </mxCell>
        <mxCell id="18" parent="1" style="text;fontSize=10;fontStyle=1;fontColor=#E65100;fillColor=none;strokeColor=none;align=center;" value="Vision 6272" vertex="1">
          <mxGeometry height="16" width="448" x="540" y="162" as="geometry" />
        </mxCell>
        <mxCell id="19" parent="1" style="rounded=0;fillColor=#E0E0E0;strokeColor=#9E9E9E;" value="" vertex="1">
          <mxGeometry height="40" width="66" x="540" y="200" as="geometry" />
        </mxCell>
        <mxCell id="191" parent="1" style="text;fontSize=9;fontColor=#616161;fillColor=none;strokeColor=none;align=center;" value="时间戳 320" vertex="1">
          <mxGeometry height="14" width="66" x="540" y="204" as="geometry" />
        </mxCell>
        <mxCell id="192" parent="1" style="rounded=0;fillColor=#C8E6C9;strokeColor=#66BB6A;" value="" vertex="1">
          <mxGeometry height="40" width="46" x="606" y="200" as="geometry" />
        </mxCell>
        <mxCell id="193" parent="1" style="text;fontSize=9;fontColor=#2E7D32;fillColor=none;strokeColor=none;align=center;" value="结构 108" vertex="1">
          <mxGeometry height="14" width="46" x="606" y="204" as="geometry" />
        </mxCell>
        <mxCell id="194" parent="1" style="rounded=0;fillColor=#BBDEFB;strokeColor=#42A5F5;" value="" vertex="1">
          <mxGeometry height="40" width="30" x="652" y="200" as="geometry" />
        </mxCell>
        <mxCell id="195" parent="1" style="text;fontSize=9;fontColor=#1565C0;fillColor=none;strokeColor=none;align=center;" value="文本" vertex="1">
          <mxGeometry height="14" width="30" x="652" y="204" as="geometry" />
        </mxCell>
        <mxCell id="196" parent="1" style="text;fontSize=11;fontStyle=2;fontColor=#E65100;fillColor=none;strokeColor=none;align=left;" value="← 占比: Vision 89.6% 远超文本" vertex="1">
          <mxGeometry height="20" width="280" x="700" y="208" as="geometry" />
        </mxCell>
        <mxCell id="197" parent="1" style="rounded=1;fillColor=#FFAB91;strokeColor=#E64A19;fontSize=13;fontStyle=1;align=center;" value="64路总 prompt tokens ≈ 64 × 7000 = 448,000" vertex="1">
          <mxGeometry height="36" width="440" x="540" y="270" as="geometry" />
        </mxCell>
        <mxCell id="198" parent="1" style="rounded=1;fillColor=#F3E5F5;strokeColor=#9C27B0;fontSize=11;align=left;spacingLeft=10;whiteSpace=wrap;" value="Vision Encoder (ViT): 每个视频独立运行一次&#xa;输入: pixel_values [32×2, 3, 448, 448]&#xa;输出: 6272 个 hidden_state embeddings&#xa;运行后缓存在 EncoderCache 中，不重复计算" vertex="1">
          <mxGeometry height="80" width="440" x="540" y="330" as="geometry" />
        </mxCell>
        <mxCell id="30" parent="1" style="rounded=1;fillColor=#E3F2FD;strokeColor=#1976D2;strokeWidth=2;" value="" vertex="1">
          <mxGeometry height="380" width="560" x="1060" y="90" as="geometry" />
        </mxCell>
        <mxCell id="31" parent="1" style="text;fontSize=16;fontStyle=1;fontColor=#0D47A1;fillColor=none;strokeColor=none;align=left;" value="B. 调度器关键参数" vertex="1">
          <mxGeometry height="26" width="400" x="1080" y="100" as="geometry" />
        </mxCell>
        <mxCell id="32" parent="1" style="rounded=1;fillColor=#BBDEFB;strokeColor=#1976D2;fontSize=13;fontStyle=1;align=left;spacingLeft=10;" value="max_num_batched_tokens = 8,192" vertex="1">
          <mxGeometry height="30" width="360" x="1080" y="138" as="geometry" />
        </mxCell>
        <mxCell id="33" parent="1" style="text;fontSize=11;fontColor=#1565C0;fontStyle=2;fillColor=none;strokeColor=none;align=left;" value="每个 Step (一次 forward) 最多调度 8192 个 token" vertex="1">
          <mxGeometry height="18" width="500" x="1080" y="170" as="geometry" />
        </mxCell>
        <mxCell id="34" parent="1" style="rounded=1;fillColor=#BBDEFB;strokeColor=#1976D2;fontSize=13;fontStyle=1;align=left;spacingLeft=10;" value="max_num_seqs = 32" vertex="1">
          <mxGeometry height="30" width="360" x="1080" y="198" as="geometry" />
        </mxCell>
        <mxCell id="35" parent="1" style="text;fontSize=11;fontColor=#1565C0;fontStyle=2;fillColor=none;strokeColor=none;align=left;" value="running 队列最多同时容纳 32 个请求" vertex="1">
          <mxGeometry height="18" width="500" x="1080" y="230" as="geometry" />
        </mxCell>
        <mxCell id="36" parent="1" style="rounded=1;fillColor=#BBDEFB;strokeColor=#1976D2;fontSize=13;fontStyle=1;align=left;spacingLeft=10;" value="enable_chunked_prefill = true" vertex="1">
          <mxGeometry height="30" width="360" x="1080" y="258" as="geometry" />
        </mxCell>
        <mxCell id="37" parent="1" style="text;fontSize=11;fontColor=#1565C0;fontStyle=2;fillColor=none;strokeColor=none;align=left;" value="长 prompt 可分多个 Step 完成 prefill" vertex="1">
          <mxGeometry height="18" width="500" x="1080" y="290" as="geometry" />
        </mxCell>
        <mxCell id="38" parent="1" style="rounded=1;fillColor=#E8EAF6;strokeColor=#3F51B5;fontSize=12;align=left;spacingLeft=10;whiteSpace=wrap;" value="schedule() 每步执行:&#xa;① 先调度 running 中的请求 (decode 优先)&#xa;② 剩余 budget 调度 waiting 中的请求 (prefill)&#xa;③ KV block 不足时 → 抢占低优先级请求&#xa;④ 新请求: waiting → running → 参与 batch" vertex="1">
          <mxGeometry height="100" width="500" x="1080" y="324" as="geometry" />
        </mxCell>
        <mxCell id="50" parent="1" style="rounded=1;fillColor=#FAFAFA;strokeColor=#BDBDBD;" value="" vertex="1">
          <mxGeometry height="200" width="260" x="1660" y="90" as="geometry" />
        </mxCell>
        <mxCell id="51" parent="1" style="text;fontSize=14;fontStyle=1;fillColor=none;strokeColor=none;align=left;" value="图例" vertex="1">
          <mxGeometry height="22" width="80" x="1680" y="96" as="geometry" />
        </mxCell>
        <mxCell id="52" parent="1" style="rounded=1;fillColor=#FFE0B2;strokeColor=#F57C00;" value="" vertex="1">
          <mxGeometry height="20" width="30" x="1680" y="126" as="geometry" />
        </mxCell>
        <mxCell id="53" parent="1" style="text;fontSize=12;fillColor=none;strokeColor=none;align=left;" value="Prefill (首次计算 prompt)" vertex="1">
          <mxGeometry height="20" width="200" x="1720" y="126" as="geometry" />
        </mxCell>
        <mxCell id="54" parent="1" style="rounded=1;fillColor=#BBDEFB;strokeColor=#1976D2;" value="" vertex="1">
          <mxGeometry height="20" width="30" x="1680" y="156" as="geometry" />
        </mxCell>
        <mxCell id="55" parent="1" style="text;fontSize=12;fillColor=none;strokeColor=none;align=left;" value="Decode (逐 token 生成)" vertex="1">
          <mxGeometry height="20" width="200" x="1720" y="156" as="geometry" />
        </mxCell>
        <mxCell id="56" parent="1" style="rounded=1;fillColor=#F5F5F5;strokeColor=#BDBDBD;" value="" vertex="1">
          <mxGeometry height="20" width="30" x="1680" y="186" as="geometry" />
        </mxCell>
        <mxCell id="57" parent="1" style="text;fontSize=12;fillColor=none;strokeColor=none;align=left;" value="Waiting (排队等待)" vertex="1">
          <mxGeometry height="20" width="200" x="1720" y="186" as="geometry" />
        </mxCell>
        <mxCell id="58" parent="1" style="rounded=1;fillColor=#C8E6C9;strokeColor=#4CAF50;" value="" vertex="1">
          <mxGeometry height="20" width="30" x="1680" y="216" as="geometry" />
        </mxCell>
        <mxCell id="59" parent="1" style="text;fontSize=12;fillColor=none;strokeColor=none;align=left;" value="Done (已完成)" vertex="1">
          <mxGeometry height="20" width="200" x="1720" y="216" as="geometry" />
        </mxCell>
        <mxCell id="591" parent="1" style="rounded=1;fillColor=#CE93D8;strokeColor=#8E24AA;" value="" vertex="1">
          <mxGeometry height="20" width="30" x="1680" y="246" as="geometry" />
        </mxCell>
        <mxCell id="592" parent="1" style="text;fontSize=12;fillColor=none;strokeColor=none;align=left;" value="Prefill chunk (分块)" vertex="1">
          <mxGeometry height="20" width="200" x="1720" y="246" as="geometry" />
        </mxCell>
        <mxCell id="60" parent="1" style="text;fontSize=16;fontStyle=1;fillColor=none;strokeColor=none;align=left;" value="C. Token Budget 分配 — 每个 Step 的 8192 token 预算如何使用" vertex="1">
          <mxGeometry height="26" width="900" x="40" y="490" as="geometry" />
        </mxCell>
        <mxCell id="601" parent="1" style="text;fontSize=11;fontStyle=2;fontColor=#757575;fillColor=none;strokeColor=none;align=left;" value="每根横条 = 一次 forward pass 的 8192 token 预算，颜色 = 请求类型，宽度 ∝ token 数" vertex="1">
          <mxGeometry height="18" width="900" x="40" y="516" as="geometry" />
        </mxCell>
        <mxCell id="61" parent="1" style="text;fontSize=13;fontStyle=1;fillColor=none;strokeColor=none;align=right;" value="Step 1" vertex="1">
          <mxGeometry height="44" width="80" x="40" y="548" as="geometry" />
        </mxCell>
        <mxCell id="62" parent="1" style="rounded=1;fillColor=#FFE0B2;strokeColor=#F57C00;fontSize=11;fontStyle=1;align=center;whiteSpace=wrap;" value="R1 Prefill&#xa;7000 tokens" vertex="1">
          <mxGeometry height="44" width="1194" x="130" y="548" as="geometry" />
        </mxCell>
        <mxCell id="63" parent="1" style="rounded=1;fillColor=#CE93D8;strokeColor=#8E24AA;fontSize=11;fontStyle=1;align=center;whiteSpace=wrap;fontColor=#FFFFFF;" value="R2 Prefill&#xa;1192" vertex="1">
          <mxGeometry height="44" width="204" x="1324" y="548" as="geometry" />
        </mxCell>
        <mxCell id="631" parent="1" style="text;fontSize=11;fontStyle=2;fontColor=#8E24AA;fillColor=none;strokeColor=none;align=left;" value="← 2 prefill, 分块" vertex="1">
          <mxGeometry height="18" width="200" x="1540" y="556" as="geometry" />
        </mxCell>
        <mxCell id="64" parent="1" style="text;fontSize=13;fontStyle=1;fillColor=none;strokeColor=none;align=right;" value="Step 4" vertex="1">
          <mxGeometry height="44" width="80" x="40" y="608" as="geometry" />
        </mxCell>
        <mxCell id="65" parent="1" style="rounded=1;fillColor=#BBDEFB;strokeColor=#1976D2;fontSize=10;fontStyle=1;align=center;" value="R1-R3 D×3" vertex="1">
          <mxGeometry height="44" width="60" x="130" y="608" as="geometry" />
        </mxCell>
        <mxCell id="66" parent="1" style="rounded=1;fillColor=#FFE0B2;strokeColor=#F57C00;fontSize=11;fontStyle=1;align=center;" value="R4 Prefill 3427" vertex="1">
          <mxGeometry height="44" width="586" x="190" y="608" as="geometry" />
        </mxCell>
        <mxCell id="67" parent="1" style="rounded=1;fillColor=#CE93D8;strokeColor=#8E24AA;fontSize=11;fontStyle=1;align=center;fontColor=#FFFFFF;" value="R5 Prefill chunk 4762" vertex="1">
          <mxGeometry height="44" width="752" x="776" y="608" as="geometry" />
        </mxCell>
        <mxCell id="671" parent="1" style="text;fontSize=11;fontStyle=2;fontColor=#1976D2;fillColor=none;strokeColor=none;align=left;" value="← 3 decode + 2 prefill 混合" vertex="1">
          <mxGeometry height="18" width="300" x="1540" y="616" as="geometry" />
        </mxCell>
        <mxCell id="68" parent="1" style="text;fontSize=13;fontStyle=1;fillColor=none;strokeColor=none;align=right;" value="Step 8" vertex="1">
          <mxGeometry height="44" width="80" x="40" y="668" as="geometry" />
        </mxCell>
        <mxCell id="69" parent="1" style="rounded=1;fillColor=#BBDEFB;strokeColor=#1976D2;fontSize=10;fontStyle=1;align=center;whiteSpace=wrap;" value="R1-R8&#xa;D×8" vertex="1">
          <mxGeometry height="44" width="60" x="130" y="668" as="geometry" />
        </mxCell>
        <mxCell id="691" parent="1" style="rounded=1;fillColor=#FFE0B2;strokeColor=#F57C00;fontSize=11;fontStyle=1;align=center;" value="R9 Prefill 5678" vertex="1">
          <mxGeometry height="44" width="940" x="190" y="668" as="geometry" />
        </mxCell>
        <mxCell id="692" parent="1" style="rounded=1;fillColor=#CE93D8;strokeColor=#8E24AA;fontSize=11;fontStyle=1;align=center;fontColor=#FFFFFF;" value="R10 chunk 2506" vertex="1">
          <mxGeometry height="44" width="398" x="1130" y="668" as="geometry" />
        </mxCell>
        <mxCell id="693" parent="1" style="text;fontSize=11;fontStyle=2;fontColor=#1976D2;fillColor=none;strokeColor=none;align=left;" value="← 8 decode + 2 prefill" vertex="1">
          <mxGeometry height="18" width="300" x="1540" y="676" as="geometry" />
        </mxCell>
        <mxCell id="70" parent="1" style="text;fontSize=13;fontStyle=1;fillColor=none;strokeColor=none;align=right;fontColor=#4CAF50;" value="Step 40+" vertex="1">
          <mxGeometry height="44" width="90" x="30" y="728" as="geometry" />
        </mxCell>
        <mxCell id="71" parent="1" style="rounded=1;fillColor=#BBDEFB;strokeColor=#1976D2;fontSize=10;fontStyle=1;align=center;" value="R1-R30 &#xa;Decode × 30&#xa; = 30 tokens" vertex="1">
          <mxGeometry height="44" width="100" x="130" y="728" as="geometry" />
        </mxCell>
        <mxCell id="72" parent="1" style="rounded=1;fillColor=#FFE0B2;strokeColor=#F57C00;fontSize=11;fontStyle=1;align=center;" value="R31 Prefill 7000" vertex="1">
          <mxGeometry height="44" width="1100" x="230" y="728" as="geometry" />
        </mxCell>
        <mxCell id="73" parent="1" style="rounded=1;fillColor=#CE93D8;strokeColor=#8E24AA;fontSize=11;fontStyle=1;align=center;fontColor=#FFFFFF;" value="R32 chunk 1162" vertex="1">
          <mxGeometry height="44" width="198" x="1330" y="728" as="geometry" />
        </mxCell>
        <mxCell id="731" parent="1" style="text;fontSize=11;fontStyle=2;fontColor=#4CAF50;fillColor=none;strokeColor=none;align=left;" value="← 30 decode (仅占 0.4%!) + 新 prefill 充分利用 GPU" vertex="1">
          <mxGeometry height="18" width="400" x="1540" y="736" as="geometry" />
        </mxCell>
        <mxCell id="74" parent="1" style="text;fontSize=13;fontStyle=1;fillColor=none;strokeColor=none;align=right;fontColor=#D32F2F;" value="Step 200+" vertex="1">
          <mxGeometry height="44" width="94" x="26" y="788" as="geometry" />
        </mxCell>
        <mxCell id="75" parent="1" style="rounded=1;fillColor=#BBDEFB;strokeColor=#1976D2;fontSize=10;fontStyle=1;align=center;" value="R2-R32 D×31" vertex="1">
          <mxGeometry height="44" width="90" x="130" y="788" as="geometry" />
        </mxCell>
        <mxCell id="76" parent="1" style="rounded=1;fillColor=#90CAF9;strokeColor=#1976D2;fontSize=10;fontStyle=1;align=center;" value="R33-R63 D×31" vertex="1">
          <mxGeometry height="44" width="90" x="220" y="788" as="geometry" />
        </mxCell>
        <mxCell id="77" parent="1" style="rounded=1;fillColor=#FFE0B2;strokeColor=#F57C00;fontSize=11;fontStyle=1;align=center;" value="R64 Prefill 7000" vertex="1">
          <mxGeometry height="44" width="1020" x="310" y="788" as="geometry" />
        </mxCell>
        <mxCell id="771" parent="1" style="rounded=1;fillColor=#C8E6C9;strokeColor=#4CAF50;fontSize=11;fontStyle=1;align=center;" value="R1 完成!" vertex="1">
          <mxGeometry height="44" width="70" x="1330" y="788" as="geometry" />
        </mxCell>
        <mxCell id="772" parent="1" style="rounded=1;fillColor=#CE93D8;strokeColor=#8E24AA;fontSize=10;fontStyle=1;align=center;fontColor=#FFFFFF;" value="new chunk" vertex="1">
          <mxGeometry height="44" width="128" x="1400" y="788" as="geometry" />
        </mxCell>
        <mxCell id="773" parent="1" style="text;fontSize=11;fontStyle=2;fontColor=#D32F2F;fillColor=none;strokeColor=none;align=left;" value="← R1 完成释放 slot → R64 立即进入!" vertex="1">
          <mxGeometry height="18" width="400" x="1540" y="796" as="geometry" />
        </mxCell>
        <mxCell id="78" parent="1" style="text;fontSize=10;fontColor=#9E9E9E;fillColor=none;strokeColor=none;align=center;" value="8192" vertex="1">
          <mxGeometry height="16" width="40" x="130" y="836" as="geometry" />
        </mxCell>
        <mxCell id="79" parent="1" style="text;fontSize=10;fontColor=#9E9E9E;fillColor=none;strokeColor=none;align=center;" value="8192" vertex="1">
          <mxGeometry height="16" width="40" x="1488" y="836" as="geometry" />
        </mxCell>
        <mxCell id="791" parent="1" style="text;fontSize=10;fontColor=#BDBDBD;fillColor=none;strokeColor=none;align=center;" value="← Token Budget →" vertex="1">
          <mxGeometry height="16" width="200" x="680" y="836" as="geometry" />
        </mxCell>
        <mxCell id="100" parent="1" style="text;fontSize=16;fontStyle=1;fillColor=none;strokeColor=none;align=left;" value="D. 请求生命周期 — Continuous Batching 阶梯模式" vertex="1">
          <mxGeometry height="26" width="900" x="40" y="870" as="geometry" />
        </mxCell>
        <mxCell id="101" parent="1" style="text;fontSize=11;fontStyle=2;fontColor=#757575;fillColor=none;strokeColor=none;align=left;" value="每行=一个请求，横轴=调度Step。可以清晰看到&quot;阶梯&quot;模式：每步 budget (8192) 大于单个 prompt (7000) → 结余 token 逐步累积成下一个请求的首块 → 首块越来越大，直到某个请求一次 prefill 完成" vertex="1">
          <mxGeometry height="18" width="1200" x="40" y="896" as="geometry" />
        </mxCell>
        <mxCell id="102" parent="1" style="text;fontSize=11;fontStyle=1;fontColor=#616161;fillColor=none;strokeColor=none;align=center;" value="" vertex="1">
          <mxGeometry height="32" width="90" x="40" y="920" as="geometry" />
        </mxCell>
        <mxCell id="103" parent="1" style="text;fontSize=11;fontStyle=1;fontColor=#616161;fillColor=none;strokeColor=none;align=center;" value="Step 1" vertex="1">
          <mxGeometry height="32" width="120" x="140" y="920" as="geometry" />
        </mxCell>
        <mxCell id="104" parent="1" style="text;fontSize=11;fontStyle=1;fontColor=#616161;fillColor=none;strokeColor=none;align=center;" value="Step 2" vertex="1">
          <mxGeometry height="32" width="120" x="260" y="920" as="geometry" />
        </mxCell>
        <mxCell id="105" parent="1" style="text;fontSize=11;fontStyle=1;fontColor=#616161;fillColor=none;strokeColor=none;align=center;" value="Step 3" vertex="1">
          <mxGeometry height="32" width="120" x="380" y="920" as="geometry" />
        </mxCell>
        <mxCell id="106" parent="1" style="text;fontSize=11;fontStyle=1;fontColor=#616161;fillColor=none;strokeColor=none;align=center;" value="Step 4" vertex="1">
          <mxGeometry height="32" width="120" x="500" y="920" as="geometry" />
        </mxCell>
        <mxCell id="107" parent="1" style="text;fontSize=11;fontStyle=1;fontColor=#616161;fillColor=none;strokeColor=none;align=center;" value="Step 5" vertex="1">
          <mxGeometry height="32" width="120" x="620" y="920" as="geometry" />
        </mxCell>
        <mxCell id="108" parent="1" style="text;fontSize=11;fontStyle=1;fontColor=#616161;fillColor=none;strokeColor=none;align=center;" value="Step 6" vertex="1">
          <mxGeometry height="32" width="120" x="740" y="920" as="geometry" />
        </mxCell>
        <mxCell id="109" parent="1" style="text;fontSize=11;fontStyle=1;fontColor=#616161;fillColor=none;strokeColor=none;align=center;" value="Step 7" vertex="1">
          <mxGeometry height="32" width="120" x="860" y="920" as="geometry" />
        </mxCell>
        <mxCell id="1091" parent="1" style="text;fontSize=11;fontStyle=1;fontColor=#616161;fillColor=none;strokeColor=none;align=center;" value="Step 8" vertex="1">
          <mxGeometry height="32" width="120" x="980" y="920" as="geometry" />
        </mxCell>
        <mxCell id="1092" parent="1" style="text;fontSize=11;fontStyle=1;fontColor=#BDBDBD;fillColor=none;strokeColor=none;align=center;" value="Step 9-12 ..." vertex="1">
          <mxGeometry height="32" width="200" x="1100" y="920" as="geometry" />
        </mxCell>
        <mxCell id="110" parent="1" style="text;fontSize=12;fontStyle=1;fontColor=#333;fillColor=none;strokeColor=none;align=right;" value="R1" vertex="1">
          <mxGeometry height="44" width="90" x="40" y="952" as="geometry" />
        </mxCell>
        <mxCell id="120" parent="1" style="text;fontSize=12;fontStyle=1;fontColor=#333;fillColor=none;strokeColor=none;align=right;" value="R2" vertex="1">
          <mxGeometry height="44" width="90" x="40" y="996" as="geometry" />
        </mxCell>
        <mxCell id="130" parent="1" style="text;fontSize=12;fontStyle=1;fontColor=#333;fillColor=none;strokeColor=none;align=right;" value="R3" vertex="1">
          <mxGeometry height="44" width="90" x="40" y="1040" as="geometry" />
        </mxCell>
        <mxCell id="140" parent="1" style="text;fontSize=12;fontStyle=1;fontColor=#333;fillColor=none;strokeColor=none;align=right;" value="R4" vertex="1">
          <mxGeometry height="44" width="90" x="40" y="1084" as="geometry" />
        </mxCell>
        <mxCell id="150" parent="1" style="text;fontSize=12;fontStyle=1;fontColor=#333;fillColor=none;strokeColor=none;align=right;" value="R5" vertex="1">
          <mxGeometry height="44" width="90" x="40" y="1128" as="geometry" />
        </mxCell>
        <mxCell id="160" parent="1" style="text;fontSize=12;fontStyle=1;fontColor=#333;fillColor=none;strokeColor=none;align=right;" value="R6" vertex="1">
          <mxGeometry height="44" width="90" x="40" y="1172" as="geometry" />
        </mxCell>
        <mxCell id="170" parent="1" style="text;fontSize=12;fontStyle=1;fontColor=#333;fillColor=none;strokeColor=none;align=right;" value="R7" vertex="1">
          <mxGeometry height="44" width="90" x="40" y="1216" as="geometry" />
        </mxCell>
        <mxCell id="180" parent="1" style="text;fontSize=12;fontStyle=1;fontColor=#333;fillColor=none;strokeColor=none;align=right;" value="R8" vertex="1">
          <mxGeometry height="44" width="90" x="40" y="1260" as="geometry" />
        </mxCell>
        <mxCell id="111" parent="1" style="rounded=1;fillColor=#FFE0B2;strokeColor=#F57C00;fontSize=10;fontStyle=1;align=center;" value="P 7000" vertex="1">
          <mxGeometry height="44" width="120" x="140" y="952" as="geometry" />
        </mxCell>
        <mxCell id="112" parent="1" style="rounded=1;fillColor=#BBDEFB;strokeColor=#1976D2;fontSize=10;align=center;" value="D 1" vertex="1">
          <mxGeometry height="44" width="840" x="260" y="952" as="geometry" />
        </mxCell>
        <mxCell id="1121" parent="1" style="text;fontSize=10;fontColor=#1976D2;fontStyle=2;fillColor=none;strokeColor=none;align=left;" value="→ decode 持续到生成完毕 (约200步)" vertex="1">
          <mxGeometry height="16" width="300" x="1110" y="958" as="geometry" />
        </mxCell>
        <mxCell id="121" parent="1" style="rounded=1;fillColor=#CE93D8;strokeColor=#8E24AA;fontSize=10;fontStyle=1;align=center;fontColor=#FFF;" value="P 1192" vertex="1">
          <mxGeometry height="44" width="120" x="140" y="996" as="geometry" />
        </mxCell>
        <mxCell id="122" parent="1" style="rounded=1;fillColor=#FFE0B2;strokeColor=#F57C00;fontSize=10;fontStyle=1;align=center;" value="P 5808" vertex="1">
          <mxGeometry height="44" width="120" x="260" y="996" as="geometry" />
        </mxCell>
        <mxCell id="123" parent="1" style="rounded=1;fillColor=#BBDEFB;strokeColor=#1976D2;fontSize=10;align=center;" value="D 1" vertex="1">
          <mxGeometry height="44" width="720" x="380" y="996" as="geometry" />
        </mxCell>
        <mxCell id="1231" parent="1" style="text;fontSize=10;fontColor=#8E24AA;fontStyle=2;fillColor=none;strokeColor=none;align=left;" value="← 分块 prefill: step1=1192, step2=5808" vertex="1">
          <mxGeometry height="16" width="300" x="1110" y="1002" as="geometry" />
        </mxCell>
        <mxCell id="131" parent="1" style="rounded=1;fillColor=#F5F5F5;strokeColor=#E0E0E0;fontSize=10;align=center;" value="" vertex="1">
          <mxGeometry height="44" width="120" x="140" y="1040" as="geometry" />
        </mxCell>
        <mxCell id="132" parent="1" style="rounded=1;fillColor=#CE93D8;strokeColor=#8E24AA;fontSize=10;fontStyle=1;align=center;fontColor=#FFF;" value="P 2383" vertex="1">
          <mxGeometry height="44" width="120" x="260" y="1040" as="geometry" />
        </mxCell>
        <mxCell id="133" parent="1" style="rounded=1;fillColor=#FFE0B2;strokeColor=#F57C00;fontSize=10;fontStyle=1;align=center;" value="P 4617" vertex="1">
          <mxGeometry height="44" width="120" x="380" y="1040" as="geometry" />
        </mxCell>
        <mxCell id="134" parent="1" style="rounded=1;fillColor=#BBDEFB;strokeColor=#1976D2;fontSize=10;align=center;" value="D 1" vertex="1">
          <mxGeometry height="44" width="600" x="500" y="1040" as="geometry" />
        </mxCell>
        <mxCell id="141" parent="1" style="rounded=1;fillColor=#F5F5F5;strokeColor=#E0E0E0;fontSize=10;align=center;" value="" vertex="1">
          <mxGeometry height="44" width="240" x="140" y="1084" as="geometry" />
        </mxCell>
        <mxCell id="142" parent="1" style="rounded=1;fillColor=#CE93D8;strokeColor=#8E24AA;fontSize=10;fontStyle=1;align=center;fontColor=#FFF;" value="P 3573" vertex="1">
          <mxGeometry height="44" width="120" x="380" y="1084" as="geometry" />
        </mxCell>
        <mxCell id="143" parent="1" style="rounded=1;fillColor=#FFE0B2;strokeColor=#F57C00;fontSize=10;fontStyle=1;align=center;" value="P 3427" vertex="1">
          <mxGeometry height="44" width="120" x="500" y="1084" as="geometry" />
        </mxCell>
        <mxCell id="144" parent="1" style="rounded=1;fillColor=#BBDEFB;strokeColor=#1976D2;fontSize=10;align=center;" value="D 1" vertex="1">
          <mxGeometry height="44" width="480" x="620" y="1084" as="geometry" />
        </mxCell>
        <mxCell id="151" parent="1" style="rounded=1;fillColor=#F5F5F5;strokeColor=#E0E0E0;fontSize=10;align=center;" value="" vertex="1">
          <mxGeometry height="44" width="360" x="140" y="1128" as="geometry" />
        </mxCell>
        <mxCell id="152" parent="1" style="rounded=1;fillColor=#CE93D8;strokeColor=#8E24AA;fontSize=10;fontStyle=1;align=center;fontColor=#FFF;" value="P 4762" vertex="1">
          <mxGeometry height="44" width="120" x="500" y="1128" as="geometry" />
        </mxCell>
        <mxCell id="153" parent="1" style="rounded=1;fillColor=#FFE0B2;strokeColor=#F57C00;fontSize=10;fontStyle=1;align=center;" value="P 2238" vertex="1">
          <mxGeometry height="44" width="120" x="620" y="1128" as="geometry" />
        </mxCell>
        <mxCell id="154" parent="1" style="rounded=1;fillColor=#BBDEFB;strokeColor=#1976D2;fontSize=10;align=center;" value="D 1" vertex="1">
          <mxGeometry height="44" width="360" x="740" y="1128" as="geometry" />
        </mxCell>
        <mxCell id="161" parent="1" style="rounded=1;fillColor=#F5F5F5;strokeColor=#E0E0E0;fontSize=10;align=center;" value="" vertex="1">
          <mxGeometry height="44" width="480" x="140" y="1172" as="geometry" />
        </mxCell>
        <mxCell id="162" parent="1" style="rounded=1;fillColor=#CE93D8;strokeColor=#8E24AA;fontSize=10;fontStyle=1;align=center;fontColor=#FFF;" value="P 5950" vertex="1">
          <mxGeometry height="44" width="120" x="620" y="1172" as="geometry" />
        </mxCell>
        <mxCell id="163" parent="1" style="rounded=1;fillColor=#FFE0B2;strokeColor=#F57C00;fontSize=10;fontStyle=1;align=center;" value="P 1050" vertex="1">
          <mxGeometry height="44" width="120" x="740" y="1172" as="geometry" />
        </mxCell>
        <mxCell id="164" parent="1" style="rounded=1;fillColor=#BBDEFB;strokeColor=#1976D2;fontSize=10;align=center;" value="D 1" vertex="1">
          <mxGeometry height="44" width="240" x="860" y="1172" as="geometry" />
        </mxCell>
        <mxCell id="171" parent="1" style="rounded=1;fillColor=#F5F5F5;strokeColor=#E0E0E0;fontSize=10;align=center;" value="" vertex="1">
          <mxGeometry height="44" width="600" x="140" y="1216" as="geometry" />
        </mxCell>
        <mxCell id="172" parent="1" style="rounded=1;fillColor=#FFE0B2;strokeColor=#F57C00;fontSize=10;fontStyle=1;align=center;" value="P 7000" vertex="1">
          <mxGeometry height="44" width="120" x="740" y="1216" as="geometry" />
        </mxCell>
        <mxCell id="173" parent="1" style="rounded=1;fillColor=#BBDEFB;strokeColor=#1976D2;fontSize=10;align=center;" value="D 1" vertex="1">
          <mxGeometry height="44" width="240" x="860" y="1216" as="geometry" />
        </mxCell>
        <mxCell id="1731" parent="1" style="text;fontSize=10;fontColor=#4CAF50;fontStyle=2;fillColor=none;strokeColor=none;align=left;" value="← Step 6 budget 充裕, R7 一次 prefill 完成!" vertex="1">
          <mxGeometry height="16" width="340" x="1110" y="1222" as="geometry" />
        </mxCell>
        <mxCell id="181" parent="1" style="rounded=1;fillColor=#F5F5F5;strokeColor=#E0E0E0;fontSize=10;align=center;" value="" vertex="1">
          <mxGeometry height="44" width="600" x="140" y="1260" as="geometry" />
        </mxCell>
        <mxCell id="182" parent="1" style="rounded=1;fillColor=#CE93D8;strokeColor=#8E24AA;fontSize=9;fontStyle=1;align=center;fontColor=#FFF;" value="P 137" vertex="1">
          <mxGeometry height="44" width="120" x="740" y="1260" as="geometry" />
        </mxCell>
        <mxCell id="183" parent="1" style="rounded=1;fillColor=#FFE0B2;strokeColor=#F57C00;fontSize=10;fontStyle=1;align=center;" value="P 6863" vertex="1">
          <mxGeometry height="44" width="120" x="860" y="1260" as="geometry" />
        </mxCell>
        <mxCell id="184" parent="1" style="rounded=1;fillColor=#BBDEFB;strokeColor=#1976D2;fontSize=10;align=center;" value="D" vertex="1">
          <mxGeometry height="44" width="120" x="980" y="1260" as="geometry" />
        </mxCell>
        <mxCell id="185" parent="1" style="text;fontSize=12;fontStyle=1;fontColor=#BDBDBD;fillColor=none;strokeColor=none;align=right;" value="R9" vertex="1">
          <mxGeometry height="30" width="90" x="40" y="1304" as="geometry" />
        </mxCell>
        <mxCell id="186" parent="1" style="text;fontSize=16;fontStyle=1;fontColor=#BDBDBD;fillColor=none;strokeColor=none;align=center;" value="..." vertex="1">
          <mxGeometry height="30" width="90" x="40" y="1334" as="geometry" />
        </mxCell>
        <mxCell id="187" parent="1" style="text;fontSize=12;fontStyle=1;fontColor=#BDBDBD;fillColor=none;strokeColor=none;align=right;" value="R64" vertex="1">
          <mxGeometry height="30" width="90" x="40" y="1364" as="geometry" />
        </mxCell>
        <mxCell id="188" parent="1" style="rounded=1;fillColor=#FFF9C4;strokeColor=#F9A825;fontSize=11;align=left;spacingLeft=10;whiteSpace=wrap;fontStyle=2;" value="R9-R32 以相同的阶梯模式逐步进入 running 队列 (约 step 7-40)&#xa;R33-R64 在 waiting 中等待 → 当 R1-R32 开始完成时才有 slot 空出&#xa;max_num_seqs=32 限制了同时运行的请求数" vertex="1">
          <mxGeometry height="76" width="800" x="140" y="1308" as="geometry" />
        </mxCell>
        <mxCell id="189" parent="1" style="text;fontSize=11;fontStyle=1;fontColor=#4CAF50;fillColor=none;strokeColor=none;align=right;" value="Batch" vertex="1">
          <mxGeometry height="30" width="90" x="40" y="1396" as="geometry" />
        </mxCell>
        <mxCell id="1891" parent="1" style="rounded=1;fillColor=#E8F5E9;strokeColor=#66BB6A;fontSize=12;fontStyle=1;align=center;" value="2" vertex="1">
          <mxGeometry height="30" width="120" x="140" y="1396" as="geometry" />
        </mxCell>
        <mxCell id="1892" parent="1" style="rounded=1;fillColor=#E8F5E9;strokeColor=#66BB6A;fontSize=12;fontStyle=1;align=center;" value="3" vertex="1">
          <mxGeometry height="30" width="120" x="260" y="1396" as="geometry" />
        </mxCell>
        <mxCell id="1893" parent="1" style="rounded=1;fillColor=#E8F5E9;strokeColor=#66BB6A;fontSize=12;fontStyle=1;align=center;" value="4" vertex="1">
          <mxGeometry height="30" width="120" x="380" y="1396" as="geometry" />
        </mxCell>
        <mxCell id="1894" parent="1" style="rounded=1;fillColor=#E8F5E9;strokeColor=#66BB6A;fontSize=12;fontStyle=1;align=center;" value="5" vertex="1">
          <mxGeometry height="30" width="120" x="500" y="1396" as="geometry" />
        </mxCell>
        <mxCell id="1895" parent="1" style="rounded=1;fillColor=#E8F5E9;strokeColor=#66BB6A;fontSize=12;fontStyle=1;align=center;" value="6" vertex="1">
          <mxGeometry height="30" width="120" x="620" y="1396" as="geometry" />
        </mxCell>
        <mxCell id="1896" parent="1" style="rounded=1;fillColor=#C8E6C9;strokeColor=#4CAF50;fontSize=12;fontStyle=1;align=center;" value="8" vertex="1">
          <mxGeometry height="30" width="120" x="740" y="1396" as="geometry" />
        </mxCell>
        <mxCell id="1897" parent="1" style="rounded=1;fillColor=#C8E6C9;strokeColor=#4CAF50;fontSize=12;fontStyle=1;align=center;" value="9" vertex="1">
          <mxGeometry height="30" width="120" x="860" y="1396" as="geometry" />
        </mxCell>
        <mxCell id="1898" parent="1" style="rounded=1;fillColor=#C8E6C9;strokeColor=#4CAF50;fontSize=12;fontStyle=1;align=center;" value="10" vertex="1">
          <mxGeometry height="30" width="120" x="980" y="1396" as="geometry" />
        </mxCell>
        <mxCell id="1899" parent="1" style="text;fontSize=11;fontStyle=2;fontColor=#4CAF50;fillColor=none;strokeColor=none;align=left;" value="→ 32 (满)" vertex="1">
          <mxGeometry height="30" width="150" x="1110" y="1396" as="geometry" />
        </mxCell>
        <mxCell id="190" edge="1" parent="1" style="endArrow=none;dashed=1;strokeColor=#E0E0E0;" value="">
          <mxGeometry relative="1" as="geometry">
            <mxPoint x="260" y="940" as="sourcePoint" />
            <mxPoint x="260" y="1430" as="targetPoint" />
          </mxGeometry>
        </mxCell>
        <mxCell id="1901" edge="1" parent="1" style="endArrow=none;dashed=1;strokeColor=#E0E0E0;" value="">
          <mxGeometry relative="1" as="geometry">
            <mxPoint x="380" y="940" as="sourcePoint" />
            <mxPoint x="380" y="1430" as="targetPoint" />
          </mxGeometry>
        </mxCell>
        <mxCell id="1902" edge="1" parent="1" style="endArrow=none;dashed=1;strokeColor=#E0E0E0;" value="">
          <mxGeometry relative="1" as="geometry">
            <mxPoint x="500" y="940" as="sourcePoint" />
            <mxPoint x="500" y="1430" as="targetPoint" />
          </mxGeometry>
        </mxCell>
        <mxCell id="1903" edge="1" parent="1" style="endArrow=none;dashed=1;strokeColor=#E0E0E0;" value="">
          <mxGeometry relative="1" as="geometry">
            <mxPoint x="620" y="940" as="sourcePoint" />
            <mxPoint x="620" y="1430" as="targetPoint" />
          </mxGeometry>
        </mxCell>
        <mxCell id="1904" edge="1" parent="1" style="endArrow=none;dashed=1;strokeColor=#E0E0E0;" value="">
          <mxGeometry relative="1" as="geometry">
            <mxPoint x="740" y="940" as="sourcePoint" />
            <mxPoint x="740" y="1430" as="targetPoint" />
          </mxGeometry>
        </mxCell>
        <mxCell id="1905" edge="1" parent="1" style="endArrow=none;dashed=1;strokeColor=#E0E0E0;" value="">
          <mxGeometry relative="1" as="geometry">
            <mxPoint x="860" y="940" as="sourcePoint" />
            <mxPoint x="860" y="1430" as="targetPoint" />
          </mxGeometry>
        </mxCell>
        <mxCell id="1906" edge="1" parent="1" style="endArrow=none;dashed=1;strokeColor=#E0E0E0;" value="">
          <mxGeometry relative="1" as="geometry">
            <mxPoint x="980" y="940" as="sourcePoint" />
            <mxPoint x="980" y="1430" as="targetPoint" />
          </mxGeometry>
        </mxCell>
        <mxCell id="1907" edge="1" parent="1" style="endArrow=none;dashed=1;strokeColor=#E0E0E0;" value="">
          <mxGeometry relative="1" as="geometry">
            <mxPoint x="1100" y="940" as="sourcePoint" />
            <mxPoint x="1100" y="1430" as="targetPoint" />
          </mxGeometry>
        </mxCell>
        <mxCell id="1908" edge="1" parent="1" style="endArrow=classic;strokeColor=#D32F2F;strokeWidth=2;dashed=1;" value="">
          <mxGeometry relative="1" as="geometry">
            <mxPoint x="140" y="974" as="sourcePoint" />
            <mxPoint x="980" y="1282" as="targetPoint" />
          </mxGeometry>
        </mxCell>
        <mxCell id="1909" parent="1" style="text;fontSize=11;fontStyle=1;fontColor=#D32F2F;fillColor=none;strokeColor=none;align=left;rotation=-20;" value="&quot;阶梯&quot;模式：&#xa;新请求逐步进入" vertex="1">
          <mxGeometry height="36" width="160" x="480" y="1070" as="geometry" />
        </mxCell>
        <mxCell id="200" parent="1" style="text;fontSize=16;fontStyle=1;fillColor=none;strokeColor=none;align=left;" value="E. 关键洞察 — 为什么 Continuous Batching 如此重要" vertex="1">
          <mxGeometry height="26" width="900" x="40" y="1450" as="geometry" />
        </mxCell>
        <mxCell id="201" parent="1" style="rounded=1;fillColor=#E8F5E9;strokeColor=#4CAF50;strokeWidth=2;" value="" vertex="1">
          <mxGeometry height="170" width="460" x="40" y="1486" as="geometry" />
        </mxCell>
        <mxCell id="202" parent="1" style="text;fontSize=14;fontStyle=1;fontColor=#2E7D32;fillColor=none;strokeColor=none;align=left;" value="Continuous Batching 核心机制" vertex="1">
          <mxGeometry height="22" width="400" x="56" y="1492" as="geometry" />
        </mxCell>
        <mxCell id="203" parent="1" style="text;fontSize=12;fontColor=#333;fillColor=none;strokeColor=none;align=left;whiteSpace=wrap;" value="1. 请求随到随加：新请求可以在任意 Step 插入 batch&#xa;2. 完成即走：请求生成完毕后立即释放 KV 和 slot&#xa;3. Prefill+Decode 混合：同一 batch 中既有首次计算&#xa;   的请求，也有正在生成的请求&#xa;4. Token Budget 自动平衡：decode 请求每步仅占 1&#xa;   token，余下的 budget 都可用于新请求的 prefill" vertex="1">
          <mxGeometry height="120" width="430" x="56" y="1518" as="geometry" />
        </mxCell>
        <mxCell id="204" parent="1" style="rounded=1;fillColor=#FFF3E0;strokeColor=#FF9800;strokeWidth=2;" value="" vertex="1">
          <mxGeometry height="170" width="460" x="520" y="1486" as="geometry" />
        </mxCell>
        <mxCell id="205" parent="1" style="text;fontSize=14;fontStyle=1;fontColor=#E65100;fillColor=none;strokeColor=none;align=left;" value="64路视频的完整调度过程" vertex="1">
          <mxGeometry height="22" width="400" x="536" y="1492" as="geometry" />
        </mxCell>
        <mxCell id="206" parent="1" style="text;fontSize=12;fontColor=#333;fillColor=none;strokeColor=none;align=left;whiteSpace=wrap;" value="Phase 1 (step 1-40): 阶梯式接入&#xa;  → R1-R32 逐步从 waiting 进入 running&#xa;  → 每步混合 prefill chunk + decode tokens&#xa;Phase 2 (step 41-200): 全速 decode&#xa;  → 32 个请求全部在 decode，每步 32 tokens&#xa;  → 剩余 8160 budget 闲置 (slot 已满, R33-R64 仍排队)&#xa;Phase 3 (step 200+): 请求轮替&#xa;  → R1 完成 → R33 立即进入 → 循环直到 R64" vertex="1">
          <mxGeometry height="130" width="430" x="536" y="1518" as="geometry" />
        </mxCell>
        <mxCell id="207" parent="1" style="rounded=1;fillColor=#FCE4EC;strokeColor=#E91E63;strokeWidth=2;" value="" vertex="1">
          <mxGeometry height="170" width="460" x="1000" y="1486" as="geometry" />
        </mxCell>
        <mxCell id="208" parent="1" style="text;fontSize=14;fontStyle=1;fontColor=#C62828;fillColor=none;strokeColor=none;align=left;" value="对比：如果没有 Continuous Batching" vertex="1">
          <mxGeometry height="22" width="400" x="1016" y="1492" as="geometry" />
        </mxCell>
        <mxCell id="209" parent="1" style="text;fontSize=12;fontColor=#333;fillColor=none;strokeColor=none;align=left;whiteSpace=wrap;" value="Static Batching 的问题:&#xa;① 必须等 batch 内所有请求都完成才能处理下一批&#xa;② 短请求空等长请求 → GPU 空转浪费&#xa;③ 新请求必须等当前 batch 完全结束&#xa;&#xa;对比: 64 × 7000 tokens (每步 budget 8192)&#xa;  Static: 2 批 × (约 28 步 prefill + 200 步 decode) = 约 456 步&#xa;  Continuous: 阶梯式并行, 约 440 步; 输出长度不均时差距更大" vertex="1">
          <mxGeometry height="130" width="430" x="1016" y="1518" as="geometry" />
        </mxCell>
        <mxCell id="300" parent="1" style="text;fontSize=16;fontStyle=1;fillColor=none;strokeColor=none;align=left;" value="F. 每个 Step 的调度决策流程" vertex="1">
          <mxGeometry height="26" width="600" x="40" y="1680" as="geometry" />
        </mxCell>
        <mxCell id="301" parent="1" style="rounded=1;fillColor=#E3F2FD;strokeColor=#1976D2;fontSize=12;fontStyle=1;align=center;" value="token_budget = 8192" vertex="1">
          <mxGeometry height="36" width="210" x="40" y="1720" as="geometry" />
        </mxCell>
        <mxCell id="302" parent="1" style="rounded=1;fillColor=#BBDEFB;strokeColor=#1976D2;fontSize=12;fontStyle=1;align=center;" value="遍历 running 队列" vertex="1">
          <mxGeometry height="36" width="200" x="290" y="1720" as="geometry" />
        </mxCell>
        <mxCell id="3021" edge="1" parent="1" source="301" style="edgeStyle=orthogonalEdgeStyle;" target="302">
          <mxGeometry relative="1" as="geometry" />
        </mxCell>
        <mxCell id="303" parent="1" style="rounded=1;fillColor=#E8EAF6;strokeColor=#5C6BC0;fontSize=11;align=left;spacingLeft=8;whiteSpace=wrap;" value="每个 request:&#xa;num_new = min(target - computed, budget)&#xa;→ decode: 通常 = 1&#xa;→ prefill 续: 可能 = 数千&#xa;budget -= num_new" vertex="1">
          <mxGeometry height="90" width="200" x="290" y="1770" as="geometry" />
        </mxCell>
        <mxCell id="3031" edge="1" parent="1" source="302" style="edgeStyle=orthogonalEdgeStyle;" target="303">
          <mxGeometry relative="1" as="geometry" />
        </mxCell>
        <mxCell id="304" parent="1" style="rhombus;fillColor=#FFCDD2;strokeColor=#E53935;fontSize=11;fontStyle=1;align=center;whiteSpace=wrap;" value="KV block&#xa;分配失败?" vertex="1">
          <mxGeometry height="80" width="120" x="530" y="1720" as="geometry" />
        </mxCell>
        <mxCell id="3041" edge="1" parent="1" source="302" style="edgeStyle=orthogonalEdgeStyle;" target="304">
          <mxGeometry relative="1" as="geometry" />
        </mxCell>
        <mxCell id="305" parent="1" style="rounded=1;fillColor=#FFCDD2;strokeColor=#E53935;fontSize=10;align=center;whiteSpace=wrap;" value="抢占最低优先级&#xa;请求 → 释放 blocks&#xa;→ 重试分配" vertex="1">
          <mxGeometry height="60" width="120" x="530" y="1820" as="geometry" />
        </mxCell>
        <mxCell id="3051" edge="1" parent="1" source="304" style="edgeStyle=orthogonalEdgeStyle;" target="305" value="是">
          <mxGeometry relative="1" as="geometry" />
        </mxCell>
        <mxCell id="306" parent="1" style="rounded=1;fillColor=#FFE0B2;strokeColor=#F57C00;fontSize=12;fontStyle=1;align=center;whiteSpace=wrap;" value="遍历 waiting 队列&#xa;(budget &gt; 0 且 running &lt; 32)" vertex="1">
          <mxGeometry height="42" width="250" x="690" y="1720" as="geometry" />
        </mxCell>
        <mxCell id="3061" edge="1" parent="1" source="304" style="edgeStyle=orthogonalEdgeStyle;" target="306" value="否">
          <mxGeometry relative="1" as="geometry" />
        </mxCell>
        <mxCell id="307" parent="1" style="rounded=1;fillColor=#FFF3E0;strokeColor=#FF9800;fontSize=11;align=left;spacingLeft=8;whiteSpace=wrap;" value="每个新 request:&#xa;num_new = min(prompt_len, budget)&#xa;→ chunked prefill 或 full prefill&#xa;分配 KV blocks&#xa;移入 running 队列&#xa;budget -= num_new" vertex="1">
          <mxGeometry height="100" width="250" x="690" y="1776" as="geometry" />
        </mxCell>
        <mxCell id="3071" edge="1" parent="1" source="306" style="edgeStyle=orthogonalEdgeStyle;" target="307">
          <mxGeometry relative="1" as="geometry" />
        </mxCell>
        <mxCell id="308" parent="1" style="rounded=1;fillColor=#C8E6C9;strokeColor=#4CAF50;fontSize=12;fontStyle=1;align=center;whiteSpace=wrap;" value="输出 SchedulerOutput&#xa;{req_id: num_tokens}&#xa;→ GPU forward" vertex="1">
          <mxGeometry height="56" width="210" x="980" y="1720" as="geometry" />
        </mxCell>
        <mxCell id="3081" edge="1" parent="1" source="306" style="edgeStyle=orthogonalEdgeStyle;" target="308">
          <mxGeometry relative="1" as="geometry" />
        </mxCell>
        <mxCell id="309" parent="1" style="rounded=1;fillColor=#E8F5E9;strokeColor=#66BB6A;fontSize=11;align=center;whiteSpace=wrap;" value="update_from_output()&#xa;完成的请求 → 释放 KV&#xa;未完成的 → 继续下一 Step" vertex="1">
          <mxGeometry height="56" width="210" x="980" y="1796" as="geometry" />
        </mxCell>
        <mxCell id="3091" edge="1" parent="1" source="308" style="edgeStyle=orthogonalEdgeStyle;" target="309">
          <mxGeometry relative="1" as="geometry" />
        </mxCell>
        <mxCell id="310" edge="1" parent="1" style="endArrow=classic;dashed=1;strokeColor=#4CAF50;strokeWidth=2;" value="">
          <mxGeometry relative="1" as="geometry">
            <mxPoint x="1085" y="1852" as="sourcePoint" />
            <mxPoint x="1085" y="1900" as="targetPoint" />
          </mxGeometry>
        </mxCell>
        <mxCell id="311" parent="1" style="text;fontSize=11;fontStyle=2;fontColor=#4CAF50;fillColor=none;strokeColor=none;align=left;" value="→ 回到 Step 开头，循环" vertex="1">
          <mxGeometry height="20" width="300" x="1100" y="1870" as="geometry" />
        </mxCell>
      </root>
    </mxGraphModel>
  </diagram>
</mxfile>
