{
  "id": "bbg-p0364-ai-models-and-llm-systems",
  "title": "How LLMs See Text",
  "chapter": "ai-models-and-llm-systems",
  "batch": "25",
  "rank": 241,
  "sourcePage": 364,
  "sourcePointer": "p. 364",
  "status": "accepted",
  "reviewerStatus": "reviewed",
  "fidelityScore": 0.9,
  "canvas": {
    "width": 960,
    "height": 640
  },
  "fireworksTechGraph": {
    "style": "style-1-flat-icon",
    "diagramType": "data-flow",
    "topologyNotes": [
      "source page render inspected",
      "extracted page text inspected",
      "source page render and extracted text inspected",
      "preserve preprocessing and tokenization path from raw text to tokens, embeddings, model processing, and output",
      "omit long prose"
    ],
    "publicBoundary": [
      "original vector output",
      "no source pixels",
      "no source mark or long wording"
    ]
  },
  "callouts": [],
  "sourceReview": {
    "conceptAnchors": [
      "concept: preprocessing",
      "concept: tokenization",
      "concept: embeddings",
      "concept: transformer processing",
      "concept: output probabilities"
    ],
    "labelSource": "curated",
    "semanticStatus": "reviewed"
  },
  "groups": [
    {
      "id": "path",
      "label": "Text to model representation",
      "x": 54,
      "y": 150,
      "w": 852,
      "h": 250
    }
  ],
  "shapes": [
    {
      "id": "text",
      "kind": "rect",
      "label": "Raw text",
      "detail": "Hello world",
      "x": 88,
      "y": 228,
      "w": 118,
      "h": 58,
      "tone": "blue"
    },
    {
      "id": "clean",
      "kind": "rect",
      "label": "Preprocess",
      "detail": "normalize",
      "x": 244,
      "y": 228,
      "w": 118,
      "h": 58,
      "tone": "orange"
    },
    {
      "id": "tokens",
      "kind": "rect",
      "label": "Tokens",
      "detail": "subwords",
      "x": 400,
      "y": 228,
      "w": 118,
      "h": 58,
      "tone": "green"
    },
    {
      "id": "embed",
      "kind": "rect",
      "label": "Embeddings",
      "detail": "vectors",
      "x": 556,
      "y": 228,
      "w": 118,
      "h": 58,
      "tone": "purple"
    },
    {
      "id": "model",
      "kind": "hex",
      "label": "Model layers",
      "detail": "attention",
      "x": 710,
      "y": 216,
      "w": 124,
      "h": 82,
      "tone": "teal"
    }
  ],
  "connectors": [
    {
      "from": "text",
      "to": "clean",
      "label": "standardize",
      "flow": "main"
    },
    {
      "from": "clean",
      "to": "tokens",
      "label": "split",
      "flow": "main"
    },
    {
      "from": "tokens",
      "to": "embed",
      "label": "map",
      "flow": "data"
    },
    {
      "from": "embed",
      "to": "model",
      "label": "process",
      "flow": "main"
    }
  ]
}
