moved finished notes to their own folder; started work on fine-tuning

This commit is contained in:
2026-01-09 04:48:16 +01:00
parent b426fd87d2
commit 750f067b75
6 changed files with 297 additions and 52 deletions

212
train.ipynb Normal file
View File

@@ -0,0 +1,212 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "01aabcdb",
"metadata": {},
"source": [
"import stuff"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "85710d55",
"metadata": {},
"outputs": [],
"source": [
"from transformers import (\n",
" AutoTokenizer,\n",
" AutoModelForCausalLM,\n",
" TextIteratorStreamer,\n",
")\n",
"from sys import stderr as err\n",
"import threading\n",
"import torch"
]
},
{
"cell_type": "markdown",
"id": "827268e2",
"metadata": {},
"source": [
"load model and set max tokens to 131072 (using rope yarn thingy whatever)"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "f6453597",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Using device: cuda\n",
"Loading checkpoint shards: 100%|██████████| 2/2 [00:03<00:00, 1.95s/it]\n",
"max_length = 131072\n",
"max_embeds = 131072\n"
]
}
],
"source": [
"model_name = \"Qwen/Qwen3-8B-FP8\"\n",
"\n",
"# 1) Choose device (use CUDA if available)\n",
"device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
"print(\"Using device:\", device, file=err)\n",
"\n",
"# 2) Load tokenizer and model\n",
"tokenizer = AutoTokenizer.from_pretrained(model_name)\n",
"\n",
"# If GPU and limited VRAM, consider dtype=torch.float16 for half precision\n",
"model = AutoModelForCausalLM.from_pretrained(\n",
" model_name,\n",
" dtype=torch.float16 if device.type == \"cuda\" else None,\n",
" device_map=device,\n",
")\n",
"\n",
"print(\"max_length =\", tokenizer.model_max_length, file=err)\n",
"print(\"max_embeds =\", model.config.max_position_embeddings, file=err)"
]
},
{
"cell_type": "markdown",
"id": "b1699f5e",
"metadata": {},
"source": [
"prep prompt"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0d78c3dd",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"input tokens = 11541\n"
]
}
],
"source": [
"# 3) Prepare chat inputs (tokenized tensors)\n",
"prompt = open(\"prompt\").read().strip()\n",
"messages = [{\"role\": \"user\", \"content\": prompt}]\n",
"inputs = tokenizer.apply_chat_template(\n",
" messages,\n",
" add_generation_prompt=True,\n",
" tokenize=True,\n",
" return_dict=True,\n",
" return_tensors=\"pt\",\n",
")\n",
"\n",
"num_input_tokens = inputs[\"input_ids\"].shape[1]\n",
"tokens = num_input_tokens\n",
"print(\"input tokens =\", num_input_tokens, file=err)\n",
"\n",
"# Move input tensors to the same device as the model\n",
"inputs = {k: v.to(device) for k, v in inputs.items()}\n",
"\n",
"# 4) Create streamer\n",
"streamer = TextIteratorStreamer(\n",
" tokenizer, \n",
" skip_prompt=True, \n",
" skip_special_tokens=True\n",
")\n",
"\n",
"# 5) Start generation in background thread (generate is blocking)\n",
"gen_kwargs = dict(\n",
" **inputs,\n",
" max_new_tokens=131072,\n",
" streamer=streamer,\n",
")"
]
},
{
"cell_type": "markdown",
"id": "bc6bf4f8",
"metadata": {},
"source": [
"do inference"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "ad2f8968",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<think>\n",
"Okay, let me try to figure out how to transform the given news into the MediaWiki format based on the examples provided. First, I need to understand the structure of the examples to replicate it accurately.\n",
"\n",
"Looking at the first example, the news is structured with a date header, then under \"New Contents\" there are several list items, each starting with a bold title. Each list item has bullet points with specific details. For instance, \"New Chapter\" has subpoints about the chapter availability, obtainable ships, enemy levels, and level caps. Then there's a section for \"System Optimization\" with numbered points.\n",
"\n",
"The second example has more sections, like \"Limited Time Event\" and \"New [Skins]\" with different sub-sections. The third example includes \"New Contents\" with various subcategories like \"New Chapter\", \"New gameplay added\", \"New Character\", \"Augment Update\", \"New Memory\", \"FleetChat Update\", \"CV Update\", and \"System Optimization\". Each of these has specific formatting, such as using ShipDisplay templates with parameters, and sometimes tables for skins or furniture.\n",
"\n",
"Now, the news I need to convert is from January 8, 2026. Let's parse the content step by step.\n",
"\n",
"Starting with the date: \"Posted on January 8, 2026\" becomes \"==January "
]
},
{
"ename": "KeyboardInterrupt",
"evalue": "",
"output_type": "error",
"traceback": [
"\u001b[31m---------------------------------------------------------------------------\u001b[39m",
"\u001b[31mKeyboardInterrupt\u001b[39m Traceback (most recent call last)",
"\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[7]\u001b[39m\u001b[32m, line 5\u001b[39m\n\u001b[32m 2\u001b[39m thread.start()\n\u001b[32m 4\u001b[39m \u001b[38;5;66;03m# 6) Consume and display streamed text in real time\u001b[39;00m\n\u001b[32m----> \u001b[39m\u001b[32m5\u001b[39m \u001b[38;5;28;43;01mfor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mchunk\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01min\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mstreamer\u001b[49m\u001b[43m:\u001b[49m\n\u001b[32m 6\u001b[39m \u001b[43m \u001b[49m\u001b[43mtokens\u001b[49m\u001b[43m \u001b[49m\u001b[43m+\u001b[49m\u001b[43m=\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mlen\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mtokenizer\u001b[49m\u001b[43m.\u001b[49m\u001b[43mencode\u001b[49m\u001b[43m(\u001b[49m\u001b[43mchunk\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43madd_special_tokens\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 7\u001b[39m \u001b[43m \u001b[49m\u001b[38;5;28;43mprint\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mchunk\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mend\u001b[49m\u001b[43m=\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mflush\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m)\u001b[49m\n",
"\u001b[36mFile \u001b[39m\u001b[32m~/inference/.venv/lib/python3.12/site-packages/transformers/generation/streamers.py:226\u001b[39m, in \u001b[36mTextIteratorStreamer.__next__\u001b[39m\u001b[34m(self)\u001b[39m\n\u001b[32m 225\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34m__next__\u001b[39m(\u001b[38;5;28mself\u001b[39m):\n\u001b[32m--> \u001b[39m\u001b[32m226\u001b[39m value = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mtext_queue\u001b[49m\u001b[43m.\u001b[49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 227\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m value == \u001b[38;5;28mself\u001b[39m.stop_signal:\n\u001b[32m 228\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mStopIteration\u001b[39;00m()\n",
"\u001b[36mFile \u001b[39m\u001b[32m/usr/lib/python3.12/queue.py:171\u001b[39m, in \u001b[36mQueue.get\u001b[39m\u001b[34m(self, block, timeout)\u001b[39m\n\u001b[32m 169\u001b[39m \u001b[38;5;28;01melif\u001b[39;00m timeout \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[32m 170\u001b[39m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28mself\u001b[39m._qsize():\n\u001b[32m--> \u001b[39m\u001b[32m171\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mnot_empty\u001b[49m\u001b[43m.\u001b[49m\u001b[43mwait\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 172\u001b[39m \u001b[38;5;28;01melif\u001b[39;00m timeout < \u001b[32m0\u001b[39m:\n\u001b[32m 173\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[33m\"\u001b[39m\u001b[33m'\u001b[39m\u001b[33mtimeout\u001b[39m\u001b[33m'\u001b[39m\u001b[33m must be a non-negative number\u001b[39m\u001b[33m\"\u001b[39m)\n",
"\u001b[36mFile \u001b[39m\u001b[32m/usr/lib/python3.12/threading.py:355\u001b[39m, in \u001b[36mCondition.wait\u001b[39m\u001b[34m(self, timeout)\u001b[39m\n\u001b[32m 353\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m: \u001b[38;5;66;03m# restore state no matter what (e.g., KeyboardInterrupt)\u001b[39;00m\n\u001b[32m 354\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m timeout \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m355\u001b[39m \u001b[43mwaiter\u001b[49m\u001b[43m.\u001b[49m\u001b[43macquire\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 356\u001b[39m gotit = \u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[32m 357\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n",
"\u001b[31mKeyboardInterrupt\u001b[39m: "
]
}
],
"source": [
"thread = threading.Thread(target=model.generate, kwargs=gen_kwargs)\n",
"thread.start()\n",
"\n",
"# 6) Consume and display streamed text in real time\n",
"for chunk in streamer:\n",
" tokens += len(tokenizer.encode(chunk, add_special_tokens=False))\n",
" print(chunk, end=\"\", flush=True)\n",
" # print(tokens, \"/131072 of token limit\", end=\"\\r\", sep=\"\", file=err)\n",
"print()\n",
"\n",
"thread.join()\n",
"print() # final newline"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}