moved finished notes to their own folder; started work on fine-tuning

This commit is contained in:
2026-01-09 04:48:16 +01:00
parent b426fd87d2
commit 750f067b75
6 changed files with 297 additions and 52 deletions

212
train.ipynb Normal file
View File

@@ -0,0 +1,212 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "01aabcdb",
"metadata": {},
"source": [
"import stuff"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "85710d55",
"metadata": {},
"outputs": [],
"source": [
"from transformers import (\n",
" AutoTokenizer,\n",
" AutoModelForCausalLM,\n",
" TextIteratorStreamer,\n",
")\n",
"from sys import stderr as err\n",
"import threading\n",
"import torch"
]
},
{
"cell_type": "markdown",
"id": "827268e2",
"metadata": {},
"source": [
"load model and set max tokens to 131072 (using rope yarn thingy whatever)"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "f6453597",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Using device: cuda\n",
"Loading checkpoint shards: 100%|██████████| 2/2 [00:03<00:00, 1.95s/it]\n",
"max_length = 131072\n",
"max_embeds = 131072\n"
]
}
],
"source": [
"model_name = \"Qwen/Qwen3-8B-FP8\"\n",
"\n",
"# 1) Choose device (use CUDA if available)\n",
"device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
"print(\"Using device:\", device, file=err)\n",
"\n",
"# 2) Load tokenizer and model\n",
"tokenizer = AutoTokenizer.from_pretrained(model_name)\n",
"\n",
"# If GPU and limited VRAM, consider dtype=torch.float16 for half precision\n",
"model = AutoModelForCausalLM.from_pretrained(\n",
" model_name,\n",
" dtype=torch.float16 if device.type == \"cuda\" else None,\n",
" device_map=device,\n",
")\n",
"\n",
"print(\"max_length =\", tokenizer.model_max_length, file=err)\n",
"print(\"max_embeds =\", model.config.max_position_embeddings, file=err)"
]
},
{
"cell_type": "markdown",
"id": "b1699f5e",
"metadata": {},
"source": [
"prep prompt"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0d78c3dd",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"input tokens = 11541\n"
]
}
],
"source": [
"# 3) Prepare chat inputs (tokenized tensors)\n",
"prompt = open(\"prompt\").read().strip()\n",
"messages = [{\"role\": \"user\", \"content\": prompt}]\n",
"inputs = tokenizer.apply_chat_template(\n",
" messages,\n",
" add_generation_prompt=True,\n",
" tokenize=True,\n",
" return_dict=True,\n",
" return_tensors=\"pt\",\n",
")\n",
"\n",
"num_input_tokens = inputs[\"input_ids\"].shape[1]\n",
"tokens = num_input_tokens\n",
"print(\"input tokens =\", num_input_tokens, file=err)\n",
"\n",
"# Move input tensors to the same device as the model\n",
"inputs = {k: v.to(device) for k, v in inputs.items()}\n",
"\n",
"# 4) Create streamer\n",
"streamer = TextIteratorStreamer(\n",
" tokenizer, \n",
" skip_prompt=True, \n",
" skip_special_tokens=True\n",
")\n",
"\n",
"# 5) Start generation in background thread (generate is blocking)\n",
"gen_kwargs = dict(\n",
" **inputs,\n",
" max_new_tokens=131072,\n",
" streamer=streamer,\n",
")"
]
},
{
"cell_type": "markdown",
"id": "bc6bf4f8",
"metadata": {},
"source": [
"do inference"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "ad2f8968",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<think>\n",
"Okay, let me try to figure out how to transform the given news into the MediaWiki format based on the examples provided. First, I need to understand the structure of the examples to replicate it accurately.\n",
"\n",
"Looking at the first example, the news is structured with a date header, then under \"New Contents\" there are several list items, each starting with a bold title. Each list item has bullet points with specific details. For instance, \"New Chapter\" has subpoints about the chapter availability, obtainable ships, enemy levels, and level caps. Then there's a section for \"System Optimization\" with numbered points.\n",
"\n",
"The second example has more sections, like \"Limited Time Event\" and \"New [Skins]\" with different sub-sections. The third example includes \"New Contents\" with various subcategories like \"New Chapter\", \"New gameplay added\", \"New Character\", \"Augment Update\", \"New Memory\", \"FleetChat Update\", \"CV Update\", and \"System Optimization\". Each of these has specific formatting, such as using ShipDisplay templates with parameters, and sometimes tables for skins or furniture.\n",
"\n",
"Now, the news I need to convert is from January 8, 2026. Let's parse the content step by step.\n",
"\n",
"Starting with the date: \"Posted on January 8, 2026\" becomes \"==January "
]
},
{
"ename": "KeyboardInterrupt",
"evalue": "",
"output_type": "error",
"traceback": [
"\u001b[31m---------------------------------------------------------------------------\u001b[39m",
"\u001b[31mKeyboardInterrupt\u001b[39m Traceback (most recent call last)",
"\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[7]\u001b[39m\u001b[32m, line 5\u001b[39m\n\u001b[32m 2\u001b[39m thread.start()\n\u001b[32m 4\u001b[39m \u001b[38;5;66;03m# 6) Consume and display streamed text in real time\u001b[39;00m\n\u001b[32m----> \u001b[39m\u001b[32m5\u001b[39m \u001b[38;5;28;43;01mfor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mchunk\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01min\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mstreamer\u001b[49m\u001b[43m:\u001b[49m\n\u001b[32m 6\u001b[39m \u001b[43m \u001b[49m\u001b[43mtokens\u001b[49m\u001b[43m \u001b[49m\u001b[43m+\u001b[49m\u001b[43m=\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mlen\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mtokenizer\u001b[49m\u001b[43m.\u001b[49m\u001b[43mencode\u001b[49m\u001b[43m(\u001b[49m\u001b[43mchunk\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43madd_special_tokens\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 7\u001b[39m \u001b[43m \u001b[49m\u001b[38;5;28;43mprint\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mchunk\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mend\u001b[49m\u001b[43m=\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mflush\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m)\u001b[49m\n",
"\u001b[36mFile \u001b[39m\u001b[32m~/inference/.venv/lib/python3.12/site-packages/transformers/generation/streamers.py:226\u001b[39m, in \u001b[36mTextIteratorStreamer.__next__\u001b[39m\u001b[34m(self)\u001b[39m\n\u001b[32m 225\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34m__next__\u001b[39m(\u001b[38;5;28mself\u001b[39m):\n\u001b[32m--> \u001b[39m\u001b[32m226\u001b[39m value = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mtext_queue\u001b[49m\u001b[43m.\u001b[49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 227\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m value == \u001b[38;5;28mself\u001b[39m.stop_signal:\n\u001b[32m 228\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mStopIteration\u001b[39;00m()\n",
"\u001b[36mFile \u001b[39m\u001b[32m/usr/lib/python3.12/queue.py:171\u001b[39m, in \u001b[36mQueue.get\u001b[39m\u001b[34m(self, block, timeout)\u001b[39m\n\u001b[32m 169\u001b[39m \u001b[38;5;28;01melif\u001b[39;00m timeout \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[32m 170\u001b[39m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28mself\u001b[39m._qsize():\n\u001b[32m--> \u001b[39m\u001b[32m171\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mnot_empty\u001b[49m\u001b[43m.\u001b[49m\u001b[43mwait\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 172\u001b[39m \u001b[38;5;28;01melif\u001b[39;00m timeout < \u001b[32m0\u001b[39m:\n\u001b[32m 173\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[33m\"\u001b[39m\u001b[33m'\u001b[39m\u001b[33mtimeout\u001b[39m\u001b[33m'\u001b[39m\u001b[33m must be a non-negative number\u001b[39m\u001b[33m\"\u001b[39m)\n",
"\u001b[36mFile \u001b[39m\u001b[32m/usr/lib/python3.12/threading.py:355\u001b[39m, in \u001b[36mCondition.wait\u001b[39m\u001b[34m(self, timeout)\u001b[39m\n\u001b[32m 353\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m: \u001b[38;5;66;03m# restore state no matter what (e.g., KeyboardInterrupt)\u001b[39;00m\n\u001b[32m 354\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m timeout \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m355\u001b[39m \u001b[43mwaiter\u001b[49m\u001b[43m.\u001b[49m\u001b[43macquire\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 356\u001b[39m gotit = \u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[32m 357\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n",
"\u001b[31mKeyboardInterrupt\u001b[39m: "
]
}
],
"source": [
"thread = threading.Thread(target=model.generate, kwargs=gen_kwargs)\n",
"thread.start()\n",
"\n",
"# 6) Consume and display streamed text in real time\n",
"for chunk in streamer:\n",
" tokens += len(tokenizer.encode(chunk, add_special_tokens=False))\n",
" print(chunk, end=\"\", flush=True)\n",
" # print(tokens, \"/131072 of token limit\", end=\"\\r\", sep=\"\", file=err)\n",
"print()\n",
"\n",
"thread.join()\n",
"print() # final newline"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}