Moved finished notes to their own folder; started work on fine-tuning

2026-01-09 04:48:16 +01:00
parent b426fd87d2
commit 750f067b75
6 changed files with 297 additions and 52 deletions
--- a/gwen.py
+++ b/gwen.py
@@ -1,15 +1,13 @@
-from sys import stderr as err
-from sys import argv
-
 from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    TextIteratorStreamer,
 )
+from sys import argv, stderr as err
 import threading
 import torch

-model_name = "Qwen/Qwen3-8B-FP8" 
+model_name = "Qwen/Qwen3-8B-FP8"

 # 1) Choose device (use CUDA if available)
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
@@ -20,12 +18,13 @@ tokenizer = AutoTokenizer.from_pretrained(model_name)

 # If GPU and limited VRAM, consider dtype=torch.float16 for half precision
 model = AutoModelForCausalLM.from_pretrained(
-    model_name, 
-    dtype=torch.float16 if device.type == "cuda" else None, 
-    device_map=device)
+    model_name,
+    dtype=torch.float16 if device.type == "cuda" else None,
+    device_map=device,
+)

-print("tokenizer.model_max_length =", tokenizer.model_max_length, file=err)
-print("model.config.max_position_embeddings =", model.config.max_position_embeddings, file=err)
+print("max_length =", tokenizer.model_max_length, file=err)
+print("max_embeds =", model.config.max_position_embeddings, file=err)

 # 3) Prepare chat inputs (tokenized tensors)
 if len(argv) > 1:
@@ -41,7 +40,7 @@ inputs = tokenizer.apply_chat_template(
    return_tensors="pt",
 )

-num_input_tokens = inputs["input_ids"].shape[1] 
+num_input_tokens = inputs["input_ids"].shape[1]
 tokens = num_input_tokens
 print("input tokens =", num_input_tokens, file=err)