Moved finished notes to their own folder; started work on fine-tuning

This commit is contained in:
2026-01-09 04:48:16 +01:00
parent b426fd87d2
commit 750f067b75
6 changed files with 297 additions and 52 deletions

19
gwen.py
View File

@@ -1,15 +1,13 @@
from sys import stderr as err
from sys import argv
from transformers import (
AutoTokenizer,
AutoModelForCausalLM,
TextIteratorStreamer,
)
from sys import argv, stderr as err
import threading
import torch
model_name = "Qwen/Qwen3-8B-FP8"
model_name = "Qwen/Qwen3-8B-FP8"
# 1) Choose device (use CUDA if available)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
@@ -20,12 +18,13 @@ tokenizer = AutoTokenizer.from_pretrained(model_name)
# If GPU and limited VRAM, consider dtype=torch.float16 for half precision
model = AutoModelForCausalLM.from_pretrained(
model_name,
dtype=torch.float16 if device.type == "cuda" else None,
device_map=device)
model_name,
dtype=torch.float16 if device.type == "cuda" else None,
device_map=device,
)
print("tokenizer.model_max_length =", tokenizer.model_max_length, file=err)
print("model.config.max_position_embeddings =", model.config.max_position_embeddings, file=err)
print("max_length =", tokenizer.model_max_length, file=err)
print("max_embeds =", model.config.max_position_embeddings, file=err)
# 3) Prepare chat inputs (tokenized tensors)
if len(argv) > 1:
@@ -41,7 +40,7 @@ inputs = tokenizer.apply_chat_template(
return_tensors="pt",
)
num_input_tokens = inputs["input_ids"].shape[1]
num_input_tokens = inputs["input_ids"].shape[1]
tokens = num_input_tokens
print("input tokens =", num_input_tokens, file=err)