Moved finished notes to their own folder; started work on fine-tuning
This commit is contained in:
19
gwen.py
19
gwen.py
@@ -1,15 +1,13 @@
|
||||
from sys import stderr as err
|
||||
from sys import argv
|
||||
|
||||
from transformers import (
|
||||
AutoTokenizer,
|
||||
AutoModelForCausalLM,
|
||||
TextIteratorStreamer,
|
||||
)
|
||||
from sys import argv, stderr as err
|
||||
import threading
|
||||
import torch
|
||||
|
||||
model_name = "Qwen/Qwen3-8B-FP8"
|
||||
model_name = "Qwen/Qwen3-8B-FP8"
|
||||
|
||||
# 1) Choose device (use CUDA if available)
|
||||
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
||||
@@ -20,12 +18,13 @@ tokenizer = AutoTokenizer.from_pretrained(model_name)
|
||||
|
||||
# If GPU and limited VRAM, consider dtype=torch.float16 for half precision
|
||||
model = AutoModelForCausalLM.from_pretrained(
|
||||
model_name,
|
||||
dtype=torch.float16 if device.type == "cuda" else None,
|
||||
device_map=device)
|
||||
model_name,
|
||||
dtype=torch.float16 if device.type == "cuda" else None,
|
||||
device_map=device,
|
||||
)
|
||||
|
||||
print("tokenizer.model_max_length =", tokenizer.model_max_length, file=err)
|
||||
print("model.config.max_position_embeddings =", model.config.max_position_embeddings, file=err)
|
||||
print("max_length =", tokenizer.model_max_length, file=err)
|
||||
print("max_embeds =", model.config.max_position_embeddings, file=err)
|
||||
|
||||
# 3) Prepare chat inputs (tokenized tensors)
|
||||
if len(argv) > 1:
|
||||
@@ -41,7 +40,7 @@ inputs = tokenizer.apply_chat_template(
|
||||
return_tensors="pt",
|
||||
)
|
||||
|
||||
num_input_tokens = inputs["input_ids"].shape[1]
|
||||
num_input_tokens = inputs["input_ids"].shape[1]
|
||||
tokens = num_input_tokens
|
||||
print("input tokens =", num_input_tokens, file=err)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user