- SFT: Train in bf16 or fp32; avoid 8-bit. For evaluation, fp16, bf16, or fp32 are all fine. Follow established scripts for reliability.
- Unsloth: Train LoRA in fp16, bf16, or fp32. Avoid 8-bit or lower precision unless it has been validated by replicating the original experiments. Do not use QLoRA until the core setups are stable and every preceding step has worked.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def main(): | |
import os | |
import sys | |
import socket | |
print(sys.executable) | |
if socket.gethostname() == 'skampere1': | |
print('Hardcoding the path since we are in skampere') | |
sys.path = ['', '/lfs/skampere1/0/brando9/miniconda/envs/beyond_scale_2/lib/python311.zip', '/lfs/skampere1/0/brando9/miniconda/envs/beyond_scale_2/lib/python3.11', '/lfs/skampere1/0/brando9/miniconda/envs/beyond_scale_2/lib/python3.11/lib-dynload', '/lfs/skampere1/0/brando9/miniconda/envs/beyond_scale_2/lib/python3.11/site-packages', '/afs/cs.stanford.edu/u/brando9/beyond-scale-2-alignment-coeff/py_src', '/afs/cs.stanford.edu/u/brando9/ultimate-utils/py_src'] | |
print(f'{sys.path=}') |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import torch | |
# Create two matrices on the GPU | |
matrix_a = torch.rand((1000, 1000), device='cuda') | |
matrix_b = torch.rand((1000, 1000), device='cuda') | |
# Perform matrix sum | |
result = matrix_a + matrix_b | |
# Verify and print device of the result |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# ref: https://chatgpt.com/c/673e8232-0a18-8001-9fb5-ed1262bf267f | |
# ref: https://gist.github.com/brando90/4cd94ad3730218dca75dba779f770c9d | |
from transformers import AutoTokenizer | |
def analyze_tokenizer_output(model_name, text, pad_token="<pad>", eos_token="</s>", max_length=20): | |
""" | |
Analyzes the tokenizer output, including the attention mask and labels, | |
when eos_token and pad_token are present. | |
""" | |
# Load the tokenizer |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#ref: https://chatgpt.com/share/673e7ef2-23cc-8001-b682-3ff4b66c797a | |
import torch | |
from transformers import AutoTokenizer, AutoModelForCausalLM | |
def compute_tfa(model, tokenizer, input_texts): | |
""" | |
Computes Teacher-Forced Accuracy (TFA), rewarding the model for correctly predicting | |
the first EOS token while ignoring predictions for padding tokens. | |
Parameters: |
{
"source": "...",
"id": "...",
"attributes": {
"compression_ratio_zstd": 0.7
}
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# # 'torch==2.1.2', # 2.2 not supported due to vllm see: https://github.com/vllm-project/vllm/issues/2747 | |
... | |
# 'transformers>=4.40', | |
# 'accelerate==0.29.2', | |
... | |
# 'datasets==2.14.7', | |
# 'evaluate==0.4.1', | |
# 'bitsandbytes== 0.43.0', | |
# 'einops', | |
# 'flash-attn>=2.5.8', |
Simplified version of Dually Grounded Back-Translation for AutoFormalization:
def train_to_af_for_maf(mdl : causal_lm,
formal_data_set, # e.g., ITP lib like mathlib
informal_data_set, # e.g., time-tested maths textbook e.g., Rudin, CLRS.
):
for (nl, fl*) in formal_data_set; for (nl*, fl) in informal_data_set;
# -- Learn to Formalize: nl_i->fl* from fl* -> [nl_i]_i -> fl*
[nl_i]_i := mdl("informalize " + fl*, sampling=top_p, num_out=k) # noise is good for robustness!
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from datasets import load_dataset | |
from transformers import GPT2Tokenizer, GPT2LMHeadModel, Trainer, TrainingArguments, DataCollatorForLanguageModeling | |
# Load dataset from a JSON file | |
data_files = {"train": "path/to/your/train.json", "test": "path/to/your/test.json"} | |
dataset = load_dataset("json", data_files=data_files) | |
# Load pre-trained GPT-2 tokenizer and model | |
tokenizer = GPT2Tokenizer.from_pretrained("gpt2") | |
model = GPT2LMHeadModel.from_pretrained("gpt2") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -- HELM prompt, 8 shot, CoT? ref: https://storage.googleapis.com/crfm-helm-public/lite/benchmark_output/runs/v1.0.0/math:subject=algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=01-ai_yi-34b/scenario_state.json, https://crfm.stanford.edu/helm/lite/latest/#/runs/math:subject=algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=01-ai_yi-34b | |
HELM_MATH_PROMPT: str = ( | |
"""Given a mathematics problem, determine the answer. Simplify your answer as much as possible.### | |
Problem: Let $r=3^s-s$ and $s=2^n+1$. What is the value of $r$ when $n=2$? | |
Answer: First substitute $n=2$ into the expression for $s$ to find $s=2^2+1=5$. Then substitute $s=5$ into the expression for $r$ to find $r=3^5-5=243-5=\\boxed{238}.### | |
Problem: If $x^{2y}= 4$ and $x = 4$, what is the value of $y$? Express your answer as a common fraction. | |
Answer: Plugging $x = 4$ into the first equation, we get $4^{2y} = 4^1 \\Rightarrow 2y = 1 \\Rightarrow y = \\boxed{\\frac{1}{2}}.### | |
Problem: If $y = \\dis |
NewerOlder