Brando Miranda brando90

## multiple_gpus_1_file.py
def main():
    """Report the running interpreter and pin ``sys.path`` on host ``skampere1``.

    Always prints ``sys.executable``. When the machine's hostname is exactly
    ``'skampere1'``, ``sys.path`` is replaced wholesale by a fixed list of
    directories and the resulting value is printed. On any other host
    ``sys.path`` is left untouched.
    """
    import os
    import sys
    import socket

    print(sys.executable)
    hostname = socket.gethostname()
    if hostname == 'skampere1':
        print('Hardcoding the path since we are in skampere')
        # Fixed search path: the '' entry, the beyond_scale_2 miniconda env
        # directories, then the two project py_src directories.
        skampere_search_path = [
            '',
            '/lfs/skampere1/0/brando9/miniconda/envs/beyond_scale_2/lib/python311.zip',
            '/lfs/skampere1/0/brando9/miniconda/envs/beyond_scale_2/lib/python3.11',
            '/lfs/skampere1/0/brando9/miniconda/envs/beyond_scale_2/lib/python3.11/lib-dynload',
            '/lfs/skampere1/0/brando9/miniconda/envs/beyond_scale_2/lib/python3.11/site-packages',
            '/afs/cs.stanford.edu/u/brando9/beyond-scale-2-alignment-coeff/py_src',
            '/afs/cs.stanford.edu/u/brando9/ultimate-utils/py_src',
        ]
        sys.path = skampere_search_path
        print(f'{sys.path=}')

## training_guidelines.md

      
              1 file
            
          
              0 forks
            
          
                0 comments
              
            
              0 stars
            
          
                brando90
                / training_guidelines.md
            
            
              Created
              November 29, 2024 21:59
            
              
                nothing below 16 bits for training
              
          
    Training Guidelines Summary


SFT: Use bf16 or fp32 for training; avoid 8bit. For evaluation, fp16, bf16, or fp32 is fine. Follow established scripts for reliability.
Unsloth: Train LoRA with fp16, bf16, or fp32. Avoid 8bit or lower unless validated through replication of original experiments. No QLoRA unless core setups are stable and everything before this has worked.


## run_gpu.py
import torch

# Allocate two 1000x1000 random operands directly on the CUDA device
matrix_a = torch.rand(1000, 1000, device='cuda')
matrix_b = torch.rand(1000, 1000, device='cuda')

# Element-wise sum of the two operands
result = torch.add(matrix_a, matrix_b)

# Verify and print device of the result

## gemma_tok_how_does_mask_look_if_eos_pad_both_present_in_tok.py
# ref: https://chatgpt.com/c/673e8232-0a18-8001-9fb5-ed1262bf267f
# ref: https://gist.github.com/brando90/4cd94ad3730218dca75dba779f770c9d
from transformers import AutoTokenizer

def analyze_tokenizer_output(model_name, text, pad_token="<pad>", eos_token="</s>", max_length=20):
    """
    Analyzes the tokenizer output, including the attention mask and labels,
    when eos_token and pad_token are present.
    """
    # Load the tokenizer

## teacher_forced_accuracy.py
#ref: https://chatgpt.com/share/673e7ef2-23cc-8001-b682-3ff4b66c797a
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

def compute_tfa(model, tokenizer, input_texts):
    """
    Computes Teacher-Forced Accuracy (TFA), rewarding the model for correctly predicting
    the first EOS token while ignoring predictions for padding tokens.

    Parameters:

## meeting_david_hall.md

      
              1 file
            
          
              0 forks
            
          
                0 comments
              
            
              0 stars
            
          
                brando90
                / meeting_david_hall.md
            
            
              Created
              November 15, 2024 23:19
            
          
    {
    "source": "...",
    "id": "...",
    "attributes": {
      "compression_ratio_zstd": 0.7
    }
}

  
## gist:c55c74e840d42c952d4aec7b74e0be6c
        # # 'torch==2.1.2',  # 2.2 not supported due to vllm see: https://github.com/vllm-project/vllm/issues/2747
...
        # 'transformers>=4.40',
        # 'accelerate==0.29.2',
...
        # 'datasets==2.14.7',
        # 'evaluate==0.4.1',
        # 'bitsandbytes== 0.43.0',
        # 'einops',
        # 'flash-attn>=2.5.8',

## maf_dual_backtranslation_self_improving.md

      
              1 file
            
          
              0 forks
            
          
                0 comments
              
            
              0 stars
            
          
                brando90
                / maf_dual_backtranslation_self_improving.md
            
            
              Created
              June 7, 2024 17:53
            
              
                MAF dual backtranslation self-improving loop
              
          
    Simplified version of Dually Grounded Back-Translation for AutoFormalization:
def train_to_af_for_maf(mdl : causal_lm,
                        formal_data_set, # e.g., ITP lib like mathlib
                        informal_data_set,  # e.g., time-tested maths textbook e.g., Rudin, CLRS.
                        ):
    for (nl, fl*) in formal_data_set; for (nl*, fl) in informal_data_set;
        # -- Learn to Formalize: nl_i->fl* from fl* -> [nl_i]_i -> fl*
        [nl_i]_i := mdl("informalize " + fl*, sampling=top_p, num_out=k)  # noise is good for robustness!

  
## gist:23e96b1cf93372fb846b3e55aac162f1
from datasets import load_dataset
from transformers import GPT2Tokenizer, GPT2LMHeadModel, Trainer, TrainingArguments, DataCollatorForLanguageModeling

# Map each split name to its JSON file, then build the dataset with the "json" loader
data_files = {
    "train": "path/to/your/train.json",
    "test": "path/to/your/test.json",
}
dataset = load_dataset("json", data_files=data_files)

# Fetch the pre-trained "gpt2" checkpoint for both the tokenizer and the LM-head model
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
model = GPT2LMHeadModel.from_pretrained("gpt2")

## helm_prompt.python
# -- HELM prompt, 8 shot, CoT? ref: https://storage.googleapis.com/crfm-helm-public/lite/benchmark_output/runs/v1.0.0/math:subject=algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=01-ai_yi-34b/scenario_state.json, https://crfm.stanford.edu/helm/lite/latest/#/runs/math:subject=algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=01-ai_yi-34b
HELM_MATH_PROMPT: str = (
"""Given a mathematics problem, determine the answer. Simplify your answer as much as possible.###
Problem: Let $r=3^s-s$ and $s=2^n+1$. What is the value of $r$ when $n=2$?
Answer: First substitute $n=2$ into the expression for $s$ to find $s=2^2+1=5$. Then substitute $s=5$ into the expression for $r$ to find $r=3^5-5=243-5=\\boxed{238}.###
Problem: If $x^{2y}= 4$ and $x = 4$, what is the value of $y$? Express your answer as a common fraction.
Answer: Plugging $x = 4$ into the first equation, we get $4^{2y} = 4^1 \\Rightarrow 2y = 1 \\Rightarrow y = \\boxed{\\frac{1}{2}}.###
Problem: If $y = \\dis
	def main():
	import os
	import sys
	import socket
	print(sys.executable)
	if socket.gethostname() == 'skampere1':
	print('Hardcoding the path since we are in skampere')
	sys.path = ['', '/lfs/skampere1/0/brando9/miniconda/envs/beyond_scale_2/lib/python311.zip', '/lfs/skampere1/0/brando9/miniconda/envs/beyond_scale_2/lib/python3.11', '/lfs/skampere1/0/brando9/miniconda/envs/beyond_scale_2/lib/python3.11/lib-dynload', '/lfs/skampere1/0/brando9/miniconda/envs/beyond_scale_2/lib/python3.11/site-packages', '/afs/cs.stanford.edu/u/brando9/beyond-scale-2-alignment-coeff/py_src', '/afs/cs.stanford.edu/u/brando9/ultimate-utils/py_src']
	print(f'{sys.path=}')
	import torch

	# Create two matrices on the GPU
	matrix_a = torch.rand((1000, 1000), device='cuda')
	matrix_b = torch.rand((1000, 1000), device='cuda')

	# Perform matrix sum
	result = matrix_a + matrix_b

	# Verify and print device of the result
	# ref: https://chatgpt.com/c/673e8232-0a18-8001-9fb5-ed1262bf267f
	# ref: https://gist.github.com/brando90/4cd94ad3730218dca75dba779f770c9d
	from transformers import AutoTokenizer

	def analyze_tokenizer_output(model_name, text, pad_token="<pad>", eos_token="</s>", max_length=20):
	"""
	Analyzes the tokenizer output, including the attention mask and labels,
	when eos_token and pad_token are present.
	"""
	# Load the tokenizer
	#ref: https://chatgpt.com/share/673e7ef2-23cc-8001-b682-3ff4b66c797a
	import torch
	from transformers import AutoTokenizer, AutoModelForCausalLM

	def compute_tfa(model, tokenizer, input_texts):
	"""
	Computes Teacher-Forced Accuracy (TFA), rewarding the model for correctly predicting
	the first EOS token while ignoring predictions for padding tokens.

	Parameters:
	# # 'torch==2.1.2', # 2.2 not supported due to vllm see: https://github.com/vllm-project/vllm/issues/2747
	...
	# 'transformers>=4.40',
	# 'accelerate==0.29.2',
	...
	# 'datasets==2.14.7',
	# 'evaluate==0.4.1',
	# 'bitsandbytes== 0.43.0',
	# 'einops',
	# 'flash-attn>=2.5.8',
	from datasets import load_dataset
	from transformers import GPT2Tokenizer, GPT2LMHeadModel, Trainer, TrainingArguments, DataCollatorForLanguageModeling

	# Load dataset from a JSON file
	data_files = {"train": "path/to/your/train.json", "test": "path/to/your/test.json"}
	dataset = load_dataset("json", data_files=data_files)

	# Load pre-trained GPT-2 tokenizer and model
	tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
	model = GPT2LMHeadModel.from_pretrained("gpt2")
	# -- HELM prompt, 8 shot, CoT? ref: https://storage.googleapis.com/crfm-helm-public/lite/benchmark_output/runs/v1.0.0/math:subject=algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=01-ai_yi-34b/scenario_state.json, https://crfm.stanford.edu/helm/lite/latest/#/runs/math:subject=algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=01-ai_yi-34b
	HELM_MATH_PROMPT: str = (
	"""Given a mathematics problem, determine the answer. Simplify your answer as much as possible.###
	Problem: Let $r=3^s-s$ and $s=2^n+1$. What is the value of $r$ when $n=2$?
	Answer: First substitute $n=2$ into the expression for $s$ to find $s=2^2+1=5$. Then substitute $s=5$ into the expression for $r$ to find $r=3^5-5=243-5=\\boxed{238}.###
	Problem: If $x^{2y}= 4$ and $x = 4$, what is the value of $y$? Express your answer as a common fraction.
	Answer: Plugging $x = 4$ into the first equation, we get $4^{2y} = 4^1 \\Rightarrow 2y = 1 \\Rightarrow y = \\boxed{\\frac{1}{2}}.###
	Problem: If $y = \\dis