# 1. Install sglang: https://github.com/sgl-project/sglang?tab=readme-ov-file#install
# 2. Set up MT-bench by following the steps here: https://github.com/sgl-project/sglang/tree/main/benchmark/mtbench
#    (Benchmark code is here: https://github.com/sgl-project/sglang/blob/main/benchmark/mtbench/bench_sglang.py)
# 3. The only thing missing for a representative run is a per-category sampling temperature:
#    ...also adjust this line in that file if you need to:
#    "num_gpus": 1,
# Sampling temperature per category (taken directly from the MT-bench repo)
temperature_config = {
    "writing": 0.7,
    "roleplay": 0.7,
    "extraction": 0.0,
    "math": 0.0,
    "coding": 0.0,
    "reasoning": 0.0,
    "stem": 0.1,
    "humanities": 0.1,
    "arena-hard-200": 0.0,
}
def temperature_for_question(question):
    if question["category"] in temperature_config:
        temperature = temperature_config[question["category"]]
    else:
        temperature = 0.7
    return temperature
# <--- use temperature_for_question() in bench_sglang.py
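# A minimal sketch of how the helper above could be wired into the per-question
# sampling arguments in bench_sglang.py. The exact call site and parameter names
# are assumptions, and the config is abbreviated so the snippet runs on its own:

```python
# Abbreviated copy of the per-category config so this sketch is self-contained.
temperature_config = {"writing": 0.7, "math": 0.0}

def temperature_for_question(question):
    # Categories without an explicit entry fall back to 0.7.
    return temperature_config.get(question["category"], 0.7)

# Hypothetical call site: build sampling arguments for one question.
question = {"category": "math"}
sampling_params = {
    "temperature": temperature_for_question(question),
    "max_new_tokens": 256,  # assumed generation limit, not from the gist
}
print(sampling_params["temperature"])  # 0.0
```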
# 4. Download questions
# wget -O question.jsonl https://raw.githubusercontent.com/lm-sys/FastChat/main/fastchat/llm_judge/data/mt_bench/question.jsonl
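# Once downloaded, each line of question.jsonl is a JSON object with fields such
# as "question_id", "category", and "turns". A quick sanity check might look like
# this (the sample line below is a made-up stand-in for the real file):

```python
import json

# Stand-in for one line of question.jsonl.
sample_line = '{"question_id": 81, "category": "writing", "turns": ["Compose a travel blog post."]}'
question = json.loads(sample_line)

# Same per-category lookup as above, defaulting to 0.7.
temperature_config = {"writing": 0.7, "math": 0.0, "stem": 0.1}
temperature = temperature_config.get(question["category"], 0.7)
print(f'{question["question_id"]}: {question["category"]} -> {temperature}')
```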
# 5. Start the endpoint
# python -m sglang.launch_server --model-path meta-llama/Llama-2-7b-chat-hf --port 30000
# 6. Generate model responses (with your modified bench_sglang.py)
# python3 bench_sglang.py --num-questions 80
# 7. Put the answers file here:
# [MT-bench-dir]/data/mt_bench/model_answer/vicuna-13b-v1.3.jsonl
# 8. Run the judge on the responses (adjust model & parallel):
# https://github.com/lm-sys/FastChat/tree/main/fastchat/llm_judge#mt-bench
# python gen_judgment.py --model-list vicuna-13b-v1.3 --parallel 2