# sk2decompile-struct-6.7b / llm_server.py
import json
import os
from argparse import ArgumentParser, Namespace

from transformers import AutoTokenizer
from vllm import LLM, SamplingParams

# Set explicitly so the HF tokenizers library does not warn when vLLM forks workers.
os.environ["TOKENIZERS_PARALLELISM"] = "true"
def parse_args() -> Namespace:
    parser = ArgumentParser()
    parser.add_argument("--model_path", type=str, required=True)
    parser.add_argument("--gpus", type=int, default=1)
    parser.add_argument("--max_num_seqs", type=int, default=1)
    parser.add_argument("--gpu_memory_utilization", type=float, default=0.95)
    parser.add_argument("--temperature", type=float, default=0)
    parser.add_argument("--max_total_tokens", type=int, default=8192)
    parser.add_argument("--max_new_tokens", type=int, default=512)
    parser.add_argument("--stop_sequences", type=str, default=None)
    parser.add_argument("--testset_path", type=str, required=True)
    parser.add_argument("--output_path", type=str, required=True)
    return parser.parse_args()
def llm_inference(inputs,
                  model_path,
                  gpus=1,
                  max_num_seqs=1,
                  max_total_tokens=8192,
                  gpu_memory_utilization=0.95,
                  temperature=0,
                  max_new_tokens=512,
                  stop_sequences=None):
    # Load the model once, sharded across `gpus` devices via tensor parallelism.
    # max_num_seqs was previously parsed but never used; it is now forwarded to vLLM.
    llm = LLM(
        model=model_path,
        tensor_parallel_size=gpus,
        max_num_seqs=max_num_seqs,
        max_model_len=max_total_tokens,
        gpu_memory_utilization=gpu_memory_utilization,
    )
    # temperature=0 selects greedy (deterministic) decoding in vLLM.
    sampling_params = SamplingParams(
        temperature=temperature,
        max_tokens=max_new_tokens,
        stop=stop_sequences,
    )
    # vLLM batches all prompts internally; keep only the first candidate per prompt.
    gen_results = llm.generate(inputs, sampling_params)
    return [[output.outputs[0].text] for output in gen_results]
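
# Minimal usage sketch for llm_inference (the checkpoint path and prompt below
# are hypothetical placeholders, assuming a single available GPU):
#
#   prompts = ["# This is the assembly code:\n<asm listing>\n# What is the source code?\n"]
#   results = llm_inference(prompts, "./sk2decompile-struct-6.7b", gpus=1)
#   print(results[0][0])  # first (and only) candidate for the first prompt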
if __name__ == "__main__":
    args = parse_args()

    # The test set is a JSON list of samples, each carrying the assembly to decompile.
    with open(args.testset_path, "r") as f:
        samples = json.load(f)
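
    # Expected testset layout (hypothetical example; only "input_asm_prompt" is read):
    #   [
    #     {"input_asm_prompt": "push rbp\nmov rbp, rsp\n..."},
    #     ...
    #   ]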
    # Wrap each assembly listing in the decompilation prompt template.
    before = "# This is the assembly code:\n"
    after = "\n# What is the source code?\n"
    inputs = [before + sample["input_asm_prompt"].strip() + after for sample in samples]

    # Fall back to the model's EOS token if no stop sequence was given.
    tokenizer = AutoTokenizer.from_pretrained(args.model_path)
    if args.stop_sequences is None:
        args.stop_sequences = [tokenizer.eos_token]
    gen_results = llm_inference(inputs,
                                args.model_path,
                                gpus=args.gpus,
                                max_num_seqs=args.max_num_seqs,
                                max_total_tokens=args.max_total_tokens,
                                gpu_memory_utilization=args.gpu_memory_utilization,
                                temperature=args.temperature,
                                max_new_tokens=args.max_new_tokens,
                                stop_sequences=args.stop_sequences)
    # Write one generated C file per sample: <output_path>/<index>.c.
    os.makedirs(args.output_path, exist_ok=True)
    for idx, gen_result in enumerate(gen_results):
        with open(os.path.join(args.output_path, f"{idx}.c"), "w") as f:
            f.write(gen_result[0])
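
# Example invocation (all paths below are placeholders, not shipped defaults):
#   python llm_server.py \
#       --model_path ./sk2decompile-struct-6.7b \
#       --testset_path ./testset.json \
#       --output_path ./decompiled \
#       --gpus 1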