Spaces:
Paused
Paused
# coding=utf-8
import json


def build_comparison_dataset(data, threshold=5.0):
    """Build (better, worse) response pairs from scored comparison records.

    Each element of ``data`` is read as a mapping with a ``"user_input"``
    entry and a ``"responses_and_scores"`` list whose items carry a
    ``"score"`` (converted with ``float``) and a ``"response"``.

    For every example the responses are sorted by ``(score, response)``,
    empty responses at the low-score end are discarded, and a pair is kept
    only when the lowest remaining score is strictly below ``threshold`` and
    the highest is strictly above it.

    Args:
        data: Iterable of example records as described above.
        threshold: Score that must separate the worst and best response.

    Returns:
        A list of dicts with keys ``"instruction"``, ``"input"`` (always the
        empty string) and ``"output"`` (``[best_response, worst_response]``).
    """
    dataset = []
    for example in data:
        instruction = example["user_input"]
        scored = sorted(
            (float(resp["score"]), resp["response"])
            for resp in example["responses_and_scores"]
        )

        # Skip empty responses at the low-score end. The scan is bounded by
        # the list length so an example with no responses, or with only empty
        # ones, is skipped instead of raising IndexError.
        first = 0
        while first < len(scored) and not scored[first][1]:
            first += 1
        scored = scored[first:]
        if not scored:
            continue

        (min_score, worst), (max_score, best) = scored[0], scored[-1]
        if min_score < threshold < max_score:
            dataset.append({
                "instruction": instruction,
                "input": "",
                "output": [best, worst],
            })
    return dataset


def main():
    """Convert ``comparison_data_v2.json`` into ``comparison_gpt4_data_en.json``."""
    with open("comparison_data_v2.json", "r", encoding="utf-8") as f:
        data = json.load(f)

    dataset = build_comparison_dataset(data)

    with open("comparison_gpt4_data_en.json", "w", encoding="utf-8", newline="\n") as f:
        json.dump(dataset, f, indent=2, ensure_ascii=False)


if __name__ == "__main__":
    main()