Spaces:
Runtime error
Runtime error
def drop_duplicates_in_input(untokenized_dataset):
    """Collapse rows that share an ``id`` into one row with a list of outputs.

    The first row seen for each distinct ``id`` is kept. The ``output``
    values of every row with that ``id`` (the kept row included) are
    gathered, in encounter order, into a list. The ``output`` column is
    then removed and an ``outputs`` column holding those lists is added.

    Args:
        untokenized_dataset: Object with ``"id"`` and ``"output"`` columns
            reachable by ``[...]`` and providing ``select``,
            ``flatten_indices``, ``remove_columns`` and ``add_column``.
            NOTE(review): presumably a Hugging Face ``datasets.Dataset`` --
            confirm against the caller.

    Returns:
        The value returned by ``add_column`` on the de-duplicated dataset.
    """
    first_rows = []
    # Dicts preserve insertion order, so the grouped lists come out in the
    # same order as ``first_rows``.
    outputs_by_id = {}

    id_column = untokenized_dataset["id"]
    output_column = untokenized_dataset["output"]
    for row, (example_id, output) in enumerate(zip(id_column, output_column)):
        if example_id not in outputs_by_id:
            first_rows.append(row)
            outputs_by_id[example_id] = [output]
        else:
            outputs_by_id[example_id].append(output)

    deduplicated = untokenized_dataset.select(first_rows)
    deduplicated = deduplicated.flatten_indices()
    deduplicated = deduplicated.remove_columns("output")
    return deduplicated.add_column("outputs", list(outputs_by_id.values()))