Spaces:
Runtime error
Runtime error
| import os | |
| import cv2 | |
| import json | |
| import time | |
| import pickle | |
| import openai | |
| import re | |
| from word2number import w2n | |
def create_dir(output_dir):
    """Create ``output_dir`` (and any missing parent directories).

    Calling this for a directory that already exists is a no-op.

    Args:
        output_dir: Path of the directory to create.

    Raises:
        FileExistsError: If ``output_dir`` exists but is not a directory.
    """
    # exist_ok=True replaces the former "check, then create" sequence, which
    # could fail if the directory appeared between the check and the call.
    os.makedirs(output_dir, exist_ok=True)
def read_csv(file):
    """Return the lines of ``file`` as a list of stripped strings.

    No CSV parsing is performed: every element is one raw line of the file
    with leading and trailing whitespace removed.
    """
    with open(file, 'r') as handle:
        return [row.strip() for row in handle]
def read_pandas_csv(csv_path):
    """Load the CSV file at ``csv_path`` into a pandas DataFrame."""
    # pandas is imported inside the function, so it is only needed when this
    # helper is actually called.
    import pandas
    return pandas.read_csv(csv_path)
def read_json(path):
    """Parse the UTF-8 encoded JSON file at ``path`` and return the result."""
    with open(path, 'r', encoding='utf-8') as handle:
        content = handle.read()
    return json.loads(content)
def read_jsonl(file):
    """Read a JSON Lines file and return its records as a list.

    Args:
        file: Path to a text file holding one JSON document per line.

    Returns:
        A list with the decoded value of every non-blank line, in file order.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # Decode explicitly as UTF-8 (as read_json does) rather than relying on
    # the platform's default encoding.
    with open(file, 'r', encoding='utf-8') as f:
        # Blank lines (e.g. an empty line at the end of the file) hold no
        # record; json.loads would raise on them, so they are skipped.
        return [json.loads(line) for line in f if line.strip()]
def read_pickle(path):
    """Unpickle and return the object stored in the file at ``path``.

    NOTE: pickle.load can execute arbitrary code while deserializing, so this
    helper must only be used on files from a trusted source.
    """
    with open(path, 'rb') as handle:
        payload = pickle.load(handle)
    return payload
def save_json(data, path):
    """Serialize ``data`` as JSON with a 4-space indent and write it to ``path``."""
    with open(path, 'w') as out:
        out.write(json.dumps(data, indent=4))
def save_array_img(path, image):
    """Write an image array to ``path`` using OpenCV.

    Args:
        path: Destination file name, passed to cv2.imwrite unchanged.
        image: Image data, passed to cv2.imwrite unchanged.

    Returns:
        The value returned by cv2.imwrite (truthy when the image was saved).
    """
    # cv2.imwrite can signal a failed write through its return value instead
    # of raising; hand that value back so callers are able to detect it.
    return cv2.imwrite(path, image)
def contains_digit(text):
    """Return True if at least one character of ``text`` is a digit."""
    for ch in text:
        if ch.isdigit():
            return True
    return False
def contains_number_word(text):
    """Return True if ``text`` contains a number word or a digit.

    Each word of ``text`` is passed to ``w2n.word_to_num``; the first word
    that converts without a ValueError makes the result True. The words
    "a", "an" and "point" are skipped and never tested. If no word converts,
    the result is True only when ``text`` contains a digit character.
    """
    skipped = ["a", "an", "point"]
    # The pattern extracts every run of word characters in the text.
    for token in re.findall(r'\b\w+\b', text):
        if token not in skipped:
            try:
                w2n.word_to_num(token)
            except ValueError:
                # Not a number word; try the next one.
                pass
            else:
                return True
    # No word converted: fall back to looking for a digit character.
    return any(ch.isdigit() for ch in text)
def contains_quantity_word(text, special_keep_words=None):
    """Return True if ``text`` contains a quantity-related word.

    Matching is done on whole words and is case-sensitive.

    Args:
        text: The string to inspect.
        special_keep_words: Optional iterable of extra (dataset-specific)
            words that should also count as quantity words.

    Returns:
        True if any word of ``text`` is a known quantity word, else False.
    """
    # NOTE: "fewest" and "more" are separate entries. A missing comma used to
    # fuse them into the single string "fewestmore", so neither ever matched.
    quantity_words = {
        "most", "least", "fewest",
        "more", "less", "fewer",
        "largest", "smallest", "greatest",
        "larger", "smaller", "greater",
        "highest", "lowest", "higher", "lower",
        "increase", "decrease",
        "minimum", "maximum", "max", "min",
        "mean", "average", "median",
        "total", "sum", "add", "subtract",
        "difference", "quotient", "gap",
        "half", "double", "twice", "triple",
        "square", "cube", "root",
        "approximate", "approximation",
        "triangle", "rectangle", "circle", "sphere", "cylinder", "cone", "pyramid",
        "multiply", "divide",
        "percentage", "percent", "ratio", "proportion", "fraction", "rate",
    }
    if special_keep_words:
        quantity_words.update(special_keep_words)  # dataset specific words

    # The pattern extracts every run of word characters in the text.
    words = re.findall(r'\b\w+\b', text)
    return any(word in quantity_words for word in words)
def is_bool_word(text):
    """Return True if ``text`` is exactly a yes/no/true/false token.

    Only the lowercase, Capitalized and UPPERCASE spellings are accepted.
    """
    accepted = (
        "Yes", "No", "True", "False",
        "yes", "no", "true", "false",
        "YES", "NO", "TRUE", "FALSE",
    )
    return text in accepted
def is_digit_string(text):
    """Return True if ``text`` can be read as an integer.

    Surrounding whitespace and a trailing all-zero fraction (".0", ".000",
    ...) are removed first; the remainder must be accepted by ``int()``.
    """
    # Drop whitespace, then a trailing ".0000"-style suffix.
    candidate = re.sub(r'\.0+$', '', text.strip())
    try:
        int(candidate)
    except ValueError:
        return False
    return True
def is_float_string(text):
    """Return True if ``text`` contains a "." and is accepted by ``float()``."""
    # Without a decimal point the text is never treated as a float string.
    if "." not in text:
        return False
    try:
        float(text)
    except ValueError:
        return False
    return True
def copy_image(image_path, output_image_path):
    """Copy the file at ``image_path`` to ``output_image_path``."""
    import shutil
    shutil.copyfile(image_path, output_image_path)
def copy_dir(src_dir, dst_dir):
    """Recursively copy the directory tree at ``src_dir`` to ``dst_dir``."""
    import shutil
    # shutil.copytree is called with its default options.
    shutil.copytree(src_dir, dst_dir)
| import PIL.Image as Image | |
def get_image_size(img_path):
    """Return the ``(width, height)`` of the image stored at ``img_path``.

    Args:
        img_path: Path of the image file, passed to ``Image.open``.

    Returns:
        A ``(width, height)`` tuple taken from the opened image's ``size``.
    """
    # Open the image as a context manager so the underlying file is closed
    # again once the size has been read, instead of leaking the handle.
    with Image.open(img_path) as img:
        width, height = img.size
    return width, height
def get_chat_response(promot, api_key, api_base, model="gpt-3.5-turbo", temperature=0, max_tokens=256, n=1, patience=10000000,
                      sleep_time=0):
    """Query ``openai.ChatCompletion`` with a single user message, retrying.

    Args:
        promot: Prompt text sent as the content of the user message.
        api_key: Forwarded to ``openai.ChatCompletion.create``.
        api_base: Forwarded to ``openai.ChatCompletion.create``.
        model: Forwarded to ``openai.ChatCompletion.create``.
        temperature: Forwarded to ``openai.ChatCompletion.create``.
        max_tokens: Forwarded to ``openai.ChatCompletion.create``.
        n: Number of completions requested.
        patience: Maximum number of attempts.
        sleep_time: Seconds to sleep after a failed attempt (skipped when 0).

    Returns:
        For ``n == 1`` the stripped text of the first choice; otherwise a list
        with the stripped text of every choice. An attempt whose (first)
        text is empty is retried. Returns "" once ``patience`` is used up.
    """
    messages = [{"role": "user", "content": promot}]

    attempts_left = patience
    while attempts_left > 0:
        attempts_left -= 1
        try:
            response = openai.ChatCompletion.create(
                model=model,
                messages=messages,
                api_key=api_key,
                api_base=api_base,
                temperature=temperature,
                max_tokens=max_tokens,
                n=n,
            )
            if n == 1:
                prediction = response['choices'][0]['message']['content'].strip()
                if prediction != "" and prediction is not None:
                    return prediction
            else:
                prediction = [choice['message']['content'].strip() for choice in response['choices']]
                if prediction[0] != "" and prediction[0] is not None:
                    return prediction
        except Exception as e:
            error_text = str(e)
            # Rate-limit errors are retried without being printed.
            if "Rate limit" not in error_text:
                print(e)
            if "Please reduce the length of the messages" in error_text:
                print("!!Reduce promot size")
                # Shrink the prompt to 90% of its length, keeping the tail.
                kept = int(len(promot) * 0.9)
                promot = promot[len(promot) - kept:]
                messages = [{"role": "user", "content": promot}]
            # NOTE(review): the extracted source lost its indentation; this
            # sleep is assumed to belong to the except handler - confirm.
            if sleep_time > 0:
                time.sleep(sleep_time)
    return ""