Spaces: Running on Zero

	Changed to nomic
    	
app.py CHANGED
@@ -6,15 +6,16 @@ import spaces
 import torch
 
 # neuralmind/bert-base-portuguese-cased
-ModelName = "neuralmind/bert-base-portuguese-cased"
-model = AutoModel.from_pretrained(ModelName)
-tokenizer = AutoTokenizer.from_pretrained(ModelName, do_lower_case=False)
-processor = AutoImageProcessor.from_pretrained("nomic-ai/nomic-embed-vision-v1.5")
-vision_model = AutoModel.from_pretrained("nomic-ai/nomic-embed-vision-v1.5", trust_remote_code=True)
+#ModelName = "neuralmind/bert-base-portuguese-cased"
+#model = AutoModel.from_pretrained(ModelName)
+#tokenizer = AutoTokenizer.from_pretrained(ModelName, do_lower_case=False)
 
-
-#
-#
+
+#processor = AutoImageProcessor.from_pretrained("nomic-ai/nomic-embed-vision-v1.5")
+#vision_model = AutoModel.from_pretrained("nomic-ai/nomic-embed-vision-v1.5", trust_remote_code=True)
+tokenizer = AutoTokenizer.from_pretrained('nomic-ai/nomic-embed-text-v1.5')
+text_model = AutoModel.from_pretrained('nomic-ai/nomic-embed-text-v1.5', trust_remote_code=True)
+text_model.eval()
 
 def mean_pooling(model_output, attention_mask):
     token_embeddings = model_output[0]
@@ -26,25 +27,25 @@ def TxtEmbed(text):
 
 
 
-    input_ids = tokenizer.encode(text, return_tensors='pt')
+    #input_ids = tokenizer.encode(text, return_tensors='pt')
 
-    with torch.no_grad():
-        outs = model(input_ids)
-        encoded = outs[0][0, 1:-1]  # Ignore [CLS] and [SEP] special tokens
-    return (encoded.tolist())[0];
+    #with torch.no_grad():
+    #    outs = model(input_ids)
+    #    encoded = outs[0][0, 1:-1]  # Ignore [CLS] and [SEP] special tokens
+    #return (encoded.tolist())[0];
 
 
-
-
-
-
-
-
-
-
-
-
+    sentences = [text]
+    encoded_input = tokenizer(sentences, padding=True, truncation=True, return_tensors='pt')
+
+    with torch.no_grad():
+        model_output = text_model(**encoded_input)
+
+    text_embeddings = mean_pooling(model_output, encoded_input['attention_mask'])
+    text_embeddings = F.layer_norm(text_embeddings, normalized_shape=(text_embeddings.shape[1],))
+    text_embeddings = F.normalize(text_embeddings, p=2, dim=1)
+
+    return (text_embeddings.tolist)[0]
 
 
 
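For reference, here is a minimal, self-contained sketch of the text-embedding path this commit switches to. It assumes transformers and torch are installed, and that mean_pooling uses the standard masked-mean implementation from the nomic-ai/nomic-embed-text-v1.5 model card (the diff only shows the first line of that helper). The function name txt_embed is illustrative. One detail worth noting: the committed line returns (text_embeddings.tolist)[0], which references the method without calling it and would raise a TypeError when indexed; the sketch calls .tolist() instead.

import torch
import torch.nn.functional as F
from transformers import AutoModel, AutoTokenizer

def mean_pooling(model_output, attention_mask):
    # Masked mean over token embeddings so padding tokens do not contribute.
    token_embeddings = model_output[0]
    mask = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
    return torch.sum(token_embeddings * mask, 1) / torch.clamp(mask.sum(1), min=1e-9)

tokenizer = AutoTokenizer.from_pretrained('nomic-ai/nomic-embed-text-v1.5')
text_model = AutoModel.from_pretrained('nomic-ai/nomic-embed-text-v1.5', trust_remote_code=True)
text_model.eval()

def txt_embed(text):
    # Tokenize one sentence, encode, mean-pool, then layer-norm and L2-normalize,
    # matching the post-processing shown in the diff.
    encoded_input = tokenizer([text], padding=True, truncation=True, return_tensors='pt')
    with torch.no_grad():
        model_output = text_model(**encoded_input)
    emb = mean_pooling(model_output, encoded_input['attention_mask'])
    emb = F.layer_norm(emb, normalized_shape=(emb.shape[1],))
    emb = F.normalize(emb, p=2, dim=1)
    return emb.tolist()[0]  # note: .tolist() is called here, unlike the committed code

if __name__ == '__main__':
    # The nomic-embed-text model card recommends task prefixes such as 'search_document: ';
    # the committed TxtEmbed passes the raw text through without one.
    vec = txt_embed('search_document: Olá, mundo!')
    print(len(vec))  # embedding dimensionality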