import functools

import torch

from .sentence_encoder import SentenceEncoder
from textattack.shared import utils, TokenizedText, WordEmbedding


class ThoughtVector(SentenceEncoder):
    """A constraint on the distance between two sentences' thought vectors.

    Args:
        embedding_type (str): The word embedding to use
        min_cos_sim: the minimum cosine similarity between thought vectors
        max_mse_dist: the maximum MSE distance between thought vectors
    """

    def __init__(self, embedding_type='paragramcf', **kwargs):
        self.word_embedding = WordEmbedding(embedding_type)
        self.embedding_type = embedding_type
        super().__init__(**kwargs)

    @functools.lru_cache(maxsize=2**10)
    def _get_thought_vector(self, text):
        """Averages the embeddings of all in-vocabulary words in ``text`` into a single "thought vector"."""
        embeddings = []
        for word in utils.words_from_text(text):
            embedding = self.word_embedding[word]
            if embedding is not None:  # out-of-vocab words do not have embeddings
                embeddings.append(embedding)
        embeddings = torch.tensor(embeddings)
        return torch.mean(embeddings, dim=0)

    def encode(self, raw_text_list):
        return [self._get_thought_vector(text) for text in raw_text_list]

    def extra_repr_keys(self):
        """Set the extra representation of the constraint using these keys."""
        return ['embedding_type'] + super().extra_repr_keys()
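
# Usage sketch (not part of the module's API): this is a hedged illustration of how the
# constraint might be instantiated and how ``encode`` produces one averaged word-embedding
# ("thought vector") per input string. The import path below and the ``min_cos_sim`` keyword
# (taken from the class docstring) are assumptions and may differ across TextAttack versions.
#
#     from textattack.constraints.semantics.sentence_encoders import ThoughtVector
#
#     constraint = ThoughtVector(embedding_type='paragramcf', min_cos_sim=0.8)
#     vectors = constraint.encode(['the quick brown fox', 'the fast brown fox'])
#     # ``vectors`` holds one thought vector per input; the SentenceEncoder base class then
#     # compares the original and perturbed vectors (e.g. by cosine similarity) to decide
#     # whether a candidate perturbation satisfies the constraint.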