import re

import torch

# `tokenizer`, `model`, and `device` are assumed to be defined at module level
# (a ProtT5-style encoder and its matching tokenizer, loaded elsewhere).

def extract_fea(seqs):
    """Return per-residue embeddings for a single protein sequence."""
    sequence_examples = [seqs]
    # Map rare/ambiguous amino acids (U, Z, O, B) to X and insert a space
    # between residues so the tokenizer treats each residue as one token.
    sequence_examples = [" ".join(list(re.sub(r"[UZOB]", "X", sequence)))
                         for sequence in sequence_examples]
    ids = tokenizer.batch_encode_plus(sequence_examples,
                                      add_special_tokens=True,
                                      padding="longest")
    input_ids = torch.tensor(ids["input_ids"]).to(device)
    attention_mask = torch.tensor(ids["attention_mask"]).to(device)
    # Forward pass without gradient tracking; only the embeddings are needed.
    with torch.no_grad():
        embedding_rpr = model(input_ids=input_ids, attention_mask=attention_mask)
    # All token embeddings of the first (and only) sequence in the batch.
    emb_0 = embedding_rpr.last_hidden_state[0, :]
    return emb_0.detach().cpu().numpy()
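The function relies on three globals it does not define: `tokenizer`, `model`, and `device`. A minimal usage sketch follows, assuming they come from a ProtT5 encoder loaded through Hugging Face `transformers`; the `Rostlab/prot_t5_xl_half_uniref50-enc` checkpoint is a common choice, but the original snippet does not name a specific model.

import torch
from transformers import T5Tokenizer, T5EncoderModel

# Assumed setup (not shown in the original snippet): load a ProtT5 encoder.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
tokenizer = T5Tokenizer.from_pretrained("Rostlab/prot_t5_xl_half_uniref50-enc",
                                        do_lower_case=False)
model = T5EncoderModel.from_pretrained("Rostlab/prot_t5_xl_half_uniref50-enc")
model = model.to(device).eval()

# Per-residue features for one sequence; the extra final row corresponds
# to the </s> special token the tokenizer appends.
features = extract_fea("MKTAYIAKQRQISFVKSHFSRQLEERLGLIEVQ")
print(features.shape)  # (sequence_length + 1, hidden_size)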