DownloadProteins

Fasta

# Load the identifier and sequence columns from `df` (defined earlier;
# presumably a pandas DataFrame -- confirm against the loading code).
# The module-level names `id` and `seq` are kept because later code may use them.
id = df['NAME']
seq = df['Protein sequence']

# Write one FASTA record (">name" header line + sequence line) per row.
# zip() pairs the two columns positionally, so this does not depend on the
# index being 0..n-1; if `df` is a pandas DataFrame, label-based `id[i]`
# lookups would fail or mis-pair rows after filtering/reindexing.
with open('human.fasta', 'w') as f:
    f.writelines(
        f">{name}\n{sequence}\n" for name, sequence in zip(id, seq)
    )

PDB

import requests
import json
import os

def _download_prediction(protein, timeout):
    """Fetch the prediction metadata and PDB bytes for one protein.

    Returns a ``(metadata_dict, pdb_bytes)`` tuple, or ``None`` when the
    metadata request is not HTTP 200, the response holds no entries, the
    first entry has no ``pdbUrl``, or the PDB download is not HTTP 200.
    Network errors (``requests.RequestException``) and malformed JSON
    (``ValueError``) propagate to the caller.
    """
    # NOTE(review): the '***' in the URL looks like a redacted API key
    # placeholder (the original note joked about a "10-yuan scratch card");
    # substitute a real key before running -- TODO confirm.
    resp = requests.get(
        'https://alphafold.com/api/prediction/' + str(protein) + '?key=***',
        timeout=timeout,
    )
    if resp.status_code != 200:
        return None

    entries = json.loads(resp.text)
    # The original indexed [0] unconditionally; guard against an empty or
    # non-list payload instead of raising.
    if not isinstance(entries, list) or not entries:
        return None
    json_res = entries[0]

    pdb_url = json_res.get('pdbUrl')
    if not pdb_url:
        return None

    pdb = requests.get(pdb_url, timeout=timeout)
    # Without this check an HTTP error page would be saved as a .pdb file.
    if pdb.status_code != 200:
        return None
    return json_res, pdb.content


def get_PDBFile(dir, proteins_list, timeout=30):
    """Download prediction metadata and PDB files for a list of proteins.

    Creates ``dir/MetaJson`` (one ``<protein>.json`` per hit), ``dir/PDB``
    (one ``<protein>.pdb`` per hit) and ``dir/null_proteins.txt`` (one line
    per protein that could not be downloaded). Prints each missing protein
    as it is encountered and a summary count at the end.

    A protein is recorded as missing when either request does not return
    HTTP 200, the metadata is empty or lacks ``pdbUrl``, the JSON is
    malformed, or a network error occurs. Metadata is only written when the
    PDB download also succeeds, so the two directories stay in sync.

    Args:
        dir: Output directory; created if it does not exist.
        proteins_list: Iterable of protein identifiers (converted with
            ``str()`` for URLs and file names).
        timeout: Per-request timeout in seconds passed to ``requests.get``.

    Returns:
        None.
    """
    # Directory holding the JSON metadata files.
    meta_dir = os.path.join(dir, 'MetaJson')
    os.makedirs(meta_dir, exist_ok=True)
    # Directory holding the PDB files.
    pdb_dir = os.path.join(dir, 'PDB')
    os.makedirs(pdb_dir, exist_ok=True)

    cnt = 0
    null_proteins = []

    for protein in proteins_list:
        try:
            result = _download_prediction(protein, timeout)
        except (requests.RequestException, ValueError) as err:
            # Keep going so that one bad request does not lose the whole run
            # (and the null_proteins.txt report) for the remaining proteins.
            print(f'{protein}: {err}')
            result = None

        if result is None:
            print(protein)
            null_proteins.append(protein)
            continue

        json_res, pdb_bytes = result

        path_meta = os.path.join(meta_dir, str(protein) + '.json')
        with open(path_meta, 'w', encoding='utf-8') as json_file:
            json.dump(json_res, json_file, ensure_ascii=False, indent=4)

        path_pdb = os.path.join(pdb_dir, str(protein) + '.pdb')
        # Context manager guarantees the handle is flushed and closed.
        with open(path_pdb, 'wb') as pdb_file:
            pdb_file.write(pdb_bytes)
        cnt += 1

    # Record the proteins that could not be retrieved.
    path_null = os.path.join(dir, 'null_proteins.txt')
    with open(path_null, 'w', encoding='utf-8') as file:
        file.writelines(f"{item}\n" for item in null_proteins)
    print(f'{cnt} pdbfiles were collected !')

Seq

def getFasta(src, dst, timeout=30):
    """Fetch FASTA records for the accessions listed in a text file.

    Reads ``src`` (UTF-8 text, one accession per line), requests
    ``https://www.uniprot.org/uniprot/<accession>.fasta`` for each one and
    appends every HTTP 200 response body to ``dst``. Prints the number of
    records written.

    Blank lines are skipped and surrounding whitespace (including a stray
    ``\\r``) is stripped. The last line is processed whether or not the file
    ends with a newline. Accessions whose request fails or returns a non-200
    status are skipped.

    Args:
        src: Path to the text file of accessions.
        dst: Path of the FASTA file to append to; it is only opened once a
            record has actually been fetched.
        timeout: Per-request timeout in seconds passed to ``requests.get``.

    Returns:
        None.
    """
    with open(src, 'r', encoding='utf-8') as file:
        accessions = [line.strip() for line in file if line.strip()]

    cnt = 0
    for accession in accessions:
        try:
            resp = requests.get(
                f'https://www.uniprot.org/uniprot/{accession}.fasta',
                timeout=timeout,
            )
        except requests.RequestException as err:
            # One unreachable record should not abort the remaining downloads.
            print(f'{accession}: {err}')
            continue
        if resp.status_code != 200:
            continue
        # Append mode: existing content of dst is preserved across calls.
        with open(dst, 'a', encoding='utf-8') as output_file:
            output_file.write(resp.text)
        cnt += 1
    print(cnt)