首页 > 代码库 > 利用python将二值csv格式转换为矩阵

利用python将二值csv格式转换为矩阵

#!/usr/bin/env python
# coding:utf-8
#import pandas as pd, numpy as np;

"""
Convert a CSV file into the corresponding adjacency matrix ``mat``.
"""

from numpy import *;


def _read_relations(path):
    """Parse ``name;item1,item2,...`` lines from *path*.

    Returns a list of ``(name, [item, ...])`` tuples in file order.  Only
    the first two ``;``-separated fields of each line are used.  Blank
    lines are skipped.

    Raises:
        ValueError: if a non-blank line contains no ``;`` separator.
    """
    relations = []
    with open(path) as src:
        for lineno, raw in enumerate(src, 1):
            # Drop the line terminator; otherwise the last item of every
            # line keeps its "\n" and is treated as a different label from
            # the same item appearing elsewhere in a line.
            line = raw.rstrip('\r\n')
            if not line.strip():
                continue
            fields = line.split(';')
            if len(fields) < 2:
                raise ValueError(
                    "%s:%d: expected 'name;item1,item2,...', got %r"
                    % (path, lineno, line))
            relations.append((fields[0], fields[1].split(',')))
    return relations


def _append_lines(path, values):
    """Append each value of *values* to *path*, one per line."""
    # NOTE(review): append mode is kept from the original code, so running
    # the script twice duplicates the output -- confirm this is intended.
    with open(path, 'a') as out:
        out.writelines('%s\n' % value for value in values)


def _export_adjacency(source_path, row_path, col_path, matrix_path):
    """Build a 0/1 adjacency matrix from *source_path* and write it out.

    Rows are the sorted distinct names (first field of each line), columns
    are the sorted distinct items (second field, comma separated).  The row
    labels, the column labels and the matrix rows (as Python list literals,
    one row per line) are appended to *row_path*, *col_path* and
    *matrix_path* respectively.

    Returns the ``(row_labels, col_labels)`` lists.
    """
    relations = _read_relations(source_path)

    # NOTE(review): the original comments said the header entry is removed
    # after de-duplication, but no code ever did so; a header line in the
    # input therefore still ends up as a row/column label.
    row_labels = sorted(set(name for name, _ in relations))
    col_labels = sorted(set(item for _, items in relations for item in items))

    # Dict lookups instead of list.index() keep the fill loop linear.
    row_of = {label: pos for pos, label in enumerate(row_labels)}
    col_of = {label: pos for pos, label in enumerate(col_labels)}

    mat = zeros((len(row_labels), len(col_labels)), dtype=int16)
    for name, items in relations:
        row = row_of[name]
        for item in items:
            mat[row, col_of[item]] = 1

    _append_lines(row_path, row_labels)
    _append_lines(col_path, col_labels)
    _append_lines(matrix_path, (str(row) for row in mat.tolist()))
    return row_labels, col_labels


def protein_complexes_trans():
    """Convert ``protein_complexes.csv`` into an adjacency matrix.

    Reads ``protein;complex1,complex2,...`` lines from
    ``protein_complexes.csv`` in the current directory and appends:

    * the sorted distinct proteins to ``complexes``,
    * the sorted distinct second-field items to ``targets(complexes)``,
    * the 0/1 matrix rows to ``protein_complexes_interaction_matrix``.

    Prints ``"<rows>&<columns>"`` and returns 0.

    Raises:
        ValueError: if a non-blank input line has no ``;`` separator.
    """
    proteins, targets = _export_adjacency(
        'protein_complexes.csv',
        'complexes',
        'targets(complexes)',
        'protein_complexes_interaction_matrix')
    # Single-argument call form works as both a Python 2 print statement
    # and a Python 3 function call.
    print("%d&%d" % (len(proteins), len(targets)))
    return 0


def drugs_targets_trans():
    """Convert ``drugs_targets_sum`` into an adjacency matrix.

    Reads ``drug;target1,target2,...`` lines from ``drugs_targets_sum`` in
    the current directory and appends:

    * the sorted distinct drugs to ``drugs``,
    * the sorted distinct targets to ``targets``,
    * the 0/1 matrix rows to ``drugs_targets_interaction_matrix``.

    Raises:
        ValueError: if a non-blank input line has no ``;`` separator.
    """
    _export_adjacency(
        'drugs_targets_sum',
        'drugs',
        'targets',
        'drugs_targets_interaction_matrix')

# Script entry: only the drug/target conversion is run here; the
# protein-complex conversion (protein_complexes_trans) is left disabled.
drugs_targets_trans()

 

利用python将二值csv格式转换为矩阵