首页 > 代码库 > python goatools使用
python goatools使用
用DAVID搞定了所有的GO term之后,接下来就是利用GO slim处理这些term。
用的包是goatools,需要下载几个obo文件,并安装fisher、pygraphviz以及graphviz等几个模块。
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Map the GO terms found in JSON records onto GO slim terms.

Created on Fri Nov 21 20:06:42 2014

@author: hluo
"""
import json
import os
import os.path as op
import re
import sys

# Make the modules that live one directory above this script importable.
sys.path.insert(0, op.join(op.dirname(__file__), ".."))
from obo_parser import GODag
from mapslim import mapslim
# Alternative import path: from goatools.mapslim import mapslim

DEFAULT_OBO_FILE = '/home/hluo/Desktop/goslim/go-basic.obo'
DEFAULT_SLIM_OBO_FILE = '/home/hluo/Desktop/goslim/goslim_generic.obo'

# Non-whitespace text that is preceded by '$' and followed by '~'.
GO_TERM_PATTERN = re.compile(r'(?<=\$)\S+(?=\~)')


# copied from find_enrichment.py
def get_goslim(term, godag, goslimdag):
    """Return ``[direct_anc, all_anc]`` for *term* as computed by ``mapslim``.

    If *term* is not contained in *godag*, ``[None, None]`` is returned
    instead, so callers must check for ``None`` before using the result.
    """
    if term not in godag:
        return [None, None]
    direct_anc, all_anc = mapslim(term, godag, goslimdag)
    return [direct_anc, all_anc]


def mygofun(json_file, obo_file=DEFAULT_OBO_FILE,
            slim_obo_file=DEFAULT_SLIM_OBO_FILE):
    """Add slim-term keys to every record of *json_file*.

    The JSON file is expected to hold an iterable of dict records. For each
    key of a record that starts with ``'GO'``, two new keys are added:
    ``<key>_dslim`` (direct slim terms) and ``<key>_aslim`` (all slim terms).
    The augmented records are written to ``<json_file>.txt``.

    Args:
        json_file: path of the JSON file to read.
        obo_file: path of the full GO OBO file.
        slim_obo_file: path of the GO slim OBO file.

    Raises:
        AssertionError: if one of the OBO files does not exist.
    """
    assert os.path.exists(obo_file), "file %s not found!" % obo_file
    assert os.path.exists(slim_obo_file), "file %s not found!" % slim_obo_file

    # load DAGs
    go_dag = GODag(obo_file)
    goslim_dag = GODag(slim_obo_file)

    with open(json_file) as handle:
        records = json.load(handle)

    for record in records:
        # Snapshot the keys first: new keys are added to the record below,
        # and a dict must not change size while it is being iterated.
        go_keys = [key for key in record if key.startswith('GO')]
        for key in go_keys:
            direct_slims = set()
            all_slims = set()
            for text in record[key]:
                match = GO_TERM_PATTERN.search(text)
                if match is None:
                    # No GO identifier in this annotation string.
                    continue
                direct_anc, all_anc = get_goslim(
                    match.group(0), go_dag, goslim_dag)
                if direct_anc is None:
                    # Term is not in the GO DAG, so there is nothing to map.
                    continue
                direct_slims.update(direct_anc)
                all_slims.update(all_anc)
            # Sorted so that the output file is reproducible between runs.
            record[key + '_dslim'] = sorted(direct_slims)
            record[key + '_aslim'] = sorted(all_slims)

    with open('%s.txt' % json_file, 'w') as handle:
        json.dump(records, handle, indent=1)


if __name__ == '__main__':
    # An optional command-line argument overrides the default input file.
    mygofun(sys.argv[1] if len(sys.argv) > 1 else 'NC_000913.gbk.json')
The script loads a JSON file, adds two new keys (`<GO key>_dslim` and `<GO key>_aslim`) to every record, and writes the result to `<json file>.txt`.
P.S. dslim: direct slim terms; aslim: all slim terms.
Then I run a Python batch script to process all the JSON files.
# -*- coding: utf-8 -*-
"""Run ``mygofun`` on every JSON file in a directory.

Created on Mon Nov 24 17:37:24 2014

@author: hluo
"""
import os
import re
import sys

from mygoslim import mygofun

DEFAULT_DIR = '/home/hluo/Desktop/gbk'

# File names that end in ".json".
JSON_SUFFIX = re.compile(r'\.json$')


if __name__ == '__main__':
    # An optional command-line argument overrides the default directory.
    mydir = sys.argv[1] if len(sys.argv) > 1 else DEFAULT_DIR

    # Collect the full list before processing anything, so files created in
    # the directory during the run are not picked up.
    json_file_list = [name for name in sorted(os.listdir(mydir))
                      if JSON_SUFFIX.search(name)]

    for name in json_file_list:
        mygofun(os.path.join(mydir, name))
The script uses the 're' and 'os' modules to find all the JSON files in the directory.
python goatools使用
声明:以上内容来自用户投稿及互联网公开渠道收集整理发布,本网站不拥有所有权,未作人工编辑处理,也不承担相关法律责任,若内容有误或涉及侵权可进行投诉: 投诉/举报 工作人员会在5个工作日内联系你,一经查实,本站将立刻删除涉嫌侵权内容。