首页 > 代码库 > 关于alzheimer disease论文的统计

关于alzheimer disease论文的统计

1.获取2016年的所有关键字,保存到keyword_2016.json中

import pymysql
import json

# Connect to the local MySQL database named "python" as root with an empty
# password (the quotes around these values were lost in the original listing).
conn = pymysql.connect(
    host="localhost",
    port=3306,
    user="root",
    passwd="",
    db="python",
)
try:
    cursor = conn.cursor()

    # Select the keyword string and PMC id of every 2016 row whose keyword
    # string is not empty.
    sql = (
        "SELECT union_kwd_str, pmc_id FROM alzheimer "
        "WHERE pub_year = '2016' AND union_kwd_str != ''"
    )
    a = cursor.execute(sql)  # execute() returns the number of matching rows
    print(a)
    # Fetch every matching row; each row is (union_kwd_str, pmc_id).
    # The original author noted 7887 rows for their data set.
    b = cursor.fetchall()
finally:
    # Release the connection even if the query fails.
    conn.close()

# Keyword strings in row order, and a row-index -> pmc_id lookup table.
abstract_list = [row[0] for row in b]
pmc_id_dict = {j: row[1] for j, row in enumerate(b)}



def output_to_json(data, filename):
    """Serialise *data* as JSON, write it to *filename*, and return the text.

    Args:
        data: Any object accepted by ``json.dumps``.
        filename: Path of the file to create or overwrite.

    Returns:
        The JSON string that was written to the file.
    """
    # Serialise once so the file content and the return value always agree.
    serialized = json.dumps(data)
    # The ``with`` block closes the handle; no explicit close() is needed.
    with open(filename, "w") as handle:
        handle.write(serialized)
    return serialized

# Bundle the query result and persist it for the keyword-counting step.
# The keys must be string literals; the original listing had lost its quotes.
output_data = {
    "pub_year": "2016",
    "count": a,  # number of rows returned by the query
    "keyword": abstract_list,  # one keyword string per paper
}
output_to_json(output_data, "keyword_2016.json")

从keyword_2016.json中读取关键词,并统计选出前25的关键词

import re  
import collections  
import json

def input_from_json(filename):
    with open(filename,‘r‘) as file:
        data = http://www.mamicode.com/json.loads(file.read())"keyword_2016.json"  
    fobj2 = open(‘sort_keyword_2016.json‘,‘w‘)
 
    dword = count_word(file_name)  
    dword = sort_by_count(dword)  
      
    jsonlist = []
    num = 0

    for key,value in dword.items():
        num += 1
        key = re.sub("_", " ", key)
        data = http://www.mamicode.com/{>

  

2.获取发表论文量排名前十的国家

1)把所有第一作者的信息保存到authorinfor.json中

import pymysql
import json

# Connect to the local MySQL database named "python" as root with an empty
# password (the quotes around these values were lost in the original listing).
conn = pymysql.connect(
    host="localhost",
    port=3306,
    user="root",
    passwd="",
    db="python",
)
try:
    cursor = conn.cursor()

    # Select the author-affiliation string and PMC id of every row whose
    # affiliation string is not empty (no publication-year filter here).
    sql = "SELECT authorinfor, pmc_id FROM alzheimer WHERE authorinfor != ''"
    a = cursor.execute(sql)  # execute() returns the number of matching rows
    print(a)
    # Fetch every matching row; each row is (authorinfor, pmc_id).
    b = cursor.fetchall()
finally:
    # Release the connection even if the query fails.
    conn.close()

# Affiliation strings in row order, and a row-index -> pmc_id lookup table.
authorinfor_list = [row[0] for row in b]
pmc_id_dict = {j: row[1] for j, row in enumerate(b)}

def output_to_json(data, filename):
    """Serialise *data* as JSON, write it to *filename*, and return the text.

    Args:
        data: Any object accepted by ``json.dumps``.
        filename: Path of the file to create or overwrite.

    Returns:
        The JSON string that was written to the file.
    """
    # Serialise once so the file content and the return value always agree.
    serialized = json.dumps(data)
    # The ``with`` block closes the handle; no explicit close() is needed.
    with open(filename, "w") as handle:
        handle.write(serialized)
    return serialized

# Bundle the query result and persist it for the country-ranking step.
# The keys must be string literals; the original listing had lost its quotes.
# NOTE(review): the payload is labelled "2016" although the query that feeds
# it does not filter on pub_year -- confirm which one is intended.
output_data = {
    "pub_year": "2016",
    "count": a,  # number of rows returned by the query
    "authorinfor": authorinfor_list,  # one affiliation string per paper
    "pmc_id": pmc_id_dict,  # row index -> pmc_id (keys become strings in JSON)
}
output_to_json(output_data, "authorinfor.json")

2)选出排名前十的国家

import re  
import collections  
import json

def input_from_json(filename):
    """Read *filename* and return its content parsed as JSON.

    Args:
        filename: Path of a file containing a single JSON document.

    Returns:
        The decoded Python object (dict, list, ...).
    """
    # The ``with`` block closes the handle; no explicit close() is needed.
    with open(filename, "r") as handle:
        return json.loads(handle.read())

def count_word(path):
    """Count how many affiliation strings end in each country name.

    The JSON file at *path* must hold an ``"authorinfor"`` list of strings.
    The country is taken to be the text after the last comma of each string,
    with dots, newlines and surrounding whitespace removed.

    Args:
        path: Path of the JSON file to read.

    Returns:
        A dict mapping country name to the number of occurrences.
    """
    result = {}
    authorinfor_list = input_from_json(path)["authorinfor"]
    for all_the_text in authorinfor_list:
        # The last comma-separated field is treated as the country.
        country = all_the_text.split(",")[-1]
        # Drop dots and newlines, then trim, so that " China." and "China"
        # land in the same bucket.
        country = country.replace(".", "").replace("\n", "").strip()
        # Keep the name as text: encoding to bytes here would make the later
        # json.dumps() call fail on Python 3.
        result[country] = result.get(country, 0) + 1
    return result
      
 
def sort_by_count(d):
    """Return an OrderedDict of *d*'s items ordered by descending value.

    Items with equal values keep the relative order they had in *d*.
    """
    def negated_value(pair):
        # Negating the count makes the ascending sort yield largest first.
        return -pair[1]

    ranked_pairs = sorted(d.items(), key=negated_value)
    return collections.OrderedDict(ranked_pairs)

 
if __name__ == "__main__":
    # Rank countries by number of papers and dump the ranking to disk.
    file_name = "authorinfor.json"

    dword = count_word(file_name)
    dword = sort_by_count(dword)

    # Write one JSON object per line for the first 49 ranked countries.
    # The ``with`` block guarantees the output file is flushed and closed.
    with open("sort_country.json", "w") as fobj2:
        for num, (country, value) in enumerate(dword.items(), start=1):
            if num >= 50:
                break
            data = {
                "name": country,
                "value": value,
            }
            fobj2.write(json.dumps(data))
            fobj2.write("\n")

    # list() is required on Python 3, where keys()/values() are views that
    # cannot be sliced.
    countrylist = list(dword.keys())
    valuelist = list(dword.values())

    # Show the first 11 entries of the ranking and their counts.
    print(countrylist[:11])
    print(valuelist[:11])

 

关于alzheimer disease论文的统计