首页 > 代码库 > Python编程系统下的大数据处理(微信公众号的预测评价)

Python编程系统下的大数据处理(微信公众号的预测评价)

import sqlite3
import xlrd

def read_excel():
    c = []
    # 打开文件
    workbook = xlrd.open_workbook(rF:/期中作业/dataWx_org.xlsx)
    # 获取所有sheet
    sheet_name = workbook.sheet_names()[0]
    sheet = workbook.sheet_by_name(sheet_name)
    
    #获取一行的内容
    for i in range(1,sheet.nrows):
        a = []
        for j in range(0,sheet.ncols):
            a.append(sheet.cell(i,j).value)
        c.append(a[0:13])
    #返回整个表单
    return c
            
            
def table_create(c):  
    ‘‘‘创建表并导入数据‘‘‘
    #创建表
    conn = sqlite3.connect(./exp.db)
    curs = conn.cursor()
    try:
        curs.execute(‘‘‘CREATE TABLE IF NOT EXISTS TB_CHECK
               (item FLOAT,
                name TEXT,
                wx_name TEXT,
                title TEXT,
                top FLOAT,
                posttime TEXT,
                day INT,
                readnum_pm FLOAT,
                likenum_pm FLOAT,
                get_time_pm TEXT,
                status FLOAT,
                url TEXT,
                content TEXT);‘‘‘)
    except sqlite3.OperationalError as e:
        print(e)

    #导入数据
#    for each in c:
#        curs.execute("INSERT INTO TB_CHECK VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?)"
#                     , each[0:13])
    conn.commit()
    return conn, curs
 
def final_number(conn, curs):
    ‘‘‘算法A估计超过10万的阅读量‘‘‘
    #设置阅读总量、关键字、时间,三个列表
    key = [i for i in range(1,31)]
    allnumber = []
    DAY = [i for i in range(41883,41913)]
    #计算每天的阅读总量并记录在列表中
    for i in range(0,30):
        onenumber = 0
        for row in curs.execute("SELECT * FROM TB_CHECK WHERE TB_CHECK.day=%d"%DAY[i]):
            onenumber += row[7]
        allnumber.append(onenumber)
    #设置关键字key,排除无效数据
    for row in curs.execute("SELECT * FROM TB_CHECK WHERE TB_CHECK.readnum_pm=100001"):
        for i in range(0,30):
            if DAY[i]==row[6]:
                key[i]=0
    #计算平均每天的阅读总量sumnumber                              
    daytime = 30
    sumnumber = 0                
    for i in range(0,30):
        if key[i]==0:
            daytime -= 1
        else:
            sumnumber += allnumber[i]
    sumnumber /= daytime
    print(sumnumber)       
    #利用算法计算阅读量超过100001的实际阅读量,存入finalnum
    finalnum = []
    for row in curs.execute("SELECT * FROM TB_CHECK WHERE TB_CHECK.readnum_pm=100001"):
        for i in range(0,30):
            if DAY[i]==row[6]:      
                finalnumber=sumnumber-allnumber[i]
                if finalnumber<0:
                    finalnumber *= (-1)
                finalnumber += 100001
                finalnum.append(finalnumber)
                print(finalnumber)
    #将新数据重新导入db文件
#    for row in curs.execute("SELECT * FROM TB_CHECK WHERE TB_CHECK.readnum_pm=100001"):
#        for i in range(0,30):
#            curs.execute(‘UPDATE TB_CHECK SET TB_CHECK.readnum_pm=? WHERE TB_CHECK.readnum_pm=?‘,(finalnum[i], 100001))        
    
    conn.commit()

def final_Rank(conn, curs): 
    ‘‘‘算法B评价微信影响力‘‘‘
    #设置三个等级参数、总等级参数、条目参数
    allRank1 = 0
    allRank2 = 0
    allRank3 = 0
    Rank = []
    Name = []
    ITEM = [i for i in range(1,33)]
    #计算三个总参数,分别是总头条数,总阅读量,总点赞数
    for row in curs.execute("SELECT * FROM TB_CHECK"):
        allRank1 += row[4]
        allRank2 += row[7]
        allRank3 += row[8]
    #求每个微信号的影响力等级,存入Rank中    
    for i in range(0,32):
        Rank1 = 0
        Rank2 = 0
        Rank3 = 0
        for row in curs.execute("SELECT * FROM TB_CHECK WHERE TB_CHECK.item=%f"%ITEM[i]):
            Rank1 += row[4]
            Rank2 += row[7]
            Rank3 += row[8]
        Rank.append(Rank1/allRank1*0.3 + Rank2/allRank2*0.4 +Rank3/allRank3*0.3)
        Name.append(row[1])
    #冒泡排序
    for i in range(0,32):
        for j in range(0,32):
            if Rank[i]>=Rank[j]:
                k = Rank[i]
                Rank[i] = Rank[j]
                Rank[j] = k
                    
                k = ITEM[i]
                ITEM[i] = ITEM[j]
                ITEM[j] = k
                
                k = str(k)
                k = Name[i]
                Name[i] = Name[j]
                Name[j] = k
    #显示计算结果,分别是排名,微信号序列,微信号名称,微信号评分
    for i in range(0,32):
        print(i+1,ITEM[i],Name[i],int(Rank[i]*1000))
        
    conn.commit()

def forecast(conn, curs):
    ‘‘‘算法C预计微信影响力变化‘‘‘
    #设置三个参数,日期,条目,等级
    DAY = [i for i in range(41883,41913)]
    ITEM = [i for i in range(1,33)]
    Rank_DAY = []
    Rank = []
    Name = []
    NAME = []
    IT = []
    #计算每天每个微信号的排名变化,存入IT,算法过程类似于算法B部分
    for i in range(0,30):
        allRank1 = 0
        allRank2 = 0
        allRank3 = 0
        for row in curs.execute("SELECT * FROM TB_CHECK WHERE TB_CHECK.day=%d"%DAY[i]):
            allRank1 += row[4]
            allRank2 += row[7]
            allRank3 += row[8]
        for Item in ITEM:
            Rank1 = 0
            Rank2 = 0
            Rank3 = 0
            for row in curs.execute("SELECT * FROM TB_CHECK WHERE TB_CHECK.day=%f AND TB_CHECK.item=%f"%(DAY[i],Item)):
                Rank1 += row[4]
                Rank2 += row[7]
                Rank3 += row[8]
            Rank_DAY.append(Rank1/allRank1*0.3 + Rank2/allRank2*0.4 +Rank3/allRank3*0.3)
            Name.append(row[1])
        IT_DAY = [i for i in range(1,33)]
        for j in range(0,32):
            for k in range(0,32):
                if Rank_DAY[j]>=Rank_DAY[k]:
                    k = Rank_DAY[i]
                    Rank_DAY[i] = Rank_DAY[j]
                    Rank_DAY[j] = k
                        
                    k = IT_DAY[i]
                    IT_DAY[i] = IT_DAY[j]
                    IT_DAY[j] = k 
                         
                    k = str(k)
                    k = Name[i]
                    Name[i] = Name[j]
                    Name[j] = k
        IT.append(IT_DAY)
        NAME.append(Name)
        Rank.append(Rank_DAY)
#        print(IT)
    #输入变化后的日期,输出变化结果
    K = 1
    while K == 1:
        inputday = int(input("输入未来天数"))
        for i in range(0,30):
            if inputday == i:
                for j in range(0,32):
                    print(IT[i][j],NAME[i][j],int(Rank[i][j]*1000))
    
    conn.commit()             
                      
                    
def main():
    c = read_excel()
    conn, curs = table_create(c)
    i = 1
    while i == 1:
        K = int(input("输入你要的操作:1,算法A   2,算法B   3,算法C    :"))
        if K == 1: 
            final_number(conn, curs)
        elif K == 2:         
            final_Rank(conn, curs)
        elif K == 3:      
            forecast(conn, curs)  
    conn.close()

main()

 

Python编程系统下的大数据处理(微信公众号的预测评价)