首页 > 代码库 > Python 提取Twitter tweets中的元素(包含text, screen names, hashtags)

Python 提取Twitter tweets中的元素(包含text, screen names, hashtags)

#!/usr/bin/python 
# -*- coding: utf-8 -*-

'''
Created on 2014-7-1
@author: guaguastd
@name: tweets.py
'''

import json

# import search, see http://blog.csdn.net/guaguastd/article/details/35537781
from search import search
    
# import login, see http://blog.csdn.net/guaguastd/article/details/31706155
from login import oauth_login

# Authenticate and obtain the Twitter API handle (oauth_login is provided by
# the project's login module imported above).
twitter_api = oauth_login()

# Ask the user which term to search for.
query = raw_input('\nInput the query (eg. #MentionSomeoneImportantForYou): ')

# Run the search. The rest of the script iterates the result as a sequence of
# status dicts -- presumably what search() returns; verify against search.py.
statuses = search(twitter_api, query)

# Dump the first status as a sample of the raw structure. A query with no
# matches would make statuses[0] raise IndexError, so only index when there
# is at least one result.
if statuses:
    print(json.dumps(statuses[0], indent=1))
else:
    print('No tweets found for query: %s' % query)
        
# Pull the text, mentioned screen names, and hashtags out of every status.
# Each collection is built in its own pass over `statuses`, in tweet order.

# Text of each tweet
status_texts = []
for tweet in statuses:
    status_texts.append(tweet['text'])

# Screen name of every user mentioned in any tweet
screen_names = []
for tweet in statuses:
    mentions = tweet['entities']['user_mentions']
    screen_names.extend(mention['screen_name'] for mention in mentions)

# Text of every hashtag used in any tweet
hashtags = []
for tweet in statuses:
    tags = tweet['entities']['hashtags']
    hashtags.extend(tag['text'] for tag in tags)

# Flat list of all whitespace-separated words across all tweet texts
words = []
for text in status_texts:
    words.extend(text.split())

# Preview the first five entries of each collection, one JSON dump per
# collection, in the order: texts, screen names, hashtags, words.
for collection in (status_texts, screen_names, hashtags, words):
    preview = collection[:5]
    print(json.dumps(preview, indent=1))