首页 > 代码库 > Python 批量获取Google用户动态 (分页)

Python 批量获取Google用户动态 (分页)

CODE:

#!/usr/bin/python 
# -*- coding: utf-8 -*-

'''
Created on 2014-9-7
@author: guaguastd
@name: user_activity_loop.py
'''

import json
from login import google_api_request
from html import cleanHtml
import os

MAX_RESULTS = 40
while True:
    query = raw_input("Input query(None to quit): ")
    if query.strip() == '':
        break
    
    people_feed = google_api_request(0, action='search', query=query)
    
    for user in people_feed['items']:
        userId = user['id']
        activity_feed = google_api_request(1, action='list', collection='public', maxResults='20', userId=userId)
        #print json.dumps(activity_feed, indent=1)
        activity_results = []
        while activity_feed != None and len(activity_results) < MAX_RESULTS:
            if activity_feed['items'] is None:
                break
            if 'nextPageToken' in activity_feed:
                nextPageToken = activity_feed['nextPageToken']
                endFlag = 0
            else:
                endFlag = 1
                
            activities = activity_feed

            if 'items' in activities:
                for activity in activities['items']:
                    print 'activity: ', activity
                    if activity['object']['objectType'] == 'note' and activity['object']['content'] != '':
                        activity['title'] = cleanHtml(activity['title'])
                        activity['object']['content'] = cleanHtml(activity['object']['content'])
                        activity_results += [activity]

            # use nextPageToken to get the response objects
            if endFlag == 0:
                activity_feed = google_api_request(1, action='list', collection='public', maxResults='20', userId=userId, pageToken=nextPageToken)
            else:
                break
            
        # Write the output to a fie for convenience
        f = open(os.path.join(r"E:", "\\", "eclipse", "Google", "dfile", userId + '.json'), 'w')
        f.write(json.dumps(activity_results, indent=1))
        f.close()

        print str(len(activity_results)), "activities written to", f.name

RESULT:

activity:  {u'kind': u'plus#activity', u'provider': {u'title': u'Google+'}, u'title': u"This is a really great review of my friend +William Janeway's book, Doing Capitalism in the Innovation...", u'url': u'https://plus.google.com/107033731246200681024/posts/Y9xzjP27w4H', u'object': {u'resharers': {u'totalItems': 11, u'selfLink': u'https://www.googleapis.com/plus/v1/activities/z12fhlxhbxrvfdchu04chl3pvxfbszirt5o/people/resharers'}, u'attachments': [{u'displayName': u'The One Book on Wall Street You Haven\u2019t Read\u2014but Should', u'fullImage': {u'url': u'http://www.slate.com/content/dam/slate/articles/arts/books/2013/02/SBR/ILLOS/1302_SBR_DOINGCAPITALISM_IL.jpg/_jcr_content/renditions/cq5dam.web.1280.1280.jpeg', u'type': u'image/jpeg'}, u'url': u'http://www.slate.com/articles/business/books/2013/02/william_janeway_s_doing_capitalism_in_the_innovation_economy_reviewed.html', u'image': {u'url': u'https://lh6.googleusercontent.com/proxy/aq9sbrtgUd_VDXEMdLmCOf4Mf3aCXly3WXZ4_0jz2mgWgc9DLSHN3ptTUNiQ1mDCmBMsSby_b87hMQstwvelwB6ryJJ5G-FnKW9swDu4o4IrPih2tsTqNT13isFWq7M6V2osI2nb-djCTJtnFDVgy9L1oj5X3NPFiNcqxmM8ZUssNch4pQNRO4SeCp2MGDXlEaXMNTXV-XNbe5o7WxQNv6PYRwFx91hTYE44qPOK6A=w506-h303-p', u'width': 506, u'type': u'image/jpeg', u'height': 303}, u'content': u'Why Wall Street? What good does Beezlebubbian finance really provide? Wouldn\u2019t America be better off if we could magically transport ourselves back to the \u201950s\u2014when Wall Street was sleepy, the middle class was robust, and children dreamed of rocket ships? 
What is the essence of this activity known as finance?...', u'objectType': u'article'}], u'url': u'https://plus.google.com/107033731246200681024/posts/Y9xzjP27w4H', u'content': u'This is a really great review of my friend <span class="proflinkWrapper"><span class="proflinkPrefix">+</span><a class="proflink" href="https://plus.google.com/114912453748528069739" oid="114912453748528069739">William Janeway</a></span>&#39;s book, Doing Capitalism in the Innovation Economy. \xa0It succinctly lays out the key premises of the book, and explains why they are so important to a proper understanding of the role of venture capital, government, and bubbles in the growth of our economy.\ufeff', u'plusoners': {u'totalItems': 39, u'selfLink': u'https://www.googleapis.com/plus/v1/activities/z12fhlxhbxrvfdchu04chl3pvxfbszirt5o/people/plusoners'}, u'replies': {u'totalItems': 4, u'selfLink': u'https://www.googleapis.com/plus/v1/activities/z12fhlxhbxrvfdchu04chl3pvxfbszirt5o/comments'}, u'objectType': u'note'}, u'updated': u'2014-04-29T19:33:10.240Z', u'actor': {u'url': u'https://plus.google.com/107033731246200681024', u'image': {u'url': u'https://lh4.googleusercontent.com/-J8nmMwIhpiA/AAAAAAAAAAI/AAAAAAACoLI/0MpUu2BMqTM/photo.jpg?sz=50'}, u'displayName': u"Tim O'Reilly", u'id': u'107033731246200681024'}, u'access': {u'items': [{u'type': u'public'}], u'kind': u'plus#acl', u'description': u'Public'}, u'verb': u'post', u'etag': u'"L2Xbn8bDuSErT6QA3PEQiwYKQxM/DMb7RzmIBBbIbdrrj-R0SGXDbTw"', u'published': u'2014-04-29T19:33:10.240Z', u'id': u'z12fhlxhbxrvfdchu04chl3pvxfbszirt5o'}

Python 批量获取Google用户动态 (分页)