Twitter's streaming api permits filtering tweets by geolocation. According to the api documentation, only tweets that are created using the Geotagging API can be filtered. The code below uses tweepy to filter tweets for the San Francisco area.
#!/usr/bin/env python import tweepy import ConfigParser import os, sys class Listener(tweepy.StreamListener): def on_status(self, status): print "screen_name='%s' tweet='%s'"%(status.author.screen_name, status.text) def login(config): """Tweepy oauth dance The config file should contain: [auth] CONSUMER_KEY = ... CONSUMER_SECRET = ... ACCESS_TOKEN = ... ACCESS_TOKEN_SECRET = ... """ CONSUMER_KEY = config.get('auth','CONSUMER_KEY') CONSUMER_SECRET = config.get('auth','CONSUMER_SECRET') ACCESS_TOKEN = config.get('auth','ACCESS_TOKEN') ACCESS_TOKEN_SECRET = config.get('auth','ACCESS_TOKEN_SECRET') auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET) auth.set_access_token(ACCESS_TOKEN, ACCESS_TOKEN_SECRET) return auth fn=sys.argv[1] config = ConfigParser.RawConfigParser() config.read(fn) try: auth = login(config) streaming_api = tweepy.streaming.Stream(auth, Listener(), timeout=60) # San Francisco area. streaming_api.filter(follow=None, locations=[-122.75,36.8,-121.75,37.8]) except KeyboardInterrupt: print "got keyboardinterrupt"
Find the complete codebase on github at: https://github.com/telvis07/twitter_mining