-
-
Save brenorb/1ec2afb8d66af850acc294309b9e49ea to your computer and use it in GitHub Desktop.
#!/usr/bin/env python | |
# encoding: utf-8 | |
import tweepy #https://github.com/tweepy/tweepy | |
import csv | |
# Twitter API credentials.
# Fill these in before running: get_all_tweets() passes consumer_key and
# consumer_secret to tweepy.OAuthHandler and access_key / access_secret to
# set_access_token. Keep real values out of version control.
consumer_key = ""
consumer_secret = ""
access_key = ""
access_secret = ""
def get_all_tweets(screen_name):
    """Download a user's timeline and save it to ``<screen_name>_tweets.csv``.

    The timeline is fetched in pages of 200 tweets with ``api.user_timeline``,
    walking backwards with ``max_id`` until a request returns no tweets. The
    resulting CSV has the columns ``id``, ``created_at`` and ``text``.

    Args:
        screen_name: Twitter handle of the account to download.

    Returns:
        None. The result is the CSV file written to the current directory;
        progress is printed to stdout.
    """
    # Twitter only allows access to a user's most recent 3240 tweets with
    # this method.

    # authorize twitter, initialize tweepy
    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_key, access_secret)
    api = tweepy.API(auth)

    # list holding every tweepy Tweet collected so far
    alltweets = []

    # initial request for the most recent tweets (200 is the maximum count)
    new_tweets = api.user_timeline(screen_name=screen_name, count=200)
    alltweets.extend(new_tweets)

    # Keep grabbing tweets until a request comes back empty. Checking
    # new_tweets before touching alltweets[-1] means an account with no
    # tweets no longer raises IndexError; it just yields a header-only CSV.
    while new_tweets:
        # id of the oldest tweet seen so far, less one, so the next page
        # starts strictly before it and contains no duplicates
        oldest = alltweets[-1].id - 1
        print("getting tweets before {}".format(oldest))

        new_tweets = api.user_timeline(
            screen_name=screen_name, count=200, max_id=oldest
        )
        alltweets.extend(new_tweets)

        print("...{} tweets downloaded so far".format(len(alltweets)))

    # Transform the tweepy tweets into rows for the csv. The text stays a
    # str: encoding it to bytes here makes csv write the repr (b'...') under
    # Python 3. Encoding happens once, at the file boundary below.
    outtweets = [
        [tweet.id_str, tweet.created_at, tweet.text] for tweet in alltweets
    ]

    # newline='' is what the csv module requires to avoid blank rows on
    # Windows; an explicit utf-8 encoding keeps non-ASCII tweets writable
    # regardless of the platform default.
    with open('{}_tweets.csv'.format(screen_name), 'w',
              newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(["id", "created_at", "text"])
        writer.writerows(outtweets)

    print('{}_tweets.csv was successfully created.'.format(screen_name))
if __name__ == '__main__':
    # pass in the username of the account you want to download
    get_all_tweets("J_tsar")
Amazing. How do I remove "b'" from showing up before every tweet?
Can I use this to get the URLs of tweets ?
Amazing. How do I remove "b'" from showing up before every tweet?
try this: https://www.kaggle.com/code/asimislam/text-mining-wordcloud-from-tweets-ripharambe
Don't you need elevated access to do this stuff?
Not at the time I wrote it. It uses only web scraping, not an API. I haven't used it in a long time, so things might be a little different now.
Amazing. How do I remove "b'" from showing up before every tweet?
This means the value is of type bytes. I believe you can fix it by calling the .decode() method, as in the example below:
name_byte = b'Alice'
name_str = name_byte.decode()
print(name_str)
> Alice
Can I use this to get the URLs of tweets ?
I haven't used it in a long time, but if I remember correctly, when the tweet link is https://twitter.com/Twitter/status/1577730467436138524, the tweet id is the long number at the end: 1577730467436138524. So you just build 'https://twitter.com/' + screen_name + '/status/' + id
and it will work.
Don't you need elevated access to do this stuff?