# robert8138/view_count_youtube_api.py

## view_count_youtube_api.py
import os
import pandas as pd

import google_auth_oauthlib.flow
import googleapiclient.discovery
import googleapiclient.errors
import matplotlib

# OAuth scope requested for the session: read-only YouTube access.
scopes = ["https://www.googleapis.com/auth/youtube.readonly"]

# Disable OAuthlib's HTTPS verification when running locally.
# *DO NOT* leave this option enabled in production.
os.environ["OAUTHLIB_INSECURE_TRANSPORT"] = "1"

api_service_name = "youtube"
api_version = "v3"
# Looks like a placeholder: point this at your OAuth client secrets JSON file.
client_secrets_file = "YOUR_CLIENT_SECRET_FILE.json"

# Get credentials and create an API client
flow = google_auth_oauthlib.flow.InstalledAppFlow.from_client_secrets_file(
    client_secrets_file, scopes)
# Newer google-auth-oauthlib releases no longer provide `run_console()`.
# Keep using it where it exists (unchanged behaviour) and fall back to the
# local-server flow otherwise, instead of failing with AttributeError.
if hasattr(flow, "run_console"):
    credentials = flow.run_console()
else:
    credentials = flow.run_local_server(port=0)
youtube = googleapiclient.discovery.build(
    api_service_name, api_version, credentials=credentials)


# Display name of each course -> id of its YouTube playlist.
course_dict = {
    "Stanford CS 230": "PLoROMvodv4rOABXSygHTsbvUz4G_YQhOb",
    "Stanford CS 224N": "PLoROMvodv4rOhcuXMZkNm7j3fVwBBY42z",
    "Stanford CS 231N": "PLC1qU-LWwrF64f4QKQT-Vg5Wr4qEE1Zxk",
    "Stanford CS 229": "PLa-Bt050gYuhEeLRG8YBmFxwLvTJ5FqPS",
    "fast.ai Course": "PLCdvEQLhYkYmKTKWTrH7bHtQ1CsKZaQBl",
    "MIT Linear Algebra": "PL49CF3715CB9EF31D",
    "Caltech Learning from Data": "PLD63A284B7615313A",
    "How to Start a Startup": "PL11qn6zM2Y3bMZdChxEqHKaCaKUjwItGL",
    # really any playlist from Youtube
}

def get_view_ts(course_name, api_client):
    """
    For the playlist registered under course_name in course_dict, retrieve
    every video of the playlist together with its view count.

    All requests go through api_client, every page of the playlist is read,
    and statistics are requested in batches of video ids. Playlist entries
    for which the API returns no view count (presumably deleted or private
    videos) are left out of the result.

    course_name: key of course_dict naming the playlist to read
    api_client: YouTube Data API client exposing playlistItems() and videos()
    return: pandas.DataFrame() with columns course_name, title, video_id,
        views and view_ratio (views divided by the first row's views)
    raises: KeyError when course_name is not a key of course_dict
    """
    if course_name not in course_dict:
        raise KeyError("Unknown course name: {!r}".format(course_name))
    playlist_id = course_dict[course_name]

    # Walk every page of the playlist; one call returns at most maxResults
    # items, and list_next() yields None once there is no further page.
    playlist_items = api_client.playlistItems()
    request = playlist_items.list(
        part="snippet",
        maxResults=50,
        playlistId=playlist_id,
    )
    rows = []
    while request is not None:
        response = request.execute()
        for item in response.get('items', []):
            snippet = item.get('snippet', {})
            rows.append({
                'course_name': course_name,
                'title': snippet.get('title'),
                'video_id': snippet.get('resourceId', {}).get('videoId'),
            })
        request = playlist_items.list_next(request, response)

    # Build the frame once from the collected rows (DataFrame.append was
    # removed in pandas 2.0 and copied the whole frame on every call).
    df = pd.DataFrame(rows, columns=['course_name', 'title', 'video_id'])

    # Ask for statistics in batches of comma-separated ids rather than one
    # request per video. 50 ids per call is assumed to be within the API's
    # per-request limit -- TODO confirm against the videos.list reference.
    batch_size = 50
    unique_ids = list(dict.fromkeys(v for v in df['video_id'] if v))
    views_by_id = {}
    for start in range(0, len(unique_ids), batch_size):
        batch = unique_ids[start:start + batch_size]
        response = api_client.videos().list(
            part="statistics",
            id=",".join(batch),
        ).execute()
        for item in response.get('items', []):
            view_count = item.get('statistics', {}).get('viewCount')
            if view_count is not None:
                views_by_id[item.get('id')] = float(view_count)

    # A lookup keeps exactly one row per playlist entry; merging two frames
    # on video_id would multiply rows whenever a video is listed twice.
    df['views'] = df['video_id'].map(views_by_id).astype(float)
    df = df.dropna(subset=['views']).reset_index(drop=True)

    if df.empty:
        # Nothing to normalise against; keep the column so the schema is stable.
        df['view_ratio'] = df['views']
    else:
        df['view_ratio'] = df['views'] / df['views'].iloc[0]

    return df


# Columns carried over from each per-course frame. view_ratio is included
# because the chart below is labelled and formatted as a percentage of the
# first video's views, which is what view_ratio holds.
result_columns = ['course_name', 'title', 'views', 'view_ratio']

# Gather the per-course frames and concatenate once: DataFrame.append was
# removed in pandas 2.0. Selecting several columns needs a list inside the
# indexer; a bare tuple is looked up as one column label and raises KeyError.
frames = []
for course_name in course_dict:
    df_per_course = get_view_ts(course_name, youtube)
    frames.append(df_per_course[result_columns])

if frames:
    # The per-course indexes are kept (not renumbered) so that the index can
    # serve as the video's position within its playlist when pivoting.
    df_result = pd.concat(frames)
else:
    df_result = pd.DataFrame(columns=result_columns)

# One column per course, one row per position in the playlist.
df_pivot = (
    df_result
        .loc[:, ['course_name', 'view_ratio']]
        .pivot(columns='course_name', values='view_ratio')
)

ax = (
    df_pivot.plot(
        title='View Trends Over Time',
        kind='line',
        figsize=[15, 10]
    )
)

ax.set_xlabel("Video # in the YouTube Playlist", size=14)
ax.set_ylabel("% Views relative to First Video", size=14)
# Relabel the current y ticks as percentages (1.0 -> 100.0%).
vals = ax.get_yticks()
_ = ax.set_yticklabels(['{:,.1%}'.format(x) for x in vals])
	import os
	import pandas as pd

	import google_auth_oauthlib.flow
	import googleapiclient.discovery
	import googleapiclient.errors
	import matplotlib

	# NOTE(review): this tab-indented section repeats the script that appears
	# earlier in the file (looks like a paste/extraction artifact) -- confirm
	# whether it should exist at all.
	# OAuth scope requested for the session: read-only YouTube access.
	scopes = ["https://www.googleapis.com/auth/youtube.readonly"]

	# Disable OAuthlib's HTTPS verification when running locally.
	# DO NOT leave this option enabled in production.
	os.environ["OAUTHLIB_INSECURE_TRANSPORT"] = "1"

	api_service_name = "youtube"
	api_version = "v3"
	# Looks like a placeholder: point this at your OAuth client secrets JSON file.
	client_secrets_file = "YOUR_CLIENT_SECRET_FILE.json"

	# Get credentials and create an API client
	flow = google_auth_oauthlib.flow.InstalledAppFlow.from_client_secrets_file(
		client_secrets_file, scopes)
	# Newer google-auth-oauthlib releases no longer provide `run_console()`.
	# Keep using it where it exists (unchanged behaviour) and fall back to the
	# local-server flow otherwise, instead of failing with AttributeError.
	if hasattr(flow, "run_console"):
		credentials = flow.run_console()
	else:
		credentials = flow.run_local_server(port=0)
	youtube = googleapiclient.discovery.build(
		api_service_name, api_version, credentials=credentials)


	# Display name of each course -> id of its YouTube playlist.
	course_dict = {
		"Stanford CS 230": "PLoROMvodv4rOABXSygHTsbvUz4G_YQhOb",
		"Stanford CS 224N": "PLoROMvodv4rOhcuXMZkNm7j3fVwBBY42z",
		"Stanford CS 231N": "PLC1qU-LWwrF64f4QKQT-Vg5Wr4qEE1Zxk",
		"Stanford CS 229": "PLa-Bt050gYuhEeLRG8YBmFxwLvTJ5FqPS",
		"fast.ai Course": "PLCdvEQLhYkYmKTKWTrH7bHtQ1CsKZaQBl",
		"MIT Linear Algebra": "PL49CF3715CB9EF31D",
		"Caltech Learning from Data": "PLD63A284B7615313A",
		"How to Start a Startup": "PL11qn6zM2Y3bMZdChxEqHKaCaKUjwItGL",
		# really any playlist from Youtube
	}

	def get_view_ts(course_name, api_client):
		"""
		For the playlist registered under course_name in course_dict, retrieve
		every video of the playlist together with its view count.

		All requests go through api_client, every page of the playlist is read,
		and statistics are requested in batches of video ids. Playlist entries
		for which the API returns no view count (presumably deleted or private
		videos) are left out of the result.

		course_name: key of course_dict naming the playlist to read
		api_client: YouTube Data API client exposing playlistItems() and videos()
		return: pandas.DataFrame() with columns course_name, title, video_id,
			views and view_ratio (views divided by the first row's views)
		raises: KeyError when course_name is not a key of course_dict
		"""
		if course_name not in course_dict:
			raise KeyError("Unknown course name: {!r}".format(course_name))
		playlist_id = course_dict[course_name]

		# Walk every page of the playlist; one call returns at most maxResults
		# items, and list_next() yields None once there is no further page.
		playlist_items = api_client.playlistItems()
		request = playlist_items.list(
			part="snippet",
			maxResults=50,
			playlistId=playlist_id,
		)
		rows = []
		while request is not None:
			response = request.execute()
			for item in response.get('items', []):
				snippet = item.get('snippet', {})
				rows.append({
					'course_name': course_name,
					'title': snippet.get('title'),
					'video_id': snippet.get('resourceId', {}).get('videoId'),
				})
			request = playlist_items.list_next(request, response)

		# Build the frame once from the collected rows (DataFrame.append was
		# removed in pandas 2.0 and copied the whole frame on every call).
		df = pd.DataFrame(rows, columns=['course_name', 'title', 'video_id'])

		# Ask for statistics in batches of comma-separated ids rather than one
		# request per video. 50 ids per call is assumed to be within the API's
		# per-request limit -- TODO confirm against the videos.list reference.
		batch_size = 50
		unique_ids = list(dict.fromkeys(v for v in df['video_id'] if v))
		views_by_id = {}
		for start in range(0, len(unique_ids), batch_size):
			batch = unique_ids[start:start + batch_size]
			response = api_client.videos().list(
				part="statistics",
				id=",".join(batch),
			).execute()
			for item in response.get('items', []):
				view_count = item.get('statistics', {}).get('viewCount')
				if view_count is not None:
					views_by_id[item.get('id')] = float(view_count)

		# A lookup keeps exactly one row per playlist entry; merging two frames
		# on video_id would multiply rows whenever a video is listed twice.
		df['views'] = df['video_id'].map(views_by_id).astype(float)
		df = df.dropna(subset=['views']).reset_index(drop=True)

		if df.empty:
			# Nothing to normalise against; keep the column so the schema is stable.
			df['view_ratio'] = df['views']
		else:
			df['view_ratio'] = df['views'] / df['views'].iloc[0]

		return df


	# Columns carried over from each per-course frame. view_ratio is included
	# because the chart below is labelled and formatted as a percentage of the
	# first video's views, which is what view_ratio holds.
	result_columns = ['course_name', 'title', 'views', 'view_ratio']

	# Gather the per-course frames and concatenate once: DataFrame.append was
	# removed in pandas 2.0. Selecting several columns needs a list inside the
	# indexer; a bare tuple is looked up as one column label and raises KeyError.
	frames = []
	for course_name in course_dict:
		df_per_course = get_view_ts(course_name, youtube)
		frames.append(df_per_course[result_columns])

	if frames:
		# The per-course indexes are kept (not renumbered) so that the index can
		# serve as the video's position within its playlist when pivoting.
		df_result = pd.concat(frames)
	else:
		df_result = pd.DataFrame(columns=result_columns)

	# One column per course, one row per position in the playlist.
	df_pivot = (
		df_result
			.loc[:, ['course_name', 'view_ratio']]
			.pivot(columns='course_name', values='view_ratio')
	)

	ax = (
		df_pivot.plot(
			title='View Trends Over Time',
			kind='line',
			figsize=[15, 10]
		)
	)

	ax.set_xlabel("Video # in the YouTube Playlist", size=14)
	ax.set_ylabel("% Views relative to First Video", size=14)
	# Relabel the current y ticks as percentages (1.0 -> 100.0%).
	vals = ax.get_yticks()
	_ = ax.set_yticklabels(['{:,.1%}'.format(x) for x in vals])