Last active
August 2, 2018 13:36
-
-
Save cab938/cf5468339f114192a33a2085effb10ec to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!pip install html5lib  # install html5lib; this only needs to be run once
# You might need to restart the kernel afterwards, using the menu Kernel > Restart.
import urllib

import numpy as np
import pandas as pd
# Description and prices of the Xeon Gold processors, read from the Wikipedia
# list of Intel Xeon microprocessors (fetched through the course proxy).
# NOTE(review): the table is selected by position (index 78) -- confirm this
# still points at the intended table if the page layout has changed.
df_xeon_golds = pd.read_html(
    'https://proxy.mentoracademy.org/getContentFromUrl/?userid=brooks&url=https://en.wikipedia.org/wiki/List_of_Intel_Xeon_microprocessors',
    header=0,
)[78]

# Statistics about the performance of a range of Intel processors from cpu-monkey.
df_stats = pd.read_csv(
    'https://proxy.mentoracademy.org/getContentFromUrl/?userid=brooks&url=https://gist.github.com/cab938/6499da85d31cfccc9cc5b13621963312/raw/34db3b55bd14f39fc59e6b5128b667a9061f77d7/cpu_performance.csv'
)
# Clean up the price column in df_xeon_golds.
def clean_price(price):
    """Convert a release-price cell such as ``"$1,221"`` into an integer.

    Args:
        price: The raw cell value. Any object is accepted; it is converted
            with ``str()`` before parsing.

    Returns:
        The whole-dollar amount as an ``int`` when the text starts with ``$``
        and the remainder is an integer (thousands separators allowed);
        otherwise ``None``.
    """
    text = str(price).strip()
    if not text.startswith('$'):
        return None
    # int() rejects thousands separators, so "$1,221" must lose its comma first.
    digits = text[1:].replace(',', '')
    try:
        return int(digits)
    except ValueError:
        # Not a plain integer (e.g. empty, a range, or trailing footnote text).
        return None
# Derive a "price" column by running clean_price over the release-price text.
df_xeon_golds["price"] = df_xeon_golds["Release price (USD)"].apply(clean_price)

# Left join: keep every Xeon Gold row and attach the performance statistics
# whose "processor" value equals the row's "Model number".
joined_df = df_xeon_golds.merge(
    df_stats,
    how="left",
    left_on=["Model number"],
    right_on=["processor"],
)
def calculate_stats(row):
    """Add a ``price_performance`` entry (performance per unit price) to *row*.

    Args:
        row: A row supporting item access, with ``'price'`` and
            ``'performance'`` entries (for example a ``pandas.Series`` passed
            by ``DataFrame.apply(..., axis=1)``).

    Returns:
        The same *row*, with ``row['price_performance']`` set to
        ``performance / price`` when both values are present, or to NaN when
        either one is missing.
    """
    price = row['price']
    performance = row['performance']
    if pd.isna(price) or pd.isna(performance):
        # Always create the key so the resulting frame has the column even
        # when no row qualifies; pd.isna also copes with None, unlike np.isnan.
        row['price_performance'] = np.nan
    else:
        # Higher is better value for the money.
        row['price_performance'] = performance / price
    return row
# Apply the function above to create a new column with price per performance.
joined_df = joined_df.apply(calculate_stats, axis=1)

# Just consider those processors which have stats. The explicit .copy() makes
# the filtered frame independent, so the column assignment below does not
# write into a slice of the previous frame (SettingWithCopyWarning).
joined_df = joined_df[joined_df['price_performance'].notnull()].copy()

# Determine the rank of each processor in a new column (rank 1 = best value).
joined_df['price_performance_rank'] = joined_df['price_performance'].rank(ascending=False)

# Show the top five items. As a bare expression this is only displayed when it
# is the last statement of a notebook cell.
joined_df.sort_values(by='price_performance_rank').head(5)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment