Skip to content

Instantly share code, notes, and snippets.

@cab938
Last active August 2, 2018 13:36
Show Gist options
  • Save cab938/cf5468339f114192a33a2085effb10ec to your computer and use it in GitHub Desktop.
Save cab938/cf5468339f114192a33a2085effb10ec to your computer and use it in GitHub Desktop.
#!pip install html5lib #install html5lib, only needs to be run once
#You may need to restart the kernel after the install finishes (menu: Kernel > Restart)
import pandas as pd
import numpy as np
import urllib
# Descriptions and release prices of the Xeon Gold processors: every table on
# the Wikipedia page is parsed (first row used as the header) and the one at
# index 78 is kept.
xeon_page_tables = pd.read_html(
    'https://proxy.mentoracademy.org/getContentFromUrl/?userid=brooks&url=https://en.wikipedia.org/wiki/List_of_Intel_Xeon_microprocessors',
    header=0,
)
df_xeon_golds = xeon_page_tables[78]
# Performance statistics for a range of Intel processors (from cpu-monkey),
# read from a CSV file pinned to a specific gist revision.
stats_csv_url = 'https://proxy.mentoracademy.org/getContentFromUrl/?userid=brooks&url=https://gist.github.com/cab938/6499da85d31cfccc9cc5b13621963312/raw/34db3b55bd14f39fc59e6b5128b667a9061f77d7/cpu_performance.csv'
df_stats = pd.read_csv(stats_csv_url)
#clean up the price column in df_xeon_golds
def clean_price(price):
    """Convert a release-price cell such as ``'$1,002'`` into an integer.

    Args:
        price: Raw cell value. Anything is accepted; it is converted with
            ``str()`` before parsing.

    Returns:
        The price as an ``int`` when the text starts with ``'$'`` and the
        remainder is a whole number (thousands separators allowed),
        otherwise ``None``.
    """
    text = str(price).strip()
    if not text.startswith('$'):
        return None
    # Drop thousands separators: int('1,002') raises ValueError, which would
    # otherwise silently discard every price of $1,000 or more.
    digits = text[1:].replace(',', '')
    try:
        return int(digits)
    except ValueError:
        # Not a plain whole-dollar amount (e.g. a range, decimals or a footnote).
        return None
# Derive a "price" column by running clean_price over the raw
# "Release price (USD)" text.
raw_prices = df_xeon_golds["Release price (USD)"]
df_xeon_golds["price"] = raw_prices.apply(clean_price)
# Left join: keep every Xeon Gold row and attach the performance statistics
# whose "processor" value equals the row's "Model number".
joined_df = df_xeon_golds.merge(
    df_stats,
    how="left",
    left_on=["Model number"],
    right_on=["processor"],
)
def calculate_stats(row):
    """Add a ``price_performance`` entry (performance per unit of price) to *row*.

    Args:
        row: Mapping-like record (e.g. a pandas Series) providing ``'price'``
            and ``'performance'``. It is modified in place.

    Returns:
        The same *row*, with ``row['price_performance']`` set to
        ``performance / price`` when both values are present and to NaN
        otherwise.
    """
    price = row['price']
    performance = row['performance']
    if pd.isna(price) or pd.isna(performance):
        # Always define the key so the resulting column exists even when no
        # row has both values; pd.isna also accepts None, unlike np.isnan.
        row['price_performance'] = np.nan
    else:
        # higher is better value for the money
        row['price_performance'] = performance / price
    return row
# Apply calculate_stats to every row to create the price_performance column.
joined_df = joined_df.apply(calculate_stats, axis=1)
# Keep only the processors that have a price_performance value. The explicit
# .copy() makes the result an independent frame, so the column assignment
# below does not write to a slice (avoids pandas' SettingWithCopyWarning).
joined_df = joined_df[joined_df['price_performance'].notnull()].copy()
# Rank the processors in a new column; ascending=False gives rank 1 to the
# highest price_performance value.
joined_df['price_performance_rank'] = joined_df['price_performance'].rank(ascending=False)
# Last expression of the cell: the five best-ranked rows. A notebook displays
# this value; a plain script would have to print it explicitly.
joined_df.sort_values(by='price_performance_rank').head(5)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment