Skip to content

Instantly share code, notes, and snippets.

@adgedenkers
Last active September 24, 2024 18:38
Show Gist options
  • Save adgedenkers/763b28632f27ffb06cf97ccbc63021b1 to your computer and use it in GitHub Desktop.
Save adgedenkers/763b28632f27ffb06cf97ccbc63021b1 to your computer and use it in GitHub Desktop.
import pandas as pd
def compare_excel_sheets(file1, file2, sheet1, sheet2, columns_to_compare, unique_id_column):
# Load the excel files
df1 = pd.read_excel(file1, sheet_name=sheet1)
df2 = pd.read_excel(file2, sheet_name=sheet2)
# Merge the dataframes on 'Position Number'
merged_df = pd.merge(df1, df2, on=unique_id_column, suffixes=('_file1', '_file2'))
# Create a dataframe for storing differences
differences = []
# Iterate through the selected columns and compare each corresponding row
for index, row in merged_df.iterrows():
for col in columns_to_compare:
col_file1 = f'{col}_file1'
col_file2 = f'{col}_file2'
if col_file1 in merged_df.columns and col_file2 in merged_df.columns:
value1 = str(row[col_file1]).strip().lower() # Normalize by trimming and converting to lowercase
value2 = str(row[col_file2]).strip().lower()
if value1 != value2:
differences.append({
unique_id_column: row[unique_id_column], # Add the Position Number ID
'row_index': index,
'column': col,
'file1_value': row[col_file1],
'file2_value': row[col_file2]
})
# Create a DataFrame for the differences
differences_df = pd.DataFrame(differences)
return differences_df
def save_differences_to_excel(differences_df, output_file):
# Save the differences to an Excel file
differences_df.to_excel(output_file, index=False)
print(f"Differences saved to {output_file}")
# Example usage:
if __name__ == "__main__":
file1 = 'file1.xlsx'
file2 = 'file2.xlsx'
sheet1 = 'Sheet1'
sheet2 = 'Sheet1'
columns_to_compare = [
'Position Title',
'Jobcode',
'Reports to Position Number',
'Reports to Name',
'Future Dept Id'
] # Columns to compare (exclude 'Position Number' as it's the join key)
unique_id_column = 'Position Number' # Unique identifier column
# Compare the sheets
differences_df = compare_excel_sheets(file1, file2, sheet1, sheet2, columns_to_compare, unique_id_column)
# Output the differences
if not differences_df.empty:
save_differences_to_excel(differences_df, 'differences_output.xlsx')
else:
print("No differences found!")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment