Last active
September 24, 2024 18:38
-
-
Save adgedenkers/763b28632f27ffb06cf97ccbc63021b1 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
def compare_excel_sheets(file1, file2, sheet1, sheet2, columns_to_compare, unique_id_column): | |
# Load the excel files | |
df1 = pd.read_excel(file1, sheet_name=sheet1) | |
df2 = pd.read_excel(file2, sheet_name=sheet2) | |
# Merge the dataframes on 'Position Number' | |
merged_df = pd.merge(df1, df2, on=unique_id_column, suffixes=('_file1', '_file2')) | |
# Create a dataframe for storing differences | |
differences = [] | |
# Iterate through the selected columns and compare each corresponding row | |
for index, row in merged_df.iterrows(): | |
for col in columns_to_compare: | |
col_file1 = f'{col}_file1' | |
col_file2 = f'{col}_file2' | |
if col_file1 in merged_df.columns and col_file2 in merged_df.columns: | |
value1 = str(row[col_file1]).strip().lower() # Normalize by trimming and converting to lowercase | |
value2 = str(row[col_file2]).strip().lower() | |
if value1 != value2: | |
differences.append({ | |
unique_id_column: row[unique_id_column], # Add the Position Number ID | |
'row_index': index, | |
'column': col, | |
'file1_value': row[col_file1], | |
'file2_value': row[col_file2] | |
}) | |
# Create a DataFrame for the differences | |
differences_df = pd.DataFrame(differences) | |
return differences_df | |
def save_differences_to_excel(differences_df, output_file): | |
# Save the differences to an Excel file | |
differences_df.to_excel(output_file, index=False) | |
print(f"Differences saved to {output_file}") | |
# Example usage: | |
if __name__ == "__main__": | |
file1 = 'file1.xlsx' | |
file2 = 'file2.xlsx' | |
sheet1 = 'Sheet1' | |
sheet2 = 'Sheet1' | |
columns_to_compare = [ | |
'Position Title', | |
'Jobcode', | |
'Reports to Position Number', | |
'Reports to Name', | |
'Future Dept Id' | |
] # Columns to compare (exclude 'Position Number' as it's the join key) | |
unique_id_column = 'Position Number' # Unique identifier column | |
# Compare the sheets | |
differences_df = compare_excel_sheets(file1, file2, sheet1, sheet2, columns_to_compare, unique_id_column) | |
# Output the differences | |
if not differences_df.empty: | |
save_differences_to_excel(differences_df, 'differences_output.xlsx') | |
else: | |
print("No differences found!") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment