import pandas as pd

# Columns that identify a row when the caller does not supply its own keys.
DEFAULT_COMPARISON_COLUMNS = ("Date", "GSTIN/UIN")


def _normalize_cell(cell):
    """Return the text form of one cell used for cross-sheet comparison."""
    if isinstance(cell, float) and cell.is_integer():
        # Go through int() rather than stripping ".0" from the text: large
        # integer-valued floats print in scientific notation ("1e+16"), which
        # would never equal the same number stored as an int in the other sheet.
        return str(int(cell))
    return str(cell).strip()


def normalize_row(row):
    """Return a hashable tuple of normalized cell strings for one row.

    Integer-valued floats are rendered without a fractional part (``1.0`` ->
    ``"1"``) so that they compare equal to the same value stored as an int.
    Every other cell is converted with ``str()`` and stripped of surrounding
    whitespace.

    Args:
        row: Any iterable of cell values (a list, tuple, or pandas Series).

    Returns:
        A tuple of strings, one per cell, in the original order.
    """
    return tuple(_normalize_cell(cell) for cell in row)


def _with_stripped_columns(df):
    """Return a copy of *df* whose string column labels are whitespace-stripped."""
    # rename() returns a new frame, so the caller's DataFrame is left untouched.
    # Non-string labels are passed through unchanged instead of raising.
    return df.rename(
        columns=lambda label: label.strip() if isinstance(label, str) else label
    )


def _row_keys(df, columns):
    """Return one normalized key tuple per row of *df*, built from *columns*."""
    # itertuples yields nothing for an empty frame, so an empty sheet simply
    # produces an empty key list.
    return [
        normalize_row(values)
        for values in df[columns].itertuples(index=False, name=None)
    ]


def _rows_without_match(df, keys, other_keys):
    """Return a copy of the rows of *df* whose key is absent from *other_keys*."""
    # A positional boolean array (not a list) is used so that an empty mask is
    # still treated as a row filter and index labels never need aligning.
    mask = pd.Series([key not in other_keys for key in keys], dtype=bool).to_numpy()
    return df.loc[mask].copy()


def find_unmatched_rows(sheet1_df, sheet2_df, comparison_columns=None):
    """Return the rows of each sheet that have no counterpart in the other.

    Rows are matched on the normalized values of the comparison columns only;
    all other columns are carried through to the result but ignored for
    matching. Column labels are whitespace-stripped before comparison. The
    input DataFrames are not modified.

    Args:
        sheet1_df: First sheet as a DataFrame.
        sheet2_df: Second sheet as a DataFrame.
        comparison_columns: Column names that make up the match key. Defaults
            to ``DEFAULT_COMPARISON_COLUMNS`` (``'Date'`` and ``'GSTIN/UIN'``).

    Returns:
        A DataFrame with the unmatched rows of sheet 1 followed by the
        unmatched rows of sheet 2, re-indexed from 0, with an added
        ``"Source"`` column holding ``"Sheet1"`` or ``"Sheet2"``.

    Raises:
        ValueError: If a comparison column is missing from either sheet.
    """
    if comparison_columns is None:
        comparison_columns = DEFAULT_COMPARISON_COLUMNS
    key_columns = list(comparison_columns)

    sheet1 = _with_stripped_columns(sheet1_df)
    sheet2 = _with_stripped_columns(sheet2_df)

    # Ensure required columns exist
    for col in key_columns:
        if col not in sheet1.columns or col not in sheet2.columns:
            raise ValueError(f"Missing column '{col}' in one of the sheets.")

    sheet1_keys = _row_keys(sheet1, key_columns)
    sheet2_keys = _row_keys(sheet2, key_columns)

    unmatched_in_sheet1 = _rows_without_match(sheet1, sheet1_keys, set(sheet2_keys))
    unmatched_in_sheet2 = _rows_without_match(sheet2, sheet2_keys, set(sheet1_keys))

    # Mark source
    unmatched_in_sheet1["Source"] = "Sheet1"
    unmatched_in_sheet2["Source"] = "Sheet2"

    return pd.concat([unmatched_in_sheet1, unmatched_in_sheet2], ignore_index=True)