import datetime

import pandas as pd


class Comparison:
    """Helpers for finding rows that differ between two tabular sheets.

    Rows are matched on a fixed set of key columns after normalizing text,
    dates and amounts, so cosmetic differences (case, spaces, sub-unit
    rounding) do not count as mismatches.
    """

    # Columns that together form the comparison key of a row.
    COMPARISON_COLUMNS = (
        'Date',
        'GSTIN/UIN',
        'Voucher Ref. No.',
        'Total Tax',
        'Total Amount',
    )
    # Subset of the key columns that is compared numerically (rounded).
    AMOUNT_COLUMNS = ('Total Tax', 'Total Amount')

    @staticmethod
    def normalize_value(val):
        """Normalize a non-numeric cell (date or string) for comparison.

        Args:
            val: A single cell value.

        Returns:
            "" for missing values, an ISO ``YYYY-MM-DD`` string for datetime
            values (the time of day is dropped), otherwise ``str(val)``
            stripped, upper-cased and with all spaces removed.
        """
        if pd.isna(val):
            return ""

        # pd.Timestamp subclasses datetime.datetime, so this single check
        # covers both pandas timestamps and plain datetimes found in
        # object-dtype columns.
        if isinstance(val, datetime.datetime):
            return val.date().isoformat()

        return str(val).strip().upper().replace(" ", "")

    @staticmethod
    def round_amount(val, decimals=0):
        """Round a numeric cell for comparison.

        Args:
            val: A single cell value (number or numeric string).
            decimals: Number of decimal places to keep.

        Returns:
            "" for missing values, the rounded float when ``val`` can be
            converted to a float, otherwise ``val`` unchanged.
        """
        if pd.isna(val):
            return ""
        try:
            return round(float(val), decimals)
        except (TypeError, ValueError, OverflowError):
            # Not a number: keep the raw value so the row can still be keyed.
            return val

    @staticmethod
    def _with_clean_columns(df):
        """Return a copy of ``df`` whose string column labels are stripped."""
        cleaned = df.copy()
        cleaned.columns = cleaned.columns.map(
            lambda label: label.strip() if isinstance(label, str) else label
        )
        return cleaned

    @staticmethod
    def _normalize_key_columns(df):
        """Normalize the comparison columns of ``df`` in place."""
        for col in Comparison.COMPARISON_COLUMNS:
            if col in Comparison.AMOUNT_COLUMNS:
                # round_amount defaults to 0 decimals (whole units).
                df[col] = df[col].apply(Comparison.round_amount)
            else:
                df[col] = df[col].apply(Comparison.normalize_value)

    @staticmethod
    def find_unmatched_rows(sheet1_df, sheet2_df):
        """Return the rows of each sheet that have no match in the other.

        Two rows match when all comparison columns are equal after
        normalization. The input frames are not modified; the work is done
        on copies.

        Args:
            sheet1_df: First sheet; its unmatched rows are tagged "Portal".
            sheet2_df: Second sheet; its unmatched rows are tagged "Tally".

        Returns:
            A tuple ``(unmatched_sheet1, unmatched_sheet2)`` of new
            DataFrames. Column names are stripped, the comparison columns
            hold their normalized values, and a "Source" column is added.

        Raises:
            ValueError: If a comparison column is missing from either sheet.
        """
        # Work on copies so the caller's frames keep their original column
        # names and cell values.
        portal_df = Comparison._with_clean_columns(sheet1_df)
        tally_df = Comparison._with_clean_columns(sheet2_df)

        comparison_columns = list(Comparison.COMPARISON_COLUMNS)

        # Validate required columns
        for col in comparison_columns:
            if col not in portal_df.columns or col not in tally_df.columns:
                raise ValueError(f"Missing column '{col}' in one of the sheets.")

        Comparison._normalize_key_columns(portal_df)
        Comparison._normalize_key_columns(tally_df)

        # One hashable tuple per row acts as the comparison key.
        portal_keys = portal_df[comparison_columns].apply(tuple, axis=1)
        tally_keys = tally_df[comparison_columns].apply(tuple, axis=1)

        unmatched_sheet1 = portal_df[~portal_keys.isin(tally_keys)].copy()
        unmatched_sheet2 = tally_df[~tally_keys.isin(portal_keys)].copy()

        # Tag each result with the sheet it came from.
        unmatched_sheet1["Source"] = "Portal"
        unmatched_sheet2["Source"] = "Tally"

        return unmatched_sheet1, unmatched_sheet2