75 lines
2.4 KiB
Python
75 lines
2.4 KiB
Python
|
|
import datetime

import pandas as pd
|
||
|
|
|
||
|
|
class Comparison:
    """Helpers for reconciling rows between two sheets loaded as DataFrames.

    Rows are compared on a fixed set of columns after normalization: text and
    date columns are canonicalized by ``normalize_value`` and amount columns
    are rounded by ``round_amount``.
    """

    # Columns whose normalized values together form a row's comparison key.
    COMPARISON_COLUMNS = (
        'Date',
        'GSTIN/UIN',
        'Voucher Ref. No.',
        'Total Tax',
        'Total Amount',
    )

    # Subset of COMPARISON_COLUMNS that is compared numerically (rounded).
    AMOUNT_COLUMNS = ('Total Tax', 'Total Amount')

    @staticmethod
    def normalize_value(val):
        """Normalize a non-numeric cell (date or string) for comparison.

        Args:
            val: A single cell value.

        Returns:
            ``""`` for missing values (``None``, NaN, NaT); the ISO
            ``YYYY-MM-DD`` date for datetime/date values; otherwise the
            string form of ``val``, stripped, upper-cased and with spaces
            removed.
        """
        if pd.isna(val):
            return ""

        # pd.Timestamp subclasses datetime.datetime, so this branch covers
        # both pandas timestamps and plain Python datetimes (which appear in
        # object-dtype columns). Only the calendar date is kept.
        if isinstance(val, datetime.datetime):
            return val.date().isoformat()
        if isinstance(val, datetime.date):
            return val.isoformat()

        return str(val).strip().upper().replace(" ", "")

    @staticmethod
    def round_amount(val, decimals=0):
        """Round a numeric cell for comparison.

        Args:
            val: A single cell value, numeric or numeric-looking.
            decimals: Number of decimal places passed to ``round``.

        Returns:
            ``""`` for missing values; ``round(float(val), decimals)`` when
            ``val`` converts to float; otherwise ``val`` unchanged.
        """
        if pd.isna(val):
            return ""
        try:
            return round(float(val), decimals)
        except (TypeError, ValueError, OverflowError):
            # Not convertible to a number: compare the raw value instead.
            return val

    @staticmethod
    def _with_clean_columns(frame):
        """Return a copy of ``frame`` with whitespace stripped from string column names."""
        cleaned = frame.copy()
        # Non-string labels are left as they are; ``.str.strip()`` would turn
        # them into NaN in a mixed-label header.
        cleaned.columns = [
            label.strip() if isinstance(label, str) else label
            for label in cleaned.columns
        ]
        return cleaned

    @staticmethod
    def _normalize_comparison_columns(frame):
        """Normalize the comparison columns of ``frame`` in place (frame is a private copy)."""
        for col in Comparison.COMPARISON_COLUMNS:
            if col in Comparison.AMOUNT_COLUMNS:
                frame[col] = frame[col].apply(
                    lambda x: Comparison.round_amount(x, 0)
                )
            else:
                frame[col] = frame[col].apply(Comparison.normalize_value)

    @staticmethod
    def _row_keys(frame):
        """Return one tuple of comparison-column values per row of ``frame``."""
        return list(zip(*(frame[col] for col in Comparison.COMPARISON_COLUMNS)))

    @staticmethod
    def find_unmatched_rows(sheet1_df, sheet2_df):
        """Find rows present in one sheet but not in the other.

        Two rows match when all comparison columns are equal after
        normalization. The input DataFrames are not modified; the work is
        done on copies.

        Args:
            sheet1_df: First sheet; its unmatched rows are labelled "Portal".
            sheet2_df: Second sheet; its unmatched rows are labelled "Tally".

        Returns:
            A tuple ``(unmatched_sheet1, unmatched_sheet2)`` of DataFrames.
            Each holds the rows of that sheet with no counterpart in the
            other, with stripped column names, normalized comparison columns
            and an added ``"Source"`` column.

        Raises:
            ValueError: If a comparison column is missing from either sheet.
        """
        # Work on copies so the caller's frames keep their original column
        # names and cell values.
        portal = Comparison._with_clean_columns(sheet1_df)
        tally = Comparison._with_clean_columns(sheet2_df)

        # Validate required columns
        for col in Comparison.COMPARISON_COLUMNS:
            if col not in portal.columns or col not in tally.columns:
                raise ValueError(f"Missing column '{col}' in one of the sheets.")

        Comparison._normalize_comparison_columns(portal)
        Comparison._normalize_comparison_columns(tally)

        portal_keys = Comparison._row_keys(portal)
        tally_keys = Comparison._row_keys(tally)
        portal_key_set = set(portal_keys)
        tally_key_set = set(tally_keys)

        # Explicit boolean Series keep the masks well-defined for zero-row
        # sheets too (a bare empty list would be read as a column selection).
        portal_mask = pd.Series(
            [key not in tally_key_set for key in portal_keys],
            index=portal.index,
            dtype=bool,
        )
        tally_mask = pd.Series(
            [key not in portal_key_set for key in tally_keys],
            index=tally.index,
            dtype=bool,
        )

        unmatched_sheet1 = portal[portal_mask].copy()
        unmatched_sheet2 = tally[tally_mask].copy()

        # Add source column
        unmatched_sheet1["Source"] = "Portal"
        unmatched_sheet2["Source"] = "Tally"

        return unmatched_sheet1, unmatched_sheet2
|