diff --git a/AppCode/FileHandler.py b/AppCode/FileHandler.py new file mode 100644 index 0000000..543cabb --- /dev/null +++ b/AppCode/FileHandler.py @@ -0,0 +1,14 @@ +import os + + +class FileHandler: + + UPLOAD_FOLDER = 'uploads' + RESULT_FILE = 'unmatched_result.xlsx' + + @staticmethod + def check_or_create_folder_exists(): + + if not os.path.exists(FileHandler.UPLOAD_FOLDER): + os.makedirs(FileHandler.UPLOAD_FOLDER) + diff --git a/AppCode/ServerPort.py b/AppCode/ServerPort.py new file mode 100644 index 0000000..6ded969 --- /dev/null +++ b/AppCode/ServerPort.py @@ -0,0 +1,5 @@ + +# Server config. +class CompGSTServer: + host='0.0.0.0' + port=5001 diff --git a/AppCode/compare_excel.py b/AppCode/compare_excel.py new file mode 100644 index 0000000..baab192 --- /dev/null +++ b/AppCode/compare_excel.py @@ -0,0 +1,74 @@ +import pandas as pd + +class Comparison: + + @staticmethod + def normalize_value(val): + """Normalize non-numeric values (date, string).""" + if pd.isna(val): + return "" + + # Normalize dates + if isinstance(val, pd.Timestamp): + return val.date().isoformat() + + # Normalize strings + return str(val).strip().upper().replace(" ", "") + + @staticmethod + def round_amount(val, decimals=0): + """Round numeric values safely for comparison.""" + if pd.isna(val): + return "" + try: + return round(float(val), decimals) + except Exception: + return val + + @staticmethod + def find_unmatched_rows(sheet1_df, sheet2_df): + # Clean column names + sheet1_df.columns = sheet1_df.columns.str.strip() + sheet2_df.columns = sheet2_df.columns.str.strip() + + comparison_columns = [ + 'Date', + 'GSTIN/UIN', + 'Voucher Ref. No.', + 'Total Tax', + 'Total Amount' + ] + + amount_columns = ['Total Tax', 'Total Amount'] + + # Validate required columns + for col in comparison_columns: + if col not in sheet1_df.columns or col not in sheet2_df.columns: + raise ValueError(f"Missing column '{col}' in one of the sheets.") + + # Normalize values + for col in comparison_columns: + if col in amount_columns: + sheet1_df[col] = sheet1_df[col].apply( + lambda x: Comparison.round_amount(x, 0) + ) + sheet2_df[col] = sheet2_df[col].apply( + lambda x: Comparison.round_amount(x, 0) + ) + else: + sheet1_df[col] = sheet1_df[col].apply(Comparison.normalize_value) + sheet2_df[col] = sheet2_df[col].apply(Comparison.normalize_value) + + # Create comparison keys + sheet1_keys = sheet1_df[comparison_columns].apply(tuple, axis=1) + sheet2_keys = sheet2_df[comparison_columns].apply(tuple, axis=1) + + # Find unmatched rows + unmatched_sheet1 = sheet1_df[~sheet1_keys.isin(sheet2_keys)].copy() + unmatched_sheet2 = sheet2_df[~sheet2_keys.isin(sheet1_keys)].copy() + + # Add source column + unmatched_sheet1["Source"] = "Portal" + unmatched_sheet2["Source"] = "Tally" + + return unmatched_sheet1, unmatched_sheet2 diff --git a/app.py b/app.py index ff642bd..a647425 100644 --- a/app.py +++ b/app.py @@ -1,42 +1,56 @@ from flask import Flask, render_template, request, send_file import pandas as pd -from compare_gst_excel import find_unmatched_rows import os from werkzeug.utils import secure_filename +from AppCode.FileHandler import FileHandler +from AppCode.ServerPort import CompGSTServer +from AppCode.compare_excel import Comparison app = Flask(__name__) -UPLOAD_FOLDER = 'uploads' -RESULT_FILE = 'unmatched_result.xlsx' -if not os.path.exists(UPLOAD_FOLDER): - os.makedirs(UPLOAD_FOLDER) +# check file Folder valid +FileHandler.check_or_create_folder_exists() +# upload page show @app.route('/') def index(): return render_template('upload.html') +# camparison route @app.route('/upload', methods=['POST']) def upload_file(): - file = request.files['excel_file'] + file = request.files.get('excel_file') if not file: return "No file uploaded.", 400 filename = secure_filename(file.filename) - filepath = os.path.join(UPLOAD_FOLDER, filename) + filepath = os.path.join(FileHandler.UPLOAD_FOLDER, filename) file.save(filepath) try: - # Read Excel with header in row 8 (0-indexed), so header=7 - df1 = pd.read_excel(filepath, sheet_name=0, header=7) - df2 = pd.read_excel(filepath, sheet_name=1, header=7) + # Read first two sheets + df1 = pd.read_excel(filepath, sheet_name=0, header=0) + df2 = pd.read_excel(filepath, sheet_name=1, header=0) - unmatched = find_unmatched_rows(df1, df2) - unmatched.to_excel(RESULT_FILE, index=False) + # Get unmatched rows separately + unmatched_sheet1, unmatched_sheet2 = Comparison.find_unmatched_rows(df1, df2) - return send_file(RESULT_FILE, as_attachment=True) + # Write result into two Excel sheets + with pd.ExcelWriter(FileHandler.RESULT_FILE, engine="openpyxl") as writer: + unmatched_sheet1.to_excel( + writer, sheet_name="Not In Tally", index=False + ) + unmatched_sheet2.to_excel( + writer, sheet_name="Not In Portal", index=False + ) + + return send_file(FileHandler.RESULT_FILE, as_attachment=True) except Exception as e: return f"Error processing file: {e}", 500 +# run if __name__ == '__main__': - app.run(host='0.0.0.0', port=5000,debug=True) + app.run(host=CompGSTServer.host, port=CompGSTServer.port, debug=True) + + diff --git a/compare_gst_excel.py b/compare_gst_excel.py deleted file mode 100644 index cda5a58..0000000 --- a/compare_gst_excel.py +++ /dev/null @@ -1,64 +0,0 @@ -# import pandas as pd - -# def normalize_row(row): -# return tuple( -# str(cell).strip().replace(".0", "") if isinstance(cell, float) and cell.is_integer() else str(cell).strip() -# for cell in row -# ) - -# def find_unmatched_rows(sheet1_df, sheet2_df): -# # Ensure column names are clean -# sheet1_df.columns = sheet1_df.columns.str.strip() -# sheet2_df.columns = sheet2_df.columns.str.strip() - -# # Normalize rows for comparison -# sheet1_normalized = sheet1_df.apply(normalize_row, axis=1) -# sheet2_normalized = sheet2_df.apply(normalize_row, axis=1) - -# # Find unmatched rows -# unmatched_in_sheet1 = sheet1_df[~sheet1_normalized.isin(sheet2_normalized)] -# unmatched_in_sheet2 = sheet2_df[~sheet2_normalized.isin(sheet1_normalized)] - -# # Mark source -# unmatched_in_sheet1["Source"] = "Sheet1" -# unmatched_in_sheet2["Source"] = "Sheet2" - -# # Combine -# unmatched_combined = pd.concat([unmatched_in_sheet1, unmatched_in_sheet2], ignore_index=True) -# return unmatched_combined -import pandas as pd - -def normalize_row(row): - return tuple( - str(cell).strip().replace(".0", "") if isinstance(cell, float) and cell.is_integer() else str(cell).strip() - for cell in row - ) - -def find_unmatched_rows(sheet1_df, sheet2_df): - # Clean column names - sheet1_df.columns = sheet1_df.columns.str.strip() - sheet2_df.columns = sheet2_df.columns.str.strip() - - # Choose the comparison columns - comparison_columns = ['Date', 'GSTIN/UIN'] - - # Ensure required columns exist - for col in comparison_columns: - if col not in sheet1_df.columns or col not in sheet2_df.columns: - raise ValueError(f"Missing column '{col}' in one of the sheets.") - - # Create keys for comparison - sheet1_keys = sheet1_df[comparison_columns].apply(normalize_row, axis=1) - sheet2_keys = sheet2_df[comparison_columns].apply(normalize_row, axis=1) - - # Find unmatched rows - unmatched_in_sheet1 = sheet1_df[~sheet1_keys.isin(sheet2_keys)].copy() - unmatched_in_sheet2 = sheet2_df[~sheet2_keys.isin(sheet1_keys)].copy() - - # Mark source - unmatched_in_sheet1["Source"] = "Sheet1" - unmatched_in_sheet2["Source"] = "Sheet2" - - # Combine - unmatched_combined = pd.concat([unmatched_in_sheet1, unmatched_in_sheet2], ignore_index=True) - return unmatched_combined diff --git a/unmatched_result.xlsx b/unmatched_result.xlsx index e28f586..937509f 100644 Binary files a/unmatched_result.xlsx and b/unmatched_result.xlsx differ