# Test page
import os
import pandas as pd
import re
# ---------------------------------------------------------------------------
# Source CSV: directory and file name are kept separate so either one can be
# swapped independently.
# ---------------------------------------------------------------------------
INPUT_directory = r'D:\Data\OneDrive\ipradhan\OneDrive - CFTC\Ish-OneDrive\_SwapsErrorData\Testing'
input_file = 'Staging_v2_5_23_2025_Copy.csv'
input_path = os.path.join(INPUT_directory, input_file)

# ---------------------------------------------------------------------------
# Destination CSV.
# ---------------------------------------------------------------------------
OUTPUT_directory = r'D:\Data\OneDrive\ipradhan\OneDrive - CFTC\Ish-OneDrive\_SwapsErrorData\Testing'
output_file = 'Text_output_swap_asset_class_4.csv'
output_path = os.path.join(OUTPUT_directory, output_file)

# Read the whole file with every column typed as text (dtype=str).
df_csv = pd.read_csv(filepath_or_buffer=input_path, dtype=str)

# Per-asset-class flag columns appended to the DataFrame; each one starts out
# as an empty string for every row.
new_columns = [
    'acl_credit',
    'acl_equity',
    'acl_fx',
    'acl_ir',
    'acl_comm',
    'acl_other',
]
for col in new_columns:
    df_csv[col] = ''
# Asset-class classification of a single raw cell value.

# Keys of the returned dict, in output order.
_ASSET_CLASS_COLUMNS = (
    'acl_credit',
    'acl_equity',
    'acl_fx',
    'acl_ir',
    'acl_comm',
    'acl_other',
)

# Every token/phrase that counts as "recognised". Compiled once at import
# time instead of on every row.
_KNOWN_ASSET_CLASS_RE = re.compile(
    r'\b(?:FX|CO|IR|EQ|CR|COMMODITY|INTEREST RATE|FOREIGN EXCHANGE|COMMODITIES|'
    r'LOAN|SWAP|OPTION|EQUITY DERIVATIVE|CREDIT DEFAULT SWAP|FUTURES)\b'
)

# Any upper-case ASCII letter left over after the recognised tokens are removed.
_LEFTOVER_LETTER_RE = re.compile(r'[A-Z]')

# Recognised token -> (output column, code written to that column).
# Recognised tokens that are absent here are written verbatim to 'acl_other'.
_ASSET_CLASS_TARGETS = {
    'FX': ('acl_fx', 'FX'),
    'FOREIGN EXCHANGE': ('acl_fx', 'FX'),
    'CO': ('acl_comm', 'CO'),
    'COMMODITY': ('acl_comm', 'CO'),
    'COMMODITIES': ('acl_comm', 'CO'),
    'IR': ('acl_ir', 'IR'),
    'INTEREST RATE': ('acl_ir', 'IR'),
    'EQ': ('acl_equity', 'EQ'),
    'CR': ('acl_credit', 'CR'),
}


def assign_asset_classes_to_column(col_val: str) -> dict:
    """Map one raw asset-class string to the six ``acl_*`` flag values.

    Matching is case-insensitive (the value is upper-cased first) and only
    whole words/phrases are recognised.

    Args:
        col_val: Raw cell text, or a missing value (``NaN``/``None``).

    Returns:
        A dict keyed by ``acl_credit``, ``acl_equity``, ``acl_fx``, ``acl_ir``,
        ``acl_comm`` and ``acl_other``. Each value is ``''`` unless a matching
        token was found, in which case it holds the short code (``'FX'``,
        ``'CO'``, ``'IR'``, ``'EQ'``, ``'CR'``). ``acl_other`` holds the last
        recognised token that has no dedicated column (e.g. ``'SWAP'``), or
        ``'OTHER'`` when the value contains any text that is not a recognised
        token. A missing input yields all-empty values.
    """
    result = dict.fromkeys(_ASSET_CLASS_COLUMNS, '')
    if pd.isna(col_val):
        return result

    col_upper = col_val.upper()

    for token in _KNOWN_ASSET_CLASS_RE.findall(col_upper):
        # Tokens without a dedicated column land in 'acl_other' as-is; when
        # several such tokens occur, the last one wins.
        column, code = _ASSET_CLASS_TARGETS.get(token, ('acl_other', token))
        result[column] = code

    # Detect unrecognised text by removing every recognised token and checking
    # whether any letters remain. Comparing whole letter/space runs against the
    # individual tokens (the previous approach) mislabelled space-separated
    # values such as "FX IR" or "INTEREST RATE SWAP" as 'OTHER'.
    leftover = _KNOWN_ASSET_CLASS_RE.sub(' ', col_upper)
    if _LEFTOVER_LETTER_RE.search(leftover):
        result['acl_other'] = 'OTHER'

    return result
# Classify every row's raw 'swap_asset_class_4' text; the result is a Series
# holding one dict of acl_* values per row.
asset_class_data = df_csv['swap_asset_class_4'].apply(assign_asset_classes_to_column)

# Expand the dicts into one column per acl_* key. DataFrame.update() aligns on
# index labels, so the expanded frame is built on df_csv's own index instead of
# a fresh 0..n-1 range; otherwise rows would be misaligned whenever df_csv does
# not carry the default index (e.g. after filtering or re-indexing).
asset_df = pd.DataFrame(asset_class_data.tolist(), index=df_csv.index)
df_csv.update(asset_df)

# Save to new CSV (row index is not written).
df_csv.to_csv(output_path, index=False)
print(f'Processed file saved to: {output_path}')