Use when asked to parse, normalize, standardize, or convert dates from various formats to consistent ISO 8601 or custom formats.
Parse and normalize dates from various formats into consistent, standardized formats for data cleaning and ETL pipelines.
Date standardization for:
from date_normalizer import DateNormalizer
# Normalize single date
normalizer = DateNormalizer()
result = normalizer.normalize("03/14/2024")
print(result) # {'normalized': '2024-03-14', 'format': 'iso8601'}
# Normalize to specific format
result = normalizer.normalize("March 14, 2024", output_format="us")
print(result) # {'normalized': '03/14/2024', 'format': 'us'}
# Batch normalize CSV column
normalizer.normalize_csv(
'data.csv',
date_column='created_at',
output='normalized.csv',
output_format='iso8601'
)
# Normalize single date
python date_normalizer.py --date "March 14, 2024"
# Convert to specific format
python date_normalizer.py --date "14/03/2024" --format us
# Normalize CSV column
python date_normalizer.py --csv data.csv --column date --format iso8601 --output normalized.csv
# Detect ambiguous dates
python date_normalizer.py --date "01/02/03" --detect-ambiguous
class DateNormalizer:
def normalize(self, date_string: str, output_format: str = 'iso8601',
dayfirst: bool = False, yearfirst: bool = False) -> Dict
def normalize_batch(self, dates: List[str], **kwargs) -> List[Dict]
def normalize_csv(self, csv_path: str, date_column: str,
output: str = None, **kwargs) -> str
def detect_format(self, date_string: str) -> str
def is_valid(self, date_string: str) -> bool
def is_ambiguous(self, date_string: str) -> bool
def parse_relative(self, relative_string: str) -> datetime
ISO 8601 (default):
'2024-03-14' # Date only
'2024-03-14T15:30:00' # With time
'2024-03-14T15:30:00+00:00' # With timezone
US Format:
'03/14/2024' # MM/DD/YYYY
EU Format:
'14/03/2024' # DD/MM/YYYY
Long Format:
'March 14, 2024'
Custom Format:
normalizer.normalize(date, output_format='%Y%m%d') # '20240314'
Numeric:
2024-03-14 (ISO)
03/14/2024 (US)
14/03/2024 (EU)
14.03.2024 (German)
2024/03/14 (Japanese)
20240314 (Compact)
Textual:
March 14, 2024
14 March 2024
Mar 14, 2024
14-Mar-2024
Relative:
today, yesterday, tomorrow
next week, last month
2 days ago, in 3 weeks
With Time:
2024-03-14 15:30:00
03/14/2024 3:30 PM
2024-03-14T15:30:00Z
Dates like 01/02/03 are ambiguous. Specify interpretation:
# Day first (EU)
normalizer.normalize("01/02/03", dayfirst=True)
# Result: 2003-02-01
# Month first (US)
normalizer.normalize("01/02/03", dayfirst=False)
# Result: 2003-01-02
# Year first
normalizer.normalize("01/02/03", yearfirst=True)
# Result: 2001-02-03
Clean Messy Data:
messy_dates = [
"March 14, 2024",
"2024-03-15",
"03/16/2024",
"17-Mar-2024"
]
normalized = normalizer.normalize_batch(messy_dates)
# Each result's 'normalized' value, in order: '2024-03-14', '2024-03-15', '2024-03-16', '2024-03-17'
CSV Normalization:
# Input CSV with mixed date formats
# Convert all to ISO 8601
normalizer.normalize_csv(
'orders.csv',
date_column='order_date',
output='orders_normalized.csv',
output_format='iso8601'
)
Validation:
if not normalizer.is_valid("invalid date"):
print("Invalid date detected")
Timezone Conversion:
normalizer.normalize(
"2024-03-14 15:30:00+00:00",
output_timezone='America/New_York'
)