from datetime import date
from datetime import datetime

import pandas as pd
# --- Creating, modifying, parsing and formatting dates/datetimes (stdlib) ---
# Create date
d = date(2017, 6, 21) # ISO format: YYYY-MM-DD
# Create a datetime (all components explicit; microsecond=500000 is half a second)
dt = datetime(year= 2017 , month= 10 , day= 1 , hour= 15 , minute= 23 , second= 25 , microsecond= 500000 )
# Change value of existing datetime -- .replace() returns a NEW object (datetimes are immutable)
dt_changed = dt.replace(minute=0, second=0, microsecond=0)
# Sort dates chronologically (NOTE(review): `date_list` is a placeholder -- assumes an iterable of date objects)
dates_ordered = sorted(date_list)
# Parse datetime from a string; the format codes must match the input string exactly
dt = datetime.strptime("12/30/2017 15:19:13", "%m/%d/%Y %H:%M:%S")
d.isoformat() # Express the date in ISO 8601 format ('2017-06-21')
print(d.strftime("%Y/%m/%d")) # Print date in Format: YYYY/MM/DD
print(dt.strftime("%Y-%m-%d %H:%M:%S")) # Print datetime in specific format
##### Date addition / subtraction
from datetime import timedelta
delta = d2 - d # Subtract two dates -> timedelta (NOTE(review): `d2` is a placeholder second date)
delta.days # Elapsed time in whole days (truncated toward negative infinity, not rounded)
delta.total_seconds() # Elapsed time in seconds (float, includes fractional seconds)
td = timedelta(days=29) # Create a 29 day timedelta
print(d + td) # Add delta to an existing date -> new date 29 days later
# timestamp value (seconds since the Unix epoch, 1970-01-01 UTC)
ts = 1514665153.0
# Convert timestamp to datetime and print
# NOTE(review): fromtimestamp uses the LOCAL timezone -- the printed value varies by machine
print(datetime.fromtimestamp(ts))
# Parsing date columns with pandas (assumes `import pandas as pd`)
df = pd.read_csv('filename.csv', parse_dates = ['date_col1', 'date_col2'], index_col='date_col3') # during import
df["date_col"] = pd.to_datetime(df["date_col"], format = "%Y-%m-%d %H:%M:%S", errors='coerce') # parse existing column; errors='coerce' turns unparseable values into NaT
df["date_col"] = df["date_col"].dt.strftime("%d-%m-%Y") # format as strings -- column becomes object dtype, no longer datetime
# Extract information via the .dt accessor (column must be datetime dtype)
df["date_col"].dt.month # Extract month information (1-12)
df["date_col"].dt.day_name() # Extract day name : Sunday, Monday etc
df["date_col"].dt.year # Extract year information
# Shift dates
df["date_col"].shift(periods=1) # Push values 1 row below, first value becomes null. Each row sees the PREVIOUS value: LAG
df["date_col"].shift(periods=-1) # Pull values 1 row above, last value becomes null. Each row sees the NEXT value: LEAD
# Numeric operations on (date-derived) columns
# Fixed: column name typo `date_co1l` -> `date_col1` (matches the columns parsed above)
df["date_col1"].div(df["date_col2"]) # element-wise ratio between 2 columns (NOT a percentage change)
df["date_col1"].pct_change(periods=3) # percentage change vs the value 3 rows earlier in the same column
df["date_col1"].diff() # Difference in value between 2 adjacent rows of same column
df["date_col1"].sub(1).mul(100) # subtracting 1 from the column, then multiply 100 with the column
# Creating timestamps -- two equivalent constructions
time_stamp1 = pd.Timestamp(datetime(2017, 1, 1)) # from a stdlib datetime
time_stamp2 = pd.Timestamp('2017-01-01') # from an ISO-format string
# Creating period -- a Period represents a whole time span (here: the month of Jan 2017)
period = pd.Period('2017-01') # default frequency: monthly ('M')
period + 2 # period 2 units later ('2017-03' in this case)
period.asfreq('D') # convert to daily frequency (defaults to the period's END: '2017-01-31')
period.to_timestamp() # Convert period to timestamp (start of the period)
time_stamp1.to_period('M') # Convert timestamp to period (fixed: was `timestamp_1`, an undefined name)
# Add missing time values / change frequency (can be alternative to .asfreq)
monthly_dates = pd.date_range(start, end, freq="M") # month-end dates (NOTE(review): `start`/`end` are placeholders)
# NOTE(review): passing a Series as `data` with a new `index` aligns by the Series' OWN labels,
# not by position -- if a positional copy is intended, `df["x"].to_numpy()` may be needed; verify against caller
monthly = pd.Series(data=df["x"], index=monthly_dates)
weekly_dates = pd.date_range(start, end, freq="W")
monthly.reindex(weekly_dates) # returns a NEW series on the weekly grid; dates absent from `monthly` become NaN
# Create time series index
t_series = pd.date_range(start='2017-1-1', periods=12, freq='M') # 12 month-end timestamps
df.set_index('date_col', inplace=True) # setting the time series as index of dataframe
# Sampling (make sure the index of the dataframe is a DatetimeIndex), try to always use .resample
# Fixed: 'DS' is not a valid pandas frequency alias (daily is 'D'), and `.asfreq()` only
# SELECTS values at the new frequency -- it never aggregates, so the mean must come from .agg
timed_df.resample('D').agg(['mean']) # Down-sampling to daily bins with mean aggregation
timed_df.resample('1H').interpolate(method='linear') # Up-sampling with .resample, and fill missing values linearly
timed_df.asfreq('1H', method='ffill') # Up-sampling with .asfreq and fill missing values with forward fill
# Fixed: asfreq(method='linear') raises ValueError -- `method` accepts only ffill/bfill variants,
# and asfreq cannot aggregate; use .resample with an aggregation for down-sampling
timed_df.resample('3H').mean() # Down-sampling to 3-hour bins with mean aggregation
timed_df.resample('M', on = 'date_col')['col1'].mean() # Standard syntax: resample on a column instead of the index
resampled_df.size() # Row count per resampling bin (NOTE(review): assumes `resampled_df` is a Resampler object)
# Normalization and comparison of time series data
first_row = time_df.iloc[0] # values of the earliest (first) row, used as the baseline
normalized = time_df.div(first_row).mul(100) # rescale each series so its first value equals 100
# Subtract the benchmark series row-wise; axis=0 aligns on the (date) index
comparison_df = normalized.sub(df['normalized_benchmark_series'], axis=0)
# Add timezone to a naive datetime column (naive -> aware); ambiguous='NaT' marks
# times that occur twice during a DST fall-back transition as NaT instead of raising
df['date_col'] = df['date_col'].dt.tz_localize('America/New_York', ambiguous='NaT')
# Convert to another timezone (column must already be timezone-aware)
df['date_col'] = df['date_col'].dt.tz_convert('Europe/London')
# Window functions:
time_df.rolling(window='30D').agg(['mean', 'std']) # moving/rolling window over the trailing 30 days (requires datetime-like index)
time_df.expanding().agg(['mean', 'sum']) # expanding (cumulative) window from the first row up to the current row