Transformer - 时间特征的处理
flyfish
ETTm1.csv有如下内容
假如其中有时间字符串 2016/7/1 0:45:00,
如何把这样的时间字符串变成时间特征列表?
from typing import List
import numpy as np
import pandas as pd
from pandas.tseries import offsets
from pandas.tseries.frequencies import to_offset
class TimeFeature:
    """Base class for callables that turn timestamps into one numeric feature.

    Subclasses override ``__call__``; the base implementation does nothing
    and therefore returns ``None``.
    """

    def __init__(self):
        """Create the feature object; it carries no state."""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        """Compute the feature for ``index``; intentionally empty here."""

    def __repr__(self):
        """Return the concrete class name followed by ``()``."""
        return f"{self.__class__.__name__}()"
class SecondOfMinute(TimeFeature):
    """Second of minute encoded as value between [-0.5, 0.5]"""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        # Dividing by 59 maps second 0 to 0.0 and second 59 to 1.0.
        unit_scaled = index.second / 59.0
        # Shift so the encoded range is centred on zero.
        return unit_scaled - 0.5
class MinuteOfHour(TimeFeature):
    """Minute of hour encoded as value between [-0.5, 0.5]"""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        # Dividing by 59 maps minute 0 to 0.0 and minute 59 to 1.0.
        unit_scaled = index.minute / 59.0
        # Shift so the encoded range is centred on zero.
        return unit_scaled - 0.5
class HourOfDay(TimeFeature):
    """Hour of day encoded as value between [-0.5, 0.5]"""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        # Dividing by 23 maps hour 0 to 0.0 and hour 23 to 1.0.
        unit_scaled = index.hour / 23.0
        # Shift so the encoded range is centred on zero.
        return unit_scaled - 0.5
class DayOfWeek(TimeFeature):
    """Day of week encoded as value between [-0.5, 0.5]"""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        # pandas numbers weekdays 0 (Monday) through 6 (Sunday).
        unit_scaled = index.dayofweek / 6.0
        # Shift so the encoded range is centred on zero.
        return unit_scaled - 0.5
class DayOfMonth(TimeFeature):
    """Day of month encoded as value between [-0.5, 0.5]"""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        # Days are 1-based; make them 0-based before scaling by 30.
        zero_based = index.day - 1
        return zero_based / 30.0 - 0.5
class DayOfYear(TimeFeature):
    """Day of year encoded as value between [-0.5, 0.5]"""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        # Days are 1-based; the divisor 365 lets day 366 of a leap year
        # land exactly on the upper bound.
        zero_based = index.dayofyear - 1
        return zero_based / 365.0 - 0.5
class MonthOfYear(TimeFeature):
    """Month of year encoded as value between [-0.5, 0.5]"""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        # Months are 1-based; make them 0-based before scaling by 11.
        zero_based = index.month - 1
        return zero_based / 11.0 - 0.5
class WeekOfYear(TimeFeature):
    """Week of year encoded as value between [-0.5, 0.5]"""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        # The week number is read from the ISO calendar view of the index.
        iso_week = index.isocalendar().week
        # Weeks are 1-based; make them 0-based before scaling by 52.
        zero_based = iso_week - 1
        return zero_based / 52.0 - 0.5
def time_features_from_frequency_str(freq_str: str) -> List[TimeFeature]:
    """
    Build the time-feature objects that suit a given sampling frequency.

    Parameters
    ----------
    freq_str
        Frequency string of the form [multiple][granularity] such as
        "12H", "5min", "1D" etc. It is parsed with pandas ``to_offset``.

    Returns
    -------
    One freshly constructed feature instance per class registered for the
    first offset type the parsed frequency is an instance of. The list is
    empty for year-end frequencies.

    Raises
    ------
    RuntimeError
        If the parsed offset matches none of the registered offset types.
    """
    # Ordered (offset type, feature classes) pairs; the first match wins.
    candidates = (
        (offsets.YearEnd, []),
        (offsets.QuarterEnd, [MonthOfYear]),
        (offsets.MonthEnd, [MonthOfYear]),
        (offsets.Week, [DayOfMonth, WeekOfYear]),
        (offsets.Day, [DayOfWeek, DayOfMonth, DayOfYear]),
        (offsets.BusinessDay, [DayOfWeek, DayOfMonth, DayOfYear]),
        (offsets.Hour, [HourOfDay, DayOfWeek, DayOfMonth, DayOfYear]),
        (
            offsets.Minute,
            [MinuteOfHour, HourOfDay, DayOfWeek, DayOfMonth, DayOfYear],
        ),
        (
            offsets.Second,
            [
                SecondOfMinute,
                MinuteOfHour,
                HourOfDay,
                DayOfWeek,
                DayOfMonth,
                DayOfYear,
            ],
        ),
    )

    parsed = to_offset(freq_str)
    chosen = next(
        (classes for kind, classes in candidates if isinstance(parsed, kind)),
        None,
    )
    # An empty list (yearly data) is a valid match, so compare against None.
    if chosen is not None:
        return [feature_cls() for feature_cls in chosen]

    message = f"""
Unsupported frequency {freq_str}
The following frequencies are supported:
Y - yearly
alias: A
M - monthly
W - weekly
D - daily
B - business days
H - hourly
T - minutely
alias: min
S - secondly
"""
    raise RuntimeError(message)
def printf_time_features():
    """Print every hourly-frequency time feature and its value for a sample.

    The sample timestamp is "2016/7/1 0:45:00" and the frequency string is
    "h". For each feature the object is printed first, then its encoded
    value.
    """
    sample = pd.to_datetime("2016/7/1 0:45:00")
    extractors = time_features_from_frequency_str("h")
    for extractor in extractors:
        print(extractor, "\n")
        print(extractor(sample))


# Run the demonstration as soon as the script is executed.
printf_time_features()
#返回适用于给定频率字符串的时间特征列表
# 频率字符串举例
# Y - yearly
# alias: A
# M - monthly
# W - weekly
# D - daily
# B - business days
# H - hourly
# T - minutely
# alias: min
# S - secondly
输出
输出4个特征,时间字符串被编码为[-0.5,0.5]之间的值
# HourOfDay()
# -0.5
# DayOfWeek()
# 0.16666666666666663
# DayOfMonth()
# -0.5
# DayOfYear()
# -0.0013698630136986245
batch_x_mark: tensor([[[-0.5000, 0.1667, -0.5000, -0.0014],
[-0.5000, 0.1667, -0.5000, -0.0014],
[-0.5000, 0.1667, -0.5000, -0.0014],
[-0.5000, 0.1667, -0.5000, -0.0014],
[-0.4565, 0.1667, -0.5000, -0.0014],
[-0.4565, 0.1667, -0.5000, -0.0014],
[-0.4565, 0.1667, -0.5000, -0.0014],
[-0.4565, 0.1667, -0.5000, -0.0014],
[-0.4130, 0.1667, -0.5000, -0.0014],
[-0.4130, 0.1667, -0.5000, -0.0014],
[-0.4130, 0.1667, -0.5000, -0.0014],
[-0.4130, 0.1667, -0.5000, -0.0014],
[-0.3696, 0.1667, -0.5000, -0.0014],
[-0.3696, 0.1667, -0.5000, -0.0014],
[-0.3696, 0.1667, -0.5000, -0.0014],
[-0.3696, 0.1667, -0.5000, -0.0014],
[-0.3261, 0.1667, -0.5000, -0.0014],
[-0.3261, 0.1667, -0.5000, -0.0014],
[-0.3261, 0.1667, -0.5000, -0.0014],
[-0.3261, 0.1667, -0.5000, -0.0014],
[-0.2826, 0.1667, -0.5000, -0.0014],
[-0.2826, 0.1667, -0.5000, -0.0014],
[-0.2826, 0.1667, -0.5000, -0.0014],
[-0.2826, 0.1667, -0.5000, -0.0014]]])
batch_y_mark: tensor([[[-0.3696, 0.1667, -0.5000, -0.0014],
[-0.3696, 0.1667, -0.5000, -0.0014],
[-0.3696, 0.1667, -0.5000, -0.0014],
[-0.3696, 0.1667, -0.5000, -0.0014],
[-0.3261, 0.1667, -0.5000, -0.0014],
[-0.3261, 0.1667, -0.5000, -0.0014],
[-0.3261, 0.1667, -0.5000, -0.0014],
[-0.3261, 0.1667, -0.5000, -0.0014],
[-0.2826, 0.1667, -0.5000, -0.0014],
[-0.2826, 0.1667, -0.5000, -0.0014],
[-0.2826, 0.1667, -0.5000, -0.0014],
[-0.2826, 0.1667, -0.5000, -0.0014],
[-0.2391, 0.1667, -0.5000, -0.0014],
[-0.2391, 0.1667, -0.5000, -0.0014],
[-0.2391, 0.1667, -0.5000, -0.0014],
[-0.2391, 0.1667, -0.5000, -0.0014],
[-0.1957, 0.1667, -0.5000, -0.0014],
[-0.1957, 0.1667, -0.5000, -0.0014],
[-0.1957, 0.1667, -0.5000, -0.0014],
[-0.1957, 0.1667, -0.5000, -0.0014],
[-0.1522, 0.1667, -0.5000, -0.0014],
[-0.1522, 0.1667, -0.5000, -0.0014],
[-0.1522, 0.1667, -0.5000, -0.0014],
[-0.1522, 0.1667, -0.5000, -0.0014],
[-0.1087, 0.1667, -0.5000, -0.0014],
[-0.1087, 0.1667, -0.5000, -0.0014],
[-0.1087, 0.1667, -0.5000, -0.0014],
[-0.1087, 0.1667, -0.5000, -0.0014],
[-0.0652, 0.1667, -0.5000, -0.0014],
[-0.0652, 0.1667, -0.5000, -0.0014],
[-0.0652, 0.1667, -0.5000, -0.0014],
[-0.0652, 0.1667, -0.5000, -0.0014],
[-0.0217, 0.1667, -0.5000, -0.0014],
[-0.0217, 0.1667, -0.5000, -0.0014],
[-0.0217, 0.1667, -0.5000, -0.0014],
[-0.0217, 0.1667, -0.5000, -0.0014]]])
查看配置
打印属性值
print('\n'.join(['%s:%s' % item for item in self.__dict__.items()]) )
seq_len:24
label_len:12
pred_len:24
set_type:0
features:M
target:OT
scale:True
timeenc:1
freq:h
root_path:./dataset/ETT-small/
data_path:ETTm1.csv
scaler:StandardScaler()
batch_x, batch_y, batch_x_mark, batch_y_mark 各自的形状:
batch_x: torch.Size([1, 24, 7])
batch_y: torch.Size([1, 36, 7])
batch_x_mark: torch.Size([1, 24, 4])
batch_y_mark: torch.Size([1, 36, 4])