The following part can be called to read the data. The first column of the data holds the label, with 0 and 1 denoting the two classes.
import pandas as pd
import torch
import torch.utils.data as Data
import numpy as np


class TrainDataset(Data.Dataset):
    def __init__(self, path):
        source = np.array(pd.read_csv(path, header=None))
        col = len(source[0])
        row = len(source.T[0])
        source_data = source[0:row, 1:col]   # every column after the first is a feature
        source_label = source[0:row, 0:1]    # the first column is the label
        source = source_data.reshape(row, 1, 15200)
        # prepend the label so each sample stores label and features together
        self.data = np.insert(source, 0, values=source_label, axis=2)

    def __getitem__(self, idx):
        data_ori = torch.from_numpy(self.data[idx].T)  # torch.Size([15201, 1])
        data = data_ori[1:].T                          # torch.Size([1, 15200])
        label = data_ori[0]                            # row 0 of data_ori is the label
        return data, label                             # return the data and its label

    def __len__(self):                                 # return the number of samples
        return len(self.data)
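A quick way to sanity-check the dataset class is to wrap it in a standard DataLoader, reusing the imports above. The file name train.csv and the batch size of 32 are assumptions (the shape comments in the model code below suggest batches of 32):

train_set = TrainDataset('train.csv')  # hypothetical file in the layout described above
train_loader = Data.DataLoader(train_set, batch_size=32, shuffle=True)
data, label = next(iter(train_loader))
print(data.shape)   # torch.Size([32, 1, 15200])
print(label.shape)  # torch.Size([32, 1])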
The following part can also be called directly; its output size is the number of classes.

import torch.nn as nn
import torch.nn.functional as F


class RNN(nn.Module):
    def __init__(self, input_size, lstm_output_size, hidden_size, output_size):
        super(RNN, self).__init__()
        self.rnn = nn.LSTM(input_size, lstm_output_size)
        # input_size: number of input features
        # hidden_size: number of features in the hidden state
        # num_layers: how many LSTM layers are stacked in the model; usually just the default of 1
        # bias: whether to use bias terms; defaults to True
        # batch_first: defaults to False; set it to True if the input is (batch, seq, feature)
        # dropout: defaults to 0; a nonzero value is the dropout rate, and it should stay 0 when num_layers = 1
        # bidirectional: whether the LSTM is bidirectional; defaults to False (num_directions = 1)
        self.r2h = nn.Linear(lstm_output_size, hidden_size)
        self.h2o = nn.Linear(hidden_size, output_size)

    def forward(self, input):
        hidden, (hn, cn) = self.rnn(input)
        # output, (h_n, c_n) = rnn(input, (h_0, c_0))
        # input has shape (seq_len, batch, input_size) with the default batch_first=False;
        # otherwise the first two dimensions are swapped
        # h_0 has shape (num_layers * num_directions, batch, hidden_size)
        # c_0 has shape (num_layers * num_directions, batch, hidden_size)
        # if h_0 and c_0 are not provided, they default to zeros
        # output has shape (seq_len, batch, num_directions * hidden_size) (num_directions defaults to 1)
        # h_n and c_n have shape (num_layers * num_directions, batch, hidden_size)
        # an alternative recurrent layer: self.rnn = nn.GRU(input_size, hidden_size1, 1, dropout=0.2)
        # hidden = torch.Size([1, 32, 20])
        # h_n = torch.Size([1, 32, 20])
        # c_n = torch.Size([1, 32, 20])
        fc1 = F.relu(self.r2h(hidden))      # torch.Size([1, 32, 5])
        output = self.h2o(fc1)              # torch.Size([1, 32, 2])
        output = F.softmax(output, dim=2)   # torch.Size([1, 32, 2]) (probabilities: each row sums to 1)
        output = output.transpose(0, 1).contiguous()
        return output
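To make the shape comments concrete, here is a minimal sketch that runs one forward pass. The hyperparameters (input_size=15200, lstm_output_size=20, hidden_size=5, output_size=2) are assumptions inferred from the sizes in the comments above:

import torch

rnn = RNN(input_size=15200, lstm_output_size=20, hidden_size=5, output_size=2)
x = torch.randn(1, 32, 15200)  # (seq_len, batch, input_size), matching batch_first=False
y = rnn(x)
print(y.shape)                 # torch.Size([32, 1, 2]) after the final transpose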
The following part can also be called directly; it returns the cross-entropy loss.

import torch.nn as nn


def train(input_variable, target_variable, rnn, rnn_optimizer,
          criterion=nn.CrossEntropyLoss()):
    rnn_optimizer.zero_grad()
    rnn_output = rnn(input_variable)
    # print(rnn_output.shape)  # torch.Size([32, 1, 2])
    # print(target_variable)
    # collapse the length-1 sequence dimension so the shapes match what
    # CrossEntropyLoss expects: input (batch, classes), target a LongTensor
    # of class indices with shape (batch,)
    # note: CrossEntropyLoss applies log-softmax internally, so it normally
    # expects raw logits; here it receives the softmax output of forward(),
    # which still trains but with damped gradients
    loss = criterion(rnn_output.squeeze(1), target_variable)  # cross entropy
    loss.backward()
    rnn_optimizer.step()
    return loss


def test(input_variable, rnn):
    rnn_output = rnn(input_variable)
    top_n, top_i = rnn_output.data.topk(1)  # index of the highest-probability class
    return top_i[0][0]
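Tying the pieces together, a minimal driver loop might look like the following sketch, reusing train_loader and rnn from the earlier snippets. The Adam optimizer, learning rate, and epoch count are assumptions, not values from the original experiment:

optimizer = torch.optim.Adam(rnn.parameters(), lr=1e-3)  # assumed optimizer

for epoch in range(10):                       # assumed epoch count
    for data, label in train_loader:
        data = data.float().transpose(0, 1)   # (batch, 1, 15200) -> (seq_len=1, batch, 15200)
        target = label.squeeze(1).long()      # LongTensor of class indices, shape (batch,)
        loss = train(data, target, rnn, optimizer)
    print('epoch %d  loss %.4f' % (epoch, loss.item()))

The results are as follows: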