# 3D CNN

使用MNIST-3D来掌握3D CNN。

三个通道:

from keras.layers import Conv3D, MaxPool3D, Flatten, Dense
from keras.layers import Dropout, Input, BatchNormalization
from sklearn.metrics import confusion_matrix, accuracy_score
from plotly.offline import iplot, init_notebook_mode
from keras.losses import categorical_crossentropy
from keras.optimizers import Adadelta
import plotly.graph_objs as go
from matplotlib.pyplot import cm
from keras.models import Model
import numpy as np
import keras
import h5py


# Load the dataset. Use a context manager so the HDF5 file handle is
# closed as soon as the arrays have been copied out (the original left
# the file open for the lifetime of the script).
with h5py.File('./input/full_dataset_vectors.h5', 'r') as dataset:
    # "[:]" materializes each HDF5 dataset into an in-memory numpy
    # array, so the arrays remain valid after the file is closed.
    x_train = dataset["X_train"][:]
    x_test = dataset["X_test"][:]

    y_train = dataset["y_train"][:]
    y_test = dataset["y_test"][:]


print ("x_train shape: ", x_train.shape)
print ("y_train shape: ", y_train.shape)

print ("x_test shape:  ", x_test.shape)
print ("y_test shape:  ", y_test.shape)

# 观察输入数据: 将三维数据 flatten 成了一维数据
# x_train shape:  (10000, 4096) 训练集:10000张3d图片, 每一张3d图片的形状是16x16x16
# y_train shape:  (10000,) 
# x_test shape:   (2000, 4096)  测试集:2000张3d图片,  每一张3d图片的形状是16x16x16
# y_test shape:   (2000,)

# 可视化数据
# with h5py.File("./input/train_point_clouds.h5", "r") as points_dataset:        
#     digits = []
#     for i in range(10):
#         digit = (points_dataset[str(i)]["img"][:], 
#                  points_dataset[str(i)]["points"][:], 
#                  points_dataset[str(i)].attrs["label"]) 
#         digits.append(digit)
        
# x_c = [r[0] for r in digits[0][1]]
# y_c = [r[1] for r in digits[0][1]]
# z_c = [r[2] for r in digits[0][1]]
# trace1 = go.Scatter3d(x=x_c, y=y_c, z=z_c, mode='markers', 
#                       marker=dict(size=12, color=z_c, colorscale='Viridis', opacity=0.7))

# data = [trace1]
# layout = go.Layout(height=500, width=600, title= "Digit: "+str(digits[0][2]) + " in 3D space")
# fig = go.Figure(data=data, layout=layout)
# iplot(fig)
    
# 要使用2D的卷积,我们首先将每一张图片转化成3D的形状: width, height, channel(r/g/b).
# 要使用3D的卷积,我们首先将每一张图片转化成4D的形状: length, breadth, height, channel(r/g/b).

# Allocate uninitialized buffers that will receive an RGB channel per
# voxel. np.empty is the idiomatic allocator; calling the low-level
# np.ndarray(shape) constructor directly is discouraged by NumPy's docs
# (both return an uninitialized float64 array of the given shape).
## Introduce the channel dimension in the input dataset
xtrain = np.empty((x_train.shape[0], 4096, 3))  # shape (10000, 4096, 3), contents uninitialized
xtest = np.empty((x_test.shape[0], 4096, 3))
print('x_train.shape[0]', x_train.shape[0]) # 10000
print('x_test.shape[0]', x_test.shape[0]) # 2000

## 这里有点晕,到时可以只用一个通道便好
## iterate in train and test, add the rgb dimention 
def add_rgb_dimention(array):
    """Colour a flat voxel vector with the matplotlib "Oranges" colormap.

    Each scalar in ``array`` is mapped to an RGBA tuple; the alpha
    column is dropped, so the result has shape (len(array), 3).
    """
    rgba = cm.ScalarMappable(cmap="Oranges").to_rgba(array)
    return rgba[:, :-1]
## Colour every flat sample, then fold the 4096 axis back into a
## 16x16x16 volume with 3 channels: (rows, 16, 16, 16, 3).
for idx, sample in enumerate(x_train):
    xtrain[idx] = add_rgb_dimention(sample)
for idx, sample in enumerate(x_test):
    xtest[idx] = add_rgb_dimention(sample)

## convert to 1 + 4D space (1st argument represents number of rows in the dataset)
xtrain = xtrain.reshape(x_train.shape[0], 16, 16, 16, 3)
xtest = xtest.reshape(x_test.shape[0], 16, 16, 16, 3)

## One-hot encode the 10 digit labels.
y_train = keras.utils.to_categorical(y_train, 10)
y_test = keras.utils.to_categorical(y_test, 10)

# expect (10000, 10)
print(y_train.shape)


# Build the 3D-CNN graph with the Keras functional API.
## input layer: a 16x16x16 voxel grid with 3 colour channels
input_layer = Input((16, 16, 16, 3))

## two 3x3x3 conv layers, then pool to keep the strongest activations
x = Conv3D(filters=8, kernel_size=(3, 3, 3), activation='relu')(input_layer)
x = Conv3D(filters=16, kernel_size=(3, 3, 3), activation='relu')(x)
x = MaxPool3D(pool_size=(2, 2, 2))(x)

## a deeper conv stack followed by a second pooling stage
x = Conv3D(filters=32, kernel_size=(3, 3, 3), activation='relu')(x)
x = Conv3D(filters=64, kernel_size=(3, 3, 3), activation='relu')(x)
x = MaxPool3D(pool_size=(2, 2, 2))(x)

## batch-normalize the conv output before the dense classifier head
x = BatchNormalization()(x)
x = Flatten()(x)

## MLP head with dropout regularization: 2048 -> 512 -> 10 softmax
x = Dense(units=2048, activation='relu')(x)
x = Dropout(0.4)(x)
x = Dense(units=512, activation='relu')(x)
x = Dropout(0.4)(x)
output_layer = Dense(units=10, activation='softmax')(x)

## tie input and output together into a trainable model
model = Model(inputs=input_layer, outputs=output_layer)


# Compile with Adadelta, train with a 20% validation split, then
# report the predicted class index for every test sample.
optimizer = Adadelta(lr=0.1)
model.compile(loss=categorical_crossentropy, optimizer=optimizer, metrics=['acc'])
model.fit(x=xtrain, y=y_train, batch_size=128, epochs=50, validation_split=0.2)

class_probs = model.predict(xtest)
pred = np.argmax(class_probs, axis=1)
print(pred)

其实对于识别字符,多个通道没有什么意义,因此我们使用单通道来实现:

from keras.layers import Conv3D, MaxPool3D, Flatten, Dense
from keras.layers import Dropout, Input, BatchNormalization
from sklearn.metrics import confusion_matrix, accuracy_score
from plotly.offline import iplot, init_notebook_mode
from keras.losses import categorical_crossentropy
from keras.optimizers import Adadelta
import plotly.graph_objs as go
from matplotlib.pyplot import cm
from keras.models import Model
import numpy as np
import keras
import h5py


# Load the dataset. Use a context manager so the HDF5 file handle is
# closed as soon as the arrays have been copied out (the original left
# the file open for the lifetime of the script).
with h5py.File('./input/full_dataset_vectors.h5', 'r') as dataset:
    # "[:]" materializes each HDF5 dataset into an in-memory numpy
    # array, so the arrays remain valid after the file is closed.
    x_train = dataset["X_train"][:]
    x_test = dataset["X_test"][:]

    y_train = dataset["y_train"][:]
    y_test = dataset["y_test"][:]


print ("x_train shape: ", x_train.shape)
print ("y_train shape: ", y_train.shape)

print ("x_test shape:  ", x_test.shape)
print ("y_test shape:  ", y_test.shape)

    
# 要使用2D的卷积,我们首先将每一张图片转化成3D的形状: width, height, channel(r/g/b).
# 要使用3D的卷积,我们首先将每一张图片转化成4D的形状: length, breadth, height, channel(r/g/b).

# Add a single (greyscale) channel dimension and fold the flat 4096
# axis back into a 16x16x16 volume: (rows, 16, 16, 16, 1).
#
# The original allocated an intermediate (rows, 4096, 1) buffer and
# copied every sample in a Python loop — a single vectorized reshape
# produces the same array directly. astype(np.float64) preserves the
# dtype the old code obtained via its float64 intermediate buffer.
print('x_train.shape[0]', x_train.shape[0]) # 10000
print('x_test.shape[0]', x_test.shape[0]) # 2000

xtrain = x_train.reshape(x_train.shape[0], 16, 16, 16, 1).astype(np.float64)
xtest = x_test.reshape(x_test.shape[0], 16, 16, 16, 1).astype(np.float64)

## One-hot encode the integer digit labels (10 classes).
num_classes = 10
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

# expect (10000, 10)
print(y_train.shape)


# Build the 3D-CNN graph with the Keras functional API.
## input layer: a 16x16x16 voxel grid with a single channel
input_layer = Input((16, 16, 16, 1))

## two 3x3x3 conv layers, then pool to keep the strongest activations
x = Conv3D(filters=8, kernel_size=(3, 3, 3), activation='relu')(input_layer)
x = Conv3D(filters=16, kernel_size=(3, 3, 3), activation='relu')(x)
x = MaxPool3D(pool_size=(2, 2, 2))(x)

## a deeper conv stack followed by a second pooling stage
x = Conv3D(filters=32, kernel_size=(3, 3, 3), activation='relu')(x)
x = Conv3D(filters=64, kernel_size=(3, 3, 3), activation='relu')(x)
x = MaxPool3D(pool_size=(2, 2, 2))(x)

## batch-normalize the conv output before the dense classifier head
x = BatchNormalization()(x)
x = Flatten()(x)

## MLP head with dropout regularization: 2048 -> 512 -> 10 softmax
x = Dense(units=2048, activation='relu')(x)
x = Dropout(0.4)(x)
x = Dense(units=512, activation='relu')(x)
x = Dropout(0.4)(x)
output_layer = Dense(units=10, activation='softmax')(x)

## tie input and output together into a trainable model
model = Model(inputs=input_layer, outputs=output_layer)


# Compile with Adadelta, train with a 20% validation split, then
# report the predicted class index for every test sample.
optimizer = Adadelta(lr=0.1)
model.compile(loss=categorical_crossentropy, optimizer=optimizer, metrics=['acc'])
model.fit(x=xtrain, y=y_train, batch_size=128, epochs=50, validation_split=0.2)

class_probs = model.predict(xtest)
pred = np.argmax(class_probs, axis=1)
print(pred)

最后我写了一个数据可视化的程序:

from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
import numpy as np
import h5py


def show3d_character(img_3d, title):
    """Scatter-plot the occupied voxels of one 16x16x16 digit volume.

    Parameters
    ----------
    img_3d : numpy array of shape (16, 16, 16)
        Voxel intensities; voxels above 0.1 are drawn.
    title : str
        Figure title.
    """
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    ax.grid(False)
    ax.set_xlabel('x')
    ax.set_ylabel('y')
    ax.set_zlabel('z')
    ax.set_xlim(0, 15)
    ax.set_ylim(0, 15)
    ax.set_zlim(0, 15)
    ax.set_title(title)
    box_size = 16
    x = []
    y = []
    z = []
    # Collect coordinates of voxels above the intensity threshold.
    # Note the index order: the threshold reads img_3d[j, i, k] with
    # x=i, y=j, z=k, i.e. img_3d[y, x, z].
    for i in range(box_size):
        for j in range(box_size):
            for k in range(box_size):
                if img_3d[j,i,k] > 0.1:
                    x.append(i)
                    y.append(j)
                    z.append(k)

    # BUG FIX: the colour lookup must use the same index order as the
    # threshold above — the original used img_3d[x, y, z] (axes swapped),
    # so colours did not correspond to the plotted voxel values.
    # BUG FIX: cmap must be the colormap name 'hot'; plt.hot() returns
    # None (it only mutates matplotlib's global state), so the original
    # silently fell back to the default colormap.
    img = ax.scatter(z, x, y, c=img_3d[y, x, z], s=10, cmap='hot')
    fig.colorbar(img)
    plt.show()


# Load the test split and render the first ten 3D digits. The context
# manager closes the HDF5 file as soon as the arrays are copied out
# (the original never closed it).
with h5py.File('./input/full_dataset_vectors.h5', 'r') as dataset:
    x_test = dataset["X_test"][:]
    y_test = dataset["y_test"][:]

for index in range(10):
    # Each flat 4096-vector is one 16x16x16 voxel grid.
    img_3d = np.reshape(x_test[index],(16,16,16))
    img_3d_label = y_test[index]
    show3d_character(img_3d, str(img_3d_label) + ' in 3D')

数据集
参考