在Python中加载MNIST数据集可以通过多种方式实现,以下是几种常见的方法:
使用TensorFlow
import tensorflow as tf
from tensorflow.keras.datasets import mnist
# Load the MNIST dataset via Keras as (images, labels) pairs for the
# training and test splits.
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

# Normalize the data by dividing every pixel value by 255.0.
train_images = train_images / 255.0
test_images = test_images / 255.0

# Reshape to add a trailing channel dimension (grayscale images, so the
# channel count is 1). Passing -1 lets the sample count be inferred instead
# of hard-coding 60000 / 10000.
train_images = train_images.reshape((-1, 28, 28, 1))
test_images = test_images.reshape((-1, 28, 28, 1))
使用TensorFlow Examples(仅适用于 TensorFlow 1.x;tensorflow.examples.tutorials 模块在 TensorFlow 2.x 中已被移除)
from tensorflow.examples.tutorials.mnist import input_data
# Load the MNIST dataset into the 'MNIST_data' directory with one-hot labels.
# NOTE: input_data comes from tensorflow.examples.tutorials, which is only
# shipped with TensorFlow 1.x.
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)

# Print the image and label array shapes for the train, validation and
# test splits, in that order.
for split in (mnist.train, mnist.validation, mnist.test):
    print(split.images.shape)
    print(split.labels.shape)
使用PyTorch
import torch
import torchvision.transforms as transforms
from torchvision import datasets
from torch.utils.data import DataLoader
# Define the transform: convert each image to a Tensor, then normalize it
# with mean 0.5 and standard deviation 0.5 on the single channel.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])

# Download the MNIST dataset (training and test splits) into ./data.
train_dataset = datasets.MNIST(root='./data', train=True, download=True, transform=transform)
test_dataset = datasets.MNIST(root='./data', train=False, download=True, transform=transform)

# Create the data loaders; only the training loader shuffles its samples.
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)
手动解压读取
import gzip
import struct
import numpy as np
def load_mnist_train(path):
    """Load the MNIST training images and labels from gzipped IDX files.

    Args:
        path: Directory containing ``train-labels-idx1-ubyte.gz`` and
            ``train-images-idx3-ubyte.gz``.

    Returns:
        A tuple ``(images, labels)``. ``images`` is a ``uint8`` array of
        shape ``(n, rows * cols)`` (``(n, 784)`` for standard 28x28 MNIST)
        and ``labels`` is a ``uint8`` array holding the remaining bytes of
        the label file.

    Raises:
        ValueError: If either file has an unexpected magic number, or the
            image and label headers report different item counts.
    """
    # Imported locally because the surrounding file does not import os.
    import os

    labels_path = os.path.join(path, 'train-labels-idx1-ubyte.gz')
    images_path = os.path.join(path, 'train-images-idx3-ubyte.gz')

    # Open the label file with gzip.
    with gzip.open(labels_path, 'rb') as lbpath:
        # The label header is two values: '>' means big-endian and 'I' is a
        # 32-bit unsigned integer (magic number, item count).
        magic, n = struct.unpack('>II', lbpath.read(8))
        if magic != 2049:
            raise ValueError(
                'unexpected magic number %d in %s' % (magic, labels_path))
        # Everything after the header is one uint8 label per item.
        labels = np.frombuffer(lbpath.read(), dtype=np.uint8)

    with gzip.open(images_path, 'rb') as imgpath:
        # The image header is four big-endian uint32 values (magic, count,
        # rows, cols). It must be consumed before the pixel data; otherwise
        # the 16 header bytes end up in the buffer and the reshape fails.
        magic, num_images, rows, cols = struct.unpack(
            '>IIII', imgpath.read(16))
        if magic != 2051:
            raise ValueError(
                'unexpected magic number %d in %s' % (magic, images_path))
        if num_images != n:
            raise ValueError(
                'image count %d does not match label count %d'
                % (num_images, n))
        # Flatten every image into a single row of rows * cols pixels.
        images = np.frombuffer(imgpath.read(), dtype=np.uint8).reshape(
            num_images, rows * cols)

    return images, labels
以上是几种在Python中加载MNIST数据集的方法,您可以根据您的需求选择合适的方法。