TP1: Introduction to PyTorch (Solutions)
Before using PyTorch, we start by solving a polynomial regression problem for a function on a bounded interval using NumPy. We will then see how to solve the same problem with PyTorch.

We are given a dataset $(x_1, y_1),\dots,(x_n,y_n)$, where the inputs $x_1,\dots,x_n$ form a uniform grid on $[-\pi,\pi]$, and the outputs $y_1,\dots,y_n$ are given by
$$ y_j = \sin(x_j) + \varepsilon_j \qquad j\in\{1,\dots,n\}, $$
where the $\varepsilon_j$ are i.i.d. random variables with distribution $N(0,\sigma^2)$, modeling measurement errors.
The idea is to minimize, over $\theta=(a,b,c,d)$, the quadratic cost/risk
$$ R_n(\theta) = \sum_{j=1}^n (y_j - P_\theta(x_j))^2 \qquad\text{where}\qquad P_\theta(x) = a + bx + cx^2 + dx^3$$
in order to determine the coefficients of the polynomial. We therefore look for
$$ \theta^\ast = \underset{\theta}{\text{argmin }} R_n(\theta). $$
To do so, we can use gradient descent, an iterative method for finding $\theta^\ast$, whose steps are as follows (a NumPy sketch is given right after this list):
- for $k$ from 1 to $n_{iter}$, perform the update $ \theta_{k} = \theta_{k-1} - \eta \nabla_{\theta} R_n(\theta_{k-1})$,

where $\eta$ is a parameter called the learning rate. We will come back to this method later in the course.
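As announced, here is a minimal NumPy sketch of this gradient descent for the cubic model (a sketch of ours; the variable names are not from the TP):

import numpy as np
from math import pi

# synthetic data, as in the setup above
n = 50
x = np.linspace(-pi, pi, n)
y = np.sin(x) + 0.1 * np.random.randn(n)

X = np.stack([np.ones(n), x, x**2, x**3], axis=1)  # design matrix with columns 1, x, x^2, x^3
theta = np.random.randn(4)                         # theta = (a, b, c, d)
eta = 1e-4                                         # learning rate

for k in range(2000):
    residual = X @ theta - y    # P_theta(x_j) - y_j for every j
    grad = 2 * X.T @ residual   # gradient of R_n at the current theta
    theta -= eta * grad

print(theta)  # estimated (a, b, c, d)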
import numpy as np
from math import pi
import matplotlib.pyplot as plt
import torch
x = torch.linspace(-pi, pi, 50)       # uniform grid on [-pi, pi]
y0 = torch.sin(x)                     # noiseless ground truth (used in the plots below)
y = y0 + 0.1 * torch.randn(len(x))    # noisy observations
With PyTorch v2
We will now define a model properly via the `nn` module, together with a loss function.
p = torch.tensor([1, 2, 3])
xx = x.unsqueeze(-1).pow(p) # returns the columns (x, x^2, x^3)
xx
tensor([[-3.1416e+00,  9.8696e+00, -3.1006e+01],
        [-3.0134e+00,  9.0804e+00, -2.7362e+01],
        ...
        [ 3.0134e+00,  9.0804e+00,  2.7362e+01],
        [ 3.1416e+00,  9.8696e+00,  3.1006e+01]])
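A quick check of ours: `xx` has one row per sample and one column per power of `x`.

print(xx.shape)                          # torch.Size([50, 3])
print(torch.allclose(xx[:, 2], x ** 3))  # True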
# define the model
model = torch.nn.Sequential(    # applies the modules below in sequence
    torch.nn.Linear(3,1),       # returns a + b * x + c * x^2 + d * x^3
    torch.nn.Flatten(0,1)       # flattens the output of the linear layer into a 1D tensor
)
torch.nn.Linear(3,1)(xx) # a freshly initialized linear layer applied to xx: output of shape (50, 1)
tensor([[ 19.0719],
        [ 16.9491],
        ...
        [-12.9215],
        [-14.6539]], grad_fn=<AddmmBackward0>)
torch.nn.Flatten(0,1)(torch.nn.Linear(3,1)(xx)) # flattening yields a 1D tensor of shape (50,)
tensor([ 0.1425,  0.1441,  0.1478,  ..., -4.1720, -4.5120, -4.8687],
       grad_fn=<ViewBackward0>)
# loss function: the sum of squared errors (reduction='sum' sums instead of averaging)
loss_fct = torch.nn.MSELoss(reduction='sum')
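# Side note (our sanity check): with reduction='sum', MSELoss is exactly the
# sum of squared errors appearing in R_n above.
a_chk = torch.tensor([1.0, 2.0, 3.0])
b_chk = torch.tensor([1.5, 2.0, 2.0])
print(torch.allclose(loss_fct(a_chk, b_chk), ((a_chk - b_chk) ** 2).sum()))  # True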
learning_rate = 1e-4
n_iter = 2000
loss_plot = np.zeros(n_iter)
for t in range(n_iter):
    y_pred = model(xx)
    loss = loss_fct(y_pred, y)
    loss_plot[t] = loss.item()  # .item() extracts the scalar value (the loss tensor tracks gradients)
    if t % 100 == 99:
        print("iteration=%s, loss=%s" % (t, loss.item()))
    # Reset the gradients to zero before computing them.
    model.zero_grad()
    # Compute the gradients.
    loss.backward()
    # Update the parameters.
    # Disable autodifferentiation (since we perform the update explicitly here).
    with torch.no_grad():
        for param in model.parameters():
            param -= learning_rate * param.grad
iteration=99, loss=1.293859601020813
iteration=199, loss=0.9065651297569275
iteration=299, loss=0.7683510184288025
iteration=399, loss=0.7186383605003357
iteration=499, loss=0.7006038427352905
iteration=599, loss=0.6939988136291504
iteration=699, loss=0.6915556192398071
iteration=799, loss=0.690642237663269
iteration=899, loss=0.6902969479560852
iteration=999, loss=0.6901649832725525
iteration=1099, loss=0.6901141405105591
iteration=1199, loss=0.6900944709777832
iteration=1299, loss=0.6900864243507385
iteration=1399, loss=0.6900834441184998
iteration=1499, loss=0.6900821924209595
iteration=1599, loss=0.6900817155838013
iteration=1699, loss=0.6900814771652222
iteration=1799, loss=0.6900812983512878
iteration=1899, loss=0.6900813579559326
iteration=1999, loss=0.6900812387466431
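To see what `loss.backward()`, `param.grad` and `torch.no_grad()` do, here is a one-parameter sketch of ours (not part of the TP):

w = torch.tensor(2.0, requires_grad=True)
loss_w = (w - 1.0) ** 2   # scalar loss
loss_w.backward()         # autograd computes d(loss_w)/dw = 2 * (w - 1)
print(w.grad)             # tensor(2.)
with torch.no_grad():     # the update itself must not be tracked by autograd
    w -= 0.1 * w.grad     # one gradient-descent step: w becomes 1.8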
# accessing the coefficients a, b, c and d
linear_layer = model[0]
print(
    linear_layer.bias.item(),        # a
    linear_layer.weight[:,0].item(), # b
    linear_layer.weight[:,1].item(), # c
    linear_layer.weight[:,2].item(), # d
)
-0.015817703679203987 0.8378247022628784 0.005710783880203962 -0.09147634357213974
plt.plot(x, y, "o", label="data")
plt.plot(x, y_pred.detach(), "o", label="predictions")
plt.plot(x, y0, lw=2, label="ground truth")
plt.xlabel("x")
plt.legend()
plt.show()
plt.plot(loss_plot[10:], label="loss values")
plt.xlabel("iteration")
plt.legend()
plt.show()
With PyTorch v3
The weight-update phase, that is, the gradient descent step, can be performed automatically rather than manually, using the `torch.optim` module.
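For plain SGD, `optimizer.step()` performs exactly the manual update `param -= learning_rate * param.grad` from the previous version. A one-parameter sketch of ours illustrating the `zero_grad` / `backward` / `step` pattern:

w = torch.tensor(2.0, requires_grad=True)
opt = torch.optim.SGD([w], lr=0.1)
loss_w = (w - 1.0) ** 2
opt.zero_grad()    # reset the gradients
loss_w.backward()  # compute d(loss_w)/dw = 2 * (w - 1) = 2.0
opt.step()         # w <- w - lr * w.grad = 2.0 - 0.1 * 2.0
print(w.item())    # 1.8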
p = torch.tensor([1, 2, 3])
xx = x.unsqueeze(-1).pow(p)
model = torch.nn.Sequential(
    torch.nn.Linear(3,1),
    torch.nn.Flatten(0,1)
)
loss_fct = torch.nn.MSELoss(reduction='sum')
learning_rate = 1e-4
# For the update step we use the stochastic gradient descent (SGD) optimizer,
# which we will study later in the course.
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
n_iter = 2000
loss_plot = np.zeros(n_iter)
for t in range(n_iter):
    y_pred = model(xx)
    loss = loss_fct(y_pred, y)
    loss_plot[t] = loss.item()
    if t % 100 == 99:
        print("iteration=%s, loss=%s" % (t, loss.item()))
    # Reset the gradients to zero before computing them.
    optimizer.zero_grad()
    # Compute the gradients.
    loss.backward()
    # Update the parameters.
    optimizer.step()
linear_layer = model[0]
print(
    linear_layer.bias.item(),        # a
    linear_layer.weight[:,0].item(), # b
    linear_layer.weight[:,1].item(), # c
    linear_layer.weight[:,2].item(), # d
)
-0.015817703679203987 0.8378247022628784 0.005710783880203962 -0.09147634357213974
plt.plot(x, y, "o", label="data")
plt.plot(x, y_pred.detach(), "o", label="predictions")
plt.plot(x, y0, lw=2, label="ground truth")
plt.xlabel("x")
plt.legend()
plt.show()
plt.plot(loss_plot[10:], label="loss values")
plt.xlabel("iteration")
plt.legend()
plt.show()
With PyTorch v4
It is sometimes preferable to write a subclass of `torch.nn.Module`, for more flexibility.
class Poly3(torch.nn.Module):
    def __init__(self):
        super().__init__()
        # the four coefficients, registered as trainable parameters
        self.a = torch.nn.Parameter(torch.randn(()))
        self.b = torch.nn.Parameter(torch.randn(()))
        self.c = torch.nn.Parameter(torch.randn(()))
        self.d = torch.nn.Parameter(torch.randn(()))

    def forward(self, x):
        return self.a + self.b*x + self.c*x**2 + self.d*x**3

    def string(self):
        # print the learned coefficients a, b, c, d
        print(
            self.a.item(),
            self.b.item(),
            self.c.item(),
            self.d.item(),
        )
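# Quick check (ours): torch.nn.Parameter registers the four scalars with the
# module, so the optimizer below sees them through model.parameters().
print([name for name, _ in Poly3().named_parameters()])  # ['a', 'b', 'c', 'd']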
model = Poly3()
loss_fct = torch.nn.MSELoss(reduction='sum')
learning_rate = 1e-4
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
n_iter = 2000
loss_plot = np.zeros(n_iter)
for t in range(n_iter):
    y_pred = model(x)
    loss = loss_fct(y_pred, y)
    loss_plot[t] = loss.item()
    if t % 100 == 99:
        print("iteration=%s, loss=%s" % (t, loss.item()))
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
iteration=99, loss=5.625065326690674
iteration=199, loss=2.759828567504883
iteration=299, loss=1.5584675073623657
iteration=399, loss=1.0545344352722168
iteration=499, loss=0.8430770635604858
iteration=599, loss=0.7543213963508606
iteration=699, loss=0.7170588374137878
iteration=799, loss=0.7014120221138
iteration=899, loss=0.694840669631958
iteration=999, loss=0.6920806765556335
iteration=1099, loss=0.6909211874008179
iteration=1199, loss=0.6904338598251343
iteration=1299, loss=0.690229594707489
iteration=1399, loss=0.6901434659957886
iteration=1499, loss=0.6901074051856995
iteration=1599, loss=0.6900922060012817
iteration=1699, loss=0.6900859475135803
iteration=1799, loss=0.6900832653045654
iteration=1899, loss=0.6900820136070251
iteration=1999, loss=0.6900815367698669
model.string()
-0.015961814671754837 0.8378353714942932 0.005734664388000965 -0.09147780388593674
plt.plot(x, y, "o", label="data")
plt.plot(x, y_pred.detach(), "o", label="predictions")
plt.plot(x, y0, lw=2, label="ground truth")
plt.xlabel("x")
plt.legend()
plt.show()
plt.plot(loss_plot[10:], label="loss values")
plt.xlabel("iteration")
plt.legend()
plt.show()
Exercise: Linear regression
We will use PyTorch to perform a simple linear regression on an air-quality dataset (airquality.csv, available on the course's Ametice page).
import pandas as pd
data = pd.read_csv('airquality.csv')
data.head()
|   | rownames | Ozone | Solar.R | Wind | Temp | Month | Day |
|---|---|---|---|---|---|---|---|
| 0 | 1 | 41.0 | 190.0 | 7.4 | 67 | 5 | 1 |
| 1 | 2 | 36.0 | 118.0 | 8.0 | 72 | 5 | 2 |
| 2 | 3 | 12.0 | 149.0 | 12.6 | 74 | 5 | 3 |
| 3 | 4 | 18.0 | 313.0 | 11.5 | 62 | 5 | 4 |
| 4 | 5 | NaN | NaN | 14.3 | 56 | 5 | 5 |
data.describe()
|   | rownames | Ozone | Solar.R | Wind | Temp | Month | Day |
|---|---|---|---|---|---|---|---|
| count | 153.000000 | 116.000000 | 146.000000 | 153.000000 | 153.000000 | 153.000000 | 153.000000 |
| mean | 77.000000 | 42.129310 | 185.931507 | 9.957516 | 77.882353 | 6.993464 | 15.803922 |
| std | 44.311398 | 32.987885 | 90.058422 | 3.523001 | 9.465270 | 1.416522 | 8.864520 |
| min | 1.000000 | 1.000000 | 7.000000 | 1.700000 | 56.000000 | 5.000000 | 1.000000 |
| 25% | 39.000000 | 18.000000 | 115.750000 | 7.400000 | 72.000000 | 6.000000 | 8.000000 |
| 50% | 77.000000 | 31.500000 | 205.000000 | 9.700000 | 79.000000 | 7.000000 | 16.000000 |
| 75% | 115.000000 | 63.250000 | 258.750000 | 11.500000 | 85.000000 | 8.000000 | 23.000000 |
| max | 153.000000 | 168.000000 | 334.000000 | 20.700000 | 97.000000 | 9.000000 | 31.000000 |
We notice that some values are missing: `count` differs across columns. In `data.head()`, row 4 shows two `NaN` (Not A Number) entries, which mark these missing values.

We will remove every row that contains missing values.
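Before dropping them, we can count the missing values per column (a quick check of ours; the counts also follow from `count` in `data.describe()` above):

print(data.isna().sum())  # Ozone: 37 missing values, Solar.R: 7, other columns: 0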
data = data.dropna()
data.plot.scatter(x='Wind', y='Ozone')
plt.show()
We will try to explain the ozone concentration ($y_j$) as a function of the wind speed ($x_j$) via the model
$$ f(x) = \alpha x + \beta.$$
To obtain the parameters $\alpha$ and $\beta$, we minimize the quadratic risk
$$ R_n(\alpha,\beta) = \sum_{j=1}^n (y_j - \alpha x_j-\beta)^2. $$
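As a sanity check (not required by the exercise), this quadratic risk has a well-known closed-form minimizer,
$$ \alpha^\ast = \frac{\sum_{j=1}^n (x_j-\bar x)(y_j-\bar y)}{\sum_{j=1}^n (x_j-\bar x)^2}, \qquad \beta^\ast = \bar y - \alpha^\ast \bar x, $$
where $\bar x$ and $\bar y$ are the sample means; the gradient descent below should converge to these values.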
To carry out the regression, we first normalize/standardize the data.
X = data.Wind
x_mean = X.mean()
x_std = X.std()
x = (X.to_numpy() - x_mean) / x_std        # standardize: zero mean, unit standard deviation
x = torch.tensor(x, dtype=torch.float32)
x
tensor([-0.7138, -0.5452,  0.7478,  ...,  1.2256, -0.5452,  0.4386])
Careful: a Pandas DataFrame cannot be converted directly into a torch tensor; one must first go through a NumPy array. We use 32-bit floats rather than 64-bit ones.
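We can verify the standardization (a quick check of ours):

print(x.mean().item(), x.std().item())  # approximately 0 and 1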
Y = data.Ozone
y_mean = Y.mean()
y_std = Y.std()
y = (Y.to_numpy() - y_mean) / y_std
y = torch.tensor(y, dtype=torch.float32)
plt.scatter(x,y)
plt.show()
model = torch.nn.Sequential(    # applies the modules below in sequence
    torch.nn.Linear(1,1),       # returns a + b * x
    torch.nn.Flatten(0,1)       # flattens the output of the linear layer into a 1D tensor
)
xx = x.reshape((-1,1))
We reshape `x` so that `model` can be applied to it.
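A quick shape check (ours): `Linear(1,1)` expects one feature per row.

print(x.shape, xx.shape)  # x is 1D; xx has shape (n, 1), one feature per row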
learning_rate = 1e-4
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
loss_fct = torch.nn.MSELoss(reduction='sum')
n_iter = 2000
loss_plot = np.zeros(n_iter)
for t in range(n_iter):
    y_pred = model(xx)
    loss = loss_fct(y_pred, y)
    loss_plot[t] = loss.item()
    if t % 100 == 99:
        print("iteration=%s, loss=%s" % (t, loss.item()))
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
iteration=99, loss=70.9743881225586
iteration=199, loss=68.75945281982422
iteration=299, loss=68.73358154296875
iteration=399, loss=68.73328399658203
iteration=499, loss=68.7332763671875
iteration=599, loss=68.7332763671875
iteration=699, loss=68.7332763671875
iteration=799, loss=68.7332763671875
iteration=899, loss=68.7332763671875
iteration=999, loss=68.7332763671875
iteration=1099, loss=68.7332763671875
iteration=1199, loss=68.7332763671875
iteration=1299, loss=68.7332763671875
iteration=1399, loss=68.7332763671875
iteration=1499, loss=68.7332763671875
iteration=1599, loss=68.7332763671875
iteration=1699, loss=68.7332763671875
iteration=1799, loss=68.7332763671875
iteration=1899, loss=68.7332763671875
iteration=1999, loss=68.7332763671875
plt.plot(x, y, "o", label="data")
plt.plot(x, y_pred.detach(), label="regression model")
plt.legend()
plt.show()
plt.scatter(X, Y)
# map the standardized predictions back to the original Ozone scale
plt.plot(X.to_numpy(), y_std * y_pred.detach() + y_mean, color="orange", label="regression model")
plt.xlabel("Wind")
plt.ylabel("Ozone")
plt.legend()
plt.show()
plt.plot(loss_plot[10:], label="loss values")
plt.xlabel("iteration")
plt.legend()
plt.show()