I wanted to implement the method discussed in Gatys' paper a neural algorithm of artistic style. The method requires a custom loss function and although I didn't search for it in the documentation of TensorFlow I wanted to implement it myself anyways. The problem is that GradientTape doesn't compute the gradient of the loss function. It seems that GradientTape doesn't watch variables which need to be watched. I just couldn't quiet put my finger on it. Here is my code:
import numpy as np
import tensorflow as tf
from matplotlib import pyplot as plt
def gram(x):
shape = x.shape
x = np.reshape(x, (shape[1] * shape[2], shape[3]))
return tf.matmul(x, x, True)
def load_image(path):
image = tf.io.read_file(path)
image = tf.image.decode_image(image, 3, tf.float32)
assert image.shape[0] == image.shape[1]
image = tf.image.resize(image, (224, 224))
return image[tf.newaxis, :]
def content_loss(F, P):
return tf.reduce_sum(((F - P) ** 2) / 2)
def style_loss(F, A):
shape = F.shape
G = gram(F)
H = gram(A)
return tf.reduce_sum(((G - H) ** 2) / (4 * shape[1] * shape[2] * shape[3] * 4))
def step(layer, activation):
return layer(activation)
def feed(model, image, content_layers, style_layers, activations={}):
activation = image
for layer in model.keys():
activation = step(model[layer], activation)
if layer in content_layers or layer in style_layers:
activations[layer] = activation
return activations
def total_loss(layers, activations, content, style, content_layers, style_layers):
loss = tf.Variable(0, trainable=True, dtype=tf.float32)
for layer in layers:
if layer in content_layers:
loss.assign_add(content_loss(activations[layer], content[layer]))
if layer in style_layers:
loss.assign_add(style_loss(activations[layer], style[layer]) * 1e03)
return loss
content_layers = {'block5_pool'}
style_layers = {'block5_conv1', 'block5_conv2', 'block5_conv3', 'block5_conv4'}
VGG19 = tf.keras.applications.vgg19.VGG19(
include_top=False, weights='imagenet', input_shape=(224, 224, 3))
vgg = {}
content = {}
style = {}
con_activation = load_image('./Data/pokion.png')
stl_activation = load_image('./Data/quadpon.jpg')
for layer in VGG19.layers:
layer.trainable = False
con_activation = step(layer, con_activation)
stl_activation = step(layer, stl_activation)
if layer.name in content_layers:
content[layer.name] = con_activation
if layer.name in style_layers:
style[layer.name] = stl_activation
vgg[layer.name] = layer
assert (len(content) != 0 and len(style) != 0)
del VGG19, con_activation, stl_activation
generated_image = tf.Variable(np.random.rand(1, 224, 224, 3), trainable=True, dtype=tf.float32)
optimizer = tf.optimizers.Adam()
for i in range(200):
with tf.GradientTape(True) as tape:
gen = feed(vgg, generated_image, content_layers, style_layers)
print(generated_image.shape)
loss = total_loss(vgg.keys(), gen, content, style, content_layers, style_layers)
gradients = tape.gradient(loss, generated_image)
optimizer.apply_gradients(zip([gradients], [generated_image]))
fig, ax = plt.subplots()
ax.imshow(generated_image[0])
plt.show()
the output shows something like this:
WARNING: Logging before flag parsing goes to stderr.
W0316 17:30:45.648816 6320 backprop.py:980] Calling GradientTape.gradient on a persistent tape inside its context is significantly less efficient than calling it outside the context (it causes the gradient ops to be recorded on the tape, leading to increased CPU and memory usage). Only call GradientTape.gradient inside the context if you actually want to trace the gradient in order to compute higher order derivatives.
Traceback (most recent call last):
File "C:/Users/amran/Desktop/Projects/Roastilng Jabrils/tests/NST.py", line 85, in <module>
gradients = tape.gradient(loss, generated_image)
File "C:\ProgramData\Anaconda3\lib\site-packages\tensorflow_core\python\eager\backprop.py", line 990, in gradient
with self:
File "C:\ProgramData\Anaconda3\lib\site-packages\tensorflow_core\python\eager\backprop.py", line 803, in __enter__
self._push_tape()
File "C:\ProgramData\Anaconda3\lib\site-packages\tensorflow_core\python\eager\backprop.py", line 813, in _push_tape
raise ValueError("Tape is already recording.")
ValueError: Tape is already recording.
is there anything I missed?
The GradientTape instance is supposed to be used after the "with" block ends, which is a bit counter-intuitive for how context managers are used in Python. Basically you finish running all the operations that need watching within the "with" block, and then use the tape to get the relevant gradients. Try the following for the inside of your block: