!pip install tensorflow-gpu==2.0.0-rc0
import tensorflow as tf
import IPython.display as display
import matplotlib.pyplot as plt
import matplotlib as mpl
mpl.rcParams['figure.figsize'] = (12,12)
mpl.rcParams['axes.grid'] = False
import numpy as np
import time
import functools
!rm -rf nst_colab_assets
!git clone https://github.com/jinh0park/nst_colab_assets.git
Define some useful functions.
def load_img(path_to_img, max_dim=512):
    """Load an image file as a batched float tensor whose longer side is ``max_dim``.

    The file is read, decoded to three channels, converted to ``tf.float32``
    and resized so that the longer of its two spatial sides becomes
    ``max_dim`` (the other side is scaled by the same factor and truncated
    to an integer).

    Args:
        path_to_img: Path of the image file to read.
        max_dim: Target length of the longer spatial side. Defaults to 512.

    Returns:
        The resized image with a new leading axis of size 1.
    """
    img = tf.io.read_file(path_to_img)
    img = tf.image.decode_image(img, channels=3)
    img = tf.image.convert_image_dtype(img, tf.float32)

    # Spatial dimensions only: the trailing (channel) axis is dropped.
    shape = tf.cast(tf.shape(img)[:-1], tf.float32)
    # Use tf.reduce_max rather than the builtin max(): the builtin iterates
    # over the tensor, which is only possible in eager mode.
    long_dim = tf.reduce_max(shape)
    scale = max_dim / long_dim

    new_shape = tf.cast(shape * scale, tf.int32)
    img = tf.image.resize(img, new_shape)

    # Add the leading batch axis.
    img = img[tf.newaxis, :]
    return img
def imshow(image, title=None):
    """Draw ``image`` on the current matplotlib axes.

    Args:
        image: Image tensor. If it has more than three axes, the leading
            axis is squeezed away before plotting.
        title: Optional title for the axes; skipped when falsy.
    """
    has_batch_axis = len(image.shape) > 3
    picture = tf.squeeze(image, axis=0) if has_batch_axis else image

    plt.imshow(picture)
    if not title:
        return
    plt.title(title)
# Locations of the two input images inside the cloned asset repository.
content_path = 'nst_colab_assets/images/bg-index.png'
style_path = 'nst_colab_assets/images/starry_night.png'

# Load both images through load_img.
content_image = load_img(content_path)
style_image = load_img(style_path)

# Show the two images side by side in a 1x2 grid.
for position, (picture, caption) in enumerate(
        [(content_image, 'Content Image'), (style_image, 'Style Image')],
        start=1):
    plt.subplot(1, 2, position)
    imshow(picture, caption)

print("Check the type of the variable 'content_image': {}".format(type(content_image)))
# Instantiate VGG19 without its classification head, using ImageNet weights,
# and list the names of its layers.
vgg = tf.keras.applications.VGG19(include_top=False, weights='imagenet')

print()
layer_names = [layer.name for layer in vgg.layers]
for layer_name in layer_names:
    print(layer_name)
# Layer from which the content feature maps are pulled.
content_layers = ['block5_conv2']

# Layers of interest for style: the first convolution of blocks 1 to 5.
style_layers = [
    'block1_conv1',
    'block2_conv1',
    'block3_conv1',
    'block4_conv1',
    'block5_conv1',
]
def vgg_layers(layer_names):
    """Build a frozen VGG19 model exposing selected intermediate outputs.

    Args:
        layer_names: Names of the VGG19 layers whose outputs are wanted.

    Returns:
        A ``tf.keras.Model`` that maps the VGG19 input to the outputs of the
        named layers, in the order the names were given.
    """
    # Pretrained VGG19 (ImageNet weights) without the classification head.
    backbone = tf.keras.applications.VGG19(include_top=False, weights='imagenet')
    backbone.trainable = False

    selected_outputs = []
    for layer_name in layer_names:
        selected_outputs.append(backbone.get_layer(layer_name).output)

    return tf.keras.Model([backbone.input], selected_outputs)
# style_outputs = vgg_layers(style_layers)(style_image * 255)
# print("Check the type of outputs and inspect them.", type(style_outputs))
# for name, output in zip(style_layers, style_outputs):
# print(name)
# print(" shape: ", output.numpy().shape)
# print(" min: ", output.numpy().min())
# print(" max: ", output.numpy().max())
# print(" mean: ", output.numpy().mean())
# print()
def gram_matrix(input_tensor):
    """Return the per-example Gram matrix of a rank-4 feature tensor.

    For each entry along axis 0, the products of every pair of entries along
    the last axis are summed over axes 1 and 2, then divided by the number
    of positions (size of axis 1 times size of axis 2).

    Args:
        input_tensor: Rank-4 tensor, indexed as ``b, i, j, c`` by the einsum.

    Returns:
        A rank-3 tensor indexed as ``b, c, d``.
    """
    dims = tf.shape(input_tensor)
    position_count = tf.cast(dims[1] * dims[2], tf.float32)
    summed_products = tf.linalg.einsum('bijc,bijd->bcd', input_tensor, input_tensor)
    return summed_products / position_count
class StyleContentModel(tf.keras.models.Model):
    """Keras model returning style and content features of an image.

    Wraps the frozen VGG19 extractor built by ``vgg_layers`` for the
    concatenation ``style_layers + content_layers`` and splits its outputs
    back into a style part and a content part.
    """

    def __init__(self, style_layers, content_layers):
        """Build the underlying extractor.

        Args:
            style_layers: List of VGG19 layer names used for style.
            content_layers: List of VGG19 layer names used for content.
        """
        super(StyleContentModel, self).__init__()
        # Style layers come first, so the first `num_style_layers` outputs
        # of the extractor are the style outputs.
        self.vgg = vgg_layers(style_layers + content_layers)
        self.style_layers = style_layers
        self.content_layers = content_layers
        self.num_style_layers = len(style_layers)
        self.vgg.trainable = False

    def call(self, inputs, gram=True):
        """Expects float input in [0,1].

        Args:
            inputs: Batched image tensor with values in [0, 1].
            gram: When true (the default), each style output is replaced by
                its Gram matrix via ``gram_matrix``. When false, the raw
                style feature maps are returned instead.

        Returns:
            ``{'content': {layer_name: tensor}, 'style': {layer_name: tensor}}``.
        """
        # Rescale to [0, 255] before applying the VGG19 preprocessing.
        inputs = inputs * 255.0
        preprocessed_input = tf.keras.applications.vgg19.preprocess_input(inputs)
        outputs = self.vgg(preprocessed_input)

        style_outputs = outputs[:self.num_style_layers]
        content_outputs = outputs[self.num_style_layers:]

        # Honour the `gram` flag instead of always computing Gram matrices.
        if gram:
            style_outputs = [gram_matrix(style_output)
                             for style_output in style_outputs]

        content_dict = dict(zip(self.content_layers, content_outputs))
        style_dict = dict(zip(self.style_layers, style_outputs))

        return {'content': content_dict, 'style': style_dict}
# Run the extractor once on the content image and print summary statistics
# for every style and content output.
extractor = StyleContentModel(style_layers, content_layers)
results = extractor(tf.constant(content_image), gram=True)
style_results = results['style']

print('Styles:')
for name, output in sorted(results['style'].items()):
    values = output.numpy()
    print(" ", name)
    print(" shape: ", values.shape)
    print(" min: ", values.min())
    print(" max: ", values.max())
    print(" mean: ", values.mean())
    print()

print("Contents:")
for name, output in sorted(results['content'].items()):
    values = output.numpy()
    print(" ", name)
    print(" shape: ", values.shape)
    print(" min: ", values.min())
    print(" max: ", values.max())
    print(" mean: ", values.mean())
def style_content_loss(outputs, style_targets, content_targets, style_weight, content_weight):
    """Combine the weighted style and content losses.

    Each part is the sum over layers of the mean squared difference between
    the output and its target, multiplied by ``weight / number_of_layers``.

    Args:
        outputs: Dict with ``'style'`` and ``'content'`` entries, each a dict
            mapping layer name to tensor.
        style_targets: Dict mapping style layer name to target tensor.
        content_targets: Dict mapping content layer name to target tensor.
        style_weight: Multiplier for the style part.
        content_weight: Multiplier for the content part.

    Returns:
        The scalar total loss.
    """
    def _weighted_mse(actual, wanted, weight):
        # Sum of per-layer mean squared errors, scaled by weight / layer count.
        per_layer = [tf.reduce_mean((actual[key] - wanted[key])**2)
                     for key in actual.keys()]
        return tf.add_n(per_layer) * (weight / len(actual))

    style_part = _weighted_mse(outputs['style'], style_targets, style_weight)
    content_part = _weighted_mse(outputs['content'], content_targets, content_weight)
    return style_part + content_part
def total_variation_loss(image):
    """Return the mean squared difference between neighbouring entries.

    Differences are taken between adjacent entries along axis 1 and along
    axis 2 of a rank-4 tensor; the two mean squared differences are added.

    Args:
        image: Rank-4 tensor (indexed with four subscripts below).

    Returns:
        A scalar tensor.
    """
    axis2_diffs = image[:, :, 1:, :] - image[:, :, :-1, :]
    axis1_diffs = image[:, 1:, :, :] - image[:, :-1, :, :]

    axis2_term = tf.reduce_mean(axis2_diffs**2)
    axis1_term = tf.reduce_mean(axis1_diffs**2)
    return axis2_term + axis1_term
def clip_0_1(image):
    """Return ``image`` with every value clipped to the range [0.0, 1.0]."""
    lower, upper = 0.0, 1.0
    return tf.clip_by_value(image, lower, upper)
def run_style_transfer(style_image, content_image, params):
    """Optimise an image to match the style and content targets.

    Args:
        style_image: Batched image tensor supplying the style targets.
        content_image: Batched image tensor supplying the content targets
            and, unless ``params['noise']`` is truthy, the starting image.
        params: Dict of settings. Required keys: ``'content_layers'``,
            ``'style_layers'``, ``'lr'``, ``'style_weight'``,
            ``'content_weight'``, ``'total_variation_weight'`` and
            ``'noise'``. Optional keys: ``'epochs'`` (default 10) and
            ``'steps_per_epoch'`` (default 100).

    Returns:
        The optimised image as a ``tf.Variable``.
    """
    content_layers = params['content_layers']
    style_layers = params['style_layers']
    lr = params['lr']
    style_weight = params['style_weight']
    content_weight = params['content_weight']
    total_variation_weight = params['total_variation_weight']
    noise = params['noise']
    # Previously hard-coded; the defaults keep the original schedule.
    epochs = params.get('epochs', 10)
    steps_per_epoch = params.get('steps_per_epoch', 100)

    # Start either from uniform noise in [0, 1) or from the content image.
    if noise:
        image = tf.Variable(tf.random.uniform(content_image.shape, 0, 1))
    else:
        image = tf.Variable(content_image)

    extractor = StyleContentModel(style_layers, content_layers)
    style_targets = extractor(style_image)['style']
    content_targets = extractor(content_image)['content']

    opt = tf.optimizers.Adam(learning_rate=lr, beta_1=0.99, epsilon=1e-1)

    @tf.function()
    def train_step(image):
        # One optimiser update on the image, followed by clipping to [0, 1].
        with tf.GradientTape() as tape:
            outputs = extractor(image)
            loss = style_content_loss(outputs, style_targets, content_targets,
                                      style_weight, content_weight)
            loss += total_variation_weight * total_variation_loss(image)

        grad = tape.gradient(loss, image)
        opt.apply_gradients([(grad, image)])
        image.assign(clip_0_1(image))

    step = 0
    for _ in range(epochs):
        for _ in range(steps_per_epoch):
            step += 1
            train_step(image)
            print(".", end='')
        # After each epoch, replace the cell output with the current image.
        display.clear_output(wait=True)
        imshow(image.read_value())
        plt.title("Train step: {}".format(step))
        plt.show()

    return image
%%time
params = {
'content_layers': ['block5_conv2'],
'style_layers': ['block1_conv1',
'block2_conv1',
'block3_conv1',
'block4_conv1',
'block5_conv1'],
'style_weight': 1e-2,
'content_weight': 1e4,
'total_variation_weight': 1e8,
'lr': 0.02,
'noise': False
}
run_style_transfer(style_image, content_image, params)
%%time
params = {
'content_layers': ['block5_conv2'],
'style_layers': ['block1_conv1',
'block2_conv1',
'block3_conv1',
'block4_conv1',
'block5_conv1'],
'style_weight': 1e-3,
'content_weight': 1e4,
'total_variation_weight': 1e8,
'lr': 0.02,
'noise': False
}
run_style_transfer(style_image, content_image, params)
%%time
params = {
'content_layers': ['block5_conv2'],
'style_layers': ['block1_conv1',
'block2_conv1',
'block3_conv1',
'block4_conv1',
'block5_conv1'],
'style_weight': 1e-1,
'content_weight': 1e4,
'total_variation_weight': 1e8,
'lr': 0.02,
'noise': False
}
run_style_transfer(style_image, content_image, params)