## ----setup, include = FALSE---------------------------------------------------
library(keras)
knitr::opts_chunk$set(comment = NA, eval = FALSE)

## -----------------------------------------------------------------------------
# library(keras)
#
# # input layer
# inputs <- layer_input(shape = c(784))
#
# # outputs compose input + dense layers
# predictions <- inputs %>%
#   layer_dense(units = 64, activation = 'relu') %>%
#   layer_dense(units = 64, activation = 'relu') %>%
#   layer_dense(units = 10, activation = 'softmax')
#
# # create and compile model
# model <- keras_model(inputs = inputs, outputs = predictions)
# model %>% compile(
#   optimizer = 'rmsprop',
#   loss = 'categorical_crossentropy',
#   metrics = c('accuracy')
# )

## -----------------------------------------------------------------------------
# x <- layer_input(shape = c(784))
# # This works, and returns the 10-way softmax we defined above.
# y <- x %>% model

## -----------------------------------------------------------------------------
# # Input tensor for sequences of 20 timesteps,
# # each containing a 784-dimensional vector
# input_sequences <- layer_input(shape = c(20, 784))
#
# # This applies our previous model to the input sequence
# processed_sequences <- input_sequences %>%
#   time_distributed(model)

## -----------------------------------------------------------------------------
# library(keras)
#
# main_input <- layer_input(shape = c(100), dtype = 'int32', name = 'main_input')
#
# lstm_out <- main_input %>%
#   layer_embedding(input_dim = 10000, output_dim = 512, input_length = 100) %>%
#   layer_lstm(units = 32)

## -----------------------------------------------------------------------------
# auxiliary_output <- lstm_out %>%
#   layer_dense(units = 1, activation = 'sigmoid', name = 'aux_output')

## -----------------------------------------------------------------------------
# auxiliary_input <- layer_input(shape = c(5), name = 'aux_input')
#
# main_output <- layer_concatenate(c(lstm_out, auxiliary_input)) %>%
#   layer_dense(units = 64, activation = 'relu') %>%
#   layer_dense(units = 64, activation = 'relu') %>%
#   layer_dense(units = 64, activation = 'relu') %>%
#   layer_dense(units = 1, activation = 'sigmoid', name = 'main_output')

## -----------------------------------------------------------------------------
# model <- keras_model(
#   inputs = c(main_input, auxiliary_input),
#   outputs = c(main_output, auxiliary_output)
# )

## -----------------------------------------------------------------------------
# summary(model)

## -----------------------------------------------------------------------------
# model %>% compile(
#   optimizer = 'rmsprop',
#   loss = 'binary_crossentropy',
#   loss_weights = c(1.0, 0.2)
# )

## ----eval = FALSE-------------------------------------------------------------
# model %>% fit(
#   x = list(headline_data, additional_data),
#   y = list(labels, labels),
#   epochs = 50,
#   batch_size = 32
# )

## ----eval = FALSE-------------------------------------------------------------
# model %>% compile(
#   optimizer = 'rmsprop',
#   loss = list(main_output = 'binary_crossentropy', aux_output = 'binary_crossentropy'),
#   loss_weights = list(main_output = 1.0, aux_output = 0.2)
# )
#
# # And trained it via:
# model %>% fit(
#   x = list(main_input = headline_data, aux_input = additional_data),
#   y = list(main_output = labels, aux_output = labels),
#   epochs = 50,
#   batch_size = 32
# )
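## ----eval = FALSE-------------------------------------------------------------
# # Added sketch (not from the original guide): the fit() calls above reference
# # `headline_data`, `additional_data`, and `labels` without defining them. One
# # plausible way to create placeholder arrays with matching shapes (the sample
# # size `n` is made up) would be:
# n <- 1000
# headline_data <- matrix(sample(0:9999, n * 100, replace = TRUE), nrow = n, ncol = 100)
# additional_data <- matrix(runif(n * 5), nrow = n, ncol = 5)
# labels <- matrix(sample(0:1, n, replace = TRUE), ncol = 1)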
## -----------------------------------------------------------------------------
# library(keras)
#
# tweet_a <- layer_input(shape = c(280, 256))
# tweet_b <- layer_input(shape = c(280, 256))

## ----eval=FALSE---------------------------------------------------------------
# # This layer can take as input a matrix and will return a vector of size 64
# shared_lstm <- layer_lstm(units = 64)
#
# # When we reuse the same layer instance multiple times, the weights of the
# # layer are also being reused (it is effectively *the same* layer)
# encoded_a <- tweet_a %>% shared_lstm
# encoded_b <- tweet_b %>% shared_lstm
#
# # We can then concatenate the two vectors and add a logistic regression on top
# predictions <- layer_concatenate(c(encoded_a, encoded_b), axis = -1) %>%
#   layer_dense(units = 1, activation = 'sigmoid')
#
# # We define a trainable model linking the tweet inputs to the predictions
# model <- keras_model(inputs = c(tweet_a, tweet_b), outputs = predictions)
#
# model %>% compile(
#   optimizer = 'rmsprop',
#   loss = 'binary_crossentropy',
#   metrics = c('accuracy')
# )
#
# # data_a, data_b, and labels stand in for pre-existing training arrays here
# model %>% fit(list(data_a, data_b), labels, epochs = 10)

## -----------------------------------------------------------------------------
# a <- layer_input(shape = c(280, 256))
#
# lstm <- layer_lstm(units = 32)
#
# encoded_a <- a %>% lstm
#
# # with a single node, the layer's output is unambiguous
# lstm$output

## -----------------------------------------------------------------------------
# a <- layer_input(shape = c(280, 256))
# b <- layer_input(shape = c(280, 256))
#
# lstm <- layer_lstm(units = 32)
#
# encoded_a <- a %>% lstm
# encoded_b <- b %>% lstm
#
# # `lstm$output` now raises an error: the layer has multiple inbound nodes,
# # so its "output" is ambiguous; use get_output_at() instead (see below)
# lstm$output

## -----------------------------------------------------------------------------
# get_output_at(lstm, 1)
# get_output_at(lstm, 2)

## -----------------------------------------------------------------------------
# a <- layer_input(shape = c(32, 32, 3))
# b <- layer_input(shape = c(64, 64, 3))
#
# conv <- layer_conv_2d(filters = 16, kernel_size = c(3, 3), padding = 'same')
#
# conved_a <- a %>% conv
#
# # only one input so far, the following will work
# conv$input_shape
#
# conved_b <- b %>% conv
# # now the `$input_shape` property won't work, but this does:
# get_input_shape_at(conv, 1)
# get_input_shape_at(conv, 2)

## -----------------------------------------------------------------------------
# library(keras)
#
# input_img <- layer_input(shape = c(256, 256, 3))
#
# tower_1 <- input_img %>%
#   layer_conv_2d(filters = 64, kernel_size = c(1, 1), padding = 'same', activation = 'relu') %>%
#   layer_conv_2d(filters = 64, kernel_size = c(3, 3), padding = 'same', activation = 'relu')
#
# tower_2 <- input_img %>%
#   layer_conv_2d(filters = 64, kernel_size = c(1, 1), padding = 'same', activation = 'relu') %>%
#   layer_conv_2d(filters = 64, kernel_size = c(5, 5), padding = 'same', activation = 'relu')
#
# tower_3 <- input_img %>%
#   layer_max_pooling_2d(pool_size = c(3, 3), strides = c(1, 1), padding = 'same') %>%
#   layer_conv_2d(filters = 64, kernel_size = c(1, 1), padding = 'same', activation = 'relu')
#
# # concatenate the towers along the channel (last) axis
# output <- layer_concatenate(c(tower_1, tower_2, tower_3), axis = -1)

## -----------------------------------------------------------------------------
# # input tensor for a 3-channel 256x256 image
# x <- layer_input(shape = c(256, 256, 3))
# # 3x3 conv with 3 output channels (same as input channels)
# y <- x %>% layer_conv_2d(filters = 3, kernel_size = c(3, 3), padding = 'same')
# # this returns x + y.
# z <- layer_add(c(x, y))
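## ----eval = FALSE-------------------------------------------------------------
# # Added sketch (not part of the original guide): one way to check that the
# # residual sum `z` keeps the 256 x 256 x 3 shape of `x` is to wrap the block
# # in a model (`residual_model` is a made-up name) and inspect its summary.
# residual_model <- keras_model(inputs = x, outputs = z)
# summary(residual_model)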
## -----------------------------------------------------------------------------
# # First, define the vision model
# digit_input <- layer_input(shape = c(27, 27, 1))
# out <- digit_input %>%
#   layer_conv_2d(filters = 64, kernel_size = c(3, 3)) %>%
#   layer_conv_2d(filters = 64, kernel_size = c(3, 3)) %>%
#   layer_max_pooling_2d(pool_size = c(2, 2)) %>%
#   layer_flatten()
#
# vision_model <- keras_model(digit_input, out)
#
# # Then define the tell-digits-apart model
# digit_a <- layer_input(shape = c(27, 27, 1))
# digit_b <- layer_input(shape = c(27, 27, 1))
#
# # The vision model will be shared, weights and all
# out_a <- digit_a %>% vision_model
# out_b <- digit_b %>% vision_model
#
# out <- layer_concatenate(c(out_a, out_b)) %>%
#   layer_dense(units = 1, activation = 'sigmoid')
#
# classification_model <- keras_model(inputs = c(digit_a, digit_b), outputs = out)
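## ----eval = FALSE-------------------------------------------------------------
# # Added sketch (not from the original guide): training the shared vision model
# # on dummy digit pairs; `pairs_a`, `pairs_b`, and `same_digit` are made-up
# # placeholder arrays shaped like the 27x27x1 inputs defined above.
# pairs_a <- array(runif(128 * 27 * 27), dim = c(128, 27, 27, 1))
# pairs_b <- array(runif(128 * 27 * 27), dim = c(128, 27, 27, 1))
# same_digit <- sample(0:1, 128, replace = TRUE)
#
# classification_model %>% compile(
#   optimizer = 'rmsprop',
#   loss = 'binary_crossentropy',
#   metrics = c('accuracy')
# )
#
# classification_model %>% fit(
#   x = list(pairs_a, pairs_b),
#   y = same_digit,
#   epochs = 1,
#   batch_size = 32
# )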
## -----------------------------------------------------------------------------
# # First, let's define a vision model using a Sequential model.
# # This model will encode an image into a vector.
# vision_model <- keras_model_sequential()
# vision_model %>%
#   layer_conv_2d(filters = 64, kernel_size = c(3, 3), activation = 'relu', padding = 'same',
#                 input_shape = c(224, 224, 3)) %>%
#   layer_conv_2d(filters = 64, kernel_size = c(3, 3), activation = 'relu') %>%
#   layer_max_pooling_2d(pool_size = c(2, 2)) %>%
#   layer_conv_2d(filters = 128, kernel_size = c(3, 3), activation = 'relu', padding = 'same') %>%
#   layer_conv_2d(filters = 128, kernel_size = c(3, 3), activation = 'relu') %>%
#   layer_max_pooling_2d(pool_size = c(2, 2)) %>%
#   layer_conv_2d(filters = 256, kernel_size = c(3, 3), activation = 'relu', padding = 'same') %>%
#   layer_conv_2d(filters = 256, kernel_size = c(3, 3), activation = 'relu') %>%
#   layer_conv_2d(filters = 256, kernel_size = c(3, 3), activation = 'relu') %>%
#   layer_max_pooling_2d(pool_size = c(2, 2)) %>%
#   layer_flatten()
#
# # Now let's get a tensor with the output of our vision model:
# image_input <- layer_input(shape = c(224, 224, 3))
# encoded_image <- image_input %>% vision_model
#
# # Next, let's define a language model to encode the question into a vector.
# # Each question will be at most 100 words long,
# # and we will index words as integers from 1 to 9999.
# question_input <- layer_input(shape = c(100), dtype = 'int32')
# encoded_question <- question_input %>%
#   layer_embedding(input_dim = 10000, output_dim = 256, input_length = 100) %>%
#   layer_lstm(units = 256)
#
# # Let's concatenate the question vector and the image vector, then
# # train a logistic regression over 1000 words on top
# output <- layer_concatenate(c(encoded_question, encoded_image)) %>%
#   layer_dense(units = 1000, activation = 'softmax')
#
# # This is our final model:
# vqa_model <- keras_model(inputs = c(image_input, question_input), outputs = output)

## -----------------------------------------------------------------------------
# video_input <- layer_input(shape = c(100, 224, 224, 3))
#
# # This is our video encoded via the previously trained vision_model (weights are reused)
# encoded_video <- video_input %>%
#   time_distributed(vision_model) %>%
#   layer_lstm(units = 256)
#
# # This is a model-level representation of the question encoder, reusing the same weights as before:
# question_encoder <- keras_model(inputs = question_input, outputs = encoded_question)
#
# # Let's use it to encode the question:
# video_question_input <- layer_input(shape = c(100), dtype = 'int32')
# encoded_video_question <- video_question_input %>% question_encoder
#
# # And this is our video question answering model:
# output <- layer_concatenate(c(encoded_video, encoded_video_question)) %>%
#   layer_dense(units = 1000, activation = 'softmax')
#
# video_qa_model <- keras_model(inputs = c(video_input, video_question_input), outputs = output)
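## ----eval = FALSE-------------------------------------------------------------
# # Added sketch (not part of the original guide): compiling the video QA model;
# # the categorical loss assumes the 1000 candidate answers would be provided as
# # one-hot encoded targets.
# video_qa_model %>% compile(
#   optimizer = 'rmsprop',
#   loss = 'categorical_crossentropy',
#   metrics = c('accuracy')
# )
# summary(video_qa_model)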