I have built a Bi-lstm model for NER Tagging and now I want to introduce CRF layer in it. The CRF Layer I modified as follow:
import tensorflow as tf
import tensorflow.keras.backend as K
import tensorflow.keras.layers as L
from tensorflow_addons.text import crf_log_likelihood, crf_decode
class CRF(L.Layer):
def __init__(self,
output_dim,
sparse_target=True,
use_mask=False,
label2idx_map=None,
**kwargs):
"""
Args:
output_dim (int): the number of labels to tag each temporal input.
sparse_target (bool): whether the ground-truth label is represented in one-hot.
use_mask (bool): whether to use masking as specified in MaskedCRF.
label2idx_map (dict): a mapping from labels to indices for masking.
Input shape:
(batch_size, sentence length, output_dim)
Output shape:
(batch_size, sentence length, output_dim)
"""
super(CRF, self).__init__(**kwargs)
self.output_dim = int(output_dim)
self.sparse_target = sparse_target
self.use_mask = use_mask
self.label2idx_map = label2idx_map
self.input_spec = L.InputSpec(min_ndim=3)
self.supports_masking = False
self.sequence_lengths = None
self.transitions = None
self.mask_tran_matrix = None
def build(self, input_shape):
assert len(input_shape) == 3
f_shape = tf.TensorShape(input_shape)
input_spec = L.InputSpec(min_ndim=3, axes={-1: f_shape[-1]})
if f_shape[-1] is None:
raise ValueError('The last dimension of the inputs to `CRF` '
'should be defined. Found `None`.')
if f_shape[-1] != self.output_dim:
raise ValueError('The last dimension of the input shape must be equal to output'
' shape. Use a linear layer if needed.')
self.input_spec = input_spec
# Initialize transitions
initializer = tf.random_uniform_initializer(minval=-0.1, maxval=0.1, seed=None)
self.transitions = tf.Variable(
name="crf_transitions",
initial_value=initializer(shape=[self.output_dim, self.output_dim])
)
if self.use_mask:
self.mask_tran_matrix = self.get_mask_trans()
self.transitions = tf.minimum(self.transitions, self.mask_tran_matrix)
self.built = True
def compute_mask(self, inputs, mask=None):
# Just pass the received mask from previous layer, to the next layer or
# manipulate it if this layer changes the shape of the input
return mask
def call(self, inputs, sequence_lengths=None, training=None, **kwargs):
sequences = tf.convert_to_tensor(inputs, dtype=self.dtype)
if sequence_lengths is not None:
assert len(sequence_lengths.shape) == 2
assert tf.convert_to_tensor(sequence_lengths).dtype == 'int32'
seq_len_shape = tf.convert_to_tensor(sequence_lengths).get_shape().as_list()
assert seq_len_shape[1] == 1
self.sequence_lengths = K.flatten(sequence_lengths)
else:
self.sequence_lengths = tf.ones(tf.shape(inputs)[0], dtype=tf.int32) * (
tf.shape(inputs)[1]
)
viterbi_sequence, _ = crf_decode(sequences,
self.transitions,
self.sequence_lengths)
output = K.one_hot(viterbi_sequence, self.output_dim)
return K.in_train_phase(sequences, output)
@property
def loss(self):
def crf_loss(y_true, y_pred):
if self.sparse_target:
y_true = tf.argmax(y_true, axis=-1)
y_pred = tf.convert_to_tensor(y_pred, dtype=self.dtype)
log_likelihood, self.transitions = crf_log_likelihood(
y_pred,
tf.cast(K.argmax(y_true), dtype=tf.int32) if self.sparse_target else y_true,
self.sequence_lengths,
transition_params=self.transitions,
)
return tf.reduce_mean(-log_likelihood)
return crf_loss
@property
def accuracy(self):
def viterbi_accuracy(y_true, y_pred):
# -1e10 to avoid zero at sum(mask)
mask = K.cast(
K.all(K.greater(y_pred, -1e10), axis=2), K.floatx())
shape = tf.shape(y_pred)
sequence_lengths = tf.ones(shape[0], dtype=tf.int32) * (shape[1])
y_pred, _ = crf_decode(y_pred, self.transitions, sequence_lengths)
if self.sparse_target:
y_true = K.argmax(y_true, 2)
y_pred = K.cast(y_pred, 'int32')
y_true = K.cast(y_true, 'int32')
corrects = K.cast(K.equal(y_true, y_pred), K.floatx())
return K.sum(corrects * mask) / K.sum(mask)
return viterbi_accuracy
def compute_output_shape(self, input_shape):
tf.TensorShape(input_shape).assert_has_rank(3)
return input_shape[:2] + (self.output_dim,)
def get_config(self):
config = {
'output_dim': self.output_dim,
'sparse_target': self.sparse_target,
'supports_masking': self.supports_masking,
'transitions': K.eval(self.transitions)
}
base_config = super(CRF, self).get_config()
return dict(base_config, **config)
def get_mask_trans(self):
size = len(self.label2idx_map)
tag_lst = self.label2idx_map.keys()
mask_mat = np.ones(shape=(size, size), dtype=np.float32)
# analysis tag schema,BIO or BIOES
is_scheme_bioes = False
flag_e = False
flag_s = False
for tag in tag_lst:
if tag.startswith("E-"):
flag_e = True
if tag.startswith("S-"):
flag_s = True
if flag_e and flag_s:
is_scheme_bioes = True
print("BIOES format tagging scheme detected.")
else:
print("BIO format tagging scheme detected.")
# for col_tag, col_index in self.label2idx_map.items():
# if col_tag.startswith("I-"):
# slot_name = col_tag.replace("I-", "")
# begin_slot = "B-" + slot_name
# for row_tag, row_index in self.label2idx_map.items():
# row_index -= 1 # Adjust the index to start from 0
# # Print values for debugging
# print("row_index:", row_index, "col_index:", col_index)
# # I-city must follow B-city or I-city
# if row_tag != begin_slot and row_tag != col_tag:
# mask_mat[row_index, col_index] = -1.0
for col_tag, col_index in self.label2idx_map.items():
col_index -= 1 # Adjust the index to start from 0
if col_tag.startswith("I-"):
slot_name = col_tag.replace("I-", "")
begin_slot = "B-" + slot_name
for row_tag, row_index in self.label2idx_map.items():
row_index -= 1 # Adjust the index to start from 0
# Print values for debugging
print("row_index:", row_index, "col_index:", col_index)
# I-city must follow B-city or I-city
if row_tag != begin_slot and row_tag != col_tag:
mask_mat[row_index, col_index] = -1.0
if is_scheme_bioes:
if col_tag.startswith("E-"):
slot_name = col_tag.replace("E-", "")
intermediate_slot = "I-" + slot_name
begin_slot = "B-" + slot_name
for row_tag, row_index in self.label2idx_map.items():
# E-city must follow I-city or B-city
if row_tag != intermediate_slot and row_tag != begin_slot:
mask_mat[row_index, col_index] = -1.0
if col_tag.startswith("S-") or col_tag.startswith("B-"):
for row_tag, row_index in self.label2idx_map.items():
# S-city must not follow B-slot or I-slot
if row_tag.startswith("B-") or row_tag.startswith("I-"):
mask_mat[row_index, col_index] = -1.0
return 100 * mask_mat
I have built the model BiLSTM with CRF layer above on this model as follow:
import tensorflow as tf
import numpy as np
from tensorflow.keras.layers import Input, Embedding, LSTM, Dense, Dropout, TimeDistributed, Bidirectional
import tensorflow_addons as tfa
from tensorflow.keras.models import Model
# Input layer
input_layer = Input(shape=(MAX_LEN,), dtype='int32')
# Embedding layer
embedding_layer = Embedding(input_dim=number_words + 1,
input_length=MAX_LEN,
output_dim=DIM_EMBEDDINGS, trainable=True)(input_layer)
# BiLSTM layer
bilstm_layer = Bidirectional(LSTM(units=DIM_EMBEDDINGS,
return_sequences=True,
dropout=0.5,
recurrent_dropout=0.5))(embedding_layer)
# TimeDistributed layer
kernel = TimeDistributed(Dense(number_tags, activation="relu"))(bilstm_layer)
crf = CRF(output_dim=number_tags, sparse_target=True, use_mask=True, label2idx_map=tag2idx)
output = crf(kernel)
model = Model(input_layer, output)
model.compile('adam', loss=crf.loss, metrics=[crf.accuracy])
Summary of this model is as below:

When I want to train this model, I face this error:
ValueError Traceback (most recent call last)
<ipython-input-16-fe62e47f1cc5> in <cell line: 1>()
----> 1 history = model.fit(X_train, np.array(y_train), epochs=40, batch_size=32, validation_split=0.2, verbose=1)
5 frames
/usr/local/lib/python3.10/dist-packages/tensorflow_addons/text/crf.py in _single_seq_fn()
31 batch_inds = ag__.converted_call(ag__.ld(tf).reshape, (ag__.converted_call(ag__.ld(tf).range, (ag__.ld(batch_size),), None, fscope_1), [-1, 1]), None, fscope_1)
32 indices = ag__.converted_call(ag__.ld(tf).concat, ([ag__.ld(batch_inds), ag__.converted_call(ag__.ld(tf).zeros_like, (ag__.ld(batch_inds),), None, fscope_1)],), dict(axis=1), fscope_1)
---> 33 tag_inds = ag__.converted_call(ag__.ld(tf).gather_nd, (ag__.ld(tag_indices), ag__.ld(indices)), None, fscope_1)
34 tag_inds = ag__.converted_call(ag__.ld(tf).reshape, (ag__.ld(tag_inds), [-1, 1]), None, fscope_1)
35 indices = ag__.converted_call(ag__.ld(tf).concat, ([ag__.ld(indices), ag__.ld(tag_inds)],), dict(axis=1), fscope_1)
ValueError: in user code:
File "/usr/local/lib/python3.10/dist-packages/keras/src/engine/training.py", line 1401, in train_function *
return step_function(self, iterator)
File "<ipython-input-12-10fd54fd5d04>", line 93, in crf_loss *
log_likelihood, self.transitions = crf_log_likelihood(
File "/usr/local/lib/python3.10/dist-packages/tensorflow_addons/text/crf.py", line 241, in crf_log_likelihood *
sequence_scores = crf_sequence_score(
File "/usr/local/lib/python3.10/dist-packages/tensorflow_addons/text/crf.py", line 82, in _single_seq_fn *
tag_inds = tf.gather_nd(tag_indices, indices)
ValueError: indices.shape[-1] must be <= params.rank, but saw indices shape: [?,2] and params shape: [?] for '{{node crf_loss/cond/GatherNd}} = GatherNd[Tindices=DT_INT32, Tparams=DT_INT32](crf_loss/cond/GatherNd/crf_loss/Cast, crf_loss/cond/concat)' with input shapes: [?], [?,2].
How to solve the problem?