I'm using the N-BaIoT dataset, from which I have selected only the Provision_PT_737E_Security_Camera device data: benign traffic plus the Mirai and Gafgyt attacks.
The error is:
AttributeError Traceback (most recent call last)
<ipython-input-78-ca197d89c56d> in <cell line: 1>()
----> 1 train_metrics = evaluation(metrics.state, federated_train_data)
AttributeError: 'collections.OrderedDict' object has no attribute 'state'
Am I doing this the right way, and if not, how can I improve it? Here is my full code:
!pip install tensorflow-federated
import numpy as np
import pandas as pd
from google.colab import drive
drive.mount('/content/drive')
import os
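# Load the per-attack CSV captures for device 5 (the Provision PT-737E security camera) from N-BaIoT.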
benign_df = pd.read_csv('/content/drive/MyDrive/Collab Input dataset/5.benign.csv')
g_c_df = pd.read_csv('/content/drive/MyDrive/Collab Input dataset/5.gafgyt.combo.csv')
g_j_df = pd.read_csv('/content/drive/MyDrive/Collab Input dataset/5.gafgyt.junk.csv')
g_s_df = pd.read_csv('/content/drive/MyDrive/Collab Input dataset/5.gafgyt.scan.csv')
g_t_df = pd.read_csv('/content/drive/MyDrive/Collab Input dataset/5.gafgyt.tcp.csv')
g_u_df = pd.read_csv('/content/drive/MyDrive/Collab Input dataset/5.gafgyt.udp.csv')
m_a_df = pd.read_csv('/content/drive/MyDrive/Collab Input dataset/5.mirai.ack.csv')
m_sc_df = pd.read_csv('/content/drive/MyDrive/Collab Input dataset/5.mirai.scan.csv')
m_sy_df = pd.read_csv('/content/drive/MyDrive/Collab Input dataset/5.mirai.syn.csv')
m_u_df = pd.read_csv('/content/drive/MyDrive/Collab Input dataset/5.mirai.udp.csv')
m_u_p_df = pd.read_csv('/content/drive/MyDrive/Collab Input dataset/5.mirai.udpplain.csv')
benign_df['type'] = 'benign'
m_u_df['type'] = 'mirai_udp'
g_c_df['type'] = 'gafgyt_combo'
g_j_df['type'] = 'gafgyt_junk'
g_s_df['type'] = 'gafgyt_scan'
g_t_df['type'] = 'gafgyt_tcp'
g_u_df['type'] = 'gafgyt_udp'
m_a_df['type'] = 'mirai_ack'
m_sc_df['type'] = 'mirai_scan'
m_sy_df['type'] = 'mirai_syn'
m_u_p_df['type'] = 'mirai_udpplain'
df = pd.concat([benign_df, m_u_df, g_c_df,
g_j_df, g_s_df, g_t_df,
g_u_df, m_a_df, m_sc_df,
m_sy_df, m_u_p_df],
axis=0, sort=False, ignore_index=True)
df["type"].value_counts()
from matplotlib import pyplot as plt
plt.title("Class Distribution")
df.groupby("type").size().plot(kind='pie', autopct='%.2f', figsize=(20,10))
df.info()
df = df.sample(frac=1).reset_index(drop=True)
df.head()
import random
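# Randomly assign each row to one of num_client simulated clients (an IID partition).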
num_client = 4
df["client"] = ["client_{}".format(random.randint(1, num_client)) for _ in range(df.shape[0])]
from sklearn.model_selection import train_test_split
train_df, test_df = train_test_split(df, test_size=0.2, random_state=42, stratify=df["type"])
features = list(train_df.columns)
features.remove("type")
features.remove("client")
from sklearn.preprocessing import LabelEncoder
label_encoder = LabelEncoder()
train_df["type"] = label_encoder.fit_transform(train_df["type"])
test_df["type"] = label_encoder.transform(test_df["type"])
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
train_df[features] = scaler.fit_transform(train_df[features])
test_df[features] = scaler.transform(test_df[features])
train_df[features] = train_df[features].astype("float32")
test_df[features] = test_df[features].astype("float32")
train_df["type"] = train_df["type"].astype("int32")
test_df["type"] = test_df["type"].astype("int32")
import nest_asyncio
nest_asyncio.apply()
%load_ext tensorboard
import collections
import numpy as np
import tensorflow as tf
import tensorflow_federated as tff
np.random.seed(0)
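# Quick sanity check that the TFF runtime works (the standard smoke test from the TFF tutorials).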
tff.federated_computation(lambda: 'Hello, World!')()
client_id_colname = 'client'
client_ids = df[client_id_colname].unique()
train_client_ids = pd.DataFrame(client_ids).sample(frac=0.8).values.ravel().tolist()
test_client_ids = [x for x in client_ids if x not in train_client_ids]
train_client_ids
from collections import OrderedDict
from tensorflow.keras.utils import to_categorical
NUM_EPOCHS = 1
SHUFFLE_BUFFER = 100
PREFETCH_BUFFER = 5
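# Build one tf.data.Dataset per client. Note that this function reads the
# module-level 'dataframe', which is swapped between train_df and test_df below.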
def create_tf_dataset_for_client_fn(client_id):
    client_data = dataframe[dataframe[client_id_colname] == client_id]
    client_data_dict = OrderedDict()
    client_data_dict["features"] = np.array(client_data[features].values, dtype="float32")
    client_data_dict["label"] = np.array(client_data["type"].values, dtype="int32")
    dataset = tf.data.Dataset.from_tensor_slices(client_data_dict)
    dataset = dataset.shuffle(SHUFFLE_BUFFER).batch(1).repeat(NUM_EPOCHS)
    return dataset
dataframe = train_df
train_data = tff.simulation.datasets.ClientData.from_clients_and_tf_fn(
client_ids=train_client_ids,
serializable_dataset_fn=create_tf_dataset_for_client_fn)
dataframe = test_df
test_data = tff.simulation.datasets.ClientData.from_clients_and_tf_fn(
client_ids=test_client_ids,
serializable_dataset_fn=create_tf_dataset_for_client_fn)
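# Side note: because create_tf_dataset_for_client_fn closes over the global
# 'dataframe', and client datasets are built lazily, train_data can end up
# reading test_df after the reassignment above. A minimal sketch of a safer
# pattern (create_client_dataset is my own helper, not a TFF API):
import functools

def create_client_dataset(client_id, source_df):
    # Same logic as above, but the source dataframe is bound explicitly.
    client_data = source_df[source_df[client_id_colname] == client_id]
    client_data_dict = OrderedDict(
        features=np.array(client_data[features].values, dtype="float32"),
        label=np.array(client_data["type"].values, dtype="int32"))
    dataset = tf.data.Dataset.from_tensor_slices(client_data_dict)
    return dataset.shuffle(SHUFFLE_BUFFER).batch(1).repeat(NUM_EPOCHS)

# Usage would mirror the calls above, e.g.:
# train_data = tff.simulation.datasets.ClientData.from_clients_and_tf_fn(
#     client_ids=train_client_ids,
#     serializable_dataset_fn=functools.partial(create_client_dataset, source_df=train_df))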
train_data.element_type_structure
test_data.element_type_structure
example_dataset = train_data.create_tf_dataset_for_client(train_data.client_ids[0])
example_element = next(iter(example_dataset))
example_element['label'].numpy()
from collections import defaultdict
f = plt.figure(figsize=(20, 10))
f.suptitle('Label Counts for a Sample of Clients')
for i, c_ids in enumerate(train_data.client_ids):
    client_dataset = train_data.create_tf_dataset_for_client(c_ids)
    plot_data = defaultdict(list)
    for example in client_dataset:
        label = example['label'].numpy()[0]
        plot_data[label].append(label)
    plt.subplot(2, 4, i + 1)
    plt.title('Client {}'.format(c_ids))
    num_classes = len(label_encoder.classes_)  # 11 classes: benign + 5 gafgyt + 5 mirai
    for j in range(num_classes):
        plt.hist(plot_data[j], density=False, bins=list(range(num_classes + 1)))
import collections
NUM_EPOCHS = 5
BATCH_SIZE = 128
SHUFFLE_BUFFER = 100
PREFETCH_BUFFER = 10
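# Batch each client dataset and rename features/label to the (x, y) structure
# that the wrapped Keras model below expects.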
def preprocess(dataset):
    def batch_format_fn(element):
        return collections.OrderedDict(
            x=tf.reshape(element['features'], [-1, len(features)]),
            y=tf.reshape(element['label'], [-1, 1]))
    return dataset.repeat(NUM_EPOCHS).shuffle(SHUFFLE_BUFFER, seed=1).batch(
        BATCH_SIZE).map(batch_format_fn).prefetch(PREFETCH_BUFFER)
preprocessed_example_dataset = preprocess(example_dataset)
sample_batch = tf.nest.map_structure(lambda x: x.numpy(),
next(iter(preprocessed_example_dataset)))
from tqdm import tqdm
def make_federated_data(client_data, client_ids):
    return [preprocess(client_data.create_tf_dataset_for_client(x)) for x in tqdm(client_ids)]
NUM_CLIENTS = len(np.unique(train_df[client_id_colname]))
sample_clients = train_data.client_ids[0:NUM_CLIENTS]
federated_train_data = make_federated_data(train_data, sample_clients)
print('Number of client datasets: {l}'.format(l=len(federated_train_data)))
print('First dataset: {d}'.format(d=federated_train_data[0]))
def create_keras_model():
    input_shape = len(features)
    num_classes = len(label_encoder.classes_)
    clf = tf.keras.models.Sequential([
        tf.keras.layers.Dense(64, input_dim=input_shape, activation='relu'),
        tf.keras.layers.Dense(32, activation='relu'),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Dense(num_classes, activation='softmax')
    ])
    return clf
keras_model = create_keras_model()
keras_model.summary()
import keras.backend as K
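# Custom F1 metric (defined here but never passed to model_fn below).
# Note: update_state overwrites f1_score with the current batch's value
# rather than accumulating a running score across batches.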
class F1Score(tf.keras.metrics.Metric):
    def __init__(self, name='F1-Score', **kwargs):
        super(F1Score, self).__init__(name=name, **kwargs)
        self.f1_score = self.add_weight(name='f1_score', initializer='zeros')

    def update_state(self, y_true, y_pred, sample_weight=None):
        true_positives = tf.math.reduce_sum(tf.math.round(tf.clip_by_value(y_true * y_pred, 0, 1)))
        possible_positives = tf.math.reduce_sum(tf.math.round(tf.clip_by_value(y_true, 0, 1)))
        recall = true_positives / (possible_positives + K.epsilon())
        predicted_positives = tf.math.reduce_sum(tf.math.round(tf.clip_by_value(y_pred, 0, 1)))
        precision = true_positives / (predicted_positives + K.epsilon())
        self.f1_score.assign(2 * ((precision * recall) / (precision + recall + K.epsilon())))

    def result(self):
        return self.f1_score

    def reset_states(self):
        self.f1_score.assign(0.0)
from keras.metrics import Recall, Precision
def model_fn():
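    # TFF requires model_fn to build a fresh, uncompiled Keras model on each call.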
    keras_model = tf.keras.models.Sequential([
        tf.keras.layers.Dense(64, input_dim=len(features), activation='relu'),
        tf.keras.layers.Dense(len(features), activation='relu'),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Dense(len(label_encoder.classes_), activation='softmax')
    ])
    return tff.learning.models.from_keras_model(
        keras_model=keras_model,
        input_spec=preprocessed_example_dataset.element_spec,
        loss=tf.keras.losses.SparseCategoricalCrossentropy(),
        metrics=[tf.keras.metrics.SparseCategoricalAccuracy()])
keras_model = create_keras_model()
keras_model.summary()
iterative_process = tff.learning.algorithms.build_weighted_fed_avg(
model_fn=model_fn,
client_optimizer_fn=lambda: tf.keras.optimizers.SGD(learning_rate=1),
server_optimizer_fn=lambda: tf.keras.optimizers.SGD(learning_rate=2))
str(iterative_process.initialize.type_signature)
state = iterative_process.initialize()
state, metrics = iterative_process.next(state, federated_train_data)
print('round 1, metrics={}'.format(metrics))
NUM_ROUNDS = 11
for round_num in range(2, NUM_ROUNDS):
    state, metrics = iterative_process.next(state, federated_train_data)
    print('round {:2d}, metrics={}'.format(round_num, metrics))
train_logdir = "training/"
os.makedirs(train_logdir, exist_ok=True)
test_logdir = "testing/"
os.makedirs(test_logdir, exist_ok=True)
summary_writer = tf.summary.create_file_writer(train_logdir)
state = iterative_process.initialize()
with summary_writer.as_default():
    for round_num in range(1, NUM_ROUNDS):
        state, metrics = iterative_process.next(state, federated_train_data)
        client_work_metrics = metrics['client_work']
        for name, value in client_work_metrics['train'].items():
            tf.summary.scalar(name, value, step=round_num)
!ls {train_logdir}
%tensorboard --logdir {train_logdir} --port=0
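# --- Second approach: re-implementing the model as a custom tff.learning.models.VariableModel ---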
ModelVariables = collections.namedtuple('ModelVariables', 'weights bias num_examples loss_sum accuracy_sum')
def create_model_variables():
    return ModelVariables(
        weights=tf.Variable(
            lambda: tf.zeros(dtype=tf.float32, shape=(len(features), len(label_encoder.classes_))),
            name='weights',
            trainable=True),
        bias=tf.Variable(
            lambda: tf.zeros(dtype=tf.float32, shape=(len(label_encoder.classes_),)),
            name='bias',
            trainable=True),
        num_examples=tf.Variable(0.0, name='num_examples', trainable=False),
        loss_sum=tf.Variable(0.0, name='loss_sum', trainable=False),
        accuracy_sum=tf.Variable(0.0, name='accuracy_sum', trainable=False))
def predict_on_batch(variables, x):
    return tf.nn.softmax(tf.matmul(x, variables.weights) + variables.bias)
def model_forward_pass(variables, batch):
    y = predict_on_batch(variables, batch['x'])
    predictions = tf.cast(tf.argmax(y, 1), tf.int32)
    flat_labels = tf.reshape(batch['y'], [-1])
    loss = -tf.reduce_mean(
        tf.reduce_sum(tf.one_hot(flat_labels, len(label_encoder.classes_)) * tf.math.log(y), axis=[1]))
    accuracy = tf.reduce_mean(tf.cast(tf.equal(predictions, flat_labels), tf.float32))
    num_examples = tf.cast(tf.size(batch['y']), tf.float32)
    variables.num_examples.assign_add(num_examples)
    variables.loss_sum.assign_add(loss * num_examples)
    variables.accuracy_sum.assign_add(accuracy * num_examples)
    return loss, predictions
def get_local_model_metrics(variables):
    return collections.OrderedDict(
        num_examples=variables.num_examples,
        loss=variables.loss_sum / variables.num_examples,
        accuracy=variables.accuracy_sum / variables.num_examples)
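# Server-side aggregation: example counts are summed; loss and accuracy are
# example-weighted means across clients.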
@tff.federated_computation
def aggregate_model_metrics_across_clients(metrics):
    return collections.OrderedDict(
        num_examples=tff.federated_sum(metrics.num_examples),
        loss=tff.federated_mean(metrics.loss, metrics.num_examples),
        accuracy=tff.federated_mean(metrics.accuracy, metrics.num_examples))
from typing import Callable, List, OrderedDict
class IOTModel(tff.learning.models.VariableModel):
    def __init__(self):
        self._variables = create_model_variables()

    def reset_metrics(self):
        self._variables.num_examples.assign(0)
        self._variables.loss_sum.assign(0.0)
        self._variables.accuracy_sum.assign(0.0)

    @property
    def trainable_variables(self):
        return [self._variables.weights, self._variables.bias]

    @property
    def non_trainable_variables(self):
        return []

    @property
    def local_variables(self):
        return [
            self._variables.num_examples, self._variables.loss_sum,
            self._variables.accuracy_sum
        ]

    @property
    def input_spec(self):
        return collections.OrderedDict(
            x=tf.TensorSpec([None, len(features)], tf.float32),
            y=tf.TensorSpec([None, 1], tf.int32))

    @tf.function
    def predict_on_batch(self, x, training=True):
        del training
        return predict_on_batch(self._variables, x)

    @tf.function
    def forward_pass(self, batch, training=True):
        del training
        loss, predictions = model_forward_pass(self._variables, batch)
        num_examples = tf.shape(batch['x'])[0]
        return tff.learning.models.BatchOutput(
            loss=loss, predictions=predictions, num_examples=num_examples)

    @tf.function
    def report_local_outputs(self):
        return get_local_model_metrics(self._variables)

    @property
    def federated_output_computation(self):
        return aggregate_model_metrics_across_clients

    @tf.function
    def report_local_unfinalized_metrics(self) -> OrderedDict[str, List[tf.Tensor]]:
        """Creates an `OrderedDict` of metric names to unfinalized values."""
        return collections.OrderedDict(
            num_examples=[self._variables.num_examples],
            loss=[self._variables.loss_sum, self._variables.num_examples],
            accuracy=[self._variables.accuracy_sum, self._variables.num_examples])

    def metric_finalizers(self) -> OrderedDict[str, Callable[[List[tf.Tensor]], tf.Tensor]]:
        """Creates an `OrderedDict` of metric names to finalizers."""
        return collections.OrderedDict(
            num_examples=tf.function(func=lambda x: x[0]),
            loss=tf.function(func=lambda x: x[0] / x[1]),
            accuracy=tf.function(func=lambda x: x[0] / x[1]))
iterative_process = tff.learning.algorithms.build_weighted_fed_avg(
    IOTModel,
    client_optimizer_fn=lambda: tf.keras.optimizers.Adam(learning_rate=0.001))
state = iterative_process.initialize()
state, metrics = iterative_process.next(state, federated_train_data)
print('round 1, metrics={}'.format(metrics))
for round_num in range(2, 11):
    state, metrics = iterative_process.next(state, federated_train_data)
    print('round {:2d}, metrics={}'.format(round_num, metrics))
evaluation = tff.learning.algorithms.build_weighted_fed_avg(IOTModel)
str(evaluation.get_model_weights)
train_metrics = evaluation(metrics.state, federated_train_data)  # <-- the error is raised here
str(train_metrics)
NUM_CLIENTS = len(np.unique(test_df[client_id_colname]))
sample_clients = test_data.client_ids[0:NUM_CLIENTS]
federated_test_data = make_federated_data(test_data, sample_clients)
len(federated_test_data), federated_test_data[0]
test_metrics = evaluation(state.model, federated_test_data)
The metrics object returned by iterative_process.next during training does not have a state attribute, hence the error is raised on this line:

train_metrics = evaluation(metrics.state, federated_train_data)

The definition of evaluation also needs to be changed: it is currently defined as a training method (FedAvg, not an evaluation method) on this line:

evaluation = tff.learning.algorithms.build_weighted_fed_avg(IOTModel)

I would suggest following the tutorial at Federated Learning for Image Classification, particularly the API usages of tff.learning.algorithms.build_weighted_fed_avg for training, and tff.learning.algorithms.build_fed_eval for evaluation.
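For reference, here is a minimal sketch of that change, assuming the IOTModel, iterative_process, state, and federated datasets defined in the question (I have not run this against your data):

evaluation = tff.learning.algorithms.build_fed_eval(IOTModel)
eval_state = evaluation.initialize()

# Copy the trained weights from the FedAvg training state into the evaluation state.
model_weights = iterative_process.get_model_weights(state)
eval_state = evaluation.set_model_weights(eval_state, model_weights)

# next() returns a LearningProcessOutput with .state and .metrics fields.
eval_output = evaluation.next(eval_state, federated_test_data)
print(eval_output.metrics)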