I am trying to use my machine learning model in my Django project, but I keep getting errors. This is my first time doing something like this, and I need help.
The following is my view code:
# Fetch the NLTK 'punkt' resource; this call runs when the module is imported.
nltk.download('punkt')

logger = logging.getLogger(__name__)

# The serialized artifacts are resolved relative to the directory holding this
# module. Only the paths are built here; the files themselves are loaded by
# the view, not at import time.
current_directory = os.path.dirname(os.path.abspath(__file__))
tfidf_vectorizer_path = os.path.join(current_directory, 'ml_model/tfidf_vectorizer.joblib')
model_file_path = os.path.join(current_directory, 'ml_model/phishing_url_model.joblib')
class UnregisteredScanCreate(generics.CreateAPIView):
    """API endpoint that classifies a submitted URL as phishing or not.

    The request data must carry a ``url`` field. The URL is turned into
    features by the TF-IDF vectorizer stored at ``tfidf_vectorizer_path`` and
    classified by the model stored at ``model_file_path``.
    """

    serializer_class = UnregisteredScanSerializer

    # (vectorizer, model) pair. Filled in on first use and then reused, so the
    # joblib files are not re-read from disk on every request.
    _artifacts = None

    def tokenizer(self, url):
        """Separate feature words from a raw URL.

        Keyword arguments:
        url ---- The full URL

        :Returns -- The tokenized words; returned as a list. The list holds
        the pieces obtained by splitting ``url`` on "/" and "-", followed by
        the dot-separated parts of every piece that contains a ".", with the
        first "com" and the first "www" of each such piece removed.
        """
        pieces = re.split('[/-]', url)
        tokens = list(pieces)
        # Walk the initial pieces only, rather than appending to the list that
        # is being iterated. The result is unchanged: the appended parts never
        # contain a ".", so they were never split a second time.
        for piece in pieces:
            if "." not in piece:
                continue
            dot_split = piece.split('.')
            if "com" in dot_split:
                dot_split.remove("com")
            if "www" in dot_split:
                dot_split.remove("www")
            tokens.extend(dot_split)
        return tokens

    @classmethod
    def _load_artifacts(cls):
        """Return the (vectorizer, model) pair, loading it on the first call."""
        if cls._artifacts is None:
            # NOTE(review): joblib unpickles these files, so they must come
            # from a trusted location. If the vectorizer was saved with a
            # custom tokenizer function, that function presumably has to be
            # importable under the same module path it had when the vectorizer
            # was saved, otherwise loading fails -- confirm against the
            # training script.
            cls._artifacts = (
                joblib.load(tfidf_vectorizer_path),
                joblib.load(model_file_path),
            )
        return cls._artifacts

    def create(self, request, *args, **kwargs):
        """Classify the URL found in the request data.

        Returns a 400 response when ``url`` is missing, empty or not a string;
        a 200 response with ``url`` and ``url_status`` on success; and a 500
        response carrying the error text when loading the artifacts or
        running the prediction fails.
        """
        url = request.data.get("url", "")
        if not url:
            return Response({"error": "URL is required"}, status=status.HTTP_400_BAD_REQUEST)
        if not isinstance(url, str):
            # A non-text value cannot be vectorized; reject it as a client
            # error instead of letting it surface as a server error.
            return Response({"error": "URL must be a string"}, status=status.HTTP_400_BAD_REQUEST)

        try:
            tfidf_vectorizer, model = self._load_artifacts()
            # Transform the URL into a numeric format using the fitted vectorizer
            url_features = tfidf_vectorizer.transform([url])
            # Predict the class (bad or good)
            prediction = model.predict(url_features)
        except Exception as e:
            # Keep the full traceback in the server log; the response body
            # only carries the message text.
            logger.exception("Phishing URL scan failed for %r", url)
            return Response({'error': str(e)}, status=status.HTTP_500_INTERNAL_SERVER_ERROR)

        if prediction[0] == 'bad':
            url_status = "This is a Phishing Site"
        else:
            url_status = "This is not a Phishing Site"

        response_data = {
            'url': url,
            'url_status': url_status,
        }
        return Response(response_data, status=status.HTTP_200_OK)
It seems like the error is coming from the tokenizer function that I defined. How can I sort this out? I have been stuck on this for 2 days now.