AttributeError: Can't get attribute 'tokenizer' on <module '__main__'

71 Views Asked by At

I am trying to use y machine learning model in my Django but I have been getting so many error, and this is my first time of doing something like this. I need help

the following is my view code


# BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))
nltk.download('punkt')


logger = logging.getLogger(__name__)
# Load your AI model
current_directory = os.path.dirname(os.path.abspath(__file__))
model_file_path = os.path.join(current_directory, 'ml_model/phishing_url_model.joblib')
tfidf_vectorizer_path = os.path.join(current_directory, 'ml_model/tfidf_vectorizer.joblib')


# tfidf_vectorizer = joblib.load(tfidf_vectorizer_path) 
# model = joblib.load(model_file_path) 

class UnregisteredScanCreate(generics.CreateAPIView):
    serializer_class = UnregisteredScanSerializer

    def tokenizer(self, url):
        # Your tokenizer code here
        """Separates feature words from the raw data
        Keyword arguments:
        url ---- The full URL

        :Returns -- The tokenized words; returned as a list
        """

        tokens = re.split('[/-]', url)

        for i in tokens:
            if i.find(".") >= 0:
                dot_split = i.split('.')
                if "com" in dot_split:
                    dot_split.remove("com")
                if "www" in dot_split:
                    dot_split.remove("www")
                tokens += dot_split

        return tokens

    def create(self, request, *args, **kwargs):
        try:
            # Load the trained TF-IDF vectorizer
            tfidf_vectorizer = joblib.load(tfidf_vectorizer_path)

            # Load the trained model
            model = joblib.load(model_file_path)

            # Get the URL from the request data
            url = request.data.get("url", "")

            if not url:
                return Response({"error": "URL is required"}, status=status.HTTP_400_BAD_REQUEST)

            # Transform the URL into a numeric format using the fitted vectorizer
            url_features = tfidf_vectorizer.transform([url])

            # Predict the class (bad or good)
            prediction = model.predict(url_features)

            if prediction[0] == 'bad':
                url_status = "This is a Phishing Site"
            else:
                url_status = "This is not a Phishing Site"

            response_data = {
                'url': url,
                'url_status': url_status
            }

            return Response(response_data, status=status.HTTP_200_OK)

        except Exception as e:
            return Response({'error': str(e)}, status=status.HTTP_500_INTERNAL_SERVER_ERROR)

It seems like the error is coming from the tokenize function that i defined. how can i sort this out, I have been on this for 2 days now

0

There are 0 best solutions below