import string

import nltk
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer, WordNetLemmatizer
from nltk.tokenize import word_tokenize

# Download the required NLTK data (one-time setup; newer NLTK releases
# may also need 'punkt_tab' for word_tokenize)
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')
nltk.download('omw-1.4')  # WordNet data the lemmatizer needs on some NLTK versions

def text_preprocessing(text):
    """Tokenize, normalize, and reduce a raw string to a list of word stems."""
    # Tokenization
    tokens = word_tokenize(text)
    # Lowercasing
    tokens = [token.lower() for token in tokens]
    # Removing punctuation (only drops tokens that are a single punctuation character)
    tokens = [token for token in tokens if token not in string.punctuation]
    # Stop word removal
    stop_words = set(stopwords.words('english'))
    tokens = [token for token in tokens if token not in stop_words]
    # Stemming
    stemmer = PorterStemmer()
    tokens = [stemmer.stem(token) for token in tokens]
    # Lemmatization (note: applying both stemming and lemmatization is
    # usually redundant; in practice you would pick one or the other)
    lemmatizer = WordNetLemmatizer()
    tokens = [lemmatizer.lemmatize(token) for token in tokens]
    return tokens
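
# Illustrative comparison (assumed example word, not from the original):
# stemming chops suffixes heuristically, while lemmatization maps to a
# dictionary form, which is why combining them rarely adds value.
print(PorterStemmer().stem('studies'))           # -> 'studi'
print(WordNetLemmatizer().lemmatize('studies'))  # -> 'study'
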
# Example usage
text = "The quick brown fox jumps over the lazy dog."
processed_text = text_preprocessing(text)
print(processed_text)
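# Expected output on a standard NLTK install (exact stems may vary by version):
# ['quick', 'brown', 'fox', 'jump', 'lazi', 'dog']
# The stop words 'the' and 'over' and the trailing period are removed,
# and 'jumps' is stemmed to 'jump'.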