Bom dia, Ícaro,
Segue a árvore do projeto.
import PyPDF2
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from collections import Counter
import os
def extract_text_from_pdf(pdf_path):
    """Extract and concatenate the text of every page of a PDF file.

    Args:
        pdf_path: Path to the PDF file on disk.

    Returns:
        A single string with the text of all pages joined together.
    """
    with open(pdf_path, 'rb') as file:
        # PdfReader / .pages / .extract_text() is the current PyPDF2 API;
        # PdfFileReader / numPages / getPage / extractText were deprecated
        # in PyPDF2 2.x and removed in 3.x.
        pdf_reader = PyPDF2.PdfReader(file)
        # extract_text() may return None for pages without a text layer.
        page_texts = [page.extract_text() or '' for page in pdf_reader.pages]
    return ''.join(page_texts)
def clean_and_tokenize(text):
    """Tokenize *text*, keeping lowercased alphanumeric non-stopword tokens.

    Args:
        text: Raw text to tokenize.

    Returns:
        List of lowercased tokens with English stopwords and
        non-alphanumeric tokens removed (adjust the language if needed).
    """
    english_stopwords = set(stopwords.words('english'))
    cleaned = []
    for token in word_tokenize(text):
        lowered = token.lower()
        if token.isalnum() and lowered not in english_stopwords:
            cleaned.append(lowered)
    return cleaned
def analyze_swot(tokens):
    """Build a naive SWOT-style summary from token frequencies.

    Args:
        tokens: Iterable of (already cleaned) word tokens.

    Returns:
        Tuple ``(strengths, weaknesses)``: the 5 most common and the 5
        least common ``(word, count)`` pairs (least common first in
        ``weaknesses``). Fewer pairs are returned when there are fewer
        than 5 distinct words.
    """
    counter = Counter(tokens)
    strengths = counter.most_common(5)          # 5 most frequent words as "strengths"
    weaknesses = counter.most_common()[:-6:-1]  # 5 least frequent words as "weaknesses"
    # Bug fix: the original `return` was dedented to module level,
    # which is a SyntaxError; it belongs inside the function.
    return strengths, weaknesses
def main(pdf_path):
    """Run the full pipeline (extract → tokenize → analyze) and print results.

    Args:
        pdf_path: Path to the PDF file to analyze.
    """
    text = extract_text_from_pdf(pdf_path)
    tokens = clean_and_tokenize(text)
    # Bug fix: the lines below were dedented to module level in the
    # original, breaking the function body; they belong inside main().
    strengths, weaknesses = analyze_swot(tokens)
    print("Forças:")
    for word, count in strengths:
        print(f"{word}: {count}")
    print("\nFraquezas:")
    for word, count in weaknesses:
        print(f"{word}: {count}")
# Bug fixes: the guard must compare the dunder names `__name__` and
# `"__main__"` (plain `name`/"main" raises NameError), and the original
# fused the assignment and the `if` onto one line (SyntaxError).
if __name__ == "__main__":
    pdf_path = r"C:/Users/oleperei/Downloads/FLS_report-9m-2023.pdf"
    if os.path.exists(pdf_path):
        main(pdf_path)
    else:
        print(f"Erro: O arquivo não existe - {pdf_path}")