thescientist.py

Created by alban-scientifique

Created on June 18, 2023

5.94 KB

My script is a chatbot that answers scientific questions…


import requests
from bs4 import BeautifulSoup
import wikipediaapi
from googlesearch import search
import stackapi
import wptools
from transformers import (
    T5Tokenizer, T5ForConditionalGeneration, pipeline,
    ElectraTokenizer, ElectraForQuestionAnswering,
    GPT2Tokenizer, GPT2LMHeadModel,
    RobertaTokenizer, RobertaForQuestionAnswering,
    DistilBertTokenizer, DistilBertForQuestionAnswering,
)
from langdetect import detect
import tensorflow as tf

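# answer_question() below relies on Wikipedia clients, a Stack Exchange client
# and several search URLs that are not defined anywhere else in the script.
# This is a minimal setup sketch: the URLs and their query parameters are
# assumptions based on each site's public search page, not values supplied by
# the original author.
wikipedia_fr = wikipediaapi.Wikipedia('fr')  # recent wikipedia-api releases may also require a user_agent argument
wikipedia_en = wikipediaapi.Wikipedia('en')
stack_exchange = stackapi.StackAPI('stackoverflow')

trust_my_science_url = 'https://trustmyscience.com/'                    # searched with ?s=
futura_sciences_url = 'https://www.futura-sciences.com/recherche/'      # searched with ?q=
national_geographic_url = 'https://www.nationalgeographic.com/search'   # searched with ?q=
nature_url = 'https://www.nature.com/search'                            # searched with ?q=
sciencedirect_url = 'https://www.sciencedirect.com/search'              # searched with ?qs=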

# Automatically detect the language of the question
def detect_language(question):
    return detect(question)

# Summarize the answers collected from the different sources
def summarize_responses(responses):
    return '\n'.join(f"{r['model']}: {r['answer']}" for r in responses)

# Answer a question by querying several sources
def answer_question(question):
    responses = []

    # Detect the language of the question
    lang = detect_language(question)

    # Get an answer from Wikipedia
    if lang == 'fr':
        wiki_page_fr = wikipedia_fr.page(question)
        if wiki_page_fr.exists():
            responses.append({'model': 'Wikipedia (FR)', 'answer': wiki_page_fr.summary})

    elif lang == 'en':
        wiki_page_en = wikipedia_en.page(question)
        if wiki_page_en.exists():
            responses.append({'model': 'Wikipedia (EN)', 'answer': wiki_page_en.summary})

    # Get an answer from Stack Overflow (the search/advanced endpoint accepts a free-text q parameter)
    stackoverflow_results = stack_exchange.fetch('search/advanced', q=question)
    if stackoverflow_results['items']:
        for item in stackoverflow_results['items']:
            responses.append({'model': 'Stack Overflow', 'answer': item['title']})
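
    # Note: the scraping blocks below depend on each site's current HTML
    # markup; the CSS selectors come from the original script and may break
    # if the sites change their layout.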

    # Get an answer from Trust My Science
    trust_my_science_response = requests.get(trust_my_science_url, params={'s': question})
    trust_my_science_soup = BeautifulSoup(trust_my_science_response.text, 'html.parser')
    trust_my_science_articles = trust_my_science_soup.select('.post-header h2 a')
    if trust_my_science_articles:
        article_title = trust_my_science_articles[0].text
        responses.append({'model': 'Trust My Science', 'answer': article_title})

    # Get an answer from Futura Sciences
    futura_sciences_response = requests.get(futura_sciences_url, params={'q': question})
    futura_sciences_soup = BeautifulSoup(futura_sciences_response.text, 'html.parser')
    futura_sciences_articles = futura_sciences_soup.select('.highlight h2 a')
    if futura_sciences_articles:
        article_title = futura_sciences_articles[0].text
        responses.append({'model': 'Futura Sciences', 'answer': article_title})

    # Get an answer from National Geographic
    national_geographic_response = requests.get(national_geographic_url, params={'q': question})
    national_geographic_soup = BeautifulSoup(national_geographic_response.text, 'html.parser')
    national_geographic_articles = national_geographic_soup.select('.comp-SearchResults li a')
    if national_geographic_articles:
        article_title = national_geographic_articles[0].text
        responses.append({'model': 'National Geographic', 'answer': article_title})

    # Get an answer from Nature
    nature_response = requests.get(nature_url, params={'q': question})
    nature_soup = BeautifulSoup(nature_response.text, 'html.parser')
    nature_articles = nature_soup.select('.title a')
    if nature_articles:
        article_title = nature_articles[0].text
        responses.append({'model': 'Nature', 'answer': article_title})

    # Get an answer from ScienceDirect
    sciencedirect_response = requests.get(sciencedirect_url, params={'qs': question})
    sciencedirect_soup = BeautifulSoup(sciencedirect_response.text, 'html.parser')
    sciencedirect_articles = sciencedirect_soup.select('.result-item-content h2 a')
    if sciencedirect_articles:
        article_title = sciencedirect_articles[0].text
        responses.append({'model': 'ScienceDirect', 'answer': article_title})

    # Get an answer from the CNRS
    cnrs_results = recherche_cnrs(question)
    if cnrs_results:
        for result in cnrs_results:
            responses.append({'model': 'CNRS', 'answer': result['titre']})

    return summarize_responses(responses)

# Search the CNRS website for a keyword
def recherche_cnrs(mot_cle):
    url = f"https://www.cnrs.fr/chercheurs-et-laboratoires/resultats-de-recherche/?tx_solr[q]={mot_cle}"
    response = requests.get(url)
    soup = BeautifulSoup(response.text, 'html.parser')

    resultats = []
    articles = soup.find_all('article')
    for article in articles:
        titre = article.find('h2')
        lien = article.find('a')
        if not (titre and lien):  # skip entries without the expected structure
            continue
        description = article.find('p')
        resultats.append({'titre': titre.text.strip(),
                          'description': description.text.strip() if description else '',
                          'lien': lien['href']})

    return resultats

# Initialize the models and tokenizers
tokenizer_t5 = T5Tokenizer.from_pretrained('t5-base')
model_t5 = T5ForConditionalGeneration.from_pretrained('t5-base')

tokenizer_electra = ElectraTokenizer.from_pretrained('google/electra-base-discriminator')
model_electra = ElectraForQuestionAnswering.from_pretrained('google/electra-base-discriminator')

tokenizer_gpt2 = GPT2Tokenizer.from_pretrained('gpt2')
model_gpt2 = GPT2LMHeadModel.from_pretrained('gpt2')

tokenizer_roberta = RobertaTokenizer.from_pretrained('roberta-base')
model_roberta_fr = RobertaForQuestionAnswering.from_pretrained('camembert/camembert-base')
model_roberta_en = RobertaForQuestionAnswering.from_pretrained('deepset/roberta-base-squad2')

tokenizer_distilbert = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')
model_distilbert_fr = DistilBertForQuestionAnswering.from_pretrained('distilbert-base-uncased-distilled-squad')
model_distilbert_en = DistilBertForQuestionAnswering.from_pretrained('distilbert-base-uncased-distilled-squad')
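
# The question-answering models above are loaded but never called in
# answer_question(). Below is a minimal sketch of how one of them could be
# wired in, assuming the intent was extractive QA over a retrieved summary;
# the helper name extract_answer is illustrative and not part of the original
# script. Note that model_distilbert_fr points to the same English SQuAD
# checkpoint as model_distilbert_en, so French answers will be unreliable.
def extract_answer(question, context, lang='en'):
    if lang == 'fr':
        qa = pipeline('question-answering', model=model_distilbert_fr, tokenizer=tokenizer_distilbert)
    else:
        qa = pipeline('question-answering', model=model_distilbert_en, tokenizer=tokenizer_distilbert)
    result = qa(question=question, context=context)
    return result['answer']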

# Example usage
question = "Quelle est la température d'ébullition de l'eau ?"
response = answer_question(question)
print(response)
