initial commit

This commit is contained in:
2025-08-06 20:16:29 +02:00
commit fd2e5cc112
3 changed files with 156 additions and 0 deletions

103
login.py Executable file
View File

@ -0,0 +1,103 @@
# **************************************************************************** #
# #
# ::: :::::::: #
# login.py :+: :+: :+: #
# +:+ +:+ +:+ #
# By: tomoron <tomoron@student.42angouleme.fr> +#+ +:+ +#+ #
# +#+#+#+#+#+ +#+ #
# Created: 2024/11/25 16:22:08 by tomoron #+# #+# #
# Updated: 2025/06/03 03:51:21 by tomoron ### ########.fr #
# #
# **************************************************************************** #
import requests
import time
import subprocess
import os
import re
import json
import urllib.parse
from getpass import getpass
from bs4 import BeautifulSoup
from collections import defaultdict
class Intra42():
    """Small client for the 42 intranet built on a ``requests`` session.

    Constructing an instance restores cookies saved by a previous run and
    then logs in interactively if the intranet does not already accept the
    session. Cookies are persisted to ``.cookies.json`` in the current
    working directory after a successful login.
    """
    user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.6167.160 Safari/537.36"
    # File the session cookies are saved to / restored from.
    _COOKIE_FILE = ".cookies.json"
    # Final URLs of the intranet root that login_user treats as "already logged in".
    _HOME_URLS = ("https://profile-v3.intra.42.fr/", "https://profile.intra.42.fr/")
    # Upper bound on login-and-retry rounds in request_url, so a URL that
    # keeps answering 302 cannot loop forever.
    _MAX_LOGIN_ATTEMPTS = 3

    def __init__(self):
        """Create the HTTP session, restore saved cookies and log in."""
        self.s = requests.Session()
        self.s.headers.update({"User-Agent": Intra42.user_agent})
        self.load_session()
        self.login_user()

    def load_session(self):
        """Load cookies from the cookie file into the HTTP session.

        Does nothing when the cookie file does not exist. The file is a
        JSON object mapping each cookie domain to a ``{name: value}`` object,
        as written by ``_save_session``.
        """
        if not os.path.isfile(self._COOKIE_FILE):
            return
        with open(self._COOKIE_FILE, 'r') as f:
            data = json.load(f)
        for domain, domain_cookies in data.items():
            for cookie_name, cookie_value in domain_cookies.items():
                new_cookie = requests.cookies.create_cookie(
                    domain=domain, name=cookie_name, value=cookie_value)
                self.s.cookies.set_cookie(new_cookie)

    def _save_session(self):
        """Write the session cookies to the cookie file, grouped by domain."""
        cookies = defaultdict(dict)
        for cookie in self.s.cookies:
            cookies[cookie.domain][cookie.name] = cookie.value
        with open(self._COOKIE_FILE, 'w') as f:
            json.dump(dict(cookies), f)

    def login_user(self):
        """Make sure the session is authenticated, prompting if necessary.

        Fetches the intranet root; if that lands on one of the profile home
        URLs the session is already valid and nothing else happens.
        Otherwise the login form found in the returned page is submitted
        with credentials read from the terminal, and the resulting cookies
        are saved.

        Raises:
            RuntimeError: the returned page has no usable login form.
            SystemExit: the credentials were rejected (the login endpoint
                answered 200 instead of redirecting).
        """
        response = self.s.get("https://intra.42.fr/")
        if response.url in self._HOME_URLS:
            return
        page = BeautifulSoup(response.text, "html.parser")
        form = page.find("form", id="kc-form-login")
        # Read the action through the parser: it is independent of attribute
        # order and already has HTML entities such as "&amp;" decoded.
        url = form.get("action") if form is not None else None
        if not url:
            raise RuntimeError(f"login form not found on {response.url}")
        username = urllib.parse.quote(input("login : "))
        password = urllib.parse.quote(getpass("password : "))
        req_body = f'username={username}&password={password}&rememberMe=on&credentialId='
        headers = {"Content-Type": "application/x-www-form-urlencoded"}
        response = self.s.post(url, data=req_body, headers=headers, allow_redirects=False)
        if response.status_code == 200:
            print("invalid username or password")
            # Non-zero status so a failed login is visible to the calling shell.
            raise SystemExit(1)
        print("sucessfuly logged in")
        self._save_session()

    def request_url(self, url, allow_redirects=False):
        """GET ``url`` with the session, logging in again when redirected.

        A 302 answer is treated as "not authenticated": ``login_user`` is
        called and the request is repeated, at most ``_MAX_LOGIN_ATTEMPTS``
        times. The last response is returned whatever its status.

        Args:
            url: address to fetch.
            allow_redirects: forwarded to ``requests`` for every attempt.

        Returns:
            The response object of the last attempt.
        """
        res = self.s.get(url, allow_redirects=allow_redirects)
        attempts = 0
        while res.status_code == 302 and attempts < self._MAX_LOGIN_ATTEMPTS:
            self.login_user()
            res = self.s.get(url, allow_redirects=allow_redirects)
            attempts += 1
        return res

    def get_intra_home(self):
        """Return the HTML of the profile home page."""
        response = self.request_url("https://profile.intra.42.fr/")
        return response.text

    def get_goals(self):
        """Return the HTML of the current user's goals page for the 42cursus."""
        response = self.request_url("https://profile.intra.42.fr/users/me/goals?cursus=42cursus")
        return response.text

    def get_project_page(self, slug):
        """Return the HTML of the project page identified by ``slug``."""
        response = self.request_url(f"https://projects.intra.42.fr/projects/{slug}")
        return response.text

49
scrap.py Normal file
View File

@ -0,0 +1,49 @@
# **************************************************************************** #
# #
# ::: :::::::: #
# scrap.py :+: :+: :+: #
# +:+ +:+ +:+ #
# By: tomoron <tomoron@student.42angouleme.fr> +#+ +:+ +#+ #
# +#+#+#+#+#+ +#+ #
# Created: 2024/11/25 16:39:19 by tomoron #+# #+# #
# Updated: 2024/11/26 14:54:43 by tomoron ### ########.fr #
# #
# **************************************************************************** #
from login import Intra42
from getpass import getpass
from tqdm import tqdm
import re
import json
import threading
def getUrl(project_list, x, checkPrev):
    """Look up the subject id of one project and append it to its row.

    Fetches the project page through the module-level ``connIntra`` client,
    takes the first line mentioning the intranet PDF CDN and extracts the
    first 3-to-10 digit number on it. Exactly one value is always appended
    to ``project_list[x]``, so rows stay aligned between passes even when a
    page has no subject link.

    Args:
        project_list: list of mutable rows; ``row[0]`` is the project slug.
        x: index of the row to process.
        checkPrev: when true, append ``None`` instead of the id if it equals
            ``row[2]`` (presumably the id stored by an earlier pass - the
            row layout comes from intra_projects.json, confirm there).
    """
    page_data = connIntra.get_project_page(project_list[x][0])
    subject_url = None
    for line in page_data.split("\n"):
        if "https://cdn.intra.42.fr/pdf/pdf" in line:
            # Only the first matching line is considered.
            found = re.search(r"([0-9]{3,10})", line)
            if found:
                subject_url = found.group(1)
            break
    if checkPrev and subject_url == project_list[x][2]:
        project_list[x].append(None)
    else:
        # Also reached with None when the page had no subject link.
        project_list[x].append(subject_url)
def getUrls(project_list, connIntra, checkPrev):
    """Run getUrl for every row of ``project_list``, one thread per row.

    Every worker is started immediately; the function then waits for all
    of them while showing a tqdm progress bar. ``connIntra`` is accepted
    but not used in this body. ``checkPrev`` is forwarded to getUrl
    unchanged.
    """
    workers = []
    for row_index, _ in enumerate(project_list):
        worker = threading.Thread(
            target=getUrl,
            args=(project_list, row_index, checkPrev),
        )
        worker.start()
        workers.append(worker)
    # The bar advances once per joined worker, in start order.
    for worker in tqdm(workers):
        worker.join()
# Read the rows to complete; each one is extended in place by getUrls.
with open("intra_projects.json", 'r') as projects_file:
    project_list = json.load(projects_file)

# Module-level client: getUrl reads this global from its worker threads.
connIntra = Intra42()

# First pass: the operator switches the intranet language by hand first.
input("set language to english and press enter (fuck l'intra)...")
getUrls(project_list, connIntra, False)

# Second pass: ids equal to the first pass are stored as None.
input("set language to french and press enter (fuck l'intra)...")
getUrls(project_list, connIntra, True)

with open("result.json", 'w') as result_file:
    json.dump(project_list, result_file, indent=4)

4
test.py Normal file
View File

@ -0,0 +1,4 @@
from login import Intra42
from getpass import getpass
# Manual smoke test: building the client runs its constructor, which calls
# load_session() and then login_user().
a = Intra42()