initial commit
This commit is contained in:
103
login.py
Executable file
103
login.py
Executable file
@ -0,0 +1,103 @@
|
|||||||
|
# **************************************************************************** #
|
||||||
|
# #
|
||||||
|
# ::: :::::::: #
|
||||||
|
# login.py :+: :+: :+: #
|
||||||
|
# +:+ +:+ +:+ #
|
||||||
|
# By: tomoron <tomoron@student.42angouleme.fr> +#+ +:+ +#+ #
|
||||||
|
# +#+#+#+#+#+ +#+ #
|
||||||
|
# Created: 2024/11/25 16:22:08 by tomoron #+# #+# #
|
||||||
|
# Updated: 2025/06/03 03:51:21 by tomoron ### ########.fr #
|
||||||
|
# #
|
||||||
|
# **************************************************************************** #
|
||||||
|
|
||||||
|
import requests
|
||||||
|
import time
|
||||||
|
import subprocess
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import json
|
||||||
|
import urllib.parse
|
||||||
|
from getpass import getpass
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
from collections import defaultdict
|
||||||
|
|
||||||
|
|
||||||
|
class Intra42():
    """Authenticated HTTP client for the 42 intranet.

    Wraps a ``requests.Session`` whose cookies are persisted to
    ``.cookies.json`` in the current working directory, so the interactive
    login prompt is only needed when no usable saved session exists.
    """

    user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.6167.160 Safari/537.36"

    # Final URLs that mean the session is already signed in.
    _profile_urls = (
        "https://profile-v3.intra.42.fr/",
        "https://profile.intra.42.fr/",
    )

    # Cookie store, relative to the current working directory.
    _cookie_file = ".cookies.json"

    def __init__(self):
        """Create the session, restore saved cookies, then log in if needed."""
        self.s = requests.Session()
        self.s.headers.update({"User-Agent": Intra42.user_agent})

        self.load_session()
        self.login_user()

    def load_session(self):
        """Load cookies saved by a previous login into the session.

        Does nothing when the cookie file does not exist.
        """
        if not os.path.isfile(self._cookie_file):
            return

        with open(self._cookie_file, "r") as f:
            data = json.load(f)

        # The file maps domain -> {cookie name -> cookie value}.
        for domain, domain_cookies in data.items():
            for cookie_name, cookie_value in domain_cookies.items():
                new_cookie = requests.cookies.create_cookie(
                    domain=domain, name=cookie_name, value=cookie_value
                )
                self.s.cookies.set_cookie(new_cookie)

    def login_user(self):
        """Make sure the session is signed in, prompting for credentials if not.

        Fetches the intranet root (following redirects). If that lands on one
        of the known profile URLs the session is already valid and nothing
        else happens. Otherwise the ``kc-form-login`` form is located on the
        returned page, the user is prompted for a login and password, the
        form is submitted, and the resulting cookies are written to the
        cookie file.

        Raises:
            RuntimeError: if the login form or its ``action`` URL is missing.
            SystemExit: if the server rejects the credentials.
        """
        response = self.s.get("https://intra.42.fr/")

        if response.url in self._profile_urls:
            return

        page = BeautifulSoup(response.text, "html.parser")
        form = page.find("form", id="kc-form-login")
        # Read the parsed attribute instead of slicing the serialized tag:
        # this does not depend on attribute order and the parser has already
        # decoded HTML entities such as "&amp;".
        url = form.get("action") if form is not None else None
        if not url:
            raise RuntimeError("login form (kc-form-login) not found on the sign-in page")

        username = input("login : ")
        password = getpass("password : ")

        # Passing a dict lets requests form-encode the values and set the
        # application/x-www-form-urlencoded Content-Type itself.
        form_fields = {
            "username": username,
            "password": password,
            "rememberMe": "on",
            "credentialId": "",
        }
        response = self.s.post(url, data=form_fields, allow_redirects=False)

        # A plain 200 is treated as a rejected login; presumably a successful
        # submission answers with a redirect instead.
        if response.status_code == 200:
            print("invalid username or password")
            raise SystemExit(1)
        print("sucessfuly logged in")

        self._save_session()

    def _save_session(self):
        """Write the session cookies to the cookie file as domain -> name -> value."""
        cookies = defaultdict(dict)
        for cookie in self.s.cookies:
            cookies[cookie.domain][cookie.name] = cookie.value

        with open(self._cookie_file, "w") as f:
            json.dump(cookies, f)

    def request_url(self, url, allow_redirects=False):
        """GET ``url`` with the session, re-authenticating once on a 302.

        Args:
            url: absolute URL to fetch.
            allow_redirects: forwarded to ``Session.get`` on every attempt.

        Returns:
            The response of the last attempt. If the retry after
            ``login_user()`` is still a 302, that response is returned as is
            rather than retrying forever.
        """
        res = self.s.get(url, allow_redirects=allow_redirects)
        if res.status_code == 302:
            # A redirect is taken to mean the session is not (or no longer)
            # authenticated for this host.
            self.login_user()
            res = self.s.get(url, allow_redirects=allow_redirects)
        return res

    def get_intra_home(self):
        """Return the body of the intranet profile home page."""
        response = self.request_url("https://profile.intra.42.fr/")
        return response.text

    def get_goals(self):
        """Return the body of the current user's goals page for the 42cursus."""
        response = self.request_url("https://profile.intra.42.fr/users/me/goals?cursus=42cursus")
        return response.text

    def get_project_page(self, slug):
        """Return the body of the project page identified by ``slug``."""
        response = self.request_url(f"https://projects.intra.42.fr/projects/{slug}")
        return response.text
|
49
scrap.py
Normal file
49
scrap.py
Normal file
@ -0,0 +1,49 @@
|
|||||||
|
# **************************************************************************** #
|
||||||
|
# #
|
||||||
|
# ::: :::::::: #
|
||||||
|
# scrap.py :+: :+: :+: #
|
||||||
|
# +:+ +:+ +:+ #
|
||||||
|
# By: tomoron <tomoron@student.42angouleme.fr> +#+ +:+ +#+ #
|
||||||
|
# +#+#+#+#+#+ +#+ #
|
||||||
|
# Created: 2024/11/25 16:39:19 by tomoron #+# #+# #
|
||||||
|
# Updated: 2024/11/26 14:54:43 by tomoron ### ########.fr #
|
||||||
|
# #
|
||||||
|
# **************************************************************************** #
|
||||||
|
|
||||||
|
from login import Intra42
|
||||||
|
from getpass import getpass
|
||||||
|
from tqdm import tqdm
|
||||||
|
import re
|
||||||
|
import json
|
||||||
|
import threading
|
||||||
|
|
||||||
|
# First run of 3 to 10 digits on the subject line; the code uses it as the
# subject identifier.
_SUBJECT_ID_RE = re.compile(r"([0-9]{3,10})")


def getUrl(project_list, x, checkPrev, conn=None):
    """Fetch one project page and append its subject identifier to the entry.

    Exactly one element is always appended to ``project_list[x]`` so that the
    positional layout of every entry stays aligned between passes.

    Args:
        project_list: list of per-project lists; element 0 of each entry is
            passed to ``get_project_page`` as the project slug.
        x: index of the entry to process.
        checkPrev: when true, the identifier is compared with
            ``project_list[x][2]`` and ``None`` is appended if they are equal.
        conn: object exposing ``get_project_page(slug)``. When omitted, the
            module-level ``connIntra`` is used, as earlier callers relied on.

    The appended value is ``None`` when the page has no line containing the
    CDN subject URL, or when that line holds no 3-10 digit run.
    """
    if conn is None:
        conn = connIntra
    page_data = conn.get_project_page(project_list[x][0])

    subject_id = None
    for line in page_data.split("\n"):
        if "https://cdn.intra.42.fr/pdf/pdf" in line:
            match = _SUBJECT_ID_RE.search(line)
            if match is not None:
                subject_id = match.group(1)
            # Only the first line mentioning the CDN URL is considered.
            break

    if checkPrev and subject_id == project_list[x][2]:
        project_list[x].append(None)
    else:
        project_list[x].append(subject_id)


def getUrls(project_list, connIntra, checkPrev):
    """Run ``getUrl`` for every entry of ``project_list``, one thread each.

    Args:
        project_list: list of per-project lists, mutated in place.
        connIntra: client forwarded to every ``getUrl`` call.
        checkPrev: forwarded to ``getUrl``.

    Blocks until every thread has finished, showing a tqdm progress bar
    while joining them.
    """
    threads = [
        threading.Thread(target=getUrl, args=(project_list, index, checkPrev, connIntra))
        for index in range(len(project_list))
    ]
    for thread in threads:
        thread.start()
    for thread in tqdm(threads):
        thread.join()
|
||||||
|
|
||||||
|
# Script entry point. Guarded so that importing this module does not trigger
# the interactive login, the prompts, or the file writes. The names stay at
# module level (no wrapping function) so `connIntra` remains a module global
# when run as a script.
if __name__ == "__main__":
    with open("intra_projects.json", 'r') as f:
        project_list = json.load(f)

    connIntra = Intra42()

    # Two passes over the same project list. The user switches the intranet
    # language by hand between them; the second pass appends None wherever
    # the identifier equals the one stored at index 2 of the entry.
    input("set language to english and press enter (fuck l'intra)...")
    getUrls(project_list, connIntra, False)
    input("set language to french and press enter (fuck l'intra)...")
    getUrls(project_list, connIntra, True)

    with open("result.json", 'w') as f:
        json.dump(project_list, f, indent=4)
|
Reference in New Issue
Block a user