Ho scritto uno script in Python per accedere a un sito Web e analizzare il nome utente per assicurarmi di essere davvero in grado di accedere. L'uso del modo che ho provato di seguito sembra portarmi lì. Tuttavia, ho usato i cookie codificati presi dagli strumenti di sviluppo di Chrome all'interno dello script per avere successo.
Ho provato con:
import requests
from bs4 import BeautifulSoup
url = 'https://secure.imdb.com/ap/signin?openid.pape.max_auth_age=0&openid.return_to=https%3A%2F%2Fwww.imdb.com%2Fap-signin-handler&openid.identity=http%3A%2F%2Fspecs.openid.net%2Fauth%2F2.0%2Fidentifier_select&openid.assoc_handle=imdb_pro_us&openid.mode=checkid_setup&siteState=eyJvcGVuaWQuYXNzb2NfaGFuZGxlIjoiaW1kYl9wcm9fdXMiLCJyZWRpcmVjdFRvIjoiaHR0cHM6Ly9wcm8uaW1kYi5jb20vIn0&openid.claimed_id=http%3A%2F%2Fspecs.openid.net%2Fauth%2F2.0%2Fidentifier_select&openid.ns=http%3A%2F%2Fspecs.openid.net%2Fauth%2F2.0'
signin = 'https://secure.imdb.com/ap/signin'
mainurl = 'https://pro.imdb.com/'
with requests.Session() as s:
res = s.get(url,headers={"User-agent":"Mozilla/5.0"})
soup = BeautifulSoup(res.text,"lxml")
payload = {i['name']: i.get('value', '') for i in soup.select('input[name]')}
payload['email'] = 'some username'
payload['password'] = 'some password'
s.post(signin,data=payload,headers={
"User-agent":"Mozilla/5.0",
"Cookie": 'adblk=adblk_yes; ubid-main=130-2884709-6520735; _msuuid_518k2z41603=95C56F3B-E3C1-40E5-A47B-C4F7BAF2FF5D; _fbp=fb.1.1574621403438.97041399; pa=BCYm5GYAag-hj1CWg3cPXjfv2X6NGPUp6kLguepMku7Yf0W9-iSTjgmVNGmQLwUfJ5XJPHqlh84f%0D%0Agrd2voq0Q7TR_rdXU4T1BJw-1a-DdvCNSVuWSm50IXJDC_H4-wM_Qli_%0D%0A; uu=BCYnANeBBdnuTg3UKEVGDiO203C7KR0AQTdyE9Y_Y70vpd04N5QZ2bD3RwWdMBNMAJtdbRbPZMpG%0D%0AbPpC6vZvoMDzucwsE7pTQiKxY24Gr4_-0ONm7hGKPfPbMwvI1NYzy5ZhTIyIUqeVAQ7geCBiS5NS%0D%0A1A%0D%0A; session-id=137-0235974-9052660; session-id-time=2205351554; session-token=jsvzgJ4JY/TCgodelKegvXcqdLyAy4NTDO5/iEvk90VA8qWWEPJpiiRYAZe3V0EYVFlKq590mXU0OU9XMbAzwyKqXIzPLzKfLf3Cc3k0g/VQNTo6roAEa5IxmOGZjWrJuhkRZ1YgeF5uPZLcatWF1y5PFHqvjaDxQrf2LZbgRXF5N7vacTZ8maK0ciJmQEjh; csm-hit=tb:8HH0DWNBDVSWP881GYKG+s-8HH0DWNBDVSWP881GYKG|1574631571950&t:1574631571952&adb:adblk_yes'
})
r = s.get(mainurl,headers={
"Cookie": 'adblk=adblk_yes; ubid-main=130-2884709-6520735; _msuuid_518k2z41603=95C56F3B-E3C1-40E5-A47B-C4F7BAF2FF5D; _fbp=fb.1.1574621403438.97041399; pa=BCYm5GYAag-hj1CWg3cPXjfv2X6NGPUp6kLguepMku7Yf0W9-iSTjgmVNGmQLwUfJ5XJPHqlh84f%0D%0Agrd2voq0Q7TR_rdXU4T1BJw-1a-DdvCNSVuWSm50IXJDC_H4-wM_Qli_%0D%0A; csm-hit=tb:KV47B1QVKP4DNB3QGY95+b-NM69W1Y35R7ARV0639V5|1574631544432&t:1574631544432&adb:adblk_yes; session-id=137-0235974-9052660; session-id-time=2205351554; session-token="EsIzROiSTmFDfXd5jnBPIBOpYG9jAu7tiWXDF8R52sUw5jS6OjddfOOQB+ytCmq0K3UnXs9wKBvQtkB4aVNsXieVbRcIUrKf3iPnYeJchbOlShMjg+MR+O7IQgPKkw0BKihdYQ1YIl7KQS8VeLxZjtzJ5sj5ocnY72fCKdwq/fGOjfieFYbe9Km3a8h++1GpC738JbwcVdpTG08v1pjhQKifqPQXnqhcyVKhi8CD1qk="; x-main="C1KbtQgFFBAYfwttdRSrU5CpCe@Fn6SPHnBTY6dO2ppimt@u1P1L7G0PueQMn6X3"; at-main=Atza|IwEBICfS3UKNp2mwmbyUPY1QzjXRHMcL6fjv2ND7BDXsZ1G-qDPJKsLJXeU9gJOvRpWsofSpOJCyhnap-bIOWCutU6VMIS9bn3UkNVRP8WFVqrs-CLB5opLbrEx6YxVGQlfaxx54gzuuGO4D30z-AgBpGe64_bn0K1iLOT3P3i7S3nBzvP_0AopwKlbU7SRnE5m21cVfVK7bwbtfZO4cf7DrpGcaHK4dlY5jKHPzNx_AR4ypqsEBFbHon36N1j8foty6wLJhFP1gNCvs24mVCec24TRho5ZXFDYqhLB-dw9V3XY1eq7q1QNgtAdYkDSJ6Mq1nllFu59WqIVs1Y3lLEaxDUExLtCt-VQArpS_hZtZR8C_kevhV01jEhWg8RUQaCdYTMwZHwa778MiEOrrrdGqFnR5; sess-at-main="tWwUfkZLx+mDAPqZo+J6yJlnjqBJvYJ0oVMS6/NcIKQ="; id=BCYhnxuM-3g3WFo4uvCv6C5LdGLJKaIcZj8E-rQwU_YsF991I3Tqe94W6IlU27FvaNcnuCyv5Te3%0D%0A0c3O1mMYhEE14wMdByo2SvGXkBS0A4oFMJMEIe0aC1X4fyNRwWYNZ72a6NDzAOqeDQi3_7sZZGH8%0D%0AxQ%0D%0A; uu=BCYsGSOaee6VbhMOMXpG3F_6i7cTIkPCN0S0_Jv7c3bVkUQ5gp9vqtfvVlOMOIOqXv-uHSTSibBp%0D%0ATO1e4tRpT1DolY2qkoOW8yICF7ZrXqAgont_ShTy8zVEg1wxWCxg3_XQX8r8_dGFCO4NWZiyLH-f%0D%0A2RpBF2IJLUSd8R4UCbbbtgo%0D%0A; sid=BCYp9inRAYR9sJgmF1FcA9Vgto81vmiCYHP_gEVv6r2ZdBtz1bKtOQg4_0iSwREudsZrPM8SHMUk%0D%0A5jFMp74veGrdwNTf8DONXPUCExLgkHzfeoZr-KHf4VbI7aI5TrJhqSioYbEhHYqm6q5RGrXfCVPr%0D%0AqA%0D%0A'
})
sauce = BeautifulSoup(r.text,"lxml")
name = sauce.select_one("span.display-name").text
print(name)
Ho provato con quanto segue per vedere se funziona per evitare l'uso di cookie codificati ma purtroppo non è riuscito:
cookie_string = "; ".join([str(x)+"="+str(y) for x,y in s.cookies.items()])
Ecco come ho provato automaticamente:
cookie_string = "; ".join([str(x)+"="+str(y) for x,y in s.cookies.items()])
s.post(signin,data=payload,headers={
"User-agent":"Mozilla/5.0",
"Cookie": cookie_string
})
cookie_string_ano = "; ".join([str(x)+"="+str(y) for x,y in s.cookies.items()])
r = s.get(mainurl,headers={
"Cookie": cookie_string_ano
})
Quando ho provato a usare sopra posso vedere che cookie_string
, cookie_string_ano
stanno producendo session-id=130-0171771-5726549; session-id-time=2205475101l
e session-id=130-0171771-5726549; session-id-time=2205475101l; ubid-main=135-8050026-6353151
.
Come posso recuperare il nome utente senza utilizzare i cookie codificati nello script?
session-id
e session-id-time
ed i loro valori nelle cookie, ma in realtà ci sono molti di più in quelle hardcoded @Simas Joneliunas.
metadata1
parametro, che sembra essere js generato, ma non posso esserne sicuro.
print(s.cookies.items())
? Sei sicuro di ricevere tutti i cookie necessaris.get(url)
?