È possibile ottenere il codice di risposta a una richiesta http utilizzando Selenium e Chrome o Firefox. Tutto quello che devi fare è avviare Chrome o Firefox in modalità di registrazione. Ti mostrerò alcuni esempi di seguito.
java + Selenium + Chrome
Ecco un esempio con java + Selenium + Chrome, ma immagino che si possa fare in qualsiasi linguaggio (Python, C#, ...).
Tutto quello che devi fare è dire a chromedriver di fare "Network.enable". Questo può essere fatto abilitando la registrazione delle prestazioni.
// Turn on the driver's performance log with Level.ALL; per the surrounding
// text, enabling this log is what makes chromedriver issue "Network.enable".
LoggingPreferences logPrefs = new LoggingPreferences();
logPrefs.enable(LogType.PERFORMANCE, Level.ALL);
// Attach the logging preferences to the capabilities object ("cap" is
// created in the complete listing, not in this excerpt).
cap.setCapability(CapabilityType.LOGGING_PREFS, logPrefs);
Dopo che la richiesta è stata eseguita, tutto ciò che devi fare è ottenere i log di Performance, iterarli e trovare "Network.responseReceived" per l'URL richiesto:
// Fetch the entries collected in the driver's "performance" log.
LogEntries logs = driver.manage().logs().get("performance");
Ecco il codice:
import java.util.Iterator;
import java.util.logging.Level;
import org.json.JSONException;
import org.json.JSONObject;
import org.openqa.selenium.chrome.ChromeDriver;
import org.openqa.selenium.chrome.ChromeOptions;
import org.openqa.selenium.logging.LogEntries;
import org.openqa.selenium.logging.LogEntry;
import org.openqa.selenium.logging.LogType;
import org.openqa.selenium.logging.LoggingPreferences;
import org.openqa.selenium.remote.CapabilityType;
import org.openqa.selenium.remote.DesiredCapabilities;
/**
 * Demonstrates how to obtain the HTTP status code of a page loaded through
 * Selenium + Chrome by enabling chromedriver's performance log and looking
 * for the "Network.responseReceived" entry of the loaded URL.
 */
public class TestResponseCode
{
    /**
     * Loads a sample page and prints its performance log and status code.
     *
     * @param args unused
     */
    public static void main(String[] args)
    {
        // simple page (without many resources) so that the output is
        // easy to understand
        String url = "http://www.york.ac.uk/teaching/cws/wws/webpage1.html";
        DownloadPage(url);
    }

    /**
     * Opens {@code url} in Chrome with performance logging enabled, prints
     * every performance log entry and finally the HTTP status code found for
     * the page (or -1 when no matching response entry was logged).
     *
     * @param url the page to navigate to
     */
    private static void DownloadPage(String url)
    {
        ChromeDriver driver = null;
        try
        {
            ChromeOptions options = new ChromeOptions();
            // add whatever extensions you need
            // for example I needed one of adding proxy, and one for blocking
            // images
            // options.addExtensions(new File(file, "proxy.zip"));
            // options.addExtensions(new File("extensions",
            // "Block-image_v1.1.crx"));
            DesiredCapabilities cap = DesiredCapabilities.chrome();
            cap.setCapability(ChromeOptions.CAPABILITY, options);

            // set performance logger
            // this sends Network.enable to chromedriver
            LoggingPreferences logPrefs = new LoggingPreferences();
            logPrefs.enable(LogType.PERFORMANCE, Level.ALL);
            cap.setCapability(CapabilityType.LOGGING_PREFS, logPrefs);
            driver = new ChromeDriver(cap);

            // navigate to the page
            System.out.println("Navigate to " + url);
            driver.navigate().to(url);

            // capture the last recorded url (it may be a redirect, or the
            // original url). The browser's current URL may carry a
            // "#fragment" that is typically absent from the URLs reported in
            // network events, so both sides are compared without it.
            String currentURL = driver.getCurrentUrl();
            String expectedUrl = stripFragment(currentURL);

            // then ask for all the performance logs from this request
            // one of them will contain the Network.responseReceived method
            // and we shall find the "last recorded url" response
            LogEntries logs = driver.manage().logs().get("performance");
            int status = -1;
            System.out.println("\nList of log entries:\n");
            for (LogEntry entry : logs)
            {
                try
                {
                    JSONObject json = new JSONObject(entry.getMessage());
                    System.out.println(json.toString());
                    JSONObject message = json.getJSONObject("message");
                    // getString throws JSONException when the key is absent,
                    // so no null check is needed here
                    String method = message.getString("method");
                    if (!"Network.responseReceived".equals(method))
                    {
                        continue;
                    }
                    JSONObject params = message.getJSONObject("params");
                    JSONObject response = params.getJSONObject("response");
                    String messageUrl = response.getString("url");
                    if (expectedUrl.equals(stripFragment(messageUrl)))
                    {
                        status = response.getInt("status");
                        System.out.println(
                                "---------- bingo !!!!!!!!!!!!!! returned response for "
                                        + messageUrl + ": " + status);
                        System.out.println(
                                "---------- bingo !!!!!!!!!!!!!! headers: "
                                        + response.get("headers"));
                    }
                }
                catch (JSONException e)
                {
                    // a single malformed entry must not abort the scan;
                    // report it and continue with the next one
                    System.err.println(
                            "Skipping unparsable performance log entry: "
                                    + e.getMessage());
                }
            }
            System.out.println("\nstatus code: " + status);
        }
        finally
        {
            // always close the browser, even when navigation failed
            if (driver != null)
            {
                driver.quit();
            }
        }
    }

    /**
     * Returns {@code url} without its "#fragment" part, if any.
     *
     * @param url the URL to normalize (must not be null)
     * @return the URL up to, but excluding, the first '#'
     */
    private static String stripFragment(String url)
    {
        int hash = url.indexOf('#');
        return hash < 0 ? url : url.substring(0, hash);
    }
}
L'output è simile a questo:
Navigate to http://www.york.ac.uk/teaching/cws/wws/webpage1.html
List of log entries:
{"webview":"3b8eaedb-bd0f-4baa-938d-4aee4039abfe","message":{"method":"Page.frameAttached","params":{"parentFrameId":"172.1","frameId":"172.2"}}}
{"webview":"3b8eaedb-bd0f-4baa-938d-4aee4039abfe","message":{"method":"Page.frameStartedLoading","params":{"frameId":"172.2"}}}
{"webview":"3b8eaedb-bd0f-4baa-938d-4aee4039abfe","message":{"method":"Page.frameNavigated","params":{"frame":{"securityOrigin":"://","loaderId":"172.1","name":"chromedriver dummy frame","id":"172.2","mimeType":"text/html","parentId":"172.1","url":"about:blank"}}}}
{"webview":"3b8eaedb-bd0f-4baa-938d-4aee4039abfe","message":{"method":"Page.frameStoppedLoading","params":{"frameId":"172.2"}}}
{"webview":"3b8eaedb-bd0f-4baa-938d-4aee4039abfe","message":{"method":"Page.frameStartedLoading","params":{"frameId":"3928.1"}}}
{"webview":"3b8eaedb-bd0f-4baa-938d-4aee4039abfe","message":{"method":"Network.requestWillBeSent","params":{"request":{"headers":{"Upgrade-Insecure-Requests":"1","User-Agent":"Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.143 Safari/537.36"},"initialPriority":"VeryHigh","method":"GET","mixedContentType":"none","url":"http://www.york.ac.uk/teaching/cws/wws/webpage1.html"},"frameId":"3928.1","requestId":"3928.1","documentURL":"http://www.york.ac.uk/teaching/cws/wws/webpage1.html","initiator":{"type":"other"},"loaderId":"3928.1","wallTime":1.47619492749007E9,"type":"Document","timestamp":20226.652971}}}
{"webview":"3b8eaedb-bd0f-4baa-938d-4aee4039abfe","message":{"method":"Network.responseReceived","params":{"frameId":"3928.1","requestId":"3928.1","response":{"headers":{"Accept-Ranges":"bytes","Keep-Alive":"timeout=4, max=100","Cache-Control":"max-age=300","Server":"Apache/2.2.22 (Ubuntu)","Connection":"Keep-Alive","Content-Encoding":"gzip","Vary":"Accept-Encoding","Expires":"Tue, 11 Oct 2016 14:13:47 GMT","Content-Length":"1957","Date":"Tue, 11 Oct 2016 14:08:47 GMT","Content-Type":"text/html"},"connectionReused":false,"timing":{"pushEnd":0,"workerStart":-1,"proxyEnd":-1,"workerReady":-1,"sslEnd":-1,"pushStart":0,"requestTime":20226.65335,"sslStart":-1,"dnsStart":0,"sendEnd":31.6569999995409,"connectEnd":31.4990000006219,"connectStart":0,"sendStart":31.5860000009707,"dnsEnd":0,"receiveHeadersEnd":115.645999998378,"proxyStart":-1},"encodedDataLength":-1,"remotePort":80,"mimeType":"text/html","headersText":"HTTP/1.1 200 OK\r\nDate: Tue, 11 Oct 2016 14:08:47 GMT\r\nServer: Apache/2.2.22 (Ubuntu)\r\nAccept-Ranges: bytes\r\nCache-Control: max-age=300\r\nExpires: Tue, 11 Oct 2016 14:13:47 GMT\r\nVary: Accept-Encoding\r\nContent-Encoding: gzip\r\nContent-Length: 1957\r\nKeep-Alive: timeout=4, max=100\r\nConnection: Keep-Alive\r\nContent-Type: text/html\r\n\r\n","securityState":"neutral","requestHeadersText":"GET /teaching/cws/wws/webpage1.html HTTP/1.1\r\nHost: www.york.ac.uk\r\nConnection: keep-alive\r\nUpgrade-Insecure-Requests: 1\r\nUser-Agent: Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.143 Safari/537.36\r\nAccept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8\r\nAccept-Encoding: gzip, deflate, sdch\r\nAccept-Language: 
en-GB,en-US;q=0.8,en;q=0.6\r\n\r\n","url":"http://www.york.ac.uk/teaching/cws/wws/webpage1.html","protocol":"http/1.1","fromDiskCache":false,"fromServiceWorker":false,"requestHeaders":{"Accept":"text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8","Upgrade-Insecure-Requests":"1","Connection":"keep-alive","User-Agent":"Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.143 Safari/537.36","Host":"www.york.ac.uk","Accept-Encoding":"gzip, deflate, sdch","Accept-Language":"en-GB,en-US;q=0.8,en;q=0.6"},"remoteIPAddress":"144.32.128.84","statusText":"OK","connectionId":11,"status":200},"loaderId":"3928.1","type":"Document","timestamp":20226.770012}}}
---------- bingo !!!!!!!!!!!!!! returned response for http://www.york.ac.uk/teaching/cws/wws/webpage1.html: 200
---------- bingo !!!!!!!!!!!!!! headers: {"Accept-Ranges":"bytes","Keep-Alive":"timeout=4, max=100","Cache-Control":"max-age=300","Server":"Apache/2.2.22 (Ubuntu)","Connection":"Keep-Alive","Content-Encoding":"gzip","Vary":"Accept-Encoding","Expires":"Tue, 11 Oct 2016 14:13:47 GMT","Content-Length":"1957","Date":"Tue, 11 Oct 2016 14:08:47 GMT","Content-Type":"text/html"}
{"webview":"3b8eaedb-bd0f-4baa-938d-4aee4039abfe","message":{"method":"Network.dataReceived","params":{"dataLength":2111,"requestId":"3928.1","encodedDataLength":1460,"timestamp":20226.770425}}}
{"webview":"3b8eaedb-bd0f-4baa-938d-4aee4039abfe","message":{"method":"Page.frameNavigated","params":{"frame":{"securityOrigin":"http://www.york.ac.uk","loaderId":"3928.1","id":"3928.1","mimeType":"text/html","url":"http://www.york.ac.uk/teaching/cws/wws/webpage1.html"}}}}
{"webview":"3b8eaedb-bd0f-4baa-938d-4aee4039abfe","message":{"method":"Network.dataReceived","params":{"dataLength":1943,"requestId":"3928.1","encodedDataLength":825,"timestamp":20226.782673}}}
{"webview":"3b8eaedb-bd0f-4baa-938d-4aee4039abfe","message":{"method":"Network.loadingFinished","params":{"requestId":"3928.1","encodedDataLength":2285,"timestamp":20226.770199}}}
{"webview":"3b8eaedb-bd0f-4baa-938d-4aee4039abfe","message":{"method":"Page.loadEventFired","params":{"timestamp":20226.799391}}}
{"webview":"3b8eaedb-bd0f-4baa-938d-4aee4039abfe","message":{"method":"Page.frameStoppedLoading","params":{"frameId":"3928.1"}}}
{"webview":"3b8eaedb-bd0f-4baa-938d-4aee4039abfe","message":{"method":"Page.domContentEventFired","params":{"timestamp":20226.845769}}}
{"webview":"3b8eaedb-bd0f-4baa-938d-4aee4039abfe","message":{"method":"Network.requestWillBeSent","params":{"request":{"headers":{"Referer":"http://www.york.ac.uk/teaching/cws/wws/webpage1.html","User-Agent":"Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.143 Safari/537.36"},"initialPriority":"High","method":"GET","mixedContentType":"none","url":"http://www.york.ac.uk/favicon.ico"},"frameId":"3928.1","requestId":"3928.2","documentURL":"http://www.york.ac.uk/teaching/cws/wws/webpage1.html","initiator":{"type":"other"},"loaderId":"3928.1","wallTime":1.47619492768527E9,"type":"Other","timestamp":20226.848174}}}
status code: 200
java + Selenium + Firefox
Ho finalmente trovato il trucco anche per Firefox. È necessario avviare Firefox impostando le variabili di ambiente MOZ_LOG e MOZ_LOG_FILE e registrare le richieste http a livello di debug (4 = PR_LOG_DEBUG): map.put("MOZ_LOG", "timestamp,sync,nsHttp:4"). Il log viene salvato in un file temporaneo. Successivamente, leggi il contenuto del file di log salvato e analizzalo per ricavare il codice di risposta (utilizzando alcune semplici espressioni regolari). Per prima cosa rileva l'inizio della richiesta, identificandone l'ID (nsHttpChannel::BeginConnect [this=000000CED8094000]), quindi, al secondo passaggio, trova il codice di risposta per quell'ID di richiesta (nsHttpChannel::ProcessResponse [this=000000CED8094000 httpStatus=200]).
import java.io.File;
import java.io.IOException;
import java.net.MalformedURLException;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.io.FileUtils;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.firefox.FirefoxDriver;
import org.openqa.selenium.firefox.GeckoDriverService;
/**
 * Demonstrates how to obtain the HTTP status code of a page loaded through
 * Selenium + Firefox by making Firefox write its HTTP log (MOZ_LOG) to a
 * temporary file and parsing that file afterwards.
 */
public class TestFirefoxResponse
{
    // Matches the start of a request in the log and captures the channel id
    // (group 1) and the requested uri (group 2).
    private static final Pattern BEGIN_CONNECT = Pattern.compile(
            "BeginConnect \\[this=(.*?)\\](?:.*?)uri=(.*?)\\s",
            Pattern.CASE_INSENSITIVE | Pattern.DOTALL);

    /**
     * Starts Firefox with HTTP logging enabled, loads a test URL, then
     * parses the produced log for the response code.
     *
     * @param args unused
     * @throws IOException if the temporary log file cannot be created or
     *         read, or the driver service cannot be started
     * @throws InterruptedException kept for signature compatibility
     */
    public static void main(String[] args)
            throws InterruptedException, IOException
    {
        // tell firefox to log http requests
        // at level 4 = PR_LOG_DEBUG: debug messages, notices
        // you could log everything at level 5, but the log file will
        // be larger.
        // create a temporary log file that will be parsed for
        // response code
        Map<String, String> map = new HashMap<String, String>();
        map.put("MOZ_LOG", "timestamp,sync,nsHttp:4");
        File tempFile = File.createTempFile("mozLog", ".txt");
        map.put("MOZ_LOG_FILE", tempFile.getAbsolutePath());

        GeckoDriverService.Builder builder = new GeckoDriverService.Builder();
        GeckoDriverService service = builder.usingAnyFreePort()
                .withEnvironment(map)
                .build();

        // test 200
        String url = "https://api.ipify.org/?format=text";
        // test 404
        // String url = "https://www.advancedwebranking.com/lsdkjflksdjfldksfj";

        WebDriver driver = null;
        try
        {
            service.start();
            driver = new FirefoxDriver(service);
            driver.get(url);
        }
        finally
        {
            // release the browser and the driver service even when starting
            // the session or loading the page fails
            try
            {
                if (driver != null)
                {
                    driver.quit();
                }
            }
            finally
            {
                service.stop();
            }
        }

        // read with an explicit charset instead of the platform default
        String logContent = FileUtils.readFileToString(tempFile, "UTF-8");
        ParseLog(logContent, url);
    }

    /**
     * Searches the Firefox HTTP log for the request made to {@code url} and
     * prints the request id and the HTTP status code logged for it.
     *
     * @param logContent full text of the MOZ_LOG file
     * @param url the exact uri whose response code is wanted
     * @throws MalformedURLException never thrown by the current code; kept
     *         for signature compatibility
     */
    private static void ParseLog(String logContent, String url) throws MalformedURLException
    {
        // this is how the log looks like when the request starts
        // I have to get the id of the request using a regular expression
        // and use that id later to get the response
        //
        // 2017-11-02 14:14:01.170000 UTC - [Main Thread]: D/nsHttp nsHttpChannel::BeginConnect [this=000000BFF27A5000]
        // 2017-11-02 14:14:01.170000 UTC - [Main Thread]: D/nsHttp host=api.ipify.org port=-1
        // 2017-11-02 14:14:01.170000 UTC - [Main Thread]: D/nsHttp uri=https://api.ipify.org/?format=text
        Matcher m = BEGIN_CONNECT.matcher(logContent);
        String urlID = null;
        while (m.find())
        {
            String id = m.group(1);
            String uri = m.group(2);
            if (uri.equals(url))
            {
                urlID = id;
                break;
            }
        }
        System.out.println("request id = " + urlID);

        if (urlID == null)
        {
            // without a request id there is nothing to look up; do not
            // build a pattern around the literal text "null"
            System.out.println("response code not found");
            return;
        }

        // this is how the response looks like in the log file
        // ProcessResponse [this=000000CED8094000 httpStatus=200]
        // I will use another regular expression to get the httpStatus
        //
        // 2017-11-02 14:45:39.296000 UTC - [Main Thread]: D/nsHttp nsHttpChannel::OnStartRequest [this=000000CED8094000 request=000000CED8014BB0 status=0]
        // 2017-11-02 14:45:39.296000 UTC - [Main Thread]: D/nsHttp nsHttpChannel::ProcessResponse [this=000000CED8094000 httpStatus=200]
        //
        // the id comes from the log, so quote it to keep it from being
        // interpreted as regex syntax
        String pattern = "ProcessResponse \\[this=" + Pattern.quote(urlID)
                + " httpStatus=(.*?)\\]";
        Pattern p = Pattern.compile(pattern, Pattern.CASE_INSENSITIVE | Pattern.DOTALL);
        m = p.matcher(logContent);
        if (m.find())
        {
            String responseCode = m.group(1);
            System.out.println("response code found " + responseCode);
        }
        else
        {
            System.out.println("response code not found");
        }
    }
}
L'output per questo sarà
request id = 0000007653D67000
response code found 200
Anche le intestazioni della risposta si trovano nel file di log. Puoi estrarle, se vuoi.
2017-11-02 14:54:36.775000 UTC - [Socket Thread]: I/nsHttp http response [
2017-11-02 14:54:36.775000 UTC - [Socket Thread]: I/nsHttp HTTP/1.1 404 Not Found
2017-11-02 14:54:36.775000 UTC - [Socket Thread]: I/nsHttp Accept-Ranges: bytes
2017-11-02 14:54:36.775000 UTC - [Socket Thread]: I/nsHttp Cache-control: no-cache="set-cookie"
2017-11-02 14:54:36.775000 UTC - [Socket Thread]: I/nsHttp Content-Type: text/html; charset=utf-8
2017-11-02 14:54:36.775000 UTC - [Socket Thread]: I/nsHttp Date: Thu, 02 Nov 2017 14:54:36 GMT
2017-11-02 14:54:36.775000 UTC - [Socket Thread]: I/nsHttp ETag: "7969-55bc076a61e80"
2017-11-02 14:54:36.775000 UTC - [Socket Thread]: I/nsHttp Last-Modified: Tue, 17 Oct 2017 16:17:46 GMT
2017-11-02 14:54:36.775000 UTC - [Socket Thread]: I/nsHttp Server: Apache/2.4.23 (Amazon) PHP/5.6.24
2017-11-02 14:54:36.775000 UTC - [Socket Thread]: I/nsHttp Set-Cookie: AWSELB=5F256FFA816C8E72E13AE0B12A17A3D540582F804C87C5FEE323AF3C9B638FD6260FF473FF64E44926DD26221AAD2E9727FD739483E7E4C31784C7A495796B416146EE83;PATH=/
2017-11-02 14:54:36.775000 UTC - [Socket Thread]: I/nsHttp Content-Length: 31081
2017-11-02 14:54:36.775000 UTC - [Socket Thread]: I/nsHttp Connection: keep-alive
2017-11-02 14:54:36.775000 UTC - [Socket Thread]: I/nsHttp OriginalHeaders
2017-11-02 14:54:36.775000 UTC - [Socket Thread]: I/nsHttp Accept-Ranges: bytes
2017-11-02 14:54:36.775000 UTC - [Socket Thread]: I/nsHttp Cache-control: no-cache="set-cookie"
2017-11-02 14:54:36.775000 UTC - [Socket Thread]: I/nsHttp Content-Type: text/html; charset=utf-8
2017-11-02 14:54:36.775000 UTC - [Socket Thread]: I/nsHttp Date: Thu, 02 Nov 2017 14:54:36 GMT
2017-11-02 14:54:36.775000 UTC - [Socket Thread]: I/nsHttp ETag: "7969-55bc076a61e80"
2017-11-02 14:54:36.775000 UTC - [Socket Thread]: I/nsHttp Last-Modified: Tue, 17 Oct 2017 16:17:46 GMT
2017-11-02 14:54:36.775000 UTC - [Socket Thread]: I/nsHttp Server: Apache/2.4.23 (Amazon) PHP/5.6.24
2017-11-02 14:54:36.775000 UTC - [Socket Thread]: I/nsHttp Set-Cookie: AWSELB=5F256FFA816C8E72E13AE0B12A17A3D540582F804C87C5FEE323AF3C9B638FD6260FF473FF64E44926DD26221AAD2E9727FD739483E7E4C31784C7A495796B416146EE83;PATH=/
2017-11-02 14:54:36.775000 UTC - [Socket Thread]: I/nsHttp Content-Length: 31081
2017-11-02 14:54:36.775000 UTC - [Socket Thread]: I/nsHttp Connection: keep-alive
2017-11-02 14:54:36.775000 UTC - [Socket Thread]: I/nsHttp ]
2017-11-02 14:54:36.775000 UTC - [Main Thread]: D/nsHttp nsHttpChannel::OnStartRequest [this=0000008A65D85000 request=0000008A65D1F900 status=0]
2017-11-02 14:54:36.775000 UTC - [Main Thread]: D/nsHttp nsHttpChannel::ProcessResponse [this=0000008A65D85000 httpStatus=404]