Configure session cookies for scraping.

This commit is contained in:
Andreas Schildbach 2014-12-19 17:49:28 +01:00
parent 55a30f56de
commit d69506b7c1
4 changed files with 32 additions and 21 deletions

View file

@ -914,7 +914,7 @@ public abstract class AbstractEfaProvider extends AbstractNetworkProvider
try
{
is = ParserUtils.scrapeInputStream(uri.toString(), httpPost ? parameters.substring(1) : null, null, httpReferer, "NSC_");
is = ParserUtils.scrapeInputStream(uri.toString(), httpPost ? parameters.substring(1) : null, null, httpReferer, null);
firstChars = ParserUtils.peekFirstChars(is);
final XmlPullParser pp = parserFactory.newPullParser();
@ -2173,7 +2173,7 @@ public abstract class AbstractEfaProvider extends AbstractNetworkProvider
try
{
is = ParserUtils.scrapeInputStream(uri.toString(), httpPost ? parameters.substring(1) : null, null, httpRefererTrip, "NSC_");
is = ParserUtils.scrapeInputStream(uri.toString(), httpPost ? parameters.substring(1) : null, null, httpRefererTrip, sessionCookieName);
firstChars = ParserUtils.peekFirstChars(is);
return queryTrips(uri.toString(), is);
@ -2212,7 +2212,7 @@ public abstract class AbstractEfaProvider extends AbstractNetworkProvider
try
{
is = ParserUtils.scrapeInputStream(uri.toString(), httpPost ? parameters.substring(1) : null, null, httpRefererTrip, "NSC_");
is = ParserUtils.scrapeInputStream(uri.toString(), httpPost ? parameters.substring(1) : null, null, httpRefererTrip, sessionCookieName);
firstChars = ParserUtils.peekFirstChars(is);
return queryTripsMobile(uri.toString(), from, via, to, is);
@ -2244,7 +2244,7 @@ public abstract class AbstractEfaProvider extends AbstractNetworkProvider
try
{
is = ParserUtils.scrapeInputStream(uri.toString(), null, null, httpRefererTrip, "NSC_");
is = ParserUtils.scrapeInputStream(uri.toString(), null, null, httpRefererTrip, sessionCookieName);
firstChars = ParserUtils.peekFirstChars(is);
return queryTrips(uri.toString(), is);
@ -2276,7 +2276,7 @@ public abstract class AbstractEfaProvider extends AbstractNetworkProvider
try
{
is = ParserUtils.scrapeInputStream(uri.toString(), null, null, httpRefererTrip, "NSC_");
is = ParserUtils.scrapeInputStream(uri.toString(), null, null, httpRefererTrip, sessionCookieName);
firstChars = ParserUtils.peekFirstChars(is);
is.mark(512);

View file

@ -871,7 +871,7 @@ public abstract class AbstractHafasProvider extends AbstractNetworkProvider
try
{
final String endpoint = extXmlEndpoint != null ? extXmlEndpoint : queryEndpoint;
final InputStream is = ParserUtils.scrapeInputStream(endpoint, request, null, null, null);
final InputStream is = ParserUtils.scrapeInputStream(endpoint, request, null, null, sessionCookieName);
firstChars = ParserUtils.peekFirstChars(is);
reader = new InputStreamReader(is, ISO_8859_1);
@ -1528,7 +1528,7 @@ public abstract class AbstractHafasProvider extends AbstractNetworkProvider
try
{
final CustomBufferedInputStream bis = new CustomBufferedInputStream(ParserUtils.scrapeInputStream(uri));
final CustomBufferedInputStream bis = new CustomBufferedInputStream(ParserUtils.scrapeInputStream(uri, sessionCookieName));
final String firstChars = ParserUtils.peekFirstChars(bis);
// initialize input stream

View file

@ -25,7 +25,6 @@ import java.util.Map;
import java.util.Set;
import java.util.TimeZone;
import de.schildbach.pte.NetworkProvider.Capability;
import de.schildbach.pte.dto.Point;
import de.schildbach.pte.dto.Product;
import de.schildbach.pte.dto.Style;
@ -42,6 +41,7 @@ public abstract class AbstractNetworkProvider implements NetworkProvider
protected TimeZone timeZone = TimeZone.getTimeZone("CET");
protected int numTripsRequested = 6;
private Map<String, Style> styles = null;
protected String sessionCookieName = null;
static
{
@ -80,6 +80,11 @@ public abstract class AbstractNetworkProvider implements NetworkProvider
this.styles = styles;
}
/**
 * Sets the session cookie name stored in the {@code sessionCookieName} field of this provider.
 * The field is {@code null} unless this setter is called.
 *
 * @param sessionCookieName
 *            name to store; no validation is performed here
 */
protected void setSessionCookieName(final String sessionCookieName)
{
this.sessionCookieName = sessionCookieName;
}
private static final char STYLES_SEP = '|';
public Style lineStyle(final String network, final String line)

View file

@ -24,6 +24,7 @@ import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.Reader;
import java.io.UnsupportedEncodingException;
import java.net.HttpCookie;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
@ -59,12 +60,7 @@ public final class ParserUtils
private static final int SCRAPE_READ_TIMEOUT = 15000;
private static final Charset SCRAPE_DEFAULT_ENCODING = Charset.forName("ISO-8859-1");
private static String stateCookie;
/**
 * Clears the statically held {@code stateCookie} by setting it to {@code null}.
 */
public static void resetState()
{
stateCookie = null;
}
private static HttpCookie sessionCookie;
public static final CharSequence scrape(final String url) throws IOException
{
@ -116,7 +112,12 @@ public final class ParserUtils
public static final InputStream scrapeInputStream(final String url) throws IOException
{
return scrapeInputStream(url, null, null, null, null);
return scrapeInputStream(url, null);
}
/**
 * Convenience overload: delegates to the five-argument {@code scrapeInputStream}, passing {@code null} for
 * the three arguments between the URL and the session cookie name.
 *
 * @param url
 *            URL to fetch, forwarded unchanged
 * @param sessionCookieName
 *            session cookie name, forwarded unchanged; may be {@code null}
 * @return the stream returned by the five-argument overload
 * @throws IOException
 *             propagated from the five-argument overload
 */
public static final InputStream scrapeInputStream(final String url, final String sessionCookieName) throws IOException
{
return scrapeInputStream(url, null, null, null, sessionCookieName);
}
public static final InputStream scrapeInputStream(final String urlStr, final String postRequest, final Charset requestEncoding,
@ -151,8 +152,8 @@ public final class ParserUtils
if (referer != null)
connection.addRequestProperty("Referer", referer);
if (sessionCookieName != null && stateCookie != null)
connection.addRequestProperty("Cookie", stateCookie);
if (sessionCookie != null && sessionCookie.getName().equals(sessionCookieName))
connection.addRequestProperty("Cookie", sessionCookie.toString());
// Set authorization.
if (authorization != null)
@ -197,17 +198,22 @@ public final class ParserUtils
if (testInternalError(firstChars))
throw new InternalErrorException(url, new InputStreamReader(is, requestEncoding));
// save cookie
if (sessionCookieName != null)
{
for (final Map.Entry<String, List<String>> entry : connection.getHeaderFields().entrySet())
c: for (final Map.Entry<String, List<String>> entry : connection.getHeaderFields().entrySet())
{
if ("set-cookie".equalsIgnoreCase(entry.getKey()))
if ("set-cookie".equalsIgnoreCase(entry.getKey()) || "set-cookie2".equalsIgnoreCase(entry.getKey()))
{
for (final String value : entry.getValue())
{
if (value.startsWith(sessionCookieName))
for (final HttpCookie cookie : HttpCookie.parse(value))
{
stateCookie = value.split(";", 2)[0];
if (cookie.getName().equals(sessionCookieName))
{
sessionCookie = cookie;
break c;
}
}
}
}