mirror of
https://gitlab.com/oeffi/public-transport-enabler.git
synced 2025-07-17 08:29:50 +00:00
Near-empty pages count as empty, too
git-svn-id: https://public-transport-enabler.googlecode.com/svn/trunk@356 0924bc21-9374-b0fa-ee44-9ff1593b38f0
This commit is contained in:
parent
a250daa67b
commit
08e83d4a4d
1 changed file with 5 additions and 3 deletions
|
@ -48,6 +48,7 @@ public final class ParserUtils
|
||||||
private static final int SCRAPE_CONNECT_TIMEOUT = 5000;
|
private static final int SCRAPE_CONNECT_TIMEOUT = 5000;
|
||||||
private static final int SCRAPE_READ_TIMEOUT = 15000;
|
private static final int SCRAPE_READ_TIMEOUT = 15000;
|
||||||
private static final String SCRAPE_DEFAULT_ENCODING = "ISO-8859-1";
|
private static final String SCRAPE_DEFAULT_ENCODING = "ISO-8859-1";
|
||||||
|
private static final int SCRAPE_PAGE_EMPTY_THRESHOLD = 2;
|
||||||
|
|
||||||
private static String stateCookie;
|
private static String stateCookie;
|
||||||
|
|
||||||
|
@ -111,7 +112,7 @@ public final class ParserUtils
|
||||||
copy(pageReader, buffer);
|
copy(pageReader, buffer);
|
||||||
pageReader.close();
|
pageReader.close();
|
||||||
|
|
||||||
if (buffer.length() > 0)
|
if (buffer.length() > SCRAPE_PAGE_EMPTY_THRESHOLD)
|
||||||
{
|
{
|
||||||
if (cookieHandling)
|
if (cookieHandling)
|
||||||
{
|
{
|
||||||
|
@ -134,10 +135,11 @@ public final class ParserUtils
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
final String message = "got empty page (length: " + buffer.length() + ")";
|
||||||
if (tries-- > 0)
|
if (tries-- > 0)
|
||||||
System.out.println("got empty page, retrying...");
|
System.out.println(message + ", retrying...");
|
||||||
else
|
else
|
||||||
throw new IOException("got empty page: " + url);
|
throw new IOException(message + ": " + url);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
catch (final SocketTimeoutException x)
|
catch (final SocketTimeoutException x)
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue