diff --git a/dependencies/crawler-1.1.2-full.jar b/dependencies/crawler-1.2-full.jar similarity index 86% rename from dependencies/crawler-1.1.2-full.jar rename to dependencies/crawler-1.2-full.jar index bf739c1f4..53a1f9dd9 100644 Binary files a/dependencies/crawler-1.1.2-full.jar and b/dependencies/crawler-1.2-full.jar differ diff --git a/report/properties/404.properties b/report/properties/errorurls.properties similarity index 100% rename from report/properties/404.properties rename to report/properties/errorurls.properties diff --git a/report/velocity/404.vm b/report/velocity/errorurls.vm similarity index 63% rename from report/velocity/404.vm rename to report/velocity/errorurls.vm index 90c2ebab7..1e4c0edf1 100644 --- a/report/velocity/404.vm +++ b/report/velocity/errorurls.vm @@ -4,11 +4,11 @@

- These pages returned 404 from the crawl + These pages returned errors from the crawl

#foreach ($url in $document.getRootElement().getChildren()) -

$url.getValue()

+

$url.getAttribute("reason").getValue() : $url.getValue()

#end
diff --git a/sitespeed.io b/sitespeed.io index 12bb1a7ae..28e984ed7 100755 --- a/sitespeed.io +++ b/sitespeed.io @@ -215,7 +215,7 @@ NOPROTOCOL=${URL#*//} HOST=${NOPROTOCOL%%/*} # Jar files -CRAWLER_JAR=crawler-1.1.2-full.jar +CRAWLER_JAR=crawler-1.2-full.jar VELOCITY_JAR=xml-velocity-1.3-full.jar HTMLCOMPRESSOR_JAR=htmlcompressor-1.5.3.jar @@ -239,7 +239,7 @@ if $OUTPUT_IMAGES mkdir $REPORT_IMAGE_PAGES_DIR fi -java -Xmx"$JAVA_HEAP"m -Xms"$JAVA_HEAP"m -Dcom.soulgalore.crawler.propertydir=$DEPENDENCIES_DIR/ $PROXY_CRAWLER $USER_AGENT_CRAWLER -cp $DEPENDENCIES_DIR/$CRAWLER_JAR com.soulgalore.crawler.run.CrawlToFile -u $URL -l $DEPTH $FOLLOW_PATH $NOT_IN_URL -f $REPORT_DATA_DIR/urls.txt -ef $REPORT_DATA_DIR/404.txt +java -Xmx"$JAVA_HEAP"m -Xms"$JAVA_HEAP"m -Dcom.soulgalore.crawler.propertydir=$DEPENDENCIES_DIR/ $PROXY_CRAWLER $USER_AGENT_CRAWLER -cp $DEPENDENCIES_DIR/$CRAWLER_JAR com.soulgalore.crawler.run.CrawlToFile -u $URL -l $DEPTH $FOLLOW_PATH $NOT_IN_URL -f $REPORT_DATA_DIR/urls.txt -ef $REPORT_DATA_DIR/errorurls.txt if [ ! -e $REPORT_DATA_DIR/urls.txt ]; then @@ -273,22 +273,22 @@ done # make sure all processes has finished wait -# take care of 404:s -if [ -e $REPORT_DATA_DIR/404.txt ]; +# take care of error urls +if [ -e $REPORT_DATA_DIR/errorurls.txt ]; then - result404=() + resultError=() while read txt ; do - result404[${#result404[@]}]=$txt - done < $REPORT_DATA_DIR/404.txt + resultError[${#resultError[@]}]=$txt + done < $REPORT_DATA_DIR/errorurls.txt - echo '' > $REPORT_DATA_DIR/404.xml - for url in "${result404[@]}" - do echo "$url" >> $REPORT_DATA_DIR/404.xml + echo '' > $REPORT_DATA_DIR/errorurls.xml + for url in "${resultError[@]}" + do echo "${url/*,/ }" >> $REPORT_DATA_DIR/errorurls.xml done - echo '' >> $REPORT_DATA_DIR/404.xml - echo 'Create the 404.html' - java -Xmx"$JAVA_HEAP"m -Xms"$JAVA_HEAP"m -jar $DEPENDENCIES_DIR/$VELOCITY_JAR $REPORT_DATA_DIR/404.xml $VELOCITY_DIR/404.vm $PROPERTIES_DIR/404.properties $REPORT_DIR/404.html || exit 1 - java -jar $DEPENDENCIES_DIR/$HTMLCOMPRESSOR_JAR --type html --compress-css --compress-js -o $REPORT_DIR/404.html $REPORT_DIR/404.html + echo '' >> $REPORT_DATA_DIR/errorurls.xml + echo 'Create the errorurls.html' + java -Xmx"$JAVA_HEAP"m -Xms"$JAVA_HEAP"m -jar $DEPENDENCIES_DIR/$VELOCITY_JAR $REPORT_DATA_DIR/errorurls.xml $VELOCITY_DIR/errorurls.vm $PROPERTIES_DIR/errorurls.properties $REPORT_DIR/errorurls.html || exit 1 + java -jar $DEPENDENCIES_DIR/$HTMLCOMPRESSOR_JAR --type html --compress-css --compress-js -o $REPORT_DIR/errorurls.html $REPORT_DIR/errorurls.html fi