Upgrade the bundled crawler to 1.1.1 and generate a 404 report.

Summary of the changes carried by this patch:
  * Makefile: the package target copies crawler-1.1.1-full.jar instead of
    crawler-1.1-full.jar.
  * dependencies/: the crawler jar is renamed to crawler-1.1.1-full.jar
    (binary content differs, similarity 87%).
  * sitespeed.io: CRAWLER_JAR points at the new jar; the crawler's -ef
    argument is now $REPORT_DATA_DIR/404.txt (was nonworkingurls.txt).
    When 404.txt exists after the crawl, its lines are written to 404.xml,
    which is rendered to 404.html with the Velocity jar and then compressed
    in place with the htmlcompressor jar.
  * test/server/www/index.html: one line of the test page is replaced; the
    new text reads "this is a link to a non existing page".

Review notes:
  * Fixed: the 404 loop appended with ${#result[@]}, i.e. the length of a
    different array. With "result" empty or unset that index is always 0,
    so every URL overwrote the previous one. It now uses ${#result404[@]}.
  * Hardened: the loop reads with "IFS= read -r" so backslashes and
    leading/trailing blanks in a URL are kept as-is.
  * NOTE(review): this patch was recovered from a whitespace-collapsed copy.
    Line breaks, blank context lines and indentation were reconstructed so
    that each hunk matches the line counts in its @@ header; confirm against
    the tree, or apply with "patch -l", before relying on exact whitespace.
  * NOTE(review): the echo '' / echo "$url" lines and the index.html hunk
    body look like they lost XML/HTML markup during extraction. They are
    kept verbatim here - restore them from the original commit.

diff --git a/Makefile b/Makefile
index f188d4c40..30ff84a10 100644
--- a/Makefile
+++ b/Makefile
@@ -25,7 +25,7 @@ package:
 
 	@cp sitespeed.io CHANGELOG LICENSE $(BUILD)/
 	@cp $(DEP)/LICENSE.txt $(BUILD)/$(DEP)/
-	@cp $(DEP)/crawler-1.1-full.jar $(BUILD)/$(DEP)/
+	@cp $(DEP)/crawler-1.1.1-full.jar $(BUILD)/$(DEP)/
 	@cp $(DEP)/crawler.properties $(BUILD)/$(DEP)/
 	@cp $(DEP)/xml-velocity-1.3-full.jar $(BUILD)/$(DEP)/
 	@cp $(DEP)/rasterize.js $(BUILD)/$(DEP)/
diff --git a/dependencies/crawler-1.1-full.jar b/dependencies/crawler-1.1.1-full.jar
similarity index 87%
rename from dependencies/crawler-1.1-full.jar
rename to dependencies/crawler-1.1.1-full.jar
index 4d1bb3f95..2669ceb0f 100644
Binary files a/dependencies/crawler-1.1-full.jar and b/dependencies/crawler-1.1.1-full.jar differ
diff --git a/sitespeed.io b/sitespeed.io
index 3ed788797..fc8ed553b 100755
--- a/sitespeed.io
+++ b/sitespeed.io
@@ -203,7 +203,7 @@ NOPROTOCOL=${URL#*//}
 HOST=${NOPROTOCOL%%/*}
 
 # Jar files
-CRAWLER_JAR=crawler-1.1-full.jar
+CRAWLER_JAR=crawler-1.1.1-full.jar
 VELOCITY_JAR=xml-velocity-1.3-full.jar
 HTMLCOMPRESSOR_JAR=htmlcompressor-1.5.3.jar
 
@@ -227,7 +227,7 @@ if $OUTPUT_IMAGES
     mkdir $REPORT_IMAGE_PAGES_DIR
 fi
 
-java -Xmx"$JAVA_HEAP"m -Xms"$JAVA_HEAP"m -Dcom.soulgalore.crawler.propertydir=$DEPENDENCIES_DIR/ $PROXY_CRAWLER -cp $DEPENDENCIES_DIR/$CRAWLER_JAR com.soulgalore.crawler.run.CrawlToFile -u $URL -l $DEPTH $FOLLOW_PATH $NOT_IN_URL -f $REPORT_DATA_DIR/urls.txt -ef $REPORT_DATA_DIR/nonworkingurls.txt
+java -Xmx"$JAVA_HEAP"m -Xms"$JAVA_HEAP"m -Dcom.soulgalore.crawler.propertydir=$DEPENDENCIES_DIR/ $PROXY_CRAWLER -cp $DEPENDENCIES_DIR/$CRAWLER_JAR com.soulgalore.crawler.run.CrawlToFile -u $URL -l $DEPTH $FOLLOW_PATH $NOT_IN_URL -f $REPORT_DATA_DIR/urls.txt -ef $REPORT_DATA_DIR/404.txt
 
 if [ ! -e $REPORT_DATA_DIR/urls.txt ];
 then
@@ -261,6 +261,25 @@ done
 # make sure all processes has finished
 wait
 
+# take care of 404:s
+if [ -e $REPORT_DATA_DIR/404.txt ];
+then
+    result404=()
+    while IFS= read -r txt ; do
+        result404[${#result404[@]}]=$txt
+    done < $REPORT_DATA_DIR/404.txt
+
+    echo '' > $REPORT_DATA_DIR/404.xml
+    for url in "${result404[@]}"
+        do echo "$url" >> $REPORT_DATA_DIR/404.xml
+    done
+    echo '' >> $REPORT_DATA_DIR/404.xml
+    echo 'Create the 404.html'
+    java -Xmx"$JAVA_HEAP"m -Xms"$JAVA_HEAP"m -jar $DEPENDENCIES_DIR/$VELOCITY_JAR $REPORT_DATA_DIR/404.xml $VELOCITY_DIR/404.vm $PROPERTIES_DIR/404.properties $REPORT_DIR/404.html || exit 1
+    java -jar $DEPENDENCIES_DIR/$HTMLCOMPRESSOR_JAR --type html --compress-css --compress-js -o $REPORT_DIR/404.html $REPORT_DIR/404.html
+
+fi
+
 echo "Create result.xml"
 echo '' > $REPORT_DATA_DIR/result.xml
 
diff --git a/test/server/www/index.html b/test/server/www/index.html
index 96fa1425e..a72d58109 100644
--- a/test/server/www/index.html
+++ b/test/server/www/index.html
@@ -23,7 +23,7 @@
Yes this is a test - + this is a link to a non existing page.