crawler: you can now specify a path not to include
This commit is contained in:
parent
3a7c864ec9
commit
c0d3781ac6
Binary file not shown.
|
|
@@ -69,7 +69,7 @@ mkdir $REPORT_DATA_DIR
|
|||
mkdir $REPORT_PAGES_DIR
|
||||
mkdir $REPORT_DATA_PAGES_DIR
|
||||
|
||||
java -Xmx256m -Xms256m -cp dependencies/crawler-0.9-full.jar com.soulgalore.crawler.run.CrawlToFile -u $URL -l $DEPTH $FOLLOW_PATH -f $REPORT_DATA_DIR/urls.txt -ef $REPORT_DATA_DIR/nonworkingurls.txt
|
||||
java -Xmx256m -Xms256m -cp dependencies/crawler-0.9.1-full.jar com.soulgalore.crawler.run.CrawlToFile -u $URL -l $DEPTH $FOLLOW_PATH -f $REPORT_DATA_DIR/urls.txt -ef $REPORT_DATA_DIR/nonworkingurls.txt
|
||||
|
||||
# read the urls
|
||||
result=()
|
||||
|
|
|
|||
Loading…
Reference in New Issue