#!/usr/bin/awk -f
# Script by Ypnose - http://ywstd.fr
# HTTP url extractor
#
# Reads HTML-like text and prints, one per line, the value of every
# double-quoted href="..." / src="..." attribute that begins a
# whitespace-separated field.  Attribute names are matched
# case-insensitively; the URL itself is printed exactly as written.
# Values that are empty, "/" or "#" are skipped, and a value equal to the
# one taken from the previous href/src field is not printed again.
# Limitation: only double-quoted values without embedded whitespace are
# recognised, because the record is split on whitespace.

# extract_url(attr): return the quoted value of an href/src attribute.
#   attr - one whitespace-delimited field, e.g.  href="index.html">
# Returns "" when attr does not start with href="..." or src="...".
# eq and url are local variables (awk idiom: extra parameters).
function extract_url(attr,    eq, url) {
	if (!match(attr, /^([Hh][Rr][Ee][Ff]|[Ss][Rr][Cc])=".*"/))
		return ""

	# The value starts after the `="` that follows the attribute name and
	# ends before the closing quote of the match.
	eq = index(attr, "=")
	url = substr(attr, eq + 2, RLENGTH - eq - 2)

	# The match above is greedy and runs to the last quote in the field.
	# Cut at the first quote so trailing markup (`">"text"`) is dropped;
	# a backslash right before that quote (escaped \") is dropped as well.
	sub(/\\?".*$/, "", url)
	return url
}

BEGIN {
	FS = " "
}

{
	# Walk the fields in input order: `for (i in array)` has no defined
	# order, which would scramble both the output and the duplicate check.
	for (i = 1; i <= NF; i++) {
		# Test the attribute name on a lowercased copy so the record (and
		# therefore the URL) is never modified.
		if (tolower($i) !~ /^(href|src)=/)
			continue

		url = extract_url($i)

		# Clear a few unnecessary lines
		if (url !~ /^(\/|#)?$/ && url != oldurl)
			printf("%s\n", url)
		oldurl = url
	}
}