summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rwxr-xr-x scrape 26
1 files changed, 16 insertions, 10 deletions
diff --git a/scrape b/scrape
index 7da2125..fde6b2b 100755
--- a/scrape
+++ b/scrape
@@ -1,10 +1,15 @@
#!/usr/bin/env python3
+"""
+Scrape a webpage for emails or links, or both.
+"""
+
import argparse
import re
import sys
import urllib.request
+
def get(url):
page = urllib.request.urlopen(url)
byts = page.read()
@@ -12,19 +17,20 @@ def get(url):
page.close()
return s
+
def extract_links(content):
- links = re.findall('"((http|ftp)s?://.*?)"', content)
- return links
+ return re.findall('"((http|ftp)s?://.*?)"', content)
+
def extract_emails(content):
- emails = re.findall('([\w\.,]+@[\w\.,]+\.\w+)', content)
- return emails
-
-if __name__ == '__main__':
- cli = argparse.ArgumentParser(description='Scrape a web page for stuff.')
- cli.add_argument('url', metavar='URL', type=str, help='url to fetch')
- cli.add_argument('-e', '--emails', action="store_true", help='get emails')
- cli.add_argument('-l', '--links', action="store_true", help='get links')
+ return re.findall("([\w\.,]+@[\w\.,]+\.\w+)", content)
+
+
+if __name__ == "__main__":
+ cli = argparse.ArgumentParser(description=__doc__)
+ cli.add_argument("url", metavar="URL", type=str, help="url to fetch")
+ cli.add_argument("-e", "--emails", action="store_true", help="get emails")
+ cli.add_argument("-l", "--links", action="store_true", help="get links")
args = cli.parse_args()
if not (args.emails or args.links):