From 95a3cb30f61e90fe00cd859f10e323bbc3e62e99 Mon Sep 17 00:00:00 2001
From: Ben Sima
Date: Sun, 19 May 2019 14:16:19 -0700
Subject: cleanup textract and rdr

---
 rdr      |  5 +++--
 textract | 19 +++++++++----------
 2 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/rdr b/rdr
index a258698..184d7d4 100755
--- a/rdr
+++ b/rdr
@@ -2,5 +2,6 @@
 #! nix-shell -i bash -p pandoc
 
 textract "$1" \
-  | pandoc -f html -t markdown \
-  | less
+  | pandoc -f html -t markdown --strip-comments --reference-links \
+  | sed 's/^:::.*$//g' \
+  | sed 's/{.css.*}//g'
diff --git a/textract b/textract
index f80be36..707cd27 100755
--- a/textract
+++ b/textract
@@ -9,13 +9,12 @@ import sys
 cli = argparse.ArgumentParser('read a url')
 cli.add_argument('url', type=str)
 
-if __name__ == '__main__':
-    args = cli.parse_args()
-    try:
-        resp = requests.get(args.url)
-        doc = Document(resp.text)
-        sys.stdout.write(doc.summary())
-        sys.exit(0)
-    except:
-        print("Could not fetch document.")
-        sys.exit(1)
+args = cli.parse_args()
+
+try:
+    resp = requests.get(args.url)
+    doc = Document(resp.text)
+    sys.stdout.write(doc.summary())
+except:
+    print("textract: Could not fetch document.")
+    sys.exit(1)
-- 
cgit v1.2.3