This is the code and notes from my live Twitch programming and drawing classes.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 

28 lines
635 B

import pdftotext
import sys
import re
# Matches lines made up solely of digits, commas and whitespace.
NUMBER_LINE = re.compile(r"^[,\d\s]+$")
# Matches empty or whitespace-only lines.
BLANK_LINE = re.compile(r"^\s*$")


def classify_line(line):
    """Decide what should be printed for one line of extracted PDF text.

    Returns the tuple of values to hand to ``print``, or ``None`` when the
    line should be skipped:

    * ``None`` for blank lines and a lone form feed (trailing junk),
    * ``(n,)`` when the line is digits/commas/whitespace and parses as the
      integer ``n`` once the commas are removed,
    * ``("BAD", repr(line))`` when the line looks numeric but ``int()``
      rejects it (e.g. digits separated by spaces, or only commas),
    * ``("TEXT", line)`` for everything else.
    """
    if line == "\x0c" or BLANK_LINE.match(line):
        # ignore blank lines and trailing junk
        return None
    if NUMBER_LINE.match(line):
        # it's a number
        try:
            return (int(line.replace(",", "")),)
        except ValueError:
            # Only a failed conversion is expected here; anything else
            # (e.g. KeyboardInterrupt) must propagate.
            return ("BAD", repr(line))
    return ("TEXT", line)


def main(argv=None):
    """Print the classified lines of a one-page PDF.

    ``argv`` defaults to ``sys.argv``; ``argv[1]`` is the path of the PDF.
    Exits with an error message when no path is given or when the PDF does
    not contain exactly one page.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        raise SystemExit(
            "Usage: pass the path of a one-page PDF report as the first argument."
        )

    # Load your PDF
    with open(argv[1], "rb") as f:
        pdf = pdftotext.PDF(f)

    # Explicit check instead of `assert`, which is stripped under `python -O`.
    if len(pdf) != 1:
        raise SystemExit("This report is more than 1 page!")

    for line in "".join(pdf).split("\n"):
        fields = classify_line(line)
        if fields is not None:
            print(*fields)


if __name__ == "__main__":
    main()