import pdftotext
import sys
import re

# Compiled once at module level so every line check reuses the same patterns.
# A "number" line consists only of digits, commas and whitespace.
numbers = re.compile(r"^[,\d\s]+$")
# A line to ignore is empty or whitespace-only.
ignore = re.compile(r"^\s*$")


def render_line(line):
    """Return the text to print for one extracted line, or None to skip it.

    Args:
        line: A single line of text (no trailing newline).

    Returns:
        None for a blank line or a lone form feed; the decimal digits of the
        parsed integer for a number line; ``"BAD <repr>"`` when a line that
        looks numeric cannot be parsed by ``int``; ``"TEXT <line>"`` otherwise.
    """
    if line == "\x0c" or ignore.match(line):
        # ignore blank lines and trailing junk
        return None
    if numbers.match(line):
        # Commas are dropped before parsing; int() tolerates surrounding
        # whitespace but rejects whitespace between digits and empty input
        # (e.g. "12 34" or ","), which is reported as BAD.
        try:
            return str(int(line.replace(",", "")))
        except ValueError:
            return "BAD " + repr(line)
    return "TEXT " + line


def main(argv=None):
    """Print a classification of every line of a one-page PDF report.

    Args:
        argv: Argument list in ``sys.argv`` layout (program name first, PDF
            path second). Defaults to ``sys.argv``.

    Raises:
        SystemExit: If the PDF path is missing or the PDF does not have
            exactly one page.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 2:
        sys.exit("usage: python <script> REPORT.pdf")

    # Load your PDF
    with open(argv[1], "rb") as f:
        pdf = pdftotext.PDF(f)

        # Explicit check rather than `assert`, which is removed under -O.
        if len(pdf) != 1:
            sys.exit("This report is more than 1 page!")

        lines = "".join(pdf).split("\n")

    for line in lines:
        rendered = render_line(line)
        if rendered is not None:
            print(rendered)


if __name__ == "__main__":
    main()