#!/usr/bin/python
"""Rearrange pages of a PDF document.

Each page of the output document can contain several pages of the input
document, possibly rotated and scaled. With the --reformat option, the margins
of the original pages are cut off, so that more space of the output pages can
be used for content.
"""

import argparse
import logging
import math
import re
import string
import sys

import gi
from gi.repository import Gio
gi.require_version('Poppler', '0.18')
from gi.repository import Poppler
import cairo

__author__ = "Emmanuel Beffara"
__email__ = "manu@beffara.org"
__version__ = "2020.03.21"

# Pythonic tools  {{{1

class Extender (object):

    """
    Hack that enables to extend objects of native types when inheriting
    classes cannot be used, e.g. for objects returned by functions.
    """

    def __init__ (self, base_object):
        self._base_object = base_object

    def __getattr__ (self, name):
        return getattr(self._base_object, name)


def memoizing (function):
    """Cache the results of a method.

    This decorator caches the results of a method self.foo by storing computed
    values into an attribute self._foo_cache. The arguments of the function
    must be hashable.

    """
    cache_name = '_%s_cache' % function.__name__
    def method (self, *args):
        try:
            return getattr(self, cache_name)[args]
        except AttributeError:
            value = function(self, *args)
            setattr(self, cache_name, { args : value })
            return value
        except KeyError:
            value = function(self, *args)
            getattr(self, cache_name)[args] = value
            return value
    return method


# Documents and pages  {{{1

class Document (Extender):

    """
    A wrapper around Poppler.Document with a extra methods for bounding box
    computation.
    """

    def __init__ (self, filename):
        uri = Gio.File.new_for_commandline_arg(filename).get_uri()
        Extender.__init__(self, Poppler.Document.new_from_file(uri, None))
        self.page_cache = {}

    @memoizing
    def get_page (self, number):
        return Page(self, number)

    def pages (self):
        """Return an iterator over the pages of the document."""
        for page_number in range(0, self.get_n_pages()):
            yield self.get_page(page_number)

    def max_crop_box (self):
        """Get a maximal crop box for the document."""
        box = None
        for page in self.pages():
            if box is None:
                box = page.get_crop_box()
            else:
                box.max_with(page.get_crop_box())
        return box


class Page (Extender):

    """
    A wrapper around Poppler.Page with a extra methods for bounding box
    computation.
    """

    def __init__ (self, document, number):
        self.number = number
        Extender.__init__(self, document._base_object.get_page(number))

    @property
    @memoizing
    def crop_box (self):
        """The page size."""
        rect = self._base_object.get_crop_box()
        return Rectangle(rect.x1, rect.y1, rect.x2, rect.y2)

    def get_image (self):
        """Get an image of the page, with 1 pixel for 1 point."""
        r = self.crop_box
        image = A8Image(int(r.x2), int(r.y2))
        cr = cairo.Context(image.get_surface())
        self.render(cr)
        image.flush()        
        return image

    @property
    @memoizing
    def bbox (self):
        """The bounding box of the contents of the page."""
        box = self.get_image().get_bbox()
        logging.debug('bbox for page %d: %r' % (self.number, box))
        return box


class A8Image (Extender):

    """
    A thin wrapper around Cairo's image surfaces that adds bounding box
    computation. Only 8-bit alpha format is available here.
    """

    def __init__ (self, width, height):
        Extender.__init__(self, cairo.ImageSurface(cairo.FORMAT_A8, width, height))

    def get_surface (self):
        """Get the Cairo surface associated to the image."""
        return self._base_object

    def get_bbox_native (self):
        """Compute the bounding box of the contents of the surface.
        
        In this method, all computation is done in Python, which is not very
        efficient.

        """
        buf = self.get_data()
        x1 = y1 = x2 = y2 = None
        pos = 0
        stride = self.get_stride()
        for y in range(0, self.get_height()):
            for x in range(0, self.get_width()):
                if buf[pos + x] == '\0':
                    continue
                if y1 is None:
                    y1 = y
                y2 = y + 1
                if x1 is None or x < x1:
                    x1 = x
                if x2 is None or x > x2:
                    x2 = x + 1
            pos += stride
        if x1 is None:
            return None
        return Rectangle(x1,y1,x2,y2)

    def get_bbox_pil (self):
        """Compute the bounding box of the contents of the surface.
        
        This method uses the computation method from the Python Imaging
        Library, it is much more efficient that the pure Python one.

        """
        import PIL.Image
        image = PIL.Image.frombuffer('L',
            (self.get_width(), self.get_height()), self.get_data(),
            'raw', 'L', self.get_stride(), 1)
        bbox = image.getbbox()
        if bbox is None:
            return None
        return Rectangle(*bbox)

    # Choose the most efficient version for get_bbox.
    try:
        import PIL.Image
        get_bbox = get_bbox_pil
    except ImportError:
        get_bbox = get_bbox_native


# Basic geometry  {{{1

class Rectangle:

    """
    A rectangle, defined by its extremal coordinates.
    """

    def __init__ (self, x1, y1, x2, y2):
        self.x1 = x1
        self.y1 = y1
        self.x2 = x2
        self.y2 = y2

    def __repr__ (self):
        return '(%g,%g,%g,%g)' % (self.x1, self.y1, self.x2, self.y2)

    @property
    def width (self):
        return self.x2 - self.x1

    @property
    def height (self):
        return self.y2 - self.y1

    def max_with (self, other):
        """Compute the bounding box of this rectangle and another one.

        The object is modified in place, no value is returned.

        """
        self.x1 = min(self.x1, other.x1)
        self.y1 = min(self.y1, other.y1)
        self.x2 = max(self.x2, other.x2)
        self.y2 = max(self.y2, other.y2)

    def scaled (self, scale):
        """Return a new rectangle obtained by scaling this one."""
        return Rectangle(
                self.x1 * scale, self.y1 * scale,
                self.x2 * scale, self.y2 * scale)

    def shifted (self, dx, dy):
        """Return a new Rectangle obtained by shifting this one."""
        return Rectangle(self.x1 + dx, self.y1 + dy, self.x2 + dx, self.y2 + dy)

    @staticmethod
    def bbox (points):
        """Make a rectangle as the bounding box of a set of points."""
        x1 = y1 = x2 = y2 = None
        for x, y in points:
            if x1 is None:
                x1 = x2 = x
                y1 = y2 = y
            else:
                x1 = min(x1, x)
                y1 = min(y1, y)
                x2 = max(x2, x)
                y2 = max(y2, y)
        if x1 is None:
            return None
        return Rectangle(x1, y1, x2, y2)

    def rotate_bbox (self, angle):
        """Return the bounding box of this rectangle rotated.
        
        The angle is given in radians.

        """
        return Rectangle.bbox([rotate((x, y), angle)
            for x in (self.x1, self.x2) for y in (self.y1, self.y2)])


def rotate (point, angle):
    x, y = point
    return (
        math.cos(angle) * x - math.sin(angle) * y,
        math.sin(angle) * x + math.cos(angle) * y)


# Layouts  {{{1

class Layout:

    """
    This represents a way of rearranging the pages of a document.
    """

    def __init__ (self, document, opts):
        """Create a layout using options from the 'opts' object."""
        self.document = document
        self.opts = opts

    @property
    @memoizing
    def chunk_list (self):
        """The list of page chunks used for layout.
        
        Each chunk is a list of input page numbers, in their order in the
        output document. Chunks may contain occurrences of None to indicate
        blank pages inserted in the layout.

        """
        pages = (
            [None] * self.opts.pad_before +
            list(self.opts.select.iterate(self.document.get_n_pages())) +
            [None] * self.opts.pad_after )
        logging.debug('logical pages: %r' % pages)

        if self.opts.chunks is None:
            chunks = [pages]
        else:
            chunks = []
            chunk = []
            index = 0
            for number in pages:
                chunk.append(number)
                index += 1
                if index == self.opts.chunks:
                    chunks.append(chunk)
                    chunk = []
                    index = 0
            if index > 0:
                if self.opts.pad_last:
                        chunk.extend([None] * (self.opts.chunks - index))
                chunks.append(chunk)

        if self.opts.folding:
            chunks = map(self.reorder_for_folding, chunks)

        logging.debug('page chunks: %r' % chunks)
        return chunks

    @property
    @memoizing
    def page_numbers (self):
        """The set of pages actually considered in the source document."""
        parts = []
        for chunk in self.chunk_list:
            parts.append(set([x for x in chunk if x is not None]))
        return set.union(*parts)

    @staticmethod
    def reorder_for_folding (iterable):
        items = list(iterable)
        length = len(items)

        # Pad to a multiple of 4

        if length % 4 != 0:
            pad = 4 - (length % 4)
            items.extend([None]*pad)
            length += pad

        # Enumerate pages in order

        reordered = []
        for group in range(0, length/4):
            reordered.extend([
                items[length - 2*group - 1],
                items[2*group],
                items[2*group + 1],
                items[length - 2*group - 2] ])
        return reordered

    @memoizing
    def source_crop (self, number):
        """The table of crop boxes (i.e. page sizes) of input pages."""
        return self.document.get_page(number).crop_box

    @memoizing
    def source_bbox (self, number):
        """The table of actual bounding boxes of input pages."""
        return self.document.get_page(number).bbox

    @memoizing
    def page_angle (self, number):
        """The actual rotation applied to a given page."""
        angle = self.opts.angle
        for (PageList, extra_angle) in self.opts.rotate_pages:
            if number in PageList:
                angle += extra_angle
        return angle

    @property
    @memoizing
    def layout (self):
        """The complete layout, as a list of output page descriptions.

        Each page description is a list of tuples with the following items:
        - input page number (not None)
        - scaling
        - angle (in radians, counterclockwise relative to the origin)
        - horizontal shift (in points)
        - vertical shift (in points)

        """
        s = self.opts
        page_width = s.size[0]
        page_height = s.size[1]
        cols = s.cols
        rows = s.rows
        margin = s.margin
        halign = s.align[0]
        valign = s.align[1]
        scale = s.scale

        # Compute the positions for output pages

        width = (page_width - margin) / cols - margin
        height = (page_height - margin) / rows - margin

        positions = tuple(
                (margin + x*(width + margin), margin + y*(height + margin))
                for y in range(0, rows)
                for x in range(0, cols)
            )
        modulo = len(positions)

        logging.debug('computed layout: %r', positions)

        # Select the useful part of each page

        if s.reformat:
            boxes = self.source_bbox
        else:
            boxes = self.source_crop

        # Compute the scaling factor

        if scale is None:
            scales = []
            for number in self.page_numbers:
                box = boxes(number)
                if box is not None:
                    box = box \
                        .shifted(-box.x1,-box.y1) \
                        .rotate_bbox(self.page_angle(number))
                    scales.append(min(width/box.width, height/box.height))
            scale = min(scales)

        logging.info('scale: %g', scale)

        # Deduce the transformation for each page

        layout = []

        for chunk in self.chunk_list:
            index = 0
            current_page = []

            for number in chunk:
                if number is not None and boxes(number) is not None:
                    angle = self.page_angle(number)
                    box = boxes(number).scaled(scale).rotate_bbox(angle)
                    px, py = positions[index]
                    px += halign * (width - box.width) - box.x1
                    py += valign * (height -box.height) - box.y1
                    current_page.append((number, scale, angle, px, py))
                index = (index + 1) % modulo
                if index == 0:
                    layout.append(current_page)
                    current_page = []
            if index != 0:
                layout.append(current_page)

        logging.debug('computed layout: %r' % layout)
        return layout

    def render (self, cr):
        """Render the layout on a Cairo context."""
        for out_page in self.layout:
            for page, scale, angle, dx, dy in out_page:
                cr.identity_matrix()
                cr.translate(dx, dy)
                cr.scale(scale, scale)
                cr.rotate(angle)
                Page(self.document, page).render(cr)
            cr.show_page()


# Command-line interface  {{{1

# Constrained integers  {{{2

def positive_int (text):
    """Convert a string to a strictly positive integer.
    
    Raise ArgumentTypeError for any invalid input.
    
    """
    value = int(text)
    if value <= 0:
        raise argparse.ArgumentTypeError("strictly positive integer expected")
    return value


def non_negative_int (text):
    """Convert a string to a non-negative integer.
    
    Raise ArgumentTypeError for any invalid input.
    
    """
    value = int(text)
    if value < 0:
        raise argparse.ArgumentTypeError("non-negative integer expected")
    return value


# Numbers with units  {{{2

def parse_float_with_unit (text, units):
    """Turn a string into a float, using a unit in the string.
    
    The argument 'units' maps unit names to conversion factors. A default unit can be
    specified by including the empty string as a unit name. Unit names are
    case sensitive.

    """
    text = text.strip()
    m = re.match(r'[+-]?(\d+[.]?\d*|\d*[.]\d+)([eE][+-]?\d+)?', text)
    if m is None:
        raise argparse.ArgumentTypeError("invalid format for float with unit")
    number = float(m.group(0))
    try:
        number *= units[text[m.end(0):].strip()]
    except KeyError:
        raise argparse.ArgumentTypeError("unknown unit of measure")
    return number


def parse_tuple (text, value_parser,
        separator=',', min_size=0, max_size=None, named_values={}):
    """Parse a tuple of values of a given type.
    
    The argument 'value_parser' is used to parse individual components. A
    minimum and maximum size for the tuple can be specified. A set of named
    values can be used, then the input text is first matched against these
    values, and if no value is found then parsing occurs.

    """
    text = text.strip()
    if text in named_values:
        return named_values[text]
    parts = text.split(separator)
    if len(parts) < min_size or (max_size is not None and len(parts) > max_size):
        raise argparse.ArgumentTypeError("wrong number of components")
    return tuple(value_parser(part.strip()) for part in parts)


def curry_first (name, function, *args, **kwargs):
    def curried (arg1):
        return function(arg1, *args, **kwargs)
    curried.__name__ = name
    return curried


ratio = curry_first('ratio', parse_float_with_unit, {
    '': 1.,
    '%': 0.01,
})


angle = curry_first('angle', parse_float_with_unit, {
    '': math.pi / 180.,
    'deg': math.pi / 180.,
    'rad': 1.,
})


dimension = curry_first('dimension', parse_float_with_unit, {
    'pt': 1.,
    'in': 72.,
    'cm': 72. / 2.54,
    'mm': 72. / 25.4,
})


pagesize = curry_first(
    'pagesize', parse_tuple, dimension, min_size = 2, max_size = 2,
    named_values = {
        'a0':  (dimension( '841 mm'), dimension('1189 mm')),
        'a1':  (dimension( '594 mm'), dimension( '841 mm')),
        'a2':  (dimension( '420 mm'), dimension( '594 mm')),
        'a3':  (dimension( '297 mm'), dimension( '420 mm')),
        'a4':  (dimension( '210 mm'), dimension( '297 mm')),
        'a5':  (dimension( '148 mm'), dimension( '210 mm')),
        'a6':  (dimension( '105 mm'), dimension( '149 mm')),
        'a7':  (dimension(  '74 mm'), dimension( '105 mm')),
        'a8':  (dimension(  '52 mm'), dimension(  '74 mm')),
        'a9':  (dimension(  '37 mm'), dimension(  '52 mm')),
        'a10': (dimension(  '26 mm'), dimension(  '37 mm')),
        'b0':  (dimension('1000 mm'), dimension('1414 mm')),
        'b1':  (dimension( '707 mm'), dimension('1000 mm')),
        'b2':  (dimension( '500 mm'), dimension( '707 mm')),
        'b3':  (dimension( '353 mm'), dimension( '500 mm')),
        'b4':  (dimension( '250 mm'), dimension( '353 mm')),
        'b5':  (dimension( '176 mm'), dimension( '250 mm')),
        'b6':  (dimension( '125 mm'), dimension( '176 mm')),
        'b7':  (dimension(  '88 mm'), dimension( '125 mm')),
        'b8':  (dimension(  '62 mm'), dimension(  '88 mm')),
        'b9':  (dimension(  '44 mm'), dimension(  '62 mm')),
        'b10': (dimension(  '31 mm'), dimension(  '44 mm')),
        'letter':        (dimension( '8.5  in'), dimension('11    in')),
        'note':          (dimension( '8.5  in'), dimension('11    in')),
        'legal':         (dimension( '8.5  in'), dimension('14    in')),
        'executive':     (dimension( '7.25 in'), dimension('10.5  in')),
        'halfletter':    (dimension( '5.5  in'), dimension( '8.5  in')),
        'halfexecutive': (dimension( '5.25 in'), dimension( '7.25 in')),
        '11x17':         (dimension('11    in'), dimension('17    in')),
        'statement':     (dimension( '5.5  in'), dimension( '8.5  in')),
        'folio':         (dimension( '8.5  in'), dimension('13    in')),
        '10x14':         (dimension('10    in'), dimension('14    in')),
        'ledger':        (dimension('17    in'), dimension('11    in')),
        'tabloid':       (dimension('11    in'), dimension('17    in')),
    })


alignment = curry_first(
    'alignment', parse_tuple, ratio, min_size = 2, max_size = 2,
    named_values = {
        'center' : (0.5, 0.5),
        'top left': (0., 0.),
        'top' : (0.5, 0.),
        'top right' : (1., 0.),
        'left': (0., 0.5),
        'center' : (0.5, 0.5),
        'right' : (1., 0.5),
        'bottom left': (0., 1.),
        'bottom' : (0.5, 1.),
        'bottom right' : (1., 1.),
    })


int_pair = curry_first(
    'int_pair', parse_tuple, int, min_size = 2, max_size = 2)


# Page lists {{{2

class PageList:

    """
    Represents list of ranges of natural numbers, used for lists of page numbers.

    Caveat: in the input syntax, pages are numbered starting at 1 because the
    human convention is to do that. In the iterators and membership tests,
    pages are numbered from 0, because it is the way the document APIs work.
    In other words, the actual ranges are shifted down by 1 with respect to
    the syntax parsed by the __init__ method.

    """

    def __init__ (self, text):
        """Parse a list of ranges using the usual commas and dashes syntax.
        
        The precise syntax is as follows:
          list ::= item ( "," item )*
          item ::= [ number ] [ "-" [ number ] ]
        An item consisting of a single number represents a single number, an
        item consisting of two numbers a and b separated by a dash represents
        the interval from a to b inclusive, omitting any of the bound
        represents an interval ubounded on one side. In particular, "-"
        represents the whole set of natural numbers.
        
        The syntax uses page numbers (the first one has number 1) instead of
        page indices (the first page has index 0), according to human
        convention.

        """
        number = re.compile(r'\d+')
        text = text.strip()
        self.ranges = []
        while len(text) > 0:
            m = number.match(text)
            if m:
                first = int(m.group(0))
                text = text[m.end():].lstrip()
            else:
                first = None
            if text[:1] == '-':
                text = text[1:].lstrip()
                m = number.match(text)
                if m:
                    last = int(m.group(0))
                    text = text[m.end():].lstrip()
                else:
                    last = None
                self.ranges.append((first, last))
            elif first is not None:
                self.ranges.append((first, first))
            if len(text) == 0:
                break
            if text[0] != ',':
                raise argparse.ArgumentTypeError("unexpected character in page list: %r" % text[0])
            text = text[1:].lstrip()

    def iterate (self, number):
        """Iterate over page indices, assuming a given maximum number."""
        for first, last in self.ranges:
            if first is None:
                first = 1
            if last is None:
                last = number
            for i in range(first - 1, last):
                yield i

    def __contains__ (self, number):
        """Test if a page index is in the set."""
        for first, last in self.ranges:
            if first is not None and number < first - 1:
                continue
            if last is not None and number >= last:
                continue
            return True
        return False

# }}}2

def page_rotation (text):
    parts = text.split(':')
    if len(parts) != 2:
        raise argparse.ArgumentTypeError("wrong syntax")
    return (PageList(parts[0]), angle(parts[1]))

command_line_parser = argparse.ArgumentParser(
    description=__doc__)

command_line_parser.add_argument('--version', action='version',
        version='%(prog)s ' + __version__,
        help="show version information and exit")

command_line_parser.add_argument('source',
        help="source PDF document")
command_line_parser.add_argument('--output', '-o', default='out.pdf',
        help="output file name (default: out.pdf)")
command_line_parser.add_argument('--log-level', metavar='LEVEL',
        choices=('DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'), default='WARNING',
        help="set the logging level")

layout = command_line_parser.add_argument_group(title="layout options")
layout.add_argument('--size', type=pagesize, default=pagesize('a4'),
        help="page size for output (default: a4)")
layout.add_argument('--landscape', action='store_true',
        help="use landscape orientation for output")
layout.add_argument('--portrait', action='store_false', dest='landscape',
        help="use portrait orientation for output")
layout.add_argument('--reformat', action='store_true',
        help="reformat the input document (default: no)")
layout.add_argument('--scale', type=ratio, default=None,
        help="the scaling factor when reformatting (default: auto)")
layout.add_argument('--angle', type=angle, default=0,
        help="the angle by which pages are rotated (default: 0)")
layout.add_argument('--align', type=alignment, default=alignment('center'),
        help="page alignment when reformatting (default: center)")
layout.add_argument('--margin', type=dimension, default=None,
        help="extra margin in the output document (default: 1cm when reformatting, 0 otherwise)")
layout.add_argument('--rows', type=positive_int, default=1, metavar="N",
        help="put N rows of input pages per output page")
layout.add_argument('--cols', type=positive_int, default=1, metavar="N",
        help="put N columns of input pages per output page")
layout.add_argument('--rotate-pages', type=page_rotation, action='append',
        default=[], metavar="PAGES:ANGLE",
        help="apply an extra rotation of ANGLE to some PAGES")

ordering = command_line_parser.add_argument_group(title="page selection and ordering")
ordering.add_argument('--select', type=PageList, default=PageList('-'), metavar="PAGES",
        help="select a subset of pages in the document")
ordering.add_argument('--pad-before', type=non_negative_int, default=0, metavar="N",
        help="insert N blank pages before the first one")
ordering.add_argument('--pad-after', type=non_negative_int, default=0, metavar="N",
        help="insert N blank pages after the last one")
ordering.add_argument('--folding', action='store_true',
        help="reorder pages for folding")
ordering.add_argument('--chunks', type=positive_int, metavar="N",
        help="treat the input document by chunks of N pages")
ordering.add_argument('--pad-last', action='store_true',
        help="pad the last chunk to the required number of pages")

presets = command_line_parser.add_argument_group(title="predefined settings")

class NupAction (argparse.Action):
    """
    This command-line action sets default options for N-up printing.
    The number N of input pages per output page must be a power of two.
    """
    def __call__ (self, parser, opts, number, option=None):
        if number <= 0:
            raise argparse.ArgumentError(self, "a positive integer is required")
        exp = 0
        while number & 1 == 0:
            exp += 1
            number >>= 1
        if number != 1:
            raise argparse.ArgumentError(self, "only powers of 2 are supported")
        side = 1 << (exp >> 1)
        opts.cols = side
        if exp & 1 == 0:
            opts.rows = side
        else:
            opts.rows = side + 1
            opts.angle = math.pi / 2
            opts.landscape = True
presets.add_argument('--nup', action=NupAction, type=int, metavar='N',
        help="print N input pages per output page (powers of 2 only)")

class BookletAction (argparse.Action):
    """
    This command-line action sets default options for booklet printing.
    This includes a default layout of two input pages per output page plus
    reordering of pages for folding.
    """
    def __call__ (self, parser, opts, number, option=None):
        opts.cols = 1
        opts.rows = 2
        opts.angle = math.pi / 2
        opts.landscape = True
        opts.folding = True
presets.add_argument('--booklet', action=BookletAction, nargs=0,
        help="rearrange for duplex printing of a foldable booklet")

def parse_options (*args, **kwargs):
    opts = command_line_parser.parse_args()

    logging.basicConfig(level=getattr(logging, opts.log_level))

    if opts.margin is None:
        if opts.reformat:
            opts.margin = dimension('1cm')
        else:
            opts.margin = 0.

    if opts.landscape:
        opts.size = tuple(reversed(opts.size))
        opts.cols, opts.rows = opts.rows, opts.cols
        opts.angle -= math.pi/2

    return opts

# }}}1

if __name__ == '__main__':
    opts = parse_options()
    logging.debug('options: %r' % opts)
    doc = Document(opts.source)
    layout = Layout(doc, opts)
    surface = cairo.PDFSurface(opts.output, opts.size[0], opts.size[1])
    cr = cairo.Context(surface)
    layout.render(cr)
    surface.flush()
