Source code for amazoncaptcha.utils

# -*- coding: utf-8 -*-

"""
amazoncaptcha.utils
~~~~~~~~~~~~~~~~~~~

This module contains the set of amazoncaptcha's utilities.
"""

from PIL import Image, ImageChops

#--------------------------------------------------------------------------------------------------------------

def cut_the_white(letter):
    """
    Trims the white margin surrounding a letter.

    The letter is compared against a solid canvas of the same mode and size
    filled with the value 255; the bounding box of that difference is then
    used to crop the letter.

    Args:
        letter (PIL.Image): Letter to be processed.

    Returns:
        PIL.Image: The letter cropped to the bounding box of its
            non-white content.
    """

    # Reference canvas: same mode and size as the letter, filled with 255.
    white_canvas = Image.new(letter.mode, letter.size, 255)

    # The bounding box of the difference image is the crop region.
    content_box = ImageChops.difference(letter, white_canvas).getbbox()

    return letter.crop(content_box)
def merge_horizontally(img1, img2):
    """
    Places two letters side by side on a single new image.

    Created in case an image is corrupted and the last letter ends at the
    beginning of the image, causing the letter to be unreadable.

    Args:
        img1 (PIL.Image): First letter, pasted at the left edge.
        img2 (PIL.Image): Second letter, pasted right after the first one.

    Returns:
        PIL.Image: A new 'L'-mode image as wide as both letters together
            and as tall as ``img1``.
    """

    canvas_size = (img1.width + img2.width, img1.height)
    merged = Image.new('L', canvas_size)

    # Each piece goes on the top row; the second starts where the first ends.
    for piece, x_offset in ((img1, 0), (img2, img1.width)):
        merged.paste(piece, (x_offset, 0))

    return merged
def find_letter_boxes(img, maxlength):
    """
    Finds and separates letters from a captcha image.

    A column belongs to a letter when it contains at least one pixel equal
    to 0. Every maximal run of such adjacent columns becomes one box; a box
    wider than ``maxlength`` is split once, at its emptiest inner column.

    Args:
        img (PIL.Image): Monochromed captcha. Only ``width``, ``height`` and
            ``getpixel((x, y))`` are used.
        maxlength (int): Maximum letter length by X axis.

    Returns:
        letter_boxes (:obj:`list` of :obj:`tuple`): List of ``(start, end)``
            X coords of each letter, ordered left to right. ``end`` is one
            past the last black column, clamped to ``img.width - 1``.
    """

    # Columns this close to either edge of a box are never picked as divider.
    split_margin = 5

    width = img.width
    image_columns = [[img.getpixel((x, y)) for y in range(img.height)]
                     for x in range(width)]

    # Collect inclusive (first, last) runs of adjacent columns holding black.
    # Tracking runs explicitly keeps one-column-wide runs paired correctly
    # wherever they occur, and needs only a single pass over the columns.
    runs = []
    run_start = None
    for x, column in enumerate(image_columns):
        if 0 in column:
            if run_start is None:
                run_start = x
        elif run_start is not None:
            runs.append((run_start, x - 1))
            run_start = None
    if run_start is not None:
        runs.append((run_start, width - 1))

    letter_boxes = []
    for start, last in runs:
        end = min(last + 1, width - 1)

        if end - start <= maxlength:
            letter_boxes.append((start, end))
            continue

        if end - start > 2 * split_margin:
            # Two glued letters: cut at the inner column with the fewest
            # black pixels (the leftmost one on ties).
            black_counts = [column.count(0) for column
                            in image_columns[start + split_margin:end - split_margin]]
            divider = black_counts.index(min(black_counts)) + split_margin
        else:
            # Too narrow to leave a margin on both sides; cut in the middle
            # rather than searching an empty or out-of-box slice.
            divider = (end - start) // 2

        letter_boxes.extend([(start, start + divider), (start + divider + 1, end)])

    return letter_boxes
#--------------------------------------------------------------------------------------------------------------