Skip to content

Instantly share code, notes, and snippets.

@davidbsaltzman
Last active February 12, 2024 19:02
Show Gist options
  • Save davidbsaltzman/4be555baa38ebdf61641ac47b0d3f672 to your computer and use it in GitHub Desktop.
Save davidbsaltzman/4be555baa38ebdf61641ac47b0d3f672 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
#
# This script prepares fonts for use in embedded software. It minimizes them
# (to reduce size and increase performance), encodes them into C files (needed
# by devices without file systems), and prints debug info.
#
# When minimizing, it subsets the glyphs (to a latin set as-is here) and strips
# out layout features. It provides options for whether to keep kerning and in
# which format.
#
# You will likely want to customize this based on which glyphs and font features
# you need and how you use fonts in your system.
#
# Example usage:
# ./prepare_font.py MyFont.ttf my_font_ttf.c --kern gpos_repacked --debug
import argparse
import os
import re
import subprocess
import sys
from enum import Enum
from fontTools import subset
from fontTools import ttLib
from fontTools.ttLib.tables import otTables as ot
from fontTools.ttLib.tables import otBase as otBase
from fontTools.ttLib.tables import otTables
import fontFeatures
class KernTableOption(Enum):
    """Kerning formats that can be requested for the minimized font.

    Each member's value is the literal string accepted by the --kern flag.
    """

    # Drop all kerning data.
    NONE = 'none'
    # Use a legacy 'kern' table (an existing one if present, otherwise one
    # converted from the GPOS kerning pairs).
    KERN_TABLE = 'kern_table'
    # Keep the font's GPOS 'kern' layout feature as subsetted.
    GPOS_EXISTING = 'gpos_existing'
    # Rebuild the GPOS kerning into a single repacked subtable.
    GPOS_REPACKED = 'gpos_repacked'
# Code points the font is subset to. Based on
# https://github.com/googlefonts/glyphsets/blob/main/Lib/glyphsets/encodings/latin_unique-glyphs.nam
# plus three additions: U+2011 (non-breaking hyphen), U+2713 (check mark) and
# U+2715 (cross sign).
SUBSET_UNICODE_VALUES = [
    0x0000, 0x000D,
    # U+0020..U+007E: space through tilde
    *range(0x0020, 0x007F),
    # U+00A0..U+00FF: no-break space through y-diaeresis
    *range(0x00A0, 0x0100),
    # Individual code points outside the two contiguous runs above
    0x0131, 0x0152, 0x0153, 0x02BB, 0x02BC, 0x02C6, 0x02DA, 0x02DC,
    0x2002, 0x2009, 0x200B, 0x2011, 0x2013, 0x2014, 0x2018, 0x2019,
    0x201A, 0x201C, 0x201D, 0x201E, 0x2022, 0x2026, 0x2032, 0x2033,
    0x2039, 0x203A, 0x2044, 0x2074, 0x20AC, 0x2122, 0x2191, 0x2193,
    0x2212, 0x2215, 0x2713, 0x2715,
]
def read_kerning_data(font):
    """Extract simple pair kerning from a font's GPOS layout features.

    Only the 'kern' and 'dist' GPOS features are consulted; a legacy kern
    table is not read (a fallback could be added if needed). A rule is kept
    only when it adjusts nothing but the x advance of the first glyph; rules
    that also carry placement, or that adjust the second glyph, are ignored.
    When several rules cover the same pair, the first one encountered wins.

    Returns:
        A dict mapping first glyph to a dict mapping second glyph to kerning
        adjustment. For example:
            {'A': {'c': -15, 'W': -50},
             'B': {'y': -10}}
    """
    parsed = fontFeatures.ttLib.unparse(font, do_gdef=True, doLookups=True)
    pairs = {}

    # Kerning parsing reference from
    # https://gist.github.com/simoncozens/7ac602196878d161208130f1ce45a08f
    for tag in ('kern', 'dist'):
        if tag not in parsed.features:
            continue
        for routine_ref in parsed.features[tag]:
            for rule in routine_ref.routine.rules:
                if not isinstance(rule, fontFeatures.Positioning):
                    continue
                # A positioning rule that is not a pair rule stops the scan
                # of the remaining rules in this routine.
                if len(rule.glyphs) != 2:
                    break
                values = rule.valuerecords
                # Simple kern: a value on the first glyph, none on the second
                if not values[0] or values[1]:
                    continue
                first_value = values[0]
                if not first_value.xAdvance:
                    continue
                if first_value.xPlacement or first_value.yPlacement:
                    continue
                # Record the pair unless an earlier rule already defined it
                for first_glyph in rule.glyphs[0]:
                    for second_glyph in rule.glyphs[1]:
                        pairs.setdefault(first_glyph, {}).setdefault(
                            second_glyph, first_value.xAdvance)
    return pairs
def convert_gpos_to_kern(font_filename, subsetter):
    """Move GPOS pair kerning into a legacy kern table, in place.

    Loads font_filename, reads its GPOS kerning pairs, writes them into a
    format 0 kern table, then re-runs the subsetter with all layout features
    removed and saves the result back over font_filename.

    Args:
        font_filename (str): Font file to read and overwrite
        subsetter: The subset.Subsetter used for the initial subset; its
            options are modified by this call
    """
    print("Converting GPOS kerning to kern table...")
    font = subset.load_font(font_filename,
                            subsetter.options,
                            dontLoadGlyphNames=True)
    kern_data = read_kerning_data(font)

    if not kern_data:
        print('Font has no kerning data')
    else:
        subtable = ttLib.tables._k_e_r_n.KernTable_format_0()
        subtable.coverage = 1
        subtable.kernTable = {
            (first_glyph, second_glyph): adjustment
            for first_glyph, seconds in kern_data.items()
            for second_glyph, adjustment in seconds.items()
        }
        kern = ttLib.newTable('kern')
        kern.version = 0
        kern.kernTables = [subtable]
        font['kern'] = kern

    # Strip out GPOS kern feature, since we have a standalone kern table now
    options = subsetter.options
    options.layout_features = []
    options.drop_tables = []
    options.legacy_kern = True
    subsetter.subset(font)
    font.save(font_filename)
def _build_kern_pair_pos(glyph_order, kern_data):
    """Build a single PairPos format 1 subtable covering every glyph."""
    # Map each glyph name to its position once, so ordering the second glyphs
    # does not need a linear scan of the glyph order per sort key.
    glyph_index = {name: index for index, name in enumerate(glyph_order)}

    pair_pos_table = ot.PairPos()
    pair_pos_table.Format = 1
    pair_pos_table.ValueFormat1 = 4  # x advance only
    pair_pos_table.ValueFormat2 = 0  # nothing for the second glyph
    pair_pos_table.Coverage = ot.Coverage()
    # Add all glyphs in the font to the coverage table rather than just the
    # glyphs in kern_data keys so that the coverage table can be packed with a
    # single range lookup for O(1) first glyph lookups instead of needing to
    # search through a sparse list. Given most of our glyphs have kerning
    # pairs anyway, this can end up being more compact too.
    # Copy the list so the coverage table does not alias the font's own
    # glyph order list.
    pair_pos_table.Coverage.glyphs = list(glyph_order)
    pair_pos_table.PairSet = []

    for first_glyph in glyph_order:
        adjustments = kern_data.get(first_glyph, {})
        # Pairs whose second glyph is not in the font cannot be encoded, so
        # they are dropped; the rest are ordered by glyph index.
        second_glyphs = sorted(
            (glyph for glyph in adjustments if glyph in glyph_index),
            key=glyph_index.__getitem__)

        pair_set = ot.PairSet()
        pair_set.PairValueRecord = []
        for second_glyph in second_glyphs:
            value = otBase.ValueRecord()
            value.XAdvance = adjustments[second_glyph]
            pair_value = ot.PairValueRecord()
            pair_value.SecondGlyph = second_glyph
            pair_value.Value1 = value
            pair_set.PairValueRecord.append(pair_value)
        pair_set.PairValueCount = len(pair_set.PairValueRecord)
        pair_pos_table.PairSet.append(pair_set)

    pair_pos_table.PairSetCount = len(pair_pos_table.PairSet)
    return pair_pos_table


def _install_kern_feature(gpos, pair_pos_table):
    """Make pair_pos_table the only lookup, exposed as the 'kern' feature."""
    lookup = ot.Lookup()
    lookup.LookupFlag = 8  # ignore marks
    lookup.LookupType = 2  # pair adjustment
    lookup.SubTableCount = 1
    lookup.SubTable = [pair_pos_table]
    gpos.LookupList.LookupCount = 1
    gpos.LookupList.Lookup = [lookup]

    feature = ot.Feature()
    feature.FeatureParams = None
    feature.LookupCount = 1
    feature.LookupListIndex = [0]
    feature_record = ot.FeatureRecord()
    feature_record.Feature = feature
    feature_record.FeatureTag = 'kern'
    gpos.FeatureList.FeatureCount = 1
    gpos.FeatureList.FeatureRecord = [feature_record]

    script_list = gpos.ScriptList
    if script_list.ScriptCount == 0:
        # Create a default script
        lang_sys = ot.LangSys()
        lang_sys.LookupOrder = None
        lang_sys.ReqFeatureIndex = 65535  # 0xFFFF: no required feature
        script = ot.Script()
        script.DefaultLangSys = lang_sys
        script.LangSysCount = 0
        script.LangSysRecord = []
        script_record = ot.ScriptRecord()
        script_record.Script = script
        script_record.ScriptTag = 'DFLT'
        script_list.ScriptCount = 1
        script_list.ScriptRecord = [script_record]

    # Only the first script's default language system is pointed at the
    # feature. NOTE(review): any further scripts or language systems are
    # left without it - confirm none survive the subsetting above.
    default_lang_sys = script_list.ScriptRecord[0].Script.DefaultLangSys
    default_lang_sys.FeatureCount = 1
    default_lang_sys.FeatureIndex = [0]


def repack_gpos(font_filename, subsetter):
    """Reads the GPOS kerning pairs and repacks them into an optimized subtable

    Note "optimized" is in the eye of the beholder here. This isn't a
    generalized repacking scheme; it's optimized for speed for the user of the
    font and only for a limited feature set. It:
      * Keeps advance-only kerning pairs and drops the rest
        * If you need advance+placement kerning etc, this may not be what you
          want
      * Uses a single subtable
        * If you have a giant font which can't fit everything in one subtable,
          or if you want some advance-only and some advance+placement kerning,
          this may not be what you want
      * Inserts all glyphs in the font into the coverage range
        * If only a small portion of the glyphs in your font are the first
          glyphs of kerning pairs, this may not be what you want

    The primary purpose of this repacking is to speed up kerning pair lookup
    time at runtime when using the font. Its effect on file size may vary (and
    as mentioned above, it may not be viable for large fonts if you don't
    subset them). With this packing, it will be O(1) to look up the entries
    for the first glyph in the pair (because it is a single complete coverage
    entry), so just a single binary search across just the second glyphs for
    the first glyph will be needed. This is faster than a legacy kern table
    (which needs a binary search across the combination of first plus second
    glyphs), which is faster than common GPOS packing which spreads across
    many sparse subtables (which requires many small binary searches).

    Args:
        font_filename (str): Font file to read and overwrite
        subsetter: The subset.Subsetter used for the initial subset; its
            options are modified by this call
    """
    print("Optimizing GPOS kerning...")
    font = subset.load_font(font_filename,
                            subsetter.options,
                            dontLoadGlyphNames=True)
    kern_data = read_kerning_data(font)

    # Strip out existing kerning data that we just read from the font
    subsetter.options.layout_features = []
    subsetter.options.drop_tables = ['kern']
    subsetter.options.legacy_kern = False
    subsetter.subset(font)

    if not kern_data:
        print('Font has no kerning data')
        # Save and return early since there's no kerning data to create a table
        font.save(font_filename)
        return

    pair_pos_table = _build_kern_pair_pos(font.getGlyphOrder(), kern_data)
    _install_kern_feature(font['GPOS'].table, pair_pos_table)
    font.save(font_filename)
def minimize_font(font_filename, minimized_font_filename, kern):
"""Minimize font_filename and save result into minimized_font_filename
This function subsets the glyphs in the font based on a whitelist in
SUBSET_UNICODE_VALUES. It will also drop layout features, hinting, and names
to minimize space, only including kerning if requested.
Args:
font_filename, minimized_font_filename (str): Input and output files
kern (KernTableOption): which kerning format (if any) to include in the
minimized font
"""
print("Minimizing font into " + minimized_font_filename + "...")
options = subset.Options()
font = subset.load_font(font_filename, options, dontLoadGlyphNames=True)
# By default, drop kerning from both GPOS layout and legacy kern
options.layout_features = []
options.drop_tables = ['kern']
options.legacy_kern = False
# Check kern option and decide what of the original data to keep on the
# initial subset and what kern_repack_func to run after subsetting
match kern:
case KernTableOption.NONE:
kern_repack_func = None
case KernTableOption.KERN_TABLE:
if font.has_key('kern'):
# Keep existing kern table
options.drop_tables = []
options.legacy_kern = True
kern_repack_func = None
else:
# Start with GPOS layout kern, then convert it to kern table
options.layout_features = ['kern']
kern_repack_func = convert_gpos_to_kern
case KernTableOption.GPOS_EXISTING:
# Keep existing GPOS layout kern
options.layout_features = ['kern']
kern_repack_func = None
case KernTableOption.GPOS_REPACKED:
# Start with existing GPOS layout kern, then repack
options.layout_features = ['kern']
kern_repack_func = repack_gpos
options.hinting = False
options.drop_tables += ['FFTM']
options.notdef_outline = True
subsetter = subset.Subsetter(options=options)
subsetter.populate(unicodes=SUBSET_UNICODE_VALUES)
subsetter.subset(font)
font.save(minimized_font_filename)
if kern_repack_func:
kern_repack_func(minimized_font_filename, subsetter)
def convert_font_to_c(original_font_filename, font_filename, output_filename):
    """Converts a font to a const array in a C file

    Runs `xxd -i` on font_filename, renames the generated array and length
    symbols after the output file's base name (without extension), marks them
    const, and writes the result to output_filename under a header comment.

    Args:
        original_font_filename (str): Only used in the generated header
            comment, as the example input to this script
        font_filename (str): Font file whose bytes are embedded
        output_filename (str): C file to write

    Raises:
        FileNotFoundError: if xxd is not installed
        subprocess.CalledProcessError: if xxd exits with an error
    """
    output_base = os.path.basename(output_filename)
    input_base = os.path.basename(original_font_filename)
    desired_name = os.path.splitext(output_base)[0]

    print("Converting font into " + output_filename + "...")
    # Pass an argument list instead of a shell string so that file names
    # containing spaces or shell metacharacters are handled safely, and fail
    # loudly if xxd itself fails.
    dump = subprocess.run(['xxd', '-i', font_filename],
                          check=True,
                          capture_output=True,
                          text=True)

    # Match whatever identifier xxd derived from the path instead of
    # predicting it. Function replacements keep desired_name literal.
    file_data = re.sub(r'^unsigned char \w+\[\]',
                       lambda _: 'const unsigned char ' + desired_name + '[]',
                       dump.stdout,
                       count=1,
                       flags=re.MULTILINE)
    file_data = re.sub(r'^unsigned int \w+_len\b',
                       lambda _: 'const unsigned int ' + desired_name + '_len',
                       file_data,
                       count=1,
                       flags=re.MULTILINE)

    with open(output_filename, 'w', encoding='utf-8') as file:
        file.write("/* This file is auto-generated by prepare_font.py.\n"
                   " *\n"
                   " * eg run `./prepare_font.py " + input_base + " " +
                   output_base + "`\n"
                   " */\n\n")
        file.write(file_data)
def print_static_font_ratios(font_filename):
    """Prints assorted data from the font file

    Prints units per em, the ymax of 'M' and 'g', the ascender and descender
    (each including half of the typographic line gap), the lowest ymin among
    the digits, and the largest outline point and contour counts. Ratios are
    relative to units per em.

    Args:
        font_filename (str): Font file to inspect
    """
    font = ttLib.TTFont(font_filename)
    cmap = font['cmap'].getBestCmap()
    glyph_set = font.getGlyphSet()

    def get_glyph(codepoint):
        # NOTE(review): relies on a private glyph set method - presumably
        # only available for glyf-based fonts; confirm before using on others.
        glyph, _ = glyph_set[cmap[codepoint]]._getGlyphAndOffset()
        return glyph

    os2 = font['OS/2']
    line_gap = os2.sTypoLineGap
    # Split the line gap between ascender and descender. Floor division on
    # the negated gap keeps the two halves adding up to the full gap.
    ascender = os2.sTypoAscender + line_gap // 2
    descender = os2.sTypoDescender + (-line_gap // 2)
    units_per_em = font['head'].unitsPerEm

    glyph_g = get_glyph(ord('g')) if ord('g') in cmap else None
    glyph_M = get_glyph(ord('M')) if ord('M') in cmap else None

    numeral_descender = 0
    for digit in '0123456789':
        # Skip digits the font does not map, the same way 'g' and 'M' are
        # treated as optional above.
        if ord(digit) not in cmap:
            continue
        # Treat a digit glyph without bounds as not descending.
        digit_ymin = getattr(get_glyph(ord(digit)), 'yMin', 0)
        numeral_descender = min(numeral_descender, digit_ymin)

    max_outline_points = 0
    max_contours = 0
    for codepoint in cmap:
        glyph = get_glyph(codepoint)
        if glyph.numberOfContours > 0:
            max_outline_points = max(max_outline_points,
                                     len(glyph.coordinates))
            max_contours = max(max_contours, glyph.numberOfContours)
    # Freetype requires 4 phantom points as well (see TT_Load_Simple_Glyph)
    max_outline_points = max_outline_points + 4

    print('\nStatic font ratios (update these in fontmetrics.h):')
    print('Units per em: ' + str(units_per_em))
    print('Capital ymax: ' +
          (str(glyph_M.yMax / units_per_em) if glyph_M else 'None'))
    print('Lowercase ymax: ' +
          (str(glyph_g.yMax / units_per_em) if glyph_g else 'None'))
    print('Ascender: ' + str(ascender / units_per_em))
    print('Descender: ' + str(descender / units_per_em))
    print('Numeral descender: ' + str(numeral_descender / units_per_em))
    print('Outline points: ' + str(max_outline_points))
    print('Contours: ' + str(max_contours))
def debug_font(font_filename):
    """Print per-glyph outline stats and dump the font to a .ttx XML file.

    Lists every mapped glyph that has contours, then the three glyphs with
    the most outline points, and finally saves the font as XML next to
    font_filename with a .ttx extension.

    Args:
        font_filename (str): Font file to inspect
    """
    debug_prefix = '\033[93mDebug:\033[0m'
    font = ttLib.TTFont(font_filename)
    cmap = font['cmap'].getBestCmap()
    glyph_set = font.getGlyphSet()

    def get_glyph(codepoint):
        glyph, _ = glyph_set[cmap[codepoint]]._getGlyphAndOffset()
        return glyph

    def outline_point_count(codepoint):
        # Glyphs without contours count as zero points
        glyph = get_glyph(codepoint)
        if glyph.numberOfContours > 0:
            return len(glyph.coordinates)
        return 0

    def print_glyph_stats(codepoint):
        # Glyphs without contours are not listed
        glyph = get_glyph(codepoint)
        if glyph.numberOfContours > 0:
            print(f' Glyph {codepoint:5} {cmap[codepoint]:15} —— '
                  f'points: {len(glyph.coordinates):3}, '
                  f'contours: {glyph.numberOfContours}')

    print(f'{debug_prefix} all glyph data:')
    for codepoint in cmap:
        print_glyph_stats(codepoint)

    print(f'{debug_prefix} top glyphs by outline points (add 4 to get font '
          'metrics max points):')
    ranked = sorted(cmap, key=outline_point_count, reverse=True)
    for codepoint in ranked[:3]:
        print_glyph_stats(codepoint)

    xml_filename = f'{os.path.splitext(font_filename)[0]}.ttx'
    print(f'{debug_prefix} saving XML font to {xml_filename}')
    font.saveXML(xml_filename)
def prepare_font(font_filename, output_filename, debug, kern):
    """Run the whole pipeline: minimize, convert to C, print ratios, debug.

    The minimized font is written next to the input as
    <name>.minimized[.kern]<ext>, where .kern is added whenever a kerning
    format other than NONE was requested.

    Args:
        font_filename (str): Input font file
        output_filename (str): C file to generate; skipped when falsy
        debug (bool): Whether to print glyph stats and export a .ttx file
        kern (KernTableOption): Kerning format for the minimized font
    """
    stem, extension = os.path.splitext(font_filename)
    kern_suffix = '' if kern == KernTableOption.NONE else '.kern'
    minimized_font_filename = f'{stem}.minimized{kern_suffix}{extension}'

    minimize_font(font_filename, minimized_font_filename, kern)
    if output_filename:
        convert_font_to_c(font_filename, minimized_font_filename,
                          output_filename)
    print_static_font_ratios(minimized_font_filename)
    if debug:
        debug_font(minimized_font_filename)
def parse_args():
    """Parse the command line arguments.

    Returns:
        argparse.Namespace with font_filename (str), output_filename (str or
        None), kern (KernTableOption) and debug (bool).
    """
    description = (
        'This script prepares font files for use in embedded systems\n'
        '(or for whatever else you want to do with them).\n'
        '\n'
        'It does three things:\n'
        ' 1. Reduces file size by stripping out unneeded[*] tables and glyphs\n'
        ' 2. Converts font to a .c file with the data in a const array\n'
        ' 3. Prints out static font ratios\n'
        '\n'
        '[*] Make sure that what this strips out is unneeded by you! Read the code.')
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawTextHelpFormatter,
        usage='Run with the -h flag for more information',
        description=description)
    # The font file is mandatory: every later step needs it, so let argparse
    # report a missing argument instead of failing later on a None filename.
    parser.add_argument('font_filename',
                        type=str,
                        help='Font file to prepare, eg "MyFont.ttf"')
    parser.add_argument(
        'output_filename',
        type=str,
        nargs='?',
        default=None,
        help='[Optional] Name for the output C file, eg "my_font_ttf.c"')
    # type=KernTableOption converts the raw string by enum value, so an
    # unknown value is rejected by argparse.
    parser.add_argument(
        '--kern',
        type=KernTableOption,
        default=KernTableOption.NONE,
        help=f'Which kerning format (if any) to include in the minimized font\n'
        f'Options: {[item.value for item in KernTableOption]}')
    parser.add_argument(
        '--debug',
        dest='debug',
        action='store_true',
        help='Print glyph stats and export ttx file for debugging')
    return parser.parse_args()
def main():
    """Command line entry point: parse the arguments and prepare the font."""
    cli = parse_args()
    prepare_font(
        font_filename=cli.font_filename,
        output_filename=cli.output_filename,
        debug=cli.debug,
        kern=cli.kern,
    )


# Only run when executed as a script, not when imported
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment