Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ offline/
tools/pdf/cmap-resources/
## Adobe Glyph List (fetched by tools/pdf/generate_encoding_data.py)
tools/pdf/glyphlist.txt
## Adobe Core-14 AFM metrics (fetched by tools/pdf/generate_afm_data.py)
tools/pdf/afm/

*.zip
*.svg
Expand Down
2 changes: 2 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,8 @@ set(ODR_SOURCE_FILES
"src/odr/internal/ooxml/ooxml_meta.cpp"
"src/odr/internal/ooxml/ooxml_util.cpp"

"src/odr/internal/pdf/pdf_afm.cpp"
"src/odr/internal/pdf/pdf_afm_data.cpp"
"src/odr/internal/pdf/pdf_cid.cpp"
"src/odr/internal/pdf/pdf_cmap.cpp"
"src/odr/internal/pdf/pdf_cmap_parser.cpp"
Expand Down
32 changes: 31 additions & 1 deletion src/odr/internal/html/pdf_file.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,21 @@ std::string rgb_to_css(const std::array<double, 3> &rgb) {
return std::move(s).str();
}

/// Compose the CSS declaration that text set in a non-embedded font is
/// rendered with: the substitute's `font-family` stack, followed by
/// `font-weight:bold` and/or `font-style:italic` when the substitute is
/// flagged bold / italic. The result is what gets interned as an `ff` atomic
/// class on fallback (`font == 0`) runs.
std::string font_substitute_declaration(const pdf::FontSubstitute &substitute) {
  std::string css = "font-family:";
  css += substitute.css_family;
  // Weight and style are only emitted when set, so a regular face yields the
  // bare `font-family` declaration.
  css += substitute.bold ? ";font-weight:bold" : "";
  css += substitute.italic ? ";font-style:italic" : "";
  return css;
}

/// Build an SVG `d` attribute from a path's subpaths, each point mapped through
/// `to_box` (PDF user space -> the page box, y-down). Lines become `L`, cubic
/// Béziers `C`, and an explicitly closed subpath ends with `Z`.
Expand Down Expand Up @@ -788,6 +803,11 @@ class HtmlServiceImpl final : public HtmlService {
if (font != 0) {
run_classes += ' ';
run_classes += font_class(font_class_used, font, invisible);
} else if (text.font != nullptr && text.font->substitute) {
// Non-embedded font: render the real Unicode in the substitute
// family (embedded fonts carry the family in `font_class`).
add_class(run_classes, "ff",
font_substitute_declaration(*text.font->substitute));
}
if (vis_margin_pt != 0) {
add_class(run_classes, "ml", pt_decl("margin-left", vis_margin_pt));
Expand Down Expand Up @@ -1366,9 +1386,17 @@ class HtmlServiceImpl final : public HtmlService {
}

// ---- Flow grouping -----------------------------------------------
// The visible substitute family of a non-embedded font (`font == 0`);
// part of the flow key so two different substitutes (e.g. a Helvetica
// run then a Times run) never share one line block's `font_class`.
const std::string substitute_declaration =
(font == 0 && !invisible && text.font != nullptr &&
text.font->substitute)
? font_substitute_declaration(*text.font->substitute)
: std::string();
std::ostringstream fk;
fk << font << '|' << invisible << '|' << font_size_pt << '|' << cs_pt
<< '|' << ws_pt;
<< '|' << ws_pt << '|' << substitute_declaration;
const std::string flow_key = std::move(fk).str();
bool new_line = is_matrix || prev_was_matrix || cur_line < 0 ||
flow_key != cur_flow_key;
Expand Down Expand Up @@ -1407,6 +1435,8 @@ class HtmlServiceImpl final : public HtmlService {
line.classes = std::move(base);
if (font != 0) {
line.font_class = font_class(font_class_used, font, invisible);
} else if (!substitute_declaration.empty()) {
line.font_class = styles.intern("ff", substitute_declaration);
}
line.runs.push_back(std::move(run));
page_out.items.push_back(std::move(line));
Expand Down
199 changes: 199 additions & 0 deletions src/odr/internal/pdf/pdf_afm.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,199 @@
#include <odr/internal/pdf/pdf_afm.hpp>

#include <odr/internal/pdf/pdf_afm_data.hpp>

#include <algorithm>
#include <array>
#include <cctype>

namespace odr::internal::pdf {

namespace {

/// Fetch the AFM table entry for `font`. The enumerator's underlying value is
/// used directly as the index into `afm_data::fonts`; no bounds check is done
/// here.
const afm_data::FontMetrics &metrics_of(const StandardFont font) {
  const auto index = static_cast<std::size_t>(font);
  return afm_data::fonts[index];
}

/// `/FontDescriptor` `/Flags` bits consulted in this file (ISO 32000-1, 9.8.2,
/// Table 123 "Font flags"). The specification numbers bit positions from 1;
/// the shift amounts below are zero-based.
constexpr std::uint32_t flag_fixed_pitch{1U << 0}; // bit 1: FixedPitch
constexpr std::uint32_t flag_serif{1U << 1};       // bit 2: Serif
constexpr std::uint32_t flag_italic{1U << 6};      // bit 7: Italic
constexpr std::uint32_t flag_force_bold{1U << 18}; // bit 19: ForceBold

/// Canonical form of a `/BaseFont` name for keyword matching: a leading subset
/// tag (exactly six `A`-`Z` letters followed by `+`, with at least one
/// character after it) is dropped, then every character is lowercased with
/// `std::tolower`.
std::string normalize_name(std::string_view base_font) {
  constexpr std::size_t tag_length = 6;
  const bool has_subset_tag =
      base_font.size() > tag_length + 1 && base_font[tag_length] == '+' &&
      std::all_of(base_font.begin(), base_font.begin() + tag_length,
                  [](const char c) { return c >= 'A' && c <= 'Z'; });
  if (has_subset_tag) {
    base_font.remove_prefix(tag_length + 1);
  }

  std::string lowered(base_font.size(), '\0');
  std::transform(base_font.begin(), base_font.end(), lowered.begin(),
                 [](const char c) {
                   // Go through `unsigned char`: `std::tolower` is undefined
                   // for negative `char` values.
                   return static_cast<char>(
                       std::tolower(static_cast<unsigned char>(c)));
                 });
  return lowered;
}

/// True when `needle` occurs anywhere inside `haystack`. An empty `needle`
/// matches every haystack.
bool contains(const std::string &haystack, const std::string_view needle) {
  const std::string_view text(haystack);
  return text.find(needle) != std::string_view::npos;
}

/// Broad typeface category a non-embedded font is mapped onto; it selects both
/// the CSS family stack and the group of standard-14 metrics.
enum class Family {
  sans,   ///< Helvetica-like
  serif,  ///< Times-like
  mono,   ///< Courier-like
  symbol, ///< Symbol
  zapf,   ///< ZapfDingbats
};

/// Map a normalized (lowercased, subset tag stripped) `/BaseFont` name onto a
/// `Family`. Name keywords are tried first, in priority order; only when none
/// matches are the `/FontDescriptor` `/Flags` consulted (FixedPitch -> mono,
/// Serif -> serif). A font with no usable hint at all is treated as sans.
///
/// The sans keywords are tested before the serif ones on purpose: "sansserif"
/// contains "serif", and sans faces are commonly named "...-Roman" (e.g.
/// "HelveticaNeue-Roman"), so the opposite order misfiles them as serif.
Family classify_family(const std::string &name, const std::uint32_t flags) {
  // "zapfdingbats" contains "dingbats", so one probe covers both spellings.
  if (contains(name, "dingbats")) {
    return Family::zapf;
  }
  if (contains(name, "symbol")) {
    return Family::symbol;
  }
  // Monospace goes before sans so that e.g. "dejavusansmono" stays mono.
  if (contains(name, "courier") || contains(name, "mono") ||
      contains(name, "consol")) {
    return Family::mono;
  }
  if (contains(name, "arial") || contains(name, "helvetica") ||
      contains(name, "verdana") || contains(name, "tahoma") ||
      contains(name, "segoe") || contains(name, "sans")) {
    return Family::sans;
  }
  if (contains(name, "times") || contains(name, "georgia") ||
      contains(name, "roman") || contains(name, "serif") ||
      contains(name, "minion") || contains(name, "garamond")) {
    return Family::serif;
  }
  // No name hint: fall back to the descriptor flags.
  if ((flags & flag_fixed_pitch) != 0) {
    return Family::mono;
  }
  if ((flags & flag_serif) != 0) {
    return Family::serif;
  }
  return Family::sans;
}

/// Select the standard-14 font whose AFM metrics stand in for a font of the
/// given family and style. Symbol and ZapfDingbats each have a single face, so
/// `bold` / `italic` are ignored for them; any other (or unexpected) family
/// value is served by the Helvetica group.
StandardFont pick_metrics(const Family family, const bool bold,
                          const bool italic) {
  // Choose one of the four faces of a family from the bold/italic pair.
  const auto by_style = [bold, italic](const StandardFont regular,
                                       const StandardFont bold_face,
                                       const StandardFont italic_face,
                                       const StandardFont bold_italic_face) {
    if (bold) {
      return italic ? bold_italic_face : bold_face;
    }
    return italic ? italic_face : regular;
  };

  switch (family) {
  case Family::symbol:
    return StandardFont::symbol;
  case Family::zapf:
    return StandardFont::zapf_dingbats;
  case Family::serif:
    return by_style(StandardFont::times_roman, StandardFont::times_bold,
                    StandardFont::times_italic,
                    StandardFont::times_bold_italic);
  case Family::mono:
    return by_style(StandardFont::courier, StandardFont::courier_bold,
                    StandardFont::courier_oblique,
                    StandardFont::courier_bold_oblique);
  case Family::sans:
  default:
    return by_style(StandardFont::helvetica, StandardFont::helvetica_bold,
                    StandardFont::helvetica_oblique,
                    StandardFont::helvetica_bold_oblique);
  }
}

/// The CSS `font-family` stack used to render a font of the given family.
/// Every stack ends in a generic CSS family; any value other than serif, mono,
/// symbol or zapf gets the sans-serif stack.
std::string_view family_stack(const Family family) {
  if (family == Family::serif) {
    return "'Times New Roman',Times,serif";
  }
  if (family == Family::mono) {
    return "'Courier New',Courier,monospace";
  }
  if (family == Family::symbol) {
    return "Symbol,serif";
  }
  if (family == Family::zapf) {
    return "'Zapf Dingbats',fantasy";
  }
  return "Helvetica,Arial,sans-serif";
}

} // namespace

} // namespace odr::internal::pdf

namespace odr::internal {

/// Resolve the substitute for a non-embedded font.
///
/// The name is normalized (subset tag stripped, lowercased) and classified
/// into a family. Bold is taken from the name ("bold"), a `/FontWeight` of 600
/// or more, or the ForceBold flag. Italic is taken from the name ("italic" /
/// "oblique"), the Italic flag, or a non-zero `/ItalicAngle`.
pdf::FontSubstitute pdf::resolve_font_substitute(
    const std::string_view base_font, const std::uint32_t flags,
    const std::int32_t font_weight, const double italic_angle) {
  const std::string name = normalize_name(base_font);
  const Family family = classify_family(name, flags);

  const bool forced_bold = (flags & flag_force_bold) != 0;
  const bool is_bold = contains(name, "bold") || font_weight >= 600 || forced_bold;

  // Symbol and ZapfDingbats have no italic AFM variant; the name may still
  // carry the word, but there is nothing to select.
  // NOTE(review): unlike `italic`, `bold` is not suppressed for these two
  // families, so `FontSubstitute::bold` can still be set for them. Confirm
  // the asymmetry is intended.
  const bool has_italic_variant =
      family != Family::symbol && family != Family::zapf;
  const bool italic_hint = contains(name, "italic") ||
                           contains(name, "oblique") ||
                           (flags & flag_italic) != 0 || italic_angle != 0;
  const bool is_italic = has_italic_variant && italic_hint;

  FontSubstitute result;
  result.css_family = std::string(family_stack(family));
  result.bold = is_bold;
  result.italic = is_italic;
  result.metrics = pick_metrics(family, is_bold, is_italic);
  return result;
}

std::optional<double> pdf::afm_width(const StandardFont font,
const std::string_view glyph_name) {
const afm_data::FontMetrics &m = metrics_of(font);
const afm_data::GlyphWidth *const begin = m.glyphs;
const afm_data::GlyphWidth *const end = m.glyphs + m.glyph_count;
const auto *const it = std::lower_bound(
begin, end, glyph_name,
[](const afm_data::GlyphWidth &g, const std::string_view name) {
return g.name < name;
});
if (it != end && it->name == glyph_name) {
return it->width;
}
return std::nullopt;
}

/// Advance width stored for character `code` in `font`'s per-code table, or
/// `nullopt` when the stored value is negative (an empty slot).
std::optional<double> pdf::afm_code_width(const StandardFont font,
                                          const std::uint8_t code) {
  const std::int16_t units = metrics_of(font).code_widths[code];
  return units >= 0 ? std::optional<double>(units) : std::nullopt;
}

/// The `ascender` metric of `font` scaled from glyph space (1/1000 em) to em
/// units.
double pdf::afm_ascender(const StandardFont font) {
  constexpr double glyph_units_per_em = 1000.0;
  return metrics_of(font).ascender / glyph_units_per_em;
}

} // namespace odr::internal
72 changes: 72 additions & 0 deletions src/odr/internal/pdf/pdf_afm.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
#pragma once

#include <cstdint>
#include <optional>
#include <string>
#include <string_view>

namespace odr::internal::pdf {

/// The 14 standard fonts (ISO 32000-1 9.6.2.2) whose glyph metrics ship in the
/// Core AFM tables (`pdf_afm_data`). The numeric value of each enumerator is
/// the font's index in `afm_data::fonts`, so the values are spelled out and
/// must stay in step with that table.
enum class StandardFont : std::uint8_t {
  // Helvetica group
  helvetica = 0,
  helvetica_bold = 1,
  helvetica_oblique = 2,
  helvetica_bold_oblique = 3,
  // Times group
  times_roman = 4,
  times_bold = 5,
  times_italic = 6,
  times_bold_italic = 7,
  // Courier group
  courier = 8,
  courier_bold = 9,
  courier_oblique = 10,
  courier_bold_oblique = 11,
  // Single-face pictorial fonts
  symbol = 12,
  zapf_dingbats = 13,
};

/// Describes how a non-embedded font is rendered: the browser's own fonts are
/// selected through a CSS `font-family` stack, and glyphs are placed with the
/// standard-14 AFM advance widths (`metrics`) when the font maps onto one of
/// them. Produced by `resolve_font_substitute` and carried on
/// `Font::substitute`.
struct FontSubstitute {
  /// CSS `font-family` value ending in a generic family, e.g.
  /// `"Helvetica,Arial,sans-serif"`. The generic tail always resolves, so the
  /// glyph shapes are approximate while `metrics` governs placement.
  std::string css_family;
  /// Render with `font-weight:bold`.
  bool bold = false;
  /// Render with `font-style:italic`.
  bool italic = false;
  /// The standard-14 font whose AFM widths drive `advance_width`. `nullopt`
  /// means no standard-14 metrics were selected, leaving
  /// `/Widths`/`/MissingWidth` as the only metric source.
  std::optional<StandardFont> metrics;
};

/// Choose a substitute for a non-embedded simple font from its `/BaseFont` name
/// and `/FontDescriptor` hints (`/Flags`, `/FontWeight`, `/ItalicAngle`;
/// `font_weight <= 0` and `italic_angle == 0` mean "absent"). A subset prefix
/// (`ABCDEF+`) on the name is ignored and matching is case-insensitive.
///
/// The family is taken from keywords in the name and, failing that, from the
/// FixedPitch / Serif flag bits. Bold is set by "bold" in the name, a
/// `font_weight` of 600 or more, or the ForceBold flag. Italic is set by
/// "italic" / "oblique" in the name, the Italic flag, or a non-zero
/// `italic_angle`, but never for the Symbol / ZapfDingbats families.
///
/// Never fails: an unrecognized font falls back to a sans-serif stack with
/// Helvetica metrics.
[[nodiscard]] FontSubstitute resolve_font_substitute(std::string_view base_font,
std::uint32_t flags,
std::int32_t font_weight,
double italic_angle);

/// The advance width of a glyph (by name) in `font`, glyph space (1/1000 em),
/// or `nullopt` when the font has no such glyph. The name must match a table
/// entry exactly.
[[nodiscard]] std::optional<double> afm_width(StandardFont font,
std::string_view glyph_name);

/// The advance width of a code in `font`'s built-in encoding, glyph space
/// (1/1000 em), or `nullopt` when the slot is empty (stored as a negative
/// width in the table). The fallback when a font carries no `/Encoding`
/// (notably Symbol/ZapfDingbats).
[[nodiscard]] std::optional<double> afm_code_width(StandardFont font,
std::uint8_t code);

/// The font's `Ascender` metric in em units (the AFM glyph-space value divided
/// by 1000); used as the baseline-shift fallback for a substitute with no
/// `/FontDescriptor` `/Ascent`.
[[nodiscard]] double afm_ascender(StandardFont font);

} // namespace odr::internal::pdf
Loading
Loading