Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/build_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,7 @@ jobs:
with:
token: ${{ secrets.PAT_ANDIWAND }}
submodules: true
lfs: true

- name: ubuntu install tidy
if: runner.os == 'Linux'
Expand Down
32 changes: 32 additions & 0 deletions src/odr/html.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,21 @@ enum class HtmlTableGridlines {
hard,
};

/// @brief PDF text rendering mode.
///
/// Chooses the strategy used to emit text in PDF→HTML output. Neither mode
/// requires JavaScript.
enum class PdfTextMode {
  /// Two layers: a visual layer (paint order, embedded PUA glyphs) plus a
  /// separate transparent selection/search layer (reading order, real
  /// Unicode). Similar to pdf.js.
  dual_layer,
  /// One combined layer in which every glyph is mapped to Unicode via
  /// frequency analysis. Similar to pdf2htmlEX.
  single_layer,
};

/// @brief HTML configuration.
struct HtmlConfig {
// document output file names
Expand Down Expand Up @@ -106,6 +121,23 @@ struct HtmlConfig {
std::string background_image_format{"png"};
double background_image_dpi{144.0};

// PDF text mode
PdfTextMode pdf_text_mode{PdfTextMode::dual_layer};
// `dual_layer`'s invisible selection-layer text is rendered in a local
// system font (tried in order; the first that resolves wins) rather than
// the embedded PDF font, so its natural width rarely matches the
// PDF-derived box width that CSS `text-justify` is asked to fill (justify
// can only add spacing, never compress).
// `pdf_dual_layer_fallback_font_size_adjust` is applied as that @font-face's
// `size-adjust` (0-1, written out as a percent) to shrink the fallback font's
// metrics toward the PDF's, so that the text's natural width stays at or
// below the box width and justify only ever has to stretch, never compress.
// Safe to underestimate (justify then just spreads characters further;
// harmless on an invisible layer) but not to overestimate (the excess is
// clipped, not shrunk).
std::vector<std::string> pdf_dual_layer_fallback_fonts{
"Arial", "Helvetica", "Liberation Sans", "DejaVu Sans", "Nimbus Sans"};
double pdf_dual_layer_fallback_font_size_adjust{0.5};

// drm options
bool no_drm{false};

Expand Down
11 changes: 0 additions & 11 deletions src/odr/internal/font/cff_font.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,11 @@
#include <odr/internal/font/cff_standard_strings.hpp>
#include <odr/internal/pdf/pdf_encoding.hpp>
#include <odr/internal/util/byte_string.hpp>
#include <odr/internal/util/stream_util.hpp>

#include <cmath>
#include <cstdint>
#include <istream>
#include <iterator>
#include <map>
#include <memory>
#include <stdexcept>
#include <utility>
#include <vector>
Expand Down Expand Up @@ -217,14 +214,6 @@ bool CffFont::is_cff(const std::string_view data) {
static_cast<std::uint8_t>(data[3]) <= 4;
}

/// Builds the font from @p stream: the stream is read via
/// `util::stream::read` into `m_data`, then `parse()` is run.
///
/// @throws std::invalid_argument if @p stream is null.
CffFont::CffFont(std::unique_ptr<std::istream> stream) {
  // Reject a missing stream up front; it is dereferenced just below.
  if (!stream) {
    throw std::invalid_argument("cff: null input stream");
  }

  m_data = util::stream::read(*stream);
  parse();
}

/// Builds the font from an in-memory blob: @p data is moved into `m_data`
/// and then `parse()` is run.
CffFont::CffFont(std::string data)
    : m_data{std::move(data)} {
  parse();
}

std::vector<CffFont::Range> CffFont::read_index(const std::uint32_t offset,
Expand Down
4 changes: 0 additions & 4 deletions src/odr/internal/font/cff_font.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@
#include <odr/internal/abstract/font.hpp>

#include <cstdint>
#include <iosfwd>
#include <memory>
#include <optional>
#include <string>
#include <string_view>
Expand All @@ -29,8 +27,6 @@ class CffFont final : public abstract::Font {
/// Cheap magic test: a CFF header (major version 1, sane `hdrSize`).
[[nodiscard]] static bool is_cff(std::string_view data);

/// Parse the facts from @p stream; the raw bytes are retained for `data()`.
explicit CffFont(std::unique_ptr<std::istream> stream);
/// Parse the facts from an in-memory CFF blob (retained for `data()`).
explicit CffFont(std::string data);

Expand Down
25 changes: 13 additions & 12 deletions src/odr/internal/font/cff_transform.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
#include <algorithm>
#include <cstdint>
#include <map>
#include <sstream>
#include <string>
#include <utility>
#include <vector>
Expand Down Expand Up @@ -145,9 +144,9 @@ std::string cff::wrap_to_otf(const CffFont &font,
const std::map<char32_t, std::uint16_t> &extra) {
const std::uint16_t glyphs = font.glyph_count();

// The uniform PUA re-encode: pua_code_point(glyph) -> glyph over
// every glyph. serialize_cmap throws if a code point is beyond the BMP, which
// also bounds the glyph count to the PUA capacity.
// The uniform PUA re-encode: pua_code_point(glyph) -> glyph over every glyph.
// Glyphs past the 6400-slot BMP PUA overflow into Supplementary PUA-A, and
// serialize_cmap emits a format-12 subtable to cover them.
std::map<char32_t, std::uint16_t> pua;
for (std::uint16_t glyph = 0; glyph < glyphs; ++glyph) {
pua[pua_code_point(glyph)] = glyph;
Expand Down Expand Up @@ -184,14 +183,16 @@ std::string cff::wrap_to_otf(const CffFont &font,
tables.emplace_back("cmap", serialize_cmap(pua));
tables.emplace_back("name", serialize_name(font.name()));
tables.emplace_back("post", serialize_post());
tables.emplace_back("OS/2",
serialize_os2(font.units_per_em(), bbox.y_min, bbox.y_max,
static_cast<std::uint16_t>(first),
static_cast<std::uint16_t>(last)));

std::ostringstream out;
build_sfnt(out, 0x4f54544f /* 'OTTO' */, std::move(tables));
return std::move(out).str();
// OS/2 usFirst/usLastCharIndex are u16; a beyond-BMP PUA code point (large
// glyph counts overflow into Supplementary PUA-A) is clamped to 0xFFFF.
tables.emplace_back(
"OS/2",
serialize_os2(
font.units_per_em(), bbox.y_min, bbox.y_max,
static_cast<std::uint16_t>(std::min<char32_t>(first, 0xffff)),
static_cast<std::uint16_t>(std::min<char32_t>(last, 0xffff))));

return build_sfnt(0x4f54544f /* 'OTTO' */, std::move(tables));
}

} // namespace odr::internal::font
6 changes: 5 additions & 1 deletion src/odr/internal/font/font_file.cpp
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
#include <odr/internal/font/font_file.hpp>

#include <odr/internal/abstract/file.hpp>
#include <odr/internal/abstract/font.hpp>
#include <odr/internal/font/sfnt_font.hpp>
#include <odr/internal/util/stream_util.hpp>

#include <istream>
#include <utility>

namespace odr::internal::font {
Expand All @@ -12,7 +15,8 @@ FontFile::FontFile(std::shared_ptr<abstract::File> file,
: m_file{std::move(file)}, m_file_type{file_type} {
// Parse eagerly: a parse failure is how detection rejects a non-font, so the
// open-strategy try/catch can fall through.
m_font = std::make_shared<sfnt::SfntFont>(m_file->stream());
m_font =
std::make_shared<sfnt::SfntFont>(util::stream::read(*m_file->stream()));
}

std::shared_ptr<abstract::File> FontFile::file() const noexcept {
Expand Down
16 changes: 2 additions & 14 deletions src/odr/internal/font/sfnt_font.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,10 @@

#include <odr/internal/font/sfnt_transform.hpp>
#include <odr/internal/util/byte_string.hpp>
#include <odr/internal/util/stream_util.hpp>
#include <odr/internal/util/string_util.hpp>

#include <algorithm>
#include <cstdint>
#include <istream>
#include <memory>
#include <ostream>
#include <stdexcept>
#include <string>
#include <string_view>
Expand Down Expand Up @@ -179,14 +175,6 @@ bool SfntFont::is_sfnt(const std::string_view data) {
tag == "true" || tag == "ttcf" || tag == "typ1";
}

/// Builds the font from @p stream: the stream is read via
/// `util::stream::read` into `m_data`, then `parse()` is run.
///
/// @throws std::invalid_argument if @p stream is null.
SfntFont::SfntFont(std::unique_ptr<std::istream> stream) {
  // Reject a missing stream up front; it is dereferenced just below.
  if (!stream) {
    throw std::invalid_argument("sfnt: null input stream");
  }

  m_data = util::stream::read(*stream);
  parse();
}

/// Builds the font from an in-memory blob: @p data is moved into `m_data`
/// and then `parse()` is run.
SfntFont::SfntFont(std::string data)
    : m_data{std::move(data)} {
  parse();
}

void SfntFont::parse() {
Expand Down Expand Up @@ -506,7 +494,7 @@ void SfntFont::set_cmap(std::map<char32_t, std::uint16_t> cmap) {
update_reverse();
}

void SfntFont::write(std::ostream &out) const {
std::string SfntFont::write() const {
std::vector<std::pair<std::string, std::string>> tables;
tables.reserve(m_tables.size() + 1);
for (const auto &[tag, location] : m_tables) {
Expand Down Expand Up @@ -543,7 +531,7 @@ void SfntFont::write(std::ostream &out) const {
const std::uint32_t version = m_format == FontFormat::opentype_cff
? 0x4f54544fU /* 'OTTO' */
: 0x00010000U;
build_sfnt(out, version, std::move(tables));
return build_sfnt(version, std::move(tables));
}

std::optional<SfntFont::Table>
Expand Down
14 changes: 4 additions & 10 deletions src/odr/internal/font/sfnt_font.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,7 @@

#include <cstdint>
#include <functional>
#include <iosfwd>
#include <map>
#include <memory>
#include <optional>
#include <string>
#include <string_view>
Expand All @@ -26,9 +24,6 @@ class SfntFont final : public abstract::Font {
/// Cheap magic test: a recognised SFNT version tag at the head of @p data.
[[nodiscard]] static bool is_sfnt(std::string_view data);

/// Reads @p stream fully into an in-memory buffer and parses the facts from
/// it; the bytes are retained for pass-through (see `write()`).
explicit SfntFont(std::unique_ptr<std::istream> stream);
/// Parses the facts from an in-memory SFNT blob (retained for `write()`).
explicit SfntFont(std::string data);

Expand All @@ -55,11 +50,10 @@ class SfntFont final : public abstract::Font {
/// font (see `sfnt_transform.hpp`'s `reencode_to_pua`), then `write()`.
void set_cmap(std::map<char32_t, std::uint16_t> cmap);

/// Serialize the current state to @p out: the (possibly mutated) `cmap`
/// rebuilt from `cmap()`, every other table copied verbatim from the source
/// stream, with a freshly computed table directory and checksums. @p out need
/// only be a forward sink (see `build_sfnt`).
void write(std::ostream &out) const;
/// Serialize the current state and return the bytes: the (possibly mutated)
/// `cmap` rebuilt from `cmap()`, every other table copied verbatim from the
/// source stream, with a freshly computed table directory and checksums.
[[nodiscard]] std::string write() const;

private:
/// Parse the facts from `m_data` (called by both constructors).
Expand Down
Loading
Loading