html2md  v1.6.6
Simple and fast HTML to Markdown converter
Loading...
Searching...
No Matches
html2md.cpp
Go to the documentation of this file.
1// Copyright (c) Tim Gromeyer
2// Licensed under the MIT License - https://opensource.org/licenses/MIT
3
#include "html2md.h"
#include "table.h"

#include <algorithm>
#include <cctype>
#include <cstring>
#include <memory>
#include <sstream>
#include <vector>
12
13using std::make_shared;
14using std::string;
15using std::vector;
16
17namespace {
// Returns true when `str` begins with `prefix`. An empty prefix always
// matches; a prefix longer than `str` never does.
bool startsWith(const std::string &str, const std::string &prefix) {
  // rfind anchored at position 0 can only report a match at index 0.
  return str.rfind(prefix, 0) == 0;
}
22
// Returns true when `str` finishes with `suffix`. An empty suffix always
// matches; a suffix longer than `str` never does.
bool endsWith(const std::string &str, const std::string &suffix) {
  const auto str_len = str.size();
  const auto suffix_len = suffix.size();

  if (suffix_len > str_len)
    return false;

  return str.compare(str_len - suffix_len, suffix_len, suffix) == 0;
}
27
// Replaces every occurrence of `needle` in `*haystack` with `replacement`,
// scanning left to right. The search resumes after the inserted text, so a
// replacement that itself contains `needle` is not expanded again.
//
// Returns the number of replacements performed. A null `haystack` or an empty
// `needle` performs no work and returns 0: an empty needle matches at every
// position, which previously made the loop insert `replacement` forever.
size_t ReplaceAll(std::string *haystack, const std::string &needle,
                  const std::string &replacement) {
  if (haystack == nullptr || needle.empty())
    return 0;

  size_t amount_replaced = 0;

  for (size_t pos = haystack->find(needle); pos != std::string::npos;
       pos = haystack->find(needle, pos + replacement.size())) {
    haystack->replace(pos, needle.size(), replacement);
    ++amount_replaced;
  }

  return amount_replaced;
}
48
49size_t ReplaceAll(string *haystack, const string &needle, const char c) {
50 return ReplaceAll(haystack, needle, string({c}));
51}
52
// Splits `str` at every `delimiter` and returns the pieces in order.
// Follows std::getline semantics: adjacent delimiters yield empty pieces, a
// trailing delimiter does not add a final empty piece, and an empty input
// yields an empty vector.
std::vector<std::string> Split(std::string const &str, char delimiter) {
  std::vector<std::string> tokens;
  std::stringstream input(str);
  std::string token;

  while (std::getline(input, token, delimiter))
    tokens.push_back(token);

  return tokens;
}
63
// Returns `str` concatenated with itself `amount` times; an `amount` of zero
// gives the empty string.
std::string Repeat(const std::string &str, size_t amount) {
  std::string out;

  while (amount-- > 0)
    out += str;

  return out;
}
77} // namespace
78
79namespace html2md {
80
// Builds a converter for the HTML pointed to by `html` and registers one
// handler object per supported tag name in `tags_`.
//
// `html` is dereferenced unconditionally in the initializer list, so it must
// not be null. `options` is optional: when non-null its value is copied into
// `option`, otherwise `option` keeps whatever value it already had.
81Converter::Converter(string *html, Options *options) : html_(*html) {
82 if (options)
83 option = *options;
84
// Pre-size the tag table for the handlers registered below.
85 tags_.reserve(41);
86
87 // non-printing tags
// A single TagIgnored instance is shared by every tag listed here.
88 auto tagIgnored = make_shared<Converter::TagIgnored>();
89 tags_[kTagHead] = tagIgnored;
90 tags_[kTagMeta] = tagIgnored;
91 tags_[kTagNav] = tagIgnored;
92 tags_[kTagNoScript] = tagIgnored;
93 tags_[kTagScript] = tagIgnored;
94 tags_[kTagStyle] = tagIgnored;
95 tags_[kTagTemplate] = tagIgnored;
96
97 // printing tags
98 tags_[kTagAnchor] = make_shared<Converter::TagAnchor>();
99 tags_[kTagBreak] = make_shared<Converter::TagBreak>();
100 tags_[kTagDiv] = make_shared<Converter::TagDiv>();
101 tags_[kTagHeader1] = make_shared<Converter::TagHeader1>();
102 tags_[kTagHeader2] = make_shared<Converter::TagHeader2>();
103 tags_[kTagHeader3] = make_shared<Converter::TagHeader3>();
104 tags_[kTagHeader4] = make_shared<Converter::TagHeader4>();
105 tags_[kTagHeader5] = make_shared<Converter::TagHeader5>();
106 tags_[kTagHeader6] = make_shared<Converter::TagHeader6>();
107 tags_[kTagListItem] = make_shared<Converter::TagListItem>();
108 tags_[kTagOption] = make_shared<Converter::TagOption>();
109 tags_[kTagOrderedList] = make_shared<Converter::TagOrderedList>();
110 tags_[kTagPre] = make_shared<Converter::TagPre>();
111 tags_[kTagCode] = make_shared<Converter::TagCode>();
112 tags_[kTagParagraph] = make_shared<Converter::TagParagraph>();
113 tags_[kTagSpan] = make_shared<Converter::TagSpan>();
114 tags_[kTagUnorderedList] = make_shared<Converter::TagUnorderedList>();
115 tags_[kTagTitle] = make_shared<Converter::TagTitle>();
116 tags_[kTagImg] = make_shared<Converter::TagImage>();
117 tags_[kTagSeperator] = make_shared<Converter::TagSeperator>();
118
119 // Text formatting
// Tags that render identically share one handler instance.
120 auto tagBold = make_shared<Converter::TagBold>();
121 tags_[kTagBold] = tagBold;
122 tags_[kTagStrong] = tagBold;
123
124 auto tagItalic = make_shared<Converter::TagItalic>();
125 tags_[kTagItalic] = tagItalic;
126 tags_[kTagItalic2] = tagItalic;
127 tags_[kTagDefinition] = tagItalic;
128 tags_[kTagCitation] = tagItalic;
129
130 tags_[kTagUnderline] = make_shared<Converter::TagUnderline>();
131
132 auto tagStrighthrought = make_shared<Converter::TagStrikethrought>();
133 tags_[kTagStrighthrought] = tagStrighthrought;
134 tags_[kTagStrighthrought2] = tagStrighthrought;
135
136 tags_[kTagBlockquote] = make_shared<Converter::TagBlockquote>();
137
138 // Tables
139 tags_[kTagTable] = make_shared<Converter::TagTable>();
140 tags_[kTagTableRow] = make_shared<Converter::TagTableRow>();
141 tags_[kTagTableHeader] = make_shared<Converter::TagTableHeader>();
142 tags_[kTagTableData] = make_shared<Converter::TagTableData>();
143}
144
145void Converter::CleanUpMarkdown() {
146 TidyAllLines(&md_);
147
148 ReplaceAll(&md_, " , ", ", ");
149
150 ReplaceAll(&md_, "\n.\n", ".\n");
151 ReplaceAll(&md_, "\n↵\n", " ↵\n");
152 ReplaceAll(&md_, "\n*\n", "\n");
153 ReplaceAll(&md_, "\n. ", ".\n");
154
155 ReplaceAll(&md_, "&quot;", '"');
156 ReplaceAll(&md_, "&lt;", "<");
157 ReplaceAll(&md_, "&gt;", ">");
158 ReplaceAll(&md_, "&amp;", '&');
159 ReplaceAll(&md_, "&nbsp;", ' ');
160 ReplaceAll(&md_, "&rarr;", "→");
161
162 ReplaceAll(&md_, "\t\t ", "\t\t");
163}
164
// NOTE(review): the line holding this function's signature (listing line 165)
// is absent from this extract; the symbol index at the end of the page lists
// it as `Converter *appendToMd(char ch)`.
//
// Appends `ch` to the Markdown buffer and keeps `chars_in_curr_line_` in
// step. Nothing is appended while the current tag is an ignored one. Returns
// `this` so calls can be chained.
166 if (IsInIgnoredTag())
167 return this;
168
// Inside a blockquote a newline is only emitted while in <pre>, where it is
// followed by one "> " marker per nesting level; otherwise it is dropped.
169 if (index_blockquote != 0 && ch == '\n') {
170 if (is_in_pre_) {
171 md_ += ch;
172 chars_in_curr_line_ = 0;
173 appendToMd(Repeat("> ", index_blockquote));
174 }
175
176 return this;
177 }
178
179 md_ += ch;
180
// A newline starts a fresh line; any other character lengthens the current one.
181 if (ch == '\n')
182 chars_in_curr_line_ = 0;
183 else
184 ++chars_in_curr_line_;
185
186 return this;
187}
188
// NOTE(review): the signature line (listing line 189) is absent from this
// extract; the body treats `str` as a NUL-terminated C string (it is passed
// to strlen), so this is presumably the `const char *` overload of
// appendToMd - confirm against the header.
//
// Appends `str` to the Markdown buffer unless the current tag is ignored,
// then replays its characters to update `chars_in_curr_line_`. Returns `this`.
190 if (IsInIgnoredTag())
191 return this;
192
193 md_ += str;
194
195 auto str_len = strlen(str);
196
// NOTE(review): `i` is deduced as int while `str_len` is size_t, so this
// comparison mixes signed and unsigned operands.
197 for (auto i = 0; i < str_len; ++i) {
198 if (str[i] == '\n')
199 chars_in_curr_line_ = 0;
200 else
201 ++chars_in_curr_line_;
202 }
203
204 return this;
205}
206
// NOTE(review): the signature line (listing line 207) is absent from this
// extract; the symbol index at the end of the page lists it as
// `Converter *appendBlank()`.
//
// Appends a single ' ' to the Markdown unless the buffer currently ends in a
// newline or in "**". Returns `this`.
208 UpdatePrevChFromMd();
209
210 if (prev_ch_in_md_ == '\n' ||
211 (prev_ch_in_md_ == '*' && prev_prev_ch_in_md_ == '*'))
212 return this;
213
214 return appendToMd(' ');
215}
216
217bool Converter::ok() const {
218 return !is_in_pre_ && !is_in_list_ && !is_in_p_ && !is_in_table_ &&
219 !is_in_tag_ && index_blockquote == 0 && index_li == 0;
220}
221
222void Converter::LTrim(string *s) {
223 (*s).erase((*s).begin(),
224 find_if((*s).begin(), (*s).end(),
225 [](unsigned char ch) { return !std::isspace(ch); }));
226}
227
228Converter *Converter::RTrim(string *s, bool trim_only_blank) {
229 (*s).erase(find_if((*s).rbegin(), (*s).rend(),
230 [trim_only_blank](unsigned char ch) {
231 if (trim_only_blank)
232 return !isblank(ch);
233
234 return !isspace(ch);
235 })
236 .base(),
237 (*s).end());
238
239 return this;
240}
241
242// NOTE: Pay attention when changing one of the trim functions. It can break the
243// output!
244Converter *Converter::Trim(string *s) {
245 if (!startsWith(*s, "\t"))
246 LTrim(s);
247
248 if (!(startsWith(*s, " "), endsWith(*s, " ")))
249 RTrim(s);
250
251 return this;
252}
253
254void Converter::TidyAllLines(string *str) {
255 auto lines = Split(*str, '\n');
256 string res;
257
258 uint8_t amount_newlines = 0;
259 bool in_code_block = false;
260
261 for (auto line : lines) {
262 if (startsWith(line, "```") || startsWith(line, "~~~"))
263 in_code_block = !in_code_block;
264 if (in_code_block) {
265 res += line + '\n';
266 continue;
267 }
268
269 Trim(&line);
270
271 if (line.empty()) {
272 if (amount_newlines < 2 && !res.empty()) {
273 res += '\n';
274 amount_newlines++;
275 }
276 } else {
277 amount_newlines = 0;
278
279 res += line + '\n';
280 }
281 }
282
283 *str = res;
284}
285
286string Converter::ExtractAttributeFromTagLeftOf(const string &attr) {
287 // Extract the whole tag from current offset, e.g. from '>', backwards
288 auto tag = html_.substr(offset_lt_, index_ch_in_html_ - offset_lt_);
289
290 // locate given attribute
291 auto offset_attr = tag.find(attr);
292
293 if (offset_attr == string::npos)
294 return "";
295
296 // locate attribute-value pair's '='
297 auto offset_equals = tag.find('=', offset_attr);
298
299 if (offset_equals == string::npos)
300 return "";
301
302 // locate value's surrounding quotes
303 auto offset_double_quote = tag.find('"', offset_equals);
304 auto offset_single_quote = tag.find('\'', offset_equals);
305
306 bool has_double_quote = offset_double_quote != string::npos;
307 bool has_single_quote = offset_single_quote != string::npos;
308
309 if (!has_double_quote && !has_single_quote)
310 return "";
311
312 char wrapping_quote = 0;
313
314 size_t offset_opening_quote = 0;
315 size_t offset_closing_quote = 0;
316
317 if (has_double_quote) {
318 if (!has_single_quote) {
319 wrapping_quote = '"';
320 offset_opening_quote = offset_double_quote;
321 } else {
322 if (offset_double_quote < offset_single_quote) {
323 wrapping_quote = '"';
324 offset_opening_quote = offset_double_quote;
325 } else {
326 wrapping_quote = '\'';
327 offset_opening_quote = offset_single_quote;
328 }
329 }
330 } else {
331 // has only single quote
332 wrapping_quote = '\'';
333 offset_opening_quote = offset_single_quote;
334 }
335
336 if (offset_opening_quote == string::npos)
337 return "";
338
339 offset_closing_quote = tag.find(wrapping_quote, offset_opening_quote + 1);
340
341 if (offset_closing_quote == string::npos)
342 return "";
343
344 return tag.substr(offset_opening_quote + 1,
345 offset_closing_quote - 1 - offset_opening_quote);
346}
347
348void Converter::TurnLineIntoHeader1() {
349 appendToMd('\n' + Repeat("=", chars_in_curr_line_) + "\n\n");
350
351 chars_in_curr_line_ = 0;
352}
353
354void Converter::TurnLineIntoHeader2() {
355 appendToMd('\n' + Repeat("-", chars_in_curr_line_) + "\n\n");
356
357 chars_in_curr_line_ = 0;
358}
359
// NOTE(review): the signature line (listing line 360) is absent from this
// extract; the symbol index at the end of the page lists it as
// `std::string convert()`.
//
// Converts `html_` to Markdown and returns the result. The input is walked
// one character at a time: '<' outside a tag opens a tag, characters inside a
// tag go to ParseCharInTag and everything else to ParseCharInTagContent.
// CleanUpMarkdown runs once at the end.
361 // We already converted
// The read index only reaches html_.size() after a complete pass, in which
// case the cached Markdown is returned as is.
362 if (index_ch_in_html_ == html_.size())
363 return md_;
364
365 reset();
366
367 for (char ch : html_) {
// Incremented before use, so while `ch` is being handled the index is one
// past its position in html_.
368 ++index_ch_in_html_;
369
370 if (!is_in_tag_ && ch == '<') {
371 OnHasEnteredTag();
372
373 continue;
374 }
375
376 if (is_in_tag_)
377 ParseCharInTag(ch);
378 else
379 ParseCharInTagContent(ch);
380 }
381
382 CleanUpMarkdown();
383
384 return md_;
385}
386
387void Converter::OnHasEnteredTag() {
388 offset_lt_ = index_ch_in_html_;
389 is_in_tag_ = true;
390 prev_tag_ = current_tag_;
391 current_tag_ = "";
392
393 if (!md_.empty()) {
394 UpdatePrevChFromMd();
395 }
396}
397
398Converter *Converter::UpdatePrevChFromMd() {
399 if (!md_.empty()) {
400 prev_ch_in_md_ = md_[md_.length() - 1];
401
402 if (md_.length() > 1)
403 prev_prev_ch_in_md_ = md_[md_.length() - 2];
404 }
405
406 return this;
407}
408
409bool Converter::ParseCharInTag(char ch) {
410 if (ch == '/' && !is_in_attribute_value_) {
411 is_closing_tag_ = current_tag_.empty();
412 is_self_closing_tag_ = !is_closing_tag_;
413
414 return true;
415 }
416
417 if (ch == '>')
418 return OnHasLeftTag();
419
420 if (ch == '"') {
421 if (is_in_attribute_value_) {
422 is_in_attribute_value_ = false;
423 } else if (current_tag_[current_tag_.length() - 1] == '=') {
424 is_in_attribute_value_ = true;
425 }
426
427 return true;
428 }
429
430 current_tag_ += ch;
431
432 return false;
433}
434
435bool Converter::OnHasLeftTag() {
436 is_in_tag_ = false;
437
438 UpdatePrevChFromMd();
439
440 if (!is_closing_tag_)
441 if (TagContainsAttributesToHide(&current_tag_))
442 return true;
443
444 auto cut_tags = Split(current_tag_, ' ');
445 if (cut_tags.empty())
446 return true;
447
448 current_tag_ = cut_tags[0];
449
450 auto tag = tags_[current_tag_];
451
452 if (!tag)
453 return true;
454
455 if (!is_closing_tag_) {
456 tag->OnHasLeftOpeningTag(this);
457 }
458 if (is_closing_tag_ || is_self_closing_tag_) {
459 is_closing_tag_ = false;
460
461 tag->OnHasLeftClosingTag(this);
462 }
463
464 return true;
465}
466
467Converter *Converter::ShortenMarkdown(size_t chars) {
468 md_ = md_.substr(0, md_.length() - chars);
469
470 if (chars > chars_in_curr_line_)
471 chars_in_curr_line_ = 0;
472 else
473 chars_in_curr_line_ = chars_in_curr_line_ - chars;
474
475 return this->UpdatePrevChFromMd();
476}
477
// Handles one character of text content (outside any tag).
//
// Returns true when the character was handled by one of the early special
// cases (code, ignored tag / link tag, newline) and false after the regular
// append path.
478bool Converter::ParseCharInTagContent(char ch) {
// Inside <code> the character is copied verbatim (no escaping, no line-length
// bookkeeping); a newline inside a blockquote is followed by the "> " markers.
479 if (is_in_code_) {
480 md_ += ch;
481
482 if (index_blockquote != 0 && ch == '\n')
483 appendToMd(Repeat("> ", index_blockquote));
484
485 return true;
486 }
487
// Content of ignored tags and of the link tag produces no Markdown; only the
// last HTML character seen is remembered.
488 if (IsInIgnoredTag() || current_tag_ == kTagLink) {
489 prev_ch_in_html_ = ch;
490
491 return true;
492 }
493
// Newlines from the HTML source are dropped, except inside a blockquote where
// they start a new quoted line.
494 if (ch == '\n') {
495 if (index_blockquote != 0) {
496 md_ += '\n';
497 chars_in_curr_line_ = 0;
498 appendToMd(Repeat("> ", index_blockquote));
499 }
500
501 return true;
502 }
503
// Backslash-escape the characters '*', '`' and '\'; every other character is
// appended unchanged.
504 switch (ch) {
505 case '*':
506 appendToMd("\\*");
507 break;
508 case '`':
509 appendToMd("\\`");
510 break;
511 case '\\':
512 appendToMd("\\\\");
513 break;
514 default:
515 md_ += ch;
516 ++chars_in_curr_line_;
517 break;
518 }
519
// Line wrapping: only when option.splitLines is set, outside tables and lists,
// and not while the current tag is an image or anchor. Past option.softBreak
// the line breaks at the next space; past option.hardBreak an earlier space
// on the line is turned into the break instead.
520 if (chars_in_curr_line_ > option.softBreak && !is_in_table_ && !is_in_list_ &&
521 current_tag_ != kTagImg && current_tag_ != kTagAnchor &&
522 option.splitLines) {
523 if (ch == ' ') { // If the next char is - it will become a list
524 md_ += '\n';
525 chars_in_curr_line_ = 0;
526 } else if (chars_in_curr_line_ > option.hardBreak) {
527 ReplacePreviousSpaceInLineByNewline();
528 }
529 }
530
531 return false;
532}
533
534bool Converter::ReplacePreviousSpaceInLineByNewline() {
535 if (current_tag_ == kTagParagraph ||
536 is_in_table_ && (prev_tag_ != kTagCode && prev_tag_ != kTagPre))
537 return false;
538
539 auto offset = md_.length() - 1;
540
541 if (md_.length() == 0)
542 return true;
543
544 do {
545 if (md_[offset] == '\n')
546 return false;
547
548 if (md_[offset] == ' ') {
549 md_[offset] = '\n';
550 chars_in_curr_line_ = md_.length() - offset;
551
552 return true;
553 }
554
555 --offset;
556 } while (offset > 0);
557
558 return false;
559}
560
561void Converter::TagAnchor::OnHasLeftOpeningTag(Converter *c) {
562 if (c->prev_tag_ == kTagImg)
563 c->appendToMd('\n');
564
565 current_title_ = c->ExtractAttributeFromTagLeftOf(kAttributeTitle);
566
567 c->appendToMd('[');
568 current_href_ = c->ExtractAttributeFromTagLeftOf(kAttributeHref);
569}
570
571void Converter::TagAnchor::OnHasLeftClosingTag(Converter *c) {
572 if (!c->shortIfPrevCh('[')) {
573 c->appendToMd("](")->appendToMd(current_href_);
574
575 // If title is set append it
576 if (!current_title_.empty()) {
577 c->appendToMd(" \"")->appendToMd(current_title_)->appendToMd('"');
578 current_title_.clear();
579 }
580
581 c->appendToMd(')');
582
583 if (c->prev_tag_ == kTagImg)
584 c->appendToMd('\n');
585 }
586}
587
588void Converter::TagBold::OnHasLeftOpeningTag(Converter *c) {
589 c->appendToMd("**");
590}
591
592void Converter::TagBold::OnHasLeftClosingTag(Converter *c) {
593 c->appendToMd("**");
594}
595
596void Converter::TagItalic::OnHasLeftOpeningTag(Converter *c) {
597 c->appendToMd('*');
598}
599
600void Converter::TagItalic::OnHasLeftClosingTag(Converter *c) {
601 c->appendToMd('*');
602}
603
604void Converter::TagUnderline::OnHasLeftOpeningTag(Converter *c) {
605 c->appendToMd("<u>");
606}
607
608void Converter::TagUnderline::OnHasLeftClosingTag(Converter *c) {
609 c->appendToMd("</u>");
610}
611
612void Converter::TagStrikethrought::OnHasLeftOpeningTag(Converter *c) {
613 c->appendToMd('~');
614}
615
616void Converter::TagStrikethrought::OnHasLeftClosingTag(Converter *c) {
617 c->appendToMd('~');
618}
619
620void Converter::TagBreak::OnHasLeftOpeningTag(Converter *c) {
621 if (c->is_in_list_) { // When it's in a list, it's not in a paragraph
622 c->appendToMd(" \n");
623 c->appendToMd(Repeat(" ", c->index_li));
624 } else if (c->is_in_table_) {
625 c->appendToMd("<br>");
626 } else if (!c->md_.empty())
627 c->appendToMd(" \n");
628}
629
630void Converter::TagBreak::OnHasLeftClosingTag(Converter *c) {}
631
632void Converter::TagDiv::OnHasLeftOpeningTag(Converter *c) {
633 if (c->prev_ch_in_md_ != '\n')
634 c->appendToMd('\n');
635
636 if (c->prev_prev_ch_in_md_ != '\n')
637 c->appendToMd('\n');
638}
639
640void Converter::TagDiv::OnHasLeftClosingTag(Converter *c) {}
641
642void Converter::TagHeader1::OnHasLeftOpeningTag(Converter *c) {
643 c->appendToMd("\n# ");
644}
645
646void Converter::TagHeader1::OnHasLeftClosingTag(Converter *c) {
647 if (c->prev_prev_ch_in_md_ != ' ')
648 c->appendToMd('\n');
649}
650
651void Converter::TagHeader2::OnHasLeftOpeningTag(Converter *c) {
652 c->appendToMd("\n## ");
653}
654
655void Converter::TagHeader2::OnHasLeftClosingTag(Converter *c) {
656 if (c->prev_prev_ch_in_md_ != ' ')
657 c->appendToMd('\n');
658}
659
660void Converter::TagHeader3::OnHasLeftOpeningTag(Converter *c) {
661 c->appendToMd("\n### ");
662}
663
664void Converter::TagHeader3::OnHasLeftClosingTag(Converter *c) {
665 if (c->prev_prev_ch_in_md_ != ' ')
666 c->appendToMd('\n');
667}
668
669void Converter::TagHeader4::OnHasLeftOpeningTag(Converter *c) {
670 c->appendToMd("\n#### ");
671}
672
673void Converter::TagHeader4::OnHasLeftClosingTag(Converter *c) {
674 if (c->prev_prev_ch_in_md_ != ' ')
675 c->appendToMd('\n');
676}
677
678void Converter::TagHeader5::OnHasLeftOpeningTag(Converter *c) {
679 c->appendToMd("\n##### ");
680}
681
682void Converter::TagHeader5::OnHasLeftClosingTag(Converter *c) {
683 if (c->prev_prev_ch_in_md_ != ' ')
684 c->appendToMd('\n');
685}
686
687void Converter::TagHeader6::OnHasLeftOpeningTag(Converter *c) {
688 c->appendToMd("\n###### ");
689}
690
691void Converter::TagHeader6::OnHasLeftClosingTag(Converter *c) {
692 if (c->prev_prev_ch_in_md_ != ' ')
693 c->appendToMd('\n');
694}
695
696void Converter::TagListItem::OnHasLeftOpeningTag(Converter *c) {
697 if (c->is_in_table_)
698 return;
699
700 if (!c->is_in_ordered_list_) {
701 c->appendToMd(string({c->option.unorderedList, ' '}));
702 return;
703 }
704
705 ++c->index_ol;
706
707 string num = std::to_string(c->index_ol);
708 num.append({c->option.orderedList, ' '});
709 c->appendToMd(num);
710}
711
712void Converter::TagListItem::OnHasLeftClosingTag(Converter *c) {
713 if (c->is_in_table_)
714 return;
715
716 if (c->prev_ch_in_md_ != '\n')
717 c->appendToMd('\n');
718}
719
720void Converter::TagOption::OnHasLeftOpeningTag(Converter *c) {}
721
722void Converter::TagOption::OnHasLeftClosingTag(Converter *c) {
723 if (c->md_.length() > 0)
724 c->appendToMd(" \n");
725}
726
727void Converter::TagOrderedList::OnHasLeftOpeningTag(Converter *c) {
728 if (c->is_in_table_)
729 return;
730
731 c->is_in_list_ = true;
732 c->is_in_ordered_list_ = true;
733 c->index_ol = 0;
734
735 ++c->index_li;
736
737 c->ReplacePreviousSpaceInLineByNewline();
738
739 c->appendToMd('\n');
740}
741
742void Converter::TagOrderedList::OnHasLeftClosingTag(Converter *c) {
743 if (c->is_in_table_)
744 return;
745
746 c->is_in_ordered_list_ = false;
747
748 if (c->index_li != 0)
749 --c->index_li;
750
751 c->is_in_list_ = c->index_li != 0;
752
753 c->appendToMd('\n');
754}
755
756void Converter::TagParagraph::OnHasLeftOpeningTag(Converter *c) {
757 c->is_in_p_ = true;
758
759 if (c->is_in_list_ && c->prev_tag_ == kTagParagraph)
760 c->appendToMd("\n\t");
761 else if (!c->is_in_list_)
762 c->appendToMd('\n');
763}
764
765void Converter::TagParagraph::OnHasLeftClosingTag(Converter *c) {
766 c->is_in_p_ = false;
767
768 if (!c->md_.empty())
769 c->appendToMd("\n"); // Workaround \n restriction for blockquotes
770
771 if (c->index_blockquote != 0)
772 c->appendToMd(Repeat("> ", c->index_blockquote));
773}
774
775void Converter::TagPre::OnHasLeftOpeningTag(Converter *c) {
776 c->is_in_pre_ = true;
777
778 if (c->prev_ch_in_md_ != '\n')
779 c->appendToMd('\n');
780
781 if (c->prev_prev_ch_in_md_ != '\n')
782 c->appendToMd('\n');
783
784 if (c->is_in_list_ && c->prev_tag_ != kTagParagraph)
785 c->ShortenMarkdown(2);
786
787 if (c->is_in_list_)
788 c->appendToMd("\t\t");
789 else
790 c->appendToMd("```");
791}
792
793void Converter::TagPre::OnHasLeftClosingTag(Converter *c) {
794 c->is_in_pre_ = false;
795
796 if (c->is_in_list_)
797 return;
798
799 c->appendToMd("```");
800 c->appendToMd('\n'); // Don't combine because of blockquote
801}
802
803void Converter::TagCode::OnHasLeftOpeningTag(Converter *c) {
804 c->is_in_code_ = true;
805
806 if (c->is_in_pre_) {
807 if (c->is_in_list_)
808 return;
809
810 auto code = c->ExtractAttributeFromTagLeftOf(kAttributeClass);
811 if (!code.empty()) {
812 if (startsWith(code, "language-"))
813 code.erase(0, 9); // remove language-
814 c->appendToMd(code);
815 }
816 c->appendToMd('\n');
817 } else
818 c->appendToMd('`');
819}
820
821void Converter::TagCode::OnHasLeftClosingTag(Converter *c) {
822 c->is_in_code_ = false;
823
824 if (c->is_in_pre_)
825 return;
826
827 c->appendToMd('`');
828}
829
830void Converter::TagSpan::OnHasLeftOpeningTag(Converter *c) {}
831
832void Converter::TagSpan::OnHasLeftClosingTag(Converter *c) {}
833
834void Converter::TagTitle::OnHasLeftOpeningTag(Converter *c) {}
835
836void Converter::TagTitle::OnHasLeftClosingTag(Converter *c) {
837 c->TurnLineIntoHeader1();
838}
839
840void Converter::TagUnorderedList::OnHasLeftOpeningTag(Converter *c) {
841 if (c->is_in_list_ || c->is_in_table_)
842 return;
843
844 c->is_in_list_ = true;
845
846 ++c->index_li;
847
848 c->appendToMd('\n');
849}
850
851void Converter::TagUnorderedList::OnHasLeftClosingTag(Converter *c) {
852 if (c->is_in_table_)
853 return;
854
855 if (c->index_li != 0)
856 --c->index_li;
857
858 c->is_in_list_ = c->index_li != 0;
859
860 if (c->prev_prev_ch_in_md_ == '\n' && c->prev_ch_in_md_ == '\n')
861 c->ShortenMarkdown();
862 else if (c->prev_ch_in_md_ != '\n')
863 c->appendToMd('\n');
864}
865
866void Converter::TagImage::OnHasLeftOpeningTag(Converter *c) {
867 if (c->prev_tag_ != kTagAnchor && c->prev_ch_in_md_ != '\n')
868 c->appendToMd('\n');
869
870 c->appendToMd("![")
871 ->appendToMd(c->ExtractAttributeFromTagLeftOf(kAttributeAlt))
872 ->appendToMd("](")
873 ->appendToMd(c->ExtractAttributeFromTagLeftOf(kAttributeSrc));
874
875 auto title = c->ExtractAttributeFromTagLeftOf(kAttributeTitle);
876 if (!title.empty()) {
877 c->appendToMd(" \"")->appendToMd(title)->appendToMd('"');
878 }
879
880 c->appendToMd(")");
881}
882
883void Converter::TagImage::OnHasLeftClosingTag(Converter *c) {
884 if (c->prev_tag_ == kTagAnchor)
885 c->appendToMd('\n');
886}
887
888void Converter::TagSeperator::OnHasLeftOpeningTag(Converter *c) {
889 c->appendToMd("\n---\n"); // NOTE: We can make this an option
890}
891
892void Converter::TagSeperator::OnHasLeftClosingTag(Converter *c) {}
893
894void Converter::TagTable::OnHasLeftOpeningTag(Converter *c) {
895 c->is_in_table_ = true;
896 c->appendToMd('\n');
897 c->table_start = c->md_.length();
898}
899
900void Converter::TagTable::OnHasLeftClosingTag(Converter *c) {
901 c->is_in_table_ = false;
902 c->appendToMd('\n');
903
904 if (!c->option.formatTable)
905 return;
906
907 string table = c->md_.substr(c->table_start);
908 table = formatMarkdownTable(table);
909 c->ShortenMarkdown(c->md_.size() - c->table_start);
910 c->appendToMd(table);
911}
912
913void Converter::TagTableRow::OnHasLeftOpeningTag(Converter *c) {
914 c->appendToMd('\n');
915}
916
917void Converter::TagTableRow::OnHasLeftClosingTag(Converter *c) {
918 c->UpdatePrevChFromMd();
919 if (c->prev_ch_in_md_ == '|')
920 c->appendToMd('\n'); // There's a bug
921 else
922 c->appendToMd('|');
923
924 if (!c->tableLine.empty()) {
925 if (c->prev_ch_in_md_ != '\n')
926 c->appendToMd('\n');
927
928 c->tableLine.append("|\n");
929 c->appendToMd(c->tableLine);
930 c->tableLine.clear();
931 }
932}
933
934void Converter::TagTableHeader::OnHasLeftOpeningTag(Converter *c) {
935 auto align = c->ExtractAttributeFromTagLeftOf(kAttrinuteAlign);
936
937 string line = "| ";
938
939 if (align == "left" || align == "center")
940 line += ':';
941
942 line += '-';
943
944 if (align == "right" || align == "center")
945 line += ": ";
946 else
947 line += ' ';
948
949 c->tableLine.append(line);
950
951 c->appendToMd("| ");
952}
953
954void Converter::TagTableHeader::OnHasLeftClosingTag(Converter *c) {}
955
956void Converter::TagTableData::OnHasLeftOpeningTag(Converter *c) {
957 if (c->prev_prev_ch_in_md_ != '|')
958 c->appendToMd("| ");
959}
960
961void Converter::TagTableData::OnHasLeftClosingTag(Converter *c) {}
962
963void Converter::TagBlockquote::OnHasLeftOpeningTag(Converter *c) {
964 ++c->index_blockquote;
965}
966
967void Converter::TagBlockquote::OnHasLeftClosingTag(Converter *c) {
968 --c->index_blockquote;
969 c->ShortenMarkdown(2); // Remove the '> '
970}
971
// NOTE(review): the signature line (listing line 972) is absent from this
// extract; the symbol index at the end of the page lists it as `void reset()`.
//
// Discards the generated Markdown, clears the cached trailing characters and
// rewinds the HTML read index. Only the four members below are reset.
973 md_.clear();
974 prev_ch_in_md_ = 0;
975 prev_prev_ch_in_md_ = 0;
976 index_ch_in_html_ = 0;
977}
978
979bool Converter::IsInIgnoredTag() const {
980 if (current_tag_ == kTagTitle && !option.includeTitle)
981 return true;
982
983 return IsIgnoredTag(current_tag_);
984}
985} // namespace html2md
std::string convert()
Convert HTML into Markdown.
Definition html2md.cpp:360
Converter * appendToMd(char ch)
Append a char to the Markdown.
Definition html2md.cpp:165
Converter * appendBlank()
Appends a ' ' in certain cases.
Definition html2md.cpp:207
bool ok() const
Checks if everything was closed properly (in the HTML).
Definition html2md.cpp:217
Converter(std::string &html, struct Options *options=nullptr)
Standard initializer, takes HTML as parameter. Also prepares everything.
Definition html2md.h:173
void reset()
Reset the generated Markdown.
Definition html2md.cpp:972
html2md namespace
Definition html2md.h:21
Options for the conversion from HTML to Markdown.
Definition html2md.h:38
std::string formatMarkdownTable(const std::string &inputTable)