");
@@ -139,7 +140,7 @@ void print_html_node(GString *out, node *n, scratch_pad *scratch) {
case VERBATIMFENCE:
pad(out, 2, scratch);
if ((n->children != NULL) && (n->children->key == VERBATIMTYPE)) {
- trim_trailing_whitespace(n->children->str);
+ trim_trailing_whitespace_in_node_str(n->children);
if (strlen(n->children->str) > 0)
g_string_append_printf(out, "", n->children->str);
else
@@ -244,11 +245,11 @@ void print_html_node(GString *out, node *n, scratch_pad *scratch) {
print_html_node(out, n->children, scratch);
g_string_append_printf(out, "\"/>\n");
} else if (strcmp(n->str, "xhtmlheader") == 0) {
- trim_trailing_whitespace(n->children->str);
+ trim_trailing_whitespace_in_node_str(n->children);
print_raw_node(out, n->children);
g_string_append_printf(out, "\n");
} else if (strcmp(n->str, "htmlheader") == 0) {
- trim_trailing_whitespace(n->children->str);
+ trim_trailing_whitespace_in_node_str(n->children);
print_raw_node(out, n->children);
g_string_append_printf(out, "\n");
} else if (strcmp(n->str, "mmdfooter") == 0) {
@@ -261,7 +262,7 @@ void print_html_node(GString *out, node *n, scratch_pad *scratch) {
}
break;
case METAVALUE:
- trim_trailing_whitespace(n->str);
+ trim_trailing_whitespace_in_node_str(n);
print_html_string(out,n->str, scratch);
break;
case FOOTER:
diff --git a/latex.c b/latex.c
index a9b2bc2..2e40fc2 100644
--- a/latex.c
+++ b/latex.c
@@ -99,16 +99,33 @@ void print_latex_node(GString *out, node *n, scratch_pad *scratch) {
case ABBREVIATION:
/* We combine the short and full names, since stripping non-ascii characters may result
in a conflict otherwise. This at least makes it less likely. */
- width = ascii_label_from_node(n->children);
- temp = ascii_label_from_string(n->str);
- g_string_append_printf(out, "\\newacro{%s%s}[",width,temp);
- print_latex_node_tree(out, n->children, scratch);
- g_string_append_printf(out, "]{");
- trim_trailing_whitespace(n->str);
+			temp_str = g_string_new("");	/* accumulates the acronym label */
+			if(LATEX_ACRONYM_LABEL_STYLE_ABBR == (scratch->latex_acronym_config&LATEX_ACRONYM_LABEL_STYLE_MASK)) {
+				print_raw_node(temp_str, n->children);
+			} else {
+				width = ascii_label_from_node(n->children);	/* heap string, freed below */
+				temp = ascii_label_from_string(n->str);	/* heap string, freed below */
+				g_string_append_printf(temp_str, "%s%s", width, temp);
+				free(temp);
+				free(width);
+			}
+			switch(scratch->latex_acronym_config&LATEX_ACRONYM_PACKAGE_MASK) {
+				case LATEX_ACRONYM_PACKAGE_GLOSSARIES:
+					g_string_append_printf(out, "\\newacronym{%s}{",temp_str->str);
+					print_latex_node_tree(out, n->children, scratch);
+					g_string_append_printf(out, "}{");
+					break;
+				case LATEX_ACRONYM_PACKAGE_ACRONYM:
+				default:
+					g_string_append_printf(out, "\\newacro{%s}[",temp_str->str);
+					print_latex_node_tree(out, n->children, scratch);
+					g_string_append_printf(out, "]{");
+					break;
+			}
+			trim_trailing_whitespace_in_node_str(n);
print_latex_string(out, n->str, scratch);
g_string_append_printf(out, "}\n");
- free(temp);
- free(width);
+			g_string_free(temp_str, true);	/* true also releases the character buffer */
break;
case ABBRSTART:
/* Strip out nodes that are being replaced with the abbreviation */
@@ -122,13 +139,29 @@ void print_latex_node(GString *out, node *n, scratch_pad *scratch) {
n->next = temp_node->next;
temp_node->next = NULL;
free_node(temp_node);
+ case ABBRPLURAL:
case ABBR:
/* In either case, now we call on the abbreviation */
- width = ascii_label_from_node(n->children->children);
- temp = ascii_label_from_string(n->children->str);
- g_string_append_printf(out, "\\ac{%s%s}", width, temp);
- free(temp);
- free(width);
+			temp_str = g_string_new("");	/* accumulates the acronym label */
+			if(LATEX_ACRONYM_LABEL_STYLE_ABBR == (scratch->latex_acronym_config&LATEX_ACRONYM_LABEL_STYLE_MASK)) {
+				print_raw_node(temp_str, n->children->children);
+			} else {
+				width = ascii_label_from_node(n->children->children);	/* heap string, freed below */
+				temp = ascii_label_from_string(n->children->str);	/* heap string, freed below */
+				g_string_append_printf(temp_str, "%s%s", width, temp);
+				free(temp);
+				free(width);
+			}
+			switch(scratch->latex_acronym_config&LATEX_ACRONYM_PACKAGE_MASK) {
+				case LATEX_ACRONYM_PACKAGE_GLOSSARIES:
+					g_string_append_printf(out, "\\gls%s{%s}", (n->key == ABBRPLURAL) ? "pl" : "", temp_str->str);
+					break;
+				case LATEX_ACRONYM_PACKAGE_ACRONYM:
+				default:
+					g_string_append_printf(out, "\\ac%s{%s}", (n->key == ABBRPLURAL) ? "p" : "", temp_str->str);
+					break;
+			}
+			g_string_free(temp_str, true);	/* true also releases the character buffer */
break;
case ABBRSTOP:
break;
@@ -165,7 +198,7 @@ void print_latex_node(GString *out, node *n, scratch_pad *scratch) {
case VERBATIMFENCE:
pad(out, 2, scratch);
if ((n->children != NULL) && (n->children->key == VERBATIMTYPE)) {
- trim_trailing_whitespace(n->children->str);
+ trim_trailing_whitespace_in_node_str(n->children);
if (strlen(n->children->str) > 0) {
g_string_append_printf(out, "\\begin{lstlisting}[language=%s]\n%s\\end{lstlisting}", n->children->str,n->str);
scratch->padded = 0;
@@ -264,14 +297,28 @@ void print_latex_node(GString *out, node *n, scratch_pad *scratch) {
} else if (strcmp(n->str, "mmdheader") == 0) {
} else if (strcmp(n->str, "lang") == 0) {
} else if (strcmp(n->str, "latexinput") == 0) {
- trim_trailing_whitespace(n->children->str);
+ trim_trailing_whitespace_in_node_str(n->children);
g_string_append_printf(out, "\\input{%s}\n", n->children->str);
} else if (strcmp(n->str, "latexfooter") == 0) {
- trim_trailing_whitespace(n->children->str);
+ trim_trailing_whitespace_in_node_str(n->children);
scratch->latex_footer = strdup(n->children->str);
} else if (strcmp(n->str, "bibtex") == 0) {
- trim_trailing_whitespace(n->children->str);
+ trim_trailing_whitespace_in_node_str(n->children);
g_string_append_printf(out, "\\def\\bibliocommand{\\bibliography{%s}}\n",n->children->str);
+ } else if (strcmp(n->str, "latexacronympackage") == 0) {
+ temp = label_from_node_tree(n->children);
+ if(strcmp(temp, "acronym") == 0) {
+ scratch->latex_acronym_config = (scratch->latex_acronym_config&LATEX_ACRONYM_LABEL_STYLE_MASK)|LATEX_ACRONYM_PACKAGE_ACRONYM;
+ } else if(strcmp(temp, "glossaries") == 0) {
+ scratch->latex_acronym_config = (scratch->latex_acronym_config&LATEX_ACRONYM_LABEL_STYLE_MASK)|LATEX_ACRONYM_PACKAGE_GLOSSARIES;
+ }
+ } else if (strcmp(n->str, "latexacronymlabelstyle") == 0) {
+ temp = label_from_node_tree(n->children);
+ if(strcmp(temp, "full") == 0) {
+ scratch->latex_acronym_config = (scratch->latex_acronym_config&LATEX_ACRONYM_PACKAGE_MASK)|LATEX_ACRONYM_LABEL_STYLE_FULL;
+ } else if(strncmp(temp, "abbreviation", sizeof("abbr")-1) == 0) {
+ scratch->latex_acronym_config = (scratch->latex_acronym_config&LATEX_ACRONYM_PACKAGE_MASK)|LATEX_ACRONYM_LABEL_STYLE_ABBR;
+ }
} else {
g_string_append_printf(out, "\\def\\");
print_latex_string(out, n->str, scratch);
@@ -281,7 +328,7 @@ void print_latex_node(GString *out, node *n, scratch_pad *scratch) {
}
break;
case METAVALUE:
- trim_trailing_whitespace(n->str);
+ trim_trailing_whitespace_in_node_str(n);
print_latex_string(out,n->str, scratch);
break;
case FOOTER:
diff --git a/libMultiMarkdown.h b/libMultiMarkdown.h
index 302ce0c..cabd878 100644
--- a/libMultiMarkdown.h
+++ b/libMultiMarkdown.h
@@ -147,17 +147,31 @@ enum keys {
VARIABLE,
ABBREVIATION,
ABBR,
+ ABBRPLURAL,
ABBRSTART,
ABBRSTOP,
TOC,
KEY_COUNTER /* This *MUST* be the last item in the list */
};
+// Type of node.len. size_t would be the 'correct' type but appears unnecessarily big for a node, so the
+// default is unsigned short; define NODE_LEN_SIZE_T or NODE_LEN_UINT at build time to select a wider type.
+#ifdef NODE_LEN_SIZE_T
+typedef size_t NODE_LEN;
+#elif defined(NODE_LEN_UINT)
+typedef unsigned int NODE_LEN;
+#else
+typedef unsigned short int NODE_LEN;
+#endif
/* This is the element used in the resulting parse tree */
+// The len element caches the length of str so that callers do not have to
+// re-scan the string (e.g., for strdup or string trimming); it also makes
+// comparing acronyms much faster.
struct node {
short key; /* what type of element are we? */
char *str; /* relevant string from source for element */
+ NODE_LEN len; // the length of the string
struct link_data *link_data; /* store link info when relevant */
struct node *children; /* child elements */
struct node *next; /* next element */
diff --git a/lyx.c b/lyx.c
index 6ddff50..c42a720 100644
--- a/lyx.c
+++ b/lyx.c
@@ -666,6 +666,7 @@ void print_lyx_node(GString *out, node *n, scratch_pad *scratch, bool no_newline
n->next = temp_node->next;
temp_node->next = NULL;
free_node(temp_node);
+ case ABBRPLURAL:
case ABBR:
/* In either case, now we call on the abbreviation */
// width = ascii_label_from_node(n->children->children);
@@ -824,7 +825,7 @@ void print_lyx_node(GString *out, node *n, scratch_pad *scratch, bool no_newline
g_string_append(out,"\\begin_layout Standard\n");
g_string_append(out,"\\begin_inset listings\n");
if ((n->children != NULL) && (n->children->key == VERBATIMTYPE)) {
- trim_trailing_whitespace(n->children->str);
+ trim_trailing_whitespace_in_node_str(n->children);
if (strlen(n->children->str) > 0) {
// NOTE: the language must match the LyX (LaTex) languages (e.g: Perl, not perl)
g_string_append_printf(out, "lstparams \"basicstyle={\\footnotesize\\ttfamily},language=%s\"\n", n->children->str,n->str);
diff --git a/memoir.c b/memoir.c
index eae547a..b63b7ec 100644
--- a/memoir.c
+++ b/memoir.c
@@ -41,7 +41,7 @@ void print_memoir_node(GString *out, node *n, scratch_pad *scratch) {
case VERBATIMFENCE:
pad(out, 2, scratch);
if ((n->children != NULL) && (n->children->key == VERBATIMTYPE)) {
- trim_trailing_whitespace(n->children->str);
+ trim_trailing_whitespace_in_node_str(n->children);
if (strlen(n->children->str) > 0) {
g_string_append_printf(out, "\\begin{adjustwidth}{2.5em}{2.5em}\n\\begin{lstlisting}[language=%s]\n", n->children->str);
print_raw_node(out, n);
diff --git a/odf.c b/odf.c
index 18b25dc..29f957f 100644
--- a/odf.c
+++ b/odf.c
@@ -84,6 +84,7 @@ void print_odf_node(GString *out, node *n, scratch_pad *scratch) {
print_odf_node_tree(out,n->children,scratch);
break;
case STR:
+ case ABBRPLURAL:
case ABBR:
case ABBRSTART:
case ABBRSTOP:
@@ -270,7 +271,7 @@ void print_odf_node(GString *out, node *n, scratch_pad *scratch) {
free(temp);
break;
case METAVALUE:
- trim_trailing_whitespace(n->str);
+ trim_trailing_whitespace_in_node_str(n);
print_odf_string(out,n->str);
break;
case FOOTER:
diff --git a/opml.c b/opml.c
index a74413f..a49c123 100644
--- a/opml.c
+++ b/opml.c
@@ -113,7 +113,7 @@ void print_opml_node(GString *out, node *n, scratch_pad *scratch) {
g_string_append_printf(out, "str);
g_string_append_printf(out, "\" _note=\"");
- trim_trailing_newlines(n->children->str);
+ trim_trailing_newlines_in_node_str(n->children);
print_opml_string(out, n->children->str);
g_string_append_printf(out, "\"/>");
break;
diff --git a/parse_utilities.c b/parse_utilities.c
index 7a9045a..4a1e000 100644
--- a/parse_utilities.c
+++ b/parse_utilities.c
@@ -26,12 +26,10 @@
/* Create a new node in the parse tree */
node * mk_node(int key) {
- node *result = (node *) malloc(sizeof(node));
+ node *result = (node *) calloc(1, sizeof(node));
+ // initializes the node structure in a more flexible way (e.g., if a new
+ // node element like len is introduced)
result->key = key;
- result->str = NULL;
- result->children = NULL;
- result->next = NULL;
- result->link_data = NULL;
return result;
}
@@ -39,7 +37,7 @@ node * mk_node(int key) {
node * mk_str(char *string) {
node *result = mk_node(STR);
assert(string != NULL);
- result->str = strdup(string);
+ result->str = strdup_ext(string, &(result->len));
return result;
}
@@ -53,6 +51,7 @@ node * mk_str_from_list(node *list, bool extra_newline) {
g_string_append(c, "\n");
result->str = c->str;
+ result->len = c->currentStringLength;
g_string_free(c, false);
return result;
@@ -107,7 +106,7 @@ node * mk_list(int key, node *list) {
node * mk_pos_node(int key, char *string, unsigned int start, unsigned int stop) {
node *result = mk_node(key);
if (string != NULL)
- result->str = strdup(string);
+ result->str = strdup_ext(string, &(result->len));
return result;
}
@@ -134,6 +133,7 @@ void free_node(node *n) {
if (n->str != NULL)
free(n->str);
n->str = NULL;
+ n->len = 0;
free_link_data(n->link_data);
n->link_data = NULL;
@@ -300,6 +300,8 @@ scratch_pad * mk_scratch_pad(unsigned long extensions) {
result->lyx_table_need_line = FALSE; /* CRC - No table yet */
result->lyx_table_total_rows = 0; /* CRC - No rows */
result->lyx_table_total_cols = 0; /* CRC - No Columns */
+
+ result->latex_acronym_config = LATEX_ACRONYM_LABEL_STYLE_DEFAULT|LATEX_ACRONYM_PACKAGE_DEFAULT;
return result;
}
@@ -769,46 +771,66 @@ char * metavalue_for_key(char *key, node *list) {
return result;
}
-/* Trim spaces at end of string */
-void trim_trailing_whitespace(char *str) {
- unsigned long l;
-
- if (str == NULL)
- return;
+// the trimming character recognition function type
+typedef int (is_trim_char)(int c);
+
+// identifies new line characters
+static inline int is_newline(int c) {
+ return '\n' == c || '\r' == c || '\f' == c;
+}
+
+/**
+ Strips trailing characters from s for which the predicate f returns non-zero.
+
+ The string is modified in place: a NUL terminator is written directly after
+ the last character that f does not match.
+
+ Example usage:
+
+ @code
+ char *s0 = strdup(" 1 23\n\r\f\n\r\f");
+ NODE_LEN len = trim_trailing_newlines(s0);
+ printf("-->%s<-- [%lu]\n", s0, (unsigned long)len);
+ free(s0);
+ @endcode
+
+ @param s The string to be trimmed in place; NULL and "" yield 0.
+ @param f Predicate called with each trailing byte converted to unsigned char.
+ @return The length of the trimmed string s, converted to NODE_LEN.
+ */
+NODE_LEN trim_trailing_trim_char(char *s, is_trim_char f) {
+	size_t len;	/* number of characters kept; an index, so it never drops below the start of s */
+	if (s == NULL || !s[0])
+		return 0;
- l = strlen(str);
+	len = strlen(s);
- if (l < 1)
- return;
+	/* cast to unsigned char: <ctype.h> predicates are undefined for negative char values */
+	while (len > 0 && f((unsigned char)s[len - 1])) len--;
- while ( (l > 0) && (( str[l - 1] == ' ' ) ||
- ( str[l - 1] == '\n' ) ||
- ( str[l - 1] == '\r' ) ||
- ( str[l - 1] == '\t' )) ) {
- str[l - 1] = '\0';
- l = strlen(str);
- }
+	s[len] = '\0';
+	return (NODE_LEN)len;
}
/* Trim spaces at end of string */
-void trim_trailing_newlines(char *str) {
- unsigned long l;
-
- if (str == NULL)
- return;
-
- l = strlen(str);
-
- if (l < 1)
- return;
-
- while ( (l > 0) && (( str[l - 1] == '\n' ) ||
- ( str[l - 1] == '\r' )) ) {
- str[l - 1] = '\0';
- l = strlen(str);
- }
+inline NODE_LEN trim_trailing_whitespace(char *str) {
+ return trim_trailing_trim_char(str, &isspace);
+}
+
+inline void trim_trailing_whitespace_in_node_str(node *node) {
+ node->len = trim_trailing_whitespace(node->str);
}
+/* Trim spaces at end of string */
+NODE_LEN trim_trailing_newlines(char *str) {
+ return trim_trailing_trim_char(str, &is_newline);
+}
+
+inline void trim_trailing_newlines_in_node_str(node *node) {
+ node->len = trim_trailing_newlines(node->str);
+}
+
+
/* Return version */
char * mmd_version(void) {
char *result;
@@ -839,10 +861,10 @@ node * copy_node(node *n) {
else {
node *m = (node *) malloc(sizeof(node));
- *m = *n;
+ memcpy(m, n, sizeof(node)); // explicitly initializes all node elements
if (n->str != NULL)
- m->str = strdup(n->str);
+ m->str = strdup(n->str); // len already set by memcpy
if (n->link_data != NULL) {
m->link_data = mk_link_data(n->link_data->label, n->link_data->source, n->link_data->title, copy_node_tree(n->link_data->attr));
@@ -881,4 +903,44 @@ char * my_strndup(const char * source, size_t n) {
result[len] = '\0';
return result;
+}
+
+/**
+ A strdup() replacement returning in addition to the duplicated string
+ the length of the string.
+
+ Warning: In order to prevent repeated casting the length receiving
+ parameter is intentionally chosen to be of type NODE_LEN - instead of
+ using the fixed size_t - thereby matching the type decision taken for the
+ node's len element. The copy itself is always complete and NUL-terminated;
+ only the reported length saturates at the largest NODE_LEN value.
+
+ Example usage:
+
+ @code
+ char *s = "test";
+ NODE_LEN len = 0;
+
+ char *d = strdup_ext(s, &len);
+ free(d);
+ @endcode
+
+ @param s
+ The source string to be duplicated; may be NULL.
+ @param len
+ Receives the length of the copy; 0 if s is NULL or the allocation fails.
+ @return A copy of the string s, or NULL. The returned string has to be
+ freed by the caller.
+ */
+char *strdup_ext(const char *s, NODE_LEN *len) {
+	char *p = NULL;
+	*len = 0;
+	if(s) {
+		size_t n = strlen(s);	/* size the copy with the full length; NODE_LEN may be narrower */
+		if((p = malloc(n+1))) {
+			memcpy(p, s, n+1);
+			*len = (n > (NODE_LEN)-1) ? (NODE_LEN)-1 : (NODE_LEN)n;
+		}
+	}
+	return p;
}
\ No newline at end of file
diff --git a/parser.h b/parser.h
index 49e0a8e..67dadcb 100644
--- a/parser.h
+++ b/parser.h
@@ -88,6 +88,7 @@ typedef struct {
int lyx_table_total_cols; /* CRC - The total number of columns in the table */
node *lyx_table_caption; /* CRC - Hold the table caption */
GString *lyx_debug_pad; /* CRC - padding to indent debugging informaiton */
+ unsigned short int latex_acronym_config; // indicates the acronym package and label style to be used
} scratch_pad;
/* Define smart typography languages -- first in list is default */
@@ -112,6 +113,26 @@ enum smartelements {
APOS,
};
+enum latex_acronym_package {
+ LATEX_ACRONYM_PACKAGE_UNDEF = 0,
+ // LaTeX acronym package by Tobias Oetiker
+ // see http://www.ctan.org/tex-archive/macros/latex/contrib/acronym/
+ LATEX_ACRONYM_PACKAGE_ACRONYM,
+ // LaTeX glossaries package by Nicola Talbot
+ // see http://www.ctan.org/pkg/glossaries
+ LATEX_ACRONYM_PACKAGE_GLOSSARIES,
+ LATEX_ACRONYM_PACKAGE_DEFAULT = LATEX_ACRONYM_PACKAGE_ACRONYM,
+};
+
+enum latex_acronym_label_style {
+ LATEX_ACRONYM_LABEL_STYLE_UNDEF,
+ LATEX_ACRONYM_LABEL_STYLE_FULL = 1 << 8,
+ LATEX_ACRONYM_LABEL_STYLE_ABBR = 2 << 8,
+ LATEX_ACRONYM_LABEL_STYLE_DEFAULT = LATEX_ACRONYM_LABEL_STYLE_FULL,
+};
+
+static const int LATEX_ACRONYM_PACKAGE_MASK = 0x00FF;
+static const int LATEX_ACRONYM_LABEL_STYLE_MASK = 0xFF00;
/* parser utilities declarations */
@@ -155,8 +176,10 @@ void extract_abbreviations(node *list, scratch_pad *scratch);
bool extension(int ext, unsigned long extensions);
/* export utilities */
-void trim_trailing_whitespace(char *str);
-void trim_trailing_newlines(char *str);
+NODE_LEN trim_trailing_whitespace(char *str);
+NODE_LEN trim_trailing_newlines(char *str);
+void trim_trailing_whitespace_in_node_str(node *node);
+void trim_trailing_newlines_in_node_str(node *node);
/* other utilities */
char * label_from_string(char *str);
@@ -184,5 +207,6 @@ void debug_node(node *n);
void debug_node_tree(node *n);
char * my_strndup(const char * source, size_t n);
+char *strdup_ext(const char *s, NODE_LEN *len);
#endif
diff --git a/rtf.c b/rtf.c
index d6cbb6a..c7a6169 100644
--- a/rtf.c
+++ b/rtf.c
@@ -120,7 +120,7 @@ void print_rtf_node(GString *out, node *n, scratch_pad *scratch) {
}
break;
case METAVALUE:
- trim_trailing_whitespace(n->str);
+ trim_trailing_whitespace_in_node_str(n);
print_rtf_string(out, n->str, scratch);
break;
case BLOCKQUOTEMARKER:
diff --git a/writer.c b/writer.c
index e7bddf6..450a79c 100644
--- a/writer.c
+++ b/writer.c
@@ -231,7 +231,7 @@ void extract_abbreviations(node *list, scratch_pad *scratch) {
case ABBREVIATION:
temp = copy_node(list);
list->key = KEY_COUNTER; /* Mark this as dead; we will use it elsewhere */
- trim_trailing_whitespace(temp->str);
+ trim_trailing_whitespace_in_node_str(temp);
scratch->abbreviations = cons(temp, scratch->abbreviations);
break;
case HEADINGSECTION:
@@ -249,12 +249,12 @@ void extract_abbreviations(node *list, scratch_pad *scratch) {
}
}
-
/* find_abbreviations -- use abbreviations to look for matching strings */
void find_abbreviations(node *list, scratch_pad *scratch) {
node *abbr = scratch->abbreviations;
node *temp, *target, *end = NULL;
bool ismatch;
+ bool isplural = false;
// Don't look if we didn't define any abbreviations */
if (abbr->key == KEY_COUNTER)
@@ -275,8 +275,18 @@ void find_abbreviations(node *list, scratch_pad *scratch) {
while((ismatch) && (temp != NULL) && (target != NULL)) {
switch (temp->key) {
case STR:
- if (strcmp(temp->str, target->str) != 0) {
+ if (strncmp(temp->str, target->str, temp->len) != 0) {
+ ismatch = false;
+ isplural = false;
+ } else if (temp->len == target->len) {
+ ismatch = true;
+ isplural = false;
+ } else if (1 == target->len - temp->len && 's' == target->str[target->len - 1]) {
+ ismatch = true;
+ isplural = true;
+ } else {
ismatch = false;
+ isplural = false;
}
case SPACE:
case KEY_COUNTER:
@@ -299,7 +309,7 @@ void find_abbreviations(node *list, scratch_pad *scratch) {
if (end != NULL)
end->key = ABBRSTOP;
} else {
- list->key = ABBR;
+ list->key = isplural ? ABBRPLURAL : ABBR;
}
}
}