diff --git a/main.c b/main.c index f288b38..19b97f5 100644 --- a/main.c +++ b/main.c @@ -9,7 +9,21 @@ int main(int argc, char *argv[]) { char *contents = NULL; - FILE *fp = fopen(argv[1], "r"); + + char *filename; + if (argc > 1) { + filename = argv[1]; + } else { + printf("Usage: %s \n", argv[0]); + return -1; + } + + FILE *fp = fopen(filename, "r"); + if (!fp) { + printf("Error opening %s\n", filename); + return -1; + } + fseek(fp, 0, SEEK_END); long len = ftell(fp); rewind(fp); diff --git a/readable.c b/readable.c index 69e3e8d..66bb8be 100644 --- a/readable.c +++ b/readable.c @@ -46,14 +46,14 @@ KHASH_MAP_INIT_STR(str, int); -#define __pointer_hash(x) (uintptr_t)(x) +#define __pointer_hash(x) (khint_t)(x) #define __pointer_equal(x, y) (x == y) KHASH_INIT(score, htmlNodePtr, float, 1, __pointer_hash, __pointer_equal); #ifdef READABLE_USE_LIBICU UChar *uastrdup(const char *s) { - int len = strlen(s); + int len = (int)strlen(s); UChar *us = malloc(sizeof(UChar) * (len + 1)); u_uastrcpy(us, s); return us; @@ -302,18 +302,20 @@ node_inner_html(htmlDocPtr doc, htmlNodePtr node) char *html = NULL; for (htmlNodePtr cur = node->children; cur; cur = cur->next) { char *cur_html = node_html(doc, cur); - int len = strlen(cur_html); + ssize_t len = (ssize_t)strlen(cur_html); ssize_t available_size = allocated_size - data_size - 1; if (len > available_size) { while (len > available_size) { - allocated_size = MAX(allocated_size * 1.2, 512); + allocated_size = MAX(allocated_size * 1.2f, 512); available_size = allocated_size - data_size - 1; } html = realloc(html, allocated_size); } strncpy(html + data_size, cur_html, len); data_size += len; - html[data_size] = '\0'; + if (html) { // This satisfies the static analyzer + html[data_size] = '\0'; + } free(cur_html); } return html; @@ -337,7 +339,7 @@ node_text_len(htmlNodePtr node) int len = 0; char *inner_text = node_inner_text(node); if (inner_text) { - len = strlen(inner_text); + len = (int)strlen(inner_text); free(inner_text); } return len; @@ -425,10 +427,10 @@ float name_score(const xmlChar *name) { float score = 0; - if (matches(POSITIVE_SCORE, name)) { + if (matches(POSITIVE_SCORE, (const char *)name)) { score += 25; } - if (matches(NEGATIVE_SCORE, name)) { + if (matches(NEGATIVE_SCORE, (const char *)name)) { score -= 25; } return score; @@ -587,7 +589,7 @@ clean_node_conditionally(htmlNodePtr node, kh_score_t *scores, char *node_text = node_inner_text(node); if (node_text) { commas = number_of_commas(node_text); - text_len = strlen(node_text); + text_len = (int)strlen(node_text); free(node_text); } if (commas < 10) { @@ -707,8 +709,8 @@ clean_node(htmlDocPtr doc, htmlNodePtr node, kh_score_t *scores, int options, } xmlChar *alt = xmlGetProp(node, BAD_CAST "alt"); xmlChar *title = xmlGetProp(node, BAD_CAST "title"); -#define xlen(x) (x ? strlen((char *)x) : 0) - int len = xlen(alt) + xlen(title) + strlen((char *)src); +#define xlen(x) (x ? (int)strlen((char *)x) : 0) + int len = xlen(alt) + xlen(title) + (int)strlen((char *)src); char *test = malloc(len + 1); strcpy(test, (char *)src); free(src); @@ -797,7 +799,7 @@ clean_node(htmlDocPtr doc, htmlNodePtr node, kh_score_t *scores, int options, } } } - if (node->name[0] == 'p') { + if (node->name && node->name[0] == 'p') { int nospaces = node_nospaces_len(node); if (!nospaces) { kh_str_t *tags = node_tags_count(node); @@ -885,7 +887,7 @@ search_article_image(htmlNodePtr node, htmlNodePtr prev) xmlChar *width = xmlGetProp(image, BAD_CAST "width"); xmlChar *height = xmlGetProp(image, BAD_CAST "height"); - if (matches(UNLIKELY_ARTICLE_IMAGE, src)) { + if (matches(UNLIKELY_ARTICLE_IMAGE, (const char *)src)) { score -= 20; } char *dot = strrchr((char *)src, '.'); @@ -1097,8 +1099,8 @@ style_px_dimensions(xmlChar *style, int *width, int *height) if (*wp && *hp) { char *wep = NULL; char *hep = NULL; - *width = strtol(wp, &wep, 10); - *height = strtol(hp, &hep, 10); + *width = (int)strtol(wp, &wep, 10); + *height = (int)strtol(hp, &hep, 10); if (wep && hep && *wep == 'p' && *hep == 'p' && *width && *height) { @@ -1220,7 +1222,7 @@ readable(const char *html, const char *url, const char *encoding, int options) if (!inner_text) { continue; } - int text_length = strlen(inner_text); + int text_length = (int)strlen(inner_text); if (text_length < 25) { free(inner_text); continue; @@ -1238,7 +1240,7 @@ readable(const char *html, const char *url, const char *encoding, int options) grand_parent_score = initialize_node_score(scores, grand_parent, options); candidates = rd_list_append(candidates, grand_parent); /* Look up the parent score again, since the hash - table might have grown and rehased, invalidating + table might have grown and rehashed, invalidating the pointer */ parent_score = lookup_score_ptr(scores, parent); @@ -1296,6 +1298,10 @@ readable(const char *html, const char *url, const char *encoding, int options) kh_value(scores, iter) = 0; top_candidate_score = &(kh_value(scores, iter)); } + if (!top_candidate) { + xmlFreeDoc(doc); + return NULL; + } #ifdef READABLE_DEBUG char *debug_name = node_test_name(top_candidate); DEBUG_LOG("Top candidate %s with score %f\n", debug_name, *top_candidate_score); @@ -1322,7 +1328,7 @@ readable(const char *html, const char *url, const char *encoding, int options) xmlChar *top_candidate_class = xmlGetProp(top_candidate, BAD_CAST "class"); DEBUG_LOG("Threshold %f\n", threshold); /* Insert nodes in the article */ - htmlNodePtr start = top_candidate->parent ? : top_candidate; + htmlNodePtr start = top_candidate->parent ? top_candidate->parent : top_candidate; htmlNodePtr next; for (htmlNodePtr cur = start->children; cur; cur = next) { next = cur->next; @@ -1355,7 +1361,7 @@ readable(const char *html, const char *url, const char *encoding, int options) if (xmlStrEqual(cur->name, BAD_CAST "p")) { float link_density = node_link_density(cur); char *inner_text = node_inner_text(cur); - int text_len = inner_text ? strlen(inner_text) : 0; + int text_len = inner_text ? (int)strlen(inner_text) : 0; if (text_len > 80 && link_density < 0.25) { #ifdef READABLE_DEBUG @@ -1487,7 +1493,7 @@ find_next_link(htmlDocPtr doc, xmlNodePtr node, const char *url) return NULL; } - if (xmlStrstr(href, BAD_CAST "http://") == href || + if (xmlStrstr(BAD_CAST href, BAD_CAST "http://") == href || xmlStrstr(BAD_CAST url, BAD_CAST "https://") == href) { return href;