Skip to content

Commit eddc4e3

Browse files
5.4.20 (#103)
* add long name parameters, update help * refactor: best component selection logic. Improve rank processing * third party filter tune-up * improve component_hint_date_comparation comments * refactor: update license report by SP-3766 * add rank filtered status to report * update readme and help
1 parent d23df88 commit eddc4e3

25 files changed

+813
-363
lines changed

README.md

Lines changed: 81 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -43,16 +43,87 @@ You can create your own knowledgebase with the minr command, available at https:
4343

4444
Syntax: scanoss [parameters] [TARGET]
4545

46-
Configuration:
47-
* -w Treats TARGET as a .wfp file regardless of the actual file extension
48-
* -s FILE Use assets specified in the provided JSON SBOM (CycloneDX/SPDX2.2 JSON format) as input to identification
49-
* -b FILE Ignore matches to assets specified in the provided JSON SBOM (CycloneDX/SPDX2.2 JSON format)
50-
51-
Options:
52-
* -t Tests engine performance
53-
* -v Display version and exit
54-
* -h Display this help and exit
55-
* -d Enable debugging information
46+
## Configuration Options
47+
48+
### Basic Configuration
49+
* `-w, --wfp` - Process TARGET as a .wfp file, regardless of its actual extension
50+
* `-H, --hpsm` - Enable High Precision Snippet Match mode (requires 'libhpsm.so' in the system)
51+
* `-M, --max-snippets NUM` - Search for up to NUM different components in each file (maximum: 9)
52+
* `-N, --max-components NUM` - Set maximum number of components (default: 5)
53+
* `-T, --tolerance NUM` - Set snippet scanning tolerance percentage (default: 0.1)
54+
* `-r, --rank NUM` - Set maximum component rank accepted (default: 11)
55+
* `--max-files NUM` - Set maximum number of files to fetch during matching (default: 12000)
56+
* `--min-match-hits NUM` - Set minimum snippet ID hits for a match (default: 3, disables auto-adjust)
57+
* `--min-match-lines NUM` - Set minimum matched lines for a range (default: 10, disables auto-adjust)
58+
* `--range-tolerance NUM` - Set max non-matched lines tolerated in a range (default: 5)
59+
* `--ignore-file-ext` - Ignore file extension during snippet matching (default: honor extension)
60+
61+
### SBOM and Filtering
62+
* `-s, --sbom FILE` - Include assets from a JSON SBOM file (CycloneDX/SPDX2.2 format) in identification
63+
* `-b, --blacklist FILE` - Exclude matches from assets listed in JSON SBOM file (CycloneDX/SPDX2.2 format)
64+
* `--force-snippet` - Same as "-b" but with forced snippet scanning
65+
* `-c, --component HINT` - Add a component HINT to guide scan results
66+
67+
### Attribution and Licenses
68+
* `-a, --attribution FILE` - Show attribution notices for the provided SBOM.json file
69+
* `-k, --key KEY` - Show contents of the specified KEY file from MZ sources archive
70+
* `-l, --license LICENSE` - Display OSADL metadata for the given SPDX license ID
71+
* `-L, --full-license` - Enable full license report
72+
* `-F, --flags FLAGS` - Set engine scanning flags (see Engine Flags section below)
73+
74+
### General Options
75+
* `-t, --test` - Run engine performance tests
76+
* `-v, --version` - Show version information and exit
77+
* `-n, --name NAME` - Set database name (default: oss)
78+
* `-h, --help` - Display help information and exit
79+
* `-d, --debug` - Store debugging information to disk (/tmp)
80+
* `-q, --quiet` - Suppress JSON output (show only debugging info via STDERR)
81+
82+
## Environment Variables
83+
84+
* `SCANOSS_MATCHMAP_MAX` - Set the snippet scanning match map size (default: 10000)
85+
* `SCANOSS_FILE_CONTENTS_URL` - Define the API URL endpoint for sources. Source URL won't be reported if not defined
86+
87+
## Engine Scanning Flags
88+
89+
Configure the scanning engine using flags with the `-F/--flags` parameter. To combine several settings, add their flag values together (for example, `12` = `4` + `8`). These settings can also be specified in `/etc/scanoss_flags.cfg`.
90+
91+
| Flag | Setting |
92+
|-------|-------------------------------------------------------|
93+
| 1 | Disable snippet matching (default: enabled) |
94+
| 2 | Enable snippet_ids (default: disabled) |
95+
| 4 | Disable dependencies (default: enabled) |
96+
| 8 | Disable licenses (default: enabled) |
97+
| 16 | Disable copyrights (default: enabled) |
98+
| 32 | Disable vulnerabilities (default: enabled) |
99+
| 64 | Disable quality (default: enabled) |
100+
| 128 | Disable cryptography (default: enabled) |
101+
| 256 | Disable best match only (default: enabled) |
102+
| 512 | Hide identified files (default: disabled) |
103+
| 1024 | Enable download_url (default: disabled) |
104+
| 2048 | Enable "use path hint" logic (default: disabled) |
105+
| 4096 | Disable extended server stats (default: enabled) |
106+
| 8192 | Disable health layer (default: enabled) |
107+
| 16384 | Enable high accuracy, slower scan (default: disabled) |
108+
109+
### Examples:
110+
```bash
111+
# Scan DIRECTORY without license and dependency data
112+
scanoss -F 12 DIRECTORY
113+
scanoss --flags 12 DIRECTORY
114+
115+
# Scan TARGET including SBOM assets
116+
scanoss --sbom my_sbom.json TARGET
117+
118+
# Scan with custom snippet matching parameters
119+
scanoss --min-match-hits 5 --min-match-lines 15 TARGET
120+
121+
# Scan with custom range tolerance
122+
scanoss --range-tolerance 10 TARGET
123+
124+
# Ignore file extensions during matching
125+
scanoss --ignore-file-ext TARGET
126+
```
56127

57128
# File matching logic
58129

inc/component.h

Lines changed: 38 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
#define __COMPONENT_H
33

44
#include "scanoss.h"
5+
#include "limits.h"
56

67
#define COMPONENT_DEFAULT_RANK 999 //default rank for components without rank information
78
#define COMPONENT_RANK_SELECTION_MAX 8 //max rank to be considered in component selection
@@ -18,11 +19,13 @@ extern int component_rank_max;
1819
*
1920
*/
2021
enum {
22+
IDENTIFIED_FILTERED = -1,
2123
IDENTIFIED_NONE = 0,
2224
IDENTIFIED_PURL,
23-
IDENTIFIED_PURL_VERSION
25+
IDENTIFIED_PURL_VERSION,
2426
};
2527

28+
2629
typedef struct component_data_t
2730
{
2831
char * vendor; /* component vendor */
@@ -62,6 +65,40 @@ typedef struct component_data_t
6265
int third_party_rank; /* Saves third party ranking*/
6366
} component_data_t;
6467

68+
typedef struct keywords /* a word stored inline together with an integer counter */
69+
{
70+
int count; /* counter kept alongside word; presumably its number of occurrences -- TODO confirm against users */
71+
char word[MAX_FIELD_LN]; /* word text, held in a fixed MAX_FIELD_LN-byte buffer */
72+
} keywords;
73+
74+
75+
typedef struct file_recordset /* one file record: an MD5-sized url id plus a path stored inline */
76+
{
77+
uint8_t url_id[MD5_LEN]; /* MD5_LEN-byte identifier; presumably the key of the related url record -- TODO confirm */
78+
char path[MAX_FILE_PATH]; /* file path, held in a fixed MAX_FILE_PATH-byte buffer */
79+
int path_ln; /* presumably the length of path -- verify against the code that fills it */
80+
bool external; /* NOTE(review): meaning of this flag is not visible from this header -- confirm */
81+
} file_recordset;
82+
83+
typedef struct len_rank /* pairs an integer id with an integer length */
84+
{
85+
int id; /* NOTE(review): what this id indexes is not visible from this header -- confirm */
86+
int len; /* length associated with id; presumably used as the ranking key -- TODO confirm */
87+
} len_rank;
88+
89+
typedef struct component_item /* string attributes describing one component; element type of ignore_components and declared_components */
90+
{
91+
char * vendor; /* vendor string */
92+
char * component; /* component name string */
93+
char * purl; /* purl string */
94+
char * version; /* version string */
95+
char * license; /* license string */
96+
} component_item; /* NOTE(review): ownership of the pointed-to strings is not visible here -- confirm who frees them */
97+
98+
extern component_item *ignore_components;
99+
extern component_item *declared_components;
100+
101+
65102
component_data_t * component_init(void);
66103
void component_data_free(component_data_t * data);
67104
bool fill_component(component_data_t * component, uint8_t *url_key, char *file_path, uint8_t *url_record);

inc/limits.h

Lines changed: 11 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -34,25 +34,24 @@
3434
#define MAX_QUERY_RESPONSE (1024 * 1024 * 8)
3535
#define SLOW_QUERY_LIMIT_IN_USEC 2000000
3636
#define MAX_JSON_VALUE_LEN 4096
37+
#define MAX_FILE_PATH 1024
38+
#define FETCH_MAX_FILES_DEFAULT 12000
39+
#define MIN_FILE_SIZE 256 // files below this size will be ignored
40+
#define CRC_LIST_LEN 1024 // list of crc checksums to avoid metadata duplicates
3741

3842
/* Snippets */
3943
#define DEFAULT_MATCHMAP_FILES 10000 // Default number of files evaluated in snippet matching
4044
#define MAX_MATCHMAP_FILES (DEFAULT_MATCHMAP_FILES * 10) // Max number of files evaluated in snippet matching to prevent performance issues
4145
#define MIN_LINES_COVERAGE 0.8
4246
#define SKIP_SNIPPETS_IF_FILE_BIGGER (1024 * 1024 * 4)
4347
#define MAX_SNIPPETS_SCANNED 2500
44-
48+
#define SNIPPETS_DEFAULT_RANGE_TOLERANCE 5 /** A maximum number of non-matched lines tolerated inside a matching range */
49+
#define SNIPPETS_DEFAULT_MIN_MATCH_LINES 5 /** Minimum number of lines matched for a match range to be accepted */
50+
#define SNIPPETS_DEFAULT_MIN_MATCH_HITS 2 /** Minimum number of snippet ID hits to produce a snippet match*/
51+
#define SNIPPETS_DEFAULT_ADJUST_TOLERANCE true /** Adjust tolerance based on file size */
52+
#define SNIPPETS_DEFAULT_HONOR_FILE_EXTENSION true /** Honor file extension during snippet matching */
53+
#define DEFAULT_FETCH_MAX_FILES 12000 /** Maximum number of files to fetch during component matching */
4554
/* Variables */
46-
47-
/* During snippet scanning, when a wfp (with more than consecutive_threshold wfps) produces a score higher
48-
than consecutive_score by consecutive_hits in a row, the scan will skip consecutive_jump snippets */
49-
extern int consecutive_score;
50-
extern int consecutive_hits;
51-
extern int consecutive_jump;
52-
extern int consecutive_threshold;
53-
54-
extern int range_tolerance; // A maximum number of non-matched lines tolerated inside a matching range
55-
extern int min_match_lines; // Minimum number of lines matched for a match range to be acepted
56-
extern int min_match_hits; // Minimum number of snippet ID hits to produce a snippet match
55+
extern int fetch_max_files; // Maximum number of files to fetch during component matching
5756

5857
#endif

inc/match.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@ typedef struct match_data_t
2525
uint32_t * crclist; /* pointer to crc list used for processing */
2626
char * quality_text; /* quality string used in json output format */
2727
char * crytography_text; /* cryptography string used in json output format */
28-
uint16_t from;
2928
} match_data_t;
3029

3130
match_data_t * match_data_copy(match_data_t * in);

inc/parse.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
#include <stdint.h>
55
#include <stdbool.h>
66
#include "scanoss.h"
7+
#include "component.h"
78

89
void extract_csv(char *out, char *in, int n, long limit);
910
void lowercase(char *word);

inc/scan.h

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -66,16 +66,22 @@ typedef struct scan_data_t
6666
int max_matchmap_size;
6767
bool printed_succed;
6868
bool windows_line_endings;
69+
bool snippet_adjust_tolerance; // Enable adjust snippet tolerance based on file size
70+
int component_ranking_threshold; //-1 = disable ranking. 0 = all accepted
71+
int snippet_min_hits;
72+
int snippet_min_lines;
73+
int snippet_range_tolerance;
74+
bool snippet_honor_file_extension;
6975
} scan_data_t;
7076

7177
extern bool force_snippet_scan;
7278

73-
scan_data_t * scan_data_init(char *target, int max_snippets, int max_components);
79+
scan_data_t * scan_data_init(char *target, int max_snippets, int max_components, bool adjust_tolerance, int component_ranking_threshold, int snippet_min_hits, int snippet_min_lines, int snippet_range_tolerance, bool snippet_honor_file_extension);
7480
void scan_data_free (scan_data_t * scan);
7581

7682
void ldb_scan(scan_data_t * scan);
7783
match_t ldb_scan_snippets(scan_data_t *scan_ptr);
78-
int wfp_scan(char * path, int scan_max_snippets, int scan_max_components);
79-
int hash_scan(char *path, int scan_max_snippets, int scan_max_components);
84+
int wfp_scan(char * path, int scan_max_snippets, int scan_max_components, bool adjust_tolerance, int component_ranking_threshold, int snippet_min_hits, int snippet_min_lines, int snippet_range_tolerance, bool snippet_honor_file_extension);
85+
int hash_scan(char *path, int scan_max_snippets, int scan_max_components, bool adjust_tolerance, int component_ranking_threshold, int snippet_min_hits, int snippet_min_lines, int snippet_range_tolerance, bool snippet_honor_file_extension);
8086

8187
#endif

inc/scanoss.h

Lines changed: 1 addition & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -30,16 +30,10 @@
3030
#include <unistd.h>
3131
#include "limits.h"
3232

33-
#define MAX_FILE_PATH 1024
34-
#define FETCH_MAX_FILES 12000
35-
#define MIN_FILE_SIZE 256 // files below this size will be ignored
36-
#define CRC_LIST_LEN 1024 // list of crc checksums to avoid metadata duplicates
37-
#define SNIPPET_LINE_TOLERANCE 10
38-
3933
#define WFP_LN 4
4034
#define WFP_REC_LN 18
4135

42-
#define SCANOSS_VERSION "5.4.19"
36+
#define SCANOSS_VERSION "5.4.20"
4337

4438
/* Log files */
4539
#define SCAN_LOG "/tmp/scanoss_scan.log"
@@ -84,40 +78,8 @@ extern const char *dependency_sources[];
8478

8579
typedef enum {MATCH_NONE, MATCH_FILE, MATCH_SNIPPET, MATCH_BINARY} match_t;
8680

87-
typedef struct keywords
88-
{
89-
int count;
90-
char word[MAX_FIELD_LN];
91-
} keywords;
92-
93-
94-
typedef struct file_recordset
95-
{
96-
uint8_t url_id[MD5_LEN];
97-
char path[MAX_FILE_PATH];
98-
int path_ln;
99-
bool external;
100-
} file_recordset;
101-
102-
typedef struct len_rank
103-
{
104-
int id;
105-
int len;
106-
} len_rank;
107-
108-
typedef struct component_item
109-
{
110-
char * vendor;
111-
char * component;
112-
char * purl;
113-
char * version;
114-
char * license;
115-
} component_item;
116-
117-
11881
extern long microseconds_start;
11982
extern int map_rec_len;
120-
extern bool match_extensions;
12183

12284
/*component hint hold the last component matched/guessed */
12385
extern char * component_hint;
@@ -141,12 +103,8 @@ extern struct ldb_table oss_notices;
141103

142104

143105
extern bool first_file;
144-
extern int max_vulnerabilities;
145106

146107
extern char *ignored_assets;
147-
extern component_item *ignore_components;
148-
extern component_item *declared_components;
149-
150108

151109
/* Prototype declarations */
152110

src/binary_scan.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -270,7 +270,7 @@ int binary_scan(char * input)
270270
char * file_name = field_n(3,input);
271271
int target_len = strchr(file_name,',') - file_name;
272272
char * target = strndup(file_name, target_len);
273-
scan_data_t * scan = scan_data_init(target, 1, 1);
273+
scan_data_t * scan = scan_data_init(target, 1, 1, true, 0, 3, 5, SNIPPETS_DEFAULT_RANGE_TOLERANCE, false);
274274
free(target);
275275
memcpy(scan->md5, bin_md5, MD5_LEN);
276276
scan->match_type = MATCH_FILE;

src/component.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -253,7 +253,7 @@ bool fill_component(component_data_t *component, uint8_t *url_key, char *file_pa
253253
extract_csv(license, (char *)url_record, 5, sizeof(license));
254254
extract_csv(purl, (char *)url_record, 6, sizeof(purl));
255255
extract_csv(url, (char *)url_record, 7, sizeof(url));
256-
extract_csv(rank, (char *)url_record, 13, sizeof(rank)); //extracts the rank field if available
256+
extract_csv(rank, (char *)url_record, 14, sizeof(rank)); //extracts the rank field if available
257257
/* Fill url stats if these are available*/
258258
for (int i = 0; i < 5; i++) {
259259
char stat[16] = "\0";
@@ -292,10 +292,10 @@ bool fill_component(component_data_t *component, uint8_t *url_key, char *file_pa
292292
MD5((uint8_t *)component->purls[0], strlen(component->purls[0]), component->purls_md5[0]);
293293
}
294294
component->age = -1;
295-
if (*rank && strlen(rank) < 3)
295+
if (*rank)
296296
{
297297
component->rank = atoi(rank);
298-
//scanlog("Component rank from DB: %d\n", component->rank);
298+
//scanlog("Component rank from DB: %s- %d\n", rank, component->rank);
299299
}
300300
else
301301
component->rank = COMPONENT_DEFAULT_RANK;

src/debug.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,7 @@ void scan_benchmark()
138138

139139
for (int f = 0; f < total_files ; f++)
140140
{
141-
scan_data_t * scan = scan_data_init("pseudo_file", 0, 0);
141+
scan_data_t * scan = scan_data_init("pseudo_file", 0, 0, true, 0, 3, 5, SNIPPETS_DEFAULT_RANGE_TOLERANCE, false);
142142
scan->preload = true;
143143
memcpy(scan->md5, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0", MD5_LEN);
144144
strcpy(scan->file_size, "1024");

0 commit comments

Comments
 (0)