|
5 | 5 | /** |
6 | 6 | * A lightweight, fast, and optimized XML file splitter with built-in tag data validation, written with the XMLParser library. The main goal of this is to split an XML file into multiple small chunks (hence the name), then save them as separate small XML files, so that slower servers, plugins, etc. can process XML files with even 10,000+ records. It is built on XMLParser, a powerful PHP XML processing library. |
7 | 7 | * |
| 8 | + * MINIMUM PHP VERSION: 7.4 |
| 9 | + * |
8 | 10 | * @author Borsodi Gergő |
9 | 11 | * @version 1.0 |
| 12 | + * |
10 | 13 | */ |
11 | 14 | class Chunker{ |
12 | 15 |
|
| 16 | + /** |
| 17 | + * The name of the file to be processed. |
| 18 | + * @var string |
| 19 | + */ |
13 | 20 | private string $xmlFile; |
| 21 | + |
| 22 | + /** |
| 23 | + * The maximum chunksize. |
| 24 | + * @var int |
| 25 | + */ |
14 | 26 | private int $chunkSize; |
| 27 | + |
| 28 | + /** |
| 29 | + * Counter for the chunks. |
| 30 | + * @var int |
| 31 | + */ |
15 | 32 | private int $CHUNKS; |
| 33 | + |
| 34 | + /** |
| 35 | + * The data that will be written into a chunk. |
| 36 | + * @var string |
| 37 | + */ |
16 | 38 | private string $PAYLOAD = ''; |
| 39 | + |
| 40 | + /** |
| 41 | + * The data used for one iteration of the main tag. |
| 42 | + * @var string |
| 43 | + */ |
17 | 44 | private string $PAYLOAD_TEMP = ''; |
| 45 | + |
| 46 | + /** |
| 47 | + * A container used to implement validation. |
| 48 | + * @var string |
| 49 | + */ |
18 | 50 | private string $DATA_BETWEEN = ''; |
| 51 | + |
| 52 | + /** |
| 53 | + * The root tag of the yet-to-process xml file. |
| 54 | + * @var string |
| 55 | + */ |
19 | 56 | private string $rootTag; |
| 57 | + |
| 58 | + /** |
| 59 | + * The charset used for the decoding/encoding process. |
| 60 | + * @var string |
| 61 | + */ |
20 | 62 | private string $CHARSET; |
| 63 | + |
| 64 | + /** |
| 65 | + * The prefix used for the output files. |
| 66 | + * @var string |
| 67 | + */ |
21 | 68 | private string $outputFilePrefix; |
| 69 | + |
| 70 | + /** |
| 71 | + * Counter for the items put into one chunk. |
| 72 | + * @var int |
| 73 | + */ |
22 | 74 | private int $ITEMCOUNT = 0; |
| 75 | + |
| 76 | + /** |
| 77 | + * The main tag, which defines one item in the chunking. |
| 78 | + * @var string |
| 79 | + */ |
23 | 80 | private string $CHUNKON; |
| 81 | + |
| 82 | + /** |
| 83 | + * A variable used for logging. |
| 84 | + * @var string |
| 85 | + */ |
24 | 86 | private string $log = ""; |
| 87 | + |
| 88 | + /** |
| 89 | + * The total number of processed main tags. |
| 90 | + * @var int |
| 91 | + */ |
25 | 92 | private int $totalItems = 0; |
| 93 | + |
| 94 | + /** |
| 95 | + * A variable that indicates whether a main tag that doesn't satisfy the validation has been found. |
| 96 | + * @var bool |
| 97 | + */ |
26 | 98 | private bool $excludedItemFound = false; |
| 99 | + |
| 100 | + /** |
| 101 | + * A variable to indicate that the next data that will be read, has to be validated since its opening tag is present in $checkingTags. |
| 102 | + * @var bool |
| 103 | + */ |
27 | 104 | private bool $checkNextData = false; |
| 105 | + |
| 106 | + /** |
| 107 | + * A variable that carries the tagname of the data that is about to be validated. |
| 108 | + * @var string |
| 109 | + */ |
28 | 110 | private string $checkNextDataTag = ''; |
| 111 | + |
| 112 | + /** |
| 113 | + * An array of tags whose data has to be validated at runtime. |
| 114 | + * @var array |
| 115 | + */ |
29 | 116 | private array $checkingTags = array(); |
| 117 | + |
| 118 | + /** |
| 119 | + * A callback function, that processes the validation. Has to be a callable. |
| 120 | + * @var callable |
| 121 | + */ |
30 | 122 | private $passesValidation; |
31 | 123 |
|
32 | 124 | /** |
@@ -199,7 +291,7 @@ private function createXMLParser($CHARSET = "UTF-8", $bareXML = false) { |
199 | 291 | * A function to start the chunking process. It will initiate the parser instance and start the XML parsing, chunking the data at every $chunkSize interval. |
200 | 292 | * @param string $mainTag The tag that will be used to count the number of main elements in a chunk. Usually the second-level XML tag in a document. |
201 | 293 | * @param string $rootTag The root tag, of which every $mainTag is a child. There is only one of these in an XML document (not the XML header, which is in the first row). |
202 | | - * @param string $charset The character set used by the parser. **Default: UTF-8** |
| 294 | + * @param string $charset The character set used by the parser. **Default: UTF-8** Possible values: "UTF-8", "ISO-8859-1" |
203 | 295 | * |
204 | 296 | * @return string The main log that was created during the chunking |
205 | 297 | */ |
|
0 commit comments