| 1 | // ================================================================ // |
|---|
| 2 | // // |
|---|
| 3 | // File : arb_zfile.cxx // |
|---|
| 4 | // Purpose : Compressed file I/O // |
|---|
| 5 | // // |
|---|
| 6 | // Coded by Ralf Westram (coder@reallysoft.de) in November 2015 // |
|---|
| 7 | // http://www.arb-home.de/ // |
|---|
| 8 | // // |
|---|
| 9 | // ================================================================ // |
|---|
| 10 | |
|---|
| 11 | #include "arb_zfile.h" |
|---|
| 12 | #include "arb_file.h" |
|---|
| 13 | #include "arb_msg.h" |
|---|
| 14 | #include "arb_misc.h" |
|---|
| 15 | #include "arb_string.h" |
|---|
| 16 | |
|---|
| 17 | #include <string> |
|---|
| 18 | #include <map> |
|---|
| 19 | |
|---|
| 20 | using namespace std; |
|---|
| 21 | |
|---|
/// Bookkeeping record stored for each successfully opened stream,
/// to support a proper error message on close.
class zinfo {
    bool        writing;  // true -> stream was opened for output, false -> for input
    std::string filename; // file name as passed by the caller
    std::string pipe_cmd; // shell command behind the stream (empty string for plain files)
public:
    // Default constructor; needed because std::map::operator[] default-constructs
    // the mapped value. 'writing' is initialized explicitly, otherwise copying a
    // default-constructed zinfo would read an indeterminate bool.
    zinfo() : writing(false) {}

    zinfo(bool writing_, const char *filename_, const char *pipe_cmd_)
        : writing(writing_),
          filename(filename_),
          pipe_cmd(pipe_cmd_)
    {}

    /// @return true if the stream was opened for writing
    bool isOutputPipe() const { return writing; }
    /// @return name of the file the stream refers to (valid as long as this object lives)
    const char *get_filename() const { return filename.c_str(); }
    /// @return command used to open the stream (valid as long as this object lives)
    const char *get_pipecmd() const { return pipe_cmd.c_str(); }
};

// registry of all currently open streams, keyed by their FILE handle
static std::map<FILE*, zinfo> zfile_info;
|---|
| 41 | |
|---|
| 42 | FILE *ARB_zfopen(const char *name, const char *mode, FileCompressionMode cmode, GB_ERROR& error, bool hideStderr) { |
|---|
| 43 | arb_assert(!error); |
|---|
| 44 | |
|---|
| 45 | if (strchr(mode, 'a')) { |
|---|
| 46 | error = "Cannot append to file using ARB_zfopen"; |
|---|
| 47 | return NULp; |
|---|
| 48 | } |
|---|
| 49 | if (strchr(mode, 't')) { |
|---|
| 50 | error = "Cannot use textmode for ARB_zfopen"; |
|---|
| 51 | return NULp; |
|---|
| 52 | } |
|---|
| 53 | if (strchr(mode, '+')) { |
|---|
| 54 | error = "Cannot open file in read and write mode with ARB_zfopen"; |
|---|
| 55 | return NULp; |
|---|
| 56 | } |
|---|
| 57 | |
|---|
| 58 | bool forOutput = strchr(mode, 'w'); |
|---|
| 59 | FILE *fp = NULp; |
|---|
| 60 | |
|---|
| 61 | if (cmode == ZFILE_AUTODETECT) { |
|---|
| 62 | if (forOutput) { |
|---|
| 63 | error = "Autodetecting compression mode only works for input files"; |
|---|
| 64 | } |
|---|
| 65 | else { |
|---|
| 66 | fp = fopen(name, "rb"); |
|---|
| 67 | if (!fp) error = GB_IO_error("opening", name); |
|---|
| 68 | else { |
|---|
| 69 | // detect compression and set 'cmode' |
|---|
| 70 | const size_t MAGICSIZE = 5; |
|---|
| 71 | char buffer[MAGICSIZE]; |
|---|
| 72 | |
|---|
| 73 | size_t bytes_read = fread(buffer, 1, MAGICSIZE, fp); |
|---|
| 74 | fclose(fp); |
|---|
| 75 | fp = NULp; |
|---|
| 76 | |
|---|
| 77 | if (bytes_read>=2 && strncmp(buffer, "\x1f\x8b", 2) == 0) cmode = ZFILE_GZIP; |
|---|
| 78 | else if (bytes_read>=2 && strncmp(buffer, "BZ", 2) == 0) cmode = ZFILE_BZIP2; |
|---|
| 79 | else if (bytes_read>=5 && strncmp(buffer, "\xfd" "7zXZ", 5) == 0) cmode = ZFILE_XZ; |
|---|
| 80 | else { |
|---|
| 81 | cmode = ZFILE_UNCOMPRESSED; |
|---|
| 82 | } |
|---|
| 83 | } |
|---|
| 84 | } |
|---|
| 85 | } |
|---|
| 86 | |
|---|
| 87 | if (cmode == ZFILE_UNCOMPRESSED) { |
|---|
| 88 | fp = fopen(name, mode); |
|---|
| 89 | if (!fp) error = GB_IO_error("opening", name); |
|---|
| 90 | else { |
|---|
| 91 | zfile_info[fp] = zinfo(forOutput, name, ""); |
|---|
| 92 | } |
|---|
| 93 | } |
|---|
| 94 | else { |
|---|
| 95 | if (!error) { |
|---|
| 96 | const char *compressor = NULp; // command used to compress (and decompress) |
|---|
| 97 | const char *decompress_flag = "-d"; // flag needed to decompress (assumes none to compress) |
|---|
| 98 | |
|---|
| 99 | switch (cmode) { |
|---|
| 100 | case ZFILE_GZIP: { |
|---|
| 101 | static char *pigz = ARB_executable("pigz", ARB_getenv_ignore_empty("PATH")); |
|---|
| 102 | compressor = pigz ? pigz : "gzip"; |
|---|
| 103 | break; |
|---|
| 104 | } |
|---|
| 105 | case ZFILE_BZIP2: compressor = "bzip2"; break; |
|---|
| 106 | case ZFILE_XZ: compressor = "xz"; break; |
|---|
| 107 | |
|---|
| 108 | default: |
|---|
| 109 | error = GBS_global_string("Invalid compression mode (%i)", int(cmode)); |
|---|
| 110 | break; |
|---|
| 111 | |
|---|
| 112 | #if defined(USE_BROKEN_COMPRESSION) |
|---|
| 113 | case ZFILE_BROKEN: |
|---|
| 114 | compressor = "arb_weirdo"; // a non-existing command! |
|---|
| 115 | break; |
|---|
| 116 | #endif |
|---|
| 117 | } |
|---|
| 118 | |
|---|
| 119 | if (!error) { |
|---|
| 120 | char *pipeCmd = forOutput |
|---|
| 121 | ? GBS_global_string_copy("%s > %s", compressor, name) |
|---|
| 122 | : GBS_global_string_copy("%s %s < %s", compressor, decompress_flag, name); |
|---|
| 123 | |
|---|
| 124 | if (hideStderr) { |
|---|
| 125 | freeset(pipeCmd, GBS_global_string_copy("( %s 2>/dev/null )", pipeCmd)); |
|---|
| 126 | } |
|---|
| 127 | |
|---|
| 128 | // remove 'b' from mode (pipes are binary by default) |
|---|
| 129 | char *impl_b_mode = ARB_strdup(mode); |
|---|
| 130 | while (1) { |
|---|
| 131 | char *b = strchr(impl_b_mode, 'b'); |
|---|
| 132 | if (!b) break; |
|---|
| 133 | strcpy(b, b+1); |
|---|
| 134 | } |
|---|
| 135 | |
|---|
| 136 | if (forOutput) { // write to pipe |
|---|
| 137 | fp = popen(pipeCmd, impl_b_mode); |
|---|
| 138 | if (!fp) error = GB_IO_error("writing to pipe", pipeCmd); |
|---|
| 139 | } |
|---|
| 140 | else { // read from pipe |
|---|
| 141 | fp = popen(pipeCmd, impl_b_mode); |
|---|
| 142 | if (!fp) error = GB_IO_error("reading from pipe", pipeCmd); |
|---|
| 143 | } |
|---|
| 144 | |
|---|
| 145 | if (!error) { |
|---|
| 146 | zfile_info[fp] = zinfo(forOutput, name, pipeCmd); |
|---|
| 147 | } |
|---|
| 148 | |
|---|
| 149 | free(impl_b_mode); |
|---|
| 150 | free(pipeCmd); |
|---|
| 151 | } |
|---|
| 152 | } |
|---|
| 153 | } |
|---|
| 154 | |
|---|
| 155 | arb_assert(contradicted(fp, error)); |
|---|
| 156 | arb_assert(implicated(error, error[0])); // deny empty error |
|---|
| 157 | return fp; |
|---|
| 158 | } |
|---|
| 159 | |
|---|
| 160 | GB_ERROR ARB_zfclose(FILE *fp) { |
|---|
| 161 | bool fifo = GB_is_fifo(fp); |
|---|
| 162 | |
|---|
| 163 | arb_assert(zfile_info.find(fp) != zfile_info.end()); // file was not opened using ARB_zfopen! |
|---|
| 164 | |
|---|
| 165 | zinfo info = zfile_info[fp]; |
|---|
| 166 | zfile_info.erase(fp); |
|---|
| 167 | |
|---|
| 168 | int res; |
|---|
| 169 | if (fifo) { |
|---|
| 170 | res = pclose(fp); |
|---|
| 171 | } |
|---|
| 172 | else { |
|---|
| 173 | res = fclose(fp); |
|---|
| 174 | } |
|---|
| 175 | |
|---|
| 176 | GB_ERROR error = NULp; |
|---|
| 177 | if (res != 0) { |
|---|
| 178 | int exited = WIFEXITED(res); |
|---|
| 179 | int status = WEXITSTATUS(res); |
|---|
| 180 | #if defined(DEBUG) |
|---|
| 181 | int signaled = WIFSIGNALED(res); |
|---|
| 182 | #endif |
|---|
| 183 | |
|---|
| 184 | if (exited) { |
|---|
| 185 | if (status) { |
|---|
| 186 | if (fifo) { |
|---|
| 187 | error = GBS_global_string("pipe %s\n" |
|---|
| 188 | " file='%s'\n" |
|---|
| 189 | " using cmd='%s'\n" |
|---|
| 190 | " failed with exitcode=%i (broken pipe? corrupted archive?)\n", |
|---|
| 191 | info.isOutputPipe() ? "writing to" : "reading from", |
|---|
| 192 | info.get_filename(), |
|---|
| 193 | info.get_pipecmd(), |
|---|
| 194 | status); |
|---|
| 195 | } |
|---|
| 196 | } |
|---|
| 197 | } |
|---|
| 198 | if (!error) error = GB_IO_error("closing", info.get_filename()); |
|---|
| 199 | #if defined(DEBUG) |
|---|
| 200 | error = GBS_global_string("%s (res=%i, exited=%i, signaled=%i, status=%i)", error, res, exited, signaled, status); |
|---|
| 201 | #endif |
|---|
| 202 | } |
|---|
| 203 | return error; |
|---|
| 204 | } |
|---|
| 205 | |
|---|
| 206 | // -------------------------------------------------------------------------------- |
|---|
| 207 | |
|---|
| 208 | #ifdef UNIT_TESTS |
|---|
| 209 | #ifndef TEST_UNIT_H |
|---|
| 210 | #include <test_unit.h> |
|---|
| 211 | #endif |
|---|
| 212 | |
|---|
| 213 | static char *fileContent(FILE *in, size_t& bytes_read) { |
|---|
| 214 | const size_t BUFFERSIZE = 1000; |
|---|
| 215 | char *buffer = ARB_alloc<char>(BUFFERSIZE+1); |
|---|
| 216 | bytes_read = fread(buffer, 1, BUFFERSIZE, in); |
|---|
| 217 | arb_assert(bytes_read<BUFFERSIZE); |
|---|
| 218 | buffer[bytes_read] = 0; |
|---|
| 219 | return buffer; |
|---|
| 220 | } |
|---|
| 221 | |
|---|
// Expects that opening 'name' with 'mode' and 'cmode' fails with an error containing 'errpart'.
// The failure may be reported either by ARB_zfopen itself (no stream is returned)
// or - if a stream was returned - by the subsequent ARB_zfclose.
#define TEST_EXPECT_ZFOPEN_FAILS(name,mode,cmode,errpart) do{           \
        GB_ERROR  error = NULp;                                         \
        FILE     *fp    = ARB_zfopen(name, mode, cmode, error, false);  \
                                                                        \
        if (!fp) {                                                      \
            TEST_EXPECT_NULL(fp);                                       \
        }                                                               \
        else {                                                          \
            TEST_EXPECT_NULL(error);                                    \
            error = ARB_zfclose(fp);                                    \
        }                                                               \
        TEST_REJECT_NULL(error);                                        \
        TEST_EXPECT_CONTAINS(error, errpart);                           \
    }while(0)
|---|
| 236 | |
|---|
| 237 | void TEST_compressed_io() { |
|---|
| 238 | const char *inText = "general/text.input"; |
|---|
| 239 | const char *outFile = "compressed.out"; |
|---|
| 240 | |
|---|
| 241 | TEST_EXPECT_ZFOPEN_FAILS("", "", ZFILE_UNCOMPRESSED, "Invalid argument"); |
|---|
| 242 | TEST_EXPECT_ZFOPEN_FAILS(outFile, "a", ZFILE_UNCOMPRESSED, "Cannot append to file using ARB_zfopen"); |
|---|
| 243 | TEST_EXPECT_ZFOPEN_FAILS(outFile, "r", ZFILE_UNDEFINED, "Invalid compression mode"); |
|---|
| 244 | TEST_EXPECT_ZFOPEN_FAILS(outFile, "w", ZFILE_AUTODETECT, "only works for input files"); |
|---|
| 245 | TEST_EXPECT_ZFOPEN_FAILS(outFile, "rt", ZFILE_AUTODETECT, "Cannot use textmode"); |
|---|
| 246 | TEST_EXPECT_ZFOPEN_FAILS(outFile, "r+", ZFILE_AUTODETECT, "Cannot open file in read and write mode"); |
|---|
| 247 | |
|---|
| 248 | #if defined(USE_BROKEN_COMPRESSION) |
|---|
| 249 | TEST_EXPECT_ZFOPEN_FAILS(outFile, "r", ZFILE_BROKEN, "broken pipe"); |
|---|
| 250 | TEST_EXPECT_ZFOPEN_FAILS(outFile, "w", ZFILE_BROKEN, "broken pipe"); |
|---|
| 251 | #endif |
|---|
| 252 | |
|---|
| 253 | char *testText; |
|---|
| 254 | const size_t TEST_TEXT_SIZE = 428; |
|---|
| 255 | { |
|---|
| 256 | GB_ERROR error = NULp; |
|---|
| 257 | FILE *in = ARB_zfopen(inText, "r", ZFILE_UNCOMPRESSED, error, false); |
|---|
| 258 | TEST_EXPECT_NULL(error); |
|---|
| 259 | TEST_REJECT_NULL(in); |
|---|
| 260 | |
|---|
| 261 | size_t bytes_read; |
|---|
| 262 | testText = fileContent(in, bytes_read); |
|---|
| 263 | TEST_EXPECT_EQUAL(bytes_read, TEST_TEXT_SIZE); |
|---|
| 264 | |
|---|
| 265 | TEST_EXPECT_NO_ERROR(ARB_zfclose(in)); |
|---|
| 266 | } |
|---|
| 267 | |
|---|
| 268 | int successful_compressions = 0; |
|---|
| 269 | |
|---|
| 270 | for (FileCompressionMode cmode = FileCompressionMode(ZFILE_AUTODETECT+1); |
|---|
| 271 | cmode != ZFILE_UNDEFINED; |
|---|
| 272 | cmode = FileCompressionMode(cmode+1)) |
|---|
| 273 | { |
|---|
| 274 | TEST_ANNOTATE(GBS_global_string("cmode=%i", int(cmode))); |
|---|
| 275 | |
|---|
| 276 | bool compressed_save_failed = false; |
|---|
| 277 | { |
|---|
| 278 | GB_ERROR error = NULp; |
|---|
| 279 | FILE *out = ARB_zfopen(outFile, "w", cmode, error, false); |
|---|
| 280 | |
|---|
| 281 | TEST_EXPECT_NO_ERROR(error); |
|---|
| 282 | TEST_REJECT_NULL(out); |
|---|
| 283 | |
|---|
| 284 | TEST_EXPECT_DIFFERENT(EOF, fputs(testText, out)); |
|---|
| 285 | |
|---|
| 286 | error = ARB_zfclose(out); |
|---|
| 287 | if (error && strstr(error, "failed with exitcode=127") && cmode != ZFILE_UNCOMPRESSED) { |
|---|
| 288 | // assume compression utility is not installed |
|---|
| 289 | compressed_save_failed = true; |
|---|
| 290 | } |
|---|
| 291 | else { |
|---|
| 292 | TEST_EXPECT_NO_ERROR(error); |
|---|
| 293 | } |
|---|
| 294 | } |
|---|
| 295 | |
|---|
| 296 | if (!compressed_save_failed) { |
|---|
| 297 | for (int detect = 0; detect<=1; ++detect) { |
|---|
| 298 | TEST_ANNOTATE(GBS_global_string("cmode=%i detect=%i", int(cmode), detect)); |
|---|
| 299 | |
|---|
| 300 | GB_ERROR error = NULp; |
|---|
| 301 | FILE *in = ARB_zfopen(outFile, "r", detect ? ZFILE_AUTODETECT : cmode, error, false); |
|---|
| 302 | |
|---|
| 303 | TEST_REJECT(error); |
|---|
| 304 | TEST_REJECT_NULL(in); |
|---|
| 305 | |
|---|
| 306 | size_t bytes_read; |
|---|
| 307 | char *content = fileContent(in, bytes_read); |
|---|
| 308 | TEST_EXPECT_NO_ERROR(ARB_zfclose(in)); |
|---|
| 309 | TEST_EXPECT_EQUAL(content, testText); // if this fails for detect==1 -> detection does not work |
|---|
| 310 | free(content); |
|---|
| 311 | } |
|---|
| 312 | successful_compressions++; |
|---|
| 313 | } |
|---|
| 314 | } |
|---|
| 315 | |
|---|
| 316 | TEST_EXPECT(successful_compressions>=3); // at least ZFILE_UNCOMPRESSED, ZFILE_GZIP and ZFILE_BZIP should succeed |
|---|
| 317 | |
|---|
| 318 | free(testText); |
|---|
| 319 | TEST_EXPECT_DIFFERENT(GB_unlink(outFile), -1); |
|---|
| 320 | } |
|---|
| 321 | |
|---|
| 322 | #endif // UNIT_TESTS |
|---|
| 323 | |
|---|
| 324 | // -------------------------------------------------------------------------------- |
|---|
| 325 | |
|---|