JSON file compression by eliminating spaces

I am working with a large JSON file (~100,000 lines) and need to compress it to speed up my program. I want to remove all horizontal tabs, carriage returns, etc., in order to minimize the file size.

For example, if there was originally a line:

"name_id": "Richard Feynman",
"occupation": "Professional Bongos Player"

it should be compressed to:

"name_id":"Richard Feynman","occupation":"Professional Bongos Player"

I browsed the Internet (forgive me if this is a simple answer, I'm new) and cannot find a command for the terminal that will help me do this. Any help would be greatly appreciated

+4
source share
3 answers

It looks like you are looking for a JSON minifier.

There are several available, both online and offline.

+ , , , .

, JSON, , JSON. .

+10

With GNU awk, using RT (the text that matched the record separator):

$ awk 'BEGIN{RS="\""} NR%2{gsub(/[[:space:]]/,"")} {ORS=RT;print} END{printf "\n"}' file
"name_id":"Richard Feynman","occupation":"Professional Bongos Player"
+1

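You can do this with flex(1). The scanner below builds a program called json that copies JSON tokens to its output and drops the whitespace and comments between them.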

To build it, run:

make json

Usage:

json [ file ... ]

If no files are given, it reads from stdin.

Source:

%{
/* json-min.  JSON minimizer.
 * Author: Luis Colorado <lc@luiscoloradosistemas.com>
 * Date: Wed Aug 13 07:35:23 EEST 2014
 * Disclaimer: This program is GPL, as of GPL version 3, you
 * may have received a copy of that document, or you can
 * instead look at http://www.gnu.org/licenses/gpl.txt to read
 * it.  There no warranty, nor assumed nor implicit on the
 * use of this program, you receive it `as is' so whatever you
 * do with it is only your responsibility.  Luis Colorado
 * won't assume any responsibility of the use or misuse of
 * this program.  You are warned.
 */
%}

/* Numeric literals.  Every numeric pattern accepts an optional leading
 * minus sign, and {doub} accepts an exponent, so that JSON numbers such
 * as -12, 1e10 or -0.5E-3 are matched as a single token.  Without this
 * the exponent would be scanned as an unquoted identifier and quoted.
 * Octal and hexadecimal forms are not JSON but are still accepted and
 * copied through unchanged.
 */
sign    ("-"?)
exp     ([eE][+-]?[0-9]+)
intp    ({sign}[0-9]+)
dec     ({sign}[1-9][0-9]*)
oct     ({sign}0[0-7]*)
hex     ({sign}0[xX][0-9a-fA-F]*)
doub    ({intp}"."[0-9]*{exp}?|{sign}"."[0-9]+{exp}?|{intp}{exp})
/* Double-quoted string.  A backslash may only be consumed together with
 * the character it escapes; it is excluded from the plain-character
 * class so that longest-match cannot pair the second backslash of "\\"
 * with the closing quote and run on into the following text.
 */
strd    (\"([^\"\\]|\\(.|\n))*\")
t   "true"
f   "false"
n   "null"
/* Comment delimiters (comments are an extension; they are stripped). */
com1    "//".*
com2b   "/*"
endc    "*/"
/* Bare word that is not one of the literals above. */
ident   ([a-zA-Z_][a-zA-Z0-9_]*)

%x INCOMMENT
%option noyywrap

%%

{dec}           |
{oct}           |
{hex}           |
{doub}          |
{strd}              |
{t}             |
{f}         |
{n}         |
"{"         |
":"         |
";"         |
"}"         |
"["         |
"]"         |
","         ECHO;   /* literals and punctuation are copied verbatim */

[\ \t\r\n\f\v]      |
{com1}          ;   /* whitespace (including CR) and // comments are dropped */
{com2b}         BEGIN(INCOMMENT);
<INCOMMENT>(.|\n)   ;   /* '.' alone skips '\n', which the default rule would echo */
<INCOMMENT>{endc}   BEGIN(INITIAL);
<INCOMMENT><<EOF>>  { /* Unterminated comment: warn and leave the
                 * exclusive state so that a following input
                 * does not start inside the comment. */
              fprintf(stderr,
                "WARNING: unterminated comment "
                "at end of input\n");
              BEGIN(INITIAL);
              yyterminate();
            }

{ident}         { /* Bare word: emit it as a quoted string. */
              fprintf(stderr, "WARNING:"
                "unquoted identifier %s "
                "in source.  Quoting.\n",
                yytext);
              printf("\"%s\"", yytext);
            }
.           { /* Anything else is reported and passed through. */
              fprintf(stderr,
                "WARNING: unknown symbol %s "
                "in source, copied to output\n",
                yytext);
              ECHO;
            }

%%

void process(const char *fn);

/*
 * Entry point.  Each command-line argument is passed to process() in
 * order; when no argument is given, process(NULL) is called once so
 * that standard input is used instead.
 */
int main(int argc, const char **argv)
{
    const char **arg;

    if (argc <= 1) {
        process(NULL); /* <-- stdin */
        return 0;
    }

    for (arg = argv + 1; arg < argv + argc; arg++) {
        process(*arg);
    }
    return 0;
} /* main */

/*
 * process -- run the scanner over one input.
 *
 * fn: path of the file to read, or NULL to read standard input.
 *
 * The file is opened for reading, installed as yyin and scanned with a
 * single yylex() call; a newline is printed to stdout afterwards.  If
 * the file cannot be opened, a message is written to stderr and the
 * program exits with EXIT_FAILURE.
 */
void process(const char *fn)
{
    FILE *f = stdin;

    if (fn) {
        f = fopen(fn, "r");
        if (!f) {
            /* Capture errno first: the order in which the
             * fprintf arguments are evaluated is unspecified
             * and strerror() is allowed to modify errno. */
            int saved_errno = errno;

            fprintf(stderr,
                "ERROR:fopen:%s:%s(errno=%d)\n",
                fn, strerror(saved_errno), saved_errno);
            exit(EXIT_FAILURE);
        } /* if */
    } /* if */

    yyin = f;
    yylex();

    if (fn) { /* only close if we opened, don't close stdin. */
        fclose(f);
    }
    printf("\n");
}

, . ( ). , stdout, , .

BR,

0

All Articles