This JavaScript code reads a JSON file as a stream, converts its records to the Parquet format according to a defined schema, and writes the result to a Parquet file. It uses the "stream-json" library for efficient JSON streaming and the "parquetjs" library for Parquet handling, so a large JSON file can be converted to a Parquet file with a specified schema mapping. The resulting Parquet file can be more efficient for storage and querying.
import { createReadStream, createWriteStream } from "fs";
import { pipeline } from "stream";
import { ParquetSchema, ParquetTransformer } from "parquetjs";
import * as StreamArray from "stream-json/streamers/StreamArray";
// Input JSON file and output Parquet file for the conversion.
const reader = createReadStream("data-json.json");
const destination = createWriteStream("countries.parquet");

// Parquet column layout. The four columns are nested under `value`.
// NOTE(review): presumably this mirrors the `{ key, value }` objects that
// stream-json's StreamArray emits for each array element — confirm against
// the stream-json documentation and the shape of data-json.json.
const schema = new ParquetSchema({
  value: {
    fields: {
      Country: { type: "UTF8" },
      Indicator: { type: "UTF8" },
      Value: { type: "FLOAT" },
      Year: { type: "INT64" },
    },
  },
});

// Use pipeline() instead of chained .pipe() calls: .pipe() does not forward
// errors between stages, so a failure (missing input file, malformed JSON,
// record not matching the schema) would surface as an unhandled 'error'
// event and could leave the other streams open. pipeline() destroys every
// stage on failure and reports the outcome through a single callback.
pipeline(
  reader,
  StreamArray.withParser(),
  new ParquetTransformer(schema),
  destination,
  (err) => {
    if (err) {
      console.error("JSON to Parquet conversion failed:", err);
      // Signal failure to the caller without cutting off pending cleanup.
      process.exitCode = 1;
    }
  }
);