Skip to content

Instantly share code, notes, and snippets.

@chrispahm
Created January 24, 2022 19:28
Show Gist options
  • Save chrispahm/c226cca151b25147869288600151a5f8 to your computer and use it in GitHub Desktop.
Save chrispahm/c226cca151b25147869288600151a5f8 to your computer and use it in GitHub Desktop.
Stream a GeoJSON file and record the start byte and byte length of each Feature record in the file
import { open } from 'fs/promises'
import { Buffer } from 'buffer'
// Chunk size, in bytes, requested from the read stream (64 * 1024 / 8 = 8192).
const HIGHWATERMARK = 64 * 1024 / 8

/**
 * Streams a GeoJSON file once and records where every Feature object lives in it.
 *
 * The file handle is left open (`autoClose: false`) so that the caller can
 * afterwards read individual features by position.
 *
 * Assumes the file is UTF-8 encoded. Invalid byte sequences are decoded as
 * replacement characters, which would skew the offsets that follow them.
 *
 * @param {import('fs/promises').FileHandle} fd - Open, readable file handle.
 * @returns {Promise<Array<{startPosition: number, byteLength: number}>>}
 *   One entry per matched feature, in file order. `startPosition` is the byte
 *   offset of the feature's opening brace and `byteLength` the number of bytes
 *   up to and including its closing brace. Rejects if the stream emits an error.
 */
function getFeaturePositionsInFile(fd) {
  return new Promise((resolve, reject) => {
    const featurePositionsInFile = []
    const stream = fd.createReadStream({ highWaterMark: HIGHWATERMARK, autoClose: false })
    // This RegEx will solely work with standard GeoJSON without any foreign members:
    // https://datatracker.ietf.org/doc/html/rfc7946#section-6.1
    // The properties object has to be present, and has to be the last key in the GeoJSON object.
    const jsonExtractor = /\{[\n\r\s]*?"type":[\n\r\s]*?"Feature"[\S\s]*?\}(?:[\n\r\s]*\})+/g
    // A streaming decoder holds back a multi-byte character that is split across
    // two chunks instead of turning its halves into replacement characters.
    // `ignoreBOM: true` keeps a leading byte order mark in the decoded text so
    // that it still counts towards the byte offsets.
    const decoder = new TextDecoder('utf-8', { ignoreBOM: true })
    // Decoded text that has not been attributed to a feature yet.
    let pending = ''
    // Byte offset in the file of the first character of `pending`.
    let pendingStartByte = 0

    stream.on('data', (chunk) => {
      pending += decoder.decode(chunk, { stream: true })
      // Both cursors advance together through `pending`: one counts UTF-16 code
      // units (what the RegEx reports), the other counts UTF-8 bytes (what the
      // file is addressed in). They differ as soon as non-ASCII text is present.
      let consumedChars = 0
      let consumedBytes = 0
      let match
      while ((match = jsonExtractor.exec(pending)) !== null) {
        const featureString = match[0]
        // bytes between the previous feature (or buffer start) and this one
        consumedBytes += Buffer.byteLength(pending.slice(consumedChars, match.index), 'utf8')
        const byteLength = Buffer.byteLength(featureString, 'utf8')
        // store info for later in our lookup array
        featurePositionsInFile.push({
          startPosition: pendingStartByte + consumedBytes,
          byteLength
        })
        consumedBytes += byteLength
        consumedChars = match.index + featureString.length
      }
      // remove the processed features from the buffer to free memory; whatever
      // follows the last match may be the beginning of a feature that is
      // completed by the next chunk
      pending = pending.slice(consumedChars)
      pendingStartByte += consumedBytes
    })
    stream.on('end', () => resolve(featurePositionsInFile))
    stream.on('error', reject)
  })
}
/**
 * Reads `length` bytes starting at `startPosition` from an open file handle and
 * parses them as JSON.
 *
 * @param {import('fs/promises').FileHandle} fd - Open, readable file handle.
 * @param {number} startPosition - Byte offset in the file of the record's first byte.
 * @param {number} length - Number of bytes that make up the record.
 * @returns {Promise<any>} The parsed JSON value. Rejects with the error raised by
 *   `fd.read`, or with a `SyntaxError` if the bytes read are not valid JSON
 *   (which includes the file ending before `length` bytes were available).
 */
async function readSingleFeatureFromFile(fd, startPosition, length) {
  const buff = Buffer.alloc(length)
  let filled = 0
  // A single read may deliver fewer bytes than requested, so keep reading until
  // the buffer is full or the file ends.
  while (filled < length) {
    // A null/undefined position means "read from the current file position";
    // pass it through untouched instead of turning it into a number.
    const position = startPosition == null ? startPosition : startPosition + filled
    const { bytesRead } = await fd.read(buff, filled, length - filled, position)
    if (bytesRead === 0) {
      break
    }
    filled += bytesRead
  }
  // Only decode what was actually read, never the zero padding behind it.
  const featureString = buff.toString('utf8', 0, filled)
  return JSON.parse(featureString)
}
/**
 * Loads one feature from the file by its index in the position lookup table.
 *
 * @param {number} featureIndexToRead - Zero-based index into `featurePositionsInFile`.
 * @param {Array<{startPosition: number, byteLength: number}>} featurePositionsInFile
 *   Lookup table with the byte offset and byte length of each feature.
 * @param {import('fs/promises').FileHandle} [fileHandle=fd] - File handle to read
 *   from. Defaults to the module-level `fd`, so existing two-argument calls keep working.
 * @returns {Promise<any>} The parsed feature.
 * @throws {RangeError} If the lookup table has no entry at `featureIndexToRead`.
 */
async function getFeature(featureIndexToRead, featurePositionsInFile, fileHandle = fd) {
  const entry = featurePositionsInFile[featureIndexToRead]
  if (entry === undefined) {
    throw new RangeError(
      `No feature at index ${featureIndexToRead}; the lookup table has ${featurePositionsInFile.length} entries`
    )
  }
  const { startPosition, byteLength } = entry
  return readSingleFeatureFromFile(fileHandle, startPosition, byteLength)
}
// source: https://raw.githubusercontent.com/node-geojson/geojson-stream/master/test/data/featurecollection.geojson
// -> has 3 features
const path = 'featurecollection.geojson'
const fd = await open(path, 'r')
try {
  // scan the file once to build the lookup table of feature positions
  const featurePositionsInFile = await getFeaturePositionsInFile(fd)
  // get nth (e.g. 3rd, which is zero-based index 2) feature in file
  const thirdFeature = await getFeature(2, featurePositionsInFile)
  console.log(thirdFeature)
} finally {
  // done! make sure to close the filehandle, even if indexing or reading failed
  await fd.close()
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment