Read a text file line by line with Node.js

How can you access the lines of a document when you can only load blocks of data?

Node's File System module offers many functions to read and write a file. You can load an entire file into a string, and lines can be stored in a text file one after another. But what you cannot do is read a file line by line. When the file is large, or when you only want to read a small number of lines, this becomes annoying.

Several solutions have been proposed to this problem, with or without an additional module, but none is satisfactory if one wants to reproduce the functions of PHP, C or any other language: open the file, read a line in a loop, close the file.

Here is a fairly simple algorithm that leads to this result, which requires no external module.

  1. An associative array fileBuffer is created, with the file resource as key and, as value, an array containing the lines read.
  2. filePtr is another associative array, which also has the file resource as key, with the position in the file as value.
  3. We test whether the array of buffered lines has content. If this is the case, we return its first element, which shift removes from the array.
  4. Otherwise, a block of up to 4096 bytes is read from the file, starting at position pos.
  5. The actual number of bytes read is returned in br.
  6. If this number is less than 4096, there will be no further reading: we remove the file entry from filePtr (this will be tested by the eof function).
  7. The buffer is converted to a string and the string is split into an array that is assigned to fileBuffer[handle].
  8. The last item in the array is deleted because the line is truncated in most cases.
  9. The next position in the file is determined by adding the number of bytes read, minus the size of the last element we have deleted.
  10. When the array is empty, we go back to step 4, unless the end of the file has been detected.

Source code of the module:

var fs = require('fs');
// Per-handle state shared by fopen, fgets, eof and fclose, keyed by the value
// returned by fs.openSync.
// filePtr[handle]    : position in the file where the next block read starts.
// fileBuffer[handle] : lines already read from the file but not yet returned.
var filePtr = {}
var fileBuffer = {}
// Scratch buffer reused for every block read. Buffer.alloc replaces the
// deprecated `new Buffer(size)` constructor and always returns zero-filled
// memory.
var buffer = Buffer.alloc(4096)

exports.fopen = function(path, mode) {
  var handle = fs.openSync(path, mode)
  filePtr[handle] = 0
  fileBuffer[handle]= []
  return handle
}

exports.fclose = function(handle) {
  fs.closeSync(handle)
  if (handle in filePtr) {
    delete filePtr[handle]
    delete fileBuffer[handle]
  } 
  return
}

exports.fgets = function(handle) { 
  if(fileBuffer[handle].length == 0)
  {
    var pos = filePtr[handle]
    var br = fs.readSync(handle, buffer, 0, 4096, pos)
    if(br < 4096) {
      delete filePtr[handle]
      if(br == 0)  return false
    }
    var lst = buffer.slice(0, br).toString().split("\n")
    var minus = 0
    if(lst.length > 1) {
      var x = lst.pop()
      minus = x.length
    } 
    fileBuffer[handle] = lst 
    filePtr[handle] = pos + br - minus
  }
  return fileBuffer[handle].shift()
}

exports.eof = function(handle) {
  return (handle in filePtr) == false && (fileBuffer[handle].length == 0) 
}

You can import the module as in the example below, or integrate the functions directly into your project by removing the "exports." prefix.

Source code of the demo

In this example, we read a file line by line and copy it into a new file line by line also but through the File System functions.

// Demo: copies a text file into another one, line by line, reading with the
// node-readline module and writing with the File System functions.
var fs = require('fs')
var readline = require("/scripts/node-readline/node-readline.js")

var source="/scripts/node-readline/demosrc.htm"
var target="/scripts/node-readline/demotgt.htm"

// fopen relies on fs.openSync, which throws when the file cannot be opened;
// it never returns false, so the failure has to be caught.
var r
try {
   r = readline.fopen(source,"r")
}
catch (err) {
   console.log("Error, can't open ", source, err.message)
   process.exit(1)
}

var count=0
var w
try
{
   w = fs.openSync(target,"w")
   while (!readline.eof(r))
   {
      var line=readline.fgets(r)
      // fgets returns false when nothing is left to read (an empty file, for
      // instance); that marker must not be written out as a line.
      if(line===false) break
      console.log(line)
      fs.writeSync(w, line + "\n", null, 'utf8')
      count+=1
   }
}
finally
{
   // Release both files even when reading or writing fails.
   readline.fclose(r)
   if(w !== undefined) fs.closeSync(w)
}

console.log(count, " lines read.")

Change the names of the source file and the target file to whatever you want. You can also adjust the buffer size to your needs. If a file contains lines longer than 4096 bytes (this is quite rare for a file that must be read line by line), the buffer size must be increased accordingly.

Download the full source code