模块:Csv.lua:修订间差异

来自「荏苒之境」
创建页面,内容为“--[[ CSV Library v1 (Author: Michael Lutz, 2022-12-14) Built on: http://lua-users.org/wiki/LuaCsv csv.load = function(filename, delimiter, header) filename := CSV file to load delimiter := Delimiter (";", ",", "\t", etc..), default = ',' header := (optional) if first line is a header (true/false), default = false automatically removes quotes from text returns a table csv.save = function(filename, delimiter, data, header) filename := CSV file to…”
 
无编辑摘要
第1行: 第1行:
--[[
local ftcsv = {
CSV Library v1 (Author: Michael Lutz, 2022-12-14)
    _VERSION = 'ftcsv 1.4.0',
Built on: http://lua-users.org/wiki/LuaCsv
    _DESCRIPTION = 'CSV library for Lua',
    _URL        = 'https://github.com/FourierTransformer/ftcsv',
csv.load = function(filename, delimiter, header)
    _LICENSE    = [[
filename := CSV file to load
        The MIT License (MIT)
delimiter := Delimiter (";", ",", "\t", etc..), default = ','
 
header := (optional) if first line is a header (true/false), default = false
        Copyright (c) 2016-2023 Shakil Thakur
 
automatically removes quotes from text
        Permission is hereby granted, free of charge, to any person obtaining a copy
returns a table
        of this software and associated documentation files (the "Software"), to deal
        in the Software without restriction, including without limitation the rights
csv.save = function(filename, delimiter, data, header)
        to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
filename := CSV file to write to
        copies of the Software, and to permit persons to whom the Software is
delimiter := Delimiter (";", ",", "\t", etc..), default = ','
        furnished to do so, subject to the following conditions:
data := a Lua table that holds the rows and columns
 
header := a Lua table that holds the names of the columns e.g. { "Name", "Address", "Email", ... }
        The above copyright notice and this permission notice shall be included in all
        copies or substantial portions of the Software.
--]]
 
local function parse_row(input, sep, pos)
        THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
local row = {}
        IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
local pos = pos or 1
        FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
--io.read()
        AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
while true do
        LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
local c = string.sub(input,pos,pos)
        OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
if (c == "") then break end
        SOFTWARE.
if (c == '"') then
    ]]
local text = ''
}
local s,e,txt,c1
 
repeat
-- perf
s,e,txt,c1 = string.find(input, '^(.-")(.)', pos+1)
local sbyte = string.byte
text = text..txt
local ssub = string.sub
pos = e
 
--print(txt, e, c1)
-- luajit/lua compatability layer
until ((c1 == sep) or (c1 == "\r") or (c1 == "\n"))
local luaCompatibility = {}
--print(string.sub(text,1,-2), c1)
if type(jit) == 'table' or _ENV then
table.insert(row, string.sub(text,1,-2))
    -- luajit and lua 5.2+
c = c1
    luaCompatibility.load = _G.load
pos = pos + 1
else
else
    -- lua 5.1
local s,e,text,c1 = string.find(input, "^([^%"..sep.."\r\n]-)([%"..sep.."\r\n])", pos)
    luaCompatibility.load = loadstring
pos = e+1
end
--print(text, c1)
 
table.insert(row, text)
-- luajit specific speedups
c = c1
-- luajit performs faster with iterating over string.byte,
end
-- whereas vanilla lua performs faster with string.find
if c == "\n" then
if type(jit) == 'table' then
return row, pos
    luaCompatibility.LuaJIT = true
end
    -- finds the end of an escape sequence
if c == "\r" then
    -- LuaJIT variant: scan byte-by-byte for the quote that closes a quoted
    -- field starting at index i. Returns the index just before the closing
    -- quote plus whether any "" escape was seen (so the caller can collapse
    -- them later). If the loop runs off the end without finding a closer it
    -- implicitly returns nil -- the buffered parser detects that and backtracks.
    function luaCompatibility.findClosingQuote(i, inputLength, inputString, quote, doubleQuoteEscape)
        local currentChar, nextChar = sbyte(inputString, i), nil
        while i <= inputLength do
            nextChar = sbyte(inputString, i+1)

            -- this one deals with " double quotes that are escaped "" within single quotes "
            -- these should be turned into a single quote at the end of the field
            if currentChar == quote and nextChar == quote then
                doubleQuoteEscape = true
                -- skip past the escaped pair
                i = i + 2
                currentChar = sbyte(inputString, i)

            -- identifies the escape toggle
            elseif currentChar == quote and nextChar ~= quote then
                return i-1, doubleQuoteEscape
            else
                i = i + 1
                currentChar = nextChar
            end
        end
    end
 
else
    luaCompatibility.LuaJIT = false
 
    -- vanilla lua closing quote finder
    -- Vanilla-Lua variant of the closing-quote search: jumps over whole runs
    -- of quotes with find('"+') instead of iterating bytes.
    -- A run of even length is entirely escaped pairs (""), so recurse past
    -- it; a run of odd length means its last quote closes the field.
    -- Returns nil when no quote run is found at all (buffered-mode backtrack).
    function luaCompatibility.findClosingQuote(i, inputLength, inputString, quote, doubleQuoteEscape)
        local j, difference
        i, j = inputString:find('"+', i)
        if j == nil then
            return nil
        end
        -- difference >= 1 means the run held at least one "" escape pair
        difference = j - i
        if difference >= 1 then doubleQuoteEscape = true end
        if difference % 2 == 1 then
            return luaCompatibility.findClosingQuote(j+1, inputLength, inputString, quote, doubleQuoteEscape)
        end
        return j-1, doubleQuoteEscape
    end
end
 
 
-- determine the real headers as opposed to the header mapping
-- Produce the ordered list of unique header names. When a fieldsToKeep
-- set is supplied, only names present in it survive; otherwise every
-- distinct name is kept, in first-seen order.
local function determineRealHeaders(headerField, fieldsToKeep)
    local orderedHeaders = {}
    local alreadySeen = {}
    for _, name in ipairs(headerField) do
        local wanted = (fieldsToKeep == nil) or (fieldsToKeep[name] ~= nil and fieldsToKeep[name])
        if wanted and not alreadySeen[name] then
            alreadySeen[name] = true
            orderedHeaders[#orderedHeaders + 1] = name
        end
    end
    return orderedHeaders
end
end


csv = {}
csv.parse = function(csv, delimiter, header)
sep = string.sub(delimiter,1,1) or ','
local pos = 1
local t_csv = {}
local f_header = nil
local t_header = {}
if header then
t_header,pos = parse_row(csv, sep, pos)
local head = {}
for i,v in ipairs(t_header) do
head[v] = i
end
f_header = function (t,k)
local i = head[k]
if i then
return t[i]
end
return nil
end
end


local row = {}
local function determineTotalColumnCount(headerField, fieldsToKeep)
row, pos = parse_row(csv, sep, pos)
    local totalColumnCount = 0
while row do
    local headerFieldSet = {}
if header then
    for _, header in pairs(headerField) do
setmetatable(row, { __index = f_header })
        -- count unique columns and
end
        -- also figure out if it's a field to keep
table.insert(t_csv, row)
        if not headerFieldSet[header] and
row, pos = parse_row(csv, sep, pos)
            (fieldsToKeep == nil or fieldsToKeep[header]) then
end
            headerFieldSet[header] = true
return t_csv, t_header
            totalColumnCount = totalColumnCount + 1
        end
    end
    return totalColumnCount
end
end


local function format_csv(str, sep)
local function generateHeadersMetamethod(finalHeaders)
local str, matches = string.gsub(str or "", '"', '""')
    -- if a header field tries to escape, we will simply return nil
if (string.find(str, "[%"..sep.."\r\n]") or (matches > 0)) then
    -- the parser will still parse, but wont get the performance benefit of
return '"'..str..'"'
    -- having headers predefined
end
    for _, headers in ipairs(finalHeaders) do
return str
        if headers:find("]") then
            return nil
        end
    end
    local rawSetup = "local t, k, _ = ... \
    rawset(t, k, {[ [[%s]] ]=true})"
    rawSetup = rawSetup:format(table.concat(finalHeaders, "]] ]=true, [ [["))
    return luaCompatibility.load(rawSetup)
end
end


csv.format = function(delimiter, data, header)
-- main function used to parse
r = {}
local function parseString(inputString, i, options)
sep = string.sub(delimiter,1,1) or ','
 
if header then
    -- keep track of my chars!
for i,v in ipairs(header) do
    local inputLength = options.inputLength or #inputString
r[#r+1] = format_csv(v, sep)
    local currentChar, nextChar = sbyte(inputString, i), nil
r[#r+1] = sep
    local skipChar = 0
end
    local field
r[#r] = "\n"
    local fieldStart = i
end
    local fieldNum = 1
    local lineNum = 1
for i,v in ipairs(data) do
    local lineStart = i
for i2,v2 in ipairs(v) do
    local doubleQuoteEscape, emptyIdentified = false, false
r[#r+1] = format_csv(v2, sep)
 
r[#r+1] = sep
    local skipIndex
end
    local charPatternToSkip = "[" .. options.delimiter .. "\r\n]"
r[#r] = "\n"
 
end
    --bytes
return table.concat(r)
    local CR = sbyte("\r")
    local LF = sbyte("\n")
    local quote = sbyte('"')
    local delimiterByte = sbyte(options.delimiter)
 
    -- explode most used options
    local headersMetamethod = options.headersMetamethod
    local fieldsToKeep = options.fieldsToKeep
    local ignoreQuotes = options.ignoreQuotes
    local headerField = options.headerField
    local endOfFile = options.endOfFile
    local buffered = options.buffered
 
    local outResults = {}
 
    -- in the first run, the headers haven't been set yet.
    if headerField == nil then
        headerField = {}
        -- setup a metatable to simply return the key that's passed in
        local headerMeta = {__index = function(_, key) return key end}
        setmetatable(headerField, headerMeta)
    end
 
    if headersMetamethod then
        setmetatable(outResults, {__newindex = headersMetamethod})
    end
    outResults[1] = {}
 
    -- totalColumnCount based on unique headers and fieldsToKeep
    local totalColumnCount = options.totalColumnCount or determineTotalColumnCount(headerField, fieldsToKeep)
 
    local function assignValueToField()
        if fieldsToKeep == nil or fieldsToKeep[headerField[fieldNum]] then
 
            -- create new field
            if ignoreQuotes == false and sbyte(inputString, i-1) == quote then
                field = ssub(inputString, fieldStart, i-2)
            else
                field = ssub(inputString, fieldStart, i-1)
            end
            if doubleQuoteEscape then
                field = field:gsub('""', '"')
            end
 
            -- reset flags
            doubleQuoteEscape = false
            emptyIdentified = false
 
            -- assign field in output
            if headerField[fieldNum] ~= nil then
                outResults[lineNum][headerField[fieldNum]] = field
            else
                error('ftcsv: too many columns in row ' .. options.rowOffset + lineNum)
            end
        end
    end
 
    while i <= inputLength do
        -- go by two chars at a time,
        --  currentChar is set at the bottom.
        nextChar = sbyte(inputString, i+1)
 
        -- empty string
        if ignoreQuotes == false and currentChar == quote and nextChar == quote then
            skipChar = 1
            fieldStart = i + 2
            emptyIdentified = true
 
        -- escape toggle.
        -- This can only happen if fields have quotes around them
        -- so the current "start" has to be where a quote character is.
        elseif ignoreQuotes == false and currentChar == quote and nextChar ~= quote and fieldStart == i then
            fieldStart = i + 1
            -- if an empty field was identified before assignment, it means
            -- that this is a quoted field that starts with escaped quotes
            -- ex: """a"""
            if emptyIdentified then
                fieldStart = fieldStart - 2
                emptyIdentified = false
            end
            skipChar = 1
            i, doubleQuoteEscape = luaCompatibility.findClosingQuote(i+1, inputLength, inputString, quote, doubleQuoteEscape)
 
        -- create some fields
        elseif currentChar == delimiterByte then
            assignValueToField()
 
            -- increaseFieldIndices
            fieldNum = fieldNum + 1
            fieldStart = i + 1
 
        -- newline
        elseif (currentChar == LF or currentChar == CR) then
            assignValueToField()
 
            -- handle CRLF
            if (currentChar == CR and nextChar == LF) then
                skipChar = 1
                fieldStart = fieldStart + 1
            end
 
            -- incrememnt for new line
            if fieldNum < totalColumnCount then
                -- sometimes in buffered mode, the buffer starts with a newline
                -- this skips the newline and lets the parsing continue.
                if buffered and lineNum == 1 and fieldNum == 1 and field == "" then
                    fieldStart = i + 1 + skipChar
                    lineStart = fieldStart
                else
                    error('ftcsv: too few columns in row ' .. options.rowOffset + lineNum)
                end
            else
                lineNum = lineNum + 1
                outResults[lineNum] = {}
                fieldNum = 1
                fieldStart = i + 1 + skipChar
                lineStart = fieldStart
            end
 
        elseif luaCompatibility.LuaJIT == false then
            skipIndex = inputString:find(charPatternToSkip, i)
            if skipIndex then
                skipChar = skipIndex - i - 1
            end
 
        end
 
        -- in buffered mode and it can't find the closing quote
        -- it usually means in the middle of a buffer and need to backtrack
        if i == nil then
            if buffered then
                outResults[lineNum] = nil
                return outResults, lineStart
            else
                error("ftcsv: can't find closing quote in row " .. options.rowOffset + lineNum ..
                ". Try running with the option ignoreQuotes=true if the source incorrectly uses quotes.")
            end
        end
 
        -- Increment Counter
        i = i + 1 + skipChar
        if (skipChar > 0) then
            currentChar = sbyte(inputString, i)
        else
            currentChar = nextChar
        end
        skipChar = 0
    end
 
    if buffered and not endOfFile then
        outResults[lineNum] = nil
        return outResults, lineStart
    end
 
    -- create last new field
    assignValueToField()
 
    -- remove last field if empty
    if fieldNum < totalColumnCount then
 
        -- indicates last field was really just a CRLF,
        -- so, it can be removed
        if fieldNum == 1 and field == "" then
            outResults[lineNum] = nil
        else
            error('ftcsv: too few columns in row ' .. options.rowOffset + lineNum)
        end
    end
 
    return outResults, i, totalColumnCount
end
 
-- Post-process the parsed header row according to the user's options.
-- Mutates headerField in place and returns it.
local function handleHeaders(headerField, options)
    if options.headers == false then
        -- header-less files: columns are addressed by ordinal index
        for idx = 1, #headerField do
            headerField[idx] = idx
        end
    else
        -- with real headers, an empty header name is unusable
        for _, name in ipairs(headerField) do
            if #name == 0 then
                error('ftcsv: Cannot parse a file which contains empty headers')
            end
        end
    end

    if options.rename then
        -- key-value renames, e.g. {["a"] = "apple"}
        for idx = 1, #headerField do
            local replacement = options.rename[headerField[idx]]
            if replacement then
                headerField[idx] = replacement
            end
        end
        -- positional renames (the list part) cover header-less files
        if #options.rename > 0 then
            for idx = 1, #options.rename do
                headerField[idx] = options.rename[idx]
            end
        end
    end

    -- apply some sweet header manipulation
    if options.headerFunc then
        for idx = 1, #headerField do
            headerField[idx] = options.headerFunc(headerField[idx])
        end
    end

    return headerField
end
 
-- load an entire file into memory
--- Open `textFile` and read `amount` from it: "*all" for the whole file,
-- or a byte count when buffering. For "*all" the handle is closed before
-- returning (the second return is then a closed handle); otherwise the
-- caller keeps reading from the returned open handle.
local function loadFile(textFile, amount)
    local file = io.open(textFile, "r")
    -- raise rather than return nil: a missing input file is unrecoverable here
    if not file then error("ftcsv: File not found at " .. textFile) end
    local lines = file:read(amount)
    if amount == "*all" then
        file:close()
    end
    return lines, file
end
 
-- Resolve the raw CSV text. The caller either handed us the text itself
-- (options.loadFromString) or gave a path for loadFile to read `amount`
-- bytes from. Empty input is rejected up front. The second return is the
-- open file handle, nil when the input came from a string.
local function initializeInputFromStringOrFile(inputFile, options, amount)
    if options.loadFromString then
        if inputFile == "" then
            error('ftcsv: Cannot parse an empty file')
        end
        return inputFile, nil
    end
    local contents, handle = loadFile(inputFile, amount)
    if contents == "" then
        error('ftcsv: Cannot parse an empty file')
    end
    return contents, handle
end
 
-- Backwards-compatibility shim for the argument conventions: callers may
-- pass (delimiterString, optionsTable), a single options table carrying an
-- optional `delimiter` field, or nothing at all (defaults to ",").
local function determineArgumentOrder(delimiter, options)
    local argType = type(delimiter)
    if argType == "string" then
        -- old-style call: delimiter and options are separate
        return delimiter, options
    end
    if argType == "table" then
        -- new-style call: the table doubles as the options
        return (delimiter.delimiter or ","), delimiter
    end
    -- nothing specified: assume comma-delimited, no options
    return ",", nil
end
 
--- Validate user options and apply defaults.
-- `fromParseLine` toggles bufferSize handling: parseLine needs/allows it,
-- parse rejects it. Returns the (possibly defaulted) options table plus a
-- set-style lookup table built from options.fieldsToKeep (or nil).
-- NOTE(review): when an options table IS supplied, defaults such as
-- `headers = true` are not filled in here (only ignoreQuotes/bufferSize
-- are); only the no-options path builds a complete default table.
local function parseOptions(delimiter, options, fromParseLine)
    -- delimiter MUST be one character
    assert(#delimiter == 1 and type(delimiter) == "string", "the delimiter must be of string type and exactly one character")

    local fieldsToKeep = nil

    if options then

    -- (indentation quirk preserved: this validation run sits one level
    -- shallower than its enclosing `if options then`)
    if options.headers ~= nil then
        assert(type(options.headers) == "boolean", "ftcsv only takes the boolean 'true' or 'false' for the optional parameter 'headers' (default 'true'). You passed in '" .. tostring(options.headers) .. "' of type '" .. type(options.headers) .. "'.")
    end

    if options.rename ~= nil then
        assert(type(options.rename) == "table", "ftcsv only takes in a key-value table for the optional parameter 'rename'. You passed in '" .. tostring(options.rename) .. "' of type '" .. type(options.rename) .. "'.")
    end

    if options.fieldsToKeep ~= nil then
        assert(type(options.fieldsToKeep) == "table", "ftcsv only takes in a list (as a table) for the optional parameter 'fieldsToKeep'. You passed in '" .. tostring(options.fieldsToKeep) .. "' of type '" .. type(options.fieldsToKeep) .. "'.")
        local ofieldsToKeep = options.fieldsToKeep
        if ofieldsToKeep ~= nil then
            -- convert the user's list into a set for O(1) membership checks
            fieldsToKeep = {}
            for j = 1, #ofieldsToKeep do
                fieldsToKeep[ofieldsToKeep[j]] = true
            end
        end
        if options.headers == false and options.rename == nil then
            error("ftcsv: fieldsToKeep only works with header-less files when using the 'rename' functionality")
        end
    end

    if options.loadFromString ~= nil then
        assert(type(options.loadFromString) == "boolean", "ftcsv only takes a boolean value for optional parameter 'loadFromString'. You passed in '" .. tostring(options.loadFromString) .. "' of type '" .. type(options.loadFromString) .. "'.")
    end

    if options.headerFunc ~= nil then
        assert(type(options.headerFunc) == "function", "ftcsv only takes a function value for optional parameter 'headerFunc'. You passed in '" .. tostring(options.headerFunc) .. "' of type '" .. type(options.headerFunc) .. "'.")
    end

    if options.ignoreQuotes == nil then
        options.ignoreQuotes = false
    else
        assert(type(options.ignoreQuotes) == "boolean", "ftcsv only takes a boolean value for optional parameter 'ignoreQuotes'. You passed in '" .. tostring(options.ignoreQuotes) .. "' of type '" .. type(options.ignoreQuotes) .. "'.")
    end

    if fromParseLine == true then
        -- buffered parsing reads the file in chunks; default chunk = 64 KiB
        if options.bufferSize == nil then
            options.bufferSize = 2^16
        else
            assert(type(options.bufferSize) == "number", "ftcsv only takes a number value for optional parameter 'bufferSize'. You passed in '" .. tostring(options.bufferSize) .. "' of type '" .. type(options.bufferSize) .. "'.")
        end

    else
        if options.bufferSize ~= nil then
            error("ftcsv: bufferSize can only be specified using 'parseLine'. When using 'parse', the entire file is read into memory")
        end
    end

    else
        -- no options given: supply the documented defaults
        options = {
            ["headers"] = true,
            ["loadFromString"] = false,
            ["ignoreQuotes"] = false,
            ["bufferSize"] = 2^16
        }
    end

    return options, fieldsToKeep

end
 
--- Find the index of the newline byte that terminates the header row,
-- honouring quoting so newlines inside quoted header names don't count.
-- For a CRLF ending, the returned index is advanced onto the LF.
-- Errors when no unquoted newline fits in the buffer and the buffer is
-- not the entire file (the caller must enlarge bufferSize).
local function findEndOfHeaders(str, entireFile)
    local i = 1
    local quote = sbyte('"')
    local newlines = {
        [sbyte("\n")] = true,
        [sbyte("\r")] = true
    }
    local quoted = false
    local char = sbyte(str, i)
    repeat
        -- this should still work for escaped quotes
        -- ex: " a "" b \r\n " -- there is always a pair around the newline
        if char == quote then
            quoted = not quoted
        end
        i = i + 1
        char = sbyte(str, i)
    until (newlines[char] and not quoted) or char == nil

    if not entireFile and char == nil then
        error("ftcsv: bufferSize needs to be larger to parse this file")
    end

    -- step over the LF of a CRLF pair so parsing resumes after the newline
    local nextChar = sbyte(str, i+1)
    if nextChar == sbyte("\n") and char == sbyte("\r") then
        i = i + 1
    end
    return i
end
 
-- Return the byte offset at which real content starts: a UTF-8
-- byte-order mark is the three bytes 0xEF 0xBB 0xBF (239, 187, 191);
-- when present, parsing begins at byte 4, otherwise at byte 1.
local function determineBOMOffset(inputString)
    local b1, b2, b3 = string.byte(inputString, 1, 3)
    if b1 == 239 and b2 == 187 and b3 == 191 then
        return 4
    end
    return 1
end
 
--- Parse just the header row and build the argument table shared by all
-- subsequent parseString calls. Returns the offset at which data rows
-- begin, the parser-argument table, and the final (deduped/filtered)
-- header list.
local function parseHeadersAndSetupArgs(inputString, delimiter, options, fieldsToKeep, entireFile)
    -- skip a UTF-8 BOM if present
    local startLine = determineBOMOffset(inputString)

    local endOfHeaderRow = findEndOfHeaders(inputString, entireFile)

    local parserArgs = {
        delimiter = delimiter,
        headerField = nil,
        fieldsToKeep = nil,
        inputLength = endOfHeaderRow,
        buffered = false,
        ignoreQuotes = options.ignoreQuotes,
        rowOffset = 0
    }

    -- first pass parses only up to endOfHeaderRow, yielding the raw headers
    local rawHeaders, endOfHeaders = parseString(inputString, startLine, parserArgs)

    -- manipulate the headers as per the options
    local modifiedHeaders = handleHeaders(rawHeaders[1], options)
    parserArgs.headerField = modifiedHeaders
    parserArgs.fieldsToKeep = fieldsToKeep
    parserArgs.inputLength = nil

    -- header-less files: rewind so the first row is re-parsed as data
    if options.headers == false then endOfHeaders = startLine end

    local finalHeaders = determineRealHeaders(modifiedHeaders, fieldsToKeep)
    if options.headers ~= false then
        local headersMetamethod = generateHeadersMetamethod(finalHeaders)
        parserArgs.headersMetamethod = headersMetamethod
    end

    return endOfHeaders, parserArgs, finalHeaders
end
 
-- runs the show!
--- Parse a CSV file (or a string, with loadFromString = true) in one
-- shot. Returns the list of row tables plus the list of final headers.
function ftcsv.parse(inputFile, delimiter, options)
    local delimiter, options = determineArgumentOrder(delimiter, options)

    local options, fieldsToKeep = parseOptions(delimiter, options, false)

    -- "*all": this code path reads the entire input into memory
    local inputString = initializeInputFromStringOrFile(inputFile, options, "*all")

    local endOfHeaders, parserArgs, finalHeaders = parseHeadersAndSetupArgs(inputString, delimiter, options, fieldsToKeep, true)

    local output = parseString(inputString, endOfHeaders, parserArgs)

    return output, finalHeaders
end
 
-- Measure a file's total size in bytes without disturbing the caller's
-- read position: remember it, jump to the end, then jump back.
local function getFileSize (file)
    local savedPosition = file:seek()
    local totalBytes = file:seek("end")
    file:seek("set", savedPosition)
    return totalBytes
end
 
-- True when the file's current position has reached (or passed) fileSize.
local function determineAtEndOfFile(file, fileSize)
    return file:seek() >= fileSize
end
 
--- Guard for parseLine's input: string input is unsupported in buffered
-- mode, so only real files are accepted; reads the first bufferSize bytes.
local function initializeInputFile(inputString, options)
    if options.loadFromString == true then
        error("ftcsv: parseLine currently doesn't support loading from string")
    end
    return initializeInputFromStringOrFile(inputString, options, options.bufferSize)
end
 
--- Buffered, iterator-style parsing. Returns a function usable directly
-- in a generic `for`, yielding (rowNumber, rowTable). The file is read
-- options.bufferSize bytes at a time; rows cut off at a buffer boundary
-- are re-parsed once more data arrives.
function ftcsv.parseLine(inputFile, delimiter, userOptions)
    local delimiter, userOptions = determineArgumentOrder(delimiter, userOptions)
    local options, fieldsToKeep = parseOptions(delimiter, userOptions, true)
    local inputString, file = initializeInputFile(inputFile, options)


    local fileSize, atEndOfFile = 0, false
    fileSize = getFileSize(file)
    atEndOfFile = determineAtEndOfFile(file, fileSize)

    local endOfHeaders, parserArgs, _ = parseHeadersAndSetupArgs(inputString, delimiter, options, fieldsToKeep, atEndOfFile)
    parserArgs.buffered = true
    parserArgs.endOfFile = atEndOfFile

    -- parse whatever complete rows fit in the first buffer; remember the
    -- column count so later buffers are validated against it
    local parsedBuffer, endOfParsedInput, totalColumnCount = parseString(inputString, endOfHeaders, parserArgs)
    parserArgs.totalColumnCount = totalColumnCount

    -- keep only the unparsed tail of the buffer
    inputString = ssub(inputString, endOfParsedInput)
    local bufferIndex, returnedRowsCount = 0, 0
    local currentRow, buffer

    return function()
        -- check parsed buffer for value
        bufferIndex = bufferIndex + 1
        currentRow = parsedBuffer[bufferIndex]
        if currentRow then
            returnedRowsCount = returnedRowsCount + 1
            return returnedRowsCount, currentRow
        end

        -- read more of the input
        buffer = file:read(options.bufferSize)
        if not buffer then
            -- input exhausted: close the handle and end the iteration
            file:close()
            return nil
        else
            parserArgs.endOfFile = determineAtEndOfFile(file, fileSize)
        end

        -- appends the new input to what was left over
        inputString = inputString .. buffer

        -- re-analyze and load buffer
        parserArgs.rowOffset = returnedRowsCount
        parsedBuffer, endOfParsedInput = parseString(inputString, 1, parserArgs)
        bufferIndex = 1

        -- cut the input string down
        inputString = ssub(inputString, endOfParsedInput)

        -- a buffer that yields zero complete rows can never make progress
        if #parsedBuffer == 0 then
            error("ftcsv: bufferSize needs to be larger to parse this file")
        end

        returnedRowsCount = returnedRowsCount + 1
        return returnedRowsCount, parsedBuffer[bufferIndex]
    end
end
 
 
 
-- The ENCODER code is below here
-- This could be broken out, but is kept here for portability
 
 
-- Convert a value to its CSV-escaped string form: embedded double quotes
-- are doubled per RFC 4180 (the surrounding quotes are added elsewhere).
-- Always returns exactly one value.
local function delimitField(field)
    field = tostring(field)
    if field:find('"') then
        -- Parenthesized to drop gsub's second return value (the match
        -- count); without the parens callers in multiple-value contexts
        -- would receive two values.
        return (field:gsub('"', '""'))
    else
        return field
    end
end
 
-- Build a field-escaping function for the given delimiter, used when
-- options.onlyRequiredQuotes is enabled: quotes are added only when the
-- field actually needs them (embedded quote, delimiter, or newline).
local function generateDelimitAndQuoteField(delimiter)
    local generatedFunction = function(field)
        field = tostring(field)
        if field:find('"') then
            -- embedded quotes: double them and wrap the whole field
            return '"' .. field:gsub('"', '""') .. '"'
        elseif field:find(delimiter, 1, true)
            or field:find("\n", 1, true)
            or field:find("\r", 1, true) then
            -- Plain-text find (4th arg true) avoids breakage when the
            -- delimiter is a Lua pattern magic character, and also quotes
            -- bare CR (previously unquoted), which the parser would
            -- otherwise treat as a row break.
            return '"' .. field .. '"'
        else
            return field
        end
    end
    return generatedFunction
end
 
-- Headers get spliced into generated Lua source inside double-quoted
-- strings, so any literal double quote must be backslash-escaped first.
local function escapeHeadersForLuaGenerator(headers)
    local escapedHeaders = {}
    for i, header in ipairs(headers) do
        if header:find('"') then
            escapedHeaders[i] = header:gsub('"', '\\"')
        else
            escapedHeaders[i] = header
        end
    end
    return escapedHeaders
end
 
-- a function that compiles some lua code to quickly print out the csv
--- Compile a small Lua chunk that emits one CSV line per call, for speed.
-- Header names are baked directly into the generated source (hence the
-- escaping step). Returns (iteratorFn, argsTable, 0) so callers can use
-- it directly in a generic `for`. Lines end with CRLF.
local function csvLineGenerator(inputTable, delimiter, headers, options)
    local escapedHeaders = escapeHeadersForLuaGenerator(headers)

    -- default variant: every field is wrapped in double quotes
    local outputFunc = [[
        local args, i = ...
        i = i + 1;
        if i > ]] .. #inputTable .. [[ then return nil end;
        return i, '"' .. args.delimitField(args.t[i]["]] ..
            table.concat(escapedHeaders, [["]) .. '"]] ..
            delimiter .. [["' .. args.delimitField(args.t[i]["]]) ..
            [["]) .. '"\r\n']]

    if options and options.onlyRequiredQuotes == true then
        -- quote-minimal variant: delimitField decides quoting per field
        outputFunc = [[
            local args, i = ...
            i = i + 1;
            if i > ]] .. #inputTable .. [[ then return nil end;
            return i, args.delimitField(args.t[i]["]] ..
                table.concat(escapedHeaders, [["]) .. ']] ..
                delimiter .. [[' .. args.delimitField(args.t[i]["]]) ..
                [["]) .. '\r\n']]
    end

    local arguments = {}
    arguments.t = inputTable
    -- we want to use the same delimitField throughout,
    -- so we're just going to pass it in
    if options and options.onlyRequiredQuotes == true then
        arguments.delimitField = generateDelimitAndQuoteField(delimiter)
    else
        arguments.delimitField = delimitField
    end

    return luaCompatibility.load(outputFunc), arguments, 0

end
 
-- Every requested header must exist as a key on the first row of the
-- input; raise a descriptive error otherwise.
local function validateHeaders(headers, inputTable)
    for _, headerName in ipairs(headers) do
        if inputTable[1][headerName] == nil then
            error("ftcsv: the field '" .. headerName .. "' doesn't exist in the inputTable")
        end
    end
end
 
-- Seed the output buffer with the header row. Quoting mirrors the field
-- policy: every header quoted, unless onlyRequiredQuotes is set, in which
-- case headers are joined bare (they were pre-escaped by the caller).
local function initializeOutputWithEscapedHeaders(escapedHeaders, delimiter, options)
    local headerRow
    if options and options.onlyRequiredQuotes == true then
        headerRow = table.concat(escapedHeaders, delimiter) .. '\r\n'
    else
        headerRow = '"' .. table.concat(escapedHeaders, '"' .. delimiter .. '"') .. '"\r\n'
    end
    return { headerRow }
end
 
--- Escape each header for the output's header row, using the same field
-- escaper the data rows get (quote-minimal when onlyRequiredQuotes is set).
local function escapeHeadersForOutput(headers, delimiter, options)
    local escapedHeaders = {}
    -- shadow the module-level delimitField so the implementation can be
    -- swapped without touching the loop below
    local delimitField = delimitField
    if options and options.onlyRequiredQuotes == true then
        delimitField = generateDelimitAndQuoteField(delimiter)
    end
    for i = 1, #headers do
        escapedHeaders[i] = delimitField(headers[i])
    end

    return escapedHeaders
end
 
-- Derive the header list from the keys of the first row, sorted
-- alphabetically so the output column order is deterministic
-- (pairs() iteration order is not).
local function extractHeadersFromTable(inputTable)
    local headers = {}
    for key in pairs(inputTable[1]) do
        table.insert(headers, key)
    end
    table.sort(headers)
    return headers
end
 
-- When options.fieldsToKeep is given it doubles as the output header
-- list; otherwise return nil so the headers get derived from the data.
local function getHeadersFromOptions(options)
    local headers = nil
    if options then
        if options.fieldsToKeep ~= nil then
            -- Bug fix: the message previously interpolated options.headers
            -- (copy-paste from another validator) instead of the value that
            -- actually failed validation, options.fieldsToKeep.
            assert(
                type(options.fieldsToKeep) == "table", "ftcsv only takes in a list (as a table) for the optional parameter 'fieldsToKeep'. You passed in '" .. tostring(options.fieldsToKeep) .. "' of type '" .. type(options.fieldsToKeep) .. "'.")
            headers = options.fieldsToKeep
        end
    end
    return headers
end
 
--- Shared setup for encoding: resolve the header list (explicit
-- fieldsToKeep, or derived from the first row), validate it against the
-- data, and build the output buffer seeded with the header line.
local function initializeGenerator(inputTable, delimiter, options)
    -- delimiter MUST be one character
    assert(#delimiter == 1 and type(delimiter) == "string", "the delimiter must be of string type and exactly one character")

    local headers = getHeadersFromOptions(options)
    if headers == nil then
        headers = extractHeadersFromTable(inputTable)
    end
    validateHeaders(headers, inputTable)

    local escapedHeaders = escapeHeadersForOutput(headers, delimiter, options)
    local output = initializeOutputWithEscapedHeaders(escapedHeaders, delimiter, options)
    return output, headers
end
 
-- works really quickly with luajit-2.1, because table.concat life
-- Encode inputTable (a list of row tables keyed by header) as a CSV
-- string. Accepts (table, delimiter, options) or (table, options);
-- delimiter defaults to ",". Returns CSV text including a header row.
function ftcsv.encode(inputTable, delimiter, options)
    local delimiter, options = determineArgumentOrder(delimiter, options)
    local output, headers = initializeGenerator(inputTable, delimiter, options)

    -- output[1] already holds the header row; data rows follow
    for i, line in csvLineGenerator(inputTable, delimiter, headers, options) do
        output[i+1] = line
    end

    -- combine and return final string
    return table.concat(output)
end
end


return csv
return ftcsv

2025年8月1日 (五) 23:05的版本

此模块的文档可以在模块:Csv.lua/doc创建

-- ftcsv: a delimiter-separated-values (CSV) parser and encoder for Lua.
-- This table carries only module metadata; the public API functions
-- (ftcsv.parse, ftcsv.parseLine, ftcsv.encode) are attached further down.
local ftcsv = {
    _VERSION = 'ftcsv 1.4.0',
    _DESCRIPTION = 'CSV library for Lua',
    _URL         = 'https://github.com/FourierTransformer/ftcsv',
    _LICENSE     = [[
        The MIT License (MIT)

        Copyright (c) 2016-2023 Shakil Thakur

        Permission is hereby granted, free of charge, to any person obtaining a copy
        of this software and associated documentation files (the "Software"), to deal
        in the Software without restriction, including without limitation the rights
        to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
        copies of the Software, and to permit persons to whom the Software is
        furnished to do so, subject to the following conditions:

        The above copyright notice and this permission notice shall be included in all
        copies or substantial portions of the Software.

        THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
        IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
        FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
        AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
        LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
        OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
        SOFTWARE.
    ]]
}

-- perf
-- localize hot string functions: locals are faster than global lookups
local sbyte = string.byte
local ssub = string.sub

-- luajit/lua compatability layer
local luaCompatibility = {}
-- _ENV exists on Lua 5.2+; jit is the table LuaJIT provides
if type(jit) == 'table' or _ENV then
    -- luajit and lua 5.2+
    luaCompatibility.load = _G.load
else
    -- lua 5.1
    luaCompatibility.load = loadstring
end

-- luajit specific speedups
-- luajit performs faster with iterating over string.byte,
-- whereas vanilla lua performs faster with string.find
if type(jit) == 'table' then
    luaCompatibility.LuaJIT = true
    -- finds the end of an escape sequence
    -- Walks byte-by-byte from i. Pairs of quotes ("") set the
    -- doubleQuoteEscape flag; a lone quote terminates the field.
    -- Returns (index just before the closing quote, flag), or nil
    -- when no closing quote exists within inputLength.
    function luaCompatibility.findClosingQuote(i, inputLength, inputString, quote, doubleQuoteEscape)
        local currentChar, nextChar = sbyte(inputString, i), nil
        while i <= inputLength do
            nextChar = sbyte(inputString, i+1)

            -- this one deals with " double quotes that are escaped "" within single quotes "
            -- these should be turned into a single quote at the end of the field
            if currentChar == quote and nextChar == quote then
                doubleQuoteEscape = true
                i = i + 2
                currentChar = sbyte(inputString, i)

            -- identifies the escape toggle
            elseif currentChar == quote and nextChar ~= quote then
                return i-1, doubleQuoteEscape
            else
                i = i + 1
                currentChar = nextChar
            end
        end
    end

else
    luaCompatibility.LuaJIT = false

    -- vanilla lua closing quote finder
    -- Jumps between runs of quotes with find('"+'); an odd-length run
    -- means the field continues past escaped quotes, so it recurses.
    -- Returns nil when no quote run is found before the end of input.
    function luaCompatibility.findClosingQuote(i, inputLength, inputString, quote, doubleQuoteEscape)
        local j, difference
        i, j = inputString:find('"+', i)
        if j == nil then
            return nil
        end
        difference = j - i
        if difference >= 1 then doubleQuoteEscape = true end
        if difference % 2 == 1 then
            return luaCompatibility.findClosingQuote(j+1, inputLength, inputString, quote, doubleQuoteEscape)
        end
        return j-1, doubleQuoteEscape
    end
end


-- Build the list of unique output headers, in first-seen order.
-- When fieldsToKeep is given (as a set), only those headers survive;
-- otherwise every unique header is kept.
local function determineRealHeaders(headerField, fieldsToKeep)
    local realHeaders = {}
    local alreadySeen = {}
    for i = 1, #headerField do
        local header = headerField[i]
        local wanted = (fieldsToKeep == nil) or fieldsToKeep[header]
        if wanted and not alreadySeen[header] then
            realHeaders[#realHeaders + 1] = header
            alreadySeen[header] = true
        end
    end
    return realHeaders
end


-- Count the unique columns that will actually be emitted, honoring
-- fieldsToKeep (a set) when it is provided.
local function determineTotalColumnCount(headerField, fieldsToKeep)
    local total = 0
    local counted = {}
    for _, header in pairs(headerField) do
        local wanted = (fieldsToKeep == nil) or fieldsToKeep[header]
        if wanted and not counted[header] then
            counted[header] = true
            total = total + 1
        end
    end
    return total
end

-- Compile a __newindex metamethod that pre-populates each new row table
-- with every known header key, speeding up field assignment in the
-- parser. Returns nil when any header contains "]" — such a header could
-- escape the generated [[ ]] long-bracket string, so the optimization is
-- skipped (parsing still works, just without the speedup).
local function generateHeadersMetamethod(finalHeaders)
    -- if a header field tries to escape, we will simply return nil
    -- the parser will still parse, but wont get the performance benefit of
    -- having headers predefined
    for _, headers in ipairs(finalHeaders) do
        if headers:find("]") then
            return nil
        end
    end
    local rawSetup = "local t, k, _ = ... \
    rawset(t, k, {[ [[%s]] ]=true})"
    rawSetup = rawSetup:format(table.concat(finalHeaders, "]] ]=true, [ [["))
    return luaCompatibility.load(rawSetup)
end

-- main function used to parse
-- Core CSV scanner. Parses inputString from index i using options
-- (delimiter, headerField, fieldsToKeep, ignoreQuotes, buffered,
-- endOfFile, rowOffset, ...). Returns:
--   outResults        list of row tables keyed by header
--   i / lineStart     index just past the parsed input, or — in buffered
--                     mode — the start offset of the incomplete trailing
--                     line so the caller can refill and re-parse
--   totalColumnCount  number of unique columns kept
local function parseString(inputString, i, options)

    -- keep track of my chars!
    local inputLength = options.inputLength or #inputString
    local currentChar, nextChar = sbyte(inputString, i), nil
    local skipChar = 0
    local field
    local fieldStart = i
    local fieldNum = 1
    local lineNum = 1
    local lineStart = i
    local doubleQuoteEscape, emptyIdentified = false, false

    local skipIndex
    local charPatternToSkip = "[" .. options.delimiter .. "\r\n]"

    --bytes
    local CR = sbyte("\r")
    local LF = sbyte("\n")
    local quote = sbyte('"')
    local delimiterByte = sbyte(options.delimiter)

    -- explode most used options
    local headersMetamethod = options.headersMetamethod
    local fieldsToKeep = options.fieldsToKeep
    local ignoreQuotes = options.ignoreQuotes
    local headerField = options.headerField
    local endOfFile = options.endOfFile
    local buffered = options.buffered

    local outResults = {}

    -- in the first run, the headers haven't been set yet.
    if headerField == nil then
        headerField = {}
        -- setup a metatable to simply return the key that's passed in
        local headerMeta = {__index = function(_, key) return key end}
        setmetatable(headerField, headerMeta)
    end

    if headersMetamethod then
        setmetatable(outResults, {__newindex = headersMetamethod})
    end
    outResults[1] = {}

    -- totalColumnCount based on unique headers and fieldsToKeep
    local totalColumnCount = options.totalColumnCount or determineTotalColumnCount(headerField, fieldsToKeep)

    -- closes the field that started at fieldStart and stores it on the
    -- current output row (honoring fieldsToKeep and stripping quotes)
    local function assignValueToField()
        if fieldsToKeep == nil or fieldsToKeep[headerField[fieldNum]] then

            -- create new field
            if ignoreQuotes == false and sbyte(inputString, i-1) == quote then
                field = ssub(inputString, fieldStart, i-2)
            else
                field = ssub(inputString, fieldStart, i-1)
            end
            if doubleQuoteEscape then
                field = field:gsub('""', '"')
            end

            -- reset flags
            doubleQuoteEscape = false
            emptyIdentified = false

            -- assign field in output
            if headerField[fieldNum] ~= nil then
                outResults[lineNum][headerField[fieldNum]] = field
            else
                error('ftcsv: too many columns in row ' .. options.rowOffset + lineNum)
            end
        end
    end

    while i <= inputLength do
        -- go by two chars at a time,
        --  currentChar is set at the bottom.
        nextChar = sbyte(inputString, i+1)

        -- empty string
        if ignoreQuotes == false and currentChar == quote and nextChar == quote then
            skipChar = 1
            fieldStart = i + 2
            emptyIdentified = true

        -- escape toggle.
        -- This can only happen if fields have quotes around them
        -- so the current "start" has to be where a quote character is.
        elseif ignoreQuotes == false and currentChar == quote and nextChar ~= quote and fieldStart == i then
            fieldStart = i + 1
            -- if an empty field was identified before assignment, it means
            -- that this is a quoted field that starts with escaped quotes
            -- ex: """a"""
            if emptyIdentified then
                fieldStart = fieldStart - 2
                emptyIdentified = false
            end
            skipChar = 1
            i, doubleQuoteEscape = luaCompatibility.findClosingQuote(i+1, inputLength, inputString, quote, doubleQuoteEscape)

        -- create some fields
        elseif currentChar == delimiterByte then
            assignValueToField()

            -- increaseFieldIndices
            fieldNum = fieldNum + 1
            fieldStart = i + 1

        -- newline
        elseif (currentChar == LF or currentChar == CR) then
            assignValueToField()

            -- handle CRLF
            if (currentChar == CR and nextChar == LF) then
                skipChar = 1
                fieldStart = fieldStart + 1
            end

            -- incrememnt for new line
            if fieldNum < totalColumnCount then
                -- sometimes in buffered mode, the buffer starts with a newline
                -- this skips the newline and lets the parsing continue.
                if buffered and lineNum == 1 and fieldNum == 1 and field == "" then
                    fieldStart = i + 1 + skipChar
                    lineStart = fieldStart
                else
                    error('ftcsv: too few columns in row ' .. options.rowOffset + lineNum)
                end
            else
                lineNum = lineNum + 1
                outResults[lineNum] = {}
                fieldNum = 1
                fieldStart = i + 1 + skipChar
                lineStart = fieldStart
            end

        elseif luaCompatibility.LuaJIT == false then
            -- vanilla Lua fast path: jump straight to the next interesting
            -- character (delimiter or newline) instead of stepping bytes
            skipIndex = inputString:find(charPatternToSkip, i)
            if skipIndex then
                skipChar = skipIndex - i - 1
            end

        end

        -- in buffered mode and it can't find the closing quote
        -- it usually means in the middle of a buffer and need to backtrack
        if i == nil then
            if buffered then
                outResults[lineNum] = nil
                return outResults, lineStart
            else
                error("ftcsv: can't find closing quote in row " .. options.rowOffset + lineNum ..
                 ". Try running with the option ignoreQuotes=true if the source incorrectly uses quotes.")
            end
        end

        -- Increment Counter
        i = i + 1 + skipChar
        if (skipChar > 0) then
            currentChar = sbyte(inputString, i)
        else
            currentChar = nextChar
        end
        skipChar = 0
    end

    if buffered and not endOfFile then
        outResults[lineNum] = nil
        return outResults, lineStart
    end

    -- create last new field
    assignValueToField()

    -- remove last field if empty
    if fieldNum < totalColumnCount then

        -- indicates last field was really just a CRLF,
        -- so, it can be removed
        if fieldNum == 1 and field == "" then
            outResults[lineNum] = nil
        else
            error('ftcsv: too few columns in row ' .. options.rowOffset + lineNum)
        end
    end

    return outResults, i, totalColumnCount
end

-- Normalize the parsed header row in place and return it.
-- Headerless files get numeric column names; otherwise empty header
-- names are rejected. options.rename remaps names (by old name, or by
-- position for headerless files) and options.headerFunc transforms each
-- final name.
local function handleHeaders(headerField, options)
    if options.headers == false then
        -- no header row: address columns by their 1-based position
        for position = 1, #headerField do
            headerField[position] = position
        end
    else
        -- with headers present, every name must be non-empty
        for _, headerName in ipairs(headerField) do
            if #headerName == 0 then
                error('ftcsv: Cannot parse a file which contains empty headers')
            end
        end
    end

    local rename = options.rename
    if rename then
        -- basic rename (["a" = "apple"])
        for position = 1, #headerField do
            local replacement = rename[headerField[position]]
            if replacement then
                headerField[position] = replacement
            end
        end
        -- headerless files rename positionally via the array part
        if #rename > 0 then
            for position = 1, #rename do
                headerField[position] = rename[position]
            end
        end
    end

    -- apply some sweet header manipulation
    if options.headerFunc then
        for position = 1, #headerField do
            headerField[position] = options.headerFunc(headerField[position])
        end
    end

    return headerField
end

-- Read `amount` from the file at textFile ("*all" for the whole file).
-- Returns (contents, handle); the handle is closed (and therefore
-- unusable) when the entire file was read at once.
local function loadFile(textFile, amount)
    local file = io.open(textFile, "r")
    if not file then
        error("ftcsv: File not found at " .. textFile)
    end
    local contents = file:read(amount)
    if amount == "*all" then
        file:close()
    end
    return contents, file
end

-- Resolve the raw CSV text: either the argument itself (loadFromString)
-- or the contents read from disk. Empty input is rejected up front.
-- Returns (inputString, fileHandle-or-nil).
local function initializeInputFromStringOrFile(inputFile, options, amount)
    local file
    local inputString = inputFile
    if not options.loadFromString then
        inputString, file = loadFile(inputFile, amount)
    end

    -- if they sent in an empty file...
    if inputString == "" then
        error('ftcsv: Cannot parse an empty file')
    end
    return inputString, file
end

-- Support both call styles: (input, delimiter, options) and
-- (input, options). Returns (delimiter, options) with the delimiter
-- defaulting to ",".
local function determineArgumentOrder(delimiter, options)
    local delimiterType = type(delimiter)

    -- backwards compatibile layer: explicit delimiter string
    if delimiterType == "string" then
        return delimiter, options
    end

    -- new format: an options table, possibly carrying its own delimiter
    if delimiterType == "table" then
        return delimiter.delimiter or ",", delimiter
    end

    -- nothing specified: assume "," delimited and call it a day!
    return ",", nil
end

-- Validate user-supplied options and normalize them into a full options
-- table; also converts options.fieldsToKeep (a list) into a lookup set.
-- fromParseLine toggles the bufferSize rules: parseLine defaults it to
-- 2^16 bytes, while parse rejects an explicit bufferSize entirely.
-- Returns (options, fieldsToKeepSet-or-nil).
local function parseOptions(delimiter, options, fromParseLine)
    -- delimiter MUST be one character
    assert(#delimiter == 1 and type(delimiter) == "string", "the delimiter must be of string type and exactly one character")

    local fieldsToKeep = nil

    if options then

    -- NOTE: the validation blocks below are intentionally kept at the
    -- same indentation level as the enclosing 'if options then'.
    if options.headers ~= nil then
        assert(type(options.headers) == "boolean", "ftcsv only takes the boolean 'true' or 'false' for the optional parameter 'headers' (default 'true'). You passed in '" .. tostring(options.headers) .. "' of type '" .. type(options.headers) .. "'.")
    end

    if options.rename ~= nil then
        assert(type(options.rename) == "table", "ftcsv only takes in a key-value table for the optional parameter 'rename'. You passed in '" .. tostring(options.rename) .. "' of type '" .. type(options.rename) .. "'.")
    end

    if options.fieldsToKeep ~= nil then
        assert(type(options.fieldsToKeep) == "table", "ftcsv only takes in a list (as a table) for the optional parameter 'fieldsToKeep'. You passed in '" .. tostring(options.fieldsToKeep) .. "' of type '" .. type(options.fieldsToKeep) .. "'.")
        local ofieldsToKeep = options.fieldsToKeep
        if ofieldsToKeep ~= nil then
            -- convert the list into a set for O(1) membership checks
            fieldsToKeep = {}
            for j = 1, #ofieldsToKeep do
                fieldsToKeep[ofieldsToKeep[j]] = true
            end
        end
        if options.headers == false and options.rename == nil then
            error("ftcsv: fieldsToKeep only works with header-less files when using the 'rename' functionality")
        end
    end

    if options.loadFromString ~= nil then
        assert(type(options.loadFromString) == "boolean", "ftcsv only takes a boolean value for optional parameter 'loadFromString'. You passed in '" .. tostring(options.loadFromString) .. "' of type '" .. type(options.loadFromString) .. "'.")
    end

    if options.headerFunc ~= nil then
        assert(type(options.headerFunc) == "function", "ftcsv only takes a function value for optional parameter 'headerFunc'. You passed in '" .. tostring(options.headerFunc) .. "' of type '" .. type(options.headerFunc) .. "'.")
    end

    if options.ignoreQuotes == nil then
        options.ignoreQuotes = false
    else
        assert(type(options.ignoreQuotes) == "boolean", "ftcsv only takes a boolean value for optional parameter 'ignoreQuotes'. You passed in '" .. tostring(options.ignoreQuotes) .. "' of type '" .. type(options.ignoreQuotes) .. "'.")
    end

    if fromParseLine == true then
        if options.bufferSize == nil then
            options.bufferSize = 2^16
        else
            assert(type(options.bufferSize) == "number", "ftcsv only takes a number value for optional parameter 'bufferSize'. You passed in '" .. tostring(options.bufferSize) .. "' of type '" .. type(options.bufferSize) .. "'.")
        end

    else
        if options.bufferSize ~= nil then
            error("ftcsv: bufferSize can only be specified using 'parseLine'. When using 'parse', the entire file is read into memory")
        end
    end

    else
        -- no options given: build the default set
        options = {
            ["headers"] = true,
            ["loadFromString"] = false,
            ["ignoreQuotes"] = false,
            ["bufferSize"] = 2^16
        }
    end

    return options, fieldsToKeep

end

-- Locate the index of the newline that terminates the first row,
-- skipping newlines inside quoted fields. Returns the index of the
-- line-ending character (the LF for a CRLF pair). Errors when the
-- buffer ends before the first row does (unless entireFile is true).
local function findEndOfHeaders(str, entireFile)
    local quoteByte = string.byte('"')
    local crByte = string.byte("\r")
    local lfByte = string.byte("\n")

    local insideQuotes = false
    local i = 1
    local char = string.byte(str, i)
    repeat
        -- a quote toggles the quoted state; escaped quotes ("") come in
        -- pairs, so the state is restored around them either way
        if char == quoteByte then
            insideQuotes = not insideQuotes
        end
        i = i + 1
        char = string.byte(str, i)
    until char == nil or ((char == crByte or char == lfByte) and not insideQuotes)

    if not entireFile and char == nil then
        error("ftcsv: bufferSize needs to be larger to parse this file")
    end

    -- step over the LF of a CRLF pair
    if char == crByte and string.byte(str, i + 1) == lfByte then
        i = i + 1
    end
    return i
end

-- Return the index where real content starts: 4 when the string opens
-- with a UTF-8 byte-order mark (EF BB BF = 239 187 191), else 1.
local function determineBOMOffset(inputString)
    local b1, b2, b3 = string.byte(inputString, 1, 3)
    if b1 == 239 and b2 == 187 and b3 == 191 then
        return 4
    end
    return 1
end

-- Parse the first row to determine the headers and build the parserArgs
-- table used for the rest of the parse.
-- Returns (endOfHeaders, parserArgs, finalHeaders): endOfHeaders is the
-- index where data rows begin — reset to the start of input when
-- options.headers == false, since the first row is then data.
local function parseHeadersAndSetupArgs(inputString, delimiter, options, fieldsToKeep, entireFile)
    local startLine = determineBOMOffset(inputString)

    local endOfHeaderRow = findEndOfHeaders(inputString, entireFile)

    -- restrict the first parse to just the header row via inputLength
    local parserArgs = {
        delimiter = delimiter,
        headerField = nil,
        fieldsToKeep = nil,
        inputLength = endOfHeaderRow,
        buffered = false,
        ignoreQuotes = options.ignoreQuotes,
        rowOffset = 0
    }

    local rawHeaders, endOfHeaders = parseString(inputString, startLine, parserArgs)

    -- manipulate the headers as per the options
    local modifiedHeaders = handleHeaders(rawHeaders[1], options)
    parserArgs.headerField = modifiedHeaders
    parserArgs.fieldsToKeep = fieldsToKeep
    parserArgs.inputLength = nil

    if options.headers == false then endOfHeaders = startLine end

    local finalHeaders = determineRealHeaders(modifiedHeaders, fieldsToKeep)
    if options.headers ~= false then
        -- optional speedup: precompiled row initializer (may be nil)
        local headersMetamethod = generateHeadersMetamethod(finalHeaders)
        parserArgs.headersMetamethod = headersMetamethod
    end

    return endOfHeaders, parserArgs, finalHeaders
end

-- Parse a whole CSV input in one shot.
-- inputFile is a path, or the CSV text itself when options.loadFromString
-- is true. Accepts (input, delimiter, options) or (input, options).
-- Returns (rows, finalHeaders).
function ftcsv.parse(inputFile, delimiter, options)
    -- normalize the two call signatures, then validate options
    local actualDelimiter, rawOptions = determineArgumentOrder(delimiter, options)
    local parsedOptions, fieldsToKeep = parseOptions(actualDelimiter, rawOptions, false)

    local inputString = initializeInputFromStringOrFile(inputFile, parsedOptions, "*all")

    local endOfHeaders, parserArgs, finalHeaders =
        parseHeadersAndSetupArgs(inputString, actualDelimiter, parsedOptions, fieldsToKeep, true)

    local output = parseString(inputString, endOfHeaders, parserArgs)

    return output, finalHeaders
end

-- Size of an open file in bytes, measured by seeking to the end;
-- the read cursor is restored before returning.
local function getFileSize (file)
    local savedPosition = file:seek()
    local size = file:seek("end")
    file:seek("set", savedPosition)
    return size
end

-- True when the file's read cursor is at (or past) the last byte.
local function determineAtEndOfFile(file, fileSize)
    return file:seek() >= fileSize
end

-- Open the input for parseLine. Loading from a string is rejected
-- because buffered parsing needs a real file handle to read from
-- incrementally. Returns (firstBuffer, fileHandle).
local function initializeInputFile(inputString, options)
    if options.loadFromString == true then
        error("ftcsv: parseLine currently doesn't support loading from string")
    end
    return initializeInputFromStringOrFile(inputString, options, options.bufferSize)
end

-- Buffered, iterator-based parsing: reads the file options.bufferSize
-- bytes at a time and returns an iterator yielding (rowNumber, rowTable)
-- per call. A row that straddles a buffer boundary is dropped from the
-- current batch and re-parsed after the next read. The file handle is
-- closed when the iterator is exhausted.
function ftcsv.parseLine(inputFile, delimiter, userOptions)
    local delimiter, userOptions = determineArgumentOrder(delimiter, userOptions)
    local options, fieldsToKeep = parseOptions(delimiter, userOptions, true)
    local inputString, file = initializeInputFile(inputFile, options)


    local fileSize, atEndOfFile = 0, false
    fileSize = getFileSize(file)
    atEndOfFile = determineAtEndOfFile(file, fileSize)

    local endOfHeaders, parserArgs, _ = parseHeadersAndSetupArgs(inputString, delimiter, options, fieldsToKeep, atEndOfFile)
    parserArgs.buffered = true
    parserArgs.endOfFile = atEndOfFile

    -- parse the first buffer; totalColumnCount is cached for later calls
    local parsedBuffer, endOfParsedInput, totalColumnCount = parseString(inputString, endOfHeaders, parserArgs)
    parserArgs.totalColumnCount = totalColumnCount

    -- keep only the unconsumed tail of the buffer
    inputString = ssub(inputString, endOfParsedInput)
    local bufferIndex, returnedRowsCount = 0, 0
    local currentRow, buffer

    return function()
        -- check parsed buffer for value
        bufferIndex = bufferIndex + 1
        currentRow = parsedBuffer[bufferIndex]
        if currentRow then
            returnedRowsCount = returnedRowsCount + 1
            return returnedRowsCount, currentRow
        end

        -- read more of the input
        buffer = file:read(options.bufferSize)
        if not buffer then
            file:close()
            return nil
        else
            parserArgs.endOfFile = determineAtEndOfFile(file, fileSize)
        end

        -- appends the new input to what was left over
        inputString = inputString .. buffer

        -- re-analyze and load buffer
        parserArgs.rowOffset = returnedRowsCount
        parsedBuffer, endOfParsedInput = parseString(inputString, 1, parserArgs)
        bufferIndex = 1

        -- cut the input string down
        inputString = ssub(inputString, endOfParsedInput)

        -- a whole buffer with zero complete rows means a single row is
        -- larger than the buffer itself
        if #parsedBuffer == 0 then
            error("ftcsv: bufferSize needs to be larger to parse this file")
        end

        returnedRowsCount = returnedRowsCount + 1
        return returnedRowsCount, parsedBuffer[bufferIndex]
    end
end



-- The ENCODER code is below here
-- This could be broken out, but is kept here for portability


-- Escape a single field for CSV output: embedded double quotes are
-- doubled (RFC 4180 style). Non-string values are converted via
-- tostring. Always returns exactly one value.
-- Fix: `return field:gsub(...)` leaked gsub's second return value (the
-- substitution count) to callers; parentheses truncate it to one value.
local function delimitField(field)
    field = tostring(field)
    if field:find('"') then
        -- parentheses drop gsub's (string, count) second result
        return (field:gsub('"', '""'))
    else
        return field
    end
end

-- Build a field escaper for "only required quotes" mode: quote a field
-- only when it contains a double quote, a newline, or the delimiter.
-- Fix: the delimiter was interpolated into a Lua pattern character class
-- unescaped, so pattern-magic delimiters (e.g. '%' or ']') produced a
-- malformed or wrong pattern. Non-alphanumeric delimiters are now
-- %-escaped before being embedded.
-- NOTE(review): a bare '\r' (without '\n') does not trigger quoting —
-- kept as-is to preserve existing output; confirm against RFC 4180 needs.
local function generateDelimitAndQuoteField(delimiter)
    -- %-escape any non-alphanumeric so it is literal inside [...]
    local escapedDelimiter = delimiter:gsub("%W", "%%%0")
    local generatedFunction = function(field)
        field = tostring(field)
        if field:find('"') then
            -- embedded quotes: double them and quote the whole field
            return '"' .. field:gsub('"', '""') .. '"'
        elseif field:find('[\n' .. escapedDelimiter .. ']') then
            return '"' .. field .. '"'
        else
            return field
        end
    end
    return generatedFunction
end

-- Backslash-escape double quotes in header names so they can be embedded
-- inside the double-quoted strings of the generated Lua chunk.
local function escapeHeadersForLuaGenerator(headers)
    local escapedHeaders = {}
    for i = 1, #headers do
        local header = headers[i]
        if header:find('"') then
            header = header:gsub('"', '\\"')
        end
        escapedHeaders[i] = header
    end
    return escapedHeaders
end

-- a function that compiles some lua code to quickly print out the csv
-- Returns an iterator triple (func, state, 0) for a generic for-loop.
-- The compiled chunk walks inputTable and concatenates each row's fields
-- in header order; by default every field is wrapped in quotes, while
-- with options.onlyRequiredQuotes the per-field escaper decides. Fields
-- are escaped through args.delimitField.
local function csvLineGenerator(inputTable, delimiter, headers, options)
    local escapedHeaders = escapeHeadersForLuaGenerator(headers)

    -- default chunk: '"' field '"' delimiter '"' field ... '"\r\n'
    local outputFunc = [[
        local args, i = ...
        i = i + 1;
        if i > ]] .. #inputTable .. [[ then return nil end;
        return i, '"' .. args.delimitField(args.t[i]["]] ..
            table.concat(escapedHeaders, [["]) .. '"]] ..
            delimiter .. [["' .. args.delimitField(args.t[i]["]]) ..
            [["]) .. '"\r\n']]

    if options and options.onlyRequiredQuotes == true then
        -- variant without the unconditional surrounding quotes
        outputFunc = [[
            local args, i = ...
            i = i + 1;
            if i > ]] .. #inputTable .. [[ then return nil end;
            return i, args.delimitField(args.t[i]["]] ..
                table.concat(escapedHeaders, [["]) .. ']] ..
                delimiter .. [[' .. args.delimitField(args.t[i]["]]) ..
                [["]) .. '\r\n']]
    end

    local arguments = {}
    arguments.t = inputTable
    -- we want to use the same delimitField throughout,
    -- so we're just going to pass it in
    if options and options.onlyRequiredQuotes == true then
        arguments.delimitField = generateDelimitAndQuoteField(delimiter)
    else
        arguments.delimitField = delimitField
    end

    return luaCompatibility.load(outputFunc), arguments, 0

end

-- Ensure every requested header exists as a key of the first row;
-- raises an error naming the first missing field.
local function validateHeaders(headers, inputTable)
    local firstRow = inputTable[1]
    for i = 1, #headers do
        local header = headers[i]
        if firstRow[header] == nil then
            error("ftcsv: the field '" .. header .. "' doesn't exist in the inputTable")
        end
    end
end

-- Start the output buffer with the header row. In the default mode every
-- header is wrapped in quotes; with onlyRequiredQuotes the (pre-escaped)
-- headers are joined bare.
local function initializeOutputWithEscapedHeaders(escapedHeaders, delimiter, options)
    local headerLine
    if options and options.onlyRequiredQuotes == true then
        headerLine = table.concat(escapedHeaders, delimiter) .. '\r\n'
    else
        headerLine = '"' .. table.concat(escapedHeaders, '"' .. delimiter .. '"') .. '"\r\n'
    end
    return { headerLine }
end

-- Escape each header for the output's first row. The escaper is the
-- file-level delimitField by default, or a delimiter-aware
-- quote-only-when-needed escaper with options.onlyRequiredQuotes.
local function escapeHeadersForOutput(headers, delimiter, options)
    local escaper = delimitField
    if options and options.onlyRequiredQuotes == true then
        escaper = generateDelimitAndQuoteField(delimiter)
    end

    local escapedHeaders = {}
    for i = 1, #headers do
        escapedHeaders[i] = escaper(headers[i])
    end
    return escapedHeaders
end

-- Derive the header list from the keys of the first row.
-- Sorted alphabetically so output is deterministic (pairs() iteration
-- order is unspecified).
local function extractHeadersFromTable(inputTable)
    local headers = {}
    local count = 0
    for key in pairs(inputTable[1]) do
        count = count + 1
        headers[count] = key
    end
    table.sort(headers)
    return headers
end

-- Return the explicit header list for encoding, if the caller supplied
-- one via options.fieldsToKeep; nil otherwise (headers are then derived
-- from the input table).
-- Fix: the assertion message previously reported options.headers instead
-- of the offending options.fieldsToKeep value.
local function getHeadersFromOptions(options)
    local headers = nil
    if options and options.fieldsToKeep ~= nil then
        assert(
            type(options.fieldsToKeep) == "table", "ftcsv only takes in a list (as a table) for the optional parameter 'fieldsToKeep'. You passed in '" .. tostring(options.fieldsToKeep) .. "' of type '" .. type(options.fieldsToKeep) .. "'.")
        headers = options.fieldsToKeep
    end
    return headers
end

-- Prepare everything ftcsv.encode needs: the ordered header list and an
-- output buffer whose first entry is the already-escaped header row.
local function initializeGenerator(inputTable, delimiter, options)
    -- delimiter MUST be one character
    assert(#delimiter == 1 and type(delimiter) == "string", "the delimiter must be of string type and exactly one character")

    -- headers come from options.fieldsToKeep when given, otherwise from
    -- the (alphabetically sorted) keys of the first row
    local headers = getHeadersFromOptions(options)
    if headers == nil then
        headers = extractHeadersFromTable(inputTable)
    end
    validateHeaders(headers, inputTable)

    local escapedHeaders = escapeHeadersForOutput(headers, delimiter, options)
    local output = initializeOutputWithEscapedHeaders(escapedHeaders, delimiter, options)
    return output, headers
end

-- works really quickly with luajit-2.1, because table.concat life
-- Encode inputTable (a list of row tables keyed by header) as a CSV
-- string. Accepts (table, delimiter, options) or (table, options);
-- delimiter defaults to ",". Returns CSV text including a header row.
function ftcsv.encode(inputTable, delimiter, options)
    local delimiter, options = determineArgumentOrder(delimiter, options)
    local output, headers = initializeGenerator(inputTable, delimiter, options)

    -- output[1] already holds the header row; data rows follow
    for i, line in csvLineGenerator(inputTable, delimiter, headers, options) do
        output[i+1] = line
    end

    -- combine and return final string
    return table.concat(output)
end

return ftcsv