Saving file after getBytes()

Hi, I am having problems with saving a PDF file after doing some manipulations:

// Read the PDF from the bookmarked "Downloads" folder and get its bytes.
let fm = FileManager.local()
let bm = fm.bookmarkedPath("Downloads")
let ar = fm.read(bm+"/input.pdf").getBytes()  // Typed Array

// Some manipulations here, e.g.
// ar[0] = 21;

// Attempts to write the modified bytes back out.
// The trailing comment on each line records the observed result.
fm.write(bm+"/output.pdf", ar)  // fails because 'ar' is not 'Data' type 
fm.writeString(bm+"/output.pdf", ar)  // fails because 'ar' is not a string
fm.writeString(bm+"/output.pdf", ar.toRawString())  // fails because toRawString() does not work with typed arrays
fm.writeString(bm+"/output.pdf", ar2st_1(ar))  // fails with 'URIError: URI error'
fm.writeString(bm+"/output.pdf", ar2st_2(ar))  // works but output.pdf is corrupt

// Function from: https://github.com/dchest/tweetnacl-util-js/blob/master/nacl-util.js
/**
 * Interpret an array of byte values as UTF-8 and return the decoded string.
 *
 * The bytes are first turned into a "binary string" (one character per byte);
 * escape() percent-encodes it and decodeURIComponent() then decodes those
 * percent sequences as UTF-8.
 *
 * @param {ArrayLike<number>} arr byte values
 * @returns {string} decoded text
 * @throws {URIError} when the bytes are not a valid UTF-8 sequence
 */
function ar2st_1(arr) {
  const binary = Array.from(arr, (byte) => String.fromCharCode(byte)).join('');
  return decodeURIComponent(escape(binary));
}

 // Try skipping the UTF trick: http://ecmanaut.blogspot.com/2006/07/encoding-decoding-utf8-in-javascript.html
/**
 * Build a "binary string" from byte values: one character per byte, with the
 * byte value used directly as the character code (no UTF-8 decoding step).
 *
 * @param {ArrayLike<number>} arr byte values
 * @returns {string} string with one character per input element
 */
function ar2st_2(arr) {
  let text = '';
  for (let idx = 0; idx < arr.length; idx += 1) {
    text += String.fromCharCode(arr[idx]);
  }
  return text;
}

I guess my question boils down to how to best convert the typed array representation of the binary PDF data to Scriptable ‘Data’ type and save it subsequently to a file?

1 Like

It looks like maybe the only way to create Data from “raw” bytes is Data.fromBase64String.

btoa: Base64 encoding

In browsers, the btoa (MDN) function (Binary to (Base64) Ascii) is the built-in way to do Base64 encoding. Its input format is a bit wonky (a string of code points between U+0 and U+FF, representing byte values), but your functions already produce what it needs (the for loop through the .join('')). btoa takes this input string and produces the Base64 encoding of the series of represented bytes.


Scriptable has a btoa function, but I ran into a problem when I tried to use it to “round-trip” full-range byte data.

// Each row: [label, btoa input, expected Base64, expected bytes].
// The trailing comment on a row is the result observed with Scriptable's btoa.
const scriptableBtoaCases = [
  ['Scriptable btoa U+41', 'A', 'QQ==', [65]], // ok
  ['Scriptable btoa U+80', '\u{80}', 'gA==', [128]], // woA= [194,128]
  ['Scriptable btoa U+100', '\u{100}' /* no expected values, should throw */], // xIA= [196,128]
];
for (const [label, input, expectedB64, expectedBytes] of scriptableBtoaCases) {
  testB64(label, () => btoa(input), expectedB64, expectedBytes);
}
testB64
/**
 * Run one Base64 check and log the outcome to the console.
 *
 * `fn` is called to produce a Base64 string. That string is compared with
 * `eB64`, then decoded with `Data.fromBase64String(...).getBytes()` and the
 * decoded bytes are compared with `eBytes`. Leaving both expectations
 * undefined means `fn` is expected to throw.
 *
 * @param {string} m label used in every log message
 * @param {()=>string} fn produces the Base64 string under test
 * @param {string|undefined} eB64 expected Base64 text
 * @param {number[]|undefined} eBytes expected decoded byte values
 */
function testB64(m, fn, eB64 = undefined, eBytes = undefined) {
  // Same length and no position where the two arrays differ (loose comparison).
  const sameBytes = (want, got) =>
    want.length == got.length && !want.some((v, i) => v != got[i]);

  let encoded;
  try {
    encoded = fn();
  } catch (err) {
    // A throw is a pass only when no Base64 result was expected.
    if (eB64) {
      console.error(`ERROR: ${m} expected ${eB64}, but threw error ${String(err)}`);
    } else {
      console.log(`OK: ${m} threw error ${String(err)}`);
    }
    return;
  }

  const textMatches = encoded == eB64;
  if (!textMatches) {
    console.error(`ERROR: ${m}: expected ${eB64 ?? 'an error'}, got ${encoded}`);
  }

  const decoded = Data.fromBase64String(encoded).getBytes();
  const bytesMatch = eBytes ? sameBytes(eBytes, decoded) : false;
  if (!bytesMatch) {
    console.error(`ERROR: ${m}: expected ${eBytes ? `[${eBytes}]` : 'an error'}, got [${decoded}]`);
  }

  if (textMatches && bytesMatch) {
    console.log(`OK: ${m}`);
  }
}

It looks like the problem is that the input string is encoded as UTF-8 before being Base64 encoded. In my testing, any code point higher than U+7F is represented by multiple bytes (that match its UTF-8 encoding), and code points over U+FF are not rejected like they “should” be (browsers and Node btoa throw an error).

WebView to the rescue?

If we ship the data over to a WebView its btoa works as expected, but we have to do it asynchronously.

// One WebView instance; its JavaScript context provides the btoa used below.
const wv = new WebView();
/**
 * Base64-encode a string by evaluating `btoa(...)` inside the WebView.
 *
 * @param {string} s string to hand to the WebView's btoa
 * @returns {Promise<string>} result of evaluating the btoa call
 */
function webBtoa(s) {
  // JSON.stringify turns the string into a quoted, escaped literal for the script.
  const literal = JSON.stringify(s);
  const script = `btoa(${literal})`;
  return wv.evaluateJavaScript(script);
}

// Each row: [label, btoa input, expected Base64, expected bytes].
const webViewBtoaCases = [
  ['WebView btoa U+41', 'A', 'QQ==', [65]],
  ['WebView btoa U+80', '\u{80}', 'gA==', [128]],
  ['WebView btoa U+100', '\u{100}' /* no expected values, should throw */],
];
// Awaited one at a time so the log output stays in row order.
for (const [label, input, expectedB64, expectedBytes] of webViewBtoaCases) {
  await testAsyncB64(label, () => webBtoa(input), expectedB64, expectedBytes);
}
testB64 and testAsyncB64
/**
 * Run one Base64 check and log the outcome to the console.
 *
 * `fn` is called to produce a Base64 string. That string is compared with
 * `eB64`, then decoded with `Data.fromBase64String(...).getBytes()` and the
 * decoded bytes are compared with `eBytes`. Leaving both expectations
 * undefined means `fn` is expected to throw.
 *
 * @param {string} m label used in every log message
 * @param {()=>string} fn produces the Base64 string under test
 * @param {string|undefined} eB64 expected Base64 text
 * @param {number[]|undefined} eBytes expected decoded byte values
 */
function testB64(m, fn, eB64 = undefined, eBytes = undefined) {
  // Same length and no position where the two arrays differ (loose comparison).
  const sameBytes = (want, got) =>
    want.length == got.length && !want.some((v, i) => v != got[i]);

  let encoded;
  try {
    encoded = fn();
  } catch (err) {
    // A throw is a pass only when no Base64 result was expected.
    if (eB64) {
      console.error(`ERROR: ${m} expected ${eB64}, but threw error ${String(err)}`);
    } else {
      console.log(`OK: ${m} threw error ${String(err)}`);
    }
    return;
  }

  const textMatches = encoded == eB64;
  if (!textMatches) {
    console.error(`ERROR: ${m}: expected ${eB64 ?? 'an error'}, got ${encoded}`);
  }

  const decoded = Data.fromBase64String(encoded).getBytes();
  const bytesMatch = eBytes ? sameBytes(eBytes, decoded) : false;
  if (!bytesMatch) {
    console.error(`ERROR: ${m}: expected ${eBytes ? `[${eBytes}]` : 'an error'}, got [${decoded}]`);
  }

  if (textMatches && bytesMatch) {
    console.log(`OK: ${m}`);
  }
}
/**
 * Asynchronous counterpart of testB64: awaits `fn`, then passes the resolved
 * value to testB64 for checking.
 *
 * An exception from either `fn` or the testB64 call ends up in the catch
 * branch, where it counts as a pass only when `eB64` is falsy.
 *
 * @param {string} m label used in every log message
 * @param {()=>Promise<string>} fn produces the Base64 string under test
 * @param {string|undefined} eB64 expected Base64 text
 * @param {number[]|undefined} eBytes expected decoded byte values
 * @returns {Promise<void>}
 */
async function testAsyncB64(m, fn, eB64 = undefined, eBytes = undefined) {
  try {
    const resolved = await fn();
    testB64(m, () => resolved, eB64, eBytes);
  } catch (err) {
    if (!eB64) {
      console.log(`OK: ${m} threw error ${String(err)}`);
    } else {
      console.error(`ERROR: ${m} expected ${eB64}, but threw error ${String(err)}`);
    }
  }
}

Here is an ar2data function and tests that show it encodes (with WebView btoa) and decodes (with Data .getBytes) arbitrary byte data properly:

/**
 * Convert a sequence of byte values (0-255) into a Scriptable Data object.
 *
 * Each byte becomes one character of a "binary string"; the WebView's btoa
 * turns that string into Base64, which Data.fromBase64String then decodes.
 *
 * @param {number[]|Uint8Array} ar byte values; plain arrays and typed arrays both work
 * @returns {Promise<Data>}
 */
async function ar2data(ar) {
  // Array.from (rather than ar.map) so typed-array input also yields a plain
  // array of strings; a typed array's own .map would coerce the strings back
  // to numbers.
  const binary = Array.from(ar, (b) => String.fromCodePoint(b)).join('');
  return Data.fromBase64String(await webBtoa(binary));
}

// Real input would come from a file:
// const ar = Data.fromFile(pathname).getBytes()
// Fake data instead: every byte value 0-255 in ascending order.
const ar = Array.from({ length: 256 }, (_, value) => value);
const encodedAr = await ar2data(ar);
const rt = encodedAr.getBytes();
showEq('round-tripped 0-255', ar, rt);

// All 0-255 byte values again, but in a different order.
const arPlus = Array.from(ar, (value) => (value * 3) % 256);
const encodedArPlus = await ar2data(arPlus);
const rtPlus = encodedArPlus.getBytes();
showEq('round-tripped (0-255)*3 mod 256', arPlus, rtPlus);
webBtoa and showEq
// One WebView instance; its JavaScript context provides the btoa used below.
const wv = new WebView();
/**
 * Base64-encode a string by evaluating `btoa(...)` inside the WebView.
 *
 * @param {string} s string to hand to the WebView's btoa
 * @returns {Promise<string>} result of evaluating the btoa call
 */
function webBtoa(s) {
  // JSON.stringify turns the string into a quoted, escaped literal for the script.
  const literal = JSON.stringify(s);
  const script = `btoa(${literal})`;
  return wv.evaluateJavaScript(script);
}
/**
 * Compare two arrays element by element and log the verdict.
 *
 * Logs an error for a length mismatch or for the first differing index;
 * otherwise logs "<m> ok".
 *
 * @param {string} m label used in the log message
 * @param {number[]} a first array
 * @param {number[]} b second array
 */
function showEq(m, a, b) {
  if (a.length != b.length) {
    console.error(`${m} length mismatch: ${a.length} vs ${b.length}`);
    return;
  }

  // Advance to the first index where the elements differ (loose comparison).
  let idx = 0;
  while (idx < a.length && a[idx] == b[idx]) {
    idx += 1;
  }

  if (idx < a.length) {
    console.error(`${m} mismatch at ${idx}: ${a[idx]} vs ${b[idx]}`);
    return;
  }

  console.log(`${m} ok`);
}

Once you have a Data object from (e.g.) ar2data, you should be able to write it to a file with a FileManager like your first .write call. I was able to generate Data from my “all bytes” blobs, write them to a temporary file, and verified that I got the same byte sequences back after reading into a new Data object from FileManager.

Copy/Re-implement btoa?

If you don’t want to deal with asynchronous code, or maybe using a WebView is too much overhead, you could probably integrate a “pure JS” btoa implementation. npm.js shows several under btoa or base64. MDN links to btoa in core-js that looks like it wouldn’t be too hard to rework into a standalone function. Just be sure to follow any licensing that applies.

Typed Array

Oh, and you mentioned Typed Arrays. It looks like .getBytes() returns a plain Array, not a Uint8Array. This probably doesn’t matter in most cases. Your for loops would work fine with Typed Arrays, but my use of .map to convert byte values to strings would fail (a Typed Array’s .map produces a new Typed Array, which can’t hold strings; Array.from() could help by creating a new plain array from the typed one).

// Log the constructor name of several array values to show which ones are
// plain Arrays and which are typed arrays. The trailing comment on each
// console.log line is the name that was printed.
const a = [];
console.log(`( [] ) gives ${a.constructor.name}`); // Array
const a2 = new Array;
console.log(`( new Array ) gives ${a2.constructor.name}`); // Array
// Bytes of a Data object built from an empty string.
const b = Data.fromString('').getBytes();
console.log(`( [Data obj].getBytes() ) gives ${b.constructor.name}`); // Array
const u8 = new Uint8Array;
console.log(`( new Uint8Array ) gives ${u8.constructor.name}`); // Uint8Array
3 Likes

@ChrisJ, Thank you so much for the comprehensive answer. For anybody looking to solve similar problems, I ended up using this function.

// De-minified from https://gist.github.com/jonleighton/958841
/**
 * Base64-encode a sequence of bytes.
 *
 * @param {ArrayBuffer|ArrayLike<number>} r anything `new Uint8Array(r)` accepts
 * @returns {string} Base64 text, padded with "=" to a multiple of four characters
 */
function ar2b64(r) {
  const ALPHABET = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/';
  const bytes = new Uint8Array(r);
  const total = bytes.byteLength;
  const tail = total % 3; // bytes left over after the last full 3-byte group
  const fullEnd = total - tail;
  let out = '';

  // Every full group of 3 bytes (24 bits) becomes 4 characters of 6 bits each.
  for (let pos = 0; pos < fullEnd; pos += 3) {
    const triple = (bytes[pos] << 16) | (bytes[pos + 1] << 8) | bytes[pos + 2];
    out +=
      ALPHABET[(triple >> 18) & 63] +
      ALPHABET[(triple >> 12) & 63] +
      ALPHABET[(triple >> 6) & 63] +
      ALPHABET[triple & 63];
  }

  if (tail === 1) {
    // 8 bits -> 2 characters (the second carries 2 bits, zero-filled) plus "==".
    const last = bytes[fullEnd];
    out += ALPHABET[last >> 2] + ALPHABET[(last & 3) << 4] + '==';
  } else if (tail === 2) {
    // 16 bits -> 3 characters (the third carries 4 bits, zero-filled) plus "=".
    const pair = (bytes[fullEnd] << 8) | bytes[fullEnd + 1];
    out += ALPHABET[pair >> 10] + ALPHABET[(pair >> 4) & 63] + ALPHABET[(pair & 15) << 2] + '=';
  }

  return out;
}
// Read the PDF from the bookmarked "Downloads" folder as an array of byte values.
const fm = FileManager.local();
const bm = fm.bookmarkedPath("Downloads");
const inputPath = `${bm}/input.pdf`;
const outputPath = `${bm}/output.pdf`;
let ar = fm.read(inputPath).getBytes(); // Array

// Some manipulations here, e.g. ar[0] = 21

// Bytes -> Base64 text -> Data, so that fm.write receives a Data object.
ar = Data.fromBase64String(ar2b64(ar));
fm.write(outputPath, ar);

Unfortunately, using this strategy Scriptable crashes when processing larger files. I guess having a native array-to-Data function in the Data API would be more efficient. Apparently, this has been discussed before.

@simonbs, it would be really great to be able to reverse Data.getBytes() directly using a Scriptable function, but I do understand that there are probably many other more pressing issues on your list.

EDIT: For anyone reading this in 2024: since v1.7.14, Scriptable now has the Data.fromBytes() function, which sorts this out neatly!