// qiniuFileHash.js
import Rusha from 'rusha'
/**
 * Computes a file's ETag in the browser, strictly following the Qiniu Kodo
 * (object storage) ETag algorithm:
 *   - files <= 4 MB: urlsafe_base64(0x16 + sha1(data))
 *   - files >  4 MB: urlsafe_base64(0x96 + sha1(sha1(block1) + sha1(block2) + ...))
 *     where each block is a 4 MB slice of the file.
 *
 * @param {File} file File object selected by the user via an <input> element
 * @param {(progress: { computed: number, total: number, percent: number }) => void} [progressCallback]
 *        Optional progress callback (throttled to roughly one call per 100 ms;
 *        the final 100% call is always delivered).
 * @returns {Promise<string>} A Promise that resolves to the computed ETag string
 */
export const qiniuFileHash = async (file, progressCallback) => {
  // --- Helpers ---

  // 1. SHA-1 computation.
  // Prefer the native Web Crypto API (async, returns Promise<ArrayBuffer>);
  // fall back to Rusha (sync, returns ArrayBuffer) when crypto.subtle is
  // unavailable, e.g. in insecure (non-HTTPS) contexts.
  const createSha1 = window.crypto && window.crypto.subtle
    ? (data) => window.crypto.subtle.digest('SHA-1', data) // Web Crypto API
    : (data) => Rusha.createHash().update(data).digest(); // https://github.com/srijs/rusha

  // 2. Concatenate a list of ArrayBuffers into a single ArrayBuffer.
  const concatArrayBuffers = (buffers) => {
    const totalLength = buffers.reduce((acc, b) => acc + b.byteLength, 0);
    const result = new Uint8Array(totalLength);
    let offset = 0;
    for (const buffer of buffers) {
      result.set(new Uint8Array(buffer), offset);
      offset += buffer.byteLength;
    }
    return result.buffer;
  };

  // 3. ArrayBuffer -> URL-safe Base64.
  // A byte-by-byte loop avoids the stack overflow that
  // String.fromCharCode.apply(...) can hit on large inputs; the input here is
  // only 21 bytes (1 prefix byte + 20-byte SHA-1), so performance is a non-issue.
  const urlSafeBase64Encode = (buffer) => {
    let binary = '';
    const bytes = new Uint8Array(buffer);
    for (let i = 0; i < bytes.byteLength; i++) {
      binary += String.fromCharCode(bytes[i]);
    }
    return btoa(binary).replace(/\//g, '_').replace(/\+/g, '-');
  };

  // 4. Throttled progress-callback wrapper.
  let lastUpdateTime = 0;
  const throttleInterval = 100; // report progress at most every 100 ms

  const throttledProgressCallback = (progress) => {
    if (!progressCallback) return;

    const now = Date.now();
    // The 100% report is always delivered so the caller's final state is correct.
    if (progress.percent === 100) {
      setTimeout(() => progressCallback(progress), 0);
      return;
    }

    if (now - lastUpdateTime > throttleInterval) {
      lastUpdateTime = now;
      // Decouple via setTimeout so the callback cannot block hashing.
      setTimeout(() => progressCallback(progress), 0);
    }
  };

  // --- Main logic ---

  if (file.size === 0) {
    // Well-known Qiniu ETag of the empty file.
    throttledProgressCallback({ computed: 0, total: 0, percent: 100 });
    return 'Fto5o-5ea0sNMlW_75VgGJCv2AcJ';
  }

  const blockSize = 4 * 1024 * 1024; // 4 MB, fixed by the Qiniu algorithm

  // --- 1. Small file (<= 4 MB): single SHA-1, 0x16 prefix ---
  if (file.size <= blockSize) {
    const fileBuffer = await file.arrayBuffer();
    const sha1Buffer = await createSha1(fileBuffer);
    const prefix = new Uint8Array([0x16]);
    const finalBuffer = concatArrayBuffers([prefix.buffer, sha1Buffer]);
    const hash = urlSafeBase64Encode(finalBuffer);
    throttledProgressCallback({ computed: file.size, total: file.size, percent: 100 });
    return hash;
  }

  // --- 2. Large file (> 4 MB): hash per 4 MB block, then hash the hashes ---
  const sha1Results = []; // SHA-1 of each block, in file order
  let computed = 0;       // bytes hashed so far (for progress reporting)

  // Try the high-throughput BYOB streaming mode first.
  let reader;
  try {
    const stream = file.stream();
    reader = stream.getReader({ mode: 'byob' });
  } catch (error) {
    // BYOB readers are not supported everywhere; fall back to slice() mode below.
    // console.warn("BYOB reader not supported, falling back to slice() mode.", error);
  }

  if (reader) {
    // --- 2a. High-throughput BYOB streaming mode ---
    let buffer = new Uint8Array(blockSize); // accumulates data until a full 4 MB block is ready
    let offset = 0; // number of bytes currently filled in `buffer`

    // try/finally guarantees the stream lock is released even if a read throws.
    try {
      while (true) {
        // A BYOB reader needs a view to write into; point it at the unfilled
        // tail of the accumulation buffer, recomputed each iteration.
        const view = new Uint8Array(buffer.buffer, offset, buffer.byteLength - offset);
        const { done, value } = await reader.read(view);

        // read() transfers (detaches) the underlying ArrayBuffer, so re-acquire
        // it from the returned view. On a normal close `value` is an empty view
        // carrying the buffer back, but it can be undefined if the stream was
        // cancelled — guard against that before dereferencing.
        if (value) {
          buffer = new Uint8Array(value.buffer);
        }

        if (done) {
          // End of file: hash the trailing partial block, if any.
          if (offset > 0) {
            const finalChunkView = new Uint8Array(buffer.buffer, 0, offset);
            const chunkSha1 = await createSha1(finalChunkView);
            // Progress for the last block is deliberately withheld here; the
            // final 100% report is emitted after the ETag is fully computed.
            computed += finalChunkView.byteLength;
            sha1Results.push(chunkSha1);
          }
          break;
        }

        // value.byteLength is the number of bytes actually read this iteration.
        offset += value.byteLength;

        // A full 4 MB block has accumulated: hash it and reuse the buffer.
        if (offset === blockSize) {
          const chunkSha1 = await createSha1(buffer);
          computed += buffer.byteLength;
          throttledProgressCallback({ computed: computed, total: file.size, percent: computed / file.size * 100 });
          sha1Results.push(chunkSha1);
          offset = 0; // reset and reuse `buffer` for the next block
        }
      }
    } finally {
      reader.releaseLock(); // always release the stream lock
    }
  }
  else {
    // --- 2b. Fallback: slice() with parallel per-block hashing ---
    const blockCount = Math.ceil(file.size / blockSize);
    const promises = [];

    for (let i = 0; i < blockCount; i++) {
      const start = i * blockSize;
      const end = Math.min(start + blockSize, file.size);
      const chunk = file.slice(start, end);

      const promise = (async () => {
        const buffer = await chunk.arrayBuffer();
        const chunkSha1 = await createSha1(buffer);
        computed += buffer.byteLength;
        if (computed < file.size) {
          // Withhold the final block's progress report; the 100% report is
          // emitted after the ETag is fully computed.
          throttledProgressCallback({ computed: computed, total: file.size, percent: computed / file.size * 100 });
        }
        return chunkSha1;
      })();
      promises.push(promise);
    }

    // Read and hash all blocks in parallel; Promise.all preserves block order.
    const resolvedSha1s = await Promise.all(promises);
    sha1Results.push(...resolvedSha1s);
  }

  // --- 3. Final combination ---
  // All per-block SHA-1s are ready: hash their concatenation, prefix with 0x96.
  const concatenatedSha1s = concatArrayBuffers(sha1Results);
  const finalSha1 = await createSha1(concatenatedSha1s);
  const prefix = new Uint8Array([0x96]);
  const finalBuffer = concatArrayBuffers([prefix.buffer, finalSha1]);

  const hash = urlSafeBase64Encode(finalBuffer);
  throttledProgressCallback({ computed: file.size, total: file.size, percent: 100 });
  return hash;
};