// Usage
/*
  Usage: run the code below in the context of the page, or inject a script tag pointing to this file:
      <script src="src/example_browsertrix_driver.js"></script>

  It uses window.Browsertrix, a hypothetical API that would be provided by the Browsertrix extension.
*/

// Returns a promise that settles (with no value) once `ms` milliseconds have elapsed.
const sleep = (ms) => new Promise((wake) => {
    setTimeout(wake, ms);
});

/**
 * Example behavior driver that forwards BehaviorBus events to `window.Browsertrix`,
 * a hypothetical API assumed to be provided by the Browsertrix extension.
 *
 * Hooks are grouped by the context they are registered for (`browser`,
 * `serviceworker`). Every hook is called as `(event, BehaviorBus, page)`.
 */
const BrowsertrixInBrowserCrawlDriver = {
    name: 'BrowsertrixInBrowserCrawlDriver',
    schema: 'BehaviorDriverSchema@0.1.0',

    state: {
        // Value returned by the most recent window.Browsertrix.saveWarc() call.
        warc_file: null,
    },

    hooks: {
        browser: {
            // Persist a file emitted by a behavior into the origin private file system
            // (OPFS) and register it with Browsertrix as an extra WARC resource.
            FS_WRITE_FILE: async (event, BehaviorBus, page) => {
                // NOTE(review): assumes the event carries `path` and `content` -- confirm against the bus schema.
                const { path, content } = event;

                const opfsRoot = await window.navigator.storage.getDirectory();
                // OPFS entry names may not contain path separators, so flatten the path
                // into a single encoded name (one entry per distinct path).
                const fileHandle = await opfsRoot.getFileHandle(encodeURIComponent(path), { create: true });

                // createSyncAccessHandle() is only exposed in dedicated workers; the
                // writable-stream API is usable from the window context this hook runs in.
                const writable = await fileHandle.createWritable();
                await writable.write(content);
                await writable.close(); // data is only committed to the file on close

                // Hand over the file handle (still usable) rather than a closed access handle.
                // NOTE(review): the argument type expected by addExtraFileToWarc is hypothetical -- confirm.
                await window.Browsertrix.addExtraFileToWarc(path, fileHandle);
            },
            DISCOVERED_OUTLINK: async (event, BehaviorBus, page) => {
                await window.Browsertrix.addLinkToCrawlQueue(event.url);
            },
            DISCOVERED_TEXT: async (event, BehaviorBus, page) => {
                await window.Browsertrix.addTextToSearchIndex(event.text);
            },
            // Save the WARC and remember the result in the driver state.
            PAGE_CAPTURE_COMPLETE: async (event, BehaviorBus, page) => {
                BrowsertrixInBrowserCrawlDriver.state.warc_file = await window.Browsertrix.saveWarc();
            },
        },
        serviceworker: {
            // Upload whatever saveWarc() result is currently stored in the driver state.
            // NOTE(review): `window` is normally not defined inside a service worker -- confirm
            // how the hypothetical Browsertrix API is meant to be reached from this context.
            PAGE_CAPTURE_COMPLETE: async (event, BehaviorBus, page) => {
                await window.Browsertrix.uploadWarc(BrowsertrixInBrowserCrawlDriver.state.warc_file);
            },
        },
    },
};

/**
 * Drive a single-page capture from inside the page itself.
 *
 * If the current document is not already at `url`, this only triggers the
 * navigation and returns: the navigation replaces the document (and this
 * script with it), so the driver has to be injected again on the new page.
 * Otherwise it injects the bus + example behaviors, initializes the bus with
 * this driver followed by `behaviors`, emits PAGE_SETUP, and schedules the
 * PAGE_LOAD -> PAGE_CAPTURE -> PAGE_CAPTURE_COMPLETE sequence (5s apart).
 *
 * @param {string} url - page to archive
 * @param {Array<object>} [behaviors] - extra behaviors to register; when
 *   nullish, falls back to `window.BEHAVIORS` as it exists after the scripts
 *   have been injected, then to an empty list
 * @returns {Promise<void>} resolves once the lifecycle has been started or
 *   scheduled (not when the capture is complete)
 */
const crawlInBrowsertrixInBrowser = async (url, behaviors) => {
    // navigate to the url we want to archive in the browser -- but only when we
    // are not there yet: assigning location.href unconditionally reloads the page,
    // which loops forever when this file is loaded by the page itself.
    const targetUrl = new URL(url, window.location.href).href;
    if (window.location.href !== targetUrl) {
        window.location.href = url;
        return;
    }

    // Append a <script> tag and wait until it has loaded (or failed to load).
    const injectScript = (src) => new Promise((resolve, reject) => {
        const tag = document.createElement("script");
        tag.src = src;
        tag.onload = () => resolve(tag);
        tag.onerror = () => reject(new Error(`failed to load script: ${src}`));
        document.head.appendChild(tag);
    });

    // inject the behavior_bus.js implementation + example_behaviors.js into the page.
    // Loaded one after the other: dynamically inserted scripts otherwise execute in
    // arbitrary order, and initWindowBehaviorBus must exist before it is called below.
    await injectScript("src/behavior_bus.js");
    await injectScript("src/example_behaviors.js");

    // NOTE(review): presumably example_behaviors.js is what defines window.BEHAVIORS,
    // in which case a caller reading it before injection passes undefined -- confirm.
    const extraBehaviors = behaviors ?? window.BEHAVIORS ?? [];

    // initialize the WindowBehaviorBus bus
    const BehaviorBus = window.initWindowBehaviorBus([BrowsertrixInBrowserCrawlDriver, ...extraBehaviors]);
    const emitFromDriver = (type, pageUrl) => {
        BehaviorBus.emit({ type, url: pageUrl }, { path: [BrowsertrixInBrowserCrawlDriver.name] });
    };
    emitFromDriver('PAGE_SETUP', url);

    // run the page lifecycle events
    const runPageLifecycle = async () => {
        emitFromDriver('PAGE_LOAD', window.location.href);
        await sleep(5000);
        emitFromDriver('PAGE_CAPTURE', window.location.href);
        await sleep(5000);
        emitFromDriver('PAGE_CAPTURE_COMPLETE', window.location.href);
    };
    const startLifecycle = () => runPageLifecycle().catch((err) => {
        console.error('page lifecycle failed:', err);
    });

    if (document.readyState === 'complete') {
        // 'load' has already fired (likely, since we waited for the scripts above),
        // so a listener would never be called; start right away without blocking.
        void startLifecycle();
    } else {
        window.addEventListener('load', startLifecycle, { once: true });
    }
};

// run the example
// Top-level `await` is a syntax error in classic (non-module) scripts such as the
// <script src="..."> usage shown at the top of this file, so use a promise chain
// and surface failures instead of leaving the rejection unhandled.
crawlInBrowsertrixInBrowser('https://example.com', window.BEHAVIORS)
    .catch((err) => console.error('crawlInBrowsertrixInBrowser failed:', err));