import { OnTranscriptEvent, RecordingState, SpeechToText } from '../speech-to-text';

/**
 * Options accepted by the `WebkitSpeechToText` constructor.
 */
export type WebkitSpeechToTextConfig = {
  /**
   * Callback that receives the accumulated transcript. In this file it is called
   * with `false` as the second argument on every recognition result and with `true`
   * when recognition is stopped with a pending transcript.
   */
  onTranscript: OnTranscriptEvent;
  /**
   * Base silence period. It is used directly as a `setTimeout` delay, so it is
   * expressed in milliseconds.
   */
  silenceDuration: number;
  /**
   * Transcripts containing at most this many words are treated as short sentences
   * and get an extended silence period before recognition is stopped.
   */
  shortSentenceLength: number;
  /**
   * Multiplier applied to `silenceDuration` to obtain the total silence period
   * for short sentences (the extra wait is `silenceDuration * factor - silenceDuration`).
   */
  shortSentenceSilenceFactor: number;
  /**
   * NOTE(review): not read anywhere in this file; presumably kept for parity with
   * other speech-to-text configurations — confirm before relying on it.
   */
  deviceId?: string;
};

/**
 * Mutable state shared by the module-level recognition helpers and the
 * `WebkitSpeechToText` class.
 */
type WebkitSpeechContext = {
  /** Delay (ms) of the first silence timer armed after every recognition result. */
  primarySilenceDuration: number;
  /**
   * Delay (ms) of the additional timer armed for short sentences once the primary
   * timer has elapsed. A falsy value disables the secondary wait.
   */
  secondarySilenceDuration: number;
  /** Callback that receives interim (`false`) and final (`true`) transcripts. */
  onTranscript: OnTranscriptEvent;
  /** Handle of the primary silence timer, or `undefined` when it has been cleared. */
  primarySilenceTimer: NodeJS.Timeout | undefined;
  /** Handle of the secondary silence timer, or `undefined` when it has been cleared. */
  secondarySilenceTimer: NodeJS.Timeout | undefined;
  /** Word-count threshold (inclusive) below which a transcript counts as a short sentence. */
  shortSentenceLength: number;
  /** The underlying browser recogniser instance. */
  recognition: SpeechRecognition;
  /** Current state as reported by `getState()`. */
  recordingState: RecordingState;
  /**
   * Set when a `no-speech` error arrives while recording; the `end` handler
   * consumes it and restarts recognition.
   */
  restartOnEnd: boolean;
  /**
   * Latest joined transcript that has not yet been delivered as final.
   * Reset to `undefined` whenever recognition is (re)started.
   */
  result: string | undefined;
};


/**
 * Counts the whitespace-separated words in a string.
 *
 * Leading and trailing whitespace is ignored so that a transcript such as
 * `" hello"` counts as one word instead of two; splitting the untrimmed text
 * would produce an empty leading/trailing token.
 *
 * @param s - The text to inspect; `undefined` is treated as empty.
 * @returns The number of words, or 0 for empty / whitespace-only / undefined input.
 */
const countWords = (s: string | undefined): number => {
  const trimmed = s ? s.trim() : '';
  return trimmed ? trimmed.split(/\s+/).length : 0;
};

/**
 * Cancels any pending silence timers and clears their handles on the context.
 *
 * The two timers are checked independently so that a stale primary handle can
 * never prevent a pending secondary timer from being cancelled.
 *
 * @param context - The speech context whose timers should be cancelled.
 */
const stopSilenceTimer = (context: WebkitSpeechContext) => {
  if (context.primarySilenceTimer !== undefined) {
    clearTimeout(context.primarySilenceTimer);
    context.primarySilenceTimer = undefined;
  }

  if (context.secondarySilenceTimer !== undefined) {
    clearTimeout(context.secondarySilenceTimer);
    context.secondarySilenceTimer = undefined;
  }
};

/**
 * (Re)arms silence detection for the current transcript.
 *
 * Any previously armed timer is cancelled first. Once the primary silence period
 * elapses, `handler` is either invoked straight away or, when the transcript is a
 * short sentence and a secondary duration is configured, deferred by the
 * secondary silence period.
 *
 * @param context - The speech context holding the timer handles and durations.
 * @param handler - Callback invoked once the required silence has elapsed.
 */
const startSilenceTimer = (context: WebkitSpeechContext, handler: () => void) => {
  // Drop whatever timer an earlier result left behind before arming a new one.
  stopSilenceTimer(context);

  const onPrimaryElapsed = (): void => {
    // Short sentences get an extra grace period, provided one is configured.
    const needsSecondaryWait =
      Boolean(context.secondarySilenceDuration) &&
      countWords(context.result) <= context.shortSentenceLength;

    if (!needsSecondaryWait) {
      handler();
      return;
    }

    context.primarySilenceTimer = undefined;
    context.secondarySilenceTimer = setTimeout(handler, context.secondarySilenceDuration);
  };

  // Timeout for the minimum period of silence.
  context.primarySilenceTimer = setTimeout(onPrimaryElapsed, context.primarySilenceDuration);
};

/**
 * Handles a `result` event from the recogniser.
 *
 * While recording, the first alternative of every result in the event is joined
 * with single spaces, stored on the context and reported through `onTranscript`
 * with `false` as the second argument. The silence timer is then re-armed; when it
 * elapses, recognition is stopped and the state becomes `PAUSED`.
 *
 * Events received in any other state are ignored.
 *
 * @param context - The speech context to update.
 * @param event - The recognition event carrying the result list.
 */
const onRecognitionResult = async (context: WebkitSpeechContext, event: SpeechRecognitionEvent) => {
  if (context.recordingState !== RecordingState.RECORDING) {
    return;
  }

  // Collect the top alternative of each result, skipping empty transcripts.
  const fragments: string[] = [];
  for (let index = 0; index < event.results.length; index += 1) {
    const transcript = event.results.item(index).item(0).transcript;
    if (transcript && transcript.length > 0) {
      fragments.push(transcript);
    }
  }

  if (fragments.length > 0) {
    const joined = fragments.join(' ');
    context.result = joined;
    context.onTranscript(joined, false);
  }

  // The only exit is from the silence timer.
  const pauseAfterSilence = async (): Promise<void> => {
    await stopRecognition(context, RecordingState.PAUSED);
  };
  startSilenceTimer(context, pauseAfterSilence);
};

/**
 * Handles the recogniser's `end` event.
 *
 * Because recognition runs in continuous mode, a session is only expected to end
 * when it is stopped explicitly. If the restart flag has been set, the flag is
 * cleared and recognition is started again in the `RECORDING` state; otherwise
 * nothing happens.
 *
 * @param context - The speech context whose recogniser has ended.
 */
const onRecognitionEnd = async (context: WebkitSpeechContext) => {
  if (!context.restartOnEnd) {
    return;
  }

  context.restartOnEnd = false;
  await startRecognition(context, RecordingState.RECORDING);
};

/**
 * Handles the recogniser's `error` event.
 *
 * Only the `no-speech` error is acted upon, and only while recording: the restart
 * flag is raised, any silence timer is cancelled and the recogniser is stopped so
 * that the `end` handler can start it again. This works around a quirk of the
 * recognition API. All other errors are ignored.
 *
 * @param context - The speech context whose recogniser reported the error.
 * @param event - The error event.
 */
const onRecognitionError = async (context: WebkitSpeechContext, event: SpeechRecognitionErrorEvent) => {
  const isNoSpeech = event.error === 'no-speech';
  if (!isNoSpeech || context.recordingState !== RecordingState.RECORDING) {
    return;
  }

  context.restartOnEnd = true;
  stopSilenceTimer(context);
  context.recognition.stop();
};

/**
 * Starts the recogniser and records the new state.
 *
 * The pending transcript is discarded, the result/end/error handlers are attached
 * and `start()` is called on the recogniser. The recording state is only updated
 * after `start()` has returned, so a failed start leaves the previous state intact.
 *
 * @param speechContext - The speech context to start.
 * @param state - The state to record once the recogniser has been started.
 * @returns A promise that resolves once started, or rejects with whatever was thrown.
 */
const startRecognition = (speechContext: WebkitSpeechContext, state: RecordingState): Promise<void> => {
  try {
    speechContext.result = undefined;

    const { recognition } = speechContext;
    recognition.onresult = (event: SpeechRecognitionEvent) => onRecognitionResult(speechContext, event);
    recognition.onend = () => onRecognitionEnd(speechContext);
    recognition.onerror = (event: SpeechRecognitionErrorEvent) => onRecognitionError(speechContext, event);
    recognition.start();

    speechContext.recordingState = state;
    return Promise.resolve();
  } catch (e) {
    return Promise.reject(e);
  }
};

/**
 * Stops the recogniser, records the new state and flushes any pending transcript.
 *
 * The event handlers are detached and the silence timers cancelled before the
 * recogniser is stopped, so no further results, errors or restarts are processed.
 * A pending transcript is then delivered through `onTranscript` with `true` as the
 * second argument.
 *
 * The pending transcript is removed from the context *before* the callback runs.
 * This guarantees it is delivered at most once even if the callback throws or
 * synchronously re-enters this function (for example by calling `stop()` on the
 * owning object from inside the transcript handler).
 *
 * @param speechContext - The speech context to stop.
 * @param state - The state to record (e.g. paused or not recording).
 * @returns A promise that resolves once stopped, or rejects with whatever was thrown.
 */
const stopRecognition = async (speechContext: WebkitSpeechContext, state: RecordingState): Promise<void> => {
  speechContext.recordingState = state;

  // Detach the handlers first so stopping cannot trigger a restart or new results.
  const { recognition } = speechContext;
  recognition.onerror = null;
  recognition.onend = null;
  recognition.onresult = null;

  stopSilenceTimer(speechContext);
  recognition.stop();

  // Take ownership of the pending transcript before calling out to user code.
  const finalResult = speechContext.result;
  speechContext.result = undefined;
  if (finalResult) {
    speechContext.onTranscript(finalResult, true);
  }
};


/**
 * A `SpeechToText` implementation backed by the browser's speech recognition API
 * (`window.SpeechRecognition`, falling back to `window.webkitSpeechRecognition`).
 *
 * The recogniser runs in continuous mode with interim results enabled. Transcripts
 * are reported through the configured `onTranscript` callback, and recognition is
 * paused automatically after a period of silence.
 *
 * NOTE(review): the constructor throws if neither recogniser constructor exists on
 * `window`, and nothing in this file ever assigns `RecordingState.UNAVAILABLE`, so
 * the "not available" branches below are only reachable if the state is set elsewhere.
 *
 * @author Jason Waring
 */
export class WebkitSpeechToText implements SpeechToText {
  private speechContext: WebkitSpeechContext;

  /**
   * Creates the recogniser and the shared context in the `NOT_RECORDING` state.
   *
   * @param config - Callback and silence-detection settings.
   */
  constructor(config: WebkitSpeechToTextConfig) {
    const RecognitionCtor = window.SpeechRecognition || window.webkitSpeechRecognition;
    const recognition = new RecognitionCtor();
    recognition.continuous = true;
    recognition.interimResults = true;

    const { silenceDuration, shortSentenceSilenceFactor } = config;
    this.speechContext = {
      recognition,
      recordingState: RecordingState.NOT_RECORDING,
      restartOnEnd: false,
      result: undefined,
      onTranscript: config.onTranscript,
      shortSentenceLength: config.shortSentenceLength,
      primarySilenceDuration: silenceDuration,
      // Extra wait for short sentences, on top of the primary silence period.
      secondarySilenceDuration: silenceDuration * shortSentenceSilenceFactor - silenceDuration,
      primarySilenceTimer: undefined,
      secondarySilenceTimer: undefined
    };
  }

  /**
   * Starts recognition when idle or paused; does nothing when already recording.
   *
   * @returns A promise that rejects with a message string when the recorder is unavailable.
   */
  async start(): Promise<void> {
    const state = this.speechContext.recordingState;

    if (state === RecordingState.RECORDING) {
      return Promise.resolve();
    }
    if (state === RecordingState.UNAVAILABLE) {
      return Promise.reject('The recorder is not available.');
    }
    if (state === RecordingState.NOT_RECORDING || state === RecordingState.PAUSED) {
      return startRecognition(this.speechContext, RecordingState.RECORDING);
    }
  }

  /**
   * Pauses recognition when recording; does nothing when idle or already paused.
   *
   * @returns A promise that rejects with a message string when the recorder is unavailable.
   */
  async pause(): Promise<void> {
    const state = this.speechContext.recordingState;

    if (state === RecordingState.RECORDING) {
      return stopRecognition(this.speechContext, RecordingState.PAUSED);
    }
    if (state === RecordingState.UNAVAILABLE) {
      return Promise.reject('The recorder is not available.');
    }
    if (state === RecordingState.NOT_RECORDING || state === RecordingState.PAUSED) {
      return Promise.resolve();
    }
  }

  /**
   * Resumes recognition when idle or paused; does nothing when already recording.
   *
   * @returns A promise that rejects with a message string when the recorder is unavailable.
   */
  async resume(): Promise<void> {
    const state = this.speechContext.recordingState;

    if (state === RecordingState.RECORDING) {
      return Promise.resolve();
    }
    if (state === RecordingState.UNAVAILABLE) {
      return Promise.reject('The recorder is not available.');
    }
    if (state === RecordingState.NOT_RECORDING || state === RecordingState.PAUSED) {
      return startRecognition(this.speechContext, RecordingState.RECORDING);
    }
  }

  /**
   * Stops recognition when recording or paused; does nothing when already idle.
   *
   * @returns A promise that rejects with a message string when the recorder is unavailable.
   */
  async stop(): Promise<void> {
    const state = this.speechContext.recordingState;

    if (state === RecordingState.NOT_RECORDING) {
      return Promise.resolve();
    }
    if (state === RecordingState.UNAVAILABLE) {
      return Promise.reject('The recorder is not available.');
    }
    if (state === RecordingState.PAUSED || state === RecordingState.RECORDING) {
      return stopRecognition(this.speechContext, RecordingState.NOT_RECORDING);
    }
  }

  /**
   * Reports the current recording state.
   *
   * @returns A promise resolved with the state held in the context.
   */
  getState(): Promise<RecordingState> {
    const { recordingState } = this.speechContext;
    return Promise.resolve(recordingState);
  }

  /**
   * Replaces the callback that receives transcripts.
   *
   * @param transcriptionHandler - The new transcript callback.
   */
  onTranscript(transcriptionHandler: OnTranscriptEvent) {
    this.speechContext.onTranscript = transcriptionHandler;
  }
}
