| 1 |
- (()=>{"use strict";var e={428:(e,t,s)=>{Object.defineProperty(t,"__esModule",{value:!0}),t.FrameProcessor=t.validateOptions=t.defaultFrameProcessorOptions=void 0;const i=s(294),r=s(842),o=[512,1024,1536];t.defaultFrameProcessorOptions={positiveSpeechThreshold:.5,negativeSpeechThreshold:.35,preSpeechPadFrames:1,redemptionFrames:8,frameSamples:1536,minSpeechFrames:3,submitUserSpeechOnPause:!1},t.validateOptions=function(e){o.includes(e.frameSamples)||r.log.warn("You are using an unusual frame size"),(e.positiveSpeechThreshold<0||e.negativeSpeechThreshold>1)&&r.log.error("postiveSpeechThreshold should be a number between 0 and 1"),(e.negativeSpeechThreshold<0||e.negativeSpeechThreshold>e.positiveSpeechThreshold)&&r.log.error("negativeSpeechThreshold should be between 0 and postiveSpeechThreshold"),e.preSpeechPadFrames<0&&r.log.error("preSpeechPadFrames should be positive"),e.redemptionFrames<0&&r.log.error("preSpeechPadFrames should be positive")};const n=e=>{const t=e.reduce(((e,t)=>(e.push(e.at(-1)+t.length),e)),[0]),s=new Float32Array(t.at(-1));return e.forEach(((e,i)=>{const r=t[i];s.set(e,r)})),s};t.FrameProcessor=class{constructor(e,t,s){this.modelProcessFunc=e,this.modelResetFunc=t,this.options=s,this.speaking=!1,this.redemptionCounter=0,this.active=!1,this.reset=()=>{this.speaking=!1,this.audioBuffer=[],this.modelResetFunc(),this.redemptionCounter=0},this.pause=()=>(this.active=!1,this.options.submitUserSpeechOnPause?this.endSegment():(this.reset(),{})),this.resume=()=>{this.active=!0},this.endSegment=()=>{const e=this.audioBuffer;this.audioBuffer=[];const t=this.speaking;this.reset();const s=e.reduce(((e,t)=>e+ +t.isSpeech),0);if(t){if(s>=this.options.minSpeechFrames){const t=n(e.map((e=>e.frame)));return{msg:i.Message.SpeechEnd,audio:t}}return{msg:i.Message.VADMisfire}}return{}},this.process=async e=>{if(!this.active)return{};const t=await this.modelProcessFunc(e);if(this.audioBuffer.push({frame:e,isSpeech:t.isSpeech>=this.options.positiveSpeechThreshold}),t.isSpeech>=this.options.positiveSpeechThreshold&&this.redemptionCounter&&(this.redemptionCounter=0),t.isSpeech>=this.options.positiveSpeechThreshold&&!this.speaking)return this.speaking=!0,{probs:t,msg:i.Message.SpeechStart};if(t.isSpeech<this.options.negativeSpeechThreshold&&this.speaking&&++this.redemptionCounter>=this.options.redemptionFrames){this.redemptionCounter=0,this.speaking=!1;const e=this.audioBuffer;if(this.audioBuffer=[],e.reduce(((e,t)=>e+ +t.isSpeech),0)>=this.options.minSpeechFrames){const s=n(e.map((e=>e.frame)));return{probs:t,msg:i.Message.SpeechEnd,audio:s}}return{probs:t,msg:i.Message.VADMisfire}}if(!this.speaking)for(;this.audioBuffer.length>this.options.preSpeechPadFrames;)this.audioBuffer.shift();return{probs:t}},this.audioBuffer=[],this.reset()}}},14:function(e,t,s){var i=this&&this.__createBinding||(Object.create?function(e,t,s,i){void 0===i&&(i=s);var r=Object.getOwnPropertyDescriptor(t,s);r&&!("get"in r?!t.__esModule:r.writable||r.configurable)||(r={enumerable:!0,get:function(){return t[s]}}),Object.defineProperty(e,i,r)}:function(e,t,s,i){void 0===i&&(i=s),e[i]=t[s]}),r=this&&this.__setModuleDefault||(Object.create?function(e,t){Object.defineProperty(e,"default",{enumerable:!0,value:t})}:function(e,t){e.default=t}),o=this&&this.__importStar||function(e){if(e&&e.__esModule)return e;var t={};if(null!=e)for(var s in e)"default"!==s&&Object.prototype.hasOwnProperty.call(e,s)&&i(t,e,s);return r(t,e),t},n=this&&this.__exportStar||function(e,t){for(var s in e)"default"===s||Object.prototype.hasOwnProperty.call(t,s)||i(t,e,s)};Object.defineProperty(t,"__esModule",{value:!0}),t.utils=void 0;const a=o(s(26));t.utils={minFramesForTargetMS:a.minFramesForTargetMS,arrayBufferToBase64:a.arrayBufferToBase64,encodeWAV:a.encodeWAV},n(s(405),t),n(s(428),t),n(s(294),t),n(s(842),t),n(s(260),t),n(s(724),t)},842:(e,t)=>{Object.defineProperty(t,"__esModule",{value:!0}),t.log=t.LOG_PREFIX=void 0,t.LOG_PREFIX="[VAD]";const s=["error","debug","warn"].reduce(((e,s)=>(e[s]=function(e){return(...s)=>{console[e](t.LOG_PREFIX,...s)}}(s),e)),{});t.log=s},294:(e,t)=>{var s;Object.defineProperty(t,"__esModule",{value:!0}),t.Message=void 0,function(e){e.AudioFrame="AUDIO_FRAME",e.SpeechStart="SPEECH_START",e.VADMisfire="VAD_MISFIRE",e.SpeechEnd="SPEECH_END",e.SpeechStop="SPEECH_STOP"}(s||(t.Message=s={}))},260:(e,t,s)=>{var i;Object.defineProperty(t,"__esModule",{value:!0}),t.Silero=void 0;const r=s(842);class o{constructor(e,t){this.ort=e,this.modelFetcher=t,this.init=async()=>{r.log.debug("initializing vad");const e=await this.modelFetcher();this._session=await this.ort.InferenceSession.create(e),this._sr=new this.ort.Tensor("int64",[16000n]),this.reset_state(),r.log.debug("vad is initialized")},this.reset_state=()=>{const e=Array(128).fill(0);this._h=new this.ort.Tensor("float32",e,[2,1,64]),this._c=new this.ort.Tensor("float32",e,[2,1,64])},this.process=async e=>{const t={input:new this.ort.Tensor("float32",e,[1,e.length]),h:this._h,c:this._c,sr:this._sr},s=await this._session.run(t);this._h=s.hn,this._c=s.cn;const[i]=s.output.data;return{notSpeech:1-i,isSpeech:i}}}}t.Silero=o,i=o,o.new=async(e,t)=>{const s=new i(e,t);return await s.init(),s}},405:(e,t,s)=>{Object.defineProperty(t,"__esModule",{value:!0}),t.PlatformAgnosticNonRealTimeVAD=t.defaultNonRealTimeVADOptions=void 0;const i=s(428),r=s(294),o=s(260),n=s(724);t.defaultNonRealTimeVADOptions={...i.defaultFrameProcessorOptions,ortConfig:void 0},t.PlatformAgnosticNonRealTimeVAD=class{static async _new(e,s,i={}){const r={...t.defaultNonRealTimeVADOptions,...i};void 0!==r.ortConfig&&r.ortConfig(s);const o=new this(e,s,r);return await o.init(),o}constructor(e,t,s){this.modelFetcher=e,this.ort=t,this.options=s,this.init=async()=>{const e=await o.Silero.new(this.ort,this.modelFetcher);this.frameProcessor=new i.FrameProcessor(e.process,e.reset_state,{frameSamples:this.options.frameSamples,positiveSpeechThreshold:this.options.positiveSpeechThreshold,negativeSpeechThreshold:this.options.negativeSpeechThreshold,redemptionFrames:this.options.redemptionFrames,preSpeechPadFrames:this.options.preSpeechPadFrames,minSpeechFrames:this.options.minSpeechFrames,submitUserSpeechOnPause:this.options.submitUserSpeechOnPause}),this.frameProcessor.resume()},this.run=async function*(e,t){const s={nativeSampleRate:t,targetSampleRate:16e3,targetFrameSize:this.options.frameSamples},i=new n.Resampler(s);let o=0,a=0,h=0;for await(const t of i.stream(e)){const{msg:e,audio:s}=await this.frameProcessor.process(t);switch(e){case r.Message.SpeechStart:o=h*this.options.frameSamples/16;break;case r.Message.SpeechEnd:a=(h+1)*this.options.frameSamples/16,yield{audio:s,start:o,end:a}}h++}const{msg:p,audio:c}=this.frameProcessor.endSegment();p==r.Message.SpeechEnd&&(yield{audio:c,start:o,end:h*this.options.frameSamples/16})},(0,i.validateOptions)(s)}}},724:(e,t,s)=>{Object.defineProperty(t,"__esModule",{value:!0}),t.Resampler=void 0;const i=s(842);t.Resampler=class{constructor(e){this.options=e,this.process=e=>{const t=[];for(this.fillInputBuffer(e);this.hasEnoughDataForFrame();){const e=this.generateOutputFrame();t.push(e)}return t},this.stream=async function*(e){for(this.fillInputBuffer(e);this.hasEnoughDataForFrame();){const e=this.generateOutputFrame();yield e}},e.nativeSampleRate<16e3&&i.log.error("nativeSampleRate is too low. Should have 16000 = targetSampleRate <= nativeSampleRate"),this.inputBuffer=[]}fillInputBuffer(e){for(const t of e)this.inputBuffer.push(t)}hasEnoughDataForFrame(){return this.inputBuffer.length*this.options.targetSampleRate/this.options.nativeSampleRate>=this.options.targetFrameSize}generateOutputFrame(){const e=new Float32Array(this.options.targetFrameSize);let t=0,s=0;for(;t<this.options.targetFrameSize;){let i=0,r=0;for(;s<Math.min(this.inputBuffer.length,(t+1)*this.options.nativeSampleRate/this.options.targetSampleRate);){const e=this.inputBuffer[s];void 0!==e&&(i+=e,r++),s++}e[t]=i/r,t++}return this.inputBuffer=this.inputBuffer.slice(s),e}}},26:(e,t)=>{function s(e,t,s){for(var i=0;i<s.length;i++)e.setUint8(t+i,s.charCodeAt(i))}Object.defineProperty(t,"__esModule",{value:!0}),t.encodeWAV=t.arrayBufferToBase64=t.minFramesForTargetMS=void 0,t.minFramesForTargetMS=function(e,t,s=16e3){return Math.ceil(e*s/1e3/t)},t.arrayBufferToBase64=function(e){for(var t="",s=new Uint8Array(e),i=s.byteLength,r=0;r<i;r++)t+=String.fromCharCode(s[r]);return btoa(t)},t.encodeWAV=function(e,t=3,i=16e3,r=1,o=32){var n=o/8,a=r*n,h=new ArrayBuffer(44+e.length*n),p=new DataView(h);return s(p,0,"RIFF"),p.setUint32(4,36+e.length*n,!0),s(p,8,"WAVE"),s(p,12,"fmt "),p.setUint32(16,16,!0),p.setUint16(20,t,!0),p.setUint16(22,r,!0),p.setUint32(24,i,!0),p.setUint32(28,i*a,!0),p.setUint16(32,a,!0),p.setUint16(34,o,!0),s(p,36,"data"),p.setUint32(40,e.length*n,!0),1===t?function(e,t,s){for(var i=0;i<s.length;i++,t+=2){var r=Math.max(-1,Math.min(1,s[i]));e.setInt16(t,r<0?32768*r:32767*r,!0)}}(p,44,e):function(e,t,s){for(var i=0;i<s.length;i++,t+=4)e.setFloat32(t,s[i],!0)}(p,44,e),h}}},t={};function s(i){var r=t[i];if(void 0!==r)return r.exports;var o=t[i]={exports:{}};return e[i].call(o.exports,o,o.exports,s),o.exports}(()=>{const e=s(14);class t extends AudioWorkletProcessor{constructor(t){super(),this._initialized=!1,this._stopProcessing=!1,this.init=async()=>{e.log.debug("initializing worklet"),this.resampler=new e.Resampler({nativeSampleRate:sampleRate,targetSampleRate:16e3,targetFrameSize:this.options.frameSamples}),this._initialized=!0,e.log.debug("initialized worklet")},this.options=t.processorOptions,this.port.onmessage=t=>{t.data.message===e.Message.SpeechStop&&(this._stopProcessing=!0)},this.init()}process(t,s,i){if(this._stopProcessing)return!1;const r=t[0][0];if(this._initialized&&r instanceof Float32Array){const t=this.resampler.process(r);for(const s of t)this.port.postMessage({message:e.Message.AudioFrame,data:s.buffer},[s.buffer])}return!0}}registerProcessor("vad-helper-worklet",t)})()})();
|