/* * Copyright (c) 2001-2006 MUSIC TECHNOLOGY GROUP (MTG) * UNIVERSITAT POMPEU FABRA * * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * */ #include "Schema.hxx" #include #include #include "SongFiles.hxx" #include "Project.hxx" #include "FrameDivision.hxx" #include #include #include //For Descriptor Computation #include #include #include #include #include #include #include #include #include #include void BuildSchema(CLAM_Annotator::Project & project); void PopulatePool(const std::string& song, CLAM::DescriptionDataPool& pool); void GenerateRandomDescriptorValues(CLAM::TData* values, int size); unsigned GenerateRandomSegmentationMarks(CLAM::DataArray & segmentation, CLAM::TData songDurationSeconds, CLAM::TData minSegmentDuration, CLAM::TData maxSegmentDuration); unsigned GenerateNonOverlappingSegments(CLAM::DataArray & segmentation, CLAM::TData songDurationInSeconds, CLAM::TData maxGap, CLAM::TData maxSegmentDuration); unsigned GenerateOverlappingSegments(CLAM::DataArray & segmentation, CLAM::TData songDurationInSeconds, CLAM::TData maxGap, CLAM::TData maxSegmentDuration); void OpenSoundFile(const std::string& filename, CLAM::Audio& audio, CLAM::Text & artist, CLAM::Text & title); void FFTAnalysis(const CLAM::Audio& audio, CLAM::Segment& s); void ComputeSegment(const CLAM::Audio& audio,CLAM::Segment& segment, CLAM::SegmentDescriptors& segmentD); void MFCC2Pool(const CLAM::Segment& segment, CLAM::DescriptionDataPool& pool); void SegmentD2Pool(const CLAM::SegmentDescriptors& segmentD, CLAM::DescriptionDataPool& pool); void ComputeSegmentationMarks(CLAM::Segment& segment,CLAM::SegmentDescriptors& segmentD); void Segment2Marks(const CLAM::Segment& segment, CLAM::DataArray & marks); CLAM::TData GetDurationInSeconds(const std::string& fileName); const char * copyright = "CLAM Extractor Example v0.3.\n" "Copyright 2006 Universitat Pompeu Fabra\n" "\n" ; const char * usage = "Example of music descriptor extractor based on CLAM.\n" "Usage: ClamExtractorExample [-s out.sc] [-f ] ...\n" "\nOptions:\n" " -h shows this help\n" " -s dump the schema to the standard output\n" " -f append to the generated descriptors file (default: '.pool')\n" " -p Generates a dummy project with all the wave files\n" "\nUsage examples:\n" " ClamExtractorExample -s schema.sc\n" " ClamExtractorExample -f .beats song1.wav song2.mp3 song3.ogg\n" " ClamExtractorExample -p path/to/project/Project.pro\n" " ClamExtractorExample -p path/to/project/Project.pro -f .beats path/to/project/*.{wav,ogg,mp3}\n" ; const char * projectDescription = "

Example project for the CLAM Descriptors Extractor

\n" "

\n" "This is a generated project to test the CLAM music annotator." "Descriptors have been generated by the program ClamExtractorExample provided with the annotator." "You could use that extractor as example to build your own one." "

\n" "

Description:

\n" "

\n" "In a real project this documentation may be useful for example to give instructions about" "the annotation task or the aspects to take into account for the review." "

\n" "

Reference descriptors:

\n" "

\n" "You can take some descriptors as reference such as the energy.\n" "Consider that such descriptors are computed every 2048 samples so their precission may be not as accurate as it seems.\n" "

\n" ; unsigned fftSize = 513; unsigned frameSize = 1024; int main(int argc, char ** argv) { std::cout << copyright << std::endl; CLAM_Annotator::Project myProject; std::string relativeSchemaLocation = "Schema.sc"; const char * songFileNames[] = { "SongsTest/LisaRein-SomethingBetter.mp3", "SongsTest/LisaRein-spunkyfunk.mp3", 0 }; for (unsigned i=0; i=argc) break; myProject.SetProjectPath(argv[i+1]); hasProjectOption=true; } bool hasSchemaOption = false; bool isSchema = false; bool isSuffix = false; bool isProject = false; bool isConfiguration = false; for (unsigned i = 1; i::iterator currentSong; srand(time(NULL)); for ( currentSong = myProject.GetSongs().begin(); currentSong != myProject.GetSongs().end(); currentSong++) { std::string songFile = myProject.RelativeToAbsolute(currentSong->GetSoundFile()); std::cout<<"Computing Descriptors for file "<< songFile <<" Please wait..."<The Danceability is a 0 to 10 score that depends on how much defined is the rhythm

\n" "

Data for this data is not the one in Simac but randomly generated.

\n" }, {"Frame", "Mean", "

The spectral power mean value.\n" "The unit of this measure can be dB or none, depending on the scale set for the measured Spectrum object.

\n" "

\\f[ Mean(X) = \\frac{\\sum x_i }{Size(X)} \\f]

\n" "

Being X the spectrum magnitude array.

\n" }, {0,0,0} }; for (unsigned i=0; docs[i].name; i++) { schema.AttributeDocumentation(docs[i].scope, docs[i].name, docs[i].documentation); } project.CreatePoolScheme(); } void PopulatePool(const std::string & song, CLAM::DescriptionDataPool& pool) { //Create Descriptors Pool pool.SetNumberOfContexts("Song",1); //Generate LLDs values CLAM::Audio audio; CLAM::Segment segment; CLAM::SegmentDescriptors segmentD; CLAM::Text artist="Unknown Artist"; CLAM::Text title="Unknown Title"; OpenSoundFile(song, audio, artist, title); ComputeSegment(audio,segment,segmentD); CLAM::TData sampleRate = audio.GetSampleRate(); CLAM::TData firstCenter = segment.GetFrame(0).GetCenterTime()/1000; CLAM::TData secondCenter = segment.GetFrame(1).GetCenterTime()/1000; CLAM_Annotator::FrameDivision & frames = pool.GetWritePool("Song","Frames")[0]; frames.SetFirstCenter(firstCenter*sampleRate); frames.SetInterCenterGap((secondCenter-firstCenter)*sampleRate); SegmentD2Pool(segmentD,pool); std::cout<<"calling MFCC2Pool"<("Song","Artist")[0] = artist; pool.GetWritePool("Song","Title")[0] = title; pool.GetWritePool("Song","Genre")[0] = "Folk"; pool.GetWritePool("Song","Danceability")[0] = 7.2; pool.GetWritePool("Song","Key")[0] = "C"; pool.GetWritePool("Song","Mode")[0] = "Minor"; pool.GetWritePool("Song","DynamicComplexity")[0] = 8.1; pool.GetWritePool("Song","BPM")[0] = 100; // Onset Segmentation CLAM::DataArray & segmentation = pool.GetWritePool("Song","Onsets")[0]; ComputeSegmentationMarks(segment, segmentD); Segment2Marks(segment,segmentation); unsigned nOnsets = segmentation.Size(); pool.SetNumberOfContexts("Onset",nOnsets); CLAM::TData * onsetForces = pool.GetWritePool("Onset","Relevance"); CLAM_Annotator::Enumerated * onsetChange = pool.GetWritePool("Onset","DetectedChange"); for (unsigned i = 0; i1? "PitchChange" : "EnergyChange"; } // Random Segmentation CLAM::DataArray* randomSegmentation = pool.GetWritePool("Song","RandomSegments"); GenerateRandomSegmentationMarks(randomSegmentation[0], GetDurationInSeconds(song), .3, 4.); // Note Segmentation CLAM::DataArray* noteSegmentation = pool.GetWritePool("Song","Notes"); unsigned nNotes = GenerateOverlappingSegments(noteSegmentation[0], GetDurationInSeconds(song), .5, .8); pool.SetNumberOfContexts("Note",nNotes); CLAM_Annotator::Enumerated * notePitch = pool.GetWritePool("Note","Pitch"); int * noteOctave = pool.GetWritePool("Note","Octave"); for (unsigned i = 0; i("Song","Chords"); unsigned nChords = GenerateRandomSegmentationMarks(chordSegmentation[0], GetDurationInSeconds(song)-4, .5, 8. )+1; pool.SetNumberOfContexts("Chord",nChords); CLAM_Annotator::Enumerated * chordRoot = pool.GetWritePool("Chord","Root"); CLAM_Annotator::Enumerated * chordMode = pool.GetWritePool("Chord","Mode"); for (unsigned i = 0; i("Song","Structure"); unsigned nParts = GenerateNonOverlappingSegments(structuralSegmentation[0], GetDurationInSeconds(song), 4., 30.); pool.SetNumberOfContexts("StructuralPart",nParts); CLAM_Annotator::Enumerated * partDescription = pool.GetWritePool("StructuralPart","Description"); CLAM_Annotator::Enumerated * partGroup = pool.GetWritePool("StructuralPart","SimilarityGroup"); const char * partDescriptionValues[] = { "Versus", "Chorus", "Solo", "Accapella", 0 }; const char * partGroupIds[] = { "A","B","C","D","E", "F","G","H","I","J",0}; for (unsigned i = 0; i100) randomInt = 80; if(randomInt<0) randomInt=20; values[i] = randomInt; } } CLAM::TData GetDurationInSeconds(const std::string& fileName) { CLAM::AudioFileSource file; file.OpenExisting(fileName); return file.GetHeader().GetLength()/1000; } unsigned GenerateRandomSegmentationMarks(CLAM::DataArray & segmentation, CLAM::TData songDurationInSeconds, CLAM::TData minSegmentDuration, CLAM::TData maxSegmentDuration) { unsigned nSegments = 0; CLAM::TData positionInSeconds = 0; while(positionInSeconds=songDurationInSeconds) break; segmentation.AddElem(positionInSeconds); nSegments++; } return nSegments; } unsigned GenerateNonOverlappingSegments(CLAM::DataArray & segmentation, CLAM::TData songDurationInSeconds, CLAM::TData maxGap, CLAM::TData maxSegmentDuration) { unsigned nSegments = 0; CLAM::TData lastOffset = 0; while(lastOffsetsongDurationInSeconds) break; segmentation.AddElem(randomOnset); segmentation.AddElem(randomOffset); lastOffset = randomOffset; nSegments++; } return nSegments; } unsigned GenerateOverlappingSegments(CLAM::DataArray & segmentation, CLAM::TData songDurationInSeconds, CLAM::TData maxGap, CLAM::TData maxSize) { return GenerateNonOverlappingSegments(segmentation, songDurationInSeconds, maxGap, maxSize); unsigned nSegments = 0; CLAM::TData lastOnset = 0; while(lastOnsetsongDurationInSeconds) break; segmentation.AddElem(randomOnset); segmentation.AddElem(randomOffset); lastOnset = randomOnset; nSegments++; } return nSegments; } #include #include void MFCC2Pool(const CLAM::Segment& segment, CLAM::DescriptionDataPool& pool) { unsigned nFrames = segment.GetnFrames(); CLAM::MelFilterBankConfig melFilterBankConfig; melFilterBankConfig.SetSpectrumSize(fftSize); melFilterBankConfig.SetNumBands(20); melFilterBankConfig.SetSpectralRange( nFrames? segment.GetFrame(0).GetSpectrum().GetSpectralRange():0); melFilterBankConfig.SetLowCutoff(0); melFilterBankConfig.SetHighCutoff(11025); CLAM::MelFilterBank melFilterBank(melFilterBankConfig); CLAM::CepstralTransformConfig cepstralTransformConfig; cepstralTransformConfig.SetNumMelCoefficients(20); cepstralTransformConfig.SetNumCepstrumCoefficients(20); CLAM::CepstralTransform cepstralTransform(cepstralTransformConfig); CLAM::MelSpectrum melSpectrum; CLAM::MelCepstrum melCepstrum; melFilterBank.Start(); cepstralTransform.Start(); CLAM::DataArray* values= pool.GetWritePool("Frame","MelFrequencyCepstrumCoefficients"); CLAM::DataArray* spectrumMagnitude= pool.GetWritePool("Frame", "SpectrumMagnitude"); for(int i=0; i& frameD = segmentD.GetFramesD(); struct GetterMap { const char * name; CLAM::TData & (CLAM::SpectralDescriptors::*getter)() const; } spectralDescriptors[] = { {"Mean", &CLAM::SpectralDescriptors::GetMean}, {"GeometricMean", &CLAM::SpectralDescriptors::GetGeometricMean}, {"Energy", &CLAM::SpectralDescriptors::GetEnergy}, {"Centroid", &CLAM::SpectralDescriptors::GetCentroid}, {"Moment2", &CLAM::SpectralDescriptors::GetMoment2}, {"Moment3", &CLAM::SpectralDescriptors::GetMoment3}, {"Moment4", &CLAM::SpectralDescriptors::GetMoment4}, {"Moment5", &CLAM::SpectralDescriptors::GetMoment5}, {"Moment6", &CLAM::SpectralDescriptors::GetMoment6}, {"Flatness", &CLAM::SpectralDescriptors::GetFlatness}, {"MagnitudeKurtosis", &CLAM::SpectralDescriptors::GetMagnitudeKurtosis}, {"MaxMagFreq", &CLAM::SpectralDescriptors::GetMaxMagFreq}, {"LowFreqEnergyRelation", &CLAM::SpectralDescriptors::GetLowFreqEnergyRelation}, {"Spread", &CLAM::SpectralDescriptors::GetSpread}, {"MagnitudeSkewness", &CLAM::SpectralDescriptors::GetMagnitudeSkewness}, {"Rolloff", &CLAM::SpectralDescriptors::GetRolloff}, {"Slope", &CLAM::SpectralDescriptors::GetSlope}, {"HighFrequencyContent", &CLAM::SpectralDescriptors::GetHighFrequencyContent}, {0,0} }; for (GetterMap * map = spectralDescriptors; map->name; map++) { CLAM::TData* values= pool.GetWritePool("Frame",map->name); for(int i=0; igetter))(); } } } void ComputeSegment(const CLAM::Audio& audio,CLAM::Segment& segment, CLAM::SegmentDescriptors& segmentDescriptors) { FFTAnalysis(audio, segment); segmentDescriptors.AddAll(); segmentDescriptors.UpdateData(); CLAM::SpectralDescriptors specProto; specProto.AddAll(); specProto.UpdateData(); CLAM::FrameDescriptors frameProto; frameProto.AddSpectrumD(); frameProto.UpdateData(); frameProto.GetSpectrumD().SetPrototype(specProto); segmentDescriptors.SetFramePrototype(frameProto,segment.GetnFrames()); segmentDescriptors.SetpSegment(&segment); CLAM::DescriptorComputation processing; processing.Do(segmentDescriptors); } void OpenSoundFile(const std::string& filename, CLAM::Audio& audio, CLAM::Text & artist, CLAM::Text & title) { const CLAM::TSize readSize = 1024; CLAM::MonoAudioFileReaderConfig cfg; cfg.SetSourceFile( filename ); CLAM::MonoAudioFileReader reader; if (!reader.Configure(cfg)) CLAM_ASSERT(false, ("Error opening '" + filename +"'").c_str()); const CLAM::AudioTextDescriptors & textDescriptors = reader.GetTextDescriptors(); if (textDescriptors.HasArtist()) artist = textDescriptors.GetArtist(); if (textDescriptors.HasTitle()) title = textDescriptors.GetTitle(); int nChannels = reader.GetHeader().GetChannels(); CLAM::Audio audioFrame; audioFrame.SetSize(readSize); reader.Start(); int beginSample=0; int nSamples = reader.GetHeader().GetSamples(); audio.SetSize(nSamples); while(reader.Do(audioFrame)) { audio.SetAudioChunk(beginSample,audioFrame); beginSample+=readSize; if(beginSample+readSize>nSamples) break; } reader.Stop(); } void FFTAnalysis(const CLAM::Audio& audio, CLAM::Segment& s) { CLAM::FFTConfig cfg; cfg.SetAudioSize(frameSize); CLAM::FFT fft(cfg); fft.Start(); CLAM::SpectrumConfig spcfg; spcfg.SetSize(fftSize); int audioSize = audio.GetSize(); int samplingRate = audio.GetSampleRate(); int duration = 1000*frameSize/samplingRate; for (int i=0; i< audioSize; i+=frameSize) { CLAM::Audio audioFrame; audio.GetAudioChunk(i, i+frameSize, audioFrame); CLAM::Spectrum spec(spcfg); fft.Do(audioFrame, spec); CLAM::Frame tmpFrame; tmpFrame.AddSpectrum(); tmpFrame.UpdateData(); tmpFrame.SetSpectrum(spec); tmpFrame.SetDuration(duration); tmpFrame.SetCenterTime(1000*(i+frameSize*0.5)/samplingRate); s.AddFrame(tmpFrame); } } void ComputeSegmentationMarks(CLAM::Segment& segment,CLAM::SegmentDescriptors& segmentD) { CLAM::TData ePercentil, eThr, minLength; bool useDefault=true; if(useDefault) { ePercentil = 400; eThr = CLAM::TData(2.00);//0.0032; minLength = 2; } CLAM::SegmentatorConfig sgConfig; CLAM::TDescriptorsParams tmpParams; tmpParams.id=CLAM::SpectralEnergyId; tmpParams.percentil=ePercentil; tmpParams.threshold=eThr; sgConfig.AddDescParams(tmpParams); sgConfig.SetMinSegmentLength(int(minLength)); CLAM::Segmentator mySegmentator(sgConfig); mySegmentator.Start(); //Segmentate mySegmentator.Do(segment,segmentD); } void Segment2Marks(const CLAM::Segment& segment, CLAM::DataArray & marks) { CLAM::List& children = segment.GetChildren(); children.DoFirst(); int samplingRate = segment.GetSamplingRate(); int nSegments = children.Size(); int segmentDuration = segment.GetEndTime(); for (int i=0; i0&¤tTime