# Copyright (c) 2001-2006 MUSIC TECHNOLOGY GROUP (MTG)
# UNIVERSITAT POMPEU FABRA
#
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
import os, sys
import gzip
import urlparse, urllib, urllib2
import xml.dom.ext
from xml.dom.ext.reader.Sax2 import FromXmlStream
from xml.sax import saxutils
from Pool import Pool
import ServiceStub
import shelve
def tryremove( name ):
    """Delete the file `name` if it exists.

    Returns False when nothing exists at `name`; otherwise returns whatever
    os.remove() returns (None) after deleting it.
    """
    if not os.path.exists( name ):
        return False
    return os.remove( name )
# Template for the Annotator project (".pro") file written by
# Tasker.processTask. The three %s slots are filled, in order, with the task
# description, the project name (giving the "<project>.sc" schema reference)
# and the concatenated per-song entries.
# NOTE(review): the template carries no markup of its own -- presumably the
# surrounding tags were lost at some point; confirm against the upstream file.
clamAnnotatorProjectSkeleton = """
%s
%s.sc
ClamExtractorExample
%s
"""
# Template for one song entry of the project file; the %s slot receives the
# name of the downloaded audio file.
clamAnnotatorProjectSongSkeleton = """
%s
"""
class TaskerError( Exception ):
    """Error raised by Tasker for every failure it reports to its caller.

    The human readable text is available both as `message` and through
    str(); by convention in this file it is "<title>\\n<details>".
    """

    def __init__( self, message ):
        # Initialise the base class too, so that `args`, repr() and pickling
        # carry the message instead of being empty.
        Exception.__init__( self, message )
        self.message = message

    def __str__( self ):
        return self.message
class Tasker:
    """Runs the steps of an annotation task described by an XML task file.

    The task file lists song ids, descriptors, a content locator URI, a
    metadata provider URI and a description (see _extractParameters).  The
    public methods are the individual workflow steps:

      processTask   -- download songs and descriptor pools, write the project
      runAnnotator  -- launch the Annotator on the generated project
      listModified  -- list the songs whose pool changed after processTask
      uploadChanges -- pack the modified descriptors and upload them
      clean         -- remove every file generated for the project

    Progress is reported through `printfunction`, a callable taking one
    string.  Failures are reported by raising TaskerError.
    """

    # Suffixes a dbm backend may append to the shelf file name; shelve
    # documents that an extension may be added and several files created.
    _SHELF_SUFFIXES = ( "", ".db", ".dat", ".dir", ".bak", ".pag" )

    def __init__( self, printfunction=sys.stdout.write ):
        self.printfunction = printfunction

    def processTask( self, taskfile, projectname, path ):
        """Download everything the task needs and create the project files.

        Writes `<projectname>.sc` (schema), `<projectname>.pro` (project), one
        audio file and one `.pool` file per song that could be located, and a
        shelf `<projectname>.info.dict` mapping audio file name -> song id.
        Finally touches `taskfile` so that later pool edits can be detected
        by comparing modification times.

        Raises TaskerError when a service cannot be reached, a file cannot be
        written, the task file is invalid or no song could be downloaded.
        """
        path += "/"
        ############################ EP EP EP
        # Always start from an empty song listing.
        self._removeSongListing( projectname, path )
        songlisting = shelve.open( self._songListingName( projectname, path ), writeback=True )
        try:
            task = self._retrieveTask( taskfile )
            ( ids, descriptors, modifydescriptors, contentlocatoruri,
              metadataprovideruri, description ) = self._extractParameters( task )
            contentlocator = ServiceStub.ContentLocator( contentlocatoruri )
            metadataprovider = ServiceStub.MetadataProvider( metadataprovideruri )
            projectsonglisting = ""
            self.printfunction( u"\n == Retrieving data ==\n" )
            try:
                schema = metadataprovider.QuerySchema( descriptors )
            except Exception:
                raise TaskerError( "Access error\nError accessing metadata provider server." )
            self._createFile( path, projectname, '.sc', schema )
            for songid in ids:
                # Grab audio file
                try:
                    locations = contentlocator.LocateId( songid )
                except Exception:
                    raise TaskerError( "Access error\nError accessing content locator server." )
                if locations == "ServerError": # Stop execution
                    raise TaskerError( "Service error\nThe server is not working properly, a ServerError was received." )
                elif locations == "NotFound": # Ignore song
                    self.printfunction( u"\n WARNING: File with id=%s not found on the server. Id will be ignored.\n" % songid )
                    continue
                audiofilename = self._downloadSong( locations, path )
                if audiofilename is None:
                    self.printfunction( u"\n WARNING: it was impossible to download the file with id=%s from the given source urls.\n\n" % songid )
                    continue
                # Project
                projectsonglisting += clamAnnotatorProjectSongSkeleton % audiofilename
                try:
                    pool = metadataprovider.QueryDescriptors( songid, descriptors )
                except Exception:
                    raise TaskerError( "Access error\nError accessing metadata provider server." )
                self._createFile( path, audiofilename, '.pool', pool )
                # Remember which id each downloaded file belongs to; the other
                # workflow steps iterate over this listing.
                songlisting[audiofilename] = songid
            if projectsonglisting == "":
                raise TaskerError( "Access error\nNo file from the task could be downloaded. Check the task file for correct ids." )
            self._createFile( path, projectname, '.pro', clamAnnotatorProjectSkeleton % ( description, projectname, projectsonglisting ) )
            # Print log
            self.printfunction( u"\n == Task processing finished ==\n" )
            self.printfunction( u" - Created files %s and %s\n" % ( projectname+".pro", projectname+".sc" ) )
            self.printfunction( u" - Downloaded %d audio file(s) and generated the corresponding pools\n" % len( songlisting ) )
            # Reference time stamp: pools modified after this moment are the
            # ones listModified reports.
            os.utime( taskfile, None )
        finally:
            # Close the shelf on every path, including the error ones.
            songlisting.close()

    def runAnnotator( self, taskfile, projectname, path ):
        """Launch the Annotator on `<path>/<projectname>.pro` and wait for it.

        Raises TaskerError when the project file does not exist or when
        running on Windows, which is not supported.
        """
        projectfile = path + "/" + projectname + ".pro"
        if not os.path.exists( projectfile ):
            raise TaskerError( "Workflow error\nThe corresponding Annotator project is not created, make sure you follow the usage process." )
        self.printfunction( u"\n == CLAM-Annotator ==\n" )
        self.printfunction( u" - Launching...\n" )
        if sys.platform == 'win32':
            raise TaskerError( "Run Annotator error\nWindows execution of the Annotator is still not managed." )
        # Single-quote the file name so spaces and shell metacharacters in the
        # path reach the Annotator untouched.
        quoted = "'" + projectfile.replace( "'", "'\\''" ) + "'"
        # os.system runs the command through /bin/sh; "&>" is a bash
        # extension that a POSIX sh parses as "run in background", so the
        # portable "> /dev/null 2>&1" form is used instead.
        result = os.system( "LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib:/usr/lib Annotator %s > /dev/null 2>&1" % quoted )
        self.printfunction( "RESULT = %d" % result )
        self.printfunction( u" - Finalized\n" )

    def listModified( self, taskfile, projectname, path ):
        """Return the songs whose pool file is newer than the task file.

        The result is a single string with one song (audio file name) per
        line; it is empty when nothing was modified.

        Raises TaskerError when the task file is invalid or the song listing
        cannot be opened.
        """
        # The parameters themselves are not needed here; parsing is kept so
        # that an invalid task file is still reported.
        self._extractParameters( self._retrieveTask( taskfile ) )
        songlisting = self._openSongListing( projectname, path )
        try:
            tasktime = os.path.getmtime( taskfile )
            modifiedlist = []
            for song in songlisting.keys():
                songpool = path + "/" + song + ".pool"
                if os.path.getmtime( songpool ) > tasktime:
                    modifiedlist.append( song )
        finally:
            songlisting.close()
        return "\n".join( modifiedlist )

    def uploadChanges( self, taskfile, projectname, path ):
        """Pack the modifiable descriptors of the modified songs and upload them.

        Returns 0 once the packed file has been handed to the metadata
        provider (the server answer is only reported through printfunction),
        None when no modified song was found and nothing was uploaded, and
        -1 when packing or uploading failed.

        Raises TaskerError when the task file is invalid or the song listing
        cannot be opened.
        """
        task = self._retrieveTask( taskfile )
        ( ids, descriptors, modifydescriptors, contentlocatoruri,
          metadataprovideruri, description ) = self._extractParameters( task )
        metadataprovider = ServiceStub.MetadataProvider( metadataprovideruri )
        # Computed before opening the shelf below: listModified opens and
        # closes the same shelf itself.
        modifiedlist = self.listModified( taskfile, projectname, path ).split( '\n' )
        songlisting = self._openSongListing( projectname, path )
        uploadfile = 'uploadfile.xml.gz'
        try:
            try:
                self.printfunction( u"\n == Uploading ==\n" )
                self.printfunction( u" - Generating file to upload\n" )
                uf = gzip.GzipFile( uploadfile, 'w' )
                try:
                    uf.write( "\n" )
                    uf.write( "\n" )
                    count = 0
                    for song in songlisting.keys():
                        # if it has been modified
                        if song in modifiedlist:
                            count += 1
                            self.printfunction( u"\n - Packing descriptors of: %s\n" % song )
                            # NOTE(review): the format string below has no
                            # placeholder, so with a string id this raises
                            # TypeError and the upload ends in the -1 branch.
                            # Presumably the markup that carried the id was
                            # lost; restore it from the upstream file.
                            uf.write( "" % songlisting[song] )
                            poolfile = path + "/" + song + ".pool"
                            pool = Pool( poolfile )
                            temppool = Pool()
                            # Extract desired attributes
                            for ( scope, name ) in modifydescriptors:
                                temppool.InsertAttribute( pool, scope, name, scope, name )
                            xml.dom.ext.PrettyPrint( temppool.doc.documentElement, uf )
                            uf.write( "\n" )
                    uf.write( "\n" )
                finally:
                    uf.close()
                if count == 0:
                    self.printfunction( u" - No valid song descriptor has been found. Not doing any upload\n" )
                    os.remove( uploadfile )
                    return
                self.printfunction( u" - Uploading the modified descriptor file to the server\n" )
                result = metadataprovider.UploadPackedDescriptors( uploadfile )
                if result != "OK":
                    self.printfunction( u"\n - ERROR in the server when processing the file: %s\n" % result )
                else:
                    self.printfunction( u" - File successfully uploaded: received %s\n" % result )
                # Clean!
                os.remove( uploadfile )
                return 0
            except Exception:
                # The -1 return value is kept for existing callers; the
                # failure is additionally reported instead of being silent.
                self.printfunction( u"\n - ERROR: Error preparing files to upload. Maybe the files no longer exist?\n" )
                return -1
        finally:
            songlisting.close()

    def clean( self, taskfile, projectname, path ):
        """Remove the task file and every file generated for the project.

        Deletes the schema, project and song listing files, the task file
        and, for each song in the listing, the audio file and its pool.
        Files that do not exist are skipped.

        Raises TaskerError when the song listing cannot be opened.
        """
        # Read the song names first: the listing itself is deleted below.
        songlisting = self._openSongListing( projectname, path )
        try:
            songs = list( songlisting.keys() )
        finally:
            songlisting.close()
        tryremove( path+"/"+projectname+".sc" )
        tryremove( path+"/"+projectname+".pro" )
        self._removeSongListing( projectname, path )
        tryremove( taskfile )
        for song in songs:
            tryremove( path+"/"+song )
            tryremove( path+"/"+song+".pool" )

    def _songListingName( self, projectname, path ):
        """Return the file name of the project's song listing shelf."""
        return path + "/" + projectname + ".info.dict"

    def _openSongListing( self, projectname, path ):
        """Open the project's song listing shelf; the caller must close it.

        Raises TaskerError when the shelf cannot be opened.
        """
        try:
            return shelve.open( self._songListingName( projectname, path ), writeback=True )
        except Exception:
            raise TaskerError( "Workflow error\nCould not restore configuration, make sure you follow the usage process." )

    def _removeSongListing( self, projectname, path ):
        """Delete the song listing shelf, whatever suffix the backend added."""
        shelfname = self._songListingName( projectname, path )
        for suffix in self._SHELF_SUFFIXES:
            tryremove( shelfname + suffix )

    def _retrieveTask( self, location ):
        """Parse the task file at `location` and return its DOM document.

        Raises TaskerError when the file cannot be read or parsed.
        """
        try:
            return FromXmlStream( location )
        except Exception:
            raise TaskerError( "Task file error\nReading file '%s'. It doesn't exist or it is a malformed XML file." % location )

    def _downloadSong( self, locations, savetopath ):
        """Download a song from the first usable URL in `locations`.

        `locations` holds one URL per line; `savetopath` is prepended verbatim
        to the file name, so it must end with a path separator.  Returns the
        name of the audio file (relative to `savetopath`), or None when no
        URL could be used.
        """
        # EP: once the ContentProvider is used, the file name will be a bit of
        # a problem, I'd say -> should we look at the header for the file name?
        if not locations:
            return None
        for url in locations.splitlines():
            try:
                self.printfunction( u" - Trying '%s'" % url )
                audiofilename = self._fetchSong( url, savetopath )
            except Exception:
                audiofilename = None
            if audiofilename is not None:
                return audiofilename
            self.printfunction( u" ( ERROR )\n" )
        return None

    def _fetchSong( self, url, savetopath ):
        """Fetch `url` into `savetopath`; return the file name or None.

        None means the URL does not serve an accepted audio type or yields no
        usable file name.  Network and file errors propagate to the caller.
        """
        location = urlparse.urlparse( url )
        if location[1] in ServiceStub.NoProxiesFor:
            stream = urllib2.urlopen( url )
        else:
            stream = urllib.urlopen( url, None, ServiceStub.Proxies )
        try:
            if stream.info().type not in [ "audio/mpeg", "application/ogg", "audio/x-wav" ]:
                return None
            # basename() drops any directory part that appears once the last
            # URL component is unquoted (e.g. "%2F"), keeping the download
            # inside `savetopath`.
            audiofilename = os.path.basename( urllib.unquote( location[2].split( '/' )[-1] ) )
            if not audiofilename:
                # URL path ends with "/": there is no file name to save under.
                return None
            target = savetopath + audiofilename
            if os.path.exists( target ):
                self.printfunction( u" Already Downloaded ( OK )\n" )
                return audiofilename
            # Read before creating the file, so a failed transfer does not
            # leave an empty file that would later count as downloaded.
            data = stream.read()
            output = open( target, 'wb' )
            try:
                output.write( data )
            finally:
                output.close()
            self.printfunction( u" ( OK )\n" )
            return audiofilename
        finally:
            stream.close()

    def _nodeText( self, node ):
        """Return the text of `node`'s first child.

        Raises TaskerError when the element is empty or does not start with
        text.
        """
        node.normalize()
        text = getattr( node.firstChild, 'data', None )
        if text is None:
            raise TaskerError( "Task file error\nMalformed file, some field is wrong or missing!" )
        return text

    def _extractParameters( self, task ):
        """Extract the task parameters from the parsed task document.

        Returns the tuple ( ids, descriptors, modifydescriptors,
        contentlocatoruri, metadataprovideruri, description ) where:
          ids               -- text of every ID element
          descriptors       -- text of every Descriptor element
          modifydescriptors -- ( scope, name ) pairs, split on '::', of the
                               descriptors carrying modify='yes'
          contentlocatoruri, metadataprovideruri -- stripped element texts
          description       -- Description text, escaped with saxutils.escape

        Raises TaskerError when there is no ID or no Descriptor, when
        ContentLocator, MetadataProvider or Description do not appear exactly
        once, or when an element has no usable text.
        """
        ids = []
        descriptors = []
        modifydescriptors = []
        for idnode in task.getElementsByTagName( "ID" ):
            ids.append( self._nodeText( idnode ) )
        for desc in task.getElementsByTagName( "Descriptor" ):
            text = self._nodeText( desc )
            if desc.hasAttribute( 'modify' ) and desc.getAttribute( 'modify' )=='yes':
                scopename = text.split( '::' )
                if len( scopename ) < 2:
                    # A modifiable descriptor must be written "scope::name".
                    raise TaskerError( "Task file error\nMalformed file, some field is wrong or missing!" )
                modifydescriptors.append( ( scopename[0], scopename[1] ) )
            descriptors.append( text )
        contentlocators = task.getElementsByTagName( "ContentLocator" )
        metadataproviders = task.getElementsByTagName( "MetadataProvider" )
        de = task.getElementsByTagName( "Description" )
        if ( len( ids )<1 ) or ( len( descriptors )<1 ) \
           or len( contentlocators )!=1 or len( metadataproviders )!=1 \
           or len( de )!=1 :
            raise TaskerError( "Task file error\nMalformed file, some field is wrong or missing!" )
        metadataprovideruri = self._nodeText( metadataproviders[0] )
        contentlocatoruri = self._nodeText( contentlocators[0] )
        description = saxutils.escape( self._nodeText( de[0] ) )
        return ids, descriptors, modifydescriptors, contentlocatoruri.strip(), metadataprovideruri.strip(), description

    def _createFile( self, path, name, extension, content ):
        """Write `content` to the file `path + name + extension`.

        The three parts are concatenated verbatim, so `path` must end with a
        path separator.  Raises TaskerError when the file cannot be written.
        """
        try:
            output = open( path+name+extension, 'w' )
            try:
                output.write( content )
            finally:
                output.close()
        except Exception:
            raise TaskerError( "File write error\nError writing file '%s', maybe the directory does not exist!" % ( name+extension ) )