I’ve recently had to extract all the documents from a failed Sharepoint 2007 server into individual files, and thought I’d share how. After some Googling, I found a VBScript that almost did what I wanted, but only for one file at a time; So I modified it to search for files matching a pattern and extract them all to a specified folder, while re-creating the subfolder structure of the Sharepoint site. It should even create the root output folder if it needs to.
You’ll want to edit the server, contentDatabase, whereClause (i.e., search terms), and outputPath variables to fit your needs. Just copy & paste to a file on your Sharepoint machine, modify the variables, and run cscript [scriptname].vbs to extract.
'========================================
'VBScript to extract documents from a Sharepoint 2007 Database
Dim contentDatabase
Dim whereClause
Dim outputPath
Dim fs
'========================================
'Edit these values to fit your environment:
server = "[SERVERNAME]" 'Or [SERVERNAME]\[INSTANCENAME], if applicable
contentDatabase = "WSS_Content"
whereClause = "LeafName LIKE '%.xml%' OR LeafName LIKE '%.xsn%' OR LeafName LIKE '%.doc%' OR LeafName LIKE '%.xls%'"
outputPath = "C:\sp_extract\"
'========================================
'You shouldn't need to change anything below here (Unless you want to)
Set fs = CreateObject("Scripting.FileSystemObject")
If Right(outputPath,1) <> "\" Then outputPath = outputPath + "\"
ExtractDoc server, contentDatabase, whereClause, outputPath
'========================================
Sub ExtractDoc(server, contentDatabase, whereClause, outputPath)
Dim conStr, selectStr, fileName
conStr = "Provider=SQLOLEDB;data Source=" + server + ";Initial Catalog=" + contentDatabase + ";Trusted_Connection=yes"
selectStr = "SELECT dbo.AllDocs.LeafName, dbo.AllDocs.DirName, dbo.AllDocStreams.Content FROM dbo.AllDocs "
selectStr = selectStr + "INNER JOIN dbo.AllDocStreams "
selectStr = selectStr + " ON dbo.AllDocs.ID= dbo.AllDocStreams.ID "
selectStr = selectStr + " AND dbo.AllDocs.Level = dbo.AllDocStreams.Level "
selectStr = selectStr + " WHERE " + whereClause +" AND IsCurrentVersion=1"
Set cn = CreateObject("ADODB.Connection")
Set rs = CreateObject("ADODB.Recordset")
cn.Open conStr
Set rs = cn.Execute(selectStr)
Do While Not rs.EOF
fileName = outputPath + rs.Fields("DirName").Value + "\" + rs.Fields("LeafName").Value
If Not fs.FolderExists( fs.GetParentFolderName(fileName) ) then
Call CreateFolder( fs.GetParentFolderName(fileName) )
End If
Set mstream = CreateObject("ADODB.Stream")
mstream.Type = 1
mstream.Open
mstream.Write rs.Fields("Content").Value
mstream.SaveToFile fileName, 2
mstream.Close
rs.MoveNext
Loop
rs.Close
cn.Close
End Sub
'========================================
'Recursive folder create, will create directories and parent directories
Sub CreateFolder( strPath )
On Error Resume Next
If strPath <> "" Then 'Fixes endless recursion in some instances when at lowest directory
If Not fs.FolderExists( fs.GetParentFolderName(strPath) ) then Call CreateFolder( fs.GetParentFolderName(strPath) )
fs.CreateFolder( strPath )
End If
End Sub