Howto Trac to Markdown

Si l'on souhaite convertir des pages du format Trac vers le format Markdown, un petit script peut faciliter la tâche.

#!/usr/bin/env ruby
## Based on: https://gist.github.com/619537

# Best-effort converter from Trac wiki markup to Markdown.
#
# The conversion is a fixed sequence of regular-expression substitutions; it
# does not parse the markup, so the result still needs a manual review.
class TracToMarkdown
  # Deepest Trac heading level (`====== Title ======`) that gets converted.
  MAX_HEADING_LEVEL = 6

  # Converts Trac wiki text to Markdown.
  #
  # @param input [String] Trac wiki markup; it is not modified (frozen strings
  #   are accepted)
  # @return [String] a new string holding the Markdown rendition
  def convert(input)
    # `dup` (unlike `clone`) yields an unfrozen copy, so the in-place
    # substitutions below also work when the caller passes a frozen string.
    output = input.dup

    # The order matters: inline code must be handled before fenced blocks,
    # and links before emphasis.
    convert_inline_code(output)
    convert_bullet_lists(output)
    convert_code_blocks(output)
    convert_headings(output)
    convert_links(output)
    convert_emphasis(output)
    convert_ordered_lists(output)

    # custom add-on for pedantic satisfaction
    # output.gsub!(/ ?(\.){3,}/, '…')

    output
  end

  # Removes every `[[PageOutline]]` macro that starts a line, with or without
  # macro arguments, together with the whitespace that follows it.
  #
  # @param input [String] page text
  # @return [String] a copy of the text without the outline macro
  def remove_outline(input)
    input.gsub(/^\[\[PageOutline(?:\(.*?\))?\]\]\s*/, '')
  end

  # Prepends a (French) notice telling readers that the page was imported
  # automatically and has not been reviewed yet.
  #
  # @param input [String] page text
  # @return [String] the banner, a blank line, then the page text
  def add_trac_banner(input)
    banner = "**Cette page a été importée automatiquement de notre ancien wiki mais n'a pas encore été révisée.**"

    "#{banner}\n\n#{input}"
  end

  private

  # `{{{code}}}` on a single line becomes an inline code span.
  def convert_inline_code(text)
    text.gsub!(/\{\{\{([^\n]+?)\}\}\}/, '`\1`')
  end

  # Strips the indentation of bullet items and makes sure a list is preceded
  # by a blank line.
  def convert_bullet_lists(text)
    # Only horizontal whitespace is stripped: `\s` would also match newlines
    # and swallow the blank lines standing before a list.
    text.gsub!(/^[ \t]*\*/, '*')

    # Insert a blank line when a list directly follows a text line.
    text.gsub!(/(^[^\* \n].*)\n\* /, "\\1\n\n* ")
  end

  # Multi-line `{{{ ... }}}` blocks become `~~~` fenced code blocks.
  def convert_code_blocks(text)
    text.gsub!(/^[ ]*\{\{\{/, '{{{')
    text.gsub!(/(\S)\n\{\{\{/, "\\1 \n\n{{{")
    text.gsub!(/\{\{\{(.+?)\}\}\}/m) do |block|
      block.gsub(/ *[\{\}]{3}/, '~~~')
    end
  end

  # `== Title ==` becomes `## Title`, for levels 1 to MAX_HEADING_LEVEL.
  def convert_headings(text)
    MAX_HEADING_LEVEL.downto(1) do |level|
      marker = '=' * level
      # Anchored at the start of the line so that equals signs inside running
      # text or code (`a == b || c == d`) are left alone.
      text.gsub!(/^[ \t]*#{marker}[ \t]+(.+?)[ \t]+#{marker}/, "#{'#' * level} \\1")
    end
  end

  # Converts labelled links, bare URLs and escaped CamelCase words.
  def convert_links(text)
    # [http://url label] -> [label](http://url)
    text.gsub!(/\[(https?[^\s\[\]]+)\s([^\[\]]+)\]/, '[\2](\1)')

    # Bare URLs become autolinks. The look-behind skips URLs that directly
    # follow "(" (the links produced just above) and, unlike a consumed
    # character, also lets a URL at the very start of the text match.
    text.gsub!(/(?<!\()(https?[^\s\[\]]+)/, '<\1>')

    # !CamelCase (escaped wiki word) -> CamelCase
    text.gsub!(/\!(([A-Z][a-z0-9]+){2,})/, '\1')
  end

  # Trac quote-based emphasis: five quotes are bold italic, three are bold,
  # two are italic. The longest marker is handled first.
  def convert_emphasis(text)
    text.gsub!(/'''''(.+?)'''''/, '***\1***')
    text.gsub!(/'''(.+?)'''/, '**\1**')
    text.gsub!(/''(.+?)''/, '_\1_')
  end

  # ` 2.` (one leading blank, one digit) becomes `1.`. Only a space or tab is
  # accepted before the digit so that a preceding blank line is not consumed.
  def convert_ordered_lists(text)
    text.gsub!(/^[ \t]\d\./, '1.')
  end
end

# Command-line entry point: read Trac markup from standard input and write the
# Markdown conversion to standard output.
#
# The guard keeps this from running when the file is loaded as a library (for
# instance through `require_relative`); without it, loading the file would
# block on standard input.
if __FILE__ == $PROGRAM_NAME
  converter = TracToMarkdown.new

  output = converter.convert($stdin.read)

  $stdout.write(output)
end

Copiez simplement ce script là où vous le souhaitez, rendez-le exécutable puis :

$ cat Page.trac | trac2md

Note : le script ne réalise pas une conversion parfaite ; il faudra relire le document et faire quelques corrections manuelles (titre en “front matter YAML”, etc.). Patches welcome :)

Voici le script qui a servi à convertir tout le contenu du Trac (nécessite la gem Mechanize) :

require 'mechanize'
require 'net/http'
require 'pathname'
require_relative "./trac_to_markdown"

# Exports every page listed in the Trac title index as a Markdown file.

domain = "http://trac.evolix.net"
index_url = "#{domain}/infogerance/wiki/TitleIndex"

base_pattern = %r(/infogerance/wiki/)
# Mechanize takes its search criteria as a hash; keep only the links whose
# href points into the wiki (this also drops links without an href).
links = Mechanize.new.get(index_url).links_with(href: base_pattern)

# Pages that are not exported. The patterns are not anchored at the end, so
# each one also covers every page whose path continues after the given name.
skip_pages = [
  %r(/infogerance/wiki/TitleIndex),
  %r(/infogerance/wiki/CamelCase),
  %r(/infogerance/wiki/RecentChange),
  %r(/infogerance/wiki/Wiki.*),
  %r(/infogerance/wiki/InterWiki),
  %r(/infogerance/wiki/InterTrac),
  %r(/infogerance/wiki/InterMapTxt),
  %r(/infogerance/wiki/SandBox),
  %r(/infogerance/wiki/Trac[A-Z]\w*),
]

# Pages whose text carries this (French) "obsolete page" warning are skipped.
skip_content = /ATTENTION, CETTE PAGE EST OBSOLÈTE/

save_path = Pathname.new(".")
# save_path = Pathname.new("../wiki")

converter = TracToMarkdown.new

links.each do |link|
  page = link.href

  if skip_pages.any? { |skip_page| skip_page.match(page) }
    puts "  #{page} skipped"
    next
  end

  # The page is requested with "?format=txt" and its body is run through the converter.
  uri = URI.parse("#{domain}#{page}?format=txt")
  response = Net::HTTP.get_response(uri)

  # Do not convert and save the body of an error or redirect response.
  unless response.is_a?(Net::HTTPSuccess)
    warn "! #{page} failed (HTTP #{response.code})"
    next
  end

  trac_content = response.body.force_encoding("UTF-8")

  if skip_content.match(trac_content)
    puts "  #{page} skipped"
    next
  end

  md_content = converter.convert(trac_content)
  md_content = converter.remove_outline(md_content)
  md_content = converter.add_trac_banner(md_content)

  # Create the target directory only once the page is known to be kept, so
  # that skipped pages do not leave empty directories behind.
  file_path = save_path.join(page.gsub(base_pattern, "") + ".md")
  file_path.dirname.mkpath

  file_path.write(md_content)
  puts "+ #{page} saved"
end