diff options
Diffstat (limited to 'debian/htdig/htdig-3.2.0b6/installdir/HtFileType-magic.mime')
-rw-r--r-- | debian/htdig/htdig-3.2.0b6/installdir/HtFileType-magic.mime | 495 |
1 files changed, 495 insertions, 0 deletions
diff --git a/debian/htdig/htdig-3.2.0b6/installdir/HtFileType-magic.mime b/debian/htdig/htdig-3.2.0b6/installdir/HtFileType-magic.mime new file mode 100644 index 00000000..e1ce8746 --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/installdir/HtFileType-magic.mime @@ -0,0 +1,495 @@ +# Magic data for for file(1) command +# +# The format is 4-5 columns: +# Column #1: byte number to begin checking from, ">" indicates continuation +# Column #2: type of data to match +# Column #3: contents of data to match +# Column #4: MIME type of result +# Column #5: MIME encoding of result (optional) +# +# Modified by <mailto:lha@users.sourceforge.net> for compatibility with +# different versions of file(1): +# - Columns are separated by TABs (for traditional versions) +# - spaces and '<'s within a column are escaped by '\' (for new versions) +# - Hex numbers in strings are given as '\0x' (traditional) and '\x' (new) +# - Null characters (\000) traditionally terminate strings, but now don't + +#------------------------------------------------------------------------------ +# Localstuff: file(1) magic for locally observed files +# Add any locally observed files here. + +#------------------------------------------------------------------------------ +# end local stuff +#------------------------------------------------------------------------------ + +#------------------------------------------------------------------------------ +# Java + +0 short 0xcafe +>2 short 0xbabe application/java + +# Microsoft WAVE format (*.wav) +# [GRR 950115: probably all of the shorts and longs should be leshort/lelong] +# Microsoft RIFF +0 string RIFF audio/unknown +# - WAVE format +>8 string WAVE audio/x-wav +>8 string AVI video/x-msvideo +# +0 belong 0x2e7261fd application/x-realaudio + +# MPEG Layer 3 sound files +0 beshort &0xffe0 audio/mpeg +#MP3 with ID3 tag +0 string ID3 audio/mpeg +# Ogg/Vorbis +0 string OggS audio/x-ogg + +#------------------------------------------------------------------------------ +# commands: file(1) magic for various shells and interpreters +# +#0 string :\ shell archive or commands for antique kernel text +0 string #!/bin/sh application/x-shellscript +0 string #!\ /bin/sh application/x-shellscript +0 string #!/bin/csh application/x-shellscript +0 string #!\ /bin/csh application/x-shellscript +# korn shell magic, sent by George Wu, gwu@clyde.att.com +0 string #!/bin/ksh application/x-shellscript +0 string #!\ /bin/ksh application/x-shellscript +0 string #!/bin/tcsh application/x-shellscript +0 string #!\ /bin/tcsh application/x-shellscript +0 string #!/usr/local/tcsh application/x-shellscript +0 string #!\ /usr/local/tcsh application/x-shellscript +0 string #!/usr/local/bin/tcsh application/x-shellscript +0 string #!\ /usr/local/bin/tcsh application/x-shellscript +# bash shell magic, from Peter Tobias (tobias@server.et-inf.fho-emden.de) +0 string #!/bin/bash application/x-shellscript +0 string #!\ /bin/bash application/x-shellscript +0 string #!/usr/local/bin/bash application/x-shellscript +0 string #!\ /usr/local/bin/bash application/x-shellscript + +# +# zsh/ash/ae/nawk/gawk magic from cameron@cs.unsw.oz.au (Cameron Simpson) +0 string #!/usr/local/bin/zsh application/x-shellscript +0 string #!\ /usr/local/bin/zsh application/x-shellscript +0 string #!/usr/local/bin/ash application/x-shellscript +0 string #!\ /usr/local/bin/ash application/x-shellscript +#0 string #!/usr/local/bin/ae Neil Brown's ae +#0 string #!\ /usr/local/bin/ae Neil Brown's ae +0 string #!/bin/nawk application/x-nawk +0 string #!\ /bin/nawk application/x-nawk +0 string #!/usr/bin/nawk application/x-nawk +0 string #!\ /usr/bin/nawk application/x-nawk +0 string #!/usr/local/bin/nawk application/x-nawk +0 string #!\ /usr/local/bin/nawk application/x-nawk +0 string #!/bin/gawk application/x-gawk +0 string #!\ /bin/gawk application/x-gawk +0 string #!/usr/bin/gawk application/x-gawk +0 string #!\ /usr/bin/gawk application/x-gawk +0 string #!/usr/local/bin/gawk application/x-gawk +0 string #!\ /usr/local/bin/gawk application/x-gawk +# +0 string #!/bin/awk application/x-awk +0 string #!\ /bin/awk application/x-awk +0 string #!/usr/bin/awk application/x-awk +0 string #!\ /usr/bin/awk application/x-awk +0 string BEGIN application/x-awk + +# For Larry Wall's perl language. The ``eval'' line recognizes an +# outrageously clever hack for USG systems. +# Keith Waclena <keith@cerberus.uchicago.edu> +0 string #!/bin/perl application/x-perl +0 string #!\ /bin/perl application/x-perl +0 string eval\ "exec\ /bin/perl application/x-perl +0 string #!/usr/bin/perl application/x-perl +0 string #!\ /usr/bin/perl application/x-perl +0 string eval\ "exec\ /usr/bin/perl application/x-perl +0 string #!/usr/local/bin/perl application/x-perl +0 string #!\ /usr/local/bin/perl application/x-perl +0 string eval\ "exec\ /usr/local/bin/perl application/x-perl + +#------------------------------------------------------------------------------ +# compress: file(1) magic for pure-compression formats (no archives) +# +# compress, gzip, pack, compact, huf, squeeze, crunch, freeze, yabba, whap, etc. +# +# Formats for various forms of compressed data +# Formats for "compress" proper have been moved into "compress.c", +# because it tries to uncompress it to figure out what's inside. +# (Technically, "gzip", "bzip2" etc. are encodings, not mime-types, +# and should also decompress to find out the type of data inside.) + +# standard unix compress +0 string \037\235 application/x-compress + +# gzip (GNU zip, not to be confused with [Info-ZIP/PKWARE] zip archiver) +0 string \037\213 application/x-gzip + +# bzip2 +0 string BZh application/x-bzip2 + +0 string PK\003\004 application/x-zip + +# According to gzip.h, this is the correct byte order for packed data. +0 string \037\036 application/octet-stream +# +# This magic number is byte-order-independent. +# +0 short 017437 application/octet-stream + +# XXX - why *two* entries for "compacted data", one of which is +# byte-order independent, and one of which is byte-order dependent? +# +# compacted data +0 short 0x1fff application/octet-stream +0 string \377\037 application/octet-stream +# huf output +0 short 0145405 application/octet-stream + +# Squeeze and Crunch... +# These numbers were gleaned from the Unix versions of the programs to +# handle these formats. Note that I can only uncrunch, not crunch, and +# I didn't have a crunched file handy, so the crunch number is untested. +# Keith Waclena <keith@cerberus.uchicago.edu> +#0 leshort 0x76FF squeezed data (CP/M, DOS) +#0 leshort 0x76FE crunched data (CP/M, DOS) + +# Freeze +#0 string \037\237 Frozen file 2.1 +#0 string \037\236 Frozen file 1.0 (or gzip 0.5) + +# lzh? +#0 string \037\240 LZH compressed data + +257 string ustar\0 application/x-tar posix +257 string ustar\040\040\0 application/x-tar gnu + +0 short 070707 application/x-cpio +0 short 0143561 application/x-cpio swapped + +0 string =<ar> application/x-archive +0 string !<arch> application/x-archive +>8 string debian application/x-debian-package + +#------------------------------------------------------------------------------ +# +# RPM: file(1) magic for Red Hat Packages Erik Troan (ewt@redhat.com) +# +0 beshort 0xedab +>2 beshort 0xeedb application/x-rpm + +0 lelong&0x8080ffff 0x0000081a application/x-arc lzw +0 lelong&0x8080ffff 0x0000091a application/x-arc squashed +0 lelong&0x8080ffff 0x0000021a application/x-arc uncompressed +0 lelong&0x8080ffff 0x0000031a application/x-arc packed +0 lelong&0x8080ffff 0x0000041a application/x-arc squeezed +0 lelong&0x8080ffff 0x0000061a application/x-arc crunched + +0 leshort 0xea60 application/octet-stream x-arj + +# LHARC/LHA archiver (Greg Roelofs, newt@uchicago.edu) +2 string -lh0- application/x-lharc lh0 +2 string -lh1- application/x-lharc lh1 +2 string -lz4- application/x-lharc lz4 +2 string -lz5- application/x-lharc lz5 +# [never seen any but the last; -lh4- reported in comp.compression:] +2 string -lzs- application/x-lha lzs +2 string -lh\ - application/x-lha lh +2 string -lhd- application/x-lha lhd +2 string -lh2- application/x-lha lh2 +2 string -lh3- application/x-lha lh3 +2 string -lh4- application/x-lha lh4 +2 string -lh5- application/x-lha lh5 +2 string -lh6- application/x-lha lh6 +2 string -lh7- application/x-lha lh7 +# Shell archives +10 string #\ This\ is\ a\ shell\ archive application/octet-stream x-shell + +#------------------------------------------------------------------------------ +# frame: file(1) magic for FrameMaker files +# +# This stuff came on a FrameMaker demo tape, most of which is +# copyright, but this file is "published" as witness the following: +# +0 string \<MakerFile application/x-frame +0 string \<MIFFile application/x-frame +0 string \<MakerDictionary application/x-frame +0 string \<MakerScreenFon application/x-frame +0 string \<MML application/x-frame +0 string \<Book application/x-frame +0 string \<Maker application/x-frame + +#------------------------------------------------------------------------------ +# html: file(1) magic for HTML (HyperText Markup Language) docs +# +# from Daniel Quinlan <quinlan@yggdrasil.com> +# modified by Lachlan Andrew <lha@users.sourceforge.net> to +# match leading whitespace, but still work with old versions +# of file(1) which don't recognise the /cb options +# +0 string \<HEAD text/html +0 string \<head text/html +0 string \<TITLE text/html +0 string \<title text/html +0 string \<HTML text/html +0 string \<html text/html +0 string \<!-- text/html +0 string \<H1 text/html +0 string \<h1 text/html +0 string \<!DOCTYPE\ HTML text/html +0 string \<!doctype\ HTML text/html +0 string \<!doctype\ html text/html +0 string \<!DOCTYPE\ NETSCAPE-Bookmark text/html +0 string/cb \ <head text/html +0 string/cb \ <html text/html +0 string/cb \ <title text/html +0 string/cb \ <!doctype\ html text/html +0 string \<!\ text/html + +# Extensible markup language (XML), a subset of SGML +# from Marc Prud'hommeaux (marc@apocalypse.org) +0 string \<?xml text/xml +0 string/cb \ \<?xml text/xml + +# SGML, mostly from rph@sq +0 string \<!doctype text/sgml +0 string \<!subdoc text/sgml +0 string/cb \ \<!doctype text/sgml +0 string/cb \ \<!subdoc text/sgml + + + +#------------------------------------------------------------------------------ +# images: file(1) magic for image formats (see also "c-lang" for XPM bitmaps) +# +# originally from jef@helios.ee.lbl.gov (Jef Poskanzer), +# additions by janl@ifi.uio.no as well as others. Jan also suggested +# merging several one- and two-line files into here. +# +# XXX - byte order for GIF and TIFF fields? +# [GRR: TIFF allows both byte orders; GIF is probably little-endian] +# + +# [GRR: what the hell is this doing in here?] +#0 string xbtoa btoa'd file + +# PBMPLUS +# PBM file +0 string P1 image/x-portable-bitmap 7bit +# PGM file +0 string P2 image/x-portable-greymap 7bit +# PPM file +0 string P3 image/x-portable-pixmap 7bit +# PBM "rawbits" file +0 string P4 image/x-portable-bitmap +# PGM "rawbits" file +0 string P5 image/x-portable-greymap +# PPM "rawbits" file +0 string P6 image/x-portable-pixmap + +# NIFF (Navy Interchange File Format, a modification of TIFF) +# [GRR: this *must* go before TIFF] +0 string IIN1 image/x-niff + +# TIFF and friends +# TIFF file, big-endian +0 string MM image/tiff +# TIFF file, little-endian +0 string II image/tiff + +# possible GIF replacements; none yet released! +# (Greg Roelofs, newt@uchicago.edu) +# +# GRR 950115: this was mine ("Zip GIF"): +# ZIF image (GIF+deflate alpha) +0 string GIF94z image/unknown +# +# GRR 950115: this is Jeremy Wohl's Free Graphics Format (better): +# FGF image (GIF+deflate beta) +0 string FGF95a image/unknown +# +# GRR 950115: this is Thomas Boutell's Portable Bitmap Format proposal +# (best; not yet implemented): +# PBF image (deflate compression) +0 string PBF image/unknown + +# GIF +0 string GIF image/gif + +# JPEG images +0 beshort 0xffd8 image/jpeg +0 string \377\330\377\340 image/jpeg +0 string \377\330\377\341 image/jpeg +0 string \377\330\377\356 image/jpeg + + +# PC bitmaps (OS/2, Windoze BMP files) (Greg Roelofs, newt@uchicago.edu) +0 string BM image/bmp +#>14 byte 12 (OS/2 1.x format) +#>14 byte 64 (OS/2 2.x format) +#>14 byte 40 (Windows 3.x format) +#0 string IC icon +#0 string PI pointer +#0 string CI color icon +#0 string CP color pointer +#0 string BA bitmap array + + +#------------------------------------------------------------------------------ +# lisp: file(1) magic for lisp programs +# +# various lisp types, from Daniel Quinlan (quinlan@yggdrasil.com) +0 string ;; text/plain 8bit +# Emacs 18 - this is always correct, but not very magical. +0 string \012( application/x-elc +# Emacs 19 +0 string ;ELC\023\000\000\000 application/x-elc + +#------------------------------------------------------------------------------ +# mail.news: file(1) magic for mail and news +# +# There are tests to ascmagic.c to cope with mail and news. +0 string Relay-Version: message/rfc822 7bit +0 string #!\ rnews message/rfc822 7bit +0 string N#!\ rnews message/rfc822 7bit +0 string Forward\ to message/rfc822 7bit +0 string Pipe\ to message/rfc822 7bit +0 string Return-Path: message/rfc822 7bit +0 string Path: message/news 8bit +0 string Xref: message/news 8bit +0 string From: message/rfc822 7bit +0 string Article message/news 8bit +#------------------------------------------------------------------------------ +# msword: file(1) magic for MS Word files +# +# Contributor claims: +# Reversed-engineered MS Word magic numbers +# Some of these also occur in PowerPoint. -- lha@users.sourceforge.net + +0 string \376\067\0\043 application/msword +0 string \320\317\021\340\241\261 application/msword +0 string \333\245-\0\0\0 application/msword + + + +#------------------------------------------------------------------------------ +# printer: file(1) magic for printer-formatted files +# + +# PostScript +0 string %! application/postscript +0 string \004%! application/postscript +0 string \033%-12345X%!PS application/postscript + +# Acrobat +# (due to clamen@cs.cmu.edu) +0 string %PDF- application/pdf + +#------------------------------------------------------------------------------ +# sc: file(1) magic for "sc" spreadsheet +# +38 string Spreadsheet application/x-sc + +#------------------------------------------------------------------------------ +# tex: file(1) magic for TeX files +# +# XXX - needs byte-endian stuff (big-endian and little-endian DVI?) +# +# From <conklin@talisman.kaleida.com> + +# Although we may know the offset of certain text fields in TeX DVI +# and font files, we can't use them reliably because they are not +# zero terminated. [but we do anyway, christos] +0 string \367\002 application/x-dvi +#0 string \367\203 TeX generic font data +#0 string \367\131 TeX packed font data +#0 string \367\312 TeX virtual font data +#0 string This\ is\ TeX, TeX transcript text +#0 string This\ is\ METAFONT, METAFONT transcript text + +# Texinfo and GNU Info, from Daniel Quinlan (quinlan@yggdrasil.com) +#0 string \\input\ texinfo Texinfo source text +#0 string This\ is\ Info\ file GNU Info text + +# correct TeX magic for Linux (and maybe more) +# from Peter Tobias (tobias@server.et-inf.fho-emden.de) +# +0 leshort 0x02f7 application/x-dvi + +# RTF - Rich Text Format +0 string {\\rtf text/rtf + +#------------------------------------------------------------------------------ +# animation: file(1) magic for animation/movie formats +# +# animation formats, originally from vax@ccwf.cc.utexas.edu (VaX#n8) +# MPEG file +0 belong 0x000001b3 video/mpeg +0 belong 0x000001ba video/mpeg +# FLI animation format +0 leshort 0xAF11 video/fli +# FLC animation format +0 leshort 0xAF12 video/flc +# AVI +>8 string AVI\ video/avi +# +# SGI and Apple formats +# +0 string MOVI video/sgi +4 string moov video/quicktime moov +4 string mdat video/quicktime mdat +# The contributor claims: +# I couldn't find a real magic number for these, however, this +# -appears- to work. Note that it might catch other files, too, +# so BE CAREFUL! +# +# Note that title and author appear in the two 20-byte chunks +# at decimal offsets 2 and 22, respectively, but they are XOR'ed with +# 255 (hex FF)! DL format SUCKS BIG ROCKS. +# +# DL file version 1 , medium format (160x100, 4 images/screen) +0 byte 1 video/unknown +0 byte 2 video/unknown +# +# Databases +# +# GDBM magic numbers +# Will be maintained as part of the GDBM distribution in the future. +# <downsj@teeny.org> +0 belong 0x13579ace application/x-gdbm +0 lelong 0x13579ace application/x-gdbm +0 string GDBM application/x-gdbm +# +0 belong 0x061561 application/x-dbm +# +# Executables +# +0 string \177ELF +>4 byte 0 +>4 byte 1 +>4 byte 2 +>5 byte 0 +>5 byte 1 +>>16 leshort 0 +>>16 leshort 1 application/x-object +>>16 leshort 2 application/x-executable +>>16 leshort 3 application/x-sharedlib +>>16 leshort 4 application/x-coredump +# +# DOS +0 string MZ application/x-dosexec +# +# KDE +0 string [KDE\ Desktop\ Entry] application/x-kdelnk +0 string \#\ KDE\ Config\ File application/x-kdelnk +# xmcd database file for kscd +0 string \#\ xmcd text/xmcd + +#------------------------------------------------------------------------------ +# pkgadd: file(1) magic for SysV R4 PKG Datastreams +# +0 string #\ PaCkAgE\ DaTaStReAm application/x-svr4-package + +#PNG Image Format +0 string \x89PNG image/png +0 string \0x89PNG image/png |