Project

General

Profile

Bug #175 » imdb.patch

svn diff - msephton, 22/08/2011 07:01 PM

View differences:

imdb.xml (working copy)
<?xml version="1.0" encoding="UTF-8"?>
<scraper framework="1.12x" date="2010-10-05" name="IMDb.com" content="movies" thumb="imdb.png" language="en">
<?xml version="1.0" encoding="UTF-8"?>
<scraper framework="1.12x" date="2010-10-05" name="IMDb.com" content="movies" thumb="imdb.png" language="en">
<include>common/imdb.xml</include>
<include>common/tmdb.xml</include>
<include>common/movieposterdb.xml</include>
......
</RegExp>
</GetSettings>
<!--
  NfoUrl: looks for an IMDb link in the input buffer ($$1) and emits a url element
  followed by an id element into buffer 3.
  NOTE(review): this text is a diff rendering without +/- markers. Several start tags
  below appear twice with identical text, presumably the removed and the added side of
  a whitespace-only change, so start and end tags do not balance as shown. Confirm
  against the raw patch before treating this as the real file content.
-->
<NfoUrl dest="3">
<!-- Link form "imdb.com/Title?digits": group 1 captures the imdb.com part, group 2 the digits; the output prefixes the digits with "tt". -->
<RegExp input="$$1" output="&lt;url&gt;http://www.\1/title/tt\2/&lt;/url&gt;&lt;id&gt;tt\2&lt;/id&gt;" dest="3">
<expression clear="yes" noclean="1">(imdb.com)/Title\?([0-9]*)</expression>
<RegExp input="$$1" output="&lt;url&gt;http://www.\1\2/&lt;/url&gt;&lt;id&gt;tt\2&lt;/id&gt;" dest="3">
<expression clear="yes" noclean="1">(imdb.com)/Title\?([0-9]*)</expression>
</RegExp>
<!-- Link form "imdb.com/title/tt" plus digits: group 1 already ends in "tt", so the url is rebuilt as group 1 followed by group 2. -->
<!-- The "+" in dest="3+" presumably appends to buffer 3 instead of replacing it; verify against the scraper framework documentation. -->
<RegExp input="$$1" output="&lt;url&gt;http://www.\1\2/&lt;/url&gt;&lt;id&gt;tt\2&lt;/id&gt;" dest="3+">
<RegExp input="$$1" output="&lt;url&gt;http://www.\1\2/&lt;/url&gt;&lt;id&gt;tt\2&lt;/id&gt;" dest="3+">
<expression noclean="1">(imdb.com/title/tt)([0-9]*)</expression>
</RegExp>
</NfoUrl>
<CreateSearchUrl dest="3" SearchStringEncoding="iso-8859-1">
<RegExp input="$$1" output="&lt;url&gt;http://akas.imdb.com/find?s=tt;q=\1$$4&lt;/url&gt;" dest="3">
<CreateSearchUrl dest="3" SearchStringEncoding="iso-8859-1">
<RegExp input="$$1" output="&lt;url&gt;http://akas.imdb.com/find?s=tt;q=\1$$4&lt;/url&gt;" dest="3">
<RegExp input="$$2" output="%20(\1)" dest="4">
<expression clear="yes">(.+)</expression>
</RegExp>
......
<RegExp input="$$1" output="\1" dest="7">
<expression clear="yes" noclean="1">og:url&quot; content=&quot;.+?/title/([t0-9]*)/</expression>
</RegExp>
<RegExp input="$$1" output="&lt;entity&gt;&lt;title&gt;\1&lt;/title&gt;&lt;year&gt;\2&lt;/year&gt;&lt;url&gt;http://akas.imdb.com/title/$$7/&lt;/url&gt;&lt;id&gt;$$7&lt;/id&gt;&lt;/entity&gt;" dest="5">
<RegExp input="$$1" output="&lt;entity&gt;&lt;title&gt;\1&lt;/title&gt;&lt;year&gt;\2&lt;/year&gt;&lt;url&gt;http://akas.imdb.com/title/$$7/&lt;/url&gt;&lt;id&gt;$$7&lt;/id&gt;&lt;/entity&gt;" dest="5">
<expression clear="yes" noclean="1">&lt;meta name="title" content="([^"]*) \([^\)0-9]*([0-9]+)\)</expression>
</RegExp>
<RegExp input="$$1" output="\1" dest="4">
<expression noclean="1">(&gt;&lt;a href=&quot;/title.*)</expression>
</RegExp>
<RegExp input="$$4" output="&lt;entity&gt;&lt;title&gt;\2&lt;/title&gt;&lt;year&gt;\3&lt;/year&gt;&lt;url&gt;http://akas.imdb.com/title/\1/&lt;/url&gt;&lt;id&gt;\1&lt;/id&gt;&lt;/entity&gt;" dest="5+">
<RegExp input="$$4" output="&lt;entity&gt;&lt;title&gt;\2&lt;/title&gt;&lt;year&gt;\3&lt;/year&gt;&lt;url&gt;http://akas.imdb.com/title/\1/&lt;/url&gt;&lt;id&gt;\1&lt;/id&gt;&lt;/entity&gt;" dest="5+">
<expression repeat="yes" noclean="1,2">&gt;&lt;a href=&quot;/title/([t0-9]*)/[^&gt;]*&gt;([^&lt;]*)&lt;/a&gt; *\(([0-9]*)</expression>
</RegExp>
<expression clear="yes" noclean="1"/>
......
<expression fixchars="1" trim="1" noclean="1">&lt;h1 class="header" itemprop="name"&gt;([^&lt;]*)</expression>
</RegExp>
<RegExp input="$$1" output="&lt;year&gt;\1&lt;/year&gt;" dest="5+">
<expression fixchars="1">a href=&quot;/year/([0-9]*)</expression>
<expression fixchars="1">a href=&quot;/year/([0-9]*)</expression>
</RegExp>
<RegExp input="$$1" output="&lt;top250&gt;\1&lt;/top250&gt;" dest="5+">
<expression fixchars="1">Top 250 #([0-9]*)&lt;/a&gt;</expression>
</RegExp>
<RegExp input="$$1" output="&lt;mpaa&gt;\1&lt;/mpaa&gt;" dest="5+">
<expression fixchars="1">MPAA&lt;/a&gt;\)&lt;/h4&gt;\n([^&lt;]*)</expression>
<expression>MPAA&lt;/a&gt;\)&lt;/h4&gt;\n?&lt;span itemprop=&quot;contentRating&quot;&gt;([^&lt;]*)</expression>
</RegExp>
<RegExp input="$$1" output="&lt;certification&gt;\1 \3&lt;/certification&gt;" dest="5+">
<expression fixchars="1" repeat="yes">&lt;a href=&quot;/search/title\?certificates=[^&quot;]*&quot;&gt;([^&lt;]*)&lt;/a&gt;[^&lt;]*(&lt;i&gt;([^&lt;]*)&lt;/i&gt;)?</expression>
<expression fixchars="1" repeat="yes">&lt;a href=&quot;/search/title\?certificates=[^&quot;]*&quot;&gt;([^&lt;]*)&lt;/a&gt;[^&lt;]*(&lt;i&gt;([^&lt;]*)&lt;/i&gt;)?</expression>
</RegExp>
<RegExp input="$$1" output="&lt;tagline&gt;\1&lt;/tagline&gt;" dest="5+">
<expression fixchars="1">&gt;Taglines:&lt;/h4&gt;([^&lt;]*)</expression>
<expression>&lt;h4[^&gt;]*?&gt;Taglines:&lt;/h4&gt;\n?([^&lt;]*)</expression>
</RegExp>
<RegExp input="$$1" output="&lt;runtime&gt;\1&lt;/runtime&gt;" dest="5+">
<expression noclean="1">&gt;Runtime:&lt;/h4&gt;[^0-9]*([,0-9]+) min</expression>
<expression trim="1">&lt;h4[^&gt;]*?&gt;Runtime:&lt;/h4&gt;[^0-9]*([0-9]+)</expression>
</RegExp>
<RegExp input="$$1" output="&lt;rating&gt;\1&lt;/rating&gt;" dest="5+">
<expression fixchars="1">&quot;star-bar-user-rate&quot;&gt;&lt;b&gt;([0-9.]+).*?&gt;.*?title=&quot;([0-9,]+)\s</expression>
<RegExp input="$$1" output="&lt;rating&gt;\1&lt;/rating&gt;&lt;votes&gt;\2&lt;/votes&gt;" dest="5+">
<expression fixchars="1">&lt;span itemprop=&quot;ratingValue&quot;&gt;([0-9.]+).*?&gt;.*?title=&quot;([0-9,]+)\s</expression>
</RegExp>
<RegExp input="$$1" output="&lt;votes&gt;\1&lt;/votes&gt;" dest="5+">
<expression fixchars="1">&gt;([0-9,]+) votes&lt;/a&gt;</expression>
</RegExp>
<RegExp input="$$1" output="&lt;genre&gt;\1&lt;/genre&gt;" dest="5+">
<expression fixchars="1" repeat="yes">"/genre/[^/]*"&gt;([^&lt;]*)&lt;/a&gt;</expression>
</RegExp>
<RegExp input="$$1" output="&lt;studio&gt;\1&lt;/studio&gt;" dest="5+">
<expression fixchars="1" repeat="yes">&quot;/company/[^/]*/&quot;&gt;([^&lt;]*)&lt;/a&gt;</expression>
</RegExp>
<RegExp input="$$9" output="&lt;outline&gt;\1&lt;/outline&gt;&lt;plot&gt;\1&lt;/plot&gt;"dest="5+">
<RegExp input="$$1" output="\1" dest="8">
<expression >reviews&lt;/a&gt;&lt;/span&gt;(.*?)(?:&lt;a href="plotsummary"&gt;|&lt;/p&gt;)</expression>
<RegExp input="$$1" output="&lt;outline&gt;\1&lt;/outline&gt;&lt;plot&gt;\1&lt;/plot&gt;" dest="5+">
<expression trim="1">&lt;p&gt;\n&lt;p itemprop=&quot;description&quot;&gt;([^&lt;]+)&lt;/p&gt;</expression>
</RegExp>
<RegExp input="$$8" output="\1 " dest="9+">
<expression repeat="yes">([^\n]+)</expression>
</RegExp>
<expression noclean="1"></expression>
</RegExp>
<RegExp input="$$2" output="&lt;url function=&quot;GetIMDBPlot&quot;&gt;$$3plotsummary&lt;/url&gt;" dest="5+">
<expression fixchars="1"/>
</RegExp>
......
<RegExp conditional="movieposterdb" input="$$1" output="&lt;url function=&quot;GetMoviePosterDBLink&quot;&gt;http://www.movieposterdb.com/browse/search?type=movies&amp;amp;query=\1&lt;/url&gt;" dest="5+">
<expression fixchars="1">/title/tt([t0-9]*)/faq</expression>
</RegExp>
<RegExp conditional="trailer" input="$$1" output="&lt;url function=&quot;GetIMDBTrailer&quot;&gt;http://akas.imdb.com/video/imdb/vi\1/player&lt;/url&gt;" dest="5+">
<expression fixchars="1">/vi([0-9]*)/</expression>
<RegExp conditional="trailer" input="$$1" output="&lt;url function=&quot;GetIMDBTrailer&quot;&gt;http://akas.imdb.com/video/imdb/vi\1/player&lt;/url&gt;" dest="5+">
<expression fixchars="1">/vi([0-9]*)/</expression>
</RegExp>
<RegExp conditional="dtrailer" input="$$6" output="&lt;url function=&quot;GetDTrailerLink&quot;&gt;http://en.dtrailer.com/movies/search/\1&lt;/url&gt;" dest="5+">
<RegExp input="$$4" output="\1-" dest="6">
<RegExp input="$$1" output="\1" dest="4">
<expression fixchars="1" trim="1" noclean="1">&lt;h1&gt;([^&lt;]*)</expression>
</RegExp>
<expression fixchars="1" repeat="yes">([a-zA-Z0-9]+)</expression>
</RegExp>
<expression fixchars="1"/>
<RegExp conditional="dtrailer" input="$$6" output="&lt;url function=&quot;GetDTrailerLink&quot;&gt;http://en.dtrailer.com/movies/search/\1&lt;/url&gt;" dest="5+">
<RegExp input="$$4" output="\1-" dest="6">
<RegExp input="$$1" output="\1" dest="4">
<expression fixchars="1" trim="1" noclean="1">&lt;h1&gt;([^&lt;]*)</expression>
</RegExp>
<expression fixchars="1" repeat="yes">([a-zA-Z0-9]+)</expression>
</RegExp>
<expression fixchars="1"/>
</RegExp>
<RegExp input="$$2" output="&lt;url cache=&quot;$$2-posters.html&quot; function=&quot;GetIMDBThumbs&quot;&gt;$$3posters&lt;/url&gt;" dest="5+">
<expression fixchars="1"/>
common/imdb.xml (working copy)
</GetIMDBTrailer>
<!--
  GetIMDBWriters: extracts writer names from the page in $$1 and wraps the collected
  credits elements (buffer 2) in a details element written to buffer 5.
  NOTE(review): this text is a diff rendering without +/- markers. Where two similar
  lines sit next to each other, the first is presumably the removed version and the
  second the added one, so start and end tags do not balance as shown. Confirm against
  the raw patch.
-->
<GetIMDBWriters dest="5">
<RegExp input="$$2" output="&lt;details&gt;\1&lt;/details&gt;" dest="5">
<!-- Two variants of the step that copies the writers section of $$1 into buffer 6: one gated on conditional="!fullcredits" and ending at a closing div, one without a condition that starts after the closing h4 and stops at the next "div class". -->
<RegExp conditional="!fullcredits" input="$$1" output="\1" dest="6">
<expression noclean="1">Writers?\:(.*?)&lt;/div&gt;</expression>
<RegExp input="$$1" output="\1" dest="6">
<expression noclean="1">Writer[s]?.*?&lt;/h4&gt;(.*?)&lt;div class</expression>
</RegExp>
<!-- Variant gated on conditional="fullcredits": takes the text between "Writing credits" and the next name=" attribute. -->
<RegExp conditional="fullcredits" input="$$1" output="\1" dest="6">
<expression noclean="1">Writing credits(.*?)name=&quot;</expression>
</RegExp>
<!-- Emits one credits element per name link found in buffer 6; two variants of the link pattern are shown, both capturing the link text. -->
<RegExp input="$$6" output="&lt;credits&gt;\1&lt;/credits&gt;" dest="2+">
<expression fixchars="1" repeat="yes">/name/[^&gt;"]*"&gt;([^&lt;]*)&lt;</expression>
<expression repeat="yes" fixchars="1">&lt;a href=&quot;/name[^&gt;]*&gt;([^&lt;]*)&lt;</expression>
</RegExp>
<!-- Empty expression belonging to the outer RegExp that reads $$2. -->
<expression noclean="1"/>
</RegExp>
</GetIMDBWriters>
<!--
  GetIMDBCast: extracts the cast table from the page in $$1, builds actor elements
  (thumb, name, role) in buffer 7, normalises them into buffer 2 and wraps the result
  in a details element written to buffer 5.
  NOTE(review): this text is a diff rendering without +/- markers. Where two similar
  lines sit next to each other, the first is presumably the removed version and the
  second the added one, so start and end tags do not balance as shown. Confirm against
  the raw patch.
-->
<GetIMDBCast dest="5">
<RegExp input="$$2" output="&lt;details&gt;\1&lt;/details&gt;" dest="5">
<!-- Two variants of the step that copies the table with class "cast_list" into buffer 6: one gated on conditional="!fullcredits" with literal quotes, one without a condition using escaped quotes. -->
<RegExp conditional="!fullcredits" input="$$1" output="\1" dest="6">
<expression noclean="1">&lt;table class="cast_list"&gt;(.*?)&lt;/table&gt;</expression>
<RegExp input="$$1" output="\1" dest="6">
<expression noclean="1">&lt;table class=&quot;cast_list&quot;&gt;(.*?)&lt;/table&gt;</expression>
</RegExp>
<!-- Variant gated on conditional="fullcredits": copies the table with class "cast" instead. -->
<RegExp conditional="fullcredits" input="$$1" output="\1" dest="6">
<expression noclean="1">&lt;table class="cast"&gt;(.*?)&lt;/table&gt;</expression>
<!-- Actor builder reading image tags inside noscript: name from the alt text (group 1), thumb rebuilt from the image path with a fixed _SX512_SY512_ size segment, role from group 5. -->
<RegExp input="$$6" output="&lt;actor&gt;&lt;thumb&gt;\2_SX512_SY512_\3&lt;/thumb&gt;&lt;name&gt;\1&lt;/name&gt;&lt;role&gt;\5&lt;/role&gt;&lt;/actor&gt;" dest="7">
<expression repeat="yes" clear="yes" trim="3,4" noclean="1,2">&lt;noscript&gt;&lt;img \n[^a]*alt=&quot;([^&quot;]*)&quot;[^&quot;]*&quot;[^&quot;]*&quot;[^s]*src=&quot;(?:([^&quot;]*\.)[^&quot;]*(\.jpg))[^&gt;]*.*?ter"&gt;[^&gt;]*&gt;\n\s*(&lt;[^&gt;]*&gt;)?([^&lt;\(]*)?</expression>
</RegExp>
<!-- Actor builder reading table cells with classes "nm" and "char": the thumb size segment comes from $INFO[imdbscale], name from group 3, role from group 5. -->
<RegExp input="$$6" output="&lt;actor&gt;&lt;thumb&gt;\1_SX$INFO[imdbscale]_SY$INFO[imdbscale]_\2&lt;/thumb&gt;&lt;name&gt;\3&lt;/name&gt;&lt;role&gt;\5&lt;/role&gt;&lt;/actor&gt;" dest="7">
<expression repeat="yes" clear="yes" fixchars="3,5" trim="3,5" noclean="1,2">&lt;img src=&quot;(?:([^&quot;]*\.)[^&quot;]*(\.jpg))?[^&gt;]*[^&quot;]*&quot;nm&quot;&gt;&lt;a href=&quot;[^&quot;]*[^&gt;]*&gt;([^&lt;]*)&lt;[^&quot;]*&quot;ddd&quot;&gt;([^&lt;]&lt;)?[^&quot;]*&quot;char&quot;&gt;(.*?)&lt;/td&gt;</expression>
<!-- Actor builder for entries whose image source ends in .png: the thumb is left empty, name is group 1, role is group 3. -->
<RegExp input="$$6" output="&lt;actor&gt;&lt;thumb&gt;&lt;/thumb&gt;&lt;name&gt;\1&lt;/name&gt;&lt;role&gt;\3&lt;/role&gt;&lt;/actor&gt;" dest="7+">
<expression repeat="yes" trim="1,2" noclean="1,2">&lt;img \n[^a]*alt=&quot;([^&quot;]*)&quot;[^&quot;]*&quot;[^&quot;]*&quot;[^s]*src=&quot;(?:[^&quot;]*\.[^&quot;]*\.png)&quot; class=&quot;&quot; /&gt;*.*?ter"&gt;[^&gt;]*&gt;\n\s*(&lt;[^&gt;]*&gt;)?([^&lt;\(]*)?</expression>
</RegExp>
<!-- Rewrites each actor entry from buffer 7 into buffer 2: the thumb text is kept only when it starts with http (group 1); a bare _SX.._SY.._ size segment is matched but not captured, which leaves the thumb empty. -->
<!-- The two expression lines differ only in the "?" after the thumb group; with it, an entry whose thumb is already empty also matches. -->
<RegExp input="$$7" output="&lt;actor&gt;&lt;thumb&gt;\1&lt;/thumb&gt;\2&lt;/actor&gt;" dest="2+">
<expression repeat="yes" clear="yes" noclean="1,2,3">&lt;actor&gt;&lt;thumb&gt;(?:(http.*?)|_SX[0-9]+_SY[0-9]+_)&lt;/thumb&gt;(.*?)&lt;/actor&gt;</expression>
<expression repeat="yes" clear="yes" noclean="1,2,3">&lt;actor&gt;&lt;thumb&gt;(?:(http.*?)|_SX[0-9]+_SY[0-9]+_)?&lt;/thumb&gt;(.*?)&lt;/actor&gt;</expression>
</RegExp>
<!-- Empty expression belonging to the outer RegExp that reads $$2; the two lines differ only in the space before the closing slash. -->
<expression noclean="1"/>
<expression noclean="1" />
</RegExp>
</GetIMDBCast>
<!--
  GetIMDBDirectors: extracts director names from the page in $$1 and wraps the
  collected director elements (buffer 2) in a details element written to buffer 5.
  NOTE(review): this text is a diff rendering without +/- markers. Where two similar
  lines sit next to each other, the first is presumably the removed version and the
  second the added one, so start and end tags do not balance as shown. Confirm against
  the raw patch.
-->
<GetIMDBDirectors dest="5">
<RegExp input="$$2" output="&lt;details&gt;\1&lt;/details&gt;" dest="5">
<!-- Two variants of the step that copies the directors section of $$1 into buffer 6: one gated on conditional="!fullcredits" and ending at a closing div, one without a condition that starts after the closing h4 and stops at the next "div class". -->
<RegExp conditional="!fullcredits" input="$$1" output="\1" dest="6">
<expression clear="yes" noclean="1">Directors?\:(.*?)&lt;/div&gt;</expression>
<RegExp input="$$1" output="\1" dest="6">
<expression clear="yes" noclean="1">Director[s]?.*?&lt;/h4&gt;(.*?)&lt;div class</expression>
</RegExp>
<!-- Variant gated on conditional="fullcredits": takes the text between "Directed by" and the closing table tag. -->
<RegExp conditional="fullcredits" input="$$1" output="\1" dest="6">
<expression clear="yes" noclean="1">Directed by(.*?)&lt;/table&gt;</expression>
</RegExp>
<!-- Emits one director element per name link found in buffer 6; two variants of the link pattern are shown, both capturing the link text. -->
<RegExp input="$$6" output="&lt;director&gt;\1&lt;/director&gt;" dest="2+">
<expression clear="yes" repeat="yes" fixchars="1">/name/[^&gt;"]*"&gt;([^&lt;]*)&lt;</expression>
<expression repeat="yes" fixchars="1" clear="yes">&lt;a href=&quot;/name[^&gt;]*&gt;([^&lt;]*)&lt;</expression>
</RegExp>
<!-- Empty expression belonging to the outer RegExp that reads $$2; the two lines differ only in the space before the closing slash. -->
<expression noclean="1"/>
<expression noclean="1" />
</RegExp>
</GetIMDBDirectors>
<GetIMDBPlot dest="5">
(3-3/3)